#!/usr/bin/python
# Copyright (C) 2011 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import cliapp
import json
import logging
import os
import urllib2
import string
import sys
from datetime import datetime
import shutil
import traceback


__version__ = '0.0'


def quote_url(url):
    ''' Convert URIs to strings that only contain digits, letters, % and _.

    Every other character is replaced by an underscore.

    NOTE: When changing the code of this function, make sure to also apply
    the same to the quote_url() function of morph. Otherwise the git bundles
    generated by lorry may no longer be found by morph.

    '''
    # ascii_letters rather than string.letters: the latter depends on the
    # current locale, and bundle names must not vary between machines.
    valid_chars = frozenset(string.digits + string.ascii_letters + '%_')
    return ''.join(c if c in valid_chars else '_' for c in url)


class Lorry(cliapp.Application):

    '''Mirror upstream repositories into git and push them to a server.

    Each command line argument is a JSON spec file mapping a project name
    to a dict that describes where the upstream lives and which version
    control system ('type') it uses.

    '''

    def add_settings(self):
        '''Register the command line / configuration settings.'''
        self.settings.string(['working-area', 'w'],
                             'use DIR as the working area (for holding '
                             'intermediate git repositories, etc)',
                             metavar='DIR',
                             default='workd')
        self.settings.string(['mirror-base-url-push'],
                             'base URL to use for pushing to the mirror '
                             'server (default: %default)',
                             metavar='URL',
                             default='ssh://gitano@roadtrain.codethink.co.uk/delta')
        self.settings.string(['mirror-base-url-fetch'],
                             'base URL to use for bundle names and for '
                             'pulling from the mirror server (default: '
                             '%default)',
                             metavar='URL',
                             default='git://git.baserock.org/delta')
        self.settings.boolean(['pull-only'],
                              'only pull from upstreams, do not push to '
                              'the mirror server')
        self.settings.boolean(['verbose', 'v'],
                              'report what is going on to stdout')
        self.settings.boolean(['repack'],
                              'repack git repositories when an import has '
                              'been updated (default: %default)',
                              default=True)
        self.settings.string(['command-stdout'],
                             'write the stdout of commands to this file',
                             metavar='FILE', default=None)
        self.settings.string(['command-stderr'],
                             'write the stderr of commands to this file',
                             metavar='FILE', default=None)
        self.settings.choice(['bundle'], ['first', 'never', 'always'],
                             'create bundles of git repositories. '
                             'first will only bundle if there is not already '
                             'a bundle in BUNDLES (default: first)')
        self.settings.string(['bundle-dest'],
                             'put created bundles in BUNDLES',
                             metavar='BUNDLES')

    def process_args(self, args):
        '''Mirror every project listed in the given spec files.

        A failure in one project is reported on stderr and in the log, and
        does not stop the remaining projects from being processed. The
        process then exits with status 1 if any project failed, 0 otherwise.

        '''
        failures = 0

        workdir = os.path.abspath(self.settings['working-area'])
        self.settings['working-area'] = workdir
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        for arg in args:
            self.progress('Processing spec file %s' % arg)
            with open(arg) as f:
                specs = json.load(f)
            for name in sorted(specs):
                self.progress('Getting: %s' % name)
                try:
                    self.gitify(name, specs[name])
                except Exception:
                    failures += 1
                    trace = traceback.format_exc()
                    sys.stderr.write('Error mirroring:\n%s' % trace)
                    logging.error(trace)

        status = 0
        if failures > 0:
            logging.debug('Total Mirrors failed: %d' % failures)
            status = 1

        self.progress('Done')
        sys.exit(status)

    def bundle(self, name, gitdir):
        '''Create a git bundle for the repository in gitdir, if configured.

        Nothing is done when the 'bundle' setting is 'never'. With 'first'
        a bundle is only built if its file does not exist yet; with
        'always' it is rebuilt every time.

        '''
        if self.settings['bundle'] == 'never':
            return

        bundlename = "%s/%s" % (self.settings['mirror-base-url-fetch'], name)
        path = os.path.join(self.settings['bundle-dest'],
                            quote_url(bundlename)) + '.bndl'
        if not os.path.exists(path) or self.settings['bundle'] == 'always':
            self.progress('.. building bundle %s' % bundlename)

            # create the bundle
            self.run_program(['git', 'bundle', 'create', path,
                              '--branches', '--tags'],
                             cwd=gitdir)

            # FIXME this is a hack to avoid unrecognized headers in bundles,
            # which happens with some repositories. See
            #
            #   http://marc.info/?l=git&m=132992959317420&w=2
            #
            # for more information. From the bundle's header section, the
            # expression below will remove all garbage lines that appear
            # between the first line (the bundle format meta comment) and
            # the list of refs.
            expr = r'1,/^[0-9a-f]\{40\}/{ /^[0-9a-f]\{40\}/!{/^[^#]/d}}'
            self.run_program(['sed', '-i', '-e', expr, path], cwd=gitdir)

    def gitify(self, name, spec):
        '''Import one project into git and push it to the mirror server.

        The existing git repository (if any) is backed up first. If the
        import, repack or bundle step fails, the failed state is saved, the
        backup is restored and the exception is re-raised. On success the
        repository is pushed (unless 'pull-only' is set) and the backup is
        removed.

        Raises cliapp.AppException if spec['type'] is not a known VCS type.

        '''
        self.progress('Getting %s' % name)
        table = {
            'bzr': self.gitify_bzr,
            'cvs': self.gitify_cvs,
            'git': self.mirror_git,
            'hg': self.gitify_hg,
            'svn': self.gitify_svn,
            'tarball': self.gitify_tarball,
        }
        vcstype = spec['type']
        if vcstype not in table:
            raise cliapp.AppException('Unknown VCS type %s' % vcstype)

        dirname = self.dirname(name)
        if not os.path.exists(dirname):
            os.mkdir(dirname)
        gitdir = os.path.join(dirname, 'git')
        backupdir = self.backup_gitdir(name, dirname, gitdir)

        try:
            table[vcstype](name, dirname, gitdir, spec)
            if self.settings['repack']:
                self.progress('.. repacking %s git repository' % name)
                self.run_program(['git', 'repack', '-a', '-d',
                                  '--depth=250', '--window=250'],
                                 cwd=gitdir)
            self.bundle(name, gitdir)
        except:
            # Deliberately catches everything (including interrupts) so the
            # last good repository is put back; the error is always
            # re-raised below.
            if backupdir is not None:
                faildir = self.save_failgit(name, dirname, gitdir)
                self.restore_backup(name, dirname, gitdir, backupdir)
                self.output.write('Mirror of %s failed, state before mirror '
                                  'is saved at %s and state after mirror is '
                                  'saved at %s\n'
                                  % (name, backupdir, faildir))
                logging.debug('Mirror of %s failed, state before mirror '
                              'is saved at %s and state after mirror is '
                              'saved at %s\n', name, backupdir, faildir)
            raise

        if not self.settings['pull-only']:
            if 'refspecs' in spec:
                self.push_to_mirror_server(name, gitdir, spec['refspecs'])
            else:
                self.push_to_mirror_server(name, gitdir)

        if backupdir is not None:
            self.progress('.. removing %s git repository backup' % name)
            shutil.rmtree(backupdir)

    def _dotgit(self, gitdir):
        '''Return gitdir/.git if it exists, otherwise gitdir itself.'''
        dotgit = os.path.join(gitdir, '.git')
        if os.path.exists(dotgit):
            return dotgit
        return gitdir

    def _timestamp(self):
        '''Return the current local time as YYYY-MM-DD-HH:MM:SS.'''
        # Spelled out instead of '%F-%T', which only some C libraries support.
        return datetime.now().strftime('%Y-%m-%d-%H:%M:%S')

    def restore_backup(self, name, dirname, gitdir, backupdir):
        '''Replace the project's git metadata with the copy in backupdir.'''
        self.progress('.. restoring %s good git repository' % name)
        dotgit = self._dotgit(gitdir)
        shutil.rmtree(dotgit)
        self.copy_gitdir(backupdir, dotgit)

    def save_failgit(self, name, dirname, gitdir):
        '''Copy the git metadata left by a failed mirror run.

        Returns the path of the copy, a timestamped "git-post-fail-*"
        directory inside dirname, or None if there was nothing to copy.

        '''
        self.progress('.. saving failed %s mirror git repository' % name)
        dotgit = self._dotgit(gitdir)
        backupdir = os.path.join(dirname,
                                 "git-post-fail-%s" % self._timestamp())
        return self.copy_gitdir(dotgit, backupdir)

    def backup_gitdir(self, name, dirname, gitdir):
        '''Copy the current git metadata before an update is attempted.

        Returns the path of the copy, a timestamped "git-pre-update-*"
        directory inside dirname, or None if no repository exists yet.

        '''
        dotgit = self._dotgit(gitdir)
        backupdir = os.path.join(dirname,
                                 "git-pre-update-%s" % self._timestamp())
        self.progress('.. backing up %s git repository to %s' %
                      (name, backupdir))
        return self.copy_gitdir(dotgit, backupdir)

    def copy_gitdir(self, source, dest):
        '''Copy the git directory source to dest, hard-linking its objects.

        Everything except the top-level 'objects' directory is copied with
        shutil.copytree(); the 'objects' tree is then recreated in dest with
        every file hard-linked to the corresponding file in source.

        Returns dest, or None if source does not exist.

        '''
        if not os.path.exists(source):
            return None

        # copy everything except the objects dir
        def ignoreobjects(dirname, filenames):
            # Only the top-level 'objects' directory is skipped.
            if dirname == source:
                return ['objects']
            return []
        shutil.copytree(source, dest, ignore=ignoreobjects)

        # hardlink the objects
        sourceobjects = os.path.join(source, 'objects')
        assert os.path.exists(sourceobjects), "No source objects"
        objectspath = os.path.join(dest, 'objects')
        os.mkdir(objectspath)
        # Walk the *source* tree (top-down, so each directory has been
        # created in the destination before its contents are visited).
        for dirpath, dirnames, filenames in os.walk(sourceobjects):
            relpath = os.path.relpath(dirpath, sourceobjects)
            destdir = os.path.normpath(os.path.join(objectspath, relpath))
            for dirname in dirnames:
                os.mkdir(os.path.join(destdir, dirname))
            for filename in filenames:
                os.link(os.path.join(dirpath, filename),
                        os.path.join(destdir, filename))
        return dest

    def mirror_git(self, project_name, dirname, gitdir, spec):
        '''Clone spec['url'] as a mirror, or fetch into an existing clone.'''
        if not os.path.exists(gitdir):
            self.progress('.. doing initial clone')
            self.run_program(['git', 'clone', '--mirror', spec['url'],
                              gitdir])
        else:
            self.progress('.. updating existing clone')
            self.run_program(['git', 'fetch', spec['url'],
                              '+refs/heads/*:refs/heads/*',
                              '+refs/tags/*:refs/tags/*'],
                             cwd=gitdir)

    def gitify_bzr(self, project_name, dirname, gitdir, spec):
        '''Import Bazaar branches into git via fast-export / fast-import.

        The branches imported are those in spec['branches'] (a mapping of
        branch name to address), plus spec['url'] as branch 'trunk' when
        'url' is given.

        '''
        bzrdir = os.path.join(dirname, 'bzr')
        # check if repo exists
        if not os.path.exists(bzrdir):
            self.progress('.. creating bzr repository')
            self.run_program(['bzr', 'init-repo', '--no-trees', bzrdir])

        if not os.path.exists(gitdir):
            self.progress('.. creating git repo')
            os.mkdir(gitdir)
            self.run_program(['git', 'init', gitdir])

        # branches are the listed branches, plus the branch specified in url
        # (copied, so that the caller's spec is left untouched)
        if 'branches' in spec:
            branches = dict(spec['branches'])
        else:
            branches = {}
        if 'url' in spec:
            branches['trunk'] = spec['url']
        logging.debug('all branches: %s' % repr(branches))

        for branch, address in branches.items():
            branchdir = os.path.join(bzrdir, branch)
            if not os.path.exists(branchdir):
                self.progress('.. doing initial bzr branch')
                self.run_program(['bzr', 'branch', '--quiet', address,
                                  branchdir])
            else:
                self.progress('.. updating bzr branch')
                self.run_program(['bzr', 'pull', '--quiet', address],
                                 cwd=branchdir)

        exports = {}
        bzrmarks = os.path.join(gitdir, '.git', 'marks.bzr')
        for branch in branches:
            branchdir = os.path.join(bzrdir, branch)
            self.progress('.. fast-exporting branch %s from bzr' % branch)
            exports[branch] = os.path.join(dirname, 'fast-export' + branch)
            cmdline = ['bzr', 'fast-export', '--git-branch=' + branch,
                       branchdir, exports[branch]]
            if os.path.exists(bzrmarks):
                cmdline.append('--marks=' + bzrmarks)
            else:
                cmdline.append('--export-marks=' + bzrmarks)
            self.run_program(cmdline)

        gitmarks = os.path.join(gitdir, '.git', 'marks.git')
        for branch in branches:
            self.progress('.. fast-importing branch %s into git' % branch)
            with open(exports[branch], 'rb') as exportfile:
                cmdline = ['git', 'fast-import',
                           '--export-marks=' + gitmarks]
                if os.path.exists(gitmarks):
                    cmdline.append('--import-marks=' + gitmarks)
                self.run_program(cmdline, stdin=exportfile, cwd=gitdir)

        for branch in branches:
            self.progress('.. removing temporary fast-export file ' +
                          exports[branch])
            os.remove(exports[branch])

    def gitify_svn(self, project_name, dirname, gitdir, spec):
        '''Import a Subversion repository with git-svn.

        spec['layout'] is either the string "standard" or a dict with the
        keys "trunk", "branches" and "tags".

        '''
        if not os.path.exists(gitdir):
            self.progress('.. doing initial clone')
            os.mkdir(gitdir)
            layout = spec["layout"]
            # if standard layout specified, fill in the defaults
            if layout == "standard":
                layout = {
                    "trunk": "trunk",
                    "tags": "tags/*",
                    "branches": "branches/*",
                }
            # init the repo then manually set the refspecs to fetch into local
            # git-svn can apparently provide better history tracking by
            # fetching the root of the repository
            # git-svn will convert branch, trunk and tag paths to allow this,
            # but it is simpler to disable it and do it manually
            self.run_program(['git', 'svn', 'init', spec['url'], gitdir,
                              '--svn-remote=svn', '--no-minimize-url'])
            self.run_program(['git', 'config', 'svn-remote.svn.fetch',
                              layout["trunk"] + ':refs/heads/master'],
                             cwd=gitdir)
            self.run_program(['git', 'config', 'svn-remote.svn.branches',
                              layout["branches"] + ':refs/heads/*'],
                             cwd=gitdir)
            self.run_program(['git', 'config', 'svn-remote.svn.tags',
                              layout["tags"] + ':refs/tags/*'],
                             cwd=gitdir)
        else:
            self.progress('.. updating existing clone')

        # update the remote tracking branches
        self.run_program(['git', 'svn', 'fetch'], cwd=gitdir)

    def gitify_cvs(self, project_name, dirname, gitdir, spec):
        '''Import module spec['module'] from the CVS root spec['url'].'''
        self.run_program(['git', 'cvsimport', '-a', '-d', spec['url'],
                          '-C', gitdir, spec['module']])

    def gitify_hg(self, project_name, dirname, gitdir, spec):
        '''Import a Mercurial repository with hg-fast-export.'''
        hgdir = os.path.join(dirname, 'hg')
        if os.path.exists(hgdir):
            self.progress('.. updating hg branch')
            self.run_program(['hg', 'pull', '--quiet'], cwd=hgdir)
        else:
            self.progress('.. doing initial hg branch')
            self.run_program(['hg', 'clone', '--quiet', spec['url'], hgdir])

        if not os.path.exists(gitdir):
            self.run_program(['git', 'init', gitdir])

        self.progress('.. fast-exporting into git')
        # '../hg' is hgdir as seen from gitdir; both live in dirname.
        self.run_program(['hg-fast-export', '--quiet', '-r', '../hg'],
                         cwd=gitdir)

    def gitify_tarball(self, project_name, dirname, gitdir, spec):
        '''Download a tarball, unpack it into gitdir and commit the result.

        Optional spec keys: 'compression' (a long tar option name, without
        the leading dashes) and 'strip' (number of leading path components
        to remove).

        '''
        tardest = os.path.join(dirname, 'tarball')
        if not os.path.exists(tardest):
            # Download to a temporary name and rename when complete, so an
            # interrupted download is never mistaken for a finished one.
            partial = tardest + '.part'
            urlfile = urllib2.urlopen(spec['url'])
            try:
                with open(partial, 'wb') as tarfile:
                    shutil.copyfileobj(urlfile, tarfile)
            finally:
                urlfile.close()
            os.rename(partial, tardest)

        if not os.path.exists(gitdir):
            self.run_program(['git', 'init', gitdir])

        cmdline = ['tar', '--extract', '--file', tardest]
        # compression is handled in long form, so use gzip instead of z
        if 'compression' in spec:
            cmdline += ['--' + spec['compression']]
        # tarballs often have a directory on top, strip = 1 will remove it
        if 'strip' in spec:
            cmdline += ['--strip-components', str(spec['strip'])]
        self.run_program(cmdline, cwd=gitdir)

        self.run_program(['git', 'add', '.'], cwd=gitdir)
        self.run_program(['git', 'commit', '-m', 'Tarball conversion'],
                         cwd=gitdir)

    def push_to_mirror_server(self, name, gitdir, pushrefspecs=None):
        '''Push the repository in gitdir to the mirror server.

        pushrefspecs is the list of refspecs to push; when omitted, all
        heads and all tags are pushed.

        '''
        if pushrefspecs is None:
            pushrefspecs = ['refs/heads/*:refs/heads/*',
                            'refs/tags/*:refs/tags/*']
        pushurl = "%s/%s.git" % (self.settings['mirror-base-url-push'], name)
        self.progress('.. pushing %s to mirror server %s' % (name, pushurl))
        self.run_program(['git', 'push', pushurl] + list(pushrefspecs),
                         cwd=gitdir)

    def run_program(self, argv, **kwargs):
        '''Run the command argv and return its standard output.

        Keyword arguments are passed on to runcmd_unchecked(). When the
        'command-stdout' / 'command-stderr' settings are set, the
        corresponding stream is appended to that file instead.

        Raises Exception if the command exits with a non-zero status.

        '''
        opened = []
        try:
            for setting, stream in (('command-stdout', 'stdout'),
                                    ('command-stderr', 'stderr')):
                if self.settings[setting]:
                    redirect = open(self.settings[setting], 'a')
                    opened.append(redirect)
                    kwargs[stream] = redirect

            logging.debug('Running: argv=%s kwargs=%s' %
                          (repr(argv), repr(kwargs)))
            exit_code, out, err = self.runcmd_unchecked(argv, **kwargs)
        finally:
            # Close our redirect files whatever happened to the command.
            for redirect in opened:
                redirect.close()

        logging.debug('Command: %s\nExit: %s\nStdout:\n%sStderr:\n%s' %
                      (argv, exit_code, self.indent(out or ''),
                       self.indent(err or '')))
        if exit_code != 0:
            raise Exception('%s failed (exit code %s):\n%s' %
                            (' '.join(argv), exit_code,
                             self.indent(err or '')))
        return out

    def indent(self, string):
        '''Return string with every line prefixed by a space.

        Each line of the result ends with a newline.

        '''
        return ''.join(' %s\n' % line for line in string.splitlines())

    def dirname(self, project_name):
        '''Return the working directory for project_name.

        Raises cliapp.AppException if the name contains a '/' or a NUL
        character, since it is used as a single path component.

        '''
        if '/' in project_name or '\0' in project_name:
            raise cliapp.AppException('Invalid project name %r' %
                                      project_name)
        return os.path.join(self.settings['working-area'], project_name)

    def progress(self, msg):
        '''Log msg, and also write it to the output when verbose is set.'''
        logging.debug(msg)
        if self.settings['verbose']:
            self.output.write('%s\n' % msg)


if __name__ == '__main__':
    Lorry(version=__version__).run()