From ebed9ace77eed4b8e40675cff40a96f61ae291e2 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Tue, 25 Sep 2012 13:46:02 +0000 Subject: Use tarballs instead of bundles This makes a non-backward-compatible change to morph which switches it to using tarballs instead of bundles when initialising cached git repositories. This is faster because it doesn't require index-pack --fix-thin operations on the machine running morph. --- morphlib/app.py | 10 ++--- morphlib/localrepocache.py | 64 +++++++++++++++++----------- morphlib/localrepocache_tests.py | 35 +++++++++++---- morphlib/plugins/show_dependencies_plugin.py | 4 +- morphlib/plugins/update_gits_plugin.py | 4 +- morphlib/util.py | 4 +- 6 files changed, 78 insertions(+), 43 deletions(-) (limited to 'morphlib') diff --git a/morphlib/app.py b/morphlib/app.py index 30efb215..accbb9e4 100755 --- a/morphlib/app.py +++ b/morphlib/app.py @@ -81,10 +81,10 @@ class Morph(cliapp.Application): 'contain a %s where the shortname gets ' 'replaced', default=defaults['repo-alias']) - self.settings.string(['bundle-server'], - 'base URL to download bundles. ' + self.settings.string(['tarball-server'], + 'base URL to download tarballs. 
' 'If not provided, defaults to ' - 'http://TROVEHOST/bundles/', + 'http://TROVEHOST/tarballs/', metavar='URL', default=None) self.settings.string(['cache-server'], @@ -192,8 +192,8 @@ class Morph(cliapp.Application): if self.settings['cache-server'] is None: self.settings['cache-server'] = 'http://%s:8080/' % ( self.settings['trove-host']) - if self.settings['bundle-server'] is None: - self.settings['bundle-server'] = 'http://%s/bundles/' % ( + if self.settings['tarball-server'] is None: + self.settings['tarball-server'] = 'http://%s/tarballs/' % ( self.settings['trove-host']) if 'MORPH_DUMP_PROCESSED_CONFIG' in os.environ: self.settings.dump_config(sys.stdout) diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py index de619770..ae5fa655 100644 --- a/morphlib/localrepocache.py +++ b/morphlib/localrepocache.py @@ -41,7 +41,7 @@ def quote_url(url): ''' Convert URIs to strings that only contain digits, letters, % and _. NOTE: When changing the code of this function, make sure to also apply - the same to the quote_url() function of lorry. Otherwise the git bundles + the same to the quote_url() function of lorry. Otherwise the git tarballs generated by lorry may no longer be found by morph. ''' @@ -86,18 +86,18 @@ class LocalRepoCache(object): created. Instead of cloning via a normal 'git clone' directly from the - git server, we first try to download a bundle from a url, and - if that works, we clone from the bundle. + git server, we first try to download a tarball from a url, and + if that works, we unpack the tarball. 
''' - def __init__(self, app, cachedir, resolver, bundle_base_url=None): + def __init__(self, app, cachedir, resolver, tarball_base_url=None): self._app = app self._cachedir = cachedir self._resolver = resolver - if bundle_base_url and not bundle_base_url.endswith('/'): - bundle_base_url += '/' # pragma: no cover - self._bundle_base_url = bundle_base_url + if tarball_base_url and not tarball_base_url.endswith('/'): + tarball_base_url += '/' # pragma: no cover + self._tarball_base_url = tarball_base_url self._cached_repo_objects = {} def _exists(self, filename): # pragma: no cover @@ -120,24 +120,34 @@ class LocalRepoCache(object): self._app.runcmd(['git'] + args, cwd=cwd) + def _runcmd(self, args, cwd=None): # pragma: no cover + '''Execute a command. + + This is a method of its own so that unit tests can easily override + all use of external (non-git) commands, such as tar. + + ''' + + self._app.runcmd(args, cwd=cwd) + def _fetch(self, url, filename): # pragma: no cover '''Fetch contents of url into a file. This method is meant to be overridden by unit tests. ''' - self._app.status(msg="Trying to fetch %(bundle)s to seed the cache", - bundle=url, + self._app.status(msg="Trying to fetch %(tarball)s to seed the cache", + tarball=url, chatty=True) source_handle = None try: source_handle = urllib2.urlopen(url) with open(filename, 'wb') as target_handle: shutil.copyfileobj(source_handle, target_handle) - self._app.status(msg="Bundle fetch successful", + self._app.status(msg="Tarball fetch successful", chatty=True) except Exception, e: - self._app.status(msg="Bundle fetch failed: %(reason)s", + self._app.status(msg="Tarball fetch failed: %(reason)s", reason=e, chatty=True) raise @@ -177,7 +187,7 @@ class LocalRepoCache(object): # FIXME: The following is a nicer way than to do this. # However, for compatibility, we need to use the same as the - # bundle server (set up by Lorry) uses. + # tarball server (set up by Lorry) uses. 
# return urllib.quote(url, safe='') return quote_url(url) @@ -193,26 +203,32 @@ class LocalRepoCache(object): path = self._cache_name(url) return self._exists(path) - def _clone_with_bundle(self, repourl, path): + def _clone_with_tarball(self, repourl, path): escaped = self._escape(repourl) - bundle_url = urlparse.urljoin(self._bundle_base_url, escaped) + '.bndl' - bundle_path = path + '.bundle' + tarball_url = urlparse.urljoin(self._tarball_base_url, + escaped) + '.tar' + tarball_path = path + '.tar' try: - self._fetch(bundle_url, bundle_path) + self._fetch(tarball_url, tarball_path) except urllib2.URLError, e: - return False, 'Unable to fetch bundle %s: %s' % (bundle_url, e) + return False, 'Unable to fetch tarball %s: %s' % (tarball_url, e) try: - self._git(['clone', '--mirror', '-n', bundle_path, path]) - self._git(['remote', 'set-url', 'origin', repourl], cwd=path) + self._mkdir(path) + self._runcmd(['tar', 'xf', tarball_path], cwd=path) + self._git(['config', 'remote.origin.url', repourl], cwd=path) + self._git(['config', 'remote.origin.mirror', 'true'], cwd=path) + self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'], + cwd=path) except cliapp.AppException, e: # pragma: no cover if self._exists(path): shutil.rmtree(path) - return False, 'Unable to extract bundle %s: %s' % (bundle_path, e) + return False, 'Unable to extract tarball %s: %s' % ( + tarball_path, e) finally: - if self._exists(bundle_path): - self._remove(bundle_path) + if self._exists(tarball_path): + self._remove(tarball_path) return True, None @@ -231,10 +247,10 @@ class LocalRepoCache(object): except NotCached, e: pass - if self._bundle_base_url: + if self._tarball_base_url: repourl = self._resolver.pull_url(reponame) path = self._cache_name(repourl) - ok, error = self._clone_with_bundle(repourl, path) + ok, error = self._clone_with_tarball(repourl, path) if ok: return self.get_repo(reponame) else: diff --git a/morphlib/localrepocache_tests.py b/morphlib/localrepocache_tests.py index 
c178f8b4..26a92616 100644 --- a/morphlib/localrepocache_tests.py +++ b/morphlib/localrepocache_tests.py @@ -16,6 +16,7 @@ import unittest import urllib2 +import os import cliapp @@ -27,11 +28,11 @@ class LocalRepoCacheTests(unittest.TestCase): def setUp(self): aliases = ['upstream=git://example.com/#example.com:%s.git'] repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases) - bundle_base_url = 'http://lorry.example.com/bundles/' + tarball_base_url = 'http://lorry.example.com/tarballs/' self.reponame = 'upstream:reponame' self.repourl = 'git://example.com/reponame' escaped_url = 'git___example_com_reponame' - self.bundle_url = '%s%s.bndl' % (bundle_base_url, escaped_url) + self.tarball_url = '%s%s.tar' % (tarball_base_url, escaped_url) self.cachedir = '/cache/dir' self.cache_path = '%s/%s' % (self.cachedir, escaped_url) self.cache = set() @@ -39,12 +40,20 @@ class LocalRepoCacheTests(unittest.TestCase): self.fetched = [] self.removed = [] self.lrc = morphlib.localrepocache.LocalRepoCache( - object(), self.cachedir, repo_resolver, bundle_base_url) + object(), self.cachedir, repo_resolver, tarball_base_url) self.lrc._git = self.fake_git self.lrc._exists = self.fake_exists self.lrc._fetch = self.not_found self.lrc._mkdir = self.fake_mkdir self.lrc._remove = self.fake_remove + self.lrc._runcmd = self.fake_runcmd + + def fake_runcmd(self, args, cwd=None): + if args[0:2] == ['tar', 'xf']: + self.unpacked_tar = args[2] + self.cache.add(cwd) + else: + raise NotImplementedError() def fake_git(self, args, cwd=None): if args[0] == 'clone': @@ -58,7 +67,15 @@ class LocalRepoCacheTests(unittest.TestCase): elif args[0:2] == ['remote', 'set-url']: remote = args[2] url = args[3] - self.remotes[remote]['url'] = url + self.remotes[remote] = {'url': url} + elif args[0:2] == ['config', 'remote.origin.url']: + remote = 'origin' + url = args[2] + self.remotes[remote] = {'url': url} + elif args[0:2] == ['config', 'remote.origin.mirror']: + remote = 'origin' + elif args[0:2] 
== ['config', 'remote.origin.fetch']: + remote = 'origin' else: raise NotImplementedError() @@ -113,15 +130,17 @@ class LocalRepoCacheTests(unittest.TestCase): self.assertRaises(morphlib.localrepocache.NoRemote, self.lrc.cache_repo, self.repourl) - def test_does_not_mind_a_missing_bundle(self): + def test_does_not_mind_a_missing_tarball(self): self.lrc.cache_repo(self.repourl) self.assertEqual(self.fetched, []) - def test_fetches_bundle_when_it_exists(self): + def test_fetches_tarball_when_it_exists(self): self.lrc._fetch = self.fake_fetch + self.unpacked_tar = "" + self.mkdir_path = "" self.lrc.cache_repo(self.repourl) - self.assertEqual(self.fetched, [self.bundle_url]) - self.assertEqual(self.removed, [self.cache_path + '.bundle']) + self.assertEqual(self.fetched, [self.tarball_url]) + self.assertEqual(self.removed, [self.cache_path + '.tar']) self.assertEqual(self.remotes['origin']['url'], self.repourl) def test_gets_cached_shortened_repo(self): diff --git a/morphlib/plugins/show_dependencies_plugin.py b/morphlib/plugins/show_dependencies_plugin.py index 8d805a35..dee84c59 100644 --- a/morphlib/plugins/show_dependencies_plugin.py +++ b/morphlib/plugins/show_dependencies_plugin.py @@ -36,11 +36,11 @@ class ShowDependenciesPlugin(cliapp.Plugin): if not os.path.exists(self.app.settings['cachedir']): os.mkdir(self.app.settings['cachedir']) cachedir = os.path.join(self.app.settings['cachedir'], 'gits') - bundle_base_url = self.app.settings['bundle-server'] + tarball_base_url = self.app.settings['tarball-server'] repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver( self.app.settings['repo-alias']) lrc = morphlib.localrepocache.LocalRepoCache( - self.app, cachedir, repo_resolver, bundle_base_url) + self.app, cachedir, repo_resolver, tarball_base_url) if self.app.settings['cache-server']: rrc = morphlib.remoterepocache.RemoteRepoCache( self.app.settings['cache-server'], repo_resolver) diff --git a/morphlib/plugins/update_gits_plugin.py 
b/morphlib/plugins/update_gits_plugin.py index 92a687a3..608b34f1 100644 --- a/morphlib/plugins/update_gits_plugin.py +++ b/morphlib/plugins/update_gits_plugin.py @@ -45,9 +45,9 @@ class UpdateGitsPlugin(cliapp.Plugin): cachedir = os.path.join(app.settings['cachedir'], 'gits') repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver( app.settings['repo-alias']) - bundle_base_url = app.settings['bundle-server'] + tarball_base_url = app.settings['tarball-server'] cache = morphlib.localrepocache.LocalRepoCache( - app, cachedir, repo_resolver, bundle_base_url) + app, cachedir, repo_resolver, tarball_base_url) subs_to_process = set() diff --git a/morphlib/util.py b/morphlib/util.py index ce9d0dc9..af7c59ba 100644 --- a/morphlib/util.py +++ b/morphlib/util.py @@ -149,10 +149,10 @@ def new_repo_caches(app): # pragma: no cover aliases = app.settings['repo-alias'] cachedir = create_cachedir(app.settings) gits_dir = os.path.join(cachedir, 'gits') - bundle_base_url = app.settings['bundle-server'] + tarball_base_url = app.settings['tarball-server'] repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases) lrc = morphlib.localrepocache.LocalRepoCache( - app, gits_dir, repo_resolver, bundle_base_url=bundle_base_url) + app, gits_dir, repo_resolver, tarball_base_url=tarball_base_url) url = app.settings['cache-server'] if url: -- cgit v1.2.1