From 8f88b10048c858179a2c19c6eb479e59406d8b73 Mon Sep 17 00:00:00 2001 From: Daniel Firth Date: Tue, 10 Dec 2013 15:06:57 +0000 Subject: Refactored localrepocache --- morphlib/localrepocache.py | 130 ++++++++++----------------------------- morphlib/localrepocache_tests.py | 60 ++++++++---------- 2 files changed, 59 insertions(+), 131 deletions(-) diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py index aa45cd3d..9c20e4bc 100644 --- a/morphlib/localrepocache.py +++ b/morphlib/localrepocache.py @@ -19,10 +19,11 @@ import os import re import urllib2 import urlparse -import shutil import string +import tempfile import cliapp +import fs.osfs import morphlib @@ -93,6 +94,7 @@ class LocalRepoCache(object): def __init__(self, app, cachedir, resolver, tarball_base_url=None): self._app = app + self.fs = fs.osfs.OSFS('/') self._cachedir = cachedir self._resolver = resolver if tarball_base_url and not tarball_base_url.endswith('/'): @@ -100,16 +102,6 @@ class LocalRepoCache(object): self._tarball_base_url = tarball_base_url self._cached_repo_objects = {} - def _exists(self, filename): # pragma: no cover - '''Does a file exist? - - This is a wrapper around os.path.exists, so that unit tests may - override it. - - ''' - - return os.path.exists(filename) - def _git(self, args, cwd=None): # pragma: no cover '''Execute git command. @@ -120,67 +112,24 @@ class LocalRepoCache(object): self._app.runcmd(['git'] + args, cwd=cwd) - def _runcmd(self, args, cwd=None): # pragma: no cover - '''Execute a command. - - This is a method of its own so that unit tests can easily override - all use of the external git command. - - ''' - - self._app.runcmd(args, cwd=cwd) - - def _fetch(self, url, filename): # pragma: no cover + def _fetch(self, url, path): # pragma: no cover '''Fetch contents of url into a file. This method is meant to be overridden by unit tests. ''' self._app.status(msg="Trying to fetch %(tarball)s to seed the cache", - tarball=url, - chatty=True) - source_handle = None - try: - source_handle = urllib2.urlopen(url) - with open(filename, 'wb') as target_handle: - shutil.copyfileobj(source_handle, target_handle) - self._app.status(msg="Tarball fetch successful", - chatty=True) - except Exception, e: - self._app.status(msg="Tarball fetch failed: %(reason)s", - reason=e, - chatty=True) - raise - finally: - if source_handle is not None: - source_handle.close() - - def _mkdir(self, dirname): # pragma: no cover - '''Create a directory. - - This method is meant to be overridden by unit tests. - - ''' - - os.mkdir(dirname) + tarball=url, chatty=True) + self._app.runcmd(['wget', '-q', '-O-', url], + ['tar', 'xf', '-'], cwd=path) - def _remove(self, filename): # pragma: no cover - '''Remove given file. + def _mkdtemp(self, dirname): # pragma: no cover + '''Creates a temporary directory. This method is meant to be overridden by unit tests. ''' - - os.remove(filename) - - def _rmtree(self, dirname): # pragma: no cover - '''Remove given directory tree. - - This method is meant to be overridden by unit tests. - - ''' - - shutil.rmtree(dirname) + return tempfile.mkdtemp(dir=dirname) def _escape(self, url): '''Escape a URL so it can be used as a basename in a file.''' @@ -194,45 +143,31 @@ class LocalRepoCache(object): def _cache_name(self, url): scheme, netloc, path, query, fragment = urlparse.urlsplit(url) - if scheme == 'file': - return path - else: - basename = self._escape(url) - path = os.path.join(self._cachedir, basename) - return path + if scheme != 'file': + path = os.path.join(self._cachedir, self._escape(url)) + return path def has_repo(self, reponame): '''Have we already got a cache of a given repo?''' url = self._resolver.pull_url(reponame) path = self._cache_name(url) - return self._exists(path) + return self.fs.exists(path) def _clone_with_tarball(self, repourl, path): - escaped = self._escape(repourl) tarball_url = urlparse.urljoin(self._tarball_base_url, - escaped) + '.tar' - tarball_path = path + '.tar' - + self._escape(repourl)) + '.tar' try: - self._fetch(tarball_url, tarball_path) - except urllib2.URLError, e: - return False, 'Unable to fetch tarball %s: %s' % (tarball_url, e) - - try: - self._mkdir(path) - self._runcmd(['tar', 'xf', tarball_path], cwd=path) + self.fs.makedir(path) + self._fetch(tarball_url, path) self._git(['config', 'remote.origin.url', repourl], cwd=path) self._git(['config', 'remote.origin.mirror', 'true'], cwd=path) self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'], cwd=path) - except cliapp.AppException, e: # pragma: no cover - if self._exists(path): - shutil.rmtree(path) + except BaseException, e: # pragma: no cover + if self.fs.exists(path): + self.fs.removedir(path, force=True) return False, 'Unable to extract tarball %s: %s' % ( - tarball_path, e) - finally: - if self._exists(tarball_path): - self._remove(tarball_path) + tarball_url, e) return True, None @@ -243,32 +178,35 @@ class LocalRepoCache(object): ''' errors = [] - if not self._exists(self._cachedir): - self._mkdir(self._cachedir) + if not self.fs.exists(self._cachedir): + self.fs.makedir(self._cachedir, recursive=True) try: return self.get_repo(reponame) except NotCached, e: pass + repourl = self._resolver.pull_url(reponame) + path = self._cache_name(repourl) if self._tarball_base_url: - repourl = self._resolver.pull_url(reponame) - path = self._cache_name(repourl) ok, error = self._clone_with_tarball(repourl, path) if ok: return self.get_repo(reponame) else: errors.append(error) - - repourl = self._resolver.pull_url(reponame) - path = self._cache_name(repourl) + self._app.status( + msg='Failed to fetch tarball, falling back to git clone.') + target = self._mkdtemp(self._cachedir) try: - self._git(['clone', '--mirror', '-n', repourl, path]) + self._git(['clone', '--mirror', '-n', repourl, target]) except cliapp.AppException, e: errors.append('Unable to clone from %s to %s: %s' % - (repourl, path, e)) + (repourl, target, e)) + if self.fs.exists(target): + self.fs.removedir(target, recursive=True, force=True) raise NoRemote(reponame, errors) + self.fs.rename(target, path) return self.get_repo(reponame) def get_repo(self, reponame): @@ -279,7 +217,7 @@ class LocalRepoCache(object): else: repourl = self._resolver.pull_url(reponame) path = self._cache_name(repourl) - if self._exists(path): + if self.fs.exists(path): repo = morphlib.cachedrepo.CachedRepo(self._app, reponame, repourl, path) self._cached_repo_objects[reponame] = repo diff --git a/morphlib/localrepocache_tests.py b/morphlib/localrepocache_tests.py index 6c5410ce..22b5ea54 100644 --- a/morphlib/localrepocache_tests.py +++ b/morphlib/localrepocache_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012 Codethink Limited +# Copyright (C) 2012-2013 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,10 +19,17 @@ import urllib2 import os import cliapp +import fs.memoryfs import morphlib +class FakeApplication(object): + + def status(self, msg): + pass + + class LocalRepoCacheTests(unittest.TestCase): def setUp(self): @@ -35,35 +42,24 @@ class LocalRepoCacheTests(unittest.TestCase): self.tarball_url = '%s%s.tar' % (tarball_base_url, escaped_url) self.cachedir = '/cache/dir' self.cache_path = '%s/%s' % (self.cachedir, escaped_url) - self.cache = set() self.remotes = {} self.fetched = [] self.removed = [] self.lrc = morphlib.localrepocache.LocalRepoCache( - object(), self.cachedir, repo_resolver, tarball_base_url) + FakeApplication(), self.cachedir, repo_resolver, tarball_base_url) + self.lrc.fs = fs.memoryfs.MemoryFS() self.lrc._git = self.fake_git - self.lrc._exists = self.fake_exists self.lrc._fetch = self.not_found - self.lrc._mkdir = self.fake_mkdir - self.lrc._remove = self.fake_remove - self.lrc._runcmd = self.fake_runcmd - - def fake_runcmd(self, args, cwd=None): - if args[0:2] == ['tar', 'xf']: - self.unpacked_tar = args[2] - self.cache.add(cwd) - else: - raise NotImplementedError() + self.lrc._mkdtemp = self.fake_mkdtemp + self._mkdtemp_count = 0 def fake_git(self, args, cwd=None): if args[0] == 'clone': self.assertEqual(len(args), 5) remote = args[3] local = args[4] - if local in self.cache: - raise Exception('cloning twice to %s' % local) self.remotes['origin'] = {'url': remote, 'updates': 0} - self.cache.add(local) + self.lrc.fs.makedir(local, recursive=True) elif args[0:2] == ['remote', 'set-url']: remote = args[2] url = args[3] @@ -79,22 +75,14 @@ class LocalRepoCacheTests(unittest.TestCase): else: raise NotImplementedError() - def fake_exists(self, filename): - return filename in self.cache - - def fake_mkdir(self, dirname): - self.cache.add(dirname) - - def fake_remove(self, filename): - self.removed.append(filename) + def fake_mkdtemp(self, dirname): + thing = "foo"+str(self._mkdtemp_count) + self._mkdtemp_count += 1 + self.lrc.fs.makedir(dirname+"/"+thing) + return thing def not_found(self, url, path): - raise urllib2.URLError('Not found') - - def fake_fetch(self, url, path): - self.fetched.append(url) - self.cache.add(path) - return True + raise cliapp.AppException('Not found') def test_has_not_got_shortened_repo_initially(self): self.assertFalse(self.lrc.has_repo(self.reponame)) @@ -113,11 +101,11 @@ class LocalRepoCacheTests(unittest.TestCase): self.assertTrue(self.lrc.has_repo(self.repourl)) def test_cachedir_does_not_exist_initially(self): - self.assertFalse(self.cachedir in self.cache) + self.assertFalse(self.lrc.fs.exists(self.cachedir)) def test_creates_cachedir_if_missing(self): self.lrc.cache_repo(self.repourl) - self.assertTrue(self.cachedir in self.cache) + self.assertTrue(self.lrc.fs.exists(self.cachedir)) def test_happily_caches_same_repo_twice(self): self.lrc.cache_repo(self.repourl) @@ -125,6 +113,7 @@ class LocalRepoCacheTests(unittest.TestCase): def test_fails_to_cache_when_remote_does_not_exist(self): def fail(args): + self.lrc.fs.makedir(args[4]) raise cliapp.AppException('') self.lrc._git = fail self.assertRaises(morphlib.localrepocache.NoRemote, @@ -135,12 +124,12 @@ class LocalRepoCacheTests(unittest.TestCase): self.assertEqual(self.fetched, []) def test_fetches_tarball_when_it_exists(self): - self.lrc._fetch = self.fake_fetch + self.lrc._fetch = lambda url, path: self.fetched.append(url) self.unpacked_tar = "" self.mkdir_path = "" self.lrc.cache_repo(self.repourl) self.assertEqual(self.fetched, [self.tarball_url]) - self.assertEqual(self.removed, [self.cache_path + '.tar']) + self.assertFalse(self.lrc.fs.exists(self.cache_path + '.tar')) self.assertEqual(self.remotes['origin']['url'], self.repourl) def test_gets_cached_shortened_repo(self): @@ -165,6 +154,7 @@ class LocalRepoCacheTests(unittest.TestCase): self.assertTrue(self.repourl in str(e)) def test_avoids_caching_local_repo(self): + self.lrc.fs.makedir('/local/repo', recursive=True) self.lrc.cache_repo('file:///local/repo') cached = self.lrc.get_repo('file:///local/repo') assert cached.path == '/local/repo' -- cgit v1.2.1