diff options
Diffstat (limited to 'morphlib')
-rw-r--r-- | morphlib/__init__.py | 3 | ||||
-rw-r--r-- | morphlib/buildcommand.py | 18 | ||||
-rw-r--r-- | morphlib/builder.py | 4 | ||||
-rw-r--r-- | morphlib/builder_tests.py | 2 | ||||
-rw-r--r-- | morphlib/definitions_repo.py | 39 | ||||
-rw-r--r-- | morphlib/git.py | 9 | ||||
-rw-r--r-- | morphlib/localrepocache.py | 357 | ||||
-rw-r--r-- | morphlib/localrepocache_tests.py | 149 | ||||
-rw-r--r-- | morphlib/plugins/anchor_plugin.py | 5 | ||||
-rw-r--r-- | morphlib/plugins/artifact_inspection_plugin.py | 37 | ||||
-rw-r--r-- | morphlib/plugins/certify_plugin.py | 8 | ||||
-rw-r--r-- | morphlib/plugins/cross-bootstrap_plugin.py | 2 | ||||
-rw-r--r-- | morphlib/plugins/diff_plugin.py | 10 | ||||
-rw-r--r-- | morphlib/plugins/get_repo_plugin.py | 6 | ||||
-rw-r--r-- | morphlib/plugins/list_artifacts_plugin.py | 8 | ||||
-rw-r--r-- | morphlib/plugins/show_dependencies_plugin.py | 20 | ||||
-rw-r--r-- | morphlib/plugins/system_manifests_plugin.py | 34 | ||||
-rw-r--r-- | morphlib/remoterepocache.py | 105 | ||||
-rw-r--r-- | morphlib/remoterepocache_tests.py | 137 | ||||
-rw-r--r-- | morphlib/repocache.py | 565 | ||||
-rw-r--r-- | morphlib/repocache_tests.py | 281 | ||||
-rw-r--r-- | morphlib/sourceresolver.py | 80 | ||||
-rw-r--r-- | morphlib/util.py | 54 |
23 files changed, 976 insertions, 957 deletions
diff --git a/morphlib/__init__.py b/morphlib/__init__.py index 7724c41c..066ab9f6 100644 --- a/morphlib/__init__.py +++ b/morphlib/__init__.py @@ -69,15 +69,14 @@ import git import gitdir import gitindex import localartifactcache -import localrepocache import mountableimage import morphologyfinder import morphology import morphloader import morphset import remoteartifactcache -import remoterepocache import repoaliasresolver +import repocache import savefile import source import sourcepool diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py index a2ad3301..e185a808 100644 --- a/morphlib/buildcommand.py +++ b/morphlib/buildcommand.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2011-2015 Codethink Limited +# Copyright (C) 2011-2016 Codethink Limited # Copyright © 2015 Richard Ipsum # # This program is free software; you can redistribute it and/or modify @@ -47,7 +47,7 @@ class BuildCommand(object): def __init__(self, app, build_env = None): self.app = app self.lac, self.rac = self.new_artifact_caches() - self.lrc, self.rrc = self.new_repo_caches() + self.repo_cache = morphlib.util.new_repo_cache(self.app) def build(self, repo_name, ref, filename, original_ref=None): '''Build a given system morphology.''' @@ -76,9 +76,6 @@ class BuildCommand(object): ''' return morphlib.util.new_artifact_caches(self.app.settings) - def new_repo_caches(self): - return morphlib.util.new_repo_caches(self.app) - def new_build_env(self, arch): '''Create a new BuildEnvironment instance.''' return morphlib.buildenvironment.BuildEnvironment(self.app.settings, @@ -95,10 +92,8 @@ class BuildCommand(object): ''' self.app.status(msg='Creating source pool', chatty=True) srcpool = morphlib.sourceresolver.create_source_pool( - self.lrc, self.rrc, repo_name, ref, filenames, - cachedir=self.app.settings['cachedir'], + self.repo_cache, repo_name, ref, filenames, original_ref=original_ref, - update_repos=not self.app.settings['no-git-update'], status_cb=self.app.status) return srcpool @@ -394,8 +389,9 @@ class BuildCommand(object): '''Update the local git repository cache with the sources.''' repo_name = source.repo_name - source.repo = self.lrc.get_updated_repo(repo_name, ref=source.sha1) - self.lrc.ensure_submodules(source.repo, source.sha1) + source.repo = self.repo_cache.get_updated_repo(repo_name, + ref=source.sha1) + self.repo_cache.ensure_submodules(source.repo, source.sha1) def cache_artifacts_locally(self, artifacts): '''Get artifacts missing from local cache from remote cache.''' @@ -540,7 +536,7 @@ class BuildCommand(object): '%(sha1)s', name=source.name, sha1=source.sha1[:7]) builder = morphlib.builder.Builder( - self.app, staging_area, self.lac, self.rac, self.lrc, + self.app, staging_area, self.lac, self.rac, self.repo_cache, self.app.settings['max-jobs'], setup_mounts) return builder.build_and_cache(source) diff --git a/morphlib/builder.py b/morphlib/builder.py index 2d0a4bd4..c980a276 100644 --- a/morphlib/builder.py +++ b/morphlib/builder.py @@ -45,7 +45,7 @@ def extract_sources(app, repo_cache, repo, sha1, srcdir): #pragma: no cover morphlib.gitdir.checkout_from_cached_repo(repo, sha1, destdir) morphlib.git.reset_workdir(app.runcmd, destdir) - submodules = morphlib.git.Submodules(app, repo.dirname, sha1) + submodules = morphlib.git.Submodules(repo.dirname, sha1, app.runcmd) try: submodules.load() except morphlib.git.NoModulesFileError: @@ -187,7 +187,7 @@ class BuilderBase(object): ''' assert isinstance(self.source.repo, - morphlib.localrepocache.CachedRepo) + morphlib.repocache.CachedRepo) meta = { 'artifact-name': artifact_name, 'source-name': self.source.name, diff --git a/morphlib/builder_tests.py b/morphlib/builder_tests.py index da1f432e..54bc4a8f 100644 --- a/morphlib/builder_tests.py +++ b/morphlib/builder_tests.py @@ -51,7 +51,7 @@ class FakeSource(object): self.name = 'a' with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - self.repo = morphlib.localrepocache.CachedRepo( + self.repo = morphlib.repocache.CachedRepo( 'path', 'repo', 'url') self.repo_name = 'url' self.original_ref = 'e' diff --git a/morphlib/definitions_repo.py b/morphlib/definitions_repo.py index 4c13abee..8b022867 100644 --- a/morphlib/definitions_repo.py +++ b/morphlib/definitions_repo.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015 Codethink Limited +# Copyright (C) 2015-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -127,9 +127,9 @@ class DefinitionsRepo(gitdir.GitDirectory): return bbcm() @contextlib.contextmanager - def source_pool(self, lrc, rrc, cachedir, ref, system_filename, + def source_pool(self, repo_cache, ref, system_filename, include_local_changes=False, push_local_changes=False, - update_repos=True, status_cb=None, build_ref_prefix=None, + status_cb=None, build_ref_prefix=None, git_user_name=None, git_user_email=None): '''Load the system defined in 'morph' and all the sources it contains. @@ -162,15 +162,10 @@ class DefinitionsRepo(gitdir.GitDirectory): setting, but that was probably only useful for `morph distbuild` and that now uses branch_with_local_changes(). - The 'lrc' and 'rrc' parameters are local and remote Git repo caches. - Use morphlib.util.new_repo_caches() to obtain these. The 'cachedir' - parameter points to where Git repos are cached by Morph, - app.settings['cachedir'] tells you that. - - The 'update_repos' flag allows you to disable updating Git repos, to - honour app.settings['no-git-update']. If one of the refs in the build - graph is not available locally and update_repos is False, you will see - a morphlib.gitdir.InvalidRefError exception. + The 'repo_cache' parameter is a morphlib.repocache.RepoCache instance. + If update_gits=False is set for this repo cache, and one of the refs in + the build graph is not available locally, you will see a + morphlib.gitdir.InvalidRefError exception. The 'status_cb' function will be called if set to output progress and status messages to the user. @@ -200,9 +195,8 @@ class DefinitionsRepo(gitdir.GitDirectory): status_cb(msg='Deciding on task order') yield morphlib.sourceresolver.create_source_pool( - lrc, rrc, repo_url, commit, [system_filename], - cachedir=cachedir, original_ref=original_ref, - update_repos=update_repos, status_cb=status_cb) + repo_cache, repo_url, commit, [system_filename], + original_ref=original_ref, status_cb=status_cb) else: repo_url = self.remote_url commit = self.resolve_ref_to_commit(ref) @@ -212,9 +206,8 @@ class DefinitionsRepo(gitdir.GitDirectory): try: yield morphlib.sourceresolver.create_source_pool( - lrc, rrc, repo_url, commit, [system_filename], - cachedir=cachedir, original_ref=ref, - update_repos=update_repos, status_cb=status_cb) + repo_cache, repo_url, commit, [system_filename], + original_ref=ref, status_cb=status_cb) except morphlib.sourceresolver.InvalidDefinitionsRefError as e: raise cliapp.AppException( 'Commit %s wasn\'t found in the "origin" remote %s. ' @@ -332,7 +325,7 @@ class DefinitionsRepoWithApp(DefinitionsRepo): self._git_user_name = morphlib.git.get_user_name(app.runcmd) self._git_user_email = morphlib.git.get_user_email(app.runcmd) - self._lrc, self._rrc = morphlib.util.new_repo_caches(app) + self.repo_cache = morphlib.util.new_repo_cache(self.app) def branch_with_local_changes(self, uuid, push=False): '''Equivalent to DefinitionsRepo.branch_with_local_changes().''' @@ -343,22 +336,20 @@ class DefinitionsRepoWithApp(DefinitionsRepo): build_ref_prefix=self.app.settings['build-ref-prefix'], git_user_name=self._git_user_name, git_user_email=self._git_user_email, - status_cb=self.app.status,) + status_cb=self.app.status) def source_pool(self, ref, system_filename): '''Equivalent to DefinitionsRepo.source_pool().''' local_changes = self.app.settings['local-changes'] return DefinitionsRepo.source_pool( - self, self._lrc, self._rrc, self.app.settings['cachedir'], - ref, system_filename, + self, self.repo_cache, ref, system_filename, include_local_changes=(local_changes == 'include'), push_local_changes=self.app.settings['push-build-branches'], build_ref_prefix=self.app.settings['build-ref-prefix'], git_user_name=self._git_user_name, git_user_email=self._git_user_email, - status_cb=self.app.status, - update_repos=(not self.app.settings['no-git-update'])) + status_cb=self.app.status) def _local_definitions_repo(path, search_for_root, app=None): '''Open a local Git repo containing Baserock definitions, at 'path'. diff --git a/morphlib/git.py b/morphlib/git.py index 190544ac..cab551ef 100644 --- a/morphlib/git.py +++ b/morphlib/git.py @@ -58,12 +58,13 @@ class MissingSubmoduleCommitError(cliapp.AppException): class Submodules(object): - def __init__(self, app, repo, ref): - self.app = app + def __init__(self, repo, ref, runcmd_cb=cliapp.runcmd): self.repo = repo self.ref = ref self.submodules = [] + self.runcmd_cb = runcmd_cb + def load(self): content = self._read_gitmodules_file() @@ -76,7 +77,7 @@ class Submodules(object): def _read_gitmodules_file(self): try: # try to read the .gitmodules file from the repo/ref - content = gitcmd(self.app.runcmd, 'cat-file', 'blob', + content = gitcmd(self.runcmd_cb, 'cat-file', 'blob', '%s:.gitmodules' % self.ref, cwd=self.repo, ignore_fail=True) @@ -100,7 +101,7 @@ class Submodules(object): try: # list objects in the parent repo tree to find the commit # object that corresponds to the submodule - commit = gitcmd(self.app.runcmd, 'ls-tree', self.ref, + commit = gitcmd(self.runcmd_cb, 'ls-tree', self.ref, submodule.path, cwd=self.repo) # read the commit hash from the output diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py deleted file mode 100644 index 3a03fe1d..00000000 --- a/morphlib/localrepocache.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (C) 2012-2016 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. - - -import os -import urlparse -import string -import sys -import tempfile - -import cliapp -import fs.osfs - -import morphlib -from morphlib.util import word_join_list as _word_join_list - - -# urlparse.urljoin needs to know details of the URL scheme being used. -# It does not know about git:// by default, so we teach it here. -gitscheme = ['git'] -urlparse.uses_relative.extend(gitscheme) -urlparse.uses_netloc.extend(gitscheme) -urlparse.uses_params.extend(gitscheme) -urlparse.uses_query.extend(gitscheme) -urlparse.uses_fragment.extend(gitscheme) - - -def quote_url(url): - ''' Convert URIs to strings that only contain digits, letters, % and _. - - NOTE: When changing the code of this function, make sure to also apply - the same to the quote_url() function of lorry. Otherwise the git tarballs - generated by lorry may no longer be found by morph. - - ''' - valid_chars = string.digits + string.letters + '%_' - transl = lambda x: x if x in valid_chars else '_' - return ''.join([transl(x) for x in url]) - - -class NoRemote(morphlib.Error): - - def __init__(self, reponame, errors): - self.reponame = reponame - self.errors = errors - - def __str__(self): - return '\n\t'.join(['Cannot find remote git repository: %s' % - self.reponame] + self.errors) - - -class NotCached(morphlib.Error): - def __init__(self, reponame): - self.reponame = reponame - - def __str__(self): # pragma: no cover - return 'Repository %s is not cached yet' % self.reponame - - -class UpdateError(cliapp.AppException): # pragma: no cover - - def __init__(self, repo): - cliapp.AppException.__init__( - self, 'Failed to update cached version of repo %s' % repo) - - -class CachedRepo(morphlib.gitdir.GitDirectory): - '''A locally cached Git repository with an origin remote set up. - - On instance of this class represents a locally cached version of a - remote Git repository. This remote repository is set up as the - 'origin' remote. - - Cached repositories are bare mirrors of the upstream. Locally created - branches will be lost the next time the repository updates. - - ''' - def __init__(self, path, original_name, url): - self.original_name = original_name - self.url = url - self.is_mirror = not url.startswith('file://') - self.already_updated = False - - super(CachedRepo, self).__init__(path) - - def __str__(self): # pragma: no cover - return self.url - - -class LocalRepoCache(object): - - '''Manage locally cached git repositories. - - When we build stuff, we need a local copy of the git repository. - To avoid having to clone the repositories for every build, we - maintain a local cache of the repositories: we first clone the - remote repository to the cache, and then make a local clone from - the cache to the build environment. This class manages the local - cached repositories. - - Repositories may be specified either using a full URL, in a form - understood by git(1), or as a repository name to which a base url - is prepended. The base urls are given to the class when it is - created. - - Instead of cloning via a normal 'git clone' directly from the - git server, we first try to download a tarball from a url, and - if that works, we unpack the tarball. - - ''' - - def __init__(self, app, cachedir, resolver, tarball_base_url=None): - self._app = app - self.fs = fs.osfs.OSFS('/') - self._cachedir = cachedir - self._resolver = resolver - if tarball_base_url and not tarball_base_url.endswith('/'): - tarball_base_url += '/' # pragma: no cover - self._tarball_base_url = tarball_base_url - self._cached_repo_objects = {} - - def _git(self, args, **kwargs): # pragma: no cover - '''Execute git command. - - This is a method of its own so that unit tests can easily override - all use of the external git command. - - ''' - - morphlib.git.gitcmd(self._app.runcmd, *args, **kwargs) - - def _fetch(self, url, path): # pragma: no cover - '''Fetch contents of url into a file. - - This method is meant to be overridden by unit tests. - - ''' - self._app.status(msg="Trying to fetch %(tarball)s to seed the cache", - tarball=url, chatty=True) - - if self._app.settings['verbose']: - verbosity_flags = [] - kwargs = dict(stderr=sys.stderr) - else: - verbosity_flags = ['--quiet'] - kwargs = dict() - - def wget_command(): - return ['wget'] + verbosity_flags + ['-O-', url] - - self._app.runcmd(wget_command(), - ['tar', '--no-same-owner', '-xf', '-'], - cwd=path, **kwargs) - - def _mkdtemp(self, dirname): # pragma: no cover - '''Creates a temporary directory. - - This method is meant to be overridden by unit tests. - - ''' - return tempfile.mkdtemp(dir=dirname) - - def _escape(self, url): - '''Escape a URL so it can be used as a basename in a file.''' - - # FIXME: The following is a nicer way than to do this. - # However, for compatibility, we need to use the same as the - # tarball server (set up by Lorry) uses. - # return urllib.quote(url, safe='') - - return quote_url(url) - - def _cache_name(self, url): - scheme, netloc, path, query, fragment = urlparse.urlsplit(url) - if scheme != 'file': - path = os.path.join(self._cachedir, self._escape(url)) - return path - - def has_repo(self, reponame): - '''Have we already got a cache of a given repo?''' - url = self._resolver.pull_url(reponame) - path = self._cache_name(url) - return self.fs.exists(path) - - def _clone_with_tarball(self, repourl, path): - tarball_url = urlparse.urljoin(self._tarball_base_url, - self._escape(repourl)) + '.tar' - try: - self.fs.makedir(path) - self._fetch(tarball_url, path) - self._git(['config', 'remote.origin.url', repourl], cwd=path) - self._git(['config', 'remote.origin.mirror', 'true'], cwd=path) - self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'], - cwd=path) - except BaseException as e: # pragma: no cover - if self.fs.exists(path): - self.fs.removedir(path, force=True) - return False, 'Unable to extract tarball %s: %s' % ( - tarball_url, e) - - return True, None - - def _cache_repo(self, reponame): - '''Clone the given repo into the cache. - - If the repo is already cloned, do nothing. - - ''' - errors = [] - if not self.fs.exists(self._cachedir): - self.fs.makedir(self._cachedir, recursive=True) - - try: - return self._get_repo(reponame) - except NotCached as e: - pass - - repourl = self._resolver.pull_url(reponame) - path = self._cache_name(repourl) - if self._tarball_base_url: - ok, error = self._clone_with_tarball(repourl, path) - if ok: - repo = self._get_repo(reponame) - self._update_repo(repo) - return repo - else: - errors.append(error) - self._app.status( - msg='Using git clone.') - - target = self._mkdtemp(self._cachedir) - - try: - self._git(['clone', '--mirror', '-n', repourl, target], - echo_stderr=self._app.settings['debug']) - except cliapp.AppException as e: - errors.append('Unable to clone from %s to %s: %s' % - (repourl, target, e)) - if self.fs.exists(target): - self.fs.removedir(target, recursive=True, force=True) - raise NoRemote(reponame, errors) - - self.fs.rename(target, path) - - repo = self._get_repo(reponame) - repo.already_updated = True - return repo - - def _get_repo(self, reponame): - '''Return an object representing a cached repository.''' - - if reponame in self._cached_repo_objects: - return self._cached_repo_objects[reponame] - else: - repourl = self._resolver.pull_url(reponame) - path = self._cache_name(repourl) - if self.fs.exists(path): - repo = CachedRepo(path, reponame, repourl) - self._cached_repo_objects[reponame] = repo - return repo - raise NotCached(reponame) - - def _update_repo(self, cachedrepo): # pragma: no cover - try: - cachedrepo.update_remotes( - echo_stderr=self._app.settings['verbose']) - cachedrepo.already_updated = True - except cliapp.AppException: - raise UpdateError(self) - - def get_updated_repo(self, repo_name, - ref=None, refs=None): # pragma: no cover - '''Return object representing cached repository. - - If all the specified refs in 'ref' or 'refs' point to SHA1s that are - already in the repository, or --no-git-update is set, then the - repository won't be updated. - - ''' - - if self._app.settings['no-git-update']: - self._app.status(msg='Not updating existing git repository ' - '%(repo_name)s ' - 'because of no-git-update being set', - chatty=True, - repo_name=repo_name) - return self._get_repo(repo_name) - - if ref is not None and refs is None: - refs = (ref,) - - if self.has_repo(repo_name): - repo = self._get_repo(repo_name) - if refs: - required_refs = set(refs) - missing_refs = set() - for required_ref in required_refs: - if morphlib.git.is_valid_sha1(required_ref): - try: - repo.resolve_ref_to_commit(required_ref) - continue - except morphlib.gitdir.InvalidRefError: - pass - missing_refs.add(required_ref) - - if not missing_refs: - self._app.status( - msg='Not updating git repository %(repo_name)s ' - 'because it already contains %(sha1s)s', - chatty=True, repo_name=repo_name, - sha1s=_word_join_list(tuple(required_refs))) - return repo - - self._app.status(msg='Updating %(repo_name)s', - repo_name=repo_name) - self._update_repo(repo) - return repo - else: - self._app.status(msg='Cloning %(repo_name)s', - repo_name=repo_name) - return self._cache_repo(repo_name) - - def ensure_submodules(self, toplevel_repo, - toplevel_ref): # pragma: no cover - '''Ensure any submodules of a given repo are cached and up to date.''' - - def submodules_for_repo(repo_path, ref): - try: - submodules = morphlib.git.Submodules(self._app, repo_path, ref) - submodules.load() - return [(submod.url, submod.commit) for submod in submodules] - except morphlib.git.NoModulesFileError: - return [] - - done = set() - subs_to_process = submodules_for_repo(toplevel_repo.dirname, - toplevel_ref) - while subs_to_process: - url, ref = subs_to_process.pop() - done.add((url, ref)) - - cached_repo = self.get_updated_repo(url, ref=ref) - - for submod in submodules_for_repo(cached_repo.dirname, ref): - if submod not in done: - subs_to_process.append(submod) diff --git a/morphlib/localrepocache_tests.py b/morphlib/localrepocache_tests.py deleted file mode 100644 index 91fdb216..00000000 --- a/morphlib/localrepocache_tests.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (C) 2012-2016 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. - - -import unittest -import urllib2 -import os - -import cliapp -import fs.memoryfs - -import morphlib -import morphlib.gitdir_tests - - -class FakeApplication(object): - - def __init__(self): - self.settings = { - 'debug': True, - 'verbose': True, - 'no-git-update': False, - } - - def status(self, **kwargs): - pass - - -class LocalRepoCacheTests(unittest.TestCase): - - def setUp(self): - aliases = ['upstream=git://example.com/#example.com:%s.git'] - repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases) - tarball_base_url = 'http://lorry.example.com/tarballs/' - self.reponame = 'upstream:reponame' - self.repourl = 'git://example.com/reponame' - escaped_url = 'git___example_com_reponame' - self.tarball_url = '%s%s.tar' % (tarball_base_url, escaped_url) - self.cachedir = '/cache/dir' - self.cache_path = '%s/%s' % (self.cachedir, escaped_url) - self.remotes = {} - self.fetched = [] - self.lrc = morphlib.localrepocache.LocalRepoCache( - FakeApplication(), self.cachedir, repo_resolver, tarball_base_url) - self.lrc.fs = fs.memoryfs.MemoryFS() - self.lrc._git = self.fake_git - self.lrc._fetch = self.not_found - self.lrc._mkdtemp = self.fake_mkdtemp - self.lrc._update_repo = lambda *args: None - self._mkdtemp_count = 0 - - def fake_git(self, args, **kwargs): - if args[0] == 'clone': - self.assertEqual(len(args), 5) - remote = args[3] - local = args[4] - self.remotes['origin'] = {'url': remote, 'updates': 0} - self.lrc.fs.makedir(local, recursive=True) - elif args[0:2] == ['remote', 'set-url']: - remote = args[2] - url = args[3] - self.remotes[remote] = {'url': url} - elif args[0:2] == ['config', 'remote.origin.url']: - remote = 'origin' - url = args[2] - self.remotes[remote] = {'url': url} - elif args[0:2] == ['config', 'remote.origin.mirror']: - remote = 'origin' - elif args[0:2] == ['config', 'remote.origin.fetch']: - remote = 'origin' - else: - raise NotImplementedError() - - def fake_mkdtemp(self, dirname): - thing = "foo"+str(self._mkdtemp_count) - self._mkdtemp_count += 1 - self.lrc.fs.makedir(dirname+"/"+thing) - return thing - - def not_found(self, url, path): - raise cliapp.AppException('Not found') - - def test_has_not_got_shortened_repo_initially(self): - self.assertFalse(self.lrc.has_repo(self.reponame)) - - def test_has_not_got_absolute_repo_initially(self): - self.assertFalse(self.lrc.has_repo(self.repourl)) - - def test_cachedir_does_not_exist_initially(self): - self.assertFalse(self.lrc.fs.exists(self.cachedir)) - - def test_creates_cachedir_if_missing(self): - with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - self.lrc.get_updated_repo(self.repourl, ref='master') - self.assertTrue(self.lrc.fs.exists(self.cachedir)) - - def test_happily_caches_same_repo_twice(self): - with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - self.lrc.get_updated_repo(self.repourl, ref='master') - self.lrc.get_updated_repo(self.repourl, ref='master') - - def test_fails_to_cache_when_remote_does_not_exist(self): - def fail(args, **kwargs): - self.lrc.fs.makedir(args[4]) - raise cliapp.AppException('') - self.lrc._git = fail - self.assertRaises(morphlib.localrepocache.NoRemote, - self.lrc.get_updated_repo, self.repourl, 'master') - - def test_does_not_mind_a_missing_tarball(self): - with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - self.lrc.get_updated_repo(self.repourl, ref='master') - self.assertEqual(self.fetched, []) - - def test_fetches_tarball_when_it_exists(self): - self.lrc._fetch = lambda url, path: self.fetched.append(url) - - with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - self.lrc.get_updated_repo(self.repourl, ref='master') - - self.assertEqual(self.fetched, [self.tarball_url]) - self.assertFalse(self.lrc.fs.exists(self.cache_path + '.tar')) - self.assertEqual(self.remotes['origin']['url'], self.repourl) - - def test_escapes_repourl_as_filename(self): - escaped = self.lrc._escape(self.repourl) - self.assertFalse('/' in escaped) - - def test_noremote_error_message_contains_repo_name(self): - e = morphlib.localrepocache.NoRemote(self.repourl, []) - self.assertTrue(self.repourl in str(e)) - - def test_avoids_caching_local_repo(self): - self.lrc.fs.makedir('/local/repo', recursive=True) - with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - cached = self.lrc.get_updated_repo('file:///local/repo', - refs='master') - assert cached.dirname == '/local/repo' diff --git a/morphlib/plugins/anchor_plugin.py b/morphlib/plugins/anchor_plugin.py index 7465c479..a9d07b39 100644 --- a/morphlib/plugins/anchor_plugin.py +++ b/morphlib/plugins/anchor_plugin.py @@ -137,9 +137,8 @@ class AnchorPlugin(cliapp.Plugin): for reponame, sources in sources_by_reponame.iteritems(): # UGLY HACK we need to push *FROM* our local repo cache to # avoid cloning everything multiple times. - repo = bc.lrc.get_updated_repo(reponame, - refs=(s.original_ref - for s in sources)) + repo = bc.repo_cache.get_updated_repo( + reponame, refs=(s.original_ref for s in sources)) remote = Remote(repo) push_url = resolver.push_url(reponame) diff --git a/morphlib/plugins/artifact_inspection_plugin.py b/morphlib/plugins/artifact_inspection_plugin.py index 413a0072..d396f93b 100644 --- a/morphlib/plugins/artifact_inspection_plugin.py +++ b/morphlib/plugins/artifact_inspection_plugin.py @@ -36,28 +36,23 @@ class NotASystemArtifactError(cliapp.AppException): class ProjectVersionGuesser(object): - def __init__(self, app, lrc, rrc, interesting_files): + def __init__(self, app, repo_cache, interesting_files): self.app = app - self.lrc = lrc - self.rrc = rrc + self.repo_cache = repo_cache self.interesting_files = interesting_files def file_contents(self, repo, ref, tree): filenames = [x for x in self.interesting_files if x in tree] - if filenames: - if self.lrc.has_repo(repo): - repository = self.lrc.get_updated_repo(repo, ref) - for filename in filenames: - yield filename, repository.read_file(filename, ref) - elif self.rrc: - for filename in filenames: - yield filename, self.rrc.cat_file(repo, ref, filename) + for filename in filenames: + # This can use a remote repo cache if available, to avoid having + # to clone every repo locally. + yield filename, self.repo_cache.cat_file(repo, ref, filename) class AutotoolsVersionGuesser(ProjectVersionGuesser): - def __init__(self, app, lrc, rrc): - ProjectVersionGuesser.__init__(self, app, lrc, rrc, [ + def __init__(self, app, repo_cache): + ProjectVersionGuesser.__init__(self, app, repo_cache, [ 'configure.ac', 'configure.in', 'configure.ac.in', @@ -136,9 +131,9 @@ class VersionGuesser(object): def __init__(self, app): self.app = app - self.lrc, self.rrc = morphlib.util.new_repo_caches(app) + self.repo_cache = morphlib.util.new_repo_cache(app) self.guessers = [ - AutotoolsVersionGuesser(app, self.lrc, self.rrc) + AutotoolsVersionGuesser(app, self.repo_cache) ] def guess_version(self, repo, ref): @@ -146,14 +141,10 @@ class VersionGuesser(object): repo=repo, ref=ref, chatty=True) version = None try: - if self.lrc.has_repo(repo): - repository = self.lrc.get_updated_repo(repo, ref) - tree = repository.list_files(ref=ref, recurse=False) - elif self.rrc: - repository = None - tree = self.rrc.ls_tree(repo, ref) - else: - return None + # This can use a remote repo cache if available, to avoid having + # to clone every repo locally. + tree = self.repo_cache.ls_tree(repo, ref) + for guesser in self.guessers: version = guesser.guess_version(repo, ref, tree) if version: diff --git a/morphlib/plugins/certify_plugin.py b/morphlib/plugins/certify_plugin.py index 735d0332..72d24a51 100644 --- a/morphlib/plugins/certify_plugin.py +++ b/morphlib/plugins/certify_plugin.py @@ -57,7 +57,7 @@ class CertifyPlugin(cliapp.Plugin): system_filenames = map(morphlib.util.sanitise_morphology_path, args[2:]) - self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app) + self.repo_cache = morphlib.util.new_repo_cache(self.app) self.resolver = morphlib.artifactresolver.ArtifactResolver() for system_filename in system_filenames: @@ -69,9 +69,7 @@ class CertifyPlugin(cliapp.Plugin): self.app.status( msg='Creating source pool for %s' % system_filename, chatty=True) source_pool = morphlib.sourceresolver.create_source_pool( - self.lrc, self.rrc, repo, ref, [system_filename], - cachedir=self.app.settings['cachedir'], - update_repos = not self.app.settings['no-git-update'], + self.repo_cache, repo, ref, [system_filename], status_cb=self.app.status) self.app.status( @@ -115,7 +113,7 @@ class CertifyPlugin(cliapp.Plugin): .format(name, ref)) certified = False - cached = self.lrc.get_updated_repo(source.repo_name, ref) + cached = self.repo_cache.get_updated_repo(source.repo_name, ref) # Test that sha1 ref is anchored in a tag or branch, # and thus not a candidate for removal on `git gc`. diff --git a/morphlib/plugins/cross-bootstrap_plugin.py b/morphlib/plugins/cross-bootstrap_plugin.py index 273e677d..8b8fbb2d 100644 --- a/morphlib/plugins/cross-bootstrap_plugin.py +++ b/morphlib/plugins/cross-bootstrap_plugin.py @@ -304,7 +304,7 @@ class CrossBootstrapPlugin(cliapp.Plugin): system_artifact.source, build_env, use_chroot=False) builder = BootstrapSystemBuilder( self.app, staging_area, build_command.lac, build_command.rac, - system_artifact.source, build_command.lrc, 1, False) + system_artifact.source, build_command.repo_cache, 1, False) builder.build_and_cache() self.app.status( diff --git a/morphlib/plugins/diff_plugin.py b/morphlib/plugins/diff_plugin.py index 26964df8..24a6d69a 100644 --- a/morphlib/plugins/diff_plugin.py +++ b/morphlib/plugins/diff_plugin.py @@ -22,7 +22,6 @@ from morphlib.cmdline_parse_utils import (definition_lists_synopsis, from morphlib.morphologyfinder import MorphologyFinder from morphlib.morphloader import MorphologyLoader from morphlib.morphset import MorphologySet -from morphlib.util import new_repo_caches class DiffPlugin(cliapp.Plugin): @@ -60,9 +59,10 @@ class DiffPlugin(cliapp.Plugin): name, from_source.repo_name, to_source.repo_name)) if from_source.original_ref != to_source.original_ref: - from_repo, to_repo = (self.bc.lrc.get_updated_repo(s.repo_name, - ref=s.sha1) - for s in (from_source, to_source)) + repo_cache = self.bc.repo_cache + from_repo, to_repo = (repo_cache.get_updated_repo(s.repo_name, + ref=s.sha1) + for s in (from_source, to_source)) from_desc = from_repo.version_guess(from_source.sha1) to_desc = to_repo.version_guess(to_source.sha1) @@ -100,7 +100,7 @@ class DiffPlugin(cliapp.Plugin): def get_systems((reponame, ref, definitions)): 'Convert a definition path list into a list of systems' ml = MorphologyLoader() - repo = self.bc.lrc.get_updated_repo(reponame, ref=ref) + repo = self.bc.repo_cache.get_updated_repo(reponame, ref=ref) mf = MorphologyFinder(gitdir=repo, ref=ref) # We may have been given an empty set of definitions as input, in # which case we instead use every we find. diff --git a/morphlib/plugins/get_repo_plugin.py b/morphlib/plugins/get_repo_plugin.py index fc81d6e5..ce0b7af0 100644 --- a/morphlib/plugins/get_repo_plugin.py +++ b/morphlib/plugins/get_repo_plugin.py @@ -101,9 +101,9 @@ class GetRepoPlugin(cliapp.Plugin): '%(stratum)s stratum', ref=ref or chunk_spec['ref'], chunk=chunk_spec['name'], stratum=morph['name']) - lrc, rrc = morphlib.util.new_repo_caches(self.app) - cached_repo = lrc.get_updated_repo(chunk_spec['repo'], - chunk_spec['ref']) + repo_cache = morphlib.util.new_repo_cache(self.app) + cached_repo = repo_cache.get_updated_repo(chunk_spec['repo'], + chunk_spec['ref']) try: self._clone_repo(cached_repo, dirname, diff --git a/morphlib/plugins/list_artifacts_plugin.py b/morphlib/plugins/list_artifacts_plugin.py index c2e6b459..2c098c2a 100644 --- a/morphlib/plugins/list_artifacts_plugin.py +++ b/morphlib/plugins/list_artifacts_plugin.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 Codethink Limited +# Copyright (C) 2014-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,7 +58,7 @@ class ListArtifactsPlugin(cliapp.Plugin): system_filenames = map(morphlib.util.sanitise_morphology_path, args[2:]) - self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app) + self.repo_cache = morphlib.util.new_repo_cache(self.app) self.resolver = morphlib.artifactresolver.ArtifactResolver() artifact_files = set() @@ -85,9 +85,7 @@ class ListArtifactsPlugin(cliapp.Plugin): self.app.status( msg='Creating source pool for %s' % system_filename, chatty=True) source_pool = morphlib.sourceresolver.create_source_pool( - self.lrc, self.rrc, repo, ref, [system_filename], - cachedir=self.app.settings['cachedir'], - update_repos = not self.app.settings['no-git-update'], + self.repo_cache, repo, ref, [system_filename], status_cb=self.app.status) self.app.status( diff --git a/morphlib/plugins/show_dependencies_plugin.py b/morphlib/plugins/show_dependencies_plugin.py index 42f8f273..bfe4d6c2 100644 --- a/morphlib/plugins/show_dependencies_plugin.py +++ b/morphlib/plugins/show_dependencies_plugin.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2015 Codethink Limited +# Copyright (C) 2012-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -42,24 +42,6 @@ class ShowDependenciesPlugin(cliapp.Plugin): of build dependencies of the constituent components. ''' - - if not os.path.exists(self.app.settings['cachedir']): - os.mkdir(self.app.settings['cachedir']) - cachedir = os.path.join(self.app.settings['cachedir'], 'gits') - tarball_base_url = self.app.settings['tarball-server'] - repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver( - self.app.settings['repo-alias']) - lrc = morphlib.localrepocache.LocalRepoCache( - self.app, cachedir, repo_resolver, tarball_base_url) - - remote_url = morphlib.util.get_git_resolve_cache_server( - self.app.settings) - if remote_url: - rrc = morphlib.remoterepocache.RemoteRepoCache( - remote_url, repo_resolver) - else: - rrc = None - build_command = morphlib.buildcommand.BuildCommand(self.app) # traverse the morphs to list all the sources diff --git a/morphlib/plugins/system_manifests_plugin.py b/morphlib/plugins/system_manifests_plugin.py index 86388737..7fe33102 100644 --- a/morphlib/plugins/system_manifests_plugin.py +++ b/morphlib/plugins/system_manifests_plugin.py @@ -84,7 +84,7 @@ class SystemManifestsPlugin(cliapp.Plugin): system_filenames = map(morphlib.util.sanitise_morphology_path, args[2:]) - self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app) + self.repo_cache = morphlib.util.new_repo_cache(self.app) self.resolver = morphlib.artifactresolver.ArtifactResolver() for system_filename in system_filenames: @@ -104,9 +104,7 @@ class SystemManifestsPlugin(cliapp.Plugin): msg='Creating source pool for %(system)s', system=system_filename, chatty=True) source_pool = morphlib.sourceresolver.create_source_pool( - self.lrc, self.rrc, repo, ref, [system_filename], - cachedir=self.app.settings['cachedir'], - update_repos = not self.app.settings['no-git-update'], + self.repo_cache, repo, ref, [system_filename], status_cb=self.app.status) self.app.status( @@ -135,10 +133,11 @@ class SystemManifestsPlugin(cliapp.Plugin): except IndexError: trove_id = None with morphlib.util.temp_dir(dir=self.app.settings['tempdir']) as td: - lorries = get_lorry_repos(td, self.lrc, self.app.status, trove_id, + lorries = get_lorry_repos(td, self.repo_cache, self.app.status, + trove_id, self.app.settings['trove-host']) manifest = Manifest(system_artifact.name, td, self.app.status, - self.lrc) + self.repo_cache) old_prefix = self.app.status_prefix sources = set(a.source for a in system_artifact.walk() @@ -150,7 +149,8 @@ class SystemManifestsPlugin(cliapp.Plugin): name = source.morphology['name'] ref = source.original_ref - cached = self.lrc.get_updated_repo(source.repo_name, ref) + cached = self.repo_cache.get_updated_repo(source.repo_name, + ref) new_prefix = '[%d/%d][%s] ' % (i, len(sources), name) self.app.status_prefix = old_prefix + new_prefix @@ -169,8 +169,8 @@ def run_licensecheck(filename): else: return output[len(filename) + 2:].strip() -def checkout_repo(lrc, repo, dest, ref='master'): - cached = lrc.get_updated_repo(repo, ref) +def checkout_repo(repo_cache, repo, dest, ref='master'): + cached = repo_cache.get_updated_repo(repo, ref) if not os.path.exists(dest): morphlib.gitdir.checkout_from_cached_repo(repo, ref, dest) @@ -235,14 +235,15 @@ def get_upstream_address(chunk_url, lorries, status): chunk=chunk_url) return 'UNKNOWN' -def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host): +def get_lorry_repos(tempdir, repo_cache, status, trove_id, trove_host): lorries = [] try: baserock_lorry_repo = 'baserock:local-config/lorries' lorrydir = os.path.join(tempdir, 'baserock-lorries') - baserock_lorrydir = checkout_repo(lrc, baserock_lorry_repo, lorrydir) + baserock_lorrydir = checkout_repo(repo_cache, baserock_lorry_repo, + lorrydir) lorries.extend(load_lorries(lorrydir)) - except morphlib.localrepocache.NoRemote as e: + except morphlib.repocache.NoRemote as e: status(msg="WARNING: Could not find lorries from git.baserock.org, " "expected to find them on %(trove)s at %(reponame)s", trove=trove_host, reponame = e.reponame) @@ -252,9 +253,10 @@ def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host): trove_lorry_repo = ('http://%s/git/%s/local-config/lorries' % (trove_host, trove_id)) lorrydir = os.path.join(tempdir, '%s-lorries' % trove_id) - trove_lorrydir = checkout_repo(lrc, trove_lorry_repo, lorrydir) + trove_lorrydir = checkout_repo(repo_cache, trove_lorry_repo, + lorrydir) lorries.extend(load_lorries(lorrydir)) - except morphlib.localrepocache.NoRemote as e: + except morphlib.repocache.NoRemote as e: status(msg="WARNING: Could not find lorries repo on %(trove)s " "at %(reponame)s", trove=trove_host, reponame=e.reponame) @@ -268,10 +270,10 @@ def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host): class Manifest(object): """Writes out a manifest of what's included in a system.""" - def __init__(self, system_name, tempdir, status_cb, lrc): + def __init__(self, system_name, tempdir, status_cb, repo_cache): self.tempdir = tempdir self.status = status_cb - self.lrc = lrc + self.repo_cache = repo_cache path = os.path.join(os.getcwd(), system_name + '-manifest.csv') self.status(msg='Creating %(path)s', path=path) self.file = open(path, 'wb') diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py deleted file mode 100644 index 4a6d9fe9..00000000 --- a/morphlib/remoterepocache.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (C) 2012-2015 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. - - -import cliapp -import json -import logging -import urllib2 -import urlparse -import urllib - - -class ResolveRefError(cliapp.AppException): - - def __init__(self, repo_name, ref): - cliapp.AppException.__init__( - self, 'Failed to resolve ref %s for repo %s' % - (ref, repo_name)) - - -class CatFileError(cliapp.AppException): - - def __init__(self, repo_name, ref, filename): - cliapp.AppException.__init__( - self, 'Failed to cat file %s in ref %s of repo %s' % - (filename, ref, repo_name)) - -class LsTreeError(cliapp.AppException): - - def __init__(self, repo_name, ref): - cliapp.AppException.__init__( - self, 'Failed to list tree in ref %s of repo %s' % - (ref, repo_name)) - - -class RemoteRepoCache(object): - - def __init__(self, server_url, resolver): - self.server_url = server_url - self._resolver = resolver - - def resolve_ref(self, repo_name, ref): - repo_url = self._resolver.pull_url(repo_name) - try: - return self._resolve_ref_for_repo_url(repo_url, ref) - except BaseException as e: - logging.error('Caught exception: %s' % str(e)) - raise ResolveRefError(repo_name, ref) - - def cat_file(self, repo_name, ref, filename): - repo_url = self._resolver.pull_url(repo_name) - try: - return self._cat_file_for_repo_url(repo_url, ref, filename) - except urllib2.HTTPError as e: - logging.error('Caught exception: %s' % str(e)) - if e.code == 404: - raise CatFileError(repo_name, ref, filename) - raise # pragma: no cover - - def ls_tree(self, repo_name, ref): - repo_url = self._resolver.pull_url(repo_name) - try: - info = json.loads(self._ls_tree_for_repo_url(repo_url, ref)) - return info['tree'].keys() - except BaseException as e: - logging.error('Caught exception: %s' % str(e)) - raise LsTreeError(repo_name, ref) - - def _resolve_ref_for_repo_url(self, repo_url, ref): # pragma: no cover - data = self._make_request( - 'sha1s?repo=%s&ref=%s' % self._quote_strings(repo_url, ref)) - info = json.loads(data) - return info['sha1'], info['tree'] - - def _cat_file_for_repo_url(self, repo_url, ref, - filename): # pragma: no cover - return self._make_request( - 'files?repo=%s&ref=%s&filename=%s' - % self._quote_strings(repo_url, ref, filename)) - - def _ls_tree_for_repo_url(self, repo_url, ref): # pragma: no cover - return self._make_request( - 'trees?repo=%s&ref=%s' % self._quote_strings(repo_url, ref)) - - def _quote_strings(self, *args): # pragma: no cover - return tuple(urllib.quote(string) for string in args) - - def _make_request(self, path): # pragma: no cover - server_url = self.server_url - if not server_url.endswith('/'): - server_url += '/' - url = urlparse.urljoin(server_url, '/1.0/%s' % path) - handle = urllib2.urlopen(url) - return handle.read() diff --git a/morphlib/remoterepocache_tests.py b/morphlib/remoterepocache_tests.py deleted file mode 100644 index 966e74d5..00000000 --- a/morphlib/remoterepocache_tests.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (C) 2012-2015 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. - - -import json -import unittest -import urllib2 - -import morphlib - - -class RemoteRepoCacheTests(unittest.TestCase): - - def _resolve_ref_for_repo_url(self, repo_url, ref): - return self.sha1s[repo_url][ref] - - def _cat_file_for_repo_url(self, repo_url, sha1, filename): - try: - return self.files[repo_url][sha1][filename] - except KeyError: - raise urllib2.HTTPError(url='', code=404, msg='Not found', - hdrs={}, fp=None) - - def _ls_tree_for_repo_url(self, repo_url, sha1): - return json.dumps({ - 'repo': repo_url, - 'ref': sha1, - 'tree': self.files[repo_url][sha1] - }) - - def setUp(self): - self.sha1s = { - 'git://gitorious.org/baserock/morph': { - 'master': 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9' - } - } - self.files = { - 'git://gitorious.org/baserock-morphs/linux': { - 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9': { - 'linux.morph': 'linux morphology' - } - } - } - self.server_url = 'http://foo.bar' - aliases = [ - 'upstream=git://gitorious.org/baserock-morphs/#foo', - 'baserock=git://gitorious.org/baserock/#foo' - ] - resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases) - self.cache = morphlib.remoterepocache.RemoteRepoCache( - self.server_url, resolver) - self.cache._resolve_ref_for_repo_url = self._resolve_ref_for_repo_url - self.cache._cat_file_for_repo_url = self._cat_file_for_repo_url - self.cache._ls_tree_for_repo_url = self._ls_tree_for_repo_url - - def test_sets_server_url(self): - self.assertEqual(self.cache.server_url, self.server_url) - - def test_resolve_existing_ref_for_existing_repo(self): - sha1 = self.cache.resolve_ref('baserock:morph', 'master') - self.assertEqual( - sha1, - self.sha1s['git://gitorious.org/baserock/morph']['master']) - - def test_fail_resolving_existing_ref_for_non_existent_repo(self): - self.assertRaises(morphlib.remoterepocache.ResolveRefError, - self.cache.resolve_ref, 'non-existent-repo', - 'master') - - def test_fail_resolving_non_existent_ref_for_existing_repo(self): - self.assertRaises(morphlib.remoterepocache.ResolveRefError, - self.cache.resolve_ref, 'baserock:morph', - 'non-existent-ref') - - def test_fail_resolving_non_existent_ref_for_non_existent_repo(self): - self.assertRaises(morphlib.remoterepocache.ResolveRefError, - self.cache.resolve_ref, 'non-existent-repo', - 'non-existent-ref') - - def test_cat_existing_file_in_existing_repo_and_ref(self): - content = self.cache.cat_file( - 'upstream:linux', 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', - 'linux.morph') - self.assertEqual(content, 'linux morphology') - - def test_fail_cat_file_using_invalid_sha1(self): - self.assertRaises(morphlib.remoterepocache.CatFileError, - self.cache.cat_file, 'upstream:linux', 'blablabla', - 'linux.morph') - - def test_fail_cat_non_existent_file_in_existing_repo_and_ref(self): - self.assertRaises(morphlib.remoterepocache.CatFileError, - self.cache.cat_file, 'upstream:linux', - 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', - 'non-existent-file') - - def test_fail_cat_file_in_non_existent_ref_in_existing_repo(self): - self.assertRaises(morphlib.remoterepocache.CatFileError, - self.cache.cat_file, 'upstream:linux', - 'ecd7a325095a0d19b8c3d76f578d85b979461d41', - 'linux.morph') - - def test_fail_cat_file_in_non_existent_repo(self): - self.assertRaises(morphlib.remoterepocache.CatFileError, - self.cache.cat_file, 'non-existent-repo', - 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', - 'some-file') - - def test_ls_tree_in_existing_repo_and_ref(self): - content = self.cache.ls_tree( - 'upstream:linux', 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9') - self.assertEqual(content, ['linux.morph']) - - def test_fail_ls_tree_using_invalid_sha1(self): - self.assertRaises(morphlib.remoterepocache.LsTreeError, - self.cache.ls_tree, 'upstream:linux', 'blablabla') - - def test_fail_ls_file_in_non_existent_ref_in_existing_repo(self): - self.assertRaises(morphlib.remoterepocache.LsTreeError, - self.cache.ls_tree, 'upstream:linux', - 'ecd7a325095a0d19b8c3d76f578d85b979461d41') - - def test_fail_ls_tree_in_non_existent_repo(self): - self.assertRaises(morphlib.remoterepocache.LsTreeError, - self.cache.ls_tree, 'non-existent-repo', - 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9') diff --git a/morphlib/repocache.py b/morphlib/repocache.py new file mode 100644 index 00000000..f6978ec4 --- /dev/null +++ b/morphlib/repocache.py @@ -0,0 +1,565 @@ +# Copyright (C) 2012-2016 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. + + +import cliapp +import fs.osfs + +import json +import logging +import os +import string +import sys +import tempfile +import urllib2 +import urlparse +import urllib + +import morphlib +from morphlib.util import word_join_list as _word_join_list + + +# urlparse.urljoin needs to know details of the URL scheme being used. +# It does not know about git:// by default, so we teach it here. +gitscheme = ['git'] +urlparse.uses_relative.extend(gitscheme) +urlparse.uses_netloc.extend(gitscheme) +urlparse.uses_params.extend(gitscheme) +urlparse.uses_query.extend(gitscheme) +urlparse.uses_fragment.extend(gitscheme) + + +def quote_url(url): + ''' Convert URIs to strings that only contain digits, letters, % and _. + + NOTE: When changing the code of this function, make sure to also apply + the same to the quote_url() function of lorry. Otherwise the git tarballs + generated by lorry may no longer be found by morph. + + ''' + valid_chars = string.digits + string.letters + '%_' + transl = lambda x: x if x in valid_chars else '_' + return ''.join([transl(x) for x in url]) + + +class NoRemote(morphlib.Error): + + def __init__(self, reponame, errors): + self.reponame = reponame + self.errors = errors + + def __str__(self): + return '\n\t'.join(['Cannot find remote git repository: %s' % + self.reponame] + self.errors) + + +class NotCached(morphlib.Error): + def __init__(self, reponame): + self.reponame = reponame + + def __str__(self): # pragma: no cover + return 'Repository %s is not cached yet' % self.reponame + + +class UpdateError(cliapp.AppException): # pragma: no cover + + def __init__(self, repo): + cliapp.AppException.__init__( + self, 'Failed to update cached version of repo %s' % repo) + + +class CachedRepo(morphlib.gitdir.GitDirectory): + '''A locally cached Git repository with an origin remote set up. + + On instance of this class represents a locally cached version of a + remote Git repository. This remote repository is set up as the + 'origin' remote. + + Cached repositories are bare mirrors of the upstream. Locally created + branches will be lost the next time the repository updates. + + ''' + def __init__(self, path, original_name, url): + self.original_name = original_name + self.url = url + self.is_mirror = not url.startswith('file://') + self.already_updated = False + + super(CachedRepo, self).__init__(path) + + def __str__(self): # pragma: no cover + return self.url + + +class RepoCache(object): + '''Manage a collection of Git repositories. + + When we build stuff, we need a local copy of the git repository. + To avoid having to clone the repositories for every build, we + maintain a local cache of the repositories: we first clone the + remote repository to the cache, and then make a local clone from + the cache to the build environment. This class manages the local + cached repositories. + + Repositories may be specified either using a full URL, in a form + understood by git(1), or as a repository name to which a base url + is prepended. The base urls are given to the class when it is + created. + + Instead of cloning via a normal 'git clone' directly from the + git server, we first try to download a tarball from a url, and + if that works, we unpack the tarball. + + Certain questions about a repo can be resolved without cloning the whole + thing, if an instance of 'morph-cache-server' is available on the remote + Git server. This makes calculating the build graph for the first time + a whole lot faster, as we avoid cloning every repo locally. The + git_resolve_cache_url parameter enables this feature. Baserock 'Trove' + systems run 'morph-cache-server' by default. + + The 'custom_fs' parameter takes a PyFilesystem instance, which you can use + to override where 'cachedir' is stored. This should probably only be used + for testing. + + ''' + def __init__(self, cachedir, resolver, tarball_base_url=None, + git_resolve_cache_url=None, + update_gits=True, + runcmd_cb=cliapp.runcmd, status_cb=lambda **kwargs: None, + verbose=False, debug=False, + custom_fs=None): + self.fs = custom_fs or fs.osfs.OSFS('/') + + self.fs.makedir(cachedir, recursive=True, allow_recreate=True) + + self.cachedir = cachedir + self._resolver = resolver + if tarball_base_url and not tarball_base_url.endswith('/'): + tarball_base_url += '/' + self._tarball_base_url = tarball_base_url + self._cached_repo_objects = {} + + # Corresponds to the app 'no-git-update' setting + self.update_gits = update_gits + + self.runcmd_cb = runcmd_cb + self.status_cb = status_cb + self.verbose = verbose + self.debug = debug + + if git_resolve_cache_url: # pragma: no cover + self.remote_cache = RemoteRepoCache(git_resolve_cache_url, + resolver) + else: + self.remote_cache = None + + def _git(self, args, **kwargs): # pragma: no cover + '''Execute git command. + + This is a method of its own so that unit tests can easily override + all use of the external git command. + + ''' + + morphlib.git.gitcmd(self.runcmd_cb, *args, **kwargs) + + def _fetch(self, url, path): # pragma: no cover + '''Fetch contents of url into a file. + + This method is meant to be overridden by unit tests. + + ''' + self.status_cb(msg="Trying to fetch %(tarball)s to seed the cache", + tarball=url, chatty=True) + + if self.verbose: + verbosity_flags = [] + kwargs = dict(stderr=sys.stderr) + else: + verbosity_flags = ['--quiet'] + kwargs = dict() + + def wget_command(): + return ['wget'] + verbosity_flags + ['-O-', url] + + self.runcmd_cb(wget_command(), + ['tar', '--no-same-owner', '-xf', '-'], + cwd=path, **kwargs) + + def _mkdtemp(self, dirname): # pragma: no cover + '''Creates a temporary directory. + + This method is meant to be overridden by unit tests. + + ''' + return tempfile.mkdtemp(dir=self.fs.getsyspath(dirname)) + + def _escape(self, url): + '''Escape a URL so it can be used as a basename in a file.''' + + # FIXME: The following is a nicer way than to do this. + # However, for compatibility, we need to use the same as the + # tarball server (set up by Lorry) uses. + # return urllib.quote(url, safe='') + + return quote_url(url) + + def _cache_name(self, url): + scheme, netloc, path, query, fragment = urlparse.urlsplit(url) + if scheme != 'file': + path = os.path.join(self.cachedir, self._escape(url)) + return path + + def has_repo(self, reponame): + '''Have we already got a cache of a given repo?''' + url = self._resolver.pull_url(reponame) + path = self._cache_name(url) + return self.fs.exists(path) + + def _clone_with_tarball(self, repourl, path): + tarball_url = urlparse.urljoin(self._tarball_base_url, + self._escape(repourl)) + '.tar' + try: + self.fs.makedir(path) + self._fetch(tarball_url, path) + self._git(['config', 'remote.origin.url', repourl], cwd=path) + self._git(['config', 'remote.origin.mirror', 'true'], cwd=path) + self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'], + cwd=path) + except BaseException as e: + if self.fs.exists(path): + self.fs.removedir(path, force=True) + return False, 'Unable to extract tarball %s: %s' % ( + tarball_url, e) + + return True, None + + def _new_cached_repo_instance(self, path, reponame, repourl): + return CachedRepo(path, reponame, repourl) + + def _cache_repo(self, reponame): + '''Clone the given repo into the cache. + + If the repo is already cloned, do nothing. + + ''' + errors = [] + + repourl = self._resolver.pull_url(reponame) + path = self._cache_name(repourl) + if self._tarball_base_url: + ok, error = self._clone_with_tarball(repourl, path) + if ok: + repo = self._get_repo(reponame) + self._update_repo(repo) + return repo + else: + errors.append(error) + self.status_cb(msg='Using git clone.') + + target = self._mkdtemp(self.cachedir) + + try: + self._git(['clone', '--mirror', '-n', repourl, target], + echo_stderr=self.debug) + except cliapp.AppException as e: + errors.append('Unable to clone from %s to %s: %s' % + (repourl, target, e)) + if self.fs.exists(target): + self.fs.removedir(target, force=True) + raise NoRemote(reponame, errors) + + self.fs.rename(target, path) + + repo = self._new_cached_repo_instance(path, reponame, repourl) + repo.already_updated = True + return repo + + def _get_repo(self, reponame): + '''Return an object representing a cached repository.''' + + if reponame in self._cached_repo_objects: + return self._cached_repo_objects[reponame] + else: + repourl = self._resolver.pull_url(reponame) + path = self._cache_name(repourl) + if self.fs.exists(path): + repo = self._new_cached_repo_instance(path, reponame, repourl) + self._cached_repo_objects[reponame] = repo + return repo + elif self.update_gits: + return self._cache_repo(reponame) + else: + raise NotCached(reponame) + + def _update_repo(self, cachedrepo): # pragma: no cover + try: + cachedrepo.update_remotes( + echo_stderr=self.verbose) + cachedrepo.already_updated = True + except cliapp.AppException: + raise UpdateError(self) + + def get_updated_repo(self, repo_name, + ref=None, refs=None): + '''Return object representing cached repository. + + If all the specified refs in 'ref' or 'refs' point to SHA1s that are + already in the repository, or --no-git-update is set, then the + repository won't be updated. + + ''' + + if not self.update_gits: + self.status_cb(msg='Not updating existing git repository ' + '%(repo_name)s ' + 'because of no-git-update being set', + chatty=True, + repo_name=repo_name) + return self._get_repo(repo_name) + + if ref is not None and refs is None: + refs = (ref,) + else: + refs = list(refs) + + if self.has_repo(repo_name): + repo = self._get_repo(repo_name) + if refs: + required_refs = set(refs) + missing_refs = set() + for required_ref in required_refs: # pragma: no cover + if morphlib.git.is_valid_sha1(required_ref): + try: + repo.resolve_ref_to_commit(required_ref) + continue + except morphlib.gitdir.InvalidRefError: + pass + missing_refs.add(required_ref) + + if not missing_refs: # pragma: no cover + self.status_cb( + msg='Not updating git repository %(repo_name)s ' + 'because it already contains %(sha1s)s', + chatty=True, repo_name=repo_name, + sha1s=_word_join_list(tuple(required_refs))) + return repo + + if ref: + ref_str = 'ref %s' % ref + else: + ref_str = '%i refs' % len(refs) + self.status_cb(msg='Updating %(repo_name)s for %(ref_str)s', + repo_name=repo_name, ref_str=ref_str) + self._update_repo(repo) + return repo + else: + self.status_cb(msg='Cloning %(repo_name)s', repo_name=repo_name) + return self._get_repo(repo_name) + + def ensure_submodules(self, toplevel_repo, + toplevel_ref): # pragma: no cover + '''Ensure any submodules of a given repo are cached and up to date.''' + + def submodules_for_repo(repo_path, ref): + try: + submodules = morphlib.git.Submodules(repo_path, ref, + runcmd_cb=self.runcmd_cb) + submodules.load() + return [(submod.url, submod.commit) for submod in submodules] + except morphlib.git.NoModulesFileError: + return [] + + done = set() + subs_to_process = submodules_for_repo(toplevel_repo.dirname, + toplevel_ref) + while subs_to_process: + url, ref = subs_to_process.pop() + done.add((url, ref)) + + cached_repo = self.get_updated_repo(url, ref=ref) + + for submod in submodules_for_repo(cached_repo.dirname, ref): + if submod not in done: + subs_to_process.append(submod) + + def resolve_ref_to_commit_and_tree(self, repo_name, + ref): # pragma: no cover + '''Given the name of a ref, returns the commit and tree SHA1. + + If a remote cache server is available, this function can query the + remote cache server to avoid needing to clone the entire repo. + + This might break if the ref points to a tag, not a commit. + + ''' + absref = None + tree = None + + if self.has_repo(repo_name): + repo = self.get_updated_repo(repo_name, ref) + # If the user passed --no-git-update, and the ref is a SHA1 not + # available locally, this call will raise an exception. + absref = repo.resolve_ref_to_commit(ref) + tree = repo.resolve_ref_to_tree(absref) + elif self.remote_cache is not None: + try: + absref, tree = self.remote_cache.resolve_ref(repo_name, ref) + if absref is not None: + self.status_cb( + msg='Resolved %(repo_name)s %(ref)s via remote repo ' + 'cache', repo_name=repo_name, ref=ref, chatty=True) + except BaseException as e: + logging.warning('Caught (and ignored) exception: %s' % str(e)) + + if absref is None: + # As a last resort, clone the repo to resolve the ref. + repo = self.get_updated_repo(repo_name, ref) + absref = repo.resolve_ref_to_commit(ref) + tree = repo.resolve_ref_to_tree(absref) + + return absref, tree + + def ls_tree(self, repo_name, ref): # pragma: no cover + '''Lists the files contained in a commit. + + If a remote cache server is available, this function can query the + remote cache server to avoid needing to clone the entire repo. + + The list is non-recursive, so you can only see files in the top + directory of the repo. To do a recursive operation, use a GitDir + instance returned by get_updated_repo(). + + ''' + files = [] + + if self.has_repo(repo_name): + repo = self.get_updated_repo(repo_name, ref) + files = repo.list_files(ref=ref, recurse=False) + elif self.remote_cache is not None: + files = self.remote_cache.ls_tree(repo_name, ref) + + if len(files) == 0: + # As a last resort, clone the repo to do get the file list. + repo = self.get_updated_repo(repo_name, ref) + files = repo.list_files(ref=ref, recurse=False) + + return files + + def cat_file(self, repo_name, ref, filename): # pragma: no cover + '''Returns a single file from a repo. + + If a remote cache server is available, this function can query the + remote cache server to avoid needing to clone the entire repo. + + ''' + contents = None + + if self.has_repo(repo_name): + repo = self.get_updated_repo(repo_name, ref) + contents = repo.get_file_from_ref(ref, filename) + elif self.remote_cache is not None: + contents = self.remote_cache.cat_file(repo_name, ref, filename) + + if not contents: + # As a last resort, clone the repo to do get the file list. + repo = self.get_updated_repo(repo_name, ref) + contents = repo.get_file_from_ref(ref, filename) + + return contents + + +class RemoteResolveRefError(cliapp.AppException): + + def __init__(self, repo_name, ref): + cliapp.AppException.__init__( + self, 'Failed to resolve ref %s for repo %s from remote cache' % + (ref, repo_name)) + + +class RemoteCatFileError(cliapp.AppException): + + def __init__(self, repo_name, ref, filename): + cliapp.AppException.__init__( + self, 'Failed to cat file %s in ref %s of repo %s, from remote ' + 'cache' % (filename, ref, repo_name)) + + +class RemoteLsTreeError(cliapp.AppException): + + def __init__(self, repo_name, ref): + cliapp.AppException.__init__( + self, 'Failed to list tree in ref %s of repo %s, from remote' + 'cache' % (ref, repo_name)) + + +class RemoteRepoCache(object): + + def __init__(self, server_url, resolver): + self.server_url = server_url + self._resolver = resolver + + def resolve_ref(self, repo_name, ref): + repo_url = self._resolver.pull_url(repo_name) + try: + return self._resolve_ref_for_repo_url(repo_url, ref) + except BaseException as e: + logging.error('Caught exception: %s' % str(e)) + raise RemoteResolveRefError(repo_name, ref) + + def cat_file(self, repo_name, ref, filename): + repo_url = self._resolver.pull_url(repo_name) + try: + return self._cat_file_for_repo_url(repo_url, ref, filename) + except urllib2.HTTPError as e: + logging.error('Caught exception: %s' % str(e)) + if e.code == 404: + raise RemoteCatFileError(repo_name, ref, filename) + raise # pragma: no cover + + def ls_tree(self, repo_name, ref): + repo_url = self._resolver.pull_url(repo_name) + try: + info = json.loads(self._ls_tree_for_repo_url(repo_url, ref)) + return info['tree'].keys() + except BaseException as e: + logging.error('Caught exception: %s' % str(e)) + raise RemoteLsTreeError(repo_name, ref) + + def _resolve_ref_for_repo_url(self, repo_url, ref): # pragma: no cover + data = self._make_request( + 'sha1s?repo=%s&ref=%s' % self._quote_strings(repo_url, ref)) + info = json.loads(data) + return info['sha1'], info['tree'] + + def _cat_file_for_repo_url(self, repo_url, ref, + filename): # pragma: no cover + return self._make_request( + 'files?repo=%s&ref=%s&filename=%s' + % self._quote_strings(repo_url, ref, filename)) + + def _ls_tree_for_repo_url(self, repo_url, ref): # pragma: no cover + return self._make_request( + 'trees?repo=%s&ref=%s' % self._quote_strings(repo_url, ref)) + + def _quote_strings(self, *args): # pragma: no cover + return tuple(urllib.quote(string) for string in args) + + def _make_request(self, path): # pragma: no cover + server_url = self.server_url + if not server_url.endswith('/'): + server_url += '/' + url = urlparse.urljoin(server_url, '/1.0/%s' % path) + handle = urllib2.urlopen(url) + return handle.read() diff --git a/morphlib/repocache_tests.py b/morphlib/repocache_tests.py new file mode 100644 index 00000000..6e07aedb --- /dev/null +++ b/morphlib/repocache_tests.py @@ -0,0 +1,281 @@ +# Copyright (C) 2012-2016 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. + + +import unittest +import urllib2 +import json +import os + +import cliapp +import fs.memoryfs +import tempfile + +import morphlib +import morphlib.gitdir_tests + + +class TestableRepoCache(morphlib.repocache.RepoCache): + '''Adapts the RepoCache class for unit testing. + + All Git operations are stubbed out. You can track what Git operations have + taken place by looking at the 'remotes' dict -- any 'clone' operations will + set an entry in there. The 'tarballs_fetched' list tracks what tarballs + of Git repos would have been downloaded. + + There is a single repo alias, 'example' which expands to + git://example.com/. + + ''' + def __init__(self, update_gits=True): + aliases = ['example=git://example.com/#example.com:%s.git'] + repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases) + tarball_base_url = 'http://lorry.example.com/tarballs' + cachedir = '/cache/gits/' + memoryfs = fs.memoryfs.MemoryFS() + + morphlib.repocache.RepoCache.__init__( + self, cachedir, repo_resolver, tarball_base_url=tarball_base_url, + custom_fs=memoryfs, update_gits=update_gits) + + self.remotes = {} + self.tarballs_fetched = [] + + self._mkdtemp_count = 0 + + def _mkdtemp(self, dirname): + thing = "foo"+str(self._mkdtemp_count) + self._mkdtemp_count += 1 + self.fs.makedir(dirname+"/"+thing) + return thing + + def _fetch(self, url, path): + self.tarballs_fetched.append(url) + + def _git(self, args, **kwargs): + if args[0] == 'clone': + assert len(args) == 5 + remote = args[3] + local = args[4] + self.remotes['origin'] = {'url': remote, 'updates': 0} + self.fs.makedir(local, recursive=True) + elif args[0:2] == ['remote', 'set-url']: + remote = args[2] + url = args[3] + self.remotes[remote] = {'url': url} + elif args[0:2] == ['config', 'remote.origin.url']: + remote = 'origin' + url = args[2] + self.remotes[remote] = {'url': url} + elif args[0:2] == ['config', 'remote.origin.mirror']: + remote = 'origin' + elif args[0:2] == ['config', 'remote.origin.fetch']: + remote = 'origin' + else: + raise NotImplementedError() + + def _update_repo(self, cached_repo): + pass + + +class RepoCacheTests(unittest.TestCase): + + def test_has_not_got_repo_initially(self): + repo_cache = TestableRepoCache() + self.assertFalse(repo_cache.has_repo('example:repo')) + self.assertFalse(repo_cache.has_repo('git://example.com/repo')) + + def test_happily_caches_same_repo_twice(self): + repo_cache = TestableRepoCache() + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): + repo_cache.get_updated_repo('example:repo', ref='master') + repo_cache.get_updated_repo('example:repo', ref='master') + + def test_fails_to_cache_when_remote_does_not_exist(self): + repo_cache = TestableRepoCache() + + def clone_fails(args, **kwargs): + repo_cache.fs.makedir(args[4]) + raise cliapp.AppException('') + repo_cache._git = clone_fails + + with self.assertRaises(morphlib.repocache.NoRemote): + repo_cache.get_updated_repo('example:repo', 'master') + + def test_does_not_mind_a_missing_tarball(self): + repo_cache = TestableRepoCache() + + def no_tarball(*args, **kwargs): + raise cliapp.AppException('Not found') + repo_cache._fetch = no_tarball + + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): + repo_cache.get_updated_repo('example:repo', ref='master') + self.assertEqual(repo_cache.tarballs_fetched, []) + + def test_fetches_tarball_when_it_exists(self): + repo_url = 'git://example.com/reponame' + repo_cache = TestableRepoCache() + + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): + repo_cache.get_updated_repo(repo_url, ref='master') + + tarball_url = '%s%s.tar' % (repo_cache._tarball_base_url, + repo_cache._escape(repo_url)) + self.assertEqual(repo_cache.tarballs_fetched, [tarball_url]) + + # Check that the cache updated the repo after fetching the tarball. + self.assertEqual(repo_cache.remotes['origin']['url'], repo_url) + + def test_escapes_repourl_as_filename(self): + repo_cache = TestableRepoCache() + escaped = repo_cache._escape('git://example.com/reponame') + self.assertFalse('/' in escaped) + + def test_noremote_error_message_contains_repo_name(self): + repo_url = 'git://example.com/reponame' + e = morphlib.repocache.NoRemote(repo_url, []) + self.assertTrue(repo_url in str(e)) + + def test_avoids_caching_local_repo(self): + repo_cache = TestableRepoCache() + + repo_cache.fs.makedir('/local/repo', recursive=True) + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): + cached = repo_cache.get_updated_repo( + 'file:///local/repo', refs='master') + assert cached.dirname == '/local/repo' + + def test_no_git_update_setting(self): + repo_cache = TestableRepoCache(update_gits=False) + + with self.assertRaises(morphlib.repocache.NotCached): + repo_cache.get_updated_repo('example:repo', ref='master') + + +class RemoteRepoCacheTests(unittest.TestCase): + def _resolve_ref_for_repo_url(self, repo_url, ref): + return self.sha1s[repo_url][ref] + + def _cat_file_for_repo_url(self, repo_url, sha1, filename): + try: + return self.files[repo_url][sha1][filename] + except KeyError: + raise urllib2.HTTPError(url='', code=404, msg='Not found', + hdrs={}, fp=None) + + def _ls_tree_for_repo_url(self, repo_url, sha1): + return json.dumps({ + 'repo': repo_url, + 'ref': sha1, + 'tree': self.files[repo_url][sha1] + }) + + def setUp(self): + self.sha1s = { + 'git://gitorious.org/baserock/morph': { + 'master': 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9' + } + } + self.files = { + 'git://gitorious.org/baserock-morphs/linux': { + 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9': { + 'linux.morph': 'linux morphology' + } + } + } + self.server_url = 'http://foo.bar' + aliases = [ + 'upstream=git://gitorious.org/baserock-morphs/#foo', + 'baserock=git://gitorious.org/baserock/#foo' + ] + resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases) + self.cache = morphlib.repocache.RemoteRepoCache( + self.server_url, resolver) + self.cache._resolve_ref_for_repo_url = self._resolve_ref_for_repo_url + self.cache._cat_file_for_repo_url = self._cat_file_for_repo_url + self.cache._ls_tree_for_repo_url = self._ls_tree_for_repo_url + + def test_sets_server_url(self): + self.assertEqual(self.cache.server_url, self.server_url) + + def test_resolve_existing_ref_for_existing_repo(self): + sha1 = self.cache.resolve_ref('baserock:morph', 'master') + self.assertEqual( + sha1, + self.sha1s['git://gitorious.org/baserock/morph']['master']) + + def test_fail_resolving_existing_ref_for_non_existent_repo(self): + self.assertRaises(morphlib.repocache.RemoteResolveRefError, + self.cache.resolve_ref, 'non-existent-repo', + 'master') + + def test_fail_resolving_non_existent_ref_for_existing_repo(self): + self.assertRaises(morphlib.repocache.RemoteResolveRefError, + self.cache.resolve_ref, 'baserock:morph', + 'non-existent-ref') + + def test_fail_resolving_non_existent_ref_for_non_existent_repo(self): + self.assertRaises(morphlib.repocache.RemoteResolveRefError, + self.cache.resolve_ref, 'non-existent-repo', + 'non-existent-ref') + + def test_cat_existing_file_in_existing_repo_and_ref(self): + content = self.cache.cat_file( + 'upstream:linux', 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', + 'linux.morph') + self.assertEqual(content, 'linux morphology') + + def test_fail_cat_file_using_invalid_sha1(self): + self.assertRaises(morphlib.repocache.RemoteCatFileError, + self.cache.cat_file, 'upstream:linux', 'blablabla', + 'linux.morph') + + def test_fail_cat_non_existent_file_in_existing_repo_and_ref(self): + self.assertRaises(morphlib.repocache.RemoteCatFileError, + self.cache.cat_file, 'upstream:linux', + 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', + 'non-existent-file') + + def test_fail_cat_file_in_non_existent_ref_in_existing_repo(self): + self.assertRaises(morphlib.repocache.RemoteCatFileError, + self.cache.cat_file, 'upstream:linux', + 'ecd7a325095a0d19b8c3d76f578d85b979461d41', + 'linux.morph') + + def test_fail_cat_file_in_non_existent_repo(self): + self.assertRaises(morphlib.repocache.RemoteCatFileError, + self.cache.cat_file, 'non-existent-repo', + 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', + 'some-file') + + def test_ls_tree_in_existing_repo_and_ref(self): + content = self.cache.ls_tree( + 'upstream:linux', 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9') + self.assertEqual(content, ['linux.morph']) + + def test_fail_ls_tree_using_invalid_sha1(self): + self.assertRaises(morphlib.repocache.RemoteLsTreeError, + self.cache.ls_tree, 'upstream:linux', 'blablabla') + + def test_fail_ls_file_in_non_existent_ref_in_existing_repo(self): + self.assertRaises(morphlib.repocache.RemoteLsTreeError, + self.cache.ls_tree, 'upstream:linux', + 'ecd7a325095a0d19b8c3d76f578d85b979461d41') + + def test_fail_ls_tree_in_non_existent_repo(self): + self.assertRaises(morphlib.repocache.RemoteLsTreeError, + self.cache.ls_tree, 'non-existent-repo', + 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9') + diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py index c6f77cf9..f8dac8b1 100644 --- a/morphlib/sourceresolver.py +++ b/morphlib/sourceresolver.py @@ -138,17 +138,17 @@ class SourceResolver(object): be either a normal URL, or a keyed URL using a repo-alias like 'baserock:baserock/definitions'. - The 'remote repo cache' is a Baserock Trove system. It functions as a - normal Git server, but in addition it runs a service on port 8080 called - 'morph-cache-server' which can resolve refs, list their contents and read - specific files from the repos it holds. This allows the SourceResolver to - work out how to build something without cloning the whole repo. (If a local - build of that source ends up being necessary then it will get cloned into - the local cache later on). - - The second layer of caching is the local repository cache, which mirrors - entire repositories in $cachedir/gits. If a repo is not in the remote repo - cache then it must be present in the local repo cache. + Each commit used in a build is resolved to a tree SHA1, which means that + merge commits and changes to commit messages don't affect the cache + identity of a chunk. This does mean we need to query every repo in the + build graph, though. + + All requests for information on a repo use the 'repocache' module. This + maintains a local copy of all the Git repos we need to work with. A repo + cache can also use a remote 'morph-cache-server' instance, if available, + to query certain information about a repo without cloning it locally. + Using this we can resolve commits to trees without having to clone every + repo locally, which is a huge performance improvement in some cases. The third layer of caching is a simple commit SHA1 -> tree SHA mapping. It turns out that even if all repos are available locally, running @@ -168,14 +168,11 @@ class SourceResolver(object): ''' - def __init__(self, local_repo_cache, remote_repo_cache, - tree_cache_manager, update_repos, - status_cb=None): - self.lrc = local_repo_cache - self.rrc = remote_repo_cache + def __init__(self, repo_cache, tree_cache_manager, status_cb=None): + self.repo_cache = repo_cache self.tree_cache_manager = tree_cache_manager - self.update = update_repos + self.update = repo_cache.update_gits self.status = status_cb def _resolve_ref(self, resolved_trees, reponame, ref): @@ -184,9 +181,6 @@ class SourceResolver(object): If update is True then this has the side-effect of updating or cloning the repository into the local repo cache. - This function is complex due to the 3 layers of caching described in - the SourceResolver docstring. - ''' # The Baserock reference definitions use absolute refs so, and, if the @@ -198,29 +192,8 @@ class SourceResolver(object): logging.debug('tree (%s, %s) not in cache', reponame, ref) - absref = None - if self.lrc.has_repo(reponame): - repo = self.lrc.get_updated_repo(reponame, ref) - # If the user passed --no-git-update, and the ref is a SHA1 not - # available locally, this call will raise an exception. - absref = repo.resolve_ref_to_commit(ref) - tree = repo.resolve_ref_to_tree(absref) - elif self.rrc is not None: - try: - absref, tree = self.rrc.resolve_ref(reponame, ref) - if absref is not None: - self.status(msg='Resolved %(reponame)s %(ref)s via remote ' - 'repo cache', - reponame=reponame, - ref=ref, - chatty=True) - except BaseException as e: - logging.warning('Caught (and ignored) exception: %s' % str(e)) - - if absref is None: - repo = self.lrc.get_updated_repo(reponame, ref) - absref = repo.resolve_ref_to_commit(ref) - tree = repo.resolve_ref_to_tree(absref) + absref, tree = self.repo_cache.resolve_ref_to_commit_and_tree(reponame, + ref) logging.debug('Writing tree to cache with ref (%s, %s)', reponame, absref) @@ -430,7 +403,7 @@ class SourceResolver(object): if definitions_original_ref: definitions_ref = definitions_original_ref - definitions_cached_repo = self.lrc.get_updated_repo( + definitions_cached_repo = self.repo_cache.get_updated_repo( repo_name=definitions_repo, ref=definitions_absref) definitions_cached_repo.extract_commit( definitions_absref, definitions_checkout_dir) @@ -489,9 +462,8 @@ def _find_duplicate_chunks(sourcepool): #pragma: no cover return {k: v for (k, v) in chunk_sources_by_name.iteritems() if len(v) > 1} -def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir, - original_ref=None, update_repos=True, - status_cb=None): +def create_source_pool(repo_cache, repo, ref, filenames, + original_ref=None, status_cb=None): '''Find all the sources involved in building a given system. Given a system morphology, this function will traverse the tree of stratum @@ -502,8 +474,12 @@ def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir, Note that Git submodules are not considered 'sources' in the current implementation, and so they must be handled separately. - The 'lrc' and 'rrc' parameters specify the local and remote Git repository - caches used for resolving the sources. + The 'repo_cache' parameter specifies a repo cache which is used when + accessing the source repos. If a git_resolve_cache_server is set for this + repo cache, and all repos in the build are known to it, then this function + will only need the definitions.git repo available locally. If not, then all + repos must be cloned in order to resolve the refs to tree SHA1s, which is + a slow process! ''' pool = morphlib.sourcepool.SourcePool() @@ -529,10 +505,10 @@ def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir, pool.add(source) tree_cache_manager = PickleCacheManager( - os.path.join(cachedir, tree_cache_filename), tree_cache_size) + os.path.join(repo_cache.cachedir, tree_cache_filename), + tree_cache_size) - resolver = SourceResolver(lrc, rrc, tree_cache_manager, update_repos, - status_cb) + resolver = SourceResolver(repo_cache, tree_cache_manager, status_cb) resolver.traverse_morphs(repo, ref, filenames, visit=add_to_pool, definitions_original_ref=original_ref) diff --git a/morphlib/util.py b/morphlib/util.py index 3b3e4d2b..ba170952 100644 --- a/morphlib/util.py +++ b/morphlib/util.py @@ -102,21 +102,16 @@ def make_concurrency(cores=None): return min(n, 20) -def create_cachedir(settings): # pragma: no cover - '''Return cache directory, creating it if necessary.''' - - cachedir = settings['cachedir'] +def ensure_directory_exists(path): # pragma: no cover # Don't check the folder exists and handle the exception that happens in # this case to avoid errors if the folder is created by something else # just after the check. try: - os.mkdir(cachedir) + os.makedirs(path) except OSError as e: if e.errno != errno.EEXIST: raise - return cachedir - def get_artifact_cache_server(settings): # pragma: no cover if settings['artifact-cache-server']: @@ -141,17 +136,8 @@ def new_artifact_caches(settings): # pragma: no cover ''' - cachedir = create_cachedir(settings) - artifact_cachedir = os.path.join(cachedir, 'artifacts') - # Don't check the folder exists and handle the exception that happens in - # this case to avoid errors if the folder is created by something else - # just after the check. - try: - os.mkdir(artifact_cachedir) - except OSError as e: - if e.errno != errno.EEXIST: - raise - + artifact_cachedir = os.path.join(settings['cachedir'], 'artifacts') + ensure_directory_exists(artifact_cachedir) lac = morphlib.localartifactcache.LocalArtifactCache( fs.osfs.OSFS(artifact_cachedir)) @@ -222,24 +208,26 @@ def combine_aliases(app): # pragma: no cover return alias_map.values() -def new_repo_caches(app): # pragma: no cover - '''Create new objects for local, remote git repository caches.''' - aliases = app.settings['repo-alias'] - cachedir = create_cachedir(app.settings) - gits_dir = os.path.join(cachedir, 'gits') - tarball_base_url = app.settings['tarball-server'] - repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases) - lrc = morphlib.localrepocache.LocalRepoCache( - app, gits_dir, repo_resolver, tarball_base_url=tarball_base_url) +def new_repo_cache(app): # pragma: no cover + '''Create a RepoCache instance using settings from app.settings.''' - url = get_git_resolve_cache_server(app.settings) - if url: - rrc = morphlib.remoterepocache.RemoteRepoCache(url, repo_resolver) - else: - rrc = None + gits_dir = os.path.join(app.settings['cachedir'], 'gits') + tarball_base_url = app.settings['tarball-server'] + git_resolve_cache_url = get_git_resolve_cache_server(app.settings) + aliases = app.settings['repo-alias'] + repo_alias_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases) + + return morphlib.repocache.RepoCache( + gits_dir, repo_alias_resolver, + tarball_base_url=tarball_base_url, + git_resolve_cache_url=git_resolve_cache_url, + update_gits=(not app.settings['no-git-update']), + runcmd_cb=app.runcmd, + status_cb=app.status, + verbose=app.settings['verbose'], + debug=app.settings['debug']) - return lrc, rrc def env_variable_is_password(key): # pragma: no cover return 'PASSWORD' in key |