summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Thursfield <sam.thursfield@codethink.co.uk>2016-03-02 17:11:34 +0000
committerSam Thursfield <sam.thursfield@codethink.co.uk>2016-03-16 19:11:21 +0000
commitd58d8e8f7a4ec03ff14021a4515c8283dad52573 (patch)
tree5ece2d0524e4423bb953e6140831c9fde93b7219
parent014a029ade9a045a839ca86c35690b218098ea33 (diff)
downloadmorph-d58d8e8f7a4ec03ff14021a4515c8283dad52573.tar.gz
Unify local and remote repo cache modules
There's not really any reason you'd want to use the RemoteRepoCache class except as a workaround for the slow speed of some LocalRepoCache operations, so I can't see this ruining anyone's day. The main reason for doing this is so we can simplify the sourceresolver code. One reason that the sourceresolver class is so hopelessly complicated is that right now it has to use two incompatible interfaces for Git repo caches. I've taken the opportunity to detangle the RepoCache class from the App class. Now all of the configuration for the RepoCache class is passed into the constructor explicitly. This makes the class usable from outside Morph: resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases=[]) repo_cache = morphlib.repocache.RepoCache('/src/cache/gits', resolver) Change-Id: I596c81d7645b67504c88e555172a8c238f4f8a66
-rw-r--r--morphlib/__init__.py3
-rw-r--r--morphlib/buildcommand.py18
-rw-r--r--morphlib/builder.py4
-rw-r--r--morphlib/builder_tests.py2
-rw-r--r--morphlib/definitions_repo.py39
-rw-r--r--morphlib/git.py9
-rw-r--r--morphlib/localrepocache.py357
-rw-r--r--morphlib/localrepocache_tests.py149
-rw-r--r--morphlib/plugins/anchor_plugin.py5
-rw-r--r--morphlib/plugins/artifact_inspection_plugin.py37
-rw-r--r--morphlib/plugins/certify_plugin.py8
-rw-r--r--morphlib/plugins/cross-bootstrap_plugin.py2
-rw-r--r--morphlib/plugins/diff_plugin.py10
-rw-r--r--morphlib/plugins/get_repo_plugin.py6
-rw-r--r--morphlib/plugins/list_artifacts_plugin.py8
-rw-r--r--morphlib/plugins/show_dependencies_plugin.py20
-rw-r--r--morphlib/plugins/system_manifests_plugin.py34
-rw-r--r--morphlib/remoterepocache.py105
-rw-r--r--morphlib/remoterepocache_tests.py137
-rw-r--r--morphlib/repocache.py565
-rw-r--r--morphlib/repocache_tests.py281
-rw-r--r--morphlib/sourceresolver.py80
-rw-r--r--morphlib/util.py54
23 files changed, 976 insertions, 957 deletions
diff --git a/morphlib/__init__.py b/morphlib/__init__.py
index 7724c41c..066ab9f6 100644
--- a/morphlib/__init__.py
+++ b/morphlib/__init__.py
@@ -69,15 +69,14 @@ import git
import gitdir
import gitindex
import localartifactcache
-import localrepocache
import mountableimage
import morphologyfinder
import morphology
import morphloader
import morphset
import remoteartifactcache
-import remoterepocache
import repoaliasresolver
+import repocache
import savefile
import source
import sourcepool
diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py
index a2ad3301..e185a808 100644
--- a/morphlib/buildcommand.py
+++ b/morphlib/buildcommand.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2011-2015 Codethink Limited
+# Copyright (C) 2011-2016 Codethink Limited
# Copyright © 2015 Richard Ipsum
#
# This program is free software; you can redistribute it and/or modify
@@ -47,7 +47,7 @@ class BuildCommand(object):
def __init__(self, app, build_env = None):
self.app = app
self.lac, self.rac = self.new_artifact_caches()
- self.lrc, self.rrc = self.new_repo_caches()
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
def build(self, repo_name, ref, filename, original_ref=None):
'''Build a given system morphology.'''
@@ -76,9 +76,6 @@ class BuildCommand(object):
'''
return morphlib.util.new_artifact_caches(self.app.settings)
- def new_repo_caches(self):
- return morphlib.util.new_repo_caches(self.app)
-
def new_build_env(self, arch):
'''Create a new BuildEnvironment instance.'''
return morphlib.buildenvironment.BuildEnvironment(self.app.settings,
@@ -95,10 +92,8 @@ class BuildCommand(object):
'''
self.app.status(msg='Creating source pool', chatty=True)
srcpool = morphlib.sourceresolver.create_source_pool(
- self.lrc, self.rrc, repo_name, ref, filenames,
- cachedir=self.app.settings['cachedir'],
+ self.repo_cache, repo_name, ref, filenames,
original_ref=original_ref,
- update_repos=not self.app.settings['no-git-update'],
status_cb=self.app.status)
return srcpool
@@ -394,8 +389,9 @@ class BuildCommand(object):
'''Update the local git repository cache with the sources.'''
repo_name = source.repo_name
- source.repo = self.lrc.get_updated_repo(repo_name, ref=source.sha1)
- self.lrc.ensure_submodules(source.repo, source.sha1)
+ source.repo = self.repo_cache.get_updated_repo(repo_name,
+ ref=source.sha1)
+ self.repo_cache.ensure_submodules(source.repo, source.sha1)
def cache_artifacts_locally(self, artifacts):
'''Get artifacts missing from local cache from remote cache.'''
@@ -540,7 +536,7 @@ class BuildCommand(object):
'%(sha1)s',
name=source.name, sha1=source.sha1[:7])
builder = morphlib.builder.Builder(
- self.app, staging_area, self.lac, self.rac, self.lrc,
+ self.app, staging_area, self.lac, self.rac, self.repo_cache,
self.app.settings['max-jobs'], setup_mounts)
return builder.build_and_cache(source)
diff --git a/morphlib/builder.py b/morphlib/builder.py
index 2d0a4bd4..c980a276 100644
--- a/morphlib/builder.py
+++ b/morphlib/builder.py
@@ -45,7 +45,7 @@ def extract_sources(app, repo_cache, repo, sha1, srcdir): #pragma: no cover
morphlib.gitdir.checkout_from_cached_repo(repo, sha1, destdir)
morphlib.git.reset_workdir(app.runcmd, destdir)
- submodules = morphlib.git.Submodules(app, repo.dirname, sha1)
+ submodules = morphlib.git.Submodules(repo.dirname, sha1, app.runcmd)
try:
submodules.load()
except morphlib.git.NoModulesFileError:
@@ -187,7 +187,7 @@ class BuilderBase(object):
'''
assert isinstance(self.source.repo,
- morphlib.localrepocache.CachedRepo)
+ morphlib.repocache.CachedRepo)
meta = {
'artifact-name': artifact_name,
'source-name': self.source.name,
diff --git a/morphlib/builder_tests.py b/morphlib/builder_tests.py
index da1f432e..54bc4a8f 100644
--- a/morphlib/builder_tests.py
+++ b/morphlib/builder_tests.py
@@ -51,7 +51,7 @@ class FakeSource(object):
self.name = 'a'
with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.repo = morphlib.localrepocache.CachedRepo(
+ self.repo = morphlib.repocache.CachedRepo(
'path', 'repo', 'url')
self.repo_name = 'url'
self.original_ref = 'e'
diff --git a/morphlib/definitions_repo.py b/morphlib/definitions_repo.py
index 4c13abee..8b022867 100644
--- a/morphlib/definitions_repo.py
+++ b/morphlib/definitions_repo.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015 Codethink Limited
+# Copyright (C) 2015-2016 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -127,9 +127,9 @@ class DefinitionsRepo(gitdir.GitDirectory):
return bbcm()
@contextlib.contextmanager
- def source_pool(self, lrc, rrc, cachedir, ref, system_filename,
+ def source_pool(self, repo_cache, ref, system_filename,
include_local_changes=False, push_local_changes=False,
- update_repos=True, status_cb=None, build_ref_prefix=None,
+ status_cb=None, build_ref_prefix=None,
git_user_name=None, git_user_email=None):
'''Load the system defined in 'morph' and all the sources it contains.
@@ -162,15 +162,10 @@ class DefinitionsRepo(gitdir.GitDirectory):
setting, but that was probably only useful for `morph distbuild` and
that now uses branch_with_local_changes().
- The 'lrc' and 'rrc' parameters are local and remote Git repo caches.
- Use morphlib.util.new_repo_caches() to obtain these. The 'cachedir'
- parameter points to where Git repos are cached by Morph,
- app.settings['cachedir'] tells you that.
-
- The 'update_repos' flag allows you to disable updating Git repos, to
- honour app.settings['no-git-update']. If one of the refs in the build
- graph is not available locally and update_repos is False, you will see
- a morphlib.gitdir.InvalidRefError exception.
+ The 'repo_cache' parameter is a morphlib.repocache.RepoCache instance.
+ If update_gits=False is set for this repo cache, and one of the refs in
+ the build graph is not available locally, you will see a
+ morphlib.gitdir.InvalidRefError exception.
The 'status_cb' function will be called if set to output progress and
status messages to the user.
@@ -200,9 +195,8 @@ class DefinitionsRepo(gitdir.GitDirectory):
status_cb(msg='Deciding on task order')
yield morphlib.sourceresolver.create_source_pool(
- lrc, rrc, repo_url, commit, [system_filename],
- cachedir=cachedir, original_ref=original_ref,
- update_repos=update_repos, status_cb=status_cb)
+ repo_cache, repo_url, commit, [system_filename],
+ original_ref=original_ref, status_cb=status_cb)
else:
repo_url = self.remote_url
commit = self.resolve_ref_to_commit(ref)
@@ -212,9 +206,8 @@ class DefinitionsRepo(gitdir.GitDirectory):
try:
yield morphlib.sourceresolver.create_source_pool(
- lrc, rrc, repo_url, commit, [system_filename],
- cachedir=cachedir, original_ref=ref,
- update_repos=update_repos, status_cb=status_cb)
+ repo_cache, repo_url, commit, [system_filename],
+ original_ref=ref, status_cb=status_cb)
except morphlib.sourceresolver.InvalidDefinitionsRefError as e:
raise cliapp.AppException(
'Commit %s wasn\'t found in the "origin" remote %s. '
@@ -332,7 +325,7 @@ class DefinitionsRepoWithApp(DefinitionsRepo):
self._git_user_name = morphlib.git.get_user_name(app.runcmd)
self._git_user_email = morphlib.git.get_user_email(app.runcmd)
- self._lrc, self._rrc = morphlib.util.new_repo_caches(app)
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
def branch_with_local_changes(self, uuid, push=False):
'''Equivalent to DefinitionsRepo.branch_with_local_changes().'''
@@ -343,22 +336,20 @@ class DefinitionsRepoWithApp(DefinitionsRepo):
build_ref_prefix=self.app.settings['build-ref-prefix'],
git_user_name=self._git_user_name,
git_user_email=self._git_user_email,
- status_cb=self.app.status,)
+ status_cb=self.app.status)
def source_pool(self, ref, system_filename):
'''Equivalent to DefinitionsRepo.source_pool().'''
local_changes = self.app.settings['local-changes']
return DefinitionsRepo.source_pool(
- self, self._lrc, self._rrc, self.app.settings['cachedir'],
- ref, system_filename,
+ self, self.repo_cache, ref, system_filename,
include_local_changes=(local_changes == 'include'),
push_local_changes=self.app.settings['push-build-branches'],
build_ref_prefix=self.app.settings['build-ref-prefix'],
git_user_name=self._git_user_name,
git_user_email=self._git_user_email,
- status_cb=self.app.status,
- update_repos=(not self.app.settings['no-git-update']))
+ status_cb=self.app.status)
def _local_definitions_repo(path, search_for_root, app=None):
'''Open a local Git repo containing Baserock definitions, at 'path'.
diff --git a/morphlib/git.py b/morphlib/git.py
index 190544ac..cab551ef 100644
--- a/morphlib/git.py
+++ b/morphlib/git.py
@@ -58,12 +58,13 @@ class MissingSubmoduleCommitError(cliapp.AppException):
class Submodules(object):
- def __init__(self, app, repo, ref):
- self.app = app
+ def __init__(self, repo, ref, runcmd_cb=cliapp.runcmd):
self.repo = repo
self.ref = ref
self.submodules = []
+ self.runcmd_cb = runcmd_cb
+
def load(self):
content = self._read_gitmodules_file()
@@ -76,7 +77,7 @@ class Submodules(object):
def _read_gitmodules_file(self):
try:
# try to read the .gitmodules file from the repo/ref
- content = gitcmd(self.app.runcmd, 'cat-file', 'blob',
+ content = gitcmd(self.runcmd_cb, 'cat-file', 'blob',
'%s:.gitmodules' % self.ref, cwd=self.repo,
ignore_fail=True)
@@ -100,7 +101,7 @@ class Submodules(object):
try:
# list objects in the parent repo tree to find the commit
# object that corresponds to the submodule
- commit = gitcmd(self.app.runcmd, 'ls-tree', self.ref,
+ commit = gitcmd(self.runcmd_cb, 'ls-tree', self.ref,
submodule.path, cwd=self.repo)
# read the commit hash from the output
diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py
deleted file mode 100644
index 3a03fe1d..00000000
--- a/morphlib/localrepocache.py
+++ /dev/null
@@ -1,357 +0,0 @@
-# Copyright (C) 2012-2016 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import os
-import urlparse
-import string
-import sys
-import tempfile
-
-import cliapp
-import fs.osfs
-
-import morphlib
-from morphlib.util import word_join_list as _word_join_list
-
-
-# urlparse.urljoin needs to know details of the URL scheme being used.
-# It does not know about git:// by default, so we teach it here.
-gitscheme = ['git']
-urlparse.uses_relative.extend(gitscheme)
-urlparse.uses_netloc.extend(gitscheme)
-urlparse.uses_params.extend(gitscheme)
-urlparse.uses_query.extend(gitscheme)
-urlparse.uses_fragment.extend(gitscheme)
-
-
-def quote_url(url):
- ''' Convert URIs to strings that only contain digits, letters, % and _.
-
- NOTE: When changing the code of this function, make sure to also apply
- the same to the quote_url() function of lorry. Otherwise the git tarballs
- generated by lorry may no longer be found by morph.
-
- '''
- valid_chars = string.digits + string.letters + '%_'
- transl = lambda x: x if x in valid_chars else '_'
- return ''.join([transl(x) for x in url])
-
-
-class NoRemote(morphlib.Error):
-
- def __init__(self, reponame, errors):
- self.reponame = reponame
- self.errors = errors
-
- def __str__(self):
- return '\n\t'.join(['Cannot find remote git repository: %s' %
- self.reponame] + self.errors)
-
-
-class NotCached(morphlib.Error):
- def __init__(self, reponame):
- self.reponame = reponame
-
- def __str__(self): # pragma: no cover
- return 'Repository %s is not cached yet' % self.reponame
-
-
-class UpdateError(cliapp.AppException): # pragma: no cover
-
- def __init__(self, repo):
- cliapp.AppException.__init__(
- self, 'Failed to update cached version of repo %s' % repo)
-
-
-class CachedRepo(morphlib.gitdir.GitDirectory):
- '''A locally cached Git repository with an origin remote set up.
-
- On instance of this class represents a locally cached version of a
- remote Git repository. This remote repository is set up as the
- 'origin' remote.
-
- Cached repositories are bare mirrors of the upstream. Locally created
- branches will be lost the next time the repository updates.
-
- '''
- def __init__(self, path, original_name, url):
- self.original_name = original_name
- self.url = url
- self.is_mirror = not url.startswith('file://')
- self.already_updated = False
-
- super(CachedRepo, self).__init__(path)
-
- def __str__(self): # pragma: no cover
- return self.url
-
-
-class LocalRepoCache(object):
-
- '''Manage locally cached git repositories.
-
- When we build stuff, we need a local copy of the git repository.
- To avoid having to clone the repositories for every build, we
- maintain a local cache of the repositories: we first clone the
- remote repository to the cache, and then make a local clone from
- the cache to the build environment. This class manages the local
- cached repositories.
-
- Repositories may be specified either using a full URL, in a form
- understood by git(1), or as a repository name to which a base url
- is prepended. The base urls are given to the class when it is
- created.
-
- Instead of cloning via a normal 'git clone' directly from the
- git server, we first try to download a tarball from a url, and
- if that works, we unpack the tarball.
-
- '''
-
- def __init__(self, app, cachedir, resolver, tarball_base_url=None):
- self._app = app
- self.fs = fs.osfs.OSFS('/')
- self._cachedir = cachedir
- self._resolver = resolver
- if tarball_base_url and not tarball_base_url.endswith('/'):
- tarball_base_url += '/' # pragma: no cover
- self._tarball_base_url = tarball_base_url
- self._cached_repo_objects = {}
-
- def _git(self, args, **kwargs): # pragma: no cover
- '''Execute git command.
-
- This is a method of its own so that unit tests can easily override
- all use of the external git command.
-
- '''
-
- morphlib.git.gitcmd(self._app.runcmd, *args, **kwargs)
-
- def _fetch(self, url, path): # pragma: no cover
- '''Fetch contents of url into a file.
-
- This method is meant to be overridden by unit tests.
-
- '''
- self._app.status(msg="Trying to fetch %(tarball)s to seed the cache",
- tarball=url, chatty=True)
-
- if self._app.settings['verbose']:
- verbosity_flags = []
- kwargs = dict(stderr=sys.stderr)
- else:
- verbosity_flags = ['--quiet']
- kwargs = dict()
-
- def wget_command():
- return ['wget'] + verbosity_flags + ['-O-', url]
-
- self._app.runcmd(wget_command(),
- ['tar', '--no-same-owner', '-xf', '-'],
- cwd=path, **kwargs)
-
- def _mkdtemp(self, dirname): # pragma: no cover
- '''Creates a temporary directory.
-
- This method is meant to be overridden by unit tests.
-
- '''
- return tempfile.mkdtemp(dir=dirname)
-
- def _escape(self, url):
- '''Escape a URL so it can be used as a basename in a file.'''
-
- # FIXME: The following is a nicer way than to do this.
- # However, for compatibility, we need to use the same as the
- # tarball server (set up by Lorry) uses.
- # return urllib.quote(url, safe='')
-
- return quote_url(url)
-
- def _cache_name(self, url):
- scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
- if scheme != 'file':
- path = os.path.join(self._cachedir, self._escape(url))
- return path
-
- def has_repo(self, reponame):
- '''Have we already got a cache of a given repo?'''
- url = self._resolver.pull_url(reponame)
- path = self._cache_name(url)
- return self.fs.exists(path)
-
- def _clone_with_tarball(self, repourl, path):
- tarball_url = urlparse.urljoin(self._tarball_base_url,
- self._escape(repourl)) + '.tar'
- try:
- self.fs.makedir(path)
- self._fetch(tarball_url, path)
- self._git(['config', 'remote.origin.url', repourl], cwd=path)
- self._git(['config', 'remote.origin.mirror', 'true'], cwd=path)
- self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'],
- cwd=path)
- except BaseException as e: # pragma: no cover
- if self.fs.exists(path):
- self.fs.removedir(path, force=True)
- return False, 'Unable to extract tarball %s: %s' % (
- tarball_url, e)
-
- return True, None
-
- def _cache_repo(self, reponame):
- '''Clone the given repo into the cache.
-
- If the repo is already cloned, do nothing.
-
- '''
- errors = []
- if not self.fs.exists(self._cachedir):
- self.fs.makedir(self._cachedir, recursive=True)
-
- try:
- return self._get_repo(reponame)
- except NotCached as e:
- pass
-
- repourl = self._resolver.pull_url(reponame)
- path = self._cache_name(repourl)
- if self._tarball_base_url:
- ok, error = self._clone_with_tarball(repourl, path)
- if ok:
- repo = self._get_repo(reponame)
- self._update_repo(repo)
- return repo
- else:
- errors.append(error)
- self._app.status(
- msg='Using git clone.')
-
- target = self._mkdtemp(self._cachedir)
-
- try:
- self._git(['clone', '--mirror', '-n', repourl, target],
- echo_stderr=self._app.settings['debug'])
- except cliapp.AppException as e:
- errors.append('Unable to clone from %s to %s: %s' %
- (repourl, target, e))
- if self.fs.exists(target):
- self.fs.removedir(target, recursive=True, force=True)
- raise NoRemote(reponame, errors)
-
- self.fs.rename(target, path)
-
- repo = self._get_repo(reponame)
- repo.already_updated = True
- return repo
-
- def _get_repo(self, reponame):
- '''Return an object representing a cached repository.'''
-
- if reponame in self._cached_repo_objects:
- return self._cached_repo_objects[reponame]
- else:
- repourl = self._resolver.pull_url(reponame)
- path = self._cache_name(repourl)
- if self.fs.exists(path):
- repo = CachedRepo(path, reponame, repourl)
- self._cached_repo_objects[reponame] = repo
- return repo
- raise NotCached(reponame)
-
- def _update_repo(self, cachedrepo): # pragma: no cover
- try:
- cachedrepo.update_remotes(
- echo_stderr=self._app.settings['verbose'])
- cachedrepo.already_updated = True
- except cliapp.AppException:
- raise UpdateError(self)
-
- def get_updated_repo(self, repo_name,
- ref=None, refs=None): # pragma: no cover
- '''Return object representing cached repository.
-
- If all the specified refs in 'ref' or 'refs' point to SHA1s that are
- already in the repository, or --no-git-update is set, then the
- repository won't be updated.
-
- '''
-
- if self._app.settings['no-git-update']:
- self._app.status(msg='Not updating existing git repository '
- '%(repo_name)s '
- 'because of no-git-update being set',
- chatty=True,
- repo_name=repo_name)
- return self._get_repo(repo_name)
-
- if ref is not None and refs is None:
- refs = (ref,)
-
- if self.has_repo(repo_name):
- repo = self._get_repo(repo_name)
- if refs:
- required_refs = set(refs)
- missing_refs = set()
- for required_ref in required_refs:
- if morphlib.git.is_valid_sha1(required_ref):
- try:
- repo.resolve_ref_to_commit(required_ref)
- continue
- except morphlib.gitdir.InvalidRefError:
- pass
- missing_refs.add(required_ref)
-
- if not missing_refs:
- self._app.status(
- msg='Not updating git repository %(repo_name)s '
- 'because it already contains %(sha1s)s',
- chatty=True, repo_name=repo_name,
- sha1s=_word_join_list(tuple(required_refs)))
- return repo
-
- self._app.status(msg='Updating %(repo_name)s',
- repo_name=repo_name)
- self._update_repo(repo)
- return repo
- else:
- self._app.status(msg='Cloning %(repo_name)s',
- repo_name=repo_name)
- return self._cache_repo(repo_name)
-
- def ensure_submodules(self, toplevel_repo,
- toplevel_ref): # pragma: no cover
- '''Ensure any submodules of a given repo are cached and up to date.'''
-
- def submodules_for_repo(repo_path, ref):
- try:
- submodules = morphlib.git.Submodules(self._app, repo_path, ref)
- submodules.load()
- return [(submod.url, submod.commit) for submod in submodules]
- except morphlib.git.NoModulesFileError:
- return []
-
- done = set()
- subs_to_process = submodules_for_repo(toplevel_repo.dirname,
- toplevel_ref)
- while subs_to_process:
- url, ref = subs_to_process.pop()
- done.add((url, ref))
-
- cached_repo = self.get_updated_repo(url, ref=ref)
-
- for submod in submodules_for_repo(cached_repo.dirname, ref):
- if submod not in done:
- subs_to_process.append(submod)
diff --git a/morphlib/localrepocache_tests.py b/morphlib/localrepocache_tests.py
deleted file mode 100644
index 91fdb216..00000000
--- a/morphlib/localrepocache_tests.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Copyright (C) 2012-2016 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import unittest
-import urllib2
-import os
-
-import cliapp
-import fs.memoryfs
-
-import morphlib
-import morphlib.gitdir_tests
-
-
-class FakeApplication(object):
-
- def __init__(self):
- self.settings = {
- 'debug': True,
- 'verbose': True,
- 'no-git-update': False,
- }
-
- def status(self, **kwargs):
- pass
-
-
-class LocalRepoCacheTests(unittest.TestCase):
-
- def setUp(self):
- aliases = ['upstream=git://example.com/#example.com:%s.git']
- repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
- tarball_base_url = 'http://lorry.example.com/tarballs/'
- self.reponame = 'upstream:reponame'
- self.repourl = 'git://example.com/reponame'
- escaped_url = 'git___example_com_reponame'
- self.tarball_url = '%s%s.tar' % (tarball_base_url, escaped_url)
- self.cachedir = '/cache/dir'
- self.cache_path = '%s/%s' % (self.cachedir, escaped_url)
- self.remotes = {}
- self.fetched = []
- self.lrc = morphlib.localrepocache.LocalRepoCache(
- FakeApplication(), self.cachedir, repo_resolver, tarball_base_url)
- self.lrc.fs = fs.memoryfs.MemoryFS()
- self.lrc._git = self.fake_git
- self.lrc._fetch = self.not_found
- self.lrc._mkdtemp = self.fake_mkdtemp
- self.lrc._update_repo = lambda *args: None
- self._mkdtemp_count = 0
-
- def fake_git(self, args, **kwargs):
- if args[0] == 'clone':
- self.assertEqual(len(args), 5)
- remote = args[3]
- local = args[4]
- self.remotes['origin'] = {'url': remote, 'updates': 0}
- self.lrc.fs.makedir(local, recursive=True)
- elif args[0:2] == ['remote', 'set-url']:
- remote = args[2]
- url = args[3]
- self.remotes[remote] = {'url': url}
- elif args[0:2] == ['config', 'remote.origin.url']:
- remote = 'origin'
- url = args[2]
- self.remotes[remote] = {'url': url}
- elif args[0:2] == ['config', 'remote.origin.mirror']:
- remote = 'origin'
- elif args[0:2] == ['config', 'remote.origin.fetch']:
- remote = 'origin'
- else:
- raise NotImplementedError()
-
- def fake_mkdtemp(self, dirname):
- thing = "foo"+str(self._mkdtemp_count)
- self._mkdtemp_count += 1
- self.lrc.fs.makedir(dirname+"/"+thing)
- return thing
-
- def not_found(self, url, path):
- raise cliapp.AppException('Not found')
-
- def test_has_not_got_shortened_repo_initially(self):
- self.assertFalse(self.lrc.has_repo(self.reponame))
-
- def test_has_not_got_absolute_repo_initially(self):
- self.assertFalse(self.lrc.has_repo(self.repourl))
-
- def test_cachedir_does_not_exist_initially(self):
- self.assertFalse(self.lrc.fs.exists(self.cachedir))
-
- def test_creates_cachedir_if_missing(self):
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.lrc.get_updated_repo(self.repourl, ref='master')
- self.assertTrue(self.lrc.fs.exists(self.cachedir))
-
- def test_happily_caches_same_repo_twice(self):
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.lrc.get_updated_repo(self.repourl, ref='master')
- self.lrc.get_updated_repo(self.repourl, ref='master')
-
- def test_fails_to_cache_when_remote_does_not_exist(self):
- def fail(args, **kwargs):
- self.lrc.fs.makedir(args[4])
- raise cliapp.AppException('')
- self.lrc._git = fail
- self.assertRaises(morphlib.localrepocache.NoRemote,
- self.lrc.get_updated_repo, self.repourl, 'master')
-
- def test_does_not_mind_a_missing_tarball(self):
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.lrc.get_updated_repo(self.repourl, ref='master')
- self.assertEqual(self.fetched, [])
-
- def test_fetches_tarball_when_it_exists(self):
- self.lrc._fetch = lambda url, path: self.fetched.append(url)
-
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.lrc.get_updated_repo(self.repourl, ref='master')
-
- self.assertEqual(self.fetched, [self.tarball_url])
- self.assertFalse(self.lrc.fs.exists(self.cache_path + '.tar'))
- self.assertEqual(self.remotes['origin']['url'], self.repourl)
-
- def test_escapes_repourl_as_filename(self):
- escaped = self.lrc._escape(self.repourl)
- self.assertFalse('/' in escaped)
-
- def test_noremote_error_message_contains_repo_name(self):
- e = morphlib.localrepocache.NoRemote(self.repourl, [])
- self.assertTrue(self.repourl in str(e))
-
- def test_avoids_caching_local_repo(self):
- self.lrc.fs.makedir('/local/repo', recursive=True)
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- cached = self.lrc.get_updated_repo('file:///local/repo',
- refs='master')
- assert cached.dirname == '/local/repo'
diff --git a/morphlib/plugins/anchor_plugin.py b/morphlib/plugins/anchor_plugin.py
index 7465c479..a9d07b39 100644
--- a/morphlib/plugins/anchor_plugin.py
+++ b/morphlib/plugins/anchor_plugin.py
@@ -137,9 +137,8 @@ class AnchorPlugin(cliapp.Plugin):
for reponame, sources in sources_by_reponame.iteritems():
# UGLY HACK we need to push *FROM* our local repo cache to
# avoid cloning everything multiple times.
- repo = bc.lrc.get_updated_repo(reponame,
- refs=(s.original_ref
- for s in sources))
+ repo = bc.repo_cache.get_updated_repo(
+ reponame, refs=(s.original_ref for s in sources))
remote = Remote(repo)
push_url = resolver.push_url(reponame)
diff --git a/morphlib/plugins/artifact_inspection_plugin.py b/morphlib/plugins/artifact_inspection_plugin.py
index 413a0072..d396f93b 100644
--- a/morphlib/plugins/artifact_inspection_plugin.py
+++ b/morphlib/plugins/artifact_inspection_plugin.py
@@ -36,28 +36,23 @@ class NotASystemArtifactError(cliapp.AppException):
class ProjectVersionGuesser(object):
- def __init__(self, app, lrc, rrc, interesting_files):
+ def __init__(self, app, repo_cache, interesting_files):
self.app = app
- self.lrc = lrc
- self.rrc = rrc
+ self.repo_cache = repo_cache
self.interesting_files = interesting_files
def file_contents(self, repo, ref, tree):
filenames = [x for x in self.interesting_files if x in tree]
- if filenames:
- if self.lrc.has_repo(repo):
- repository = self.lrc.get_updated_repo(repo, ref)
- for filename in filenames:
- yield filename, repository.read_file(filename, ref)
- elif self.rrc:
- for filename in filenames:
- yield filename, self.rrc.cat_file(repo, ref, filename)
+ for filename in filenames:
+ # This can use a remote repo cache if available, to avoid having
+ # to clone every repo locally.
+ yield filename, self.repo_cache.cat_file(repo, ref, filename)
class AutotoolsVersionGuesser(ProjectVersionGuesser):
- def __init__(self, app, lrc, rrc):
- ProjectVersionGuesser.__init__(self, app, lrc, rrc, [
+ def __init__(self, app, repo_cache):
+ ProjectVersionGuesser.__init__(self, app, repo_cache, [
'configure.ac',
'configure.in',
'configure.ac.in',
@@ -136,9 +131,9 @@ class VersionGuesser(object):
def __init__(self, app):
self.app = app
- self.lrc, self.rrc = morphlib.util.new_repo_caches(app)
+ self.repo_cache = morphlib.util.new_repo_cache(app)
self.guessers = [
- AutotoolsVersionGuesser(app, self.lrc, self.rrc)
+ AutotoolsVersionGuesser(app, self.repo_cache)
]
def guess_version(self, repo, ref):
@@ -146,14 +141,10 @@ class VersionGuesser(object):
repo=repo, ref=ref, chatty=True)
version = None
try:
- if self.lrc.has_repo(repo):
- repository = self.lrc.get_updated_repo(repo, ref)
- tree = repository.list_files(ref=ref, recurse=False)
- elif self.rrc:
- repository = None
- tree = self.rrc.ls_tree(repo, ref)
- else:
- return None
+ # This can use a remote repo cache if available, to avoid having
+ # to clone every repo locally.
+ tree = self.repo_cache.ls_tree(repo, ref)
+
for guesser in self.guessers:
version = guesser.guess_version(repo, ref, tree)
if version:
diff --git a/morphlib/plugins/certify_plugin.py b/morphlib/plugins/certify_plugin.py
index 735d0332..72d24a51 100644
--- a/morphlib/plugins/certify_plugin.py
+++ b/morphlib/plugins/certify_plugin.py
@@ -57,7 +57,7 @@ class CertifyPlugin(cliapp.Plugin):
system_filenames = map(morphlib.util.sanitise_morphology_path,
args[2:])
- self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app)
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
self.resolver = morphlib.artifactresolver.ArtifactResolver()
for system_filename in system_filenames:
@@ -69,9 +69,7 @@ class CertifyPlugin(cliapp.Plugin):
self.app.status(
msg='Creating source pool for %s' % system_filename, chatty=True)
source_pool = morphlib.sourceresolver.create_source_pool(
- self.lrc, self.rrc, repo, ref, [system_filename],
- cachedir=self.app.settings['cachedir'],
- update_repos = not self.app.settings['no-git-update'],
+ self.repo_cache, repo, ref, [system_filename],
status_cb=self.app.status)
self.app.status(
@@ -115,7 +113,7 @@ class CertifyPlugin(cliapp.Plugin):
.format(name, ref))
certified = False
- cached = self.lrc.get_updated_repo(source.repo_name, ref)
+ cached = self.repo_cache.get_updated_repo(source.repo_name, ref)
# Test that sha1 ref is anchored in a tag or branch,
# and thus not a candidate for removal on `git gc`.
diff --git a/morphlib/plugins/cross-bootstrap_plugin.py b/morphlib/plugins/cross-bootstrap_plugin.py
index 273e677d..8b8fbb2d 100644
--- a/morphlib/plugins/cross-bootstrap_plugin.py
+++ b/morphlib/plugins/cross-bootstrap_plugin.py
@@ -304,7 +304,7 @@ class CrossBootstrapPlugin(cliapp.Plugin):
system_artifact.source, build_env, use_chroot=False)
builder = BootstrapSystemBuilder(
self.app, staging_area, build_command.lac, build_command.rac,
- system_artifact.source, build_command.lrc, 1, False)
+ system_artifact.source, build_command.repo_cache, 1, False)
builder.build_and_cache()
self.app.status(
diff --git a/morphlib/plugins/diff_plugin.py b/morphlib/plugins/diff_plugin.py
index 26964df8..24a6d69a 100644
--- a/morphlib/plugins/diff_plugin.py
+++ b/morphlib/plugins/diff_plugin.py
@@ -22,7 +22,6 @@ from morphlib.cmdline_parse_utils import (definition_lists_synopsis,
from morphlib.morphologyfinder import MorphologyFinder
from morphlib.morphloader import MorphologyLoader
from morphlib.morphset import MorphologySet
-from morphlib.util import new_repo_caches
class DiffPlugin(cliapp.Plugin):
@@ -60,9 +59,10 @@ class DiffPlugin(cliapp.Plugin):
name, from_source.repo_name, to_source.repo_name))
if from_source.original_ref != to_source.original_ref:
- from_repo, to_repo = (self.bc.lrc.get_updated_repo(s.repo_name,
- ref=s.sha1)
- for s in (from_source, to_source))
+ repo_cache = self.bc.repo_cache
+ from_repo, to_repo = (repo_cache.get_updated_repo(s.repo_name,
+ ref=s.sha1)
+ for s in (from_source, to_source))
from_desc = from_repo.version_guess(from_source.sha1)
to_desc = to_repo.version_guess(to_source.sha1)
@@ -100,7 +100,7 @@ class DiffPlugin(cliapp.Plugin):
def get_systems((reponame, ref, definitions)):
'Convert a definition path list into a list of systems'
ml = MorphologyLoader()
- repo = self.bc.lrc.get_updated_repo(reponame, ref=ref)
+ repo = self.bc.repo_cache.get_updated_repo(reponame, ref=ref)
mf = MorphologyFinder(gitdir=repo, ref=ref)
# We may have been given an empty set of definitions as input, in
# which case we instead use every we find.
diff --git a/morphlib/plugins/get_repo_plugin.py b/morphlib/plugins/get_repo_plugin.py
index fc81d6e5..ce0b7af0 100644
--- a/morphlib/plugins/get_repo_plugin.py
+++ b/morphlib/plugins/get_repo_plugin.py
@@ -101,9 +101,9 @@ class GetRepoPlugin(cliapp.Plugin):
'%(stratum)s stratum',
ref=ref or chunk_spec['ref'], chunk=chunk_spec['name'],
stratum=morph['name'])
- lrc, rrc = morphlib.util.new_repo_caches(self.app)
- cached_repo = lrc.get_updated_repo(chunk_spec['repo'],
- chunk_spec['ref'])
+ repo_cache = morphlib.util.new_repo_cache(self.app)
+ cached_repo = repo_cache.get_updated_repo(chunk_spec['repo'],
+ chunk_spec['ref'])
try:
self._clone_repo(cached_repo, dirname,
diff --git a/morphlib/plugins/list_artifacts_plugin.py b/morphlib/plugins/list_artifacts_plugin.py
index c2e6b459..2c098c2a 100644
--- a/morphlib/plugins/list_artifacts_plugin.py
+++ b/morphlib/plugins/list_artifacts_plugin.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2015 Codethink Limited
+# Copyright (C) 2014-2016 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -58,7 +58,7 @@ class ListArtifactsPlugin(cliapp.Plugin):
system_filenames = map(morphlib.util.sanitise_morphology_path,
args[2:])
- self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app)
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
self.resolver = morphlib.artifactresolver.ArtifactResolver()
artifact_files = set()
@@ -85,9 +85,7 @@ class ListArtifactsPlugin(cliapp.Plugin):
self.app.status(
msg='Creating source pool for %s' % system_filename, chatty=True)
source_pool = morphlib.sourceresolver.create_source_pool(
- self.lrc, self.rrc, repo, ref, [system_filename],
- cachedir=self.app.settings['cachedir'],
- update_repos = not self.app.settings['no-git-update'],
+ self.repo_cache, repo, ref, [system_filename],
status_cb=self.app.status)
self.app.status(
diff --git a/morphlib/plugins/show_dependencies_plugin.py b/morphlib/plugins/show_dependencies_plugin.py
index 42f8f273..bfe4d6c2 100644
--- a/morphlib/plugins/show_dependencies_plugin.py
+++ b/morphlib/plugins/show_dependencies_plugin.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2012-2015 Codethink Limited
+# Copyright (C) 2012-2016 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -42,24 +42,6 @@ class ShowDependenciesPlugin(cliapp.Plugin):
of build dependencies of the constituent components.
'''
-
- if not os.path.exists(self.app.settings['cachedir']):
- os.mkdir(self.app.settings['cachedir'])
- cachedir = os.path.join(self.app.settings['cachedir'], 'gits')
- tarball_base_url = self.app.settings['tarball-server']
- repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(
- self.app.settings['repo-alias'])
- lrc = morphlib.localrepocache.LocalRepoCache(
- self.app, cachedir, repo_resolver, tarball_base_url)
-
- remote_url = morphlib.util.get_git_resolve_cache_server(
- self.app.settings)
- if remote_url:
- rrc = morphlib.remoterepocache.RemoteRepoCache(
- remote_url, repo_resolver)
- else:
- rrc = None
-
build_command = morphlib.buildcommand.BuildCommand(self.app)
# traverse the morphs to list all the sources
diff --git a/morphlib/plugins/system_manifests_plugin.py b/morphlib/plugins/system_manifests_plugin.py
index 86388737..7fe33102 100644
--- a/morphlib/plugins/system_manifests_plugin.py
+++ b/morphlib/plugins/system_manifests_plugin.py
@@ -84,7 +84,7 @@ class SystemManifestsPlugin(cliapp.Plugin):
system_filenames = map(morphlib.util.sanitise_morphology_path,
args[2:])
- self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app)
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
self.resolver = morphlib.artifactresolver.ArtifactResolver()
for system_filename in system_filenames:
@@ -104,9 +104,7 @@ class SystemManifestsPlugin(cliapp.Plugin):
msg='Creating source pool for %(system)s',
system=system_filename, chatty=True)
source_pool = morphlib.sourceresolver.create_source_pool(
- self.lrc, self.rrc, repo, ref, [system_filename],
- cachedir=self.app.settings['cachedir'],
- update_repos = not self.app.settings['no-git-update'],
+ self.repo_cache, repo, ref, [system_filename],
status_cb=self.app.status)
self.app.status(
@@ -135,10 +133,11 @@ class SystemManifestsPlugin(cliapp.Plugin):
except IndexError:
trove_id = None
with morphlib.util.temp_dir(dir=self.app.settings['tempdir']) as td:
- lorries = get_lorry_repos(td, self.lrc, self.app.status, trove_id,
+ lorries = get_lorry_repos(td, self.repo_cache, self.app.status,
+ trove_id,
self.app.settings['trove-host'])
manifest = Manifest(system_artifact.name, td, self.app.status,
- self.lrc)
+ self.repo_cache)
old_prefix = self.app.status_prefix
sources = set(a.source for a in system_artifact.walk()
@@ -150,7 +149,8 @@ class SystemManifestsPlugin(cliapp.Plugin):
name = source.morphology['name']
ref = source.original_ref
- cached = self.lrc.get_updated_repo(source.repo_name, ref)
+ cached = self.repo_cache.get_updated_repo(source.repo_name,
+ ref)
new_prefix = '[%d/%d][%s] ' % (i, len(sources), name)
self.app.status_prefix = old_prefix + new_prefix
@@ -169,8 +169,8 @@ def run_licensecheck(filename):
else:
return output[len(filename) + 2:].strip()
-def checkout_repo(lrc, repo, dest, ref='master'):
- cached = lrc.get_updated_repo(repo, ref)
+def checkout_repo(repo_cache, repo, dest, ref='master'):
+ cached = repo_cache.get_updated_repo(repo, ref)
if not os.path.exists(dest):
morphlib.gitdir.checkout_from_cached_repo(repo, ref, dest)
@@ -235,14 +235,15 @@ def get_upstream_address(chunk_url, lorries, status):
chunk=chunk_url)
return 'UNKNOWN'
-def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host):
+def get_lorry_repos(tempdir, repo_cache, status, trove_id, trove_host):
lorries = []
try:
baserock_lorry_repo = 'baserock:local-config/lorries'
lorrydir = os.path.join(tempdir, 'baserock-lorries')
- baserock_lorrydir = checkout_repo(lrc, baserock_lorry_repo, lorrydir)
+ baserock_lorrydir = checkout_repo(repo_cache, baserock_lorry_repo,
+ lorrydir)
lorries.extend(load_lorries(lorrydir))
- except morphlib.localrepocache.NoRemote as e:
+ except morphlib.repocache.NoRemote as e:
status(msg="WARNING: Could not find lorries from git.baserock.org, "
"expected to find them on %(trove)s at %(reponame)s",
trove=trove_host, reponame = e.reponame)
@@ -252,9 +253,10 @@ def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host):
trove_lorry_repo = ('http://%s/git/%s/local-config/lorries' %
(trove_host, trove_id))
lorrydir = os.path.join(tempdir, '%s-lorries' % trove_id)
- trove_lorrydir = checkout_repo(lrc, trove_lorry_repo, lorrydir)
+ trove_lorrydir = checkout_repo(repo_cache, trove_lorry_repo,
+ lorrydir)
lorries.extend(load_lorries(lorrydir))
- except morphlib.localrepocache.NoRemote as e:
+ except morphlib.repocache.NoRemote as e:
status(msg="WARNING: Could not find lorries repo on %(trove)s "
"at %(reponame)s",
trove=trove_host, reponame=e.reponame)
@@ -268,10 +270,10 @@ def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host):
class Manifest(object):
"""Writes out a manifest of what's included in a system."""
- def __init__(self, system_name, tempdir, status_cb, lrc):
+ def __init__(self, system_name, tempdir, status_cb, repo_cache):
self.tempdir = tempdir
self.status = status_cb
- self.lrc = lrc
+ self.repo_cache = repo_cache
path = os.path.join(os.getcwd(), system_name + '-manifest.csv')
self.status(msg='Creating %(path)s', path=path)
self.file = open(path, 'wb')
diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py
deleted file mode 100644
index 4a6d9fe9..00000000
--- a/morphlib/remoterepocache.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright (C) 2012-2015 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import cliapp
-import json
-import logging
-import urllib2
-import urlparse
-import urllib
-
-
-class ResolveRefError(cliapp.AppException):
-
- def __init__(self, repo_name, ref):
- cliapp.AppException.__init__(
- self, 'Failed to resolve ref %s for repo %s' %
- (ref, repo_name))
-
-
-class CatFileError(cliapp.AppException):
-
- def __init__(self, repo_name, ref, filename):
- cliapp.AppException.__init__(
- self, 'Failed to cat file %s in ref %s of repo %s' %
- (filename, ref, repo_name))
-
-class LsTreeError(cliapp.AppException):
-
- def __init__(self, repo_name, ref):
- cliapp.AppException.__init__(
- self, 'Failed to list tree in ref %s of repo %s' %
- (ref, repo_name))
-
-
-class RemoteRepoCache(object):
-
- def __init__(self, server_url, resolver):
- self.server_url = server_url
- self._resolver = resolver
-
- def resolve_ref(self, repo_name, ref):
- repo_url = self._resolver.pull_url(repo_name)
- try:
- return self._resolve_ref_for_repo_url(repo_url, ref)
- except BaseException as e:
- logging.error('Caught exception: %s' % str(e))
- raise ResolveRefError(repo_name, ref)
-
- def cat_file(self, repo_name, ref, filename):
- repo_url = self._resolver.pull_url(repo_name)
- try:
- return self._cat_file_for_repo_url(repo_url, ref, filename)
- except urllib2.HTTPError as e:
- logging.error('Caught exception: %s' % str(e))
- if e.code == 404:
- raise CatFileError(repo_name, ref, filename)
- raise # pragma: no cover
-
- def ls_tree(self, repo_name, ref):
- repo_url = self._resolver.pull_url(repo_name)
- try:
- info = json.loads(self._ls_tree_for_repo_url(repo_url, ref))
- return info['tree'].keys()
- except BaseException as e:
- logging.error('Caught exception: %s' % str(e))
- raise LsTreeError(repo_name, ref)
-
- def _resolve_ref_for_repo_url(self, repo_url, ref): # pragma: no cover
- data = self._make_request(
- 'sha1s?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))
- info = json.loads(data)
- return info['sha1'], info['tree']
-
- def _cat_file_for_repo_url(self, repo_url, ref,
- filename): # pragma: no cover
- return self._make_request(
- 'files?repo=%s&ref=%s&filename=%s'
- % self._quote_strings(repo_url, ref, filename))
-
- def _ls_tree_for_repo_url(self, repo_url, ref): # pragma: no cover
- return self._make_request(
- 'trees?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))
-
- def _quote_strings(self, *args): # pragma: no cover
- return tuple(urllib.quote(string) for string in args)
-
- def _make_request(self, path): # pragma: no cover
- server_url = self.server_url
- if not server_url.endswith('/'):
- server_url += '/'
- url = urlparse.urljoin(server_url, '/1.0/%s' % path)
- handle = urllib2.urlopen(url)
- return handle.read()
diff --git a/morphlib/remoterepocache_tests.py b/morphlib/remoterepocache_tests.py
deleted file mode 100644
index 966e74d5..00000000
--- a/morphlib/remoterepocache_tests.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright (C) 2012-2015 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import json
-import unittest
-import urllib2
-
-import morphlib
-
-
-class RemoteRepoCacheTests(unittest.TestCase):
-
- def _resolve_ref_for_repo_url(self, repo_url, ref):
- return self.sha1s[repo_url][ref]
-
- def _cat_file_for_repo_url(self, repo_url, sha1, filename):
- try:
- return self.files[repo_url][sha1][filename]
- except KeyError:
- raise urllib2.HTTPError(url='', code=404, msg='Not found',
- hdrs={}, fp=None)
-
- def _ls_tree_for_repo_url(self, repo_url, sha1):
- return json.dumps({
- 'repo': repo_url,
- 'ref': sha1,
- 'tree': self.files[repo_url][sha1]
- })
-
- def setUp(self):
- self.sha1s = {
- 'git://gitorious.org/baserock/morph': {
- 'master': 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9'
- }
- }
- self.files = {
- 'git://gitorious.org/baserock-morphs/linux': {
- 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9': {
- 'linux.morph': 'linux morphology'
- }
- }
- }
- self.server_url = 'http://foo.bar'
- aliases = [
- 'upstream=git://gitorious.org/baserock-morphs/#foo',
- 'baserock=git://gitorious.org/baserock/#foo'
- ]
- resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
- self.cache = morphlib.remoterepocache.RemoteRepoCache(
- self.server_url, resolver)
- self.cache._resolve_ref_for_repo_url = self._resolve_ref_for_repo_url
- self.cache._cat_file_for_repo_url = self._cat_file_for_repo_url
- self.cache._ls_tree_for_repo_url = self._ls_tree_for_repo_url
-
- def test_sets_server_url(self):
- self.assertEqual(self.cache.server_url, self.server_url)
-
- def test_resolve_existing_ref_for_existing_repo(self):
- sha1 = self.cache.resolve_ref('baserock:morph', 'master')
- self.assertEqual(
- sha1,
- self.sha1s['git://gitorious.org/baserock/morph']['master'])
-
- def test_fail_resolving_existing_ref_for_non_existent_repo(self):
- self.assertRaises(morphlib.remoterepocache.ResolveRefError,
- self.cache.resolve_ref, 'non-existent-repo',
- 'master')
-
- def test_fail_resolving_non_existent_ref_for_existing_repo(self):
- self.assertRaises(morphlib.remoterepocache.ResolveRefError,
- self.cache.resolve_ref, 'baserock:morph',
- 'non-existent-ref')
-
- def test_fail_resolving_non_existent_ref_for_non_existent_repo(self):
- self.assertRaises(morphlib.remoterepocache.ResolveRefError,
- self.cache.resolve_ref, 'non-existent-repo',
- 'non-existent-ref')
-
- def test_cat_existing_file_in_existing_repo_and_ref(self):
- content = self.cache.cat_file(
- 'upstream:linux', 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9',
- 'linux.morph')
- self.assertEqual(content, 'linux morphology')
-
- def test_fail_cat_file_using_invalid_sha1(self):
- self.assertRaises(morphlib.remoterepocache.CatFileError,
- self.cache.cat_file, 'upstream:linux', 'blablabla',
- 'linux.morph')
-
- def test_fail_cat_non_existent_file_in_existing_repo_and_ref(self):
- self.assertRaises(morphlib.remoterepocache.CatFileError,
- self.cache.cat_file, 'upstream:linux',
- 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9',
- 'non-existent-file')
-
- def test_fail_cat_file_in_non_existent_ref_in_existing_repo(self):
- self.assertRaises(morphlib.remoterepocache.CatFileError,
- self.cache.cat_file, 'upstream:linux',
- 'ecd7a325095a0d19b8c3d76f578d85b979461d41',
- 'linux.morph')
-
- def test_fail_cat_file_in_non_existent_repo(self):
- self.assertRaises(morphlib.remoterepocache.CatFileError,
- self.cache.cat_file, 'non-existent-repo',
- 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9',
- 'some-file')
-
- def test_ls_tree_in_existing_repo_and_ref(self):
- content = self.cache.ls_tree(
- 'upstream:linux', 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9')
- self.assertEqual(content, ['linux.morph'])
-
- def test_fail_ls_tree_using_invalid_sha1(self):
- self.assertRaises(morphlib.remoterepocache.LsTreeError,
- self.cache.ls_tree, 'upstream:linux', 'blablabla')
-
- def test_fail_ls_file_in_non_existent_ref_in_existing_repo(self):
- self.assertRaises(morphlib.remoterepocache.LsTreeError,
- self.cache.ls_tree, 'upstream:linux',
- 'ecd7a325095a0d19b8c3d76f578d85b979461d41')
-
- def test_fail_ls_tree_in_non_existent_repo(self):
- self.assertRaises(morphlib.remoterepocache.LsTreeError,
- self.cache.ls_tree, 'non-existent-repo',
- 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9')
diff --git a/morphlib/repocache.py b/morphlib/repocache.py
new file mode 100644
index 00000000..f6978ec4
--- /dev/null
+++ b/morphlib/repocache.py
@@ -0,0 +1,565 @@
+# Copyright (C) 2012-2016 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import cliapp
+import fs.osfs
+
+import json
+import logging
+import os
+import string
+import sys
+import tempfile
+import urllib2
+import urlparse
+import urllib
+
+import morphlib
+from morphlib.util import word_join_list as _word_join_list
+
+
+# urlparse.urljoin needs to know details of the URL scheme being used.
+# It does not know about git:// by default, so we teach it here.
+gitscheme = ['git']
+urlparse.uses_relative.extend(gitscheme)
+urlparse.uses_netloc.extend(gitscheme)
+urlparse.uses_params.extend(gitscheme)
+urlparse.uses_query.extend(gitscheme)
+urlparse.uses_fragment.extend(gitscheme)
+
+
+def quote_url(url):
+ ''' Convert URIs to strings that only contain digits, letters, % and _.
+
+ NOTE: When changing the code of this function, make sure to also apply
+ the same to the quote_url() function of lorry. Otherwise the git tarballs
+ generated by lorry may no longer be found by morph.
+
+ '''
+ valid_chars = string.digits + string.letters + '%_'
+ transl = lambda x: x if x in valid_chars else '_'
+ return ''.join([transl(x) for x in url])
+
+
+class NoRemote(morphlib.Error):
+
+ def __init__(self, reponame, errors):
+ self.reponame = reponame
+ self.errors = errors
+
+ def __str__(self):
+ return '\n\t'.join(['Cannot find remote git repository: %s' %
+ self.reponame] + self.errors)
+
+
+class NotCached(morphlib.Error):
+ def __init__(self, reponame):
+ self.reponame = reponame
+
+ def __str__(self): # pragma: no cover
+ return 'Repository %s is not cached yet' % self.reponame
+
+
+class UpdateError(cliapp.AppException): # pragma: no cover
+
+ def __init__(self, repo):
+ cliapp.AppException.__init__(
+ self, 'Failed to update cached version of repo %s' % repo)
+
+
+class CachedRepo(morphlib.gitdir.GitDirectory):
+ '''A locally cached Git repository with an origin remote set up.
+
+ One instance of this class represents a locally cached version of a
+ remote Git repository. This remote repository is set up as the
+ 'origin' remote.
+
+ Cached repositories are bare mirrors of the upstream. Locally created
+ branches will be lost the next time the repository updates.
+
+ '''
+ def __init__(self, path, original_name, url):
+ self.original_name = original_name
+ self.url = url
+ self.is_mirror = not url.startswith('file://')
+ self.already_updated = False
+
+ super(CachedRepo, self).__init__(path)
+
+ def __str__(self): # pragma: no cover
+ return self.url
+
+
+class RepoCache(object):
+ '''Manage a collection of Git repositories.
+
+ When we build stuff, we need a local copy of the git repository.
+ To avoid having to clone the repositories for every build, we
+ maintain a local cache of the repositories: we first clone the
+ remote repository to the cache, and then make a local clone from
+ the cache to the build environment. This class manages the local
+ cached repositories.
+
+ Repositories may be specified either using a full URL, in a form
+ understood by git(1), or as a repository name to which a base url
+ is prepended. The base urls are given to the class when it is
+ created.
+
+ Instead of cloning via a normal 'git clone' directly from the
+ git server, we first try to download a tarball from a url, and
+ if that works, we unpack the tarball.
+
+ Certain questions about a repo can be resolved without cloning the whole
+ thing, if an instance of 'morph-cache-server' is available on the remote
+ Git server. This makes calculating the build graph for the first time
+ a whole lot faster, as we avoid cloning every repo locally. The
+ git_resolve_cache_url parameter enables this feature. Baserock 'Trove'
+ systems run 'morph-cache-server' by default.
+
+ The 'custom_fs' parameter takes a PyFilesystem instance, which you can use
+ to override where 'cachedir' is stored. This should probably only be used
+ for testing.
+
+ '''
+ def __init__(self, cachedir, resolver, tarball_base_url=None,
+ git_resolve_cache_url=None,
+ update_gits=True,
+ runcmd_cb=cliapp.runcmd, status_cb=lambda **kwargs: None,
+ verbose=False, debug=False,
+ custom_fs=None):
+ self.fs = custom_fs or fs.osfs.OSFS('/')
+
+ self.fs.makedir(cachedir, recursive=True, allow_recreate=True)
+
+ self.cachedir = cachedir
+ self._resolver = resolver
+ if tarball_base_url and not tarball_base_url.endswith('/'):
+ tarball_base_url += '/'
+ self._tarball_base_url = tarball_base_url
+ self._cached_repo_objects = {}
+
+ # Corresponds to the app 'no-git-update' setting
+ self.update_gits = update_gits
+
+ self.runcmd_cb = runcmd_cb
+ self.status_cb = status_cb
+ self.verbose = verbose
+ self.debug = debug
+
+ if git_resolve_cache_url: # pragma: no cover
+ self.remote_cache = RemoteRepoCache(git_resolve_cache_url,
+ resolver)
+ else:
+ self.remote_cache = None
+
+ def _git(self, args, **kwargs): # pragma: no cover
+ '''Execute git command.
+
+ This is a method of its own so that unit tests can easily override
+ all use of the external git command.
+
+ '''
+
+ morphlib.git.gitcmd(self.runcmd_cb, *args, **kwargs)
+
+ def _fetch(self, url, path): # pragma: no cover
+ '''Fetch contents of url into a file.
+
+ This method is meant to be overridden by unit tests.
+
+ '''
+ self.status_cb(msg="Trying to fetch %(tarball)s to seed the cache",
+ tarball=url, chatty=True)
+
+ if self.verbose:
+ verbosity_flags = []
+ kwargs = dict(stderr=sys.stderr)
+ else:
+ verbosity_flags = ['--quiet']
+ kwargs = dict()
+
+ def wget_command():
+ return ['wget'] + verbosity_flags + ['-O-', url]
+
+ self.runcmd_cb(wget_command(),
+ ['tar', '--no-same-owner', '-xf', '-'],
+ cwd=path, **kwargs)
+
+ def _mkdtemp(self, dirname): # pragma: no cover
+ '''Creates a temporary directory.
+
+ This method is meant to be overridden by unit tests.
+
+ '''
+ return tempfile.mkdtemp(dir=self.fs.getsyspath(dirname))
+
+ def _escape(self, url):
+ '''Escape a URL so it can be used as a basename in a file.'''
+
+ # FIXME: The following is a nicer way to do this.
+ # However, for compatibility, we need to use the same as the
+ # tarball server (set up by Lorry) uses.
+ # return urllib.quote(url, safe='')
+
+ return quote_url(url)
+
+ def _cache_name(self, url):
+ scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
+ if scheme != 'file':
+ path = os.path.join(self.cachedir, self._escape(url))
+ return path
+
+ def has_repo(self, reponame):
+ '''Have we already got a cache of a given repo?'''
+ url = self._resolver.pull_url(reponame)
+ path = self._cache_name(url)
+ return self.fs.exists(path)
+
+ def _clone_with_tarball(self, repourl, path):
+ tarball_url = urlparse.urljoin(self._tarball_base_url,
+ self._escape(repourl)) + '.tar'
+ try:
+ self.fs.makedir(path)
+ self._fetch(tarball_url, path)
+ self._git(['config', 'remote.origin.url', repourl], cwd=path)
+ self._git(['config', 'remote.origin.mirror', 'true'], cwd=path)
+ self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'],
+ cwd=path)
+ except BaseException as e:
+ if self.fs.exists(path):
+ self.fs.removedir(path, force=True)
+ return False, 'Unable to extract tarball %s: %s' % (
+ tarball_url, e)
+
+ return True, None
+
+ def _new_cached_repo_instance(self, path, reponame, repourl):
+ return CachedRepo(path, reponame, repourl)
+
+ def _cache_repo(self, reponame):
+ '''Clone the given repo into the cache.
+
+ If the repo is already cloned, do nothing.
+
+ '''
+ errors = []
+
+ repourl = self._resolver.pull_url(reponame)
+ path = self._cache_name(repourl)
+ if self._tarball_base_url:
+ ok, error = self._clone_with_tarball(repourl, path)
+ if ok:
+ repo = self._get_repo(reponame)
+ self._update_repo(repo)
+ return repo
+ else:
+ errors.append(error)
+ self.status_cb(msg='Using git clone.')
+
+ target = self._mkdtemp(self.cachedir)
+
+ try:
+ self._git(['clone', '--mirror', '-n', repourl, target],
+ echo_stderr=self.debug)
+ except cliapp.AppException as e:
+ errors.append('Unable to clone from %s to %s: %s' %
+ (repourl, target, e))
+ if self.fs.exists(target):
+ self.fs.removedir(target, force=True)
+ raise NoRemote(reponame, errors)
+
+ self.fs.rename(target, path)
+
+ repo = self._new_cached_repo_instance(path, reponame, repourl)
+ repo.already_updated = True
+ return repo
+
+ def _get_repo(self, reponame):
+ '''Return an object representing a cached repository.'''
+
+ if reponame in self._cached_repo_objects:
+ return self._cached_repo_objects[reponame]
+ else:
+ repourl = self._resolver.pull_url(reponame)
+ path = self._cache_name(repourl)
+ if self.fs.exists(path):
+ repo = self._new_cached_repo_instance(path, reponame, repourl)
+ self._cached_repo_objects[reponame] = repo
+ return repo
+ elif self.update_gits:
+ return self._cache_repo(reponame)
+ else:
+ raise NotCached(reponame)
+
+ def _update_repo(self, cachedrepo):  # pragma: no cover
+     '''Update 'cachedrepo' by calling its update_remotes() method.
+
+     Marks the repo as already updated on success. If update_remotes()
+     raises cliapp.AppException, an UpdateError naming the repo that
+     failed is raised instead.
+
+     '''
+     try:
+         cachedrepo.update_remotes(
+             echo_stderr=self.verbose)
+         cachedrepo.already_updated = True
+     except cliapp.AppException:
+         # Report the repository, not this RepoCache instance, so the
+         # error message says which repo could not be updated.
+         raise UpdateError(cachedrepo)
+
+ def get_updated_repo(self, repo_name,
+                      ref=None, refs=None):
+     '''Return object representing cached repository.
+
+     'ref' names a single ref and 'refs' is an iterable of refs that the
+     caller needs; either, both or neither may be given. When 'refs' is
+     given it decides whether an update is needed, and 'ref' is only
+     used in the status message.
+
+     If all the specified refs in 'ref' or 'refs' are SHA1s that are
+     already in the repository, or --no-git-update is set, then the
+     repository won't be updated. If no refs are specified at all, a
+     repository that is already cached is always updated.
+
+     '''
+
+     if not self.update_gits:
+         self.status_cb(msg='Not updating existing git repository '
+                            '%(repo_name)s '
+                            'because of no-git-update being set',
+                        chatty=True,
+                        repo_name=repo_name)
+         return self._get_repo(repo_name)
+
+     # Normalise the two optional arguments into one list. Both default
+     # to None, so calling with neither must not try to iterate None.
+     if refs is None:
+         refs = [] if ref is None else [ref]
+     else:
+         refs = list(refs)
+
+     if self.has_repo(repo_name):
+         repo = self._get_repo(repo_name)
+         if refs:
+             required_refs = set(refs)
+             missing_refs = set()
+             for required_ref in required_refs:  # pragma: no cover
+                 # Only a full SHA1 that already resolves locally lets
+                 # us skip the update; anything else counts as missing.
+                 if morphlib.git.is_valid_sha1(required_ref):
+                     try:
+                         repo.resolve_ref_to_commit(required_ref)
+                         continue
+                     except morphlib.gitdir.InvalidRefError:
+                         pass
+                 missing_refs.add(required_ref)
+
+             if not missing_refs:  # pragma: no cover
+                 self.status_cb(
+                     msg='Not updating git repository %(repo_name)s '
+                         'because it already contains %(sha1s)s',
+                     chatty=True, repo_name=repo_name,
+                     sha1s=_word_join_list(tuple(required_refs)))
+                 return repo
+
+         if ref:
+             ref_str = 'ref %s' % ref
+         else:
+             ref_str = '%i refs' % len(refs)
+         self.status_cb(msg='Updating %(repo_name)s for %(ref_str)s',
+                        repo_name=repo_name, ref_str=ref_str)
+         self._update_repo(repo)
+         return repo
+     else:
+         self.status_cb(msg='Cloning %(repo_name)s', repo_name=repo_name)
+         return self._get_repo(repo_name)
+
+ def ensure_submodules(self, toplevel_repo,
+ toplevel_ref): # pragma: no cover
+ '''Ensure any submodules of a given repo are cached and up to date.'''
+
+ def submodules_for_repo(repo_path, ref):
+ try:
+ submodules = morphlib.git.Submodules(repo_path, ref,
+ runcmd_cb=self.runcmd_cb)
+ submodules.load()
+ return [(submod.url, submod.commit) for submod in submodules]
+ except morphlib.git.NoModulesFileError:
+ return []
+
+ done = set()
+ subs_to_process = submodules_for_repo(toplevel_repo.dirname,
+ toplevel_ref)
+ while subs_to_process:
+ url, ref = subs_to_process.pop()
+ done.add((url, ref))
+
+ cached_repo = self.get_updated_repo(url, ref=ref)
+
+ for submod in submodules_for_repo(cached_repo.dirname, ref):
+ if submod not in done:
+ subs_to_process.append(submod)
+
+ def resolve_ref_to_commit_and_tree(self, repo_name,
+ ref): # pragma: no cover
+ '''Given the name of a ref, returns the commit and tree SHA1.
+
+ If a remote cache server is available, this function can query the
+ remote cache server to avoid needing to clone the entire repo.
+
+ This might break if the ref points to a tag, not a commit.
+
+ '''
+ absref = None
+ tree = None
+
+ if self.has_repo(repo_name):
+ repo = self.get_updated_repo(repo_name, ref)
+ # If the user passed --no-git-update, and the ref is a SHA1 not
+ # available locally, this call will raise an exception.
+ absref = repo.resolve_ref_to_commit(ref)
+ tree = repo.resolve_ref_to_tree(absref)
+ elif self.remote_cache is not None:
+ try:
+ absref, tree = self.remote_cache.resolve_ref(repo_name, ref)
+ if absref is not None:
+ self.status_cb(
+ msg='Resolved %(repo_name)s %(ref)s via remote repo '
+ 'cache', repo_name=repo_name, ref=ref, chatty=True)
+ except BaseException as e:
+ logging.warning('Caught (and ignored) exception: %s' % str(e))
+
+ if absref is None:
+ # As a last resort, clone the repo to resolve the ref.
+ repo = self.get_updated_repo(repo_name, ref)
+ absref = repo.resolve_ref_to_commit(ref)
+ tree = repo.resolve_ref_to_tree(absref)
+
+ return absref, tree
+
+ def ls_tree(self, repo_name, ref): # pragma: no cover
+ '''Lists the files contained in a commit.
+
+ If a remote cache server is available, this function can query the
+ remote cache server to avoid needing to clone the entire repo.
+
+ The list is non-recursive, so you can only see files in the top
+ directory of the repo. To do a recursive operation, use a GitDir
+ instance returned by get_updated_repo().
+
+ '''
+ files = []
+
+ if self.has_repo(repo_name):
+ repo = self.get_updated_repo(repo_name, ref)
+ files = repo.list_files(ref=ref, recurse=False)
+ elif self.remote_cache is not None:
+ files = self.remote_cache.ls_tree(repo_name, ref)
+
+ if len(files) == 0:
+ # As a last resort, clone the repo to do get the file list.
+ repo = self.get_updated_repo(repo_name, ref)
+ files = repo.list_files(ref=ref, recurse=False)
+
+ return files
+
+ def cat_file(self, repo_name, ref, filename): # pragma: no cover
+ '''Returns a single file from a repo.
+
+ If a remote cache server is available, this function can query the
+ remote cache server to avoid needing to clone the entire repo.
+
+ '''
+ contents = None
+
+ if self.has_repo(repo_name):
+ repo = self.get_updated_repo(repo_name, ref)
+ contents = repo.get_file_from_ref(ref, filename)
+ elif self.remote_cache is not None:
+ contents = self.remote_cache.cat_file(repo_name, ref, filename)
+
+ if not contents:
+ # As a last resort, clone the repo to do get the file list.
+ repo = self.get_updated_repo(repo_name, ref)
+ contents = repo.get_file_from_ref(ref, filename)
+
+ return contents
+
+
class RemoteResolveRefError(cliapp.AppException):
    '''Error for a ref that could not be resolved via the remote cache.'''

    def __init__(self, repo_name, ref):
        message = ('Failed to resolve ref %s for repo %s from remote cache'
                   % (ref, repo_name))
        cliapp.AppException.__init__(self, message)
+
+
class RemoteCatFileError(cliapp.AppException):
    '''Error for a file that could not be read via the remote cache.'''

    def __init__(self, repo_name, ref, filename):
        template = ('Failed to cat file %s in ref %s of repo %s, from remote '
                    'cache')
        message = template % (filename, ref, repo_name)
        cliapp.AppException.__init__(self, message)
+
+
class RemoteLsTreeError(cliapp.AppException):
    '''Error for a tree that could not be listed via the remote cache.'''

    def __init__(self, repo_name, ref):
        # The two literals are joined by implicit concatenation, so the
        # first one must end with a space to keep 'remote cache' as two
        # words in the final message.
        cliapp.AppException.__init__(
            self, 'Failed to list tree in ref %s of repo %s, from remote '
                  'cache' % (ref, repo_name))
+
+
class RemoteRepoCache(object):
    '''Queries a remote cache server over HTTP for information on Git repos.

    'server_url' is the base URL of the server. 'resolver' must provide a
    pull_url() method, which is used to turn each repo name into the repo
    URL that is sent to the server.

    '''

    def __init__(self, server_url, resolver):
        self.server_url = server_url
        self._resolver = resolver

    def resolve_ref(self, repo_name, ref):
        '''Return the (commit SHA1, tree SHA1) reported for a ref.

        Raises RemoteResolveRefError if the lookup fails.

        '''
        repo_url = self._resolver.pull_url(repo_name)
        try:
            return self._resolve_ref_for_repo_url(repo_url, ref)
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt and SystemExit
            # must propagate rather than be reported as a failed lookup.
            logging.error('Caught exception: %s', e)
            raise RemoteResolveRefError(repo_name, ref)

    def cat_file(self, repo_name, ref, filename):
        '''Return the contents of one file at the given ref.

        Raises RemoteCatFileError if the server answers with HTTP 404. Any
        other HTTP error is re-raised unchanged.

        '''
        repo_url = self._resolver.pull_url(repo_name)
        try:
            return self._cat_file_for_repo_url(repo_url, ref, filename)
        except urllib2.HTTPError as e:
            logging.error('Caught exception: %s', e)
            if e.code == 404:
                raise RemoteCatFileError(repo_name, ref, filename)
            raise  # pragma: no cover

    def ls_tree(self, repo_name, ref):
        '''Return a list of the keys of the 'tree' entry in the server reply.

        Raises RemoteLsTreeError if the request fails or the reply cannot
        be decoded.

        '''
        repo_url = self._resolver.pull_url(repo_name)
        try:
            info = json.loads(self._ls_tree_for_repo_url(repo_url, ref))
            # list() guarantees a real list even where keys() is a view.
            return list(info['tree'].keys())
        except Exception as e:
            # See resolve_ref() in this class: interrupts must not be
            # swallowed, so BaseException is deliberately not caught.
            logging.error('Caught exception: %s', e)
            raise RemoteLsTreeError(repo_name, ref)

    def _resolve_ref_for_repo_url(self, repo_url, ref):  # pragma: no cover
        '''Ask the server for the 'sha1' and 'tree' values of a ref.'''
        data = self._make_request(
            'sha1s?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))
        info = json.loads(data)
        return info['sha1'], info['tree']

    def _cat_file_for_repo_url(self, repo_url, ref,
                               filename):  # pragma: no cover
        '''Ask the server for the raw contents of one file.'''
        return self._make_request(
            'files?repo=%s&ref=%s&filename=%s'
            % self._quote_strings(repo_url, ref, filename))

    def _ls_tree_for_repo_url(self, repo_url, ref):  # pragma: no cover
        '''Ask the server for the (still JSON-encoded) tree listing.'''
        return self._make_request(
            'trees?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))

    def _quote_strings(self, *args):  # pragma: no cover
        '''Return the arguments URL-quoted, as a tuple ready for '%'.'''
        return tuple(urllib.quote(string) for string in args)

    def _make_request(self, path):  # pragma: no cover
        '''Fetch '/1.0/<path>' from the server and return the response body.

        The response handle is always closed, including when reading fails.

        '''
        server_url = self.server_url
        if not server_url.endswith('/'):
            server_url += '/'
        # NOTE(review): the second argument is an absolute path, so urljoin
        # discards any path component of server_url -- confirm the server is
        # always expected to serve its API from the root.
        url = urlparse.urljoin(server_url, '/1.0/%s' % path)
        handle = urllib2.urlopen(url)
        try:
            return handle.read()
        finally:
            handle.close()
diff --git a/morphlib/repocache_tests.py b/morphlib/repocache_tests.py
new file mode 100644
index 00000000..6e07aedb
--- /dev/null
+++ b/morphlib/repocache_tests.py
@@ -0,0 +1,281 @@
+# Copyright (C) 2012-2016 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import unittest
+import urllib2
+import json
+import os
+
+import cliapp
+import fs.memoryfs
+import tempfile
+
+import morphlib
+import morphlib.gitdir_tests
+
+
class TestableRepoCache(morphlib.repocache.RepoCache):
    '''A RepoCache whose Git and download operations are replaced by stubs.

    Nothing touches a real Git repo or the network. Instead the stubs record
    what would have happened: the 'remotes' dict gets an entry whenever a
    'clone' or remote URL change is requested, and the 'tarballs_fetched'
    list collects the URL of every tarball that would have been downloaded.

    There is a single repo alias, 'example' which expands to
    git://example.com/.

    '''
    def __init__(self, update_gits=True):
        resolver = morphlib.repoaliasresolver.RepoAliasResolver(
            ['example=git://example.com/#example.com:%s.git'])

        morphlib.repocache.RepoCache.__init__(
            self, '/cache/gits/', resolver,
            tarball_base_url='http://lorry.example.com/tarballs',
            custom_fs=fs.memoryfs.MemoryFS(), update_gits=update_gits)

        self.remotes = {}
        self.tarballs_fetched = []

        self._mkdtemp_count = 0

    def _mkdtemp(self, dirname):
        # Deterministic stand-in for a temporary directory: foo0, foo1, ...
        name = "foo" + str(self._mkdtemp_count)
        self._mkdtemp_count += 1
        self.fs.makedir(dirname + "/" + name)
        return name

    def _fetch(self, url, path):
        # Only remember that the download was requested.
        self.tarballs_fetched.append(url)

    def _git(self, args, **kwargs):
        # Emulate just the Git commands that the cache is known to issue.
        if args[0] == 'clone':
            assert len(args) == 5
            source, destination = args[3], args[4]
            self.remotes['origin'] = {'url': source, 'updates': 0}
            self.fs.makedir(destination, recursive=True)
            return

        command = args[0:2]
        if command == ['remote', 'set-url']:
            name, location = args[2], args[3]
            self.remotes[name] = {'url': location}
        elif command == ['config', 'remote.origin.url']:
            self.remotes['origin'] = {'url': args[2]}
        elif command == ['config', 'remote.origin.mirror']:
            pass
        elif command == ['config', 'remote.origin.fetch']:
            pass
        else:
            raise NotImplementedError()

    def _update_repo(self, cached_repo):
        # Updating is a no-op in tests.
        pass
+
+
class RepoCacheTests(unittest.TestCase):
    '''Tests for RepoCache, run against the stubbed TestableRepoCache.'''

    def test_has_not_got_repo_initially(self):
        repo_cache = TestableRepoCache()
        self.assertFalse(repo_cache.has_repo('example:repo'))
        self.assertFalse(repo_cache.has_repo('git://example.com/repo'))

    def test_happily_caches_same_repo_twice(self):
        repo_cache = TestableRepoCache()
        with morphlib.gitdir_tests.allow_nonexistant_git_repos():
            repo_cache.get_updated_repo('example:repo', ref='master')
            repo_cache.get_updated_repo('example:repo', ref='master')

    def test_fails_to_cache_when_remote_does_not_exist(self):
        repo_cache = TestableRepoCache()

        def clone_fails(args, **kwargs):
            # Leave a directory behind, as a clone that failed part-way
            # through would.
            repo_cache.fs.makedir(args[4])
            raise cliapp.AppException('')
        repo_cache._git = clone_fails

        with self.assertRaises(morphlib.repocache.NoRemote):
            repo_cache.get_updated_repo('example:repo', 'master')

    def test_does_not_mind_a_missing_tarball(self):
        repo_cache = TestableRepoCache()

        def no_tarball(*args, **kwargs):
            raise cliapp.AppException('Not found')
        repo_cache._fetch = no_tarball

        with morphlib.gitdir_tests.allow_nonexistant_git_repos():
            repo_cache.get_updated_repo('example:repo', ref='master')
        self.assertEqual(repo_cache.tarballs_fetched, [])

    def test_fetches_tarball_when_it_exists(self):
        repo_url = 'git://example.com/reponame'
        repo_cache = TestableRepoCache()

        with morphlib.gitdir_tests.allow_nonexistant_git_repos():
            repo_cache.get_updated_repo(repo_url, ref='master')

        tarball_url = '%s%s.tar' % (repo_cache._tarball_base_url,
                                    repo_cache._escape(repo_url))
        self.assertEqual(repo_cache.tarballs_fetched, [tarball_url])

        # Check that the cache updated the repo after fetching the tarball.
        self.assertEqual(repo_cache.remotes['origin']['url'], repo_url)

    def test_escapes_repourl_as_filename(self):
        repo_cache = TestableRepoCache()
        escaped = repo_cache._escape('git://example.com/reponame')
        # assertNotIn reports both operands when the check fails.
        self.assertNotIn('/', escaped)

    def test_noremote_error_message_contains_repo_name(self):
        repo_url = 'git://example.com/reponame'
        e = morphlib.repocache.NoRemote(repo_url, [])
        self.assertIn(repo_url, str(e))

    def test_avoids_caching_local_repo(self):
        repo_cache = TestableRepoCache()

        repo_cache.fs.makedir('/local/repo', recursive=True)
        with morphlib.gitdir_tests.allow_nonexistant_git_repos():
            # NOTE(review): this is the only call here that passes 'refs'
            # rather than 'ref' -- confirm get_updated_repo() accepts it.
            cached = repo_cache.get_updated_repo(
                'file:///local/repo', refs='master')
        # A unittest assertion, not a bare assert, so the check still runs
        # under 'python -O' and failures show both values.
        self.assertEqual(cached.dirname, '/local/repo')

    def test_no_git_update_setting(self):
        repo_cache = TestableRepoCache(update_gits=False)

        with self.assertRaises(morphlib.repocache.NotCached):
            repo_cache.get_updated_repo('example:repo', ref='master')
+
+
class RemoteRepoCacheTests(unittest.TestCase):
    '''Tests for RemoteRepoCache with its three request helpers stubbed.

    setUp() replaces the helpers that would contact the server with the
    methods below, which answer from the 'sha1s' and 'files' fixtures.

    '''

    # Ref that is present in the 'files' fixture, and one that is not.
    _KNOWN_SHA1 = 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9'
    _UNKNOWN_SHA1 = 'ecd7a325095a0d19b8c3d76f578d85b979461d41'

    def _resolve_ref_for_repo_url(self, repo_url, ref):
        # A KeyError here stands in for a failed server lookup.
        return self.sha1s[repo_url][ref]

    def _cat_file_for_repo_url(self, repo_url, sha1, filename):
        # Report missing fixture data the way the server would: HTTP 404.
        try:
            return self.files[repo_url][sha1][filename]
        except KeyError:
            raise urllib2.HTTPError(url='', code=404, msg='Not found',
                                    hdrs={}, fp=None)

    def _ls_tree_for_repo_url(self, repo_url, sha1):
        reply = {
            'repo': repo_url,
            'ref': sha1,
            'tree': self.files[repo_url][sha1]
        }
        return json.dumps(reply)

    def setUp(self):
        self.sha1s = {
            'git://gitorious.org/baserock/morph': {
                'master': self._KNOWN_SHA1
            }
        }
        self.files = {
            'git://gitorious.org/baserock-morphs/linux': {
                self._KNOWN_SHA1: {
                    'linux.morph': 'linux morphology'
                }
            }
        }
        self.server_url = 'http://foo.bar'
        resolver = morphlib.repoaliasresolver.RepoAliasResolver([
            'upstream=git://gitorious.org/baserock-morphs/#foo',
            'baserock=git://gitorious.org/baserock/#foo'
        ])
        cache = morphlib.repocache.RemoteRepoCache(self.server_url, resolver)
        cache._resolve_ref_for_repo_url = self._resolve_ref_for_repo_url
        cache._cat_file_for_repo_url = self._cat_file_for_repo_url
        cache._ls_tree_for_repo_url = self._ls_tree_for_repo_url
        self.cache = cache

    def test_sets_server_url(self):
        self.assertEqual(self.cache.server_url, self.server_url)

    def test_resolve_existing_ref_for_existing_repo(self):
        resolved = self.cache.resolve_ref('baserock:morph', 'master')
        expected = self.sha1s['git://gitorious.org/baserock/morph']['master']
        self.assertEqual(resolved, expected)

    def test_fail_resolving_existing_ref_for_non_existent_repo(self):
        with self.assertRaises(morphlib.repocache.RemoteResolveRefError):
            self.cache.resolve_ref('non-existent-repo', 'master')

    def test_fail_resolving_non_existent_ref_for_existing_repo(self):
        with self.assertRaises(morphlib.repocache.RemoteResolveRefError):
            self.cache.resolve_ref('baserock:morph', 'non-existent-ref')

    def test_fail_resolving_non_existent_ref_for_non_existent_repo(self):
        with self.assertRaises(morphlib.repocache.RemoteResolveRefError):
            self.cache.resolve_ref('non-existent-repo', 'non-existent-ref')

    def test_cat_existing_file_in_existing_repo_and_ref(self):
        content = self.cache.cat_file(
            'upstream:linux', self._KNOWN_SHA1, 'linux.morph')
        self.assertEqual(content, 'linux morphology')

    def test_fail_cat_file_using_invalid_sha1(self):
        with self.assertRaises(morphlib.repocache.RemoteCatFileError):
            self.cache.cat_file('upstream:linux', 'blablabla', 'linux.morph')

    def test_fail_cat_non_existent_file_in_existing_repo_and_ref(self):
        with self.assertRaises(morphlib.repocache.RemoteCatFileError):
            self.cache.cat_file(
                'upstream:linux', self._KNOWN_SHA1, 'non-existent-file')

    def test_fail_cat_file_in_non_existent_ref_in_existing_repo(self):
        with self.assertRaises(morphlib.repocache.RemoteCatFileError):
            self.cache.cat_file(
                'upstream:linux', self._UNKNOWN_SHA1, 'linux.morph')

    def test_fail_cat_file_in_non_existent_repo(self):
        with self.assertRaises(morphlib.repocache.RemoteCatFileError):
            self.cache.cat_file(
                'non-existent-repo', self._KNOWN_SHA1, 'some-file')

    def test_ls_tree_in_existing_repo_and_ref(self):
        content = self.cache.ls_tree('upstream:linux', self._KNOWN_SHA1)
        self.assertEqual(content, ['linux.morph'])

    def test_fail_ls_tree_using_invalid_sha1(self):
        with self.assertRaises(morphlib.repocache.RemoteLsTreeError):
            self.cache.ls_tree('upstream:linux', 'blablabla')

    def test_fail_ls_file_in_non_existent_ref_in_existing_repo(self):
        with self.assertRaises(morphlib.repocache.RemoteLsTreeError):
            self.cache.ls_tree('upstream:linux', self._UNKNOWN_SHA1)

    def test_fail_ls_tree_in_non_existent_repo(self):
        with self.assertRaises(morphlib.repocache.RemoteLsTreeError):
            self.cache.ls_tree('non-existent-repo', self._KNOWN_SHA1)
+
diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py
index c6f77cf9..f8dac8b1 100644
--- a/morphlib/sourceresolver.py
+++ b/morphlib/sourceresolver.py
@@ -138,17 +138,17 @@ class SourceResolver(object):
be either a normal URL, or a keyed URL using a repo-alias like
'baserock:baserock/definitions'.
- The 'remote repo cache' is a Baserock Trove system. It functions as a
- normal Git server, but in addition it runs a service on port 8080 called
- 'morph-cache-server' which can resolve refs, list their contents and read
- specific files from the repos it holds. This allows the SourceResolver to
- work out how to build something without cloning the whole repo. (If a local
- build of that source ends up being necessary then it will get cloned into
- the local cache later on).
-
- The second layer of caching is the local repository cache, which mirrors
- entire repositories in $cachedir/gits. If a repo is not in the remote repo
- cache then it must be present in the local repo cache.
+ Each commit used in a build is resolved to a tree SHA1, which means that
+ merge commits and changes to commit messages don't affect the cache
+ identity of a chunk. This does mean we need to query every repo in the
+ build graph, though.
+
+ All requests for information on a repo use the 'repocache' module. This
+ maintains a local copy of all the Git repos we need to work with. A repo
+ cache can also use a remote 'morph-cache-server' instance, if available,
+ to query certain information about a repo without cloning it locally.
+ Using this we can resolve commits to trees without having to clone every
+ repo locally, which is a huge performance improvement in some cases.
The third layer of caching is a simple commit SHA1 -> tree SHA mapping. It
turns out that even if all repos are available locally, running
@@ -168,14 +168,11 @@ class SourceResolver(object):
'''
- def __init__(self, local_repo_cache, remote_repo_cache,
- tree_cache_manager, update_repos,
- status_cb=None):
- self.lrc = local_repo_cache
- self.rrc = remote_repo_cache
+ def __init__(self, repo_cache, tree_cache_manager, status_cb=None):
+ self.repo_cache = repo_cache
self.tree_cache_manager = tree_cache_manager
- self.update = update_repos
+ self.update = repo_cache.update_gits
self.status = status_cb
def _resolve_ref(self, resolved_trees, reponame, ref):
@@ -184,9 +181,6 @@ class SourceResolver(object):
If update is True then this has the side-effect of updating or cloning
the repository into the local repo cache.
- This function is complex due to the 3 layers of caching described in
- the SourceResolver docstring.
-
'''
# The Baserock reference definitions use absolute refs so, and, if the
@@ -198,29 +192,8 @@ class SourceResolver(object):
logging.debug('tree (%s, %s) not in cache', reponame, ref)
- absref = None
- if self.lrc.has_repo(reponame):
- repo = self.lrc.get_updated_repo(reponame, ref)
- # If the user passed --no-git-update, and the ref is a SHA1 not
- # available locally, this call will raise an exception.
- absref = repo.resolve_ref_to_commit(ref)
- tree = repo.resolve_ref_to_tree(absref)
- elif self.rrc is not None:
- try:
- absref, tree = self.rrc.resolve_ref(reponame, ref)
- if absref is not None:
- self.status(msg='Resolved %(reponame)s %(ref)s via remote '
- 'repo cache',
- reponame=reponame,
- ref=ref,
- chatty=True)
- except BaseException as e:
- logging.warning('Caught (and ignored) exception: %s' % str(e))
-
- if absref is None:
- repo = self.lrc.get_updated_repo(reponame, ref)
- absref = repo.resolve_ref_to_commit(ref)
- tree = repo.resolve_ref_to_tree(absref)
+ absref, tree = self.repo_cache.resolve_ref_to_commit_and_tree(reponame,
+ ref)
logging.debug('Writing tree to cache with ref (%s, %s)',
reponame, absref)
@@ -430,7 +403,7 @@ class SourceResolver(object):
if definitions_original_ref:
definitions_ref = definitions_original_ref
- definitions_cached_repo = self.lrc.get_updated_repo(
+ definitions_cached_repo = self.repo_cache.get_updated_repo(
repo_name=definitions_repo, ref=definitions_absref)
definitions_cached_repo.extract_commit(
definitions_absref, definitions_checkout_dir)
@@ -489,9 +462,8 @@ def _find_duplicate_chunks(sourcepool): #pragma: no cover
return {k: v for (k, v) in chunk_sources_by_name.iteritems() if len(v) > 1}
-def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
- original_ref=None, update_repos=True,
- status_cb=None):
+def create_source_pool(repo_cache, repo, ref, filenames,
+ original_ref=None, status_cb=None):
'''Find all the sources involved in building a given system.
Given a system morphology, this function will traverse the tree of stratum
@@ -502,8 +474,12 @@ def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
Note that Git submodules are not considered 'sources' in the current
implementation, and so they must be handled separately.
- The 'lrc' and 'rrc' parameters specify the local and remote Git repository
- caches used for resolving the sources.
+ The 'repo_cache' parameter specifies a repo cache which is used when
+ accessing the source repos. If a git_resolve_cache_server is set for this
+ repo cache, and all repos in the build are known to it, then this function
+ will only need the definitions.git repo available locally. If not, then all
+ repos must be cloned in order to resolve the refs to tree SHA1s, which is
+ a slow process!
'''
pool = morphlib.sourcepool.SourcePool()
@@ -529,10 +505,10 @@ def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
pool.add(source)
tree_cache_manager = PickleCacheManager(
- os.path.join(cachedir, tree_cache_filename), tree_cache_size)
+ os.path.join(repo_cache.cachedir, tree_cache_filename),
+ tree_cache_size)
- resolver = SourceResolver(lrc, rrc, tree_cache_manager, update_repos,
- status_cb)
+ resolver = SourceResolver(repo_cache, tree_cache_manager, status_cb)
resolver.traverse_morphs(repo, ref, filenames,
visit=add_to_pool,
definitions_original_ref=original_ref)
diff --git a/morphlib/util.py b/morphlib/util.py
index 3b3e4d2b..ba170952 100644
--- a/morphlib/util.py
+++ b/morphlib/util.py
@@ -102,21 +102,16 @@ def make_concurrency(cores=None):
return min(n, 20)
-def create_cachedir(settings): # pragma: no cover
- '''Return cache directory, creating it if necessary.'''
-
- cachedir = settings['cachedir']
+def ensure_directory_exists(path): # pragma: no cover
# Don't check the folder exists and handle the exception that happens in
# this case to avoid errors if the folder is created by something else
# just after the check.
try:
- os.mkdir(cachedir)
+ os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
- return cachedir
-
def get_artifact_cache_server(settings): # pragma: no cover
if settings['artifact-cache-server']:
@@ -141,17 +136,8 @@ def new_artifact_caches(settings): # pragma: no cover
'''
- cachedir = create_cachedir(settings)
- artifact_cachedir = os.path.join(cachedir, 'artifacts')
- # Don't check the folder exists and handle the exception that happens in
- # this case to avoid errors if the folder is created by something else
- # just after the check.
- try:
- os.mkdir(artifact_cachedir)
- except OSError as e:
- if e.errno != errno.EEXIST:
- raise
-
+ artifact_cachedir = os.path.join(settings['cachedir'], 'artifacts')
+ ensure_directory_exists(artifact_cachedir)
lac = morphlib.localartifactcache.LocalArtifactCache(
fs.osfs.OSFS(artifact_cachedir))
@@ -222,24 +208,26 @@ def combine_aliases(app): # pragma: no cover
return alias_map.values()
-def new_repo_caches(app): # pragma: no cover
- '''Create new objects for local, remote git repository caches.'''
- aliases = app.settings['repo-alias']
- cachedir = create_cachedir(app.settings)
- gits_dir = os.path.join(cachedir, 'gits')
- tarball_base_url = app.settings['tarball-server']
- repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
- lrc = morphlib.localrepocache.LocalRepoCache(
- app, gits_dir, repo_resolver, tarball_base_url=tarball_base_url)
+def new_repo_cache(app): # pragma: no cover
+ '''Create a RepoCache instance using settings from app.settings.'''
- url = get_git_resolve_cache_server(app.settings)
- if url:
- rrc = morphlib.remoterepocache.RemoteRepoCache(url, repo_resolver)
- else:
- rrc = None
+ gits_dir = os.path.join(app.settings['cachedir'], 'gits')
+ tarball_base_url = app.settings['tarball-server']
+ git_resolve_cache_url = get_git_resolve_cache_server(app.settings)
+ aliases = app.settings['repo-alias']
+ repo_alias_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
+
+ return morphlib.repocache.RepoCache(
+ gits_dir, repo_alias_resolver,
+ tarball_base_url=tarball_base_url,
+ git_resolve_cache_url=git_resolve_cache_url,
+ update_gits=(not app.settings['no-git-update']),
+ runcmd_cb=app.runcmd,
+ status_cb=app.status,
+ verbose=app.settings['verbose'],
+ debug=app.settings['debug'])
- return lrc, rrc
def env_variable_is_password(key): # pragma: no cover
return 'PASSWORD' in key