summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--morphlib/__init__.py3
-rw-r--r--morphlib/buildcommand.py18
-rw-r--r--morphlib/builder.py4
-rw-r--r--morphlib/builder_tests.py2
-rw-r--r--morphlib/definitions_repo.py39
-rw-r--r--morphlib/git.py9
-rw-r--r--morphlib/localrepocache.py357
-rw-r--r--morphlib/localrepocache_tests.py149
-rw-r--r--morphlib/plugins/anchor_plugin.py5
-rw-r--r--morphlib/plugins/artifact_inspection_plugin.py37
-rw-r--r--morphlib/plugins/certify_plugin.py8
-rw-r--r--morphlib/plugins/cross-bootstrap_plugin.py2
-rw-r--r--morphlib/plugins/diff_plugin.py10
-rw-r--r--morphlib/plugins/get_repo_plugin.py6
-rw-r--r--morphlib/plugins/list_artifacts_plugin.py8
-rw-r--r--morphlib/plugins/show_dependencies_plugin.py20
-rw-r--r--morphlib/plugins/system_manifests_plugin.py34
-rw-r--r--morphlib/remoterepocache.py105
-rw-r--r--morphlib/remoterepocache_tests.py137
-rw-r--r--morphlib/repocache.py565
-rw-r--r--morphlib/repocache_tests.py281
-rw-r--r--morphlib/sourceresolver.py80
-rw-r--r--morphlib/util.py54
23 files changed, 976 insertions, 957 deletions
diff --git a/morphlib/__init__.py b/morphlib/__init__.py
index 7724c41c..066ab9f6 100644
--- a/morphlib/__init__.py
+++ b/morphlib/__init__.py
@@ -69,15 +69,14 @@ import git
import gitdir
import gitindex
import localartifactcache
-import localrepocache
import mountableimage
import morphologyfinder
import morphology
import morphloader
import morphset
import remoteartifactcache
-import remoterepocache
import repoaliasresolver
+import repocache
import savefile
import source
import sourcepool
diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py
index a2ad3301..e185a808 100644
--- a/morphlib/buildcommand.py
+++ b/morphlib/buildcommand.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2011-2015 Codethink Limited
+# Copyright (C) 2011-2016 Codethink Limited
# Copyright © 2015 Richard Ipsum
#
# This program is free software; you can redistribute it and/or modify
@@ -47,7 +47,7 @@ class BuildCommand(object):
def __init__(self, app, build_env = None):
self.app = app
self.lac, self.rac = self.new_artifact_caches()
- self.lrc, self.rrc = self.new_repo_caches()
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
def build(self, repo_name, ref, filename, original_ref=None):
'''Build a given system morphology.'''
@@ -76,9 +76,6 @@ class BuildCommand(object):
'''
return morphlib.util.new_artifact_caches(self.app.settings)
- def new_repo_caches(self):
- return morphlib.util.new_repo_caches(self.app)
-
def new_build_env(self, arch):
'''Create a new BuildEnvironment instance.'''
return morphlib.buildenvironment.BuildEnvironment(self.app.settings,
@@ -95,10 +92,8 @@ class BuildCommand(object):
'''
self.app.status(msg='Creating source pool', chatty=True)
srcpool = morphlib.sourceresolver.create_source_pool(
- self.lrc, self.rrc, repo_name, ref, filenames,
- cachedir=self.app.settings['cachedir'],
+ self.repo_cache, repo_name, ref, filenames,
original_ref=original_ref,
- update_repos=not self.app.settings['no-git-update'],
status_cb=self.app.status)
return srcpool
@@ -394,8 +389,9 @@ class BuildCommand(object):
'''Update the local git repository cache with the sources.'''
repo_name = source.repo_name
- source.repo = self.lrc.get_updated_repo(repo_name, ref=source.sha1)
- self.lrc.ensure_submodules(source.repo, source.sha1)
+ source.repo = self.repo_cache.get_updated_repo(repo_name,
+ ref=source.sha1)
+ self.repo_cache.ensure_submodules(source.repo, source.sha1)
def cache_artifacts_locally(self, artifacts):
'''Get artifacts missing from local cache from remote cache.'''
@@ -540,7 +536,7 @@ class BuildCommand(object):
'%(sha1)s',
name=source.name, sha1=source.sha1[:7])
builder = morphlib.builder.Builder(
- self.app, staging_area, self.lac, self.rac, self.lrc,
+ self.app, staging_area, self.lac, self.rac, self.repo_cache,
self.app.settings['max-jobs'], setup_mounts)
return builder.build_and_cache(source)
diff --git a/morphlib/builder.py b/morphlib/builder.py
index 2d0a4bd4..c980a276 100644
--- a/morphlib/builder.py
+++ b/morphlib/builder.py
@@ -45,7 +45,7 @@ def extract_sources(app, repo_cache, repo, sha1, srcdir): #pragma: no cover
morphlib.gitdir.checkout_from_cached_repo(repo, sha1, destdir)
morphlib.git.reset_workdir(app.runcmd, destdir)
- submodules = morphlib.git.Submodules(app, repo.dirname, sha1)
+ submodules = morphlib.git.Submodules(repo.dirname, sha1, app.runcmd)
try:
submodules.load()
except morphlib.git.NoModulesFileError:
@@ -187,7 +187,7 @@ class BuilderBase(object):
'''
assert isinstance(self.source.repo,
- morphlib.localrepocache.CachedRepo)
+ morphlib.repocache.CachedRepo)
meta = {
'artifact-name': artifact_name,
'source-name': self.source.name,
diff --git a/morphlib/builder_tests.py b/morphlib/builder_tests.py
index da1f432e..54bc4a8f 100644
--- a/morphlib/builder_tests.py
+++ b/morphlib/builder_tests.py
@@ -51,7 +51,7 @@ class FakeSource(object):
self.name = 'a'
with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.repo = morphlib.localrepocache.CachedRepo(
+ self.repo = morphlib.repocache.CachedRepo(
'path', 'repo', 'url')
self.repo_name = 'url'
self.original_ref = 'e'
diff --git a/morphlib/definitions_repo.py b/morphlib/definitions_repo.py
index 4c13abee..8b022867 100644
--- a/morphlib/definitions_repo.py
+++ b/morphlib/definitions_repo.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015 Codethink Limited
+# Copyright (C) 2015-2016 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -127,9 +127,9 @@ class DefinitionsRepo(gitdir.GitDirectory):
return bbcm()
@contextlib.contextmanager
- def source_pool(self, lrc, rrc, cachedir, ref, system_filename,
+ def source_pool(self, repo_cache, ref, system_filename,
include_local_changes=False, push_local_changes=False,
- update_repos=True, status_cb=None, build_ref_prefix=None,
+ status_cb=None, build_ref_prefix=None,
git_user_name=None, git_user_email=None):
'''Load the system defined in 'morph' and all the sources it contains.
@@ -162,15 +162,10 @@ class DefinitionsRepo(gitdir.GitDirectory):
setting, but that was probably only useful for `morph distbuild` and
that now uses branch_with_local_changes().
- The 'lrc' and 'rrc' parameters are local and remote Git repo caches.
- Use morphlib.util.new_repo_caches() to obtain these. The 'cachedir'
- parameter points to where Git repos are cached by Morph,
- app.settings['cachedir'] tells you that.
-
- The 'update_repos' flag allows you to disable updating Git repos, to
- honour app.settings['no-git-update']. If one of the refs in the build
- graph is not available locally and update_repos is False, you will see
- a morphlib.gitdir.InvalidRefError exception.
+ The 'repo_cache' parameter is a morphlib.repocache.RepoCache instance.
+ If update_gits=False is set for this repo cache, and one of the refs in
+ the build graph is not available locally, you will see a
+ morphlib.gitdir.InvalidRefError exception.
The 'status_cb' function will be called if set to output progress and
status messages to the user.
@@ -200,9 +195,8 @@ class DefinitionsRepo(gitdir.GitDirectory):
status_cb(msg='Deciding on task order')
yield morphlib.sourceresolver.create_source_pool(
- lrc, rrc, repo_url, commit, [system_filename],
- cachedir=cachedir, original_ref=original_ref,
- update_repos=update_repos, status_cb=status_cb)
+ repo_cache, repo_url, commit, [system_filename],
+ original_ref=original_ref, status_cb=status_cb)
else:
repo_url = self.remote_url
commit = self.resolve_ref_to_commit(ref)
@@ -212,9 +206,8 @@ class DefinitionsRepo(gitdir.GitDirectory):
try:
yield morphlib.sourceresolver.create_source_pool(
- lrc, rrc, repo_url, commit, [system_filename],
- cachedir=cachedir, original_ref=ref,
- update_repos=update_repos, status_cb=status_cb)
+ repo_cache, repo_url, commit, [system_filename],
+ original_ref=ref, status_cb=status_cb)
except morphlib.sourceresolver.InvalidDefinitionsRefError as e:
raise cliapp.AppException(
'Commit %s wasn\'t found in the "origin" remote %s. '
@@ -332,7 +325,7 @@ class DefinitionsRepoWithApp(DefinitionsRepo):
self._git_user_name = morphlib.git.get_user_name(app.runcmd)
self._git_user_email = morphlib.git.get_user_email(app.runcmd)
- self._lrc, self._rrc = morphlib.util.new_repo_caches(app)
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
def branch_with_local_changes(self, uuid, push=False):
'''Equivalent to DefinitionsRepo.branch_with_local_changes().'''
@@ -343,22 +336,20 @@ class DefinitionsRepoWithApp(DefinitionsRepo):
build_ref_prefix=self.app.settings['build-ref-prefix'],
git_user_name=self._git_user_name,
git_user_email=self._git_user_email,
- status_cb=self.app.status,)
+ status_cb=self.app.status)
def source_pool(self, ref, system_filename):
'''Equivalent to DefinitionsRepo.source_pool().'''
local_changes = self.app.settings['local-changes']
return DefinitionsRepo.source_pool(
- self, self._lrc, self._rrc, self.app.settings['cachedir'],
- ref, system_filename,
+ self, self.repo_cache, ref, system_filename,
include_local_changes=(local_changes == 'include'),
push_local_changes=self.app.settings['push-build-branches'],
build_ref_prefix=self.app.settings['build-ref-prefix'],
git_user_name=self._git_user_name,
git_user_email=self._git_user_email,
- status_cb=self.app.status,
- update_repos=(not self.app.settings['no-git-update']))
+ status_cb=self.app.status)
def _local_definitions_repo(path, search_for_root, app=None):
'''Open a local Git repo containing Baserock definitions, at 'path'.
diff --git a/morphlib/git.py b/morphlib/git.py
index 190544ac..cab551ef 100644
--- a/morphlib/git.py
+++ b/morphlib/git.py
@@ -58,12 +58,13 @@ class MissingSubmoduleCommitError(cliapp.AppException):
class Submodules(object):
- def __init__(self, app, repo, ref):
- self.app = app
+ def __init__(self, repo, ref, runcmd_cb=cliapp.runcmd):
self.repo = repo
self.ref = ref
self.submodules = []
+ self.runcmd_cb = runcmd_cb
+
def load(self):
content = self._read_gitmodules_file()
@@ -76,7 +77,7 @@ class Submodules(object):
def _read_gitmodules_file(self):
try:
# try to read the .gitmodules file from the repo/ref
- content = gitcmd(self.app.runcmd, 'cat-file', 'blob',
+ content = gitcmd(self.runcmd_cb, 'cat-file', 'blob',
'%s:.gitmodules' % self.ref, cwd=self.repo,
ignore_fail=True)
@@ -100,7 +101,7 @@ class Submodules(object):
try:
# list objects in the parent repo tree to find the commit
# object that corresponds to the submodule
- commit = gitcmd(self.app.runcmd, 'ls-tree', self.ref,
+ commit = gitcmd(self.runcmd_cb, 'ls-tree', self.ref,
submodule.path, cwd=self.repo)
# read the commit hash from the output
diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py
deleted file mode 100644
index 3a03fe1d..00000000
--- a/morphlib/localrepocache.py
+++ /dev/null
@@ -1,357 +0,0 @@
-# Copyright (C) 2012-2016 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import os
-import urlparse
-import string
-import sys
-import tempfile
-
-import cliapp
-import fs.osfs
-
-import morphlib
-from morphlib.util import word_join_list as _word_join_list
-
-
-# urlparse.urljoin needs to know details of the URL scheme being used.
-# It does not know about git:// by default, so we teach it here.
-gitscheme = ['git']
-urlparse.uses_relative.extend(gitscheme)
-urlparse.uses_netloc.extend(gitscheme)
-urlparse.uses_params.extend(gitscheme)
-urlparse.uses_query.extend(gitscheme)
-urlparse.uses_fragment.extend(gitscheme)
-
-
-def quote_url(url):
- ''' Convert URIs to strings that only contain digits, letters, % and _.
-
- NOTE: When changing the code of this function, make sure to also apply
- the same to the quote_url() function of lorry. Otherwise the git tarballs
- generated by lorry may no longer be found by morph.
-
- '''
- valid_chars = string.digits + string.letters + '%_'
- transl = lambda x: x if x in valid_chars else '_'
- return ''.join([transl(x) for x in url])
-
-
-class NoRemote(morphlib.Error):
-
- def __init__(self, reponame, errors):
- self.reponame = reponame
- self.errors = errors
-
- def __str__(self):
- return '\n\t'.join(['Cannot find remote git repository: %s' %
- self.reponame] + self.errors)
-
-
-class NotCached(morphlib.Error):
- def __init__(self, reponame):
- self.reponame = reponame
-
- def __str__(self): # pragma: no cover
- return 'Repository %s is not cached yet' % self.reponame
-
-
-class UpdateError(cliapp.AppException): # pragma: no cover
-
- def __init__(self, repo):
- cliapp.AppException.__init__(
- self, 'Failed to update cached version of repo %s' % repo)
-
-
-class CachedRepo(morphlib.gitdir.GitDirectory):
- '''A locally cached Git repository with an origin remote set up.
-
- On instance of this class represents a locally cached version of a
- remote Git repository. This remote repository is set up as the
- 'origin' remote.
-
- Cached repositories are bare mirrors of the upstream. Locally created
- branches will be lost the next time the repository updates.
-
- '''
- def __init__(self, path, original_name, url):
- self.original_name = original_name
- self.url = url
- self.is_mirror = not url.startswith('file://')
- self.already_updated = False
-
- super(CachedRepo, self).__init__(path)
-
- def __str__(self): # pragma: no cover
- return self.url
-
-
-class LocalRepoCache(object):
-
- '''Manage locally cached git repositories.
-
- When we build stuff, we need a local copy of the git repository.
- To avoid having to clone the repositories for every build, we
- maintain a local cache of the repositories: we first clone the
- remote repository to the cache, and then make a local clone from
- the cache to the build environment. This class manages the local
- cached repositories.
-
- Repositories may be specified either using a full URL, in a form
- understood by git(1), or as a repository name to which a base url
- is prepended. The base urls are given to the class when it is
- created.
-
- Instead of cloning via a normal 'git clone' directly from the
- git server, we first try to download a tarball from a url, and
- if that works, we unpack the tarball.
-
- '''
-
- def __init__(self, app, cachedir, resolver, tarball_base_url=None):
- self._app = app
- self.fs = fs.osfs.OSFS('/')
- self._cachedir = cachedir
- self._resolver = resolver
- if tarball_base_url and not tarball_base_url.endswith('/'):
- tarball_base_url += '/' # pragma: no cover
- self._tarball_base_url = tarball_base_url
- self._cached_repo_objects = {}
-
- def _git(self, args, **kwargs): # pragma: no cover
- '''Execute git command.
-
- This is a method of its own so that unit tests can easily override
- all use of the external git command.
-
- '''
-
- morphlib.git.gitcmd(self._app.runcmd, *args, **kwargs)
-
- def _fetch(self, url, path): # pragma: no cover
- '''Fetch contents of url into a file.
-
- This method is meant to be overridden by unit tests.
-
- '''
- self._app.status(msg="Trying to fetch %(tarball)s to seed the cache",
- tarball=url, chatty=True)
-
- if self._app.settings['verbose']:
- verbosity_flags = []
- kwargs = dict(stderr=sys.stderr)
- else:
- verbosity_flags = ['--quiet']
- kwargs = dict()
-
- def wget_command():
- return ['wget'] + verbosity_flags + ['-O-', url]
-
- self._app.runcmd(wget_command(),
- ['tar', '--no-same-owner', '-xf', '-'],
- cwd=path, **kwargs)
-
- def _mkdtemp(self, dirname): # pragma: no cover
- '''Creates a temporary directory.
-
- This method is meant to be overridden by unit tests.
-
- '''
- return tempfile.mkdtemp(dir=dirname)
-
- def _escape(self, url):
- '''Escape a URL so it can be used as a basename in a file.'''
-
- # FIXME: The following is a nicer way than to do this.
- # However, for compatibility, we need to use the same as the
- # tarball server (set up by Lorry) uses.
- # return urllib.quote(url, safe='')
-
- return quote_url(url)
-
- def _cache_name(self, url):
- scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
- if scheme != 'file':
- path = os.path.join(self._cachedir, self._escape(url))
- return path
-
- def has_repo(self, reponame):
- '''Have we already got a cache of a given repo?'''
- url = self._resolver.pull_url(reponame)
- path = self._cache_name(url)
- return self.fs.exists(path)
-
- def _clone_with_tarball(self, repourl, path):
- tarball_url = urlparse.urljoin(self._tarball_base_url,
- self._escape(repourl)) + '.tar'
- try:
- self.fs.makedir(path)
- self._fetch(tarball_url, path)
- self._git(['config', 'remote.origin.url', repourl], cwd=path)
- self._git(['config', 'remote.origin.mirror', 'true'], cwd=path)
- self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'],
- cwd=path)
- except BaseException as e: # pragma: no cover
- if self.fs.exists(path):
- self.fs.removedir(path, force=True)
- return False, 'Unable to extract tarball %s: %s' % (
- tarball_url, e)
-
- return True, None
-
- def _cache_repo(self, reponame):
- '''Clone the given repo into the cache.
-
- If the repo is already cloned, do nothing.
-
- '''
- errors = []
- if not self.fs.exists(self._cachedir):
- self.fs.makedir(self._cachedir, recursive=True)
-
- try:
- return self._get_repo(reponame)
- except NotCached as e:
- pass
-
- repourl = self._resolver.pull_url(reponame)
- path = self._cache_name(repourl)
- if self._tarball_base_url:
- ok, error = self._clone_with_tarball(repourl, path)
- if ok:
- repo = self._get_repo(reponame)
- self._update_repo(repo)
- return repo
- else:
- errors.append(error)
- self._app.status(
- msg='Using git clone.')
-
- target = self._mkdtemp(self._cachedir)
-
- try:
- self._git(['clone', '--mirror', '-n', repourl, target],
- echo_stderr=self._app.settings['debug'])
- except cliapp.AppException as e:
- errors.append('Unable to clone from %s to %s: %s' %
- (repourl, target, e))
- if self.fs.exists(target):
- self.fs.removedir(target, recursive=True, force=True)
- raise NoRemote(reponame, errors)
-
- self.fs.rename(target, path)
-
- repo = self._get_repo(reponame)
- repo.already_updated = True
- return repo
-
- def _get_repo(self, reponame):
- '''Return an object representing a cached repository.'''
-
- if reponame in self._cached_repo_objects:
- return self._cached_repo_objects[reponame]
- else:
- repourl = self._resolver.pull_url(reponame)
- path = self._cache_name(repourl)
- if self.fs.exists(path):
- repo = CachedRepo(path, reponame, repourl)
- self._cached_repo_objects[reponame] = repo
- return repo
- raise NotCached(reponame)
-
- def _update_repo(self, cachedrepo): # pragma: no cover
- try:
- cachedrepo.update_remotes(
- echo_stderr=self._app.settings['verbose'])
- cachedrepo.already_updated = True
- except cliapp.AppException:
- raise UpdateError(self)
-
- def get_updated_repo(self, repo_name,
- ref=None, refs=None): # pragma: no cover
- '''Return object representing cached repository.
-
- If all the specified refs in 'ref' or 'refs' point to SHA1s that are
- already in the repository, or --no-git-update is set, then the
- repository won't be updated.
-
- '''
-
- if self._app.settings['no-git-update']:
- self._app.status(msg='Not updating existing git repository '
- '%(repo_name)s '
- 'because of no-git-update being set',
- chatty=True,
- repo_name=repo_name)
- return self._get_repo(repo_name)
-
- if ref is not None and refs is None:
- refs = (ref,)
-
- if self.has_repo(repo_name):
- repo = self._get_repo(repo_name)
- if refs:
- required_refs = set(refs)
- missing_refs = set()
- for required_ref in required_refs:
- if morphlib.git.is_valid_sha1(required_ref):
- try:
- repo.resolve_ref_to_commit(required_ref)
- continue
- except morphlib.gitdir.InvalidRefError:
- pass
- missing_refs.add(required_ref)
-
- if not missing_refs:
- self._app.status(
- msg='Not updating git repository %(repo_name)s '
- 'because it already contains %(sha1s)s',
- chatty=True, repo_name=repo_name,
- sha1s=_word_join_list(tuple(required_refs)))
- return repo
-
- self._app.status(msg='Updating %(repo_name)s',
- repo_name=repo_name)
- self._update_repo(repo)
- return repo
- else:
- self._app.status(msg='Cloning %(repo_name)s',
- repo_name=repo_name)
- return self._cache_repo(repo_name)
-
- def ensure_submodules(self, toplevel_repo,
- toplevel_ref): # pragma: no cover
- '''Ensure any submodules of a given repo are cached and up to date.'''
-
- def submodules_for_repo(repo_path, ref):
- try:
- submodules = morphlib.git.Submodules(self._app, repo_path, ref)
- submodules.load()
- return [(submod.url, submod.commit) for submod in submodules]
- except morphlib.git.NoModulesFileError:
- return []
-
- done = set()
- subs_to_process = submodules_for_repo(toplevel_repo.dirname,
- toplevel_ref)
- while subs_to_process:
- url, ref = subs_to_process.pop()
- done.add((url, ref))
-
- cached_repo = self.get_updated_repo(url, ref=ref)
-
- for submod in submodules_for_repo(cached_repo.dirname, ref):
- if submod not in done:
- subs_to_process.append(submod)
diff --git a/morphlib/localrepocache_tests.py b/morphlib/localrepocache_tests.py
deleted file mode 100644
index 91fdb216..00000000
--- a/morphlib/localrepocache_tests.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Copyright (C) 2012-2016 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import unittest
-import urllib2
-import os
-
-import cliapp
-import fs.memoryfs
-
-import morphlib
-import morphlib.gitdir_tests
-
-
-class FakeApplication(object):
-
- def __init__(self):
- self.settings = {
- 'debug': True,
- 'verbose': True,
- 'no-git-update': False,
- }
-
- def status(self, **kwargs):
- pass
-
-
-class LocalRepoCacheTests(unittest.TestCase):
-
- def setUp(self):
- aliases = ['upstream=git://example.com/#example.com:%s.git']
- repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
- tarball_base_url = 'http://lorry.example.com/tarballs/'
- self.reponame = 'upstream:reponame'
- self.repourl = 'git://example.com/reponame'
- escaped_url = 'git___example_com_reponame'
- self.tarball_url = '%s%s.tar' % (tarball_base_url, escaped_url)
- self.cachedir = '/cache/dir'
- self.cache_path = '%s/%s' % (self.cachedir, escaped_url)
- self.remotes = {}
- self.fetched = []
- self.lrc = morphlib.localrepocache.LocalRepoCache(
- FakeApplication(), self.cachedir, repo_resolver, tarball_base_url)
- self.lrc.fs = fs.memoryfs.MemoryFS()
- self.lrc._git = self.fake_git
- self.lrc._fetch = self.not_found
- self.lrc._mkdtemp = self.fake_mkdtemp
- self.lrc._update_repo = lambda *args: None
- self._mkdtemp_count = 0
-
- def fake_git(self, args, **kwargs):
- if args[0] == 'clone':
- self.assertEqual(len(args), 5)
- remote = args[3]
- local = args[4]
- self.remotes['origin'] = {'url': remote, 'updates': 0}
- self.lrc.fs.makedir(local, recursive=True)
- elif args[0:2] == ['remote', 'set-url']:
- remote = args[2]
- url = args[3]
- self.remotes[remote] = {'url': url}
- elif args[0:2] == ['config', 'remote.origin.url']:
- remote = 'origin'
- url = args[2]
- self.remotes[remote] = {'url': url}
- elif args[0:2] == ['config', 'remote.origin.mirror']:
- remote = 'origin'
- elif args[0:2] == ['config', 'remote.origin.fetch']:
- remote = 'origin'
- else:
- raise NotImplementedError()
-
- def fake_mkdtemp(self, dirname):
- thing = "foo"+str(self._mkdtemp_count)
- self._mkdtemp_count += 1
- self.lrc.fs.makedir(dirname+"/"+thing)
- return thing
-
- def not_found(self, url, path):
- raise cliapp.AppException('Not found')
-
- def test_has_not_got_shortened_repo_initially(self):
- self.assertFalse(self.lrc.has_repo(self.reponame))
-
- def test_has_not_got_absolute_repo_initially(self):
- self.assertFalse(self.lrc.has_repo(self.repourl))
-
- def test_cachedir_does_not_exist_initially(self):
- self.assertFalse(self.lrc.fs.exists(self.cachedir))
-
- def test_creates_cachedir_if_missing(self):
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.lrc.get_updated_repo(self.repourl, ref='master')
- self.assertTrue(self.lrc.fs.exists(self.cachedir))
-
- def test_happily_caches_same_repo_twice(self):
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.lrc.get_updated_repo(self.repourl, ref='master')
- self.lrc.get_updated_repo(self.repourl, ref='master')
-
- def test_fails_to_cache_when_remote_does_not_exist(self):
- def fail(args, **kwargs):
- self.lrc.fs.makedir(args[4])
- raise cliapp.AppException('')
- self.lrc._git = fail
- self.assertRaises(morphlib.localrepocache.NoRemote,
- self.lrc.get_updated_repo, self.repourl, 'master')
-
- def test_does_not_mind_a_missing_tarball(self):
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.lrc.get_updated_repo(self.repourl, ref='master')
- self.assertEqual(self.fetched, [])
-
- def test_fetches_tarball_when_it_exists(self):
- self.lrc._fetch = lambda url, path: self.fetched.append(url)
-
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- self.lrc.get_updated_repo(self.repourl, ref='master')
-
- self.assertEqual(self.fetched, [self.tarball_url])
- self.assertFalse(self.lrc.fs.exists(self.cache_path + '.tar'))
- self.assertEqual(self.remotes['origin']['url'], self.repourl)
-
- def test_escapes_repourl_as_filename(self):
- escaped = self.lrc._escape(self.repourl)
- self.assertFalse('/' in escaped)
-
- def test_noremote_error_message_contains_repo_name(self):
- e = morphlib.localrepocache.NoRemote(self.repourl, [])
- self.assertTrue(self.repourl in str(e))
-
- def test_avoids_caching_local_repo(self):
- self.lrc.fs.makedir('/local/repo', recursive=True)
- with morphlib.gitdir_tests.allow_nonexistant_git_repos():
- cached = self.lrc.get_updated_repo('file:///local/repo',
- refs='master')
- assert cached.dirname == '/local/repo'
diff --git a/morphlib/plugins/anchor_plugin.py b/morphlib/plugins/anchor_plugin.py
index 7465c479..a9d07b39 100644
--- a/morphlib/plugins/anchor_plugin.py
+++ b/morphlib/plugins/anchor_plugin.py
@@ -137,9 +137,8 @@ class AnchorPlugin(cliapp.Plugin):
for reponame, sources in sources_by_reponame.iteritems():
# UGLY HACK we need to push *FROM* our local repo cache to
# avoid cloning everything multiple times.
- repo = bc.lrc.get_updated_repo(reponame,
- refs=(s.original_ref
- for s in sources))
+ repo = bc.repo_cache.get_updated_repo(
+ reponame, refs=(s.original_ref for s in sources))
remote = Remote(repo)
push_url = resolver.push_url(reponame)
diff --git a/morphlib/plugins/artifact_inspection_plugin.py b/morphlib/plugins/artifact_inspection_plugin.py
index 413a0072..d396f93b 100644
--- a/morphlib/plugins/artifact_inspection_plugin.py
+++ b/morphlib/plugins/artifact_inspection_plugin.py
@@ -36,28 +36,23 @@ class NotASystemArtifactError(cliapp.AppException):
class ProjectVersionGuesser(object):
- def __init__(self, app, lrc, rrc, interesting_files):
+ def __init__(self, app, repo_cache, interesting_files):
self.app = app
- self.lrc = lrc
- self.rrc = rrc
+ self.repo_cache = repo_cache
self.interesting_files = interesting_files
def file_contents(self, repo, ref, tree):
filenames = [x for x in self.interesting_files if x in tree]
- if filenames:
- if self.lrc.has_repo(repo):
- repository = self.lrc.get_updated_repo(repo, ref)
- for filename in filenames:
- yield filename, repository.read_file(filename, ref)
- elif self.rrc:
- for filename in filenames:
- yield filename, self.rrc.cat_file(repo, ref, filename)
+ for filename in filenames:
+ # This can use a remote repo cache if available, to avoid having
+ # to clone every repo locally.
+ yield filename, self.repo_cache.cat_file(repo, ref, filename)
class AutotoolsVersionGuesser(ProjectVersionGuesser):
- def __init__(self, app, lrc, rrc):
- ProjectVersionGuesser.__init__(self, app, lrc, rrc, [
+ def __init__(self, app, repo_cache):
+ ProjectVersionGuesser.__init__(self, app, repo_cache, [
'configure.ac',
'configure.in',
'configure.ac.in',
@@ -136,9 +131,9 @@ class VersionGuesser(object):
def __init__(self, app):
self.app = app
- self.lrc, self.rrc = morphlib.util.new_repo_caches(app)
+ self.repo_cache = morphlib.util.new_repo_cache(app)
self.guessers = [
- AutotoolsVersionGuesser(app, self.lrc, self.rrc)
+ AutotoolsVersionGuesser(app, self.repo_cache)
]
def guess_version(self, repo, ref):
@@ -146,14 +141,10 @@ class VersionGuesser(object):
repo=repo, ref=ref, chatty=True)
version = None
try:
- if self.lrc.has_repo(repo):
- repository = self.lrc.get_updated_repo(repo, ref)
- tree = repository.list_files(ref=ref, recurse=False)
- elif self.rrc:
- repository = None
- tree = self.rrc.ls_tree(repo, ref)
- else:
- return None
+ # This can use a remote repo cache if available, to avoid having
+ # to clone every repo locally.
+ tree = self.repo_cache.ls_tree(repo, ref)
+
for guesser in self.guessers:
version = guesser.guess_version(repo, ref, tree)
if version:
diff --git a/morphlib/plugins/certify_plugin.py b/morphlib/plugins/certify_plugin.py
index 735d0332..72d24a51 100644
--- a/morphlib/plugins/certify_plugin.py
+++ b/morphlib/plugins/certify_plugin.py
@@ -57,7 +57,7 @@ class CertifyPlugin(cliapp.Plugin):
system_filenames = map(morphlib.util.sanitise_morphology_path,
args[2:])
- self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app)
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
self.resolver = morphlib.artifactresolver.ArtifactResolver()
for system_filename in system_filenames:
@@ -69,9 +69,7 @@ class CertifyPlugin(cliapp.Plugin):
self.app.status(
msg='Creating source pool for %s' % system_filename, chatty=True)
source_pool = morphlib.sourceresolver.create_source_pool(
- self.lrc, self.rrc, repo, ref, [system_filename],
- cachedir=self.app.settings['cachedir'],
- update_repos = not self.app.settings['no-git-update'],
+ self.repo_cache, repo, ref, [system_filename],
status_cb=self.app.status)
self.app.status(
@@ -115,7 +113,7 @@ class CertifyPlugin(cliapp.Plugin):
.format(name, ref))
certified = False
- cached = self.lrc.get_updated_repo(source.repo_name, ref)
+ cached = self.repo_cache.get_updated_repo(source.repo_name, ref)
# Test that sha1 ref is anchored in a tag or branch,
# and thus not a candidate for removal on `git gc`.
diff --git a/morphlib/plugins/cross-bootstrap_plugin.py b/morphlib/plugins/cross-bootstrap_plugin.py
index 273e677d..8b8fbb2d 100644
--- a/morphlib/plugins/cross-bootstrap_plugin.py
+++ b/morphlib/plugins/cross-bootstrap_plugin.py
@@ -304,7 +304,7 @@ class CrossBootstrapPlugin(cliapp.Plugin):
system_artifact.source, build_env, use_chroot=False)
builder = BootstrapSystemBuilder(
self.app, staging_area, build_command.lac, build_command.rac,
- system_artifact.source, build_command.lrc, 1, False)
+ system_artifact.source, build_command.repo_cache, 1, False)
builder.build_and_cache()
self.app.status(
diff --git a/morphlib/plugins/diff_plugin.py b/morphlib/plugins/diff_plugin.py
index 26964df8..24a6d69a 100644
--- a/morphlib/plugins/diff_plugin.py
+++ b/morphlib/plugins/diff_plugin.py
@@ -22,7 +22,6 @@ from morphlib.cmdline_parse_utils import (definition_lists_synopsis,
from morphlib.morphologyfinder import MorphologyFinder
from morphlib.morphloader import MorphologyLoader
from morphlib.morphset import MorphologySet
-from morphlib.util import new_repo_caches
class DiffPlugin(cliapp.Plugin):
@@ -60,9 +59,10 @@ class DiffPlugin(cliapp.Plugin):
name, from_source.repo_name, to_source.repo_name))
if from_source.original_ref != to_source.original_ref:
- from_repo, to_repo = (self.bc.lrc.get_updated_repo(s.repo_name,
- ref=s.sha1)
- for s in (from_source, to_source))
+ repo_cache = self.bc.repo_cache
+ from_repo, to_repo = (repo_cache.get_updated_repo(s.repo_name,
+ ref=s.sha1)
+ for s in (from_source, to_source))
from_desc = from_repo.version_guess(from_source.sha1)
to_desc = to_repo.version_guess(to_source.sha1)
@@ -100,7 +100,7 @@ class DiffPlugin(cliapp.Plugin):
def get_systems((reponame, ref, definitions)):
'Convert a definition path list into a list of systems'
ml = MorphologyLoader()
- repo = self.bc.lrc.get_updated_repo(reponame, ref=ref)
+ repo = self.bc.repo_cache.get_updated_repo(reponame, ref=ref)
mf = MorphologyFinder(gitdir=repo, ref=ref)
# We may have been given an empty set of definitions as input, in
# which case we instead use every we find.
diff --git a/morphlib/plugins/get_repo_plugin.py b/morphlib/plugins/get_repo_plugin.py
index fc81d6e5..ce0b7af0 100644
--- a/morphlib/plugins/get_repo_plugin.py
+++ b/morphlib/plugins/get_repo_plugin.py
@@ -101,9 +101,9 @@ class GetRepoPlugin(cliapp.Plugin):
'%(stratum)s stratum',
ref=ref or chunk_spec['ref'], chunk=chunk_spec['name'],
stratum=morph['name'])
- lrc, rrc = morphlib.util.new_repo_caches(self.app)
- cached_repo = lrc.get_updated_repo(chunk_spec['repo'],
- chunk_spec['ref'])
+ repo_cache = morphlib.util.new_repo_cache(self.app)
+ cached_repo = repo_cache.get_updated_repo(chunk_spec['repo'],
+ chunk_spec['ref'])
try:
self._clone_repo(cached_repo, dirname,
diff --git a/morphlib/plugins/list_artifacts_plugin.py b/morphlib/plugins/list_artifacts_plugin.py
index c2e6b459..2c098c2a 100644
--- a/morphlib/plugins/list_artifacts_plugin.py
+++ b/morphlib/plugins/list_artifacts_plugin.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2015 Codethink Limited
+# Copyright (C) 2014-2016 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -58,7 +58,7 @@ class ListArtifactsPlugin(cliapp.Plugin):
system_filenames = map(morphlib.util.sanitise_morphology_path,
args[2:])
- self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app)
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
self.resolver = morphlib.artifactresolver.ArtifactResolver()
artifact_files = set()
@@ -85,9 +85,7 @@ class ListArtifactsPlugin(cliapp.Plugin):
self.app.status(
msg='Creating source pool for %s' % system_filename, chatty=True)
source_pool = morphlib.sourceresolver.create_source_pool(
- self.lrc, self.rrc, repo, ref, [system_filename],
- cachedir=self.app.settings['cachedir'],
- update_repos = not self.app.settings['no-git-update'],
+ self.repo_cache, repo, ref, [system_filename],
status_cb=self.app.status)
self.app.status(
diff --git a/morphlib/plugins/show_dependencies_plugin.py b/morphlib/plugins/show_dependencies_plugin.py
index 42f8f273..bfe4d6c2 100644
--- a/morphlib/plugins/show_dependencies_plugin.py
+++ b/morphlib/plugins/show_dependencies_plugin.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2012-2015 Codethink Limited
+# Copyright (C) 2012-2016 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -42,24 +42,6 @@ class ShowDependenciesPlugin(cliapp.Plugin):
of build dependencies of the constituent components.
'''
-
- if not os.path.exists(self.app.settings['cachedir']):
- os.mkdir(self.app.settings['cachedir'])
- cachedir = os.path.join(self.app.settings['cachedir'], 'gits')
- tarball_base_url = self.app.settings['tarball-server']
- repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(
- self.app.settings['repo-alias'])
- lrc = morphlib.localrepocache.LocalRepoCache(
- self.app, cachedir, repo_resolver, tarball_base_url)
-
- remote_url = morphlib.util.get_git_resolve_cache_server(
- self.app.settings)
- if remote_url:
- rrc = morphlib.remoterepocache.RemoteRepoCache(
- remote_url, repo_resolver)
- else:
- rrc = None
-
build_command = morphlib.buildcommand.BuildCommand(self.app)
# traverse the morphs to list all the sources
diff --git a/morphlib/plugins/system_manifests_plugin.py b/morphlib/plugins/system_manifests_plugin.py
index 86388737..7fe33102 100644
--- a/morphlib/plugins/system_manifests_plugin.py
+++ b/morphlib/plugins/system_manifests_plugin.py
@@ -84,7 +84,7 @@ class SystemManifestsPlugin(cliapp.Plugin):
system_filenames = map(morphlib.util.sanitise_morphology_path,
args[2:])
- self.lrc, self.rrc = morphlib.util.new_repo_caches(self.app)
+ self.repo_cache = morphlib.util.new_repo_cache(self.app)
self.resolver = morphlib.artifactresolver.ArtifactResolver()
for system_filename in system_filenames:
@@ -104,9 +104,7 @@ class SystemManifestsPlugin(cliapp.Plugin):
msg='Creating source pool for %(system)s',
system=system_filename, chatty=True)
source_pool = morphlib.sourceresolver.create_source_pool(
- self.lrc, self.rrc, repo, ref, [system_filename],
- cachedir=self.app.settings['cachedir'],
- update_repos = not self.app.settings['no-git-update'],
+ self.repo_cache, repo, ref, [system_filename],
status_cb=self.app.status)
self.app.status(
@@ -135,10 +133,11 @@ class SystemManifestsPlugin(cliapp.Plugin):
except IndexError:
trove_id = None
with morphlib.util.temp_dir(dir=self.app.settings['tempdir']) as td:
- lorries = get_lorry_repos(td, self.lrc, self.app.status, trove_id,
+ lorries = get_lorry_repos(td, self.repo_cache, self.app.status,
+ trove_id,
self.app.settings['trove-host'])
manifest = Manifest(system_artifact.name, td, self.app.status,
- self.lrc)
+ self.repo_cache)
old_prefix = self.app.status_prefix
sources = set(a.source for a in system_artifact.walk()
@@ -150,7 +149,8 @@ class SystemManifestsPlugin(cliapp.Plugin):
name = source.morphology['name']
ref = source.original_ref
- cached = self.lrc.get_updated_repo(source.repo_name, ref)
+ cached = self.repo_cache.get_updated_repo(source.repo_name,
+ ref)
new_prefix = '[%d/%d][%s] ' % (i, len(sources), name)
self.app.status_prefix = old_prefix + new_prefix
@@ -169,8 +169,8 @@ def run_licensecheck(filename):
else:
return output[len(filename) + 2:].strip()
-def checkout_repo(lrc, repo, dest, ref='master'):
- cached = lrc.get_updated_repo(repo, ref)
+def checkout_repo(repo_cache, repo, dest, ref='master'):
+ cached = repo_cache.get_updated_repo(repo, ref)
if not os.path.exists(dest):
morphlib.gitdir.checkout_from_cached_repo(repo, ref, dest)
@@ -235,14 +235,15 @@ def get_upstream_address(chunk_url, lorries, status):
chunk=chunk_url)
return 'UNKNOWN'
-def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host):
+def get_lorry_repos(tempdir, repo_cache, status, trove_id, trove_host):
lorries = []
try:
baserock_lorry_repo = 'baserock:local-config/lorries'
lorrydir = os.path.join(tempdir, 'baserock-lorries')
- baserock_lorrydir = checkout_repo(lrc, baserock_lorry_repo, lorrydir)
+ baserock_lorrydir = checkout_repo(repo_cache, baserock_lorry_repo,
+ lorrydir)
lorries.extend(load_lorries(lorrydir))
- except morphlib.localrepocache.NoRemote as e:
+ except morphlib.repocache.NoRemote as e:
status(msg="WARNING: Could not find lorries from git.baserock.org, "
"expected to find them on %(trove)s at %(reponame)s",
trove=trove_host, reponame = e.reponame)
@@ -252,9 +253,10 @@ def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host):
trove_lorry_repo = ('http://%s/git/%s/local-config/lorries' %
(trove_host, trove_id))
lorrydir = os.path.join(tempdir, '%s-lorries' % trove_id)
- trove_lorrydir = checkout_repo(lrc, trove_lorry_repo, lorrydir)
+ trove_lorrydir = checkout_repo(repo_cache, trove_lorry_repo,
+ lorrydir)
lorries.extend(load_lorries(lorrydir))
- except morphlib.localrepocache.NoRemote as e:
+ except morphlib.repocache.NoRemote as e:
status(msg="WARNING: Could not find lorries repo on %(trove)s "
"at %(reponame)s",
trove=trove_host, reponame=e.reponame)
@@ -268,10 +270,10 @@ def get_lorry_repos(tempdir, lrc, status, trove_id, trove_host):
class Manifest(object):
"""Writes out a manifest of what's included in a system."""
- def __init__(self, system_name, tempdir, status_cb, lrc):
+ def __init__(self, system_name, tempdir, status_cb, repo_cache):
self.tempdir = tempdir
self.status = status_cb
- self.lrc = lrc
+ self.repo_cache = repo_cache
path = os.path.join(os.getcwd(), system_name + '-manifest.csv')
self.status(msg='Creating %(path)s', path=path)
self.file = open(path, 'wb')
diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py
deleted file mode 100644
index 4a6d9fe9..00000000
--- a/morphlib/remoterepocache.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright (C) 2012-2015 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import cliapp
-import json
-import logging
-import urllib2
-import urlparse
-import urllib
-
-
-class ResolveRefError(cliapp.AppException):
-
- def __init__(self, repo_name, ref):
- cliapp.AppException.__init__(
- self, 'Failed to resolve ref %s for repo %s' %
- (ref, repo_name))
-
-
-class CatFileError(cliapp.AppException):
-
- def __init__(self, repo_name, ref, filename):
- cliapp.AppException.__init__(
- self, 'Failed to cat file %s in ref %s of repo %s' %
- (filename, ref, repo_name))
-
-class LsTreeError(cliapp.AppException):
-
- def __init__(self, repo_name, ref):
- cliapp.AppException.__init__(
- self, 'Failed to list tree in ref %s of repo %s' %
- (ref, repo_name))
-
-
-class RemoteRepoCache(object):
-
- def __init__(self, server_url, resolver):
- self.server_url = server_url
- self._resolver = resolver
-
- def resolve_ref(self, repo_name, ref):
- repo_url = self._resolver.pull_url(repo_name)
- try:
- return self._resolve_ref_for_repo_url(repo_url, ref)
- except BaseException as e:
- logging.error('Caught exception: %s' % str(e))
- raise ResolveRefError(repo_name, ref)
-
- def cat_file(self, repo_name, ref, filename):
- repo_url = self._resolver.pull_url(repo_name)
- try:
- return self._cat_file_for_repo_url(repo_url, ref, filename)
- except urllib2.HTTPError as e:
- logging.error('Caught exception: %s' % str(e))
- if e.code == 404:
- raise CatFileError(repo_name, ref, filename)
- raise # pragma: no cover
-
- def ls_tree(self, repo_name, ref):
- repo_url = self._resolver.pull_url(repo_name)
- try:
- info = json.loads(self._ls_tree_for_repo_url(repo_url, ref))
- return info['tree'].keys()
- except BaseException as e:
- logging.error('Caught exception: %s' % str(e))
- raise LsTreeError(repo_name, ref)
-
- def _resolve_ref_for_repo_url(self, repo_url, ref): # pragma: no cover
- data = self._make_request(
- 'sha1s?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))
- info = json.loads(data)
- return info['sha1'], info['tree']
-
- def _cat_file_for_repo_url(self, repo_url, ref,
- filename): # pragma: no cover
- return self._make_request(
- 'files?repo=%s&ref=%s&filename=%s'
- % self._quote_strings(repo_url, ref, filename))
-
- def _ls_tree_for_repo_url(self, repo_url, ref): # pragma: no cover
- return self._make_request(
- 'trees?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))
-
- def _quote_strings(self, *args): # pragma: no cover
- return tuple(urllib.quote(string) for string in args)
-
- def _make_request(self, path): # pragma: no cover
- server_url = self.server_url
- if not server_url.endswith('/'):
- server_url += '/'
- url = urlparse.urljoin(server_url, '/1.0/%s' % path)
- handle = urllib2.urlopen(url)
- return handle.read()
diff --git a/morphlib/remoterepocache_tests.py b/morphlib/remoterepocache_tests.py
deleted file mode 100644
index 966e74d5..00000000
--- a/morphlib/remoterepocache_tests.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright (C) 2012-2015 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import json
-import unittest
-import urllib2
-
-import morphlib
-
-
-class RemoteRepoCacheTests(unittest.TestCase):
-
- def _resolve_ref_for_repo_url(self, repo_url, ref):
- return self.sha1s[repo_url][ref]
-
- def _cat_file_for_repo_url(self, repo_url, sha1, filename):
- try:
- return self.files[repo_url][sha1][filename]
- except KeyError:
- raise urllib2.HTTPError(url='', code=404, msg='Not found',
- hdrs={}, fp=None)
-
- def _ls_tree_for_repo_url(self, repo_url, sha1):
- return json.dumps({
- 'repo': repo_url,
- 'ref': sha1,
- 'tree': self.files[repo_url][sha1]
- })
-
- def setUp(self):
- self.sha1s = {
- 'git://gitorious.org/baserock/morph': {
- 'master': 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9'
- }
- }
- self.files = {
- 'git://gitorious.org/baserock-morphs/linux': {
- 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9': {
- 'linux.morph': 'linux morphology'
- }
- }
- }
- self.server_url = 'http://foo.bar'
- aliases = [
- 'upstream=git://gitorious.org/baserock-morphs/#foo',
- 'baserock=git://gitorious.org/baserock/#foo'
- ]
- resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
- self.cache = morphlib.remoterepocache.RemoteRepoCache(
- self.server_url, resolver)
- self.cache._resolve_ref_for_repo_url = self._resolve_ref_for_repo_url
- self.cache._cat_file_for_repo_url = self._cat_file_for_repo_url
- self.cache._ls_tree_for_repo_url = self._ls_tree_for_repo_url
-
- def test_sets_server_url(self):
- self.assertEqual(self.cache.server_url, self.server_url)
-
- def test_resolve_existing_ref_for_existing_repo(self):
- sha1 = self.cache.resolve_ref('baserock:morph', 'master')
- self.assertEqual(
- sha1,
- self.sha1s['git://gitorious.org/baserock/morph']['master'])
-
- def test_fail_resolving_existing_ref_for_non_existent_repo(self):
- self.assertRaises(morphlib.remoterepocache.ResolveRefError,
- self.cache.resolve_ref, 'non-existent-repo',
- 'master')
-
- def test_fail_resolving_non_existent_ref_for_existing_repo(self):
- self.assertRaises(morphlib.remoterepocache.ResolveRefError,
- self.cache.resolve_ref, 'baserock:morph',
- 'non-existent-ref')
-
- def test_fail_resolving_non_existent_ref_for_non_existent_repo(self):
- self.assertRaises(morphlib.remoterepocache.ResolveRefError,
- self.cache.resolve_ref, 'non-existent-repo',
- 'non-existent-ref')
-
- def test_cat_existing_file_in_existing_repo_and_ref(self):
- content = self.cache.cat_file(
- 'upstream:linux', 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9',
- 'linux.morph')
- self.assertEqual(content, 'linux morphology')
-
- def test_fail_cat_file_using_invalid_sha1(self):
- self.assertRaises(morphlib.remoterepocache.CatFileError,
- self.cache.cat_file, 'upstream:linux', 'blablabla',
- 'linux.morph')
-
- def test_fail_cat_non_existent_file_in_existing_repo_and_ref(self):
- self.assertRaises(morphlib.remoterepocache.CatFileError,
- self.cache.cat_file, 'upstream:linux',
- 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9',
- 'non-existent-file')
-
- def test_fail_cat_file_in_non_existent_ref_in_existing_repo(self):
- self.assertRaises(morphlib.remoterepocache.CatFileError,
- self.cache.cat_file, 'upstream:linux',
- 'ecd7a325095a0d19b8c3d76f578d85b979461d41',
- 'linux.morph')
-
- def test_fail_cat_file_in_non_existent_repo(self):
- self.assertRaises(morphlib.remoterepocache.CatFileError,
- self.cache.cat_file, 'non-existent-repo',
- 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9',
- 'some-file')
-
- def test_ls_tree_in_existing_repo_and_ref(self):
- content = self.cache.ls_tree(
- 'upstream:linux', 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9')
- self.assertEqual(content, ['linux.morph'])
-
- def test_fail_ls_tree_using_invalid_sha1(self):
- self.assertRaises(morphlib.remoterepocache.LsTreeError,
- self.cache.ls_tree, 'upstream:linux', 'blablabla')
-
- def test_fail_ls_file_in_non_existent_ref_in_existing_repo(self):
- self.assertRaises(morphlib.remoterepocache.LsTreeError,
- self.cache.ls_tree, 'upstream:linux',
- 'ecd7a325095a0d19b8c3d76f578d85b979461d41')
-
- def test_fail_ls_tree_in_non_existent_repo(self):
- self.assertRaises(morphlib.remoterepocache.LsTreeError,
- self.cache.ls_tree, 'non-existent-repo',
- 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9')
diff --git a/morphlib/repocache.py b/morphlib/repocache.py
new file mode 100644
index 00000000..f6978ec4
--- /dev/null
+++ b/morphlib/repocache.py
@@ -0,0 +1,565 @@
+# Copyright (C) 2012-2016 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import cliapp
+import fs.osfs
+
+import json
+import logging
+import os
+import string
+import sys
+import tempfile
+import urllib2
+import urlparse
+import urllib
+
+import morphlib
+from morphlib.util import word_join_list as _word_join_list
+
+
+# urlparse.urljoin needs to know details of the URL scheme being used.
+# It does not know about git:// by default, so we teach it here.
+gitscheme = ['git']
+urlparse.uses_relative.extend(gitscheme)
+urlparse.uses_netloc.extend(gitscheme)
+urlparse.uses_params.extend(gitscheme)
+urlparse.uses_query.extend(gitscheme)
+urlparse.uses_fragment.extend(gitscheme)
+
+
+def quote_url(url):
+ ''' Convert URIs to strings that only contain digits, letters, % and _.
+
+ NOTE: When changing the code of this function, make sure to also apply
+ the same to the quote_url() function of lorry. Otherwise the git tarballs
+ generated by lorry may no longer be found by morph.
+
+ '''
+ valid_chars = string.digits + string.letters + '%_'
+ transl = lambda x: x if x in valid_chars else '_'
+ return ''.join([transl(x) for x in url])
+
+
+class NoRemote(morphlib.Error):
+
+ def __init__(self, reponame, errors):
+ self.reponame = reponame
+ self.errors = errors
+
+ def __str__(self):
+ return '\n\t'.join(['Cannot find remote git repository: %s' %
+ self.reponame] + self.errors)
+
+
+class NotCached(morphlib.Error):
+ def __init__(self, reponame):
+ self.reponame = reponame
+
+ def __str__(self): # pragma: no cover
+ return 'Repository %s is not cached yet' % self.reponame
+
+
+class UpdateError(cliapp.AppException): # pragma: no cover
+
+ def __init__(self, repo):
+ cliapp.AppException.__init__(
+ self, 'Failed to update cached version of repo %s' % repo)
+
+
+class CachedRepo(morphlib.gitdir.GitDirectory):
+ '''A locally cached Git repository with an origin remote set up.
+
+ One instance of this class represents a locally cached version of a
+ remote Git repository. This remote repository is set up as the
+ 'origin' remote.
+
+ Cached repositories are bare mirrors of the upstream. Locally created
+ branches will be lost the next time the repository updates.
+
+ '''
+ def __init__(self, path, original_name, url):
+ self.original_name = original_name
+ self.url = url
+ self.is_mirror = not url.startswith('file://')
+ self.already_updated = False
+
+ super(CachedRepo, self).__init__(path)
+
+ def __str__(self): # pragma: no cover
+ return self.url
+
+
+class RepoCache(object):
+ '''Manage a collection of Git repositories.
+
+ When we build stuff, we need a local copy of the git repository.
+ To avoid having to clone the repositories for every build, we
+ maintain a local cache of the repositories: we first clone the
+ remote repository to the cache, and then make a local clone from
+ the cache to the build environment. This class manages the local
+ cached repositories.
+
+ Repositories may be specified either using a full URL, in a form
+ understood by git(1), or as a repository name to which a base url
+ is prepended. The base urls are given to the class when it is
+ created.
+
+ Instead of cloning via a normal 'git clone' directly from the
+ git server, we first try to download a tarball from a url, and
+ if that works, we unpack the tarball.
+
+ Certain questions about a repo can be resolved without cloning the whole
+ thing, if an instance of 'morph-cache-server' is available on the remote
+ Git server. This makes calculating the build graph for the first time
+ a whole lot faster, as we avoid cloning every repo locally. The
+ git_resolve_cache_url parameter enables this feature. Baserock 'Trove'
+ systems run 'morph-cache-server' by default.
+
+ The 'custom_fs' parameter takes a PyFilesystem instance, which you can use
+ to override where 'cachedir' is stored. This should probably only be used
+ for testing.
+
+ '''
+ def __init__(self, cachedir, resolver, tarball_base_url=None,
+ git_resolve_cache_url=None,
+ update_gits=True,
+ runcmd_cb=cliapp.runcmd, status_cb=lambda **kwargs: None,
+ verbose=False, debug=False,
+ custom_fs=None):
+ self.fs = custom_fs or fs.osfs.OSFS('/')
+
+ self.fs.makedir(cachedir, recursive=True, allow_recreate=True)
+
+ self.cachedir = cachedir
+ self._resolver = resolver
+ if tarball_base_url and not tarball_base_url.endswith('/'):
+ tarball_base_url += '/'
+ self._tarball_base_url = tarball_base_url
+ self._cached_repo_objects = {}
+
+ # Corresponds to the app 'no-git-update' setting
+ self.update_gits = update_gits
+
+ self.runcmd_cb = runcmd_cb
+ self.status_cb = status_cb
+ self.verbose = verbose
+ self.debug = debug
+
+ if git_resolve_cache_url: # pragma: no cover
+ self.remote_cache = RemoteRepoCache(git_resolve_cache_url,
+ resolver)
+ else:
+ self.remote_cache = None
+
+ def _git(self, args, **kwargs): # pragma: no cover
+ '''Execute git command.
+
+ This is a method of its own so that unit tests can easily override
+ all use of the external git command.
+
+ '''
+
+ morphlib.git.gitcmd(self.runcmd_cb, *args, **kwargs)
+
+ def _fetch(self, url, path): # pragma: no cover
+ '''Fetch contents of url into a file.
+
+ This method is meant to be overridden by unit tests.
+
+ '''
+ self.status_cb(msg="Trying to fetch %(tarball)s to seed the cache",
+ tarball=url, chatty=True)
+
+ if self.verbose:
+ verbosity_flags = []
+ kwargs = dict(stderr=sys.stderr)
+ else:
+ verbosity_flags = ['--quiet']
+ kwargs = dict()
+
+ def wget_command():
+ return ['wget'] + verbosity_flags + ['-O-', url]
+
+ self.runcmd_cb(wget_command(),
+ ['tar', '--no-same-owner', '-xf', '-'],
+ cwd=path, **kwargs)
+
+ def _mkdtemp(self, dirname): # pragma: no cover
+ '''Creates a temporary directory.
+
+ This method is meant to be overridden by unit tests.
+
+ '''
+ return tempfile.mkdtemp(dir=self.fs.getsyspath(dirname))
+
+ def _escape(self, url):
+ '''Escape a URL so it can be used as a basename in a file.'''
+
+ # FIXME: The commented-out call below is a nicer way to do this.
+ # However, for compatibility, we need to use the same escaping as
+ # the tarball server (set up by Lorry) uses.
+ # return urllib.quote(url, safe='')
+
+ return quote_url(url)
+
+ def _cache_name(self, url):
+ scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
+ if scheme != 'file':
+ path = os.path.join(self.cachedir, self._escape(url))
+ return path
+
+ def has_repo(self, reponame):
+ '''Have we already got a cache of a given repo?'''
+ url = self._resolver.pull_url(reponame)
+ path = self._cache_name(url)
+ return self.fs.exists(path)
+
+ def _clone_with_tarball(self, repourl, path):
+ tarball_url = urlparse.urljoin(self._tarball_base_url,
+ self._escape(repourl)) + '.tar'
+ try:
+ self.fs.makedir(path)
+ self._fetch(tarball_url, path)
+ self._git(['config', 'remote.origin.url', repourl], cwd=path)
+ self._git(['config', 'remote.origin.mirror', 'true'], cwd=path)
+ self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'],
+ cwd=path)
+ except BaseException as e:
+ if self.fs.exists(path):
+ self.fs.removedir(path, force=True)
+ return False, 'Unable to extract tarball %s: %s' % (
+ tarball_url, e)
+
+ return True, None
+
+ def _new_cached_repo_instance(self, path, reponame, repourl):
+ return CachedRepo(path, reponame, repourl)
+
+ def _cache_repo(self, reponame):
+ '''Clone the given repo into the cache.
+
+ If the repo is already cloned, do nothing.
+
+ '''
+ errors = []
+
+ repourl = self._resolver.pull_url(reponame)
+ path = self._cache_name(repourl)
+ if self._tarball_base_url:
+ ok, error = self._clone_with_tarball(repourl, path)
+ if ok:
+ repo = self._get_repo(reponame)
+ self._update_repo(repo)
+ return repo
+ else:
+ errors.append(error)
+ self.status_cb(msg='Using git clone.')
+
+ target = self._mkdtemp(self.cachedir)
+
+ try:
+ self._git(['clone', '--mirror', '-n', repourl, target],
+ echo_stderr=self.debug)
+ except cliapp.AppException as e:
+ errors.append('Unable to clone from %s to %s: %s' %
+ (repourl, target, e))
+ if self.fs.exists(target):
+ self.fs.removedir(target, force=True)
+ raise NoRemote(reponame, errors)
+
+ self.fs.rename(target, path)
+
+ repo = self._new_cached_repo_instance(path, reponame, repourl)
+ repo.already_updated = True
+ return repo
+
+ def _get_repo(self, reponame):
+ '''Return an object representing a cached repository.'''
+
+ if reponame in self._cached_repo_objects:
+ return self._cached_repo_objects[reponame]
+ else:
+ repourl = self._resolver.pull_url(reponame)
+ path = self._cache_name(repourl)
+ if self.fs.exists(path):
+ repo = self._new_cached_repo_instance(path, reponame, repourl)
+ self._cached_repo_objects[reponame] = repo
+ return repo
+ elif self.update_gits:
+ return self._cache_repo(reponame)
+ else:
+ raise NotCached(reponame)
+
+ def _update_repo(self, cachedrepo): # pragma: no cover
+ try:
+ cachedrepo.update_remotes(
+ echo_stderr=self.verbose)
+ cachedrepo.already_updated = True
+ except cliapp.AppException:
+ raise UpdateError(self)
+
+ def get_updated_repo(self, repo_name,
+ ref=None, refs=None):
+ '''Return object representing cached repository.
+
+ If all the specified refs in 'ref' or 'refs' point to SHA1s that are
+ already in the repository, or --no-git-update is set, then the
+ repository won't be updated.
+
+ '''
+
+ if not self.update_gits:
+ self.status_cb(msg='Not updating existing git repository '
+ '%(repo_name)s '
+ 'because of no-git-update being set',
+ chatty=True,
+ repo_name=repo_name)
+ return self._get_repo(repo_name)
+
+ if ref is not None and refs is None:
+ refs = (ref,)
+ else:
+ refs = list(refs)
+
+ if self.has_repo(repo_name):
+ repo = self._get_repo(repo_name)
+ if refs:
+ required_refs = set(refs)
+ missing_refs = set()
+ for required_ref in required_refs: # pragma: no cover
+ if morphlib.git.is_valid_sha1(required_ref):
+ try:
+ repo.resolve_ref_to_commit(required_ref)
+ continue
+ except morphlib.gitdir.InvalidRefError:
+ pass
+ missing_refs.add(required_ref)
+
+ if not missing_refs: # pragma: no cover
+ self.status_cb(
+ msg='Not updating git repository %(repo_name)s '
+ 'because it already contains %(sha1s)s',
+ chatty=True, repo_name=repo_name,
+ sha1s=_word_join_list(tuple(required_refs)))
+ return repo
+
+ if ref:
+ ref_str = 'ref %s' % ref
+ else:
+ ref_str = '%i refs' % len(refs)
+ self.status_cb(msg='Updating %(repo_name)s for %(ref_str)s',
+ repo_name=repo_name, ref_str=ref_str)
+ self._update_repo(repo)
+ return repo
+ else:
+ self.status_cb(msg='Cloning %(repo_name)s', repo_name=repo_name)
+ return self._get_repo(repo_name)
+
+ def ensure_submodules(self, toplevel_repo,
+ toplevel_ref): # pragma: no cover
+ '''Ensure any submodules of a given repo are cached and up to date.'''
+
+ def submodules_for_repo(repo_path, ref):
+ try:
+ submodules = morphlib.git.Submodules(repo_path, ref,
+ runcmd_cb=self.runcmd_cb)
+ submodules.load()
+ return [(submod.url, submod.commit) for submod in submodules]
+ except morphlib.git.NoModulesFileError:
+ return []
+
+ done = set()
+ subs_to_process = submodules_for_repo(toplevel_repo.dirname,
+ toplevel_ref)
+ while subs_to_process:
+ url, ref = subs_to_process.pop()
+ done.add((url, ref))
+
+ cached_repo = self.get_updated_repo(url, ref=ref)
+
+ for submod in submodules_for_repo(cached_repo.dirname, ref):
+ if submod not in done:
+ subs_to_process.append(submod)
+
+ def resolve_ref_to_commit_and_tree(self, repo_name,
+ ref): # pragma: no cover
+ '''Given the name of a ref, returns the commit and tree SHA1.
+
+ If a remote cache server is available, this function can query the
+ remote cache server to avoid needing to clone the entire repo.
+
+ This might break if the ref points to a tag, not a commit.
+
+ '''
+ absref = None
+ tree = None
+
+ if self.has_repo(repo_name):
+ repo = self.get_updated_repo(repo_name, ref)
+ # If the user passed --no-git-update, and the ref is a SHA1 not
+ # available locally, this call will raise an exception.
+ absref = repo.resolve_ref_to_commit(ref)
+ tree = repo.resolve_ref_to_tree(absref)
+ elif self.remote_cache is not None:
+ try:
+ absref, tree = self.remote_cache.resolve_ref(repo_name, ref)
+ if absref is not None:
+ self.status_cb(
+ msg='Resolved %(repo_name)s %(ref)s via remote repo '
+ 'cache', repo_name=repo_name, ref=ref, chatty=True)
+ except BaseException as e:
+ logging.warning('Caught (and ignored) exception: %s' % str(e))
+
+ if absref is None:
+ # As a last resort, clone the repo to resolve the ref.
+ repo = self.get_updated_repo(repo_name, ref)
+ absref = repo.resolve_ref_to_commit(ref)
+ tree = repo.resolve_ref_to_tree(absref)
+
+ return absref, tree
+
+ def ls_tree(self, repo_name, ref): # pragma: no cover
+ '''Lists the files contained in a commit.
+
+ If a remote cache server is available, this function can query the
+ remote cache server to avoid needing to clone the entire repo.
+
+ The list is non-recursive, so you can only see files in the top
+ directory of the repo. To do a recursive operation, use a GitDir
+ instance returned by get_updated_repo().
+
+ '''
+ files = []
+
+ if self.has_repo(repo_name):
+ repo = self.get_updated_repo(repo_name, ref)
+ files = repo.list_files(ref=ref, recurse=False)
+ elif self.remote_cache is not None:
+ files = self.remote_cache.ls_tree(repo_name, ref)
+
+ if len(files) == 0:
+ # As a last resort, clone the repo to get the file list.
+ repo = self.get_updated_repo(repo_name, ref)
+ files = repo.list_files(ref=ref, recurse=False)
+
+ return files
+
+ def cat_file(self, repo_name, ref, filename): # pragma: no cover
+ '''Returns a single file from a repo.
+
+ If a remote cache server is available, this function can query the
+ remote cache server to avoid needing to clone the entire repo.
+
+ '''
+ contents = None
+
+ if self.has_repo(repo_name):
+ repo = self.get_updated_repo(repo_name, ref)
+ contents = repo.get_file_from_ref(ref, filename)
+ elif self.remote_cache is not None:
+ contents = self.remote_cache.cat_file(repo_name, ref, filename)
+
+ if not contents:
+ # As a last resort, clone the repo to get the file contents.
+ repo = self.get_updated_repo(repo_name, ref)
+ contents = repo.get_file_from_ref(ref, filename)
+
+ return contents
+
+
+class RemoteResolveRefError(cliapp.AppException):
+
+ def __init__(self, repo_name, ref):
+ cliapp.AppException.__init__(
+ self, 'Failed to resolve ref %s for repo %s from remote cache' %
+ (ref, repo_name))
+
+
+class RemoteCatFileError(cliapp.AppException):
+
+ def __init__(self, repo_name, ref, filename):
+ cliapp.AppException.__init__(
+ self, 'Failed to cat file %s in ref %s of repo %s, from remote '
+ 'cache' % (filename, ref, repo_name))
+
+
+class RemoteLsTreeError(cliapp.AppException):
+
+ def __init__(self, repo_name, ref):
+ cliapp.AppException.__init__(
+ self, 'Failed to list tree in ref %s of repo %s, from remote'
+ 'cache' % (ref, repo_name))
+
+
+class RemoteRepoCache(object):
+
+ def __init__(self, server_url, resolver):
+ self.server_url = server_url
+ self._resolver = resolver
+
+ def resolve_ref(self, repo_name, ref):
+ repo_url = self._resolver.pull_url(repo_name)
+ try:
+ return self._resolve_ref_for_repo_url(repo_url, ref)
+ except BaseException as e:
+ logging.error('Caught exception: %s' % str(e))
+ raise RemoteResolveRefError(repo_name, ref)
+
+ def cat_file(self, repo_name, ref, filename):
+ repo_url = self._resolver.pull_url(repo_name)
+ try:
+ return self._cat_file_for_repo_url(repo_url, ref, filename)
+ except urllib2.HTTPError as e:
+ logging.error('Caught exception: %s' % str(e))
+ if e.code == 404:
+ raise RemoteCatFileError(repo_name, ref, filename)
+ raise # pragma: no cover
+
+ def ls_tree(self, repo_name, ref):
+ repo_url = self._resolver.pull_url(repo_name)
+ try:
+ info = json.loads(self._ls_tree_for_repo_url(repo_url, ref))
+ return info['tree'].keys()
+ except BaseException as e:
+ logging.error('Caught exception: %s' % str(e))
+ raise RemoteLsTreeError(repo_name, ref)
+
+ def _resolve_ref_for_repo_url(self, repo_url, ref): # pragma: no cover
+ data = self._make_request(
+ 'sha1s?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))
+ info = json.loads(data)
+ return info['sha1'], info['tree']
+
+ def _cat_file_for_repo_url(self, repo_url, ref,
+ filename): # pragma: no cover
+ return self._make_request(
+ 'files?repo=%s&ref=%s&filename=%s'
+ % self._quote_strings(repo_url, ref, filename))
+
+ def _ls_tree_for_repo_url(self, repo_url, ref): # pragma: no cover
+ return self._make_request(
+ 'trees?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))
+
+ def _quote_strings(self, *args): # pragma: no cover
+ return tuple(urllib.quote(string) for string in args)
+
+ def _make_request(self, path): # pragma: no cover
+ server_url = self.server_url
+ if not server_url.endswith('/'):
+ server_url += '/'
+ url = urlparse.urljoin(server_url, '/1.0/%s' % path)
+ handle = urllib2.urlopen(url)
+ return handle.read()
diff --git a/morphlib/repocache_tests.py b/morphlib/repocache_tests.py
new file mode 100644
index 00000000..6e07aedb
--- /dev/null
+++ b/morphlib/repocache_tests.py
@@ -0,0 +1,281 @@
+# Copyright (C) 2012-2016 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import unittest
+import urllib2
+import json
+import os
+
+import cliapp
+import fs.memoryfs
+import tempfile
+
+import morphlib
+import morphlib.gitdir_tests
+
+
+class TestableRepoCache(morphlib.repocache.RepoCache):
+    '''RepoCache variant for unit tests, with Git operations stubbed out.
+
+    No real Git command is run. A 'clone' (and the remote URL changes that
+    the stub understands) is recorded in the 'remotes' dict, and every
+    tarball URL that would have been downloaded is appended to the
+    'tarballs_fetched' list, so tests can inspect what happened.
+
+    There is a single repo alias, 'example' which expands to
+    git://example.com/.
+
+    '''
+    def __init__(self, update_gits=True):
+        resolver = morphlib.repoaliasresolver.RepoAliasResolver(
+            ['example=git://example.com/#example.com:%s.git'])
+
+        # The cache lives in an in-memory filesystem, not on disk.
+        morphlib.repocache.RepoCache.__init__(
+            self, '/cache/gits/', resolver,
+            tarball_base_url='http://lorry.example.com/tarballs',
+            custom_fs=fs.memoryfs.MemoryFS(), update_gits=update_gits)
+
+        self._mkdtemp_count = 0
+        self.tarballs_fetched = []
+        self.remotes = {}
+
+    def _mkdtemp(self, dirname):
+        # Deterministic stand-in for a temporary directory: foo0, foo1, ...
+        name = 'foo%d' % self._mkdtemp_count
+        self._mkdtemp_count += 1
+        self.fs.makedir('/'.join((dirname, name)))
+        return name
+
+    def _fetch(self, url, path):
+        # Only record the request; nothing is downloaded.
+        self.tarballs_fetched.append(url)
+
+    def _git(self, args, **kwargs):
+        command = args[0]
+        subcommand = args[0:2]
+        if command == 'clone':
+            assert len(args) == 5
+            remote, local = args[3], args[4]
+            self.remotes['origin'] = {'url': remote, 'updates': 0}
+            self.fs.makedir(local, recursive=True)
+        elif subcommand == ['remote', 'set-url']:
+            self.remotes[args[2]] = {'url': args[3]}
+        elif subcommand == ['config', 'remote.origin.url']:
+            self.remotes['origin'] = {'url': args[2]}
+        elif subcommand in (['config', 'remote.origin.mirror'],
+                            ['config', 'remote.origin.fetch']):
+            # Accepted, but nothing is recorded for these settings.
+            pass
+        else:
+            raise NotImplementedError()
+
+    def _update_repo(self, cached_repo):
+        # Updating is a no-op in tests.
+        pass
+
+
+class RepoCacheTests(unittest.TestCase):
+    '''Tests for RepoCache, run against the stubbed TestableRepoCache.'''
+
+    def test_has_not_got_repo_initially(self):
+        repo_cache = TestableRepoCache()
+        self.assertFalse(repo_cache.has_repo('example:repo'))
+        self.assertFalse(repo_cache.has_repo('git://example.com/repo'))
+
+    def test_happily_caches_same_repo_twice(self):
+        repo_cache = TestableRepoCache()
+        with morphlib.gitdir_tests.allow_nonexistant_git_repos():
+            repo_cache.get_updated_repo('example:repo', ref='master')
+            repo_cache.get_updated_repo('example:repo', ref='master')
+
+    def test_fails_to_cache_when_remote_does_not_exist(self):
+        repo_cache = TestableRepoCache()
+
+        # Simulate a clone that creates its target directory and then fails.
+        def clone_fails(args, **kwargs):
+            repo_cache.fs.makedir(args[4])
+            raise cliapp.AppException('')
+        repo_cache._git = clone_fails
+
+        with self.assertRaises(morphlib.repocache.NoRemote):
+            repo_cache.get_updated_repo('example:repo', 'master')
+
+    def test_does_not_mind_a_missing_tarball(self):
+        repo_cache = TestableRepoCache()
+
+        def no_tarball(*args, **kwargs):
+            raise cliapp.AppException('Not found')
+        repo_cache._fetch = no_tarball
+
+        with morphlib.gitdir_tests.allow_nonexistant_git_repos():
+            repo_cache.get_updated_repo('example:repo', ref='master')
+        self.assertEqual(repo_cache.tarballs_fetched, [])
+
+    def test_fetches_tarball_when_it_exists(self):
+        repo_url = 'git://example.com/reponame'
+        repo_cache = TestableRepoCache()
+
+        with morphlib.gitdir_tests.allow_nonexistant_git_repos():
+            repo_cache.get_updated_repo(repo_url, ref='master')
+
+        tarball_url = '%s%s.tar' % (repo_cache._tarball_base_url,
+                                    repo_cache._escape(repo_url))
+        self.assertEqual(repo_cache.tarballs_fetched, [tarball_url])
+
+        # Check that the cache updated the repo after fetching the tarball.
+        self.assertEqual(repo_cache.remotes['origin']['url'], repo_url)
+
+    def test_escapes_repourl_as_filename(self):
+        repo_cache = TestableRepoCache()
+        escaped = repo_cache._escape('git://example.com/reponame')
+        self.assertFalse('/' in escaped)
+
+    def test_noremote_error_message_contains_repo_name(self):
+        repo_url = 'git://example.com/reponame'
+        e = morphlib.repocache.NoRemote(repo_url, [])
+        self.assertTrue(repo_url in str(e))
+
+    def test_avoids_caching_local_repo(self):
+        repo_cache = TestableRepoCache()
+
+        repo_cache.fs.makedir('/local/repo', recursive=True)
+        with morphlib.gitdir_tests.allow_nonexistant_git_repos():
+            # NOTE(review): this passes refs= (plural) whereas the other
+            # tests pass ref= -- confirm against get_updated_repo().
+            cached = repo_cache.get_updated_repo(
+                'file:///local/repo', refs='master')
+        # Use assertEqual rather than a bare assert: it is not stripped
+        # under 'python -O' and reports both values on failure.
+        self.assertEqual(cached.dirname, '/local/repo')
+
+    def test_no_git_update_setting(self):
+        repo_cache = TestableRepoCache(update_gits=False)
+
+        with self.assertRaises(morphlib.repocache.NotCached):
+            repo_cache.get_updated_repo('example:repo', ref='master')
+
+
+class RemoteRepoCacheTests(unittest.TestCase):
+    '''Tests for RemoteRepoCache with its three request helpers stubbed.
+
+    setUp() replaces the cache's _resolve_ref_for_repo_url,
+    _cat_file_for_repo_url and _ls_tree_for_repo_url with the methods
+    below, which answer from the 'sha1s' and 'files' dicts instead of
+    contacting a server.
+
+    '''
+
+    # The only commit known to the fake server data.
+    KNOWN_SHA1 = 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9'
+    # A well-formed SHA1 that the fake server data does not contain.
+    UNKNOWN_SHA1 = 'ecd7a325095a0d19b8c3d76f578d85b979461d41'
+
+    def _resolve_ref_for_repo_url(self, repo_url, ref):
+        return self.sha1s[repo_url][ref]
+
+    def _cat_file_for_repo_url(self, repo_url, sha1, filename):
+        # Unknown repo, commit or file is reported the way the stubbed
+        # request would report it: as an HTTP 404.
+        try:
+            return self.files[repo_url][sha1][filename]
+        except KeyError:
+            raise urllib2.HTTPError(url='', code=404, msg='Not found',
+                                    hdrs={}, fp=None)
+
+    def _ls_tree_for_repo_url(self, repo_url, sha1):
+        reply = {
+            'repo': repo_url,
+            'ref': sha1,
+            'tree': self.files[repo_url][sha1],
+        }
+        return json.dumps(reply)
+
+    def setUp(self):
+        self.sha1s = {
+            'git://gitorious.org/baserock/morph': {
+                'master': self.KNOWN_SHA1,
+            },
+        }
+        self.files = {
+            'git://gitorious.org/baserock-morphs/linux': {
+                self.KNOWN_SHA1: {
+                    'linux.morph': 'linux morphology',
+                },
+            },
+        }
+        self.server_url = 'http://foo.bar'
+        resolver = morphlib.repoaliasresolver.RepoAliasResolver([
+            'upstream=git://gitorious.org/baserock-morphs/#foo',
+            'baserock=git://gitorious.org/baserock/#foo',
+        ])
+        cache = morphlib.repocache.RemoteRepoCache(self.server_url, resolver)
+        cache._resolve_ref_for_repo_url = self._resolve_ref_for_repo_url
+        cache._cat_file_for_repo_url = self._cat_file_for_repo_url
+        cache._ls_tree_for_repo_url = self._ls_tree_for_repo_url
+        self.cache = cache
+
+    def test_sets_server_url(self):
+        self.assertEqual(self.cache.server_url, self.server_url)
+
+    def test_resolve_existing_ref_for_existing_repo(self):
+        resolved = self.cache.resolve_ref('baserock:morph', 'master')
+        expected = self.sha1s['git://gitorious.org/baserock/morph']['master']
+        self.assertEqual(resolved, expected)
+
+    def test_fail_resolving_existing_ref_for_non_existent_repo(self):
+        with self.assertRaises(morphlib.repocache.RemoteResolveRefError):
+            self.cache.resolve_ref('non-existent-repo', 'master')
+
+    def test_fail_resolving_non_existent_ref_for_existing_repo(self):
+        with self.assertRaises(morphlib.repocache.RemoteResolveRefError):
+            self.cache.resolve_ref('baserock:morph', 'non-existent-ref')
+
+    def test_fail_resolving_non_existent_ref_for_non_existent_repo(self):
+        with self.assertRaises(morphlib.repocache.RemoteResolveRefError):
+            self.cache.resolve_ref('non-existent-repo', 'non-existent-ref')
+
+    def test_cat_existing_file_in_existing_repo_and_ref(self):
+        content = self.cache.cat_file(
+            'upstream:linux', self.KNOWN_SHA1, 'linux.morph')
+        self.assertEqual(content, 'linux morphology')
+
+    def test_fail_cat_file_using_invalid_sha1(self):
+        with self.assertRaises(morphlib.repocache.RemoteCatFileError):
+            self.cache.cat_file('upstream:linux', 'blablabla', 'linux.morph')
+
+    def test_fail_cat_non_existent_file_in_existing_repo_and_ref(self):
+        with self.assertRaises(morphlib.repocache.RemoteCatFileError):
+            self.cache.cat_file(
+                'upstream:linux', self.KNOWN_SHA1, 'non-existent-file')
+
+    def test_fail_cat_file_in_non_existent_ref_in_existing_repo(self):
+        with self.assertRaises(morphlib.repocache.RemoteCatFileError):
+            self.cache.cat_file(
+                'upstream:linux', self.UNKNOWN_SHA1, 'linux.morph')
+
+    def test_fail_cat_file_in_non_existent_repo(self):
+        with self.assertRaises(morphlib.repocache.RemoteCatFileError):
+            self.cache.cat_file(
+                'non-existent-repo', self.KNOWN_SHA1, 'some-file')
+
+    def test_ls_tree_in_existing_repo_and_ref(self):
+        content = self.cache.ls_tree('upstream:linux', self.KNOWN_SHA1)
+        self.assertEqual(content, ['linux.morph'])
+
+    def test_fail_ls_tree_using_invalid_sha1(self):
+        with self.assertRaises(morphlib.repocache.RemoteLsTreeError):
+            self.cache.ls_tree('upstream:linux', 'blablabla')
+
+    def test_fail_ls_file_in_non_existent_ref_in_existing_repo(self):
+        with self.assertRaises(morphlib.repocache.RemoteLsTreeError):
+            self.cache.ls_tree('upstream:linux', self.UNKNOWN_SHA1)
+
+    def test_fail_ls_tree_in_non_existent_repo(self):
+        with self.assertRaises(morphlib.repocache.RemoteLsTreeError):
+            self.cache.ls_tree('non-existent-repo', self.KNOWN_SHA1)
+
diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py
index c6f77cf9..f8dac8b1 100644
--- a/morphlib/sourceresolver.py
+++ b/morphlib/sourceresolver.py
@@ -138,17 +138,17 @@ class SourceResolver(object):
be either a normal URL, or a keyed URL using a repo-alias like
'baserock:baserock/definitions'.
- The 'remote repo cache' is a Baserock Trove system. It functions as a
- normal Git server, but in addition it runs a service on port 8080 called
- 'morph-cache-server' which can resolve refs, list their contents and read
- specific files from the repos it holds. This allows the SourceResolver to
- work out how to build something without cloning the whole repo. (If a local
- build of that source ends up being necessary then it will get cloned into
- the local cache later on).
-
- The second layer of caching is the local repository cache, which mirrors
- entire repositories in $cachedir/gits. If a repo is not in the remote repo
- cache then it must be present in the local repo cache.
+ Each commit used in a build is resolved to a tree SHA1, which means that
+ merge commits and changes to commit messages don't affect the cache
+ identity of a chunk. This does mean we need to query every repo in the
+ build graph, though.
+
+ All requests for information on a repo use the 'repocache' module. This
+ maintains a local copy of all the Git repos we need to work with. A repo
+ cache can also use a remote 'morph-cache-server' instance, if available,
+ to query certain information about a repo without cloning it locally.
+ Using this we can resolve commits to trees without having to clone every
+ repo locally, which is a huge performance improvement in some cases.
The third layer of caching is a simple commit SHA1 -> tree SHA mapping. It
turns out that even if all repos are available locally, running
@@ -168,14 +168,11 @@ class SourceResolver(object):
'''
- def __init__(self, local_repo_cache, remote_repo_cache,
- tree_cache_manager, update_repos,
- status_cb=None):
- self.lrc = local_repo_cache
- self.rrc = remote_repo_cache
+ def __init__(self, repo_cache, tree_cache_manager, status_cb=None):
+ self.repo_cache = repo_cache
self.tree_cache_manager = tree_cache_manager
- self.update = update_repos
+ self.update = repo_cache.update_gits
self.status = status_cb
def _resolve_ref(self, resolved_trees, reponame, ref):
@@ -184,9 +181,6 @@ class SourceResolver(object):
If update is True then this has the side-effect of updating or cloning
the repository into the local repo cache.
- This function is complex due to the 3 layers of caching described in
- the SourceResolver docstring.
-
'''
# The Baserock reference definitions use absolute refs so, and, if the
@@ -198,29 +192,8 @@ class SourceResolver(object):
logging.debug('tree (%s, %s) not in cache', reponame, ref)
- absref = None
- if self.lrc.has_repo(reponame):
- repo = self.lrc.get_updated_repo(reponame, ref)
- # If the user passed --no-git-update, and the ref is a SHA1 not
- # available locally, this call will raise an exception.
- absref = repo.resolve_ref_to_commit(ref)
- tree = repo.resolve_ref_to_tree(absref)
- elif self.rrc is not None:
- try:
- absref, tree = self.rrc.resolve_ref(reponame, ref)
- if absref is not None:
- self.status(msg='Resolved %(reponame)s %(ref)s via remote '
- 'repo cache',
- reponame=reponame,
- ref=ref,
- chatty=True)
- except BaseException as e:
- logging.warning('Caught (and ignored) exception: %s' % str(e))
-
- if absref is None:
- repo = self.lrc.get_updated_repo(reponame, ref)
- absref = repo.resolve_ref_to_commit(ref)
- tree = repo.resolve_ref_to_tree(absref)
+ absref, tree = self.repo_cache.resolve_ref_to_commit_and_tree(reponame,
+ ref)
logging.debug('Writing tree to cache with ref (%s, %s)',
reponame, absref)
@@ -430,7 +403,7 @@ class SourceResolver(object):
if definitions_original_ref:
definitions_ref = definitions_original_ref
- definitions_cached_repo = self.lrc.get_updated_repo(
+ definitions_cached_repo = self.repo_cache.get_updated_repo(
repo_name=definitions_repo, ref=definitions_absref)
definitions_cached_repo.extract_commit(
definitions_absref, definitions_checkout_dir)
@@ -489,9 +462,8 @@ def _find_duplicate_chunks(sourcepool): #pragma: no cover
return {k: v for (k, v) in chunk_sources_by_name.iteritems() if len(v) > 1}
-def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
- original_ref=None, update_repos=True,
- status_cb=None):
+def create_source_pool(repo_cache, repo, ref, filenames,
+ original_ref=None, status_cb=None):
'''Find all the sources involved in building a given system.
Given a system morphology, this function will traverse the tree of stratum
@@ -502,8 +474,12 @@ def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
Note that Git submodules are not considered 'sources' in the current
implementation, and so they must be handled separately.
- The 'lrc' and 'rrc' parameters specify the local and remote Git repository
- caches used for resolving the sources.
+ The 'repo_cache' parameter specifies a repo cache which is used when
+ accessing the source repos. If a git_resolve_cache_server is set for this
+ repo cache, and all repos in the build are known to it, then this function
+ will only need the definitions.git repo available locally. If not, then all
+ repos must be cloned in order to resolve the refs to tree SHA1s, which is
+ a slow process!
'''
pool = morphlib.sourcepool.SourcePool()
@@ -529,10 +505,10 @@ def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
pool.add(source)
tree_cache_manager = PickleCacheManager(
- os.path.join(cachedir, tree_cache_filename), tree_cache_size)
+ os.path.join(repo_cache.cachedir, tree_cache_filename),
+ tree_cache_size)
- resolver = SourceResolver(lrc, rrc, tree_cache_manager, update_repos,
- status_cb)
+ resolver = SourceResolver(repo_cache, tree_cache_manager, status_cb)
resolver.traverse_morphs(repo, ref, filenames,
visit=add_to_pool,
definitions_original_ref=original_ref)
diff --git a/morphlib/util.py b/morphlib/util.py
index 3b3e4d2b..ba170952 100644
--- a/morphlib/util.py
+++ b/morphlib/util.py
@@ -102,21 +102,16 @@ def make_concurrency(cores=None):
return min(n, 20)
-def create_cachedir(settings): # pragma: no cover
- '''Return cache directory, creating it if necessary.'''
-
- cachedir = settings['cachedir']
+def ensure_directory_exists(path): # pragma: no cover
# Don't check the folder exists and handle the exception that happens in
# this case to avoid errors if the folder is created by something else
# just after the check.
try:
- os.mkdir(cachedir)
+ os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
- return cachedir
-
def get_artifact_cache_server(settings): # pragma: no cover
if settings['artifact-cache-server']:
@@ -141,17 +136,8 @@ def new_artifact_caches(settings): # pragma: no cover
'''
- cachedir = create_cachedir(settings)
- artifact_cachedir = os.path.join(cachedir, 'artifacts')
- # Don't check the folder exists and handle the exception that happens in
- # this case to avoid errors if the folder is created by something else
- # just after the check.
- try:
- os.mkdir(artifact_cachedir)
- except OSError as e:
- if e.errno != errno.EEXIST:
- raise
-
+ artifact_cachedir = os.path.join(settings['cachedir'], 'artifacts')
+ ensure_directory_exists(artifact_cachedir)
lac = morphlib.localartifactcache.LocalArtifactCache(
fs.osfs.OSFS(artifact_cachedir))
@@ -222,24 +208,26 @@ def combine_aliases(app): # pragma: no cover
return alias_map.values()
-def new_repo_caches(app): # pragma: no cover
- '''Create new objects for local, remote git repository caches.'''
- aliases = app.settings['repo-alias']
- cachedir = create_cachedir(app.settings)
- gits_dir = os.path.join(cachedir, 'gits')
- tarball_base_url = app.settings['tarball-server']
- repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
- lrc = morphlib.localrepocache.LocalRepoCache(
- app, gits_dir, repo_resolver, tarball_base_url=tarball_base_url)
+def new_repo_cache(app): # pragma: no cover
+ '''Create a RepoCache instance using settings from app.settings.'''
- url = get_git_resolve_cache_server(app.settings)
- if url:
- rrc = morphlib.remoterepocache.RemoteRepoCache(url, repo_resolver)
- else:
- rrc = None
+ gits_dir = os.path.join(app.settings['cachedir'], 'gits')
+ tarball_base_url = app.settings['tarball-server']
+ git_resolve_cache_url = get_git_resolve_cache_server(app.settings)
+ aliases = app.settings['repo-alias']
+ repo_alias_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
+
+ return morphlib.repocache.RepoCache(
+ gits_dir, repo_alias_resolver,
+ tarball_base_url=tarball_base_url,
+ git_resolve_cache_url=git_resolve_cache_url,
+ update_gits=(not app.settings['no-git-update']),
+ runcmd_cb=app.runcmd,
+ status_cb=app.status,
+ verbose=app.settings['verbose'],
+ debug=app.settings['debug'])
- return lrc, rrc
def env_variable_is_password(key): # pragma: no cover
return 'PASSWORD' in key