summary | refs | log | tree | commit | diff
path: root/morphlib/localrepocache.py
diff options
context:
space:
mode:
author Sam Thursfield <sam.thursfield@codethink.co.uk> 2016-03-02 17:11:34 +0000
committer Sam Thursfield <sam.thursfield@codethink.co.uk> 2016-03-16 19:11:21 +0000
commit d58d8e8f7a4ec03ff14021a4515c8283dad52573 (patch)
tree 5ece2d0524e4423bb953e6140831c9fde93b7219 /morphlib/localrepocache.py
parent 014a029ade9a045a839ca86c35690b218098ea33 (diff)
download morph-d58d8e8f7a4ec03ff14021a4515c8283dad52573.tar.gz
Unify local and remote repo cache modules
There's not really any reason you'd want to use the RemoteRepoCache class except as a workaround for the slow speed of some LocalRepoCache operations, so I can't see this ruining anyone's day.

The main reason for doing this is so we can simplify the sourceresolver code. One reason that the sourceresolver class is so hopelessly complicated is that right now it has to use two incompatible interfaces for Git repo caches.

I've taken the opportunity to detangle the RepoCache class from the App class. Now all of the configuration for the RepoCache class is passed into the constructor explicitly. This makes the class usable from outside Morph:

    resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases=[])
    repo_cache = morphlib.repocache.RepoCache('/src/cache/gits', resolver)

Change-Id: I596c81d7645b67504c88e555172a8c238f4f8a66
Diffstat (limited to 'morphlib/localrepocache.py')
-rw-r--r-- morphlib/localrepocache.py 357
1 files changed, 0 insertions, 357 deletions
diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py
deleted file mode 100644
index 3a03fe1d..00000000
--- a/morphlib/localrepocache.py
+++ /dev/null
@@ -1,357 +0,0 @@
-# Copyright (C) 2012-2016 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import os
-import urlparse
-import string
-import sys
-import tempfile
-
-import cliapp
-import fs.osfs
-
-import morphlib
-from morphlib.util import word_join_list as _word_join_list
-
-
-# urlparse.urljoin needs to know details of the URL scheme being used.
-# It does not know about git:// by default, so we teach it here.
-gitscheme = ['git']
-urlparse.uses_relative.extend(gitscheme)
-urlparse.uses_netloc.extend(gitscheme)
-urlparse.uses_params.extend(gitscheme)
-urlparse.uses_query.extend(gitscheme)
-urlparse.uses_fragment.extend(gitscheme)
-
-
-def quote_url(url):
- ''' Convert URIs to strings that only contain digits, letters, % and _.
-
- NOTE: When changing the code of this function, make sure to also apply
- the same to the quote_url() function of lorry. Otherwise the git tarballs
- generated by lorry may no longer be found by morph.
-
- '''
- valid_chars = string.digits + string.letters + '%_'
- transl = lambda x: x if x in valid_chars else '_'
- return ''.join([transl(x) for x in url])
-
-
-class NoRemote(morphlib.Error):
-
- def __init__(self, reponame, errors):
- self.reponame = reponame
- self.errors = errors
-
- def __str__(self):
- return '\n\t'.join(['Cannot find remote git repository: %s' %
- self.reponame] + self.errors)
-
-
-class NotCached(morphlib.Error):
- def __init__(self, reponame):
- self.reponame = reponame
-
- def __str__(self): # pragma: no cover
- return 'Repository %s is not cached yet' % self.reponame
-
-
-class UpdateError(cliapp.AppException): # pragma: no cover
-
- def __init__(self, repo):
- cliapp.AppException.__init__(
- self, 'Failed to update cached version of repo %s' % repo)
-
-
-class CachedRepo(morphlib.gitdir.GitDirectory):
- '''A locally cached Git repository with an origin remote set up.
-
- On instance of this class represents a locally cached version of a
- remote Git repository. This remote repository is set up as the
- 'origin' remote.
-
- Cached repositories are bare mirrors of the upstream. Locally created
- branches will be lost the next time the repository updates.
-
- '''
- def __init__(self, path, original_name, url):
- self.original_name = original_name
- self.url = url
- self.is_mirror = not url.startswith('file://')
- self.already_updated = False
-
- super(CachedRepo, self).__init__(path)
-
- def __str__(self): # pragma: no cover
- return self.url
-
-
-class LocalRepoCache(object):
-
- '''Manage locally cached git repositories.
-
- When we build stuff, we need a local copy of the git repository.
- To avoid having to clone the repositories for every build, we
- maintain a local cache of the repositories: we first clone the
- remote repository to the cache, and then make a local clone from
- the cache to the build environment. This class manages the local
- cached repositories.
-
- Repositories may be specified either using a full URL, in a form
- understood by git(1), or as a repository name to which a base url
- is prepended. The base urls are given to the class when it is
- created.
-
- Instead of cloning via a normal 'git clone' directly from the
- git server, we first try to download a tarball from a url, and
- if that works, we unpack the tarball.
-
- '''
-
- def __init__(self, app, cachedir, resolver, tarball_base_url=None):
- self._app = app
- self.fs = fs.osfs.OSFS('/')
- self._cachedir = cachedir
- self._resolver = resolver
- if tarball_base_url and not tarball_base_url.endswith('/'):
- tarball_base_url += '/' # pragma: no cover
- self._tarball_base_url = tarball_base_url
- self._cached_repo_objects = {}
-
- def _git(self, args, **kwargs): # pragma: no cover
- '''Execute git command.
-
- This is a method of its own so that unit tests can easily override
- all use of the external git command.
-
- '''
-
- morphlib.git.gitcmd(self._app.runcmd, *args, **kwargs)
-
- def _fetch(self, url, path): # pragma: no cover
- '''Fetch contents of url into a file.
-
- This method is meant to be overridden by unit tests.
-
- '''
- self._app.status(msg="Trying to fetch %(tarball)s to seed the cache",
- tarball=url, chatty=True)
-
- if self._app.settings['verbose']:
- verbosity_flags = []
- kwargs = dict(stderr=sys.stderr)
- else:
- verbosity_flags = ['--quiet']
- kwargs = dict()
-
- def wget_command():
- return ['wget'] + verbosity_flags + ['-O-', url]
-
- self._app.runcmd(wget_command(),
- ['tar', '--no-same-owner', '-xf', '-'],
- cwd=path, **kwargs)
-
- def _mkdtemp(self, dirname): # pragma: no cover
- '''Creates a temporary directory.
-
- This method is meant to be overridden by unit tests.
-
- '''
- return tempfile.mkdtemp(dir=dirname)
-
- def _escape(self, url):
- '''Escape a URL so it can be used as a basename in a file.'''
-
- # FIXME: The following is a nicer way than to do this.
- # However, for compatibility, we need to use the same as the
- # tarball server (set up by Lorry) uses.
- # return urllib.quote(url, safe='')
-
- return quote_url(url)
-
- def _cache_name(self, url):
- scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
- if scheme != 'file':
- path = os.path.join(self._cachedir, self._escape(url))
- return path
-
- def has_repo(self, reponame):
- '''Have we already got a cache of a given repo?'''
- url = self._resolver.pull_url(reponame)
- path = self._cache_name(url)
- return self.fs.exists(path)
-
- def _clone_with_tarball(self, repourl, path):
- tarball_url = urlparse.urljoin(self._tarball_base_url,
- self._escape(repourl)) + '.tar'
- try:
- self.fs.makedir(path)
- self._fetch(tarball_url, path)
- self._git(['config', 'remote.origin.url', repourl], cwd=path)
- self._git(['config', 'remote.origin.mirror', 'true'], cwd=path)
- self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'],
- cwd=path)
- except BaseException as e: # pragma: no cover
- if self.fs.exists(path):
- self.fs.removedir(path, force=True)
- return False, 'Unable to extract tarball %s: %s' % (
- tarball_url, e)
-
- return True, None
-
- def _cache_repo(self, reponame):
- '''Clone the given repo into the cache.
-
- If the repo is already cloned, do nothing.
-
- '''
- errors = []
- if not self.fs.exists(self._cachedir):
- self.fs.makedir(self._cachedir, recursive=True)
-
- try:
- return self._get_repo(reponame)
- except NotCached as e:
- pass
-
- repourl = self._resolver.pull_url(reponame)
- path = self._cache_name(repourl)
- if self._tarball_base_url:
- ok, error = self._clone_with_tarball(repourl, path)
- if ok:
- repo = self._get_repo(reponame)
- self._update_repo(repo)
- return repo
- else:
- errors.append(error)
- self._app.status(
- msg='Using git clone.')
-
- target = self._mkdtemp(self._cachedir)
-
- try:
- self._git(['clone', '--mirror', '-n', repourl, target],
- echo_stderr=self._app.settings['debug'])
- except cliapp.AppException as e:
- errors.append('Unable to clone from %s to %s: %s' %
- (repourl, target, e))
- if self.fs.exists(target):
- self.fs.removedir(target, recursive=True, force=True)
- raise NoRemote(reponame, errors)
-
- self.fs.rename(target, path)
-
- repo = self._get_repo(reponame)
- repo.already_updated = True
- return repo
-
- def _get_repo(self, reponame):
- '''Return an object representing a cached repository.'''
-
- if reponame in self._cached_repo_objects:
- return self._cached_repo_objects[reponame]
- else:
- repourl = self._resolver.pull_url(reponame)
- path = self._cache_name(repourl)
- if self.fs.exists(path):
- repo = CachedRepo(path, reponame, repourl)
- self._cached_repo_objects[reponame] = repo
- return repo
- raise NotCached(reponame)
-
- def _update_repo(self, cachedrepo): # pragma: no cover
- try:
- cachedrepo.update_remotes(
- echo_stderr=self._app.settings['verbose'])
- cachedrepo.already_updated = True
- except cliapp.AppException:
- raise UpdateError(self)
-
- def get_updated_repo(self, repo_name,
- ref=None, refs=None): # pragma: no cover
- '''Return object representing cached repository.
-
- If all the specified refs in 'ref' or 'refs' point to SHA1s that are
- already in the repository, or --no-git-update is set, then the
- repository won't be updated.
-
- '''
-
- if self._app.settings['no-git-update']:
- self._app.status(msg='Not updating existing git repository '
- '%(repo_name)s '
- 'because of no-git-update being set',
- chatty=True,
- repo_name=repo_name)
- return self._get_repo(repo_name)
-
- if ref is not None and refs is None:
- refs = (ref,)
-
- if self.has_repo(repo_name):
- repo = self._get_repo(repo_name)
- if refs:
- required_refs = set(refs)
- missing_refs = set()
- for required_ref in required_refs:
- if morphlib.git.is_valid_sha1(required_ref):
- try:
- repo.resolve_ref_to_commit(required_ref)
- continue
- except morphlib.gitdir.InvalidRefError:
- pass
- missing_refs.add(required_ref)
-
- if not missing_refs:
- self._app.status(
- msg='Not updating git repository %(repo_name)s '
- 'because it already contains %(sha1s)s',
- chatty=True, repo_name=repo_name,
- sha1s=_word_join_list(tuple(required_refs)))
- return repo
-
- self._app.status(msg='Updating %(repo_name)s',
- repo_name=repo_name)
- self._update_repo(repo)
- return repo
- else:
- self._app.status(msg='Cloning %(repo_name)s',
- repo_name=repo_name)
- return self._cache_repo(repo_name)
-
- def ensure_submodules(self, toplevel_repo,
- toplevel_ref): # pragma: no cover
- '''Ensure any submodules of a given repo are cached and up to date.'''
-
- def submodules_for_repo(repo_path, ref):
- try:
- submodules = morphlib.git.Submodules(self._app, repo_path, ref)
- submodules.load()
- return [(submod.url, submod.commit) for submod in submodules]
- except morphlib.git.NoModulesFileError:
- return []
-
- done = set()
- subs_to_process = submodules_for_repo(toplevel_repo.dirname,
- toplevel_ref)
- while subs_to_process:
- url, ref = subs_to_process.pop()
- done.add((url, ref))
-
- cached_repo = self.get_updated_repo(url, ref=ref)
-
- for submod in submodules_for_repo(cached_repo.dirname, ref):
- if submod not in done:
- subs_to_process.append(submod)