From 014a029ade9a045a839ca86c35690b218098ea33 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Thu, 3 Mar 2016 15:56:40 +0000 Subject: Get rid of the CachedRepo class (almost) For a long time the CachedRepo class has basically been a wrapper around the GitDir class, but with a few extra methods that don't really even belong there. It is now a tiny class in the localrepocache module which just keeps track of a few extra attributes. All other functionality is provided by the gitdir module. This commit also removes the `git clone` approach for copying repos out of the cache. The alternative approach implemented by git.copy_repository() was slightly faster when I tested, so for now we should use that everywhere. Longer term we should find out why this is quicker than `git clone`, and fix Git itself to be fast. Change-Id: I1686ab43253d44c3903d9a0bad8bb75528e9cf75 --- morphlib/__init__.py | 3 +- morphlib/builder.py | 6 +- morphlib/builder_tests.py | 6 +- morphlib/cachedrepo.py | 297 ---------------------------- morphlib/cachedrepo_tests.py | 212 -------------------- morphlib/git.py | 21 +- morphlib/gitdir.py | 70 +++++-- morphlib/gitdir_tests.py | 14 +- morphlib/localrepocache.py | 54 ++++- morphlib/localrepocache_tests.py | 29 ++- morphlib/plugins/anchor_plugin.py | 4 +- morphlib/plugins/cross-bootstrap_plugin.py | 4 +- morphlib/plugins/diff_plugin.py | 6 +- morphlib/plugins/get_repo_plugin.py | 6 +- morphlib/plugins/system_manifests_plugin.py | 6 +- morphlib/stagingarea.py | 2 +- 16 files changed, 154 insertions(+), 586 deletions(-) delete mode 100644 morphlib/cachedrepo.py delete mode 100644 morphlib/cachedrepo_tests.py diff --git a/morphlib/__init__.py b/morphlib/__init__.py index 2ea657e7..7724c41c 100644 --- a/morphlib/__init__.py +++ b/morphlib/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2011-2015 Codethink Limited +# Copyright (C) 2011-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the 
GNU General Public License as published by @@ -57,7 +57,6 @@ import buildcommand import buildenvironment import buildsystem import builder -import cachedrepo import cachekeycomputer import cmdline_parse_utils import defaults diff --git a/morphlib/builder.py b/morphlib/builder.py index 6a5ad184..2d0a4bd4 100644 --- a/morphlib/builder.py +++ b/morphlib/builder.py @@ -43,9 +43,9 @@ def extract_sources(app, repo_cache, repo, sha1, srcdir): #pragma: no cover source=repo.original_name, target=destdir) - repo.checkout(sha1, destdir) + morphlib.gitdir.checkout_from_cached_repo(repo, sha1, destdir) morphlib.git.reset_workdir(app.runcmd, destdir) - submodules = morphlib.git.Submodules(app, repo.path, sha1) + submodules = morphlib.git.Submodules(app, repo.dirname, sha1) try: submodules.load() except morphlib.git.NoModulesFileError: @@ -187,7 +187,7 @@ class BuilderBase(object): ''' assert isinstance(self.source.repo, - morphlib.cachedrepo.CachedRepo) + morphlib.localrepocache.CachedRepo) meta = { 'artifact-name': artifact_name, 'source-name': self.source.name, diff --git a/morphlib/builder_tests.py b/morphlib/builder_tests.py index 9e465227..da1f432e 100644 --- a/morphlib/builder_tests.py +++ b/morphlib/builder_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2015 Codethink Limited +# Copyright (C) 2012-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -51,8 +51,8 @@ class FakeSource(object): self.name = 'a' with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - self.repo = morphlib.cachedrepo.CachedRepo( - FakeApp(), 'repo', 'url', 'path') + self.repo = morphlib.localrepocache.CachedRepo( + 'path', 'repo', 'url') self.repo_name = 'url' self.original_ref = 'e' self.sha1 = 'f' diff --git a/morphlib/cachedrepo.py b/morphlib/cachedrepo.py deleted file mode 100644 index 76cdaa86..00000000 --- a/morphlib/cachedrepo.py +++ /dev/null @@ -1,297 +0,0 @@ -# 
Copyright (C) 2012-2015 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. - - -import cliapp - -import os -import tempfile - -import morphlib - - -class CheckoutDirectoryExistsError(cliapp.AppException): - - def __init__(self, repo, target_dir): - cliapp.AppException.__init__( - self, - 'Checkout directory %s for repo %s already exists' % - (target_dir, repo)) - - -class CloneError(cliapp.AppException): - - def __init__(self, repo, target_dir): - cliapp.AppException.__init__( - self, - 'Failed to clone %s into %s' % (repo.original_name, target_dir)) - - -class CopyError(cliapp.AppException): - - def __init__(self, repo, target_dir): - cliapp.AppException.__init__( - self, - 'Failed to copy %s into %s' % (repo.original_name, target_dir)) - - -class CheckoutError(cliapp.AppException): - - def __init__(self, repo, ref, target_dir): - cliapp.AppException.__init__( - self, - 'Failed to check out ref %s in %s' % (ref, target_dir)) - - -class UpdateError(cliapp.AppException): - - def __init__(self, repo): - cliapp.AppException.__init__( - self, 'Failed to update cached version of repo %s' % repo) - - -class CachedRepo(object): - - '''A locally cached Git repository with an origin remote set up. - - On instance of this class represents a locally cached version of a - remote Git repository. This remote repository is set up as the - 'origin' remote. - - Cached repositories are bare mirrors of the upstream. 
Locally created - branches will be lost the next time the repository updates. - - CachedRepo objects can resolve Git refs into SHA1s. Given a SHA1 - ref, they can also be asked to return the contents of a file via the - cat() method. They can furthermore check out the repository into - a local directory using a SHA1 ref. Last but not least, any cached - repo may be updated from it's origin remote using the update() - method. - - ''' - - def __init__(self, app, original_name, url, path): - '''Creates a new CachedRepo for a repo name, URL and local path.''' - - self.app = app - self.original_name = original_name - self.url = url - self.path = path - self.is_mirror = not url.startswith('file://') - self.already_updated = False - - self.gitdir = morphlib.gitdir.GitDirectory(path) - - def ref_exists(self, ref): # pragma: no cover - '''Returns True if the given ref exists in the repo''' - return self.gitdir.ref_exists(ref) - - def resolve_ref_to_commit(self, ref): # pragma: no cover - '''Resolve a named ref to a commit SHA1. - - Raises gitdir.InvalidRefError if the ref does not exist. - - ''' - return self.gitdir.resolve_ref_to_commit(ref) - - def resolve_ref_to_tree(self, ref): # pragma: no cover - '''Resolve a named ref to a tree SHA1. - - Raises gitdir.InvalidRefError if the ref does not exist. - - ''' - return self.gitdir.resolve_ref_to_tree(ref) - - def read_file(self, filename, ref): # pragma: no cover - '''Attempts to read a file from a given ref. - - Raises a gitdir.InvalidRefError if the ref is not found in the - repository. Raises an IOError if the requested file is not found in - the ref. - - ''' - return self.gitdir.read_file(filename, ref) - - def tags_containing_sha1(self, ref): # pragma: no cover - '''Check whether given sha1 is contained in any tags - - Raises a gitdir.InvalidRefError if the ref is not found in the - repository. Raises gitdir.ExpectedSha1Error if the ref is not - a sha1. 
- - ''' - return self.gitdir.tags_containing_sha1(ref) - - def branches_containing_sha1(self, ref): # pragma: no cover - '''Check whether given sha1 is contained in any branches - - Raises a gitdir.InvalidRefError if the ref is not found in the - repository. Raises gitdir.ExpectedSha1Error if the ref is not - a sha1. - - ''' - return self.gitdir.branches_containing_sha1(ref) - - def version_guess(self, ref): # pragma: no cover - '''Guess version number using `git describe --tags` - - Raises a gitdir.InvalidRefError if the ref is not found in the - repository. - - ''' - return self.gitdir.version_guess(ref) - - def list_files(self, ref, recurse=True): # pragma: no cover - '''Return filenames found in the tree pointed to by the given ref. - - Returns a gitdir.InvalidRefError if the ref is not found in the - repository. - - ''' - return self.gitdir.list_files(ref, recurse) - - def clone_checkout(self, ref, target_dir): - '''Clone from the cache into the target path and check out a given ref. - - Raises a CheckoutDirectoryExistsError if the target - directory already exists. Raises a gitdir.InvalidRefError if the - ref is not found in the repository. Raises a CheckoutError if - something else goes wrong while copying the repository or checking - out the SHA1 ref. - - ''' - - if os.path.exists(target_dir): - raise CheckoutDirectoryExistsError(self, target_dir) - - self.gitdir.resolve_ref_to_commit(ref) - - self._clone_into(target_dir, ref) - - def checkout(self, ref, target_dir): - '''Unpacks the repository in a directory and checks out a commit ref. - - Raises an gitdir.InvalidRefError if the ref is not found in the - repository. Raises a CopyError if something goes wrong with the copy - of the repository. Raises a CheckoutError if something else goes wrong - while copying the repository or checking out the SHA1 ref. 
- - ''' - - if not os.path.exists(target_dir): - os.mkdir(target_dir) - - # Note, we copy instead of cloning because it's much faster in the case - # that the target is on a different filesystem from the cache. We then - # take care to turn the copy into something as good as a real clone. - self._copy_repository(self.path, target_dir) - - self._checkout_ref_in_clone(ref, target_dir) - - def extract_commit(self, ref, target_dir): - '''Extract files from a given commit into target_dir. - - This is different to a 'checkout': a checkout assumes a working tree - associated with a repository. Here, the repository is immutable (it's - in the cache) and we just want to look at the files in a quick way - (quicker than going 'git cat-file everything'). - - This seems marginally quicker than doing a shallow clone. Running - `morph list-artifacts` 10 times gave an average time of 1.334s - using `git clone --depth 1` and an average time of 1.261s using - this code. - - ''' - if not os.path.exists(target_dir): - os.makedirs(target_dir) - - with tempfile.NamedTemporaryFile() as index_file: - index = self.gitdir.get_index(index_file=index_file.name) - index.set_to_tree(ref) - index.checkout(working_tree=target_dir) - - def requires_update_for_ref(self, ref): - '''Returns False if there's no need to update this cached repo. - - If the ref points to a specific commit that's already available - locally, there's never any need to update. If it's a named ref and this - repo wasn't already updated in the lifetime of the current process, - it's necessary to update. - - ''' - if not self.is_mirror: - # Repos with file:/// URLs don't ever need updating. - return False - - if self.already_updated: - return False - - # Named refs that are valid SHA1s will confuse this code. 
- ref_can_change = not morphlib.git.is_valid_sha1(ref) - - if ref_can_change or not self.gitdir.ref_exists(ref): - return True - else: - return False - - def update(self): - '''Updates the cached repository using its origin remote. - - Raises an UpdateError if anything goes wrong while performing - the update. - - ''' - - if not self.is_mirror: - return - - try: - self.gitdir.update_remotes( - echo_stderr=self.app.settings['verbose']) - self.already_updated = True - except cliapp.AppException: - raise UpdateError(self) - - def _runcmd(self, *args, **kwargs): # pragma: no cover - if not 'cwd' in kwargs: - kwargs['cwd'] = self.path - return self.app.runcmd(*args, **kwargs) - - def _clone_into(self, target_dir, ref): # pragma: no cover - '''Actually perform the clone''' - try: - morphlib.git.clone_into(self._runcmd, self.path, target_dir, - ref) - except cliapp.AppException: - raise CloneError(self, target_dir) - - def _copy_repository(self, source_dir, target_dir): # pragma: no cover - try: - morphlib.git.copy_repository( - self._runcmd, source_dir, target_dir, self.is_mirror) - except cliapp.AppException: - raise CopyError(self, target_dir) - - def _checkout_ref_in_clone(self, ref, clone_dir): # pragma: no cover - # This is a separate GitDirectory instance. Don't confuse it with the - # internal .gitdir attribute! 
- working_gitdir = morphlib.gitdir.GitDirectory(clone_dir) - try: - working_gitdir.checkout(ref) - except cliapp.AppException as e: - raise CheckoutError(self, ref, clone_dir) - return working_gitdir - - def __str__(self): # pragma: no cover - return self.url diff --git a/morphlib/cachedrepo_tests.py b/morphlib/cachedrepo_tests.py deleted file mode 100644 index d26c0236..00000000 --- a/morphlib/cachedrepo_tests.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright (C) 2012-2015 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. 
- - -import logging -import os -import unittest - -import fs.tempfs -import cliapp - -import morphlib -import morphlib.gitdir_tests - - -class FakeApplication(object): - - def __init__(self): - self.settings = { - 'verbose': True - } - - -class FakeIndex(object): - - def __init__(self, index_file): - self.index_file = index_file - self.ref = None - - def set_to_tree(self, ref): - self.ref = ref - - def checkout(self, working_tree=None): - if working_tree: - with open(os.path.join(working_tree, 'foo.morph'), 'w') as f: - f.write('contents of foo.morph') - - -class CachedRepoTests(unittest.TestCase): - - known_commit = 'a4da32f5a81c8bc6d660404724cedc3bc0914a75' - bad_sha1_known_to_rev_parse = 'cafecafecafecafecafecafecafecafecafecafe' - - def rev_parse(self, ref): - output = { - self.bad_sha1_known_to_rev_parse: self.bad_sha1_known_to_rev_parse, - 'a4da32f5a81c8bc6d660404724cedc3bc0914a75': - 'a4da32f5a81c8bc6d660404724cedc3bc0914a75', - 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9': - 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', - 'master': 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', - 'baserock/morph': '8b780e2e6f102fcf400ff973396566d36d730501' - } - ref = ref.rstrip('^{commit}') - try: - return output[ref] - except KeyError: - raise cliapp.AppException('git rev-parse --verify %s' % ref) - - def copy_repository(self, source_dir, target_dir): - if target_dir.endswith('failed-checkout'): - raise morphlib.cachedrepo.CopyError(self.repo, target_dir) - - def checkout_ref(self, ref, target_dir): - if ref == '079bbfd447c8534e464ce5d40b80114c2022ebf4': - raise morphlib.cachedrepo.CheckoutError(self.repo, ref, target_dir) - else: - with open(os.path.join(target_dir, 'foo.morph'), 'w') as f: - f.write('contents of foo.morph') - - def clone_into(self, target_dir, ref): - if target_dir.endswith('failed-checkout'): - raise morphlib.cachedrepo.CloneError(self.repo, target_dir) - self.clone_target = target_dir - self.clone_ref = ref - - def update_successfully(self, **kwargs): - pass 
- - def update_with_failure(self, **kwargs): - raise cliapp.AppException('git remote update origin') - - def get_index(self, index_file=None): - return FakeIndex(index_file) - - def setUp(self): - self.repo_name = 'foo' - self.repo_url = 'git://foo.bar/foo.git' - self.repo_path = '/tmp/foo' - with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - self.repo = morphlib.cachedrepo.CachedRepo( - FakeApplication(), self.repo_name, self.repo_url, - self.repo_path) - self.tempfs = fs.tempfs.TempFS() - - def test_constructor_sets_name_and_url_and_path(self): - self.assertEqual(self.repo.original_name, self.repo_name) - self.assertEqual(self.repo.url, self.repo_url) - self.assertEqual(self.repo.path, self.repo_path) - - def test_fail_clone_checkout_into_existing_directory(self): - self.repo.gitdir.checkout = self.checkout_ref - self.repo._clone_into = self.clone_into - - self.assertRaises(morphlib.cachedrepo.CheckoutDirectoryExistsError, - self.repo.clone_checkout, - 'e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', - self.tempfs.root_path) - - def test_fail_checkout_due_to_clone_error(self): - self.repo.gitdir._rev_parse = self.rev_parse - self.repo._clone_into = self.clone_into - - self.assertRaises( - morphlib.cachedrepo.CloneError, self.repo.clone_checkout, - 'a4da32f5a81c8bc6d660404724cedc3bc0914a75', - self.tempfs.getsyspath('failed-checkout')) - - def test_fail_checkout_due_to_copy_error(self): - self.repo.gitdir._rev_parse = self.rev_parse - self.repo._copy_repository = self.copy_repository - - self.assertRaises(morphlib.cachedrepo.CopyError, self.repo.checkout, - 'a4da32f5a81c8bc6d660404724cedc3bc0914a75', - self.tempfs.getsyspath('failed-checkout')) - - def test_fail_checkout_from_invalid_ref(self): - self.repo.gitdir._rev_parse = self.rev_parse - self.repo._copy_repository = self.copy_repository - self.repo._checkout_ref_in_clone = self.checkout_ref - - self.assertRaises( - morphlib.cachedrepo.CheckoutError, self.repo.checkout, - 
'079bbfd447c8534e464ce5d40b80114c2022ebf4', - self.tempfs.getsyspath('checkout-from-invalid-ref')) - - def test_checkout_from_existing_ref_into_new_directory(self): - self.repo.gitdir._rev_parse = self.rev_parse - self.repo._copy_repository = self.copy_repository - self.repo._checkout_ref_in_clone = self.checkout_ref - - unpack_dir = self.tempfs.getsyspath('unpack-dir') - self.repo.checkout('e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', - unpack_dir) - self.assertTrue(os.path.exists(unpack_dir)) - - morph_filename = os.path.join(unpack_dir, 'foo.morph') - self.assertTrue(os.path.exists(morph_filename)) - - def test_extract_commit_into_new_directory(self): - self.repo.gitdir.get_index = self.get_index - unpack_dir = self.tempfs.getsyspath('unpack-dir') - self.repo.extract_commit('e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', - unpack_dir) - self.assertTrue(os.path.exists(unpack_dir)) - - morph_filename = os.path.join(unpack_dir, 'foo.morph') - self.assertTrue(os.path.exists(morph_filename)) - - def test_successful_update(self): - self.repo.gitdir.update_remotes = self.update_successfully - self.repo.update() - - def test_failing_update(self): - self.repo.gitdir.update_remotes = self.update_with_failure - self.assertRaises(morphlib.cachedrepo.UpdateError, self.repo.update) - - def test_no_update_if_local(self): - with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - self.repo = morphlib.cachedrepo.CachedRepo( - object(), 'local:repo', 'file:///local/repo/', '/local/repo/') - self.repo.gitdir.update_remotes = self.update_with_failure - self.repo.gitdir._rev_parse = self.rev_parse - - self.assertFalse(self.repo.requires_update_for_ref(self.known_commit)) - self.repo.update() - - def test_clone_checkout(self): - self.repo.gitdir._rev_parse = self.rev_parse - self.repo._clone_into = self.clone_into - - self.repo.clone_checkout('master', '/.DOES_NOT_EXIST') - self.assertEqual(self.clone_target, '/.DOES_NOT_EXIST') - self.assertEqual(self.clone_ref, 'master') - - def 
test_no_need_to_update_repo_for_existing_sha1(self): - # If the SHA1 is present locally already there's no need to update. - # If it's a named ref then it might have changed in the remote, so we - # must still update. - self.repo.gitdir._rev_parse = self.rev_parse - - self.assertFalse(self.repo.requires_update_for_ref(self.known_commit)) - self.assertTrue(self.repo.requires_update_for_ref('named_ref')) - - def test_no_need_to_update_repo_if_already_updated(self): - self.repo.gitdir.update_remotes = self.update_successfully - self.repo.gitdir._rev_parse = self.rev_parse - - self.assertTrue(self.repo.requires_update_for_ref('named_ref')) - self.repo.update() - self.assertFalse(self.repo.requires_update_for_ref('named_ref')) diff --git a/morphlib/git.py b/morphlib/git.py index b6f54d02..190544ac 100644 --- a/morphlib/git.py +++ b/morphlib/git.py @@ -1,4 +1,4 @@ -# Copyright (C) 2011-2015 Codethink Limited +# Copyright (C) 2011-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -226,6 +226,10 @@ def copy_repository(runcmd, repo, destdir, is_mirror=True): This also fixes up the repository afterwards, so that it can contain code etc. It does not leave any given branch ready for use. + This is slightly faster than `git clone` for large repositories, + as of Git 2.3.0. Long term, we should fix `git clone` to be as fast + as possible, and use that. 
+ ''' if is_mirror == False: runcmd(['cp', '-a', os.path.join(repo, '.git'), @@ -272,21 +276,6 @@ def reset_workdir(runcmd, gitdir): gitcmd(runcmd, 'reset', '--hard', 'HEAD', cwd=gitdir) -def clone_into(runcmd, srcpath, targetpath, ref=None): - '''Clones a repo in srcpath into targetpath, optionally directly at ref.''' - - if ref is None: - gitcmd(runcmd, 'clone', srcpath, targetpath) - elif is_valid_sha1(ref): - gitcmd(runcmd, 'clone', srcpath, targetpath) - gitcmd(runcmd, 'checkout', ref, cwd=targetpath) - else: - gitcmd(runcmd, 'clone', '-b', ref, srcpath, targetpath) - gd = morphlib.gitdir.GitDirectory(targetpath) - if gd.has_fat(): - gd.fat_init() - gd.fat_pull() - def is_valid_sha1(ref): '''Checks whether a string is a valid SHA1.''' diff --git a/morphlib/gitdir.py b/morphlib/gitdir.py index 59a4be51..ca4a4c76 100644 --- a/morphlib/gitdir.py +++ b/morphlib/gitdir.py @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2015 Codethink Limited +# Copyright (C) 2013-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,6 +19,7 @@ import cliapp import itertools import os import re +import tempfile import morphlib @@ -455,7 +456,6 @@ class GitDirectory(object): methods for specific git operations instead. ''' - return cliapp.runcmd(argv, cwd=self.dirname, **kwargs) def _runcmd_unchecked(self, *args, **kwargs): @@ -468,9 +468,6 @@ class GitDirectory(object): # Exact error is logged already by the runcmd() function. 
raise NoGitRepoError(self.dirname) - def clone_into(self, dst, ref=None): # pragma: no cover - morphlib.git.clone_into(cliapp.runcmd, self.dirname, dst, ref=ref) - def checkout(self, branch_name): # pragma: no cover '''Check out a git branch.''' morphlib.git.gitcmd(self._runcmd, 'checkout', branch_name) @@ -848,6 +845,29 @@ class GitDirectory(object): def get_relpath(self, path): # pragma: no cover return os.path.relpath(path, self.dirname) + def extract_commit(self, ref, target_dir): + '''Extract files from a given commit into target_dir. + + This is different to a checkout: a checkout assumes a working tree + associated with a repository, where here we just copy the files out so + they can be read quickly. + + Use read_file() if you only want to read one or two files. + + This approach is marginally quicker than doing a shallow clone. Running + `morph list-artifacts` 10 times gave an average time of 1.334s + using `git clone --depth 1` and an average time of 1.261s using + this code. + + ''' + if not os.path.exists(target_dir): + os.makedirs(target_dir) + + with tempfile.NamedTemporaryFile() as index_file: + index = self.get_index(index_file=index_file.name) + index.set_to_tree(ref) + index.checkout(working_tree=target_dir) + def init(dirname): '''Initialise a new git repository.''' @@ -857,13 +877,39 @@ def init(dirname): return gd -def clone_from_cached_repo(cached_repo, dirname, ref): # pragma: no cover - '''Clone a CachedRepo into the desired directory. +def checkout_from_cached_repo(cached_repo, ref, target_dir, + runcmd_cb=cliapp.runcmd): # pragma: no cover + '''Clones the repository to a directory and checks out a given ref. - The given ref is checked out (or git's default branch is checked out - if ref is None). + This doesn't actually call `git clone`. The approach used is to `cp -a` + the repo in place, then tweak it so it appears to be a clone. This seems + *slightly* faster than using `git clone`, as of Git version 2.3.0. 
- ''' + I tested this by checking out a ref of linux.git, having cleared my + system's caches. It took 7m06s using `git clone`, and 6m53s using the + 'cp' approach. - cached_repo.clone_checkout(ref, dirname) - return GitDirectory(dirname) + ''' + if not os.path.exists(target_dir): + os.mkdir(target_dir) + + # Note, we copy instead of cloning because it's much faster in the case + # that the target is on a different filesystem from the cache. We then + # take care to turn the copy into something as good as a real clone. + try: + morphlib.git.copy_repository( + runcmd_cb, cached_repo.dirname, target_dir, cached_repo.is_mirror) + except cliapp.AppException: + raise CopyError(cached_repo.original_name, target_dir) + + gitdir = morphlib.gitdir.GitDirectory(target_dir) + try: + gitdir.checkout(ref) + except cliapp.AppException: + raise CheckoutError(cached_repo.original_name, ref, target_dir) + + if gitdir.has_fat(): + gitdir.fat_init() + gitdir.fat_pull() + + return gitdir diff --git a/morphlib/gitdir_tests.py b/morphlib/gitdir_tests.py index 0ec7b0f1..4da98bbc 100644 --- a/morphlib/gitdir_tests.py +++ b/morphlib/gitdir_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2015 Codethink Limited +# Copyright (C) 2013-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,8 @@ # =*= License: GPL-2 =*= +import fs.tempfs + import contextlib import datetime import os @@ -305,6 +307,16 @@ class GitDirectoryContentsTests(unittest.TestCase): morphlib.git.gitcmd(gd._runcmd, 'reset', '--hard') self.assertEqual(gd.describe(), 'example') + def test_extract_commit_into_new_directory(self): + gd = morphlib.gitdir.GitDirectory(self.dirname) + + unpack_dir = fs.tempfs.TempFS().getsyspath('unpack-dir') + gd.extract_commit('master', unpack_dir) + self.assertTrue(os.path.exists(unpack_dir)) + + morph_filename = os.path.join(unpack_dir, 'bar.morph') + 
self.assertTrue(os.path.exists(morph_filename)) + class GitDirectoryFileTypeTests(unittest.TestCase): diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py index a7c5d1ec..3a03fe1d 100644 --- a/morphlib/localrepocache.py +++ b/morphlib/localrepocache.py @@ -68,6 +68,36 @@ class NotCached(morphlib.Error): return 'Repository %s is not cached yet' % self.reponame +class UpdateError(cliapp.AppException): # pragma: no cover + + def __init__(self, repo): + cliapp.AppException.__init__( + self, 'Failed to update cached version of repo %s' % repo) + + +class CachedRepo(morphlib.gitdir.GitDirectory): + '''A locally cached Git repository with an origin remote set up. + + One instance of this class represents a locally cached version of a + remote Git repository. This remote repository is set up as the + 'origin' remote. + + Cached repositories are bare mirrors of the upstream. Locally created + branches will be lost the next time the repository updates. + + ''' + def __init__(self, path, original_name, url): + self.original_name = original_name + self.url = url + self.is_mirror = not url.startswith('file://') + self.already_updated = False + + super(CachedRepo, self).__init__(path) + + def __str__(self): # pragma: no cover + return self.url + + class LocalRepoCache(object): '''Manage locally cached git repositories. 
@@ -202,7 +232,7 @@ class LocalRepoCache(object): ok, error = self._clone_with_tarball(repourl, path) if ok: repo = self._get_repo(reponame) - repo.update() + self._update_repo(repo) return repo else: errors.append(error) @@ -227,11 +257,6 @@ class LocalRepoCache(object): repo.already_updated = True return repo - def _new_cached_repo_instance(self, reponame, repourl, - path): # pragma: no cover - return morphlib.cachedrepo.CachedRepo( - self._app, reponame, repourl, path) - def _get_repo(self, reponame): '''Return an object representing a cached repository.''' @@ -241,11 +266,19 @@ class LocalRepoCache(object): repourl = self._resolver.pull_url(reponame) path = self._cache_name(repourl) if self.fs.exists(path): - repo = self._new_cached_repo_instance(reponame, repourl, path) + repo = CachedRepo(path, reponame, repourl) self._cached_repo_objects[reponame] = repo return repo raise NotCached(reponame) + def _update_repo(self, cachedrepo): # pragma: no cover + try: + cachedrepo.update_remotes( + echo_stderr=self._app.settings['verbose']) + cachedrepo.already_updated = True + except cliapp.AppException: + raise UpdateError(cachedrepo) # report the repo that failed + def get_updated_repo(self, repo_name, ref=None, refs=None): # pragma: no cover '''Return object representing cached repository. 
@@ -291,7 +324,7 @@ class LocalRepoCache(object): self._app.status(msg='Updating %(repo_name)s', repo_name=repo_name) - repo.update() + self._update_repo(repo) return repo else: self._app.status(msg='Cloning %(repo_name)s', @@ -311,13 +344,14 @@ class LocalRepoCache(object): return [] done = set() - subs_to_process = submodules_for_repo(toplevel_repo.path, toplevel_ref) + subs_to_process = submodules_for_repo(toplevel_repo.dirname, + toplevel_ref) while subs_to_process: url, ref = subs_to_process.pop() done.add((url, ref)) cached_repo = self.get_updated_repo(url, ref=ref) - for submod in submodules_for_repo(cached_repo.path, ref): + for submod in submodules_for_repo(cached_repo.dirname, ref): if submod not in done: subs_to_process.append(submod) diff --git a/morphlib/localrepocache_tests.py b/morphlib/localrepocache_tests.py index ea56bdf2..91fdb216 100644 --- a/morphlib/localrepocache_tests.py +++ b/morphlib/localrepocache_tests.py @@ -57,7 +57,7 @@ class LocalRepoCacheTests(unittest.TestCase): self.lrc._git = self.fake_git self.lrc._fetch = self.not_found self.lrc._mkdtemp = self.fake_mkdtemp - self.lrc._new_cached_repo_instance = self.new_cached_repo_instance + self.lrc._update_repo = lambda *args: None self._mkdtemp_count = 0 def fake_git(self, args, **kwargs): @@ -88,13 +88,6 @@ class LocalRepoCacheTests(unittest.TestCase): self.lrc.fs.makedir(dirname+"/"+thing) return thing - def new_cached_repo_instance(self, *args): - with morphlib.gitdir_tests.allow_nonexistant_git_repos(): - repo = morphlib.cachedrepo.CachedRepo( - FakeApplication(), *args) - repo.update = lambda: None - return repo - def not_found(self, url, path): raise cliapp.AppException('Not found') @@ -108,12 +101,14 @@ class LocalRepoCacheTests(unittest.TestCase): self.assertFalse(self.lrc.fs.exists(self.cachedir)) def test_creates_cachedir_if_missing(self): - self.lrc.get_updated_repo(self.repourl, ref='master') + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): + 
self.lrc.get_updated_repo(self.repourl, ref='master') self.assertTrue(self.lrc.fs.exists(self.cachedir)) def test_happily_caches_same_repo_twice(self): - self.lrc.get_updated_repo(self.repourl, ref='master') - self.lrc.get_updated_repo(self.repourl, ref='master') + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): + self.lrc.get_updated_repo(self.repourl, ref='master') + self.lrc.get_updated_repo(self.repourl, ref='master') def test_fails_to_cache_when_remote_does_not_exist(self): def fail(args, **kwargs): @@ -124,14 +119,14 @@ class LocalRepoCacheTests(unittest.TestCase): self.lrc.get_updated_repo, self.repourl, 'master') def test_does_not_mind_a_missing_tarball(self): - self.lrc.get_updated_repo(self.repourl, ref='master') + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): + self.lrc.get_updated_repo(self.repourl, ref='master') self.assertEqual(self.fetched, []) def test_fetches_tarball_when_it_exists(self): self.lrc._fetch = lambda url, path: self.fetched.append(url) - with morphlib.gitdir_tests.monkeypatch( - morphlib.cachedrepo.CachedRepo, 'update', lambda self: None): + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): self.lrc.get_updated_repo(self.repourl, ref='master') self.assertEqual(self.fetched, [self.tarball_url]) @@ -148,5 +143,7 @@ class LocalRepoCacheTests(unittest.TestCase): def test_avoids_caching_local_repo(self): self.lrc.fs.makedir('/local/repo', recursive=True) - cached = self.lrc.get_updated_repo('file:///local/repo', refs='master') - assert cached.path == '/local/repo' + with morphlib.gitdir_tests.allow_nonexistant_git_repos(): + cached = self.lrc.get_updated_repo('file:///local/repo', + refs='master') + assert cached.dirname == '/local/repo' diff --git a/morphlib/plugins/anchor_plugin.py b/morphlib/plugins/anchor_plugin.py index 62c66c15..7465c479 100644 --- a/morphlib/plugins/anchor_plugin.py +++ b/morphlib/plugins/anchor_plugin.py @@ -140,7 +140,7 @@ class AnchorPlugin(cliapp.Plugin): repo = 
bc.lrc.get_updated_repo(reponame, refs=(s.original_ref for s in sources)) - remote = Remote(repo.gitdir) + remote = Remote(repo) push_url = resolver.push_url(reponame) remote.set_push_url(push_url) @@ -178,7 +178,7 @@ class AnchorPlugin(cliapp.Plugin): lsinfo = dict((ref, sha1) for (sha1, ref) in remote.ls()) for flag, sha1, target, summary, reason in results: - commit = repo.gitdir.resolve_ref_to_commit(sha1) + commit = repo.resolve_ref_to_commit(sha1) # Fail if we failed to push something other than a tag # pushed to a branch diff --git a/morphlib/plugins/cross-bootstrap_plugin.py b/morphlib/plugins/cross-bootstrap_plugin.py index 265b273b..273e677d 100644 --- a/morphlib/plugins/cross-bootstrap_plugin.py +++ b/morphlib/plugins/cross-bootstrap_plugin.py @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2015 Codethink Limited +# Copyright (C) 2013-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -38,7 +38,7 @@ echo "Complete!" 
''' def escape_source_name(source): - repo_name = source.repo.original_name + repo_name = source.repo_name ref = source.original_ref source_name = '%s__%s' % (repo_name, ref) return re.sub('[:/]', '_', source_name) diff --git a/morphlib/plugins/diff_plugin.py b/morphlib/plugins/diff_plugin.py index 9855c39f..26964df8 100644 --- a/morphlib/plugins/diff_plugin.py +++ b/morphlib/plugins/diff_plugin.py @@ -64,8 +64,8 @@ class DiffPlugin(cliapp.Plugin): ref=s.sha1) for s in (from_source, to_source)) - from_desc = from_repo.gitdir.version_guess(from_source.sha1) - to_desc = to_repo.gitdir.version_guess(to_source.sha1) + from_desc = from_repo.version_guess(from_source.sha1) + to_desc = to_repo.version_guess(to_source.sha1) self.app.output.write( '{} ref changed from {} to {}\n'.format(name, from_desc, @@ -101,7 +101,7 @@ class DiffPlugin(cliapp.Plugin): 'Convert a definition path list into a list of systems' ml = MorphologyLoader() repo = self.bc.lrc.get_updated_repo(reponame, ref=ref) - mf = MorphologyFinder(gitdir=repo.gitdir, ref=ref) + mf = MorphologyFinder(gitdir=repo, ref=ref) # We may have been given an empty set of definitions as input, in # which case we instead use every we find. if not definitions: diff --git a/morphlib/plugins/get_repo_plugin.py b/morphlib/plugins/get_repo_plugin.py index e8ebf229..fc81d6e5 100644 --- a/morphlib/plugins/get_repo_plugin.py +++ b/morphlib/plugins/get_repo_plugin.py @@ -46,13 +46,13 @@ class GetRepoPlugin(cliapp.Plugin): def _clone_repo(self, cached_repo, dirname, checkout_ref): '''Clone a cached git repository into the directory given by path.''' # Do the clone. - gd = morphlib.gitdir.clone_from_cached_repo( - cached_repo, dirname, checkout_ref) + gd = morphlib.gitdir.checkout_from_cached_repo( + cached_repo, checkout_ref, dirname) # Configure the "origin" remote to use the upstream git repository, # and not the locally cached copy. 
resolver = morphlib.repoaliasresolver.RepoAliasResolver( - cached_repo.app.settings['repo-alias']) + self.app.settings['repo-alias']) remote = gd.get_remote('origin') remote.set_fetch_url(resolver.pull_url(cached_repo.url)) remote.set_push_url(resolver.push_url(cached_repo.original_name)) diff --git a/morphlib/plugins/system_manifests_plugin.py b/morphlib/plugins/system_manifests_plugin.py index 4444ecb3..86388737 100644 --- a/morphlib/plugins/system_manifests_plugin.py +++ b/morphlib/plugins/system_manifests_plugin.py @@ -172,7 +172,7 @@ def run_licensecheck(filename): def checkout_repo(lrc, repo, dest, ref='master'): cached = lrc.get_updated_repo(repo, ref) if not os.path.exists(dest): - cached.checkout(ref, dest) + morphlib.gitdir.checkout_from_cached_repo(cached, ref, dest) def load_lorries(dir): lorries = [] @@ -292,8 +292,8 @@ class Manifest(object): try: self.status(msg='Checking out chunk repo into %(dir)s at %(ref)s', dir=dir, ref=ref, chatty=True) - cached_repo.checkout(ref, dir) - gd = morphlib.gitdir.GitDirectory(dir) + gd = morphlib.gitdir.checkout_from_cached_repo( + cached_repo, ref, dir) gd.update_submodules(app) self.status(msg='Getting license info', chatty=True) diff --git a/morphlib/stagingarea.py b/morphlib/stagingarea.py index 871b69b9..0edd70f3 100644 --- a/morphlib/stagingarea.py +++ b/morphlib/stagingarea.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2015 Codethink Limited +# Copyright (C) 2012-2016 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -- cgit v1.2.1