diff options
-rw-r--r-- | morphlib/__init__.py | 1 | ||||
-rw-r--r-- | morphlib/buildcommand.py | 3 | ||||
-rw-r--r-- | morphlib/cachedrepo.py | 26 | ||||
-rw-r--r-- | morphlib/cachedrepo_tests.py | 30 | ||||
-rw-r--r-- | morphlib/gitindex.py | 22 | ||||
-rw-r--r-- | morphlib/gitindex_tests.py | 16 | ||||
-rw-r--r-- | morphlib/morphologyfactory.py | 90 | ||||
-rw-r--r-- | morphlib/plugins/list_artifacts_plugin.py | 3 | ||||
-rw-r--r-- | morphlib/sourceresolver.py | 412 | ||||
-rw-r--r-- | morphlib/sourceresolver_tests.py (renamed from morphlib/morphologyfactory_tests.py) | 130 | ||||
-rw-r--r-- | tests.build/empty-stratum.exit | 1 | ||||
-rwxr-xr-x | tests.build/empty-stratum.script | 36 | ||||
-rw-r--r-- | tests.build/empty-stratum.stderr | 1 | ||||
-rw-r--r-- | without-test-modules | 4 | ||||
-rw-r--r-- | yarns/branches-workspaces.yarn | 1 | ||||
-rw-r--r-- | yarns/building.yarn | 12 | ||||
-rw-r--r-- | yarns/deployment.yarn | 1 | ||||
-rw-r--r-- | yarns/implementations.yarn | 26 |
18 files changed, 570 insertions, 245 deletions
diff --git a/morphlib/__init__.py b/morphlib/__init__.py index a10ebe7b..d54340df 100644 --- a/morphlib/__init__.py +++ b/morphlib/__init__.py @@ -68,7 +68,6 @@ import gitindex import localartifactcache import localrepocache import mountableimage -import morphologyfactory import morphologyfinder import morphology import morphloader diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py index a22e689b..0aa50a3b 100644 --- a/morphlib/buildcommand.py +++ b/morphlib/buildcommand.py @@ -96,6 +96,7 @@ class BuildCommand(object): self.app.status(msg='Creating source pool', chatty=True) srcpool = morphlib.sourceresolver.create_source_pool( self.lrc, self.rrc, repo_name, ref, filename, + cachedir=self.app.settings['cachedir'], original_ref=original_ref, update_repos=not self.app.settings['no-git-update'], status_cb=self.app.status) @@ -271,7 +272,7 @@ class BuildCommand(object): def build_in_order(self, root_artifact): '''Build everything specified in a build order.''' - self.app.status(msg='Building a set of sources', chatty=True) + self.app.status(msg='Building a set of sources') build_env = root_artifact.build_env ordered_sources = list(self.get_ordered_sources(root_artifact.walk())) old_prefix = self.app.status_prefix diff --git a/morphlib/cachedrepo.py b/morphlib/cachedrepo.py index aa2b5af1..8b38c5c9 100644 --- a/morphlib/cachedrepo.py +++ b/morphlib/cachedrepo.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,7 +15,9 @@ import cliapp + import os +import tempfile import morphlib @@ -169,6 +171,28 @@ class CachedRepo(object): self._checkout_ref_in_clone(ref, target_dir) + def extract_commit(self, ref, target_dir): + '''Extract files from a given commit into target_dir. + + This is different to a 'checkout': a checkout assumes a working tree + associated with a repository. Here, the repository is immutable (it's + in the cache) and we just want to look at the files in a quick way + (quicker than going 'git cat-file everything'). + + This seems marginally quicker than doing a shallow clone. Running + `morph list-artifacts` 10 times gave an average time of 1.334s + using `git clone --depth 1` and an average time of 1.261s using + this code. + + ''' + if not os.path.exists(target_dir): + os.makedirs(target_dir) + + with tempfile.NamedTemporaryFile() as index_file: + index = self._gitdir.get_index(index_file=index_file.name) + index.set_to_tree(ref) + index.checkout(working_tree=target_dir) + def requires_update_for_ref(self, ref): '''Returns False if there's no need to update this cached repo. diff --git a/morphlib/cachedrepo_tests.py b/morphlib/cachedrepo_tests.py index 6f87bfdd..6fe69ef5 100644 --- a/morphlib/cachedrepo_tests.py +++ b/morphlib/cachedrepo_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -33,6 +33,21 @@ class FakeApplication(object): } +class FakeIndex(object): + + def __init__(self, index_file): + self.index_file = index_file + self.ref = None + + def set_to_tree(self, ref): + self.ref = ref + + def checkout(self, working_tree=None): + if working_tree: + with open(os.path.join(working_tree, 'foo.morph'), 'w') as f: + f.write('contents of foo.morph') + + class CachedRepoTests(unittest.TestCase): known_commit = 'a4da32f5a81c8bc6d660404724cedc3bc0914a75' @@ -77,6 +92,9 @@ class CachedRepoTests(unittest.TestCase): def update_with_failure(self, **kwargs): raise cliapp.AppException('git remote update origin') + def get_index(self, index_file=None): + return FakeIndex(index_file) + def setUp(self): self.repo_name = 'foo' self.repo_url = 'git://foo.bar/foo.git' @@ -141,6 +159,16 @@ class CachedRepoTests(unittest.TestCase): morph_filename = os.path.join(unpack_dir, 'foo.morph') self.assertTrue(os.path.exists(morph_filename)) + def test_extract_commit_into_new_directory(self): + self.repo._gitdir.get_index = self.get_index + unpack_dir = self.tempfs.getsyspath('unpack-dir') + self.repo.extract_commit('e28a23812eadf2fce6583b8819b9c5dbd36b9fb9', + unpack_dir) + self.assertTrue(os.path.exists(unpack_dir)) + + morph_filename = os.path.join(unpack_dir, 'foo.morph') + self.assertTrue(os.path.exists(morph_filename)) + def test_successful_update(self): self.repo._gitdir.update_remotes = self.update_successfully self.repo.update() diff --git a/morphlib/gitindex.py b/morphlib/gitindex.py index e22f6225..c5c07bd6 100644 --- a/morphlib/gitindex.py +++ b/morphlib/gitindex.py @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2014 Codethink Limited +# Copyright (C) 2013-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -48,8 +48,16 @@ class GitIndex(object): def _run_git(self, *args, **kwargs): if self._index_file is not None: - kwargs['env'] = kwargs.get('env', dict(os.environ)) - kwargs['env']['GIT_INDEX_FILE'] = self._index_file + extra_env = kwargs.get('extra_env', {}) + extra_env['GIT_INDEX_FILE'] = self._index_file + kwargs['extra_env'] = extra_env + + if 'extra_env' in kwargs: + env = kwargs.get('env', dict(os.environ)) + env.update(kwargs['extra_env']) + kwargs['env'] = env + del kwargs['extra_env'] + return morphlib.git.gitcmd(self._gd._runcmd, *args, **kwargs) def _get_status(self): @@ -159,3 +167,11 @@ class GitIndex(object): def write_tree(self): '''Transform the index into a tree in the object store.''' return self._run_git('write-tree').strip() + + def checkout(self, working_tree=None): + '''Copy files from the index to the working tree.''' + if working_tree: + extra_env = {'GIT_WORK_TREE': working_tree} + else: + extra_env = {} + self._run_git('checkout-index', '--all', extra_env=extra_env) diff --git a/morphlib/gitindex_tests.py b/morphlib/gitindex_tests.py index 32d40a8c..3f9ff303 100644 --- a/morphlib/gitindex_tests.py +++ b/morphlib/gitindex_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2014 Codethink Limited +# Copyright (C) 2013-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -38,6 +38,8 @@ class GitIndexTests(unittest.TestCase): self.mirror = os.path.join(self.tempdir, 'mirror') morphlib.git.gitcmd(gd._runcmd, 'clone', '--mirror', self.dirname, self.mirror) + self.working_dir = os.path.join(self.tempdir, 'bar') + os.makedirs(self.working_dir) def tearDown(self): shutil.rmtree(self.tempdir) @@ -91,3 +93,15 @@ class GitIndexTests(unittest.TestCase): gd = morphlib.gitdir.GitDirectory(self.dirname) idx = gd.get_index() self.assertEqual(idx.write_tree(), gd.resolve_ref_to_tree(gd.HEAD)) + + def test_checkout(self): + gd = morphlib.gitdir.GitDirectory(self.dirname) + idx = gd.get_index() + idx.checkout(working_tree=self.working_dir) + self.assertTrue(os.path.exists(os.path.join(self.working_dir, 'foo'))) + + def test_checkout_without_working_dir(self): + gd = morphlib.gitdir.GitDirectory(self.dirname) + idx = gd.get_index() + idx.checkout() + self.assertTrue(os.path.exists(os.path.join(self.dirname, 'foo'))) diff --git a/morphlib/morphologyfactory.py b/morphlib/morphologyfactory.py deleted file mode 100644 index a3ac2749..00000000 --- a/morphlib/morphologyfactory.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (C) 2012-2014 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - -import os - -import morphlib -import cliapp - - -class MorphologyFactoryError(cliapp.AppException): - pass - - -class MorphologyNotFoundError(MorphologyFactoryError): - def __init__(self, filename): - MorphologyFactoryError.__init__( - self, "Couldn't find morphology: %s" % filename) - - -class NotcachedError(MorphologyFactoryError): - def __init__(self, repo_name): - MorphologyFactoryError.__init__( - self, "Repository %s is not cached locally and there is no " - "remote cache specified" % repo_name) - - -class MorphologyFactory(object): - - '''A way of creating morphologies which will provide a default''' - - def __init__(self, local_repo_cache, remote_repo_cache=None, - status_cb=None): - self._lrc = local_repo_cache - self._rrc = remote_repo_cache - - null_status_function = lambda **kwargs: None - self.status = status_cb or null_status_function - - def get_morphology(self, reponame, sha1, filename): - morph_name = os.path.splitext(os.path.basename(filename))[0] - loader = morphlib.morphloader.MorphologyLoader() - if self._lrc.has_repo(reponame): - self.status(msg="Looking for %s in local repo cache" % filename, - chatty=True) - try: - repo = self._lrc.get_repo(reponame) - text = repo.read_file(filename, sha1) - morph = loader.load_from_string(text) - except IOError: - morph = None - file_list = repo.list_files(ref=sha1, recurse=False) - elif self._rrc is not None: - self.status(msg="Retrieving %(reponame)s %(sha1)s %(filename)s" - " from the remote git cache.", - reponame=reponame, sha1=sha1, filename=filename, - chatty=True) - try: - text = self._rrc.cat_file(reponame, sha1, filename) - morph = loader.load_from_string(text) - except morphlib.remoterepocache.CatFileError: - morph = None - file_list = self._rrc.ls_tree(reponame, sha1) - else: - raise NotcachedError(reponame) - - if morph is None: - self.status(msg="File %s doesn't exist: attempting to infer " - "chunk morph from repo's build system" - % filename, chatty=True) - bs = morphlib.buildsystem.detect_build_system(file_list) - if bs is None: - raise MorphologyNotFoundError(filename) - morph = bs.get_morphology(morph_name) - loader.validate(morph) - loader.set_commands(morph) - loader.set_defaults(morph) - return morph diff --git a/morphlib/plugins/list_artifacts_plugin.py b/morphlib/plugins/list_artifacts_plugin.py index 6944cff4..53056bad 100644 --- a/morphlib/plugins/list_artifacts_plugin.py +++ b/morphlib/plugins/list_artifacts_plugin.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014 Codethink Limited +# Copyright (C) 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -86,6 +86,7 @@ class ListArtifactsPlugin(cliapp.Plugin): msg='Creating source pool for %s' % system_filename, chatty=True) source_pool = morphlib.sourceresolver.create_source_pool( self.lrc, self.rrc, repo, ref, system_filename, + cachedir=self.app.settings['cachedir'], update_repos = not self.app.settings['no-git-update'], status_cb=self.app.status) diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py index 3a328eb7..29069d7d 100644 --- a/morphlib/sourceresolver.py +++ b/morphlib/sourceresolver.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014 Codethink Limited +# Copyright (C) 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,20 +14,96 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import cliapp - import collections +import cPickle import logging +import os +import pylru +import shutil +import tempfile + +import cliapp import morphlib +tree_cache_size = 10000 +tree_cache_filename = 'trees.cache.pickle' +buildsystem_cache_size = 10000 +buildsystem_cache_filename = 'detected-chunk-buildsystems.cache.pickle' + + +class PickleCacheManager(object): # pragma: no cover + '''Cache manager for PyLRU that reads and writes to Pickle files. + + The 'pickle' format is less than ideal in many ways and is actually + slower than JSON in Python. However, the data we need to cache is keyed + by tuples and in JSON a dict can only be keyed with strings. For now, + using 'pickle' seems to be the least worst option. + + ''' + + def __init__(self, filename, size): + self.filename = filename + self.size = size + + def _populate_cache_from_file(self, filename, cache): + try: + with open(filename, 'r') as f: + data = cPickle.load(f) + for key, value in data.iteritems(): + cache[key] = value + except (EOFError, IOError, cPickle.PickleError) as e: + logging.warning('Failed to load cache %s: %s', self.filename, e) + + def load_cache(self): + '''Create a pylru.lrucache object prepopulated with saved data.''' + cache = pylru.lrucache(self.size) + # There should be a more efficient way to do this, by hooking into + # the json module directly. + self._populate_cache_from_file(self.filename, cache) + return cache + + def save_cache(self, cache): + '''Save the data from a pylru.lrucache object to disk. + + Any changes that have been made by other instances or processes since + load_cache() was called will be overwritten. + + ''' + data = {} + for key, value in cache.items(): + data[key] = value + try: + with morphlib.savefile.SaveFile(self.filename, 'w') as f: + cPickle.dump(data, f) + except (IOError, cPickle.PickleError) as e: + logging.warning('Failed to save cache to %s: %s', self.filename, e) + + +class SourceResolverError(cliapp.AppException): + pass + + +class MorphologyNotFoundError(SourceResolverError): # pragma: no cover + def __init__(self, filename): + SourceResolverError.__init__( + self, "Couldn't find morphology: %s" % filename) + + +class NotcachedError(SourceResolverError): + def __init__(self, repo_name): + SourceResolverError.__init__( + self, "Repository %s is not cached locally and there is no " + "remote cache specified" % repo_name) + class SourceResolver(object): '''Provides a way of resolving the set of sources for a given system. - There are two levels of caching involved in resolving the sources to build. + There are three levels of caching involved in resolving the sources to + build. - The canonical source for each source is specified in the build-command + The canonical repo for each source is specified in the build-command (for strata and systems) or in the stratum morphology (for chunks). It will be either a normal URL, or a keyed URL using a repo-alias like 'baserock:baserock/definitions'. @@ -44,25 +120,60 @@ class SourceResolver(object): entire repositories in $cachedir/gits. If a repo is not in the remote repo cache then it must be present in the local repo cache. + The third layer of caching is a simple commit SHA1 -> tree SHA mapping. It + turns out that even if all repos are available locally, running + 'git rev-parse' on hundreds of repos requires a lot of IO and can take + several minutes. Likewise, on a slow network connection it is time + consuming to keep querying the remote repo cache. This third layer of + caching works around both of those issues. + + The need for 3 levels of caching highlights design inconsistencies in + Baserock, but for now it is worth the effort to maintain this code to save + users from waiting 7 minutes each time that they want to build. The level 3 + cache is fairly simple because commits are immutable, so there is no danger + of this cache being stale as long as it is indexed by commit SHA1. Due to + the policy in Baserock of always using a commit SHA1 (rather than a named + ref) in the system definitions, it makes repeated builds of a system very + fast as no resolution needs to be done at all. + ''' - def __init__(self, local_repo_cache, remote_repo_cache, update_repos, + def __init__(self, local_repo_cache, remote_repo_cache, + tree_cache_manager, buildsystem_cache_manager, update_repos, status_cb=None): self.lrc = local_repo_cache self.rrc = remote_repo_cache + self.tree_cache_manager = tree_cache_manager + self.buildsystem_cache_manager = buildsystem_cache_manager self.update = update_repos - self.status = status_cb - def resolve_ref(self, reponame, ref): + self._resolved_trees = {} + self._resolved_morphologies = {} + self._resolved_buildsystems = {} + + self._definitions_checkout_dir = None + + def _resolve_ref(self, reponame, ref): # pragma: no cover '''Resolves commit and tree sha1s of the ref in a repo and returns it. - If update is True then this has the side-effect of updating - or cloning the repository into the local repo cache. + If update is True then this has the side-effect of updating or cloning + the repository into the local repo cache. + + This function is complex due to the 3 layers of caching described in + the SourceResolver docstring. + ''' - absref = None + # The Baserock reference definitions use absolute refs so, and, if the + # absref is cached, we can short-circuit all this code. + if (reponame, ref) in self._resolved_trees: + logging.debug('Returning tree (%s, %s) from tree cache', + reponame, ref) + return ref, self._resolved_trees[(reponame, ref)] + + absref = None if self.lrc.has_repo(reponame): repo = self.lrc.get_repo(reponame) if self.update and repo.requires_update_for_ref(ref): @@ -91,42 +202,139 @@ class SourceResolver(object): repo = self.lrc.cache_repo(reponame) repo.update() else: + # This is likely to raise an exception, because if the local + # repo cache had the repo we'd have already resolved the ref. repo = self.lrc.get_repo(reponame) absref = repo.resolve_ref_to_commit(ref) tree = repo.resolve_ref_to_tree(absref) + + logging.debug('Writing tree to cache with ref (%s, %s)', + reponame, absref) + self._resolved_trees[(reponame, absref)] = tree + return absref, tree - def traverse_morphs(self, definitions_repo, definitions_ref, - system_filenames, - visit=lambda rn, rf, fn, arf, m: None, - definitions_original_ref=None): - morph_factory = morphlib.morphologyfactory.MorphologyFactory( - self.lrc, self.rrc, self.status) - definitions_queue = collections.deque(system_filenames) - chunk_in_definitions_repo_queue = [] - chunk_in_source_repo_queue = [] + def _get_morphology(self, reponame, sha1, filename): + '''Read the morphology at the specified location. - resolved_commits = {} - resolved_trees = {} - resolved_morphologies = {} + Returns None if the file does not exist in the specified commit. - # Resolve the (repo, ref) pair for the definitions repo, cache result. - definitions_absref, definitions_tree = self.resolve_ref( - definitions_repo, definitions_ref) + ''' + key = (reponame, sha1, filename) + if key in self._resolved_morphologies: + return self._resolved_morphologies[key] + + if reponame == self._definitions_repo and \ + sha1 == self._definitions_absref: # pragma: no cover + defs_filename = os.path.join(self._definitions_checkout_dir, + filename) + else: + defs_filename = None + + + loader = morphlib.morphloader.MorphologyLoader() + if defs_filename and os.path.exists(defs_filename): # pragma: no cover + morph = loader.load_from_file(defs_filename) + elif self.lrc.has_repo(reponame): + self.status(msg="Looking for %(reponame)s:%(filename)s in the " + "local repo cache.", + reponame=reponame, filename=filename, chatty=True) + try: + repo = self.lrc.get_repo(reponame) + text = repo.read_file(filename, sha1) + morph = loader.load_from_string(text) + except IOError: + morph = None + file_list = repo.list_files(ref=sha1, recurse=False) + elif self.rrc is not None: + self.status(msg="Looking for %(reponame)s:%(filename)s in the " + "remote repo cache.", + reponame=reponame, filename=filename, chatty=True) + try: + text = self.rrc.cat_file(reponame, sha1, filename) + morph = loader.load_from_string(text) + except morphlib.remoterepocache.CatFileError: + morph = None + else: + # We assume that _resolve_ref() must have already been called and + # so the repo in question would have been made available already + # if it had been possible. + raise NotcachedError(reponame) + + if morph is None: + return None + else: + loader.validate(morph) + loader.set_commands(morph) + loader.set_defaults(morph) + self._resolved_morphologies[key] = morph + return morph + + def _detect_build_system(self, reponame, sha1, expected_filename): + '''Attempt to detect buildsystem of the given commit. + + Returns None if no known build system was detected. - if definitions_original_ref: - definitions_ref = definitions_original_ref + ''' + self.status(msg="File %s doesn't exist: attempting to infer " + "chunk morph from repo's build system" % + expected_filename, chatty=True) + + if self.lrc.has_repo(reponame): + repo = self.lrc.get_repo(reponame) + file_list = repo.list_files(ref=sha1, recurse=False) + elif self.rrc is not None: + file_list = self.rrc.ls_tree(reponame, sha1) + else: + # We assume that _resolve_ref() must have already been called and + # so the repo in question would have been made available already + # if it had been possible. + raise NotcachedError(reponame) + + buildsystem = morphlib.buildsystem.detect_build_system(file_list) + + if buildsystem is None: + # It might surprise you to discover that if we can't autodetect a + # build system, we raise MorphologyNotFoundError. Users are + # required to provide a morphology for any chunk where Morph can't + # infer the build instructions automatically, so this is the right + # error. + raise MorphologyNotFoundError(expected_filename) + + return buildsystem.name + + def _create_morphology_for_build_system(self, buildsystem_name, + morph_name): # pragma: no cover + bs = morphlib.buildsystem.lookup_build_system(buildsystem_name) + loader = morphlib.morphloader.MorphologyLoader() + morph = bs.get_morphology(morph_name) + loader.validate(morph) + loader.set_commands(morph) + loader.set_defaults(morph) + return morph + + def _process_definitions_with_children(self, system_filenames, + definitions_repo, + definitions_ref, + definitions_absref, + definitions_tree, + visit): # pragma: no cover + definitions_queue = collections.deque(system_filenames) + chunk_in_definitions_repo_queue = set() + chunk_in_source_repo_queue = set() while definitions_queue: filename = definitions_queue.popleft() - key = (definitions_repo, definitions_absref, filename) - if not key in resolved_morphologies: - resolved_morphologies[key] = morph_factory.get_morphology(*key) - morphology = resolved_morphologies[key] + morphology = self._get_morphology( + definitions_repo, definitions_absref, filename) + + if morphology is None: + raise MorphologyNotFoundError(filename) visit(definitions_repo, definitions_ref, filename, definitions_absref, definitions_tree, morphology) + if morphology['kind'] == 'cluster': raise cliapp.AppException( "Cannot build a morphology of type 'cluster'.") @@ -143,42 +351,109 @@ class SourceResolver(object): if 'morph' not in c: path = morphlib.util.sanitise_morphology_path( c.get('morph', c['name'])) - chunk_in_source_repo_queue.append( + chunk_in_source_repo_queue.add( (c['repo'], c['ref'], path)) continue - chunk_in_definitions_repo_queue.append( + chunk_in_definitions_repo_queue.add( (c['repo'], c['ref'], c['morph'])) - for repo, ref, filename in chunk_in_definitions_repo_queue: - if (repo, ref) not in resolved_trees: - commit_sha1, tree_sha1 = self.resolve_ref(repo, ref) - resolved_commits[repo, ref] = commit_sha1 - resolved_trees[repo, commit_sha1] = tree_sha1 - absref = resolved_commits[repo, ref] - tree = resolved_trees[repo, absref] - key = (definitions_repo, definitions_absref, filename) - if not key in resolved_morphologies: - resolved_morphologies[key] = morph_factory.get_morphology(*key) - morphology = resolved_morphologies[key] - visit(repo, ref, filename, absref, tree, morphology) - - for repo, ref, filename in chunk_in_source_repo_queue: - if (repo, ref) not in resolved_trees: - commit_sha1, tree_sha1 = self.resolve_ref(repo, ref) - resolved_commits[repo, ref] = commit_sha1 - resolved_trees[repo, commit_sha1] = tree_sha1 - absref = resolved_commits[repo, ref] - tree = resolved_trees[repo, absref] - key = (repo, absref, filename) - if key not in resolved_morphologies: - resolved_morphologies[key] = morph_factory.get_morphology(*key) - morphology = resolved_morphologies[key] - visit(repo, ref, filename, absref, tree, morphology) - - -def create_source_pool(lrc, rrc, repo, ref, filename, + return chunk_in_definitions_repo_queue, chunk_in_source_repo_queue + + def process_chunk(self, definition_repo, definition_ref, chunk_repo, + chunk_ref, filename, visit): # pragma: no cover + definition_key = (definition_repo, definition_ref, filename) + chunk_key = (chunk_repo, chunk_ref, filename) + + morph_name = os.path.splitext(os.path.basename(filename))[0] + + morphology = None + buildsystem = None + + if chunk_key in self._resolved_buildsystems: + buildsystem = self._resolved_buildsystems[chunk_key] + + if buildsystem is None: + # The morphologies aren't locally cached, so a morphology + # for a chunk kept in the chunk repo will be read every time. + # So, always keep your chunk morphs in your definitions repo, + # not in the chunk repo! + morphology = self._get_morphology(*definition_key) + + if morphology is None: + if buildsystem is None: + buildsystem = self._detect_build_system(*chunk_key) + if buildsystem is None: + raise MorphologyNotFoundError(filename) + else: + self._resolved_buildsystems[chunk_key] = buildsystem + morphology = self._create_morphology_for_build_system( + buildsystem, morph_name) + self._resolved_morphologies[definition_key] = morphology + + absref, tree = self._resolve_ref(chunk_repo, chunk_ref) + visit(chunk_repo, chunk_ref, filename, absref, tree, morphology) + + def traverse_morphs(self, definitions_repo, definitions_ref, + system_filenames, + visit=lambda rn, rf, fn, arf, m: None, + definitions_original_ref=None): # pragma: no cover + self._resolved_trees = self.tree_cache_manager.load_cache() + self._resolved_buildsystems = \ + self.buildsystem_cache_manager.load_cache() + + # Resolve the (repo, ref) pair for the definitions repo, cache result. + definitions_absref, definitions_tree = self._resolve_ref( + definitions_repo, definitions_ref) + + if definitions_original_ref: + definitions_ref = definitions_original_ref + + self._definitions_checkout_dir = tempfile.mkdtemp() + + try: + # FIXME: not an ideal way of passing this info across + self._definitions_repo = definitions_repo + self._definitions_absref = definitions_absref + try: + definitions_cached_repo = self.lrc.get_repo(definitions_repo) + except morphlib.localrepocache.NotCached: + definitions_cached_repo = self.lrc.cache_repo(definitions_repo) + definitions_cached_repo.extract_commit( + definitions_absref, self._definitions_checkout_dir) + + # First, process the system and its stratum morphologies. These + # will all live in the same Git repository, and will point to + # various chunk morphologies. + chunk_in_definitions_repo_queue, chunk_in_source_repo_queue = \ + self._process_definitions_with_children( + system_filenames, definitions_repo, definitions_ref, + definitions_absref, definitions_tree, visit) + + # Now process all the chunks involved in the build. First those + # with morphologies in definitions.git, and then (for compatibility + # reasons only) those with the morphology in the chunk's source + # repository. + for repo, ref, filename in chunk_in_definitions_repo_queue: + self.process_chunk(definitions_repo, definitions_absref, repo, + ref, filename, visit) + + for repo, ref, filename in chunk_in_source_repo_queue: + self.process_chunk(repo, ref, repo, ref, filename, visit) + finally: + shutil.rmtree(self._definitions_checkout_dir) + self._definitions_checkout_dir = None + + logging.debug('Saving contents of resolved tree cache') + self.tree_cache_manager.save_cache(self._resolved_trees) + + logging.debug('Saving contents of build systems cache') + self.buildsystem_cache_manager.save_cache( + self._resolved_buildsystems) + + +def create_source_pool(lrc, rrc, repo, ref, filename, cachedir, original_ref=None, update_repos=True, - status_cb=None): + status_cb=None): # pragma: no cover '''Find all the sources involved in building a given system. Given a system morphology, this function will traverse the tree of stratum @@ -202,7 +477,16 @@ def create_source_pool(lrc, rrc, repo, ref, filename, for source in sources: pool.add(source) - resolver = SourceResolver(lrc, rrc, update_repos, status_cb) + tree_cache_manager = PickleCacheManager( + os.path.join(cachedir, tree_cache_filename), tree_cache_size) + + buildsystem_cache_manager = PickleCacheManager( + os.path.join(cachedir, buildsystem_cache_filename), + buildsystem_cache_size) + + resolver = SourceResolver(lrc, rrc, tree_cache_manager, + buildsystem_cache_manager, update_repos, + status_cb) resolver.traverse_morphs(repo, ref, [filename], visit=add_to_pool, definitions_original_ref=original_ref) diff --git a/morphlib/morphologyfactory_tests.py b/morphlib/sourceresolver_tests.py index 5222ca6d..2410218a 100644 --- a/morphlib/morphologyfactory_tests.py +++ b/morphlib/sourceresolver_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,12 +14,16 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +import os +import shutil +import tempfile import unittest import morphlib -from morphlib.morphologyfactory import (MorphologyFactory, - MorphologyNotFoundError, - NotcachedError) +from morphlib.sourceresolver import (SourceResolver, + PickleCacheManager, + MorphologyNotFoundError, + NotcachedError) from morphlib.remoterepocache import CatFileError @@ -123,11 +127,9 @@ class FakeLocalRepo(object): } return self.morphologies[filename] % values elif filename.endswith('.morph'): - return '''{ - "name": "%s", - "kind": "chunk", - "build-system": "dummy" - }''' % filename[:-len('.morph')] + return '''name: %s + kind: chunk + build-system: dummy''' % filename[:-len('.morph')] return 'text' def list_files(self, ref, recurse): @@ -146,14 +148,39 @@ class FakeLocalRepoCache(object): return self.lr -class MorphologyFactoryTests(unittest.TestCase): +class SourceResolverTests(unittest.TestCase): def setUp(self): + # create temp "definitions" repo + # set self.sr._definitions_repo to that + # trick it into presenting temp repo using FakeLocalRepoCache + # magic self.lr = FakeLocalRepo() self.lrc = FakeLocalRepoCache(self.lr) self.rrc = FakeRemoteRepoCache() - self.mf = MorphologyFactory(self.lrc, self.rrc) - self.lmf = MorphologyFactory(self.lrc, None) + + self.cachedir = tempfile.mkdtemp() + buildsystem_cache_file = os.path.join(self.cachedir, + 'detected-chunk-buildsystems.cache.pickle') + buildsystem_cache_manager = PickleCacheManager( + buildsystem_cache_file, 1000) + + tree_cache_file = os.path.join(self.cachedir, 'trees.cache.pickle') + tree_cache_manager = PickleCacheManager(tree_cache_file, 1000) + + def status(msg='', **kwargs): + pass + + self.sr = SourceResolver(self.lrc, self.rrc, tree_cache_manager, + buildsystem_cache_manager, True, status) + self.lsr = SourceResolver(self.lrc, None, tree_cache_manager, + buildsystem_cache_manager, True, status) + + self.sr._definitions_repo = None + self.lsr._definitions_repo = None + + def tearDown(self): + shutil.rmtree(self.cachedir) def nolocalfile(self, *args): raise IOError('File not found') @@ -172,6 +199,9 @@ class MorphologyFactoryTests(unittest.TestCase): def autotoolsbuildsystem(self, *args, **kwargs): return ['configure.in'] + def emptytree(self, *args, **kwargs): + return [] + def remotemorph(self, *args, **kwargs): return ['remote-chunk.morph'] @@ -185,97 +215,117 @@ class MorphologyFactoryTests(unittest.TestCase): def test_gets_morph_from_local_repo(self): self.lr.list_files = self.localmorph - morph = self.mf.get_morphology('reponame', 'sha1', + morph = self.sr._get_morphology('reponame', 'sha1', 'chunk.morph') self.assertEqual('chunk', morph['name']) + def test_gets_morph_from_cache(self): + self.lr.list_files = self.localmorph + morph_from_repo = self.sr._get_morphology('reponame', 'sha1', + 'chunk.morph') + morph_from_cache = self.sr._get_morphology('reponame', 'sha1', + 'chunk.morph') + self.assertEqual(morph_from_repo, morph_from_cache) + def test_gets_morph_from_remote_repo(self): self.rrc.ls_tree = self.remotemorph self.lrc.has_repo = self.doesnothaverepo - morph = self.mf.get_morphology('reponame', 'sha1', + morph = self.sr._get_morphology('reponame', 'sha1', 'remote-chunk.morph') self.assertEqual('remote-chunk', morph['name']) def test_autodetects_local_morphology(self): self.lr.read_file = self.nolocalmorph self.lr.list_files = self.autotoolsbuildsystem - morph = self.mf.get_morphology('reponame', 'sha1', - 'assumed-local.morph') - self.assertEqual('assumed-local', morph['name']) + name = self.sr._detect_build_system('reponame', 'sha1', + 'assumed-local.morph') + self.assertEqual('autotools', name) def test_autodetects_remote_morphology(self): self.lrc.has_repo = self.doesnothaverepo self.rrc.cat_file = self.noremotemorph self.rrc.ls_tree = self.autotoolsbuildsystem - morph = self.mf.get_morphology('reponame', 'sha1', - 'assumed-remote.morph') - self.assertEqual('assumed-remote', morph['name']) + name = self.sr._detect_build_system('reponame', 'sha1', + 'assumed-remote.morph') + self.assertEqual('autotools', name) - def test_raises_error_when_no_local_morph(self): + def test_returns_none_when_no_local_morph(self): self.lr.read_file = self.nolocalfile - self.assertRaises(MorphologyNotFoundError, self.mf.get_morphology, - 'reponame', 'sha1', 'unreached.morph') + morph = self.sr._get_morphology('reponame', 'sha1', 'unreached.morph') + self.assertEqual(morph, None) - def test_raises_error_when_fails_no_remote_morph(self): + def test_returns_none_when_fails_no_remote_morph(self): self.lrc.has_repo = self.doesnothaverepo self.rrc.cat_file = self.noremotefile - self.assertRaises(MorphologyNotFoundError, self.mf.get_morphology, - 'reponame', 'sha1', 'unreached.morph') + morph = self.sr._get_morphology('reponame', 'sha1', 'unreached.morph') + self.assertEqual(morph, None) + + def test_raises_error_when_repo_does_not_exist(self): + self.lrc.has_repo = self.doesnothaverepo + self.assertRaises(NotcachedError, self.lsr._detect_build_system, + 'reponame', 'sha1', 'non-existent.morph') + + def test_raises_error_when_failed_to_detect_build_system(self): + self.lr.read_file = self.nolocalfile + self.lr.list_files = self.emptytree + self.assertRaises(MorphologyNotFoundError, + self.sr._detect_build_system, + 'reponame', 'sha1', 'undetected.morph') def test_raises_error_when_name_mismatches(self): - self.assertRaises(morphlib.Error, self.mf.get_morphology, + self.assertRaises(morphlib.Error, self.sr._get_morphology, 'reponame', 'sha1', 'name-mismatch.morph') def test_looks_locally_with_no_remote(self): self.lr.list_files = self.localmorph - morph = self.lmf.get_morphology('reponame', 'sha1', - 'chunk.morph') + morph = self.lsr._get_morphology('reponame', 'sha1', + 'chunk.morph') self.assertEqual('chunk', morph['name']) def test_autodetects_locally_with_no_remote(self): self.lr.read_file = self.nolocalmorph self.lr.list_files = self.autotoolsbuildsystem - morph = self.mf.get_morphology('reponame', 'sha1', - 'assumed-local.morph') - self.assertEqual('assumed-local', morph['name']) + name = self.sr._detect_build_system('reponame', 'sha1', + 'assumed-local.morph') + self.assertEqual('autotools', name) def test_fails_when_local_not_cached_and_no_remote(self): self.lrc.has_repo = self.doesnothaverepo - self.assertRaises(NotcachedError, self.lmf.get_morphology, + self.assertRaises(NotcachedError, self.lsr._get_morphology, 'reponame', 'sha1', 'unreached.morph') def test_arch_is_validated(self): self.lr.arch = 'unknown' - self.assertRaises(morphlib.Error, self.mf.get_morphology, + self.assertRaises(morphlib.Error, self.sr._get_morphology, 'reponame', 'sha1', 'system.morph') def test_arch_arm_defaults_to_le(self): self.lr.arch = 'armv7' - morph = self.mf.get_morphology('reponame', 'sha1', 'system.morph') + morph = self.sr._get_morphology('reponame', 'sha1', 'system.morph') self.assertEqual(morph['arch'], 'armv7l') def test_fails_on_parse_error(self): - self.assertRaises(morphlib.Error, self.mf.get_morphology, + self.assertRaises(morphlib.Error, self.sr._get_morphology, 'reponame', 'sha1', 'parse-error.morph') def test_fails_on_no_chunk_bdeps(self): self.assertRaises(morphlib.morphloader.NoBuildDependenciesError, - self.mf.get_morphology, 'reponame', 'sha1', + self.sr._get_morphology, 'reponame', 'sha1', 'stratum-no-chunk-bdeps.morph') def test_fails_on_no_bdeps_or_bootstrap(self): self.assertRaises( morphlib.morphloader.NoStratumBuildDependenciesError, - self.mf.get_morphology, 'reponame', 'sha1', + self.sr._get_morphology, 'reponame', 'sha1', 'stratum-no-bdeps-no-bootstrap.morph') def test_succeeds_on_bdeps_no_bootstrap(self): - self.mf.get_morphology( + self.sr._get_morphology( 'reponame', 'sha1', 'stratum-bdeps-no-bootstrap.morph') def test_fails_on_empty_stratum(self): self.assertRaises( morphlib.morphloader.EmptyStratumError, - self.mf.get_morphology, 'reponame', 'sha1', 'stratum-empty.morph') + self.sr._get_morphology, 'reponame', 'sha1', 'stratum-empty.morph') diff --git a/tests.build/empty-stratum.exit b/tests.build/empty-stratum.exit deleted file mode 100644 index d00491fd..00000000 --- a/tests.build/empty-stratum.exit +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests.build/empty-stratum.script b/tests.build/empty-stratum.script deleted file mode 100755 index 19c36558..00000000 --- a/tests.build/empty-stratum.script +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# -# Copyright (C) 2013-2014 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -set -eu - -morphsrepo="$DATADIR/morphs-repo" -cd "$morphsrepo" - -git checkout --quiet -b empty-stratum - -# Create empty stratum to test S4585 -cat <<EOF > hello-stratum.morph -name: hello-stratum -kind: stratum -EOF -sed -i 's/master/empty-stratum/' hello-system.morph -git add hello-stratum.morph hello-system.morph - -git commit --quiet -m "add empty stratum" - -"$SRCDIR/scripts/test-morph" build-morphology \ - test:morphs-repo empty-stratum hello-system diff --git a/tests.build/empty-stratum.stderr b/tests.build/empty-stratum.stderr deleted file mode 100644 index 6a4ecb05..00000000 --- a/tests.build/empty-stratum.stderr +++ /dev/null @@ -1 +0,0 @@ -ERROR: Stratum hello-stratum has no chunks in string diff --git a/without-test-modules b/without-test-modules index 530deb4f..55e5291d 100644 --- a/without-test-modules +++ b/without-test-modules @@ -52,7 +52,3 @@ distbuild/timer_event_source.py distbuild/worker_build_scheduler.py # Not unit tested, since it needs a full system branch morphlib/buildbranch.py - -# Requires rather a lot of fake data in order to be unit tested; better to -# leave it to the functional tests. -morphlib/sourceresolver.py diff --git a/yarns/branches-workspaces.yarn b/yarns/branches-workspaces.yarn index 34aa97e0..a757822e 100644 --- a/yarns/branches-workspaces.yarn +++ b/yarns/branches-workspaces.yarn @@ -233,6 +233,7 @@ build branch is made to include that change. WHEN the user makes changes to test-chunk in branch master AND the user builds systems/test-system.morph of the master branch THEN the changes to test-chunk in branch master are included in the temporary build branch + FINALLY the git server is shut down ### When branches are created ### diff --git a/yarns/building.yarn b/yarns/building.yarn index 52f2b561..b5e46b73 100644 --- a/yarns/building.yarn +++ b/yarns/building.yarn @@ -63,6 +63,7 @@ so when we deploy the system, we can check whether it exists. WHEN the user attempts to deploy the cluster test-cluster.morph in branch master with options test-system.location="$DATADIR/test.tar" THEN morph succeeded AND tarball test.tar contains etc/passwd + FINALLY the git server is shut down Distbuilding ------------ @@ -100,3 +101,14 @@ repos cached locally. AND the distbuild worker is terminated AND the communal cache server is terminated AND the git server is shut down + +Empty strata don't build +------------------------ + + SCENARIO empty-strata + GIVEN a workspace + AND a git server + WHEN the user checks out the system branch called empty-stratum + AND the user attempts to build the system systems/empty-stratum-system.morph in branch empty-stratum + THEN morph failed + FINALLY the git server is shut down diff --git a/yarns/deployment.yarn b/yarns/deployment.yarn index 47aeff5d..6ec8c0af 100644 --- a/yarns/deployment.yarn +++ b/yarns/deployment.yarn @@ -345,3 +345,4 @@ Once it is rebuilt, it can be deployed. WHEN the user attempts to deploy the cluster test-cluster.morph in branch mybranch THEN morph succeeded AND file workspace/mybranch/test/morphs/test-system.tar exists + FINALLY the git server is shut down diff --git a/yarns/implementations.yarn b/yarns/implementations.yarn index 8b43286f..2557e2e5 100644 --- a/yarns/implementations.yarn +++ b/yarns/implementations.yarn @@ -336,6 +336,32 @@ another to hold a chunk. git commit -m Initial. git tag -a "test-tag" -m "Tagging test-tag" + # A new branch is created here as the presence of an empty stratum will + # break any morph commands which load all definitions in the repository. + git checkout -b empty-stratum + + install -m644 -D /dev/stdin << EOF "systems/empty-stratum-system.morph" + name: empty-stratum-system + kind: system + arch: $arch + strata: + - name: build-essential + morph: strata/build-essential.morph + - name: core + morph: strata/core.morph + - name: empty + morph: strata/empty.morph + EOF + + install -m644 -D /dev/stdin << EOF "strata/empty.morph" + name: empty + kind: stratum + EOF + + git add . + git commit -m 'Add an empty stratum' + git checkout master + # Start a git daemon to serve our git repositories port_file="$DATADIR/git-daemon-port" pid_file="$DATADIR/git-daemon-pid" |