From ddb8b27d5056074013a89d8ab277e39fdc1f069d Mon Sep 17 00:00:00 2001
From: Tiago Gomes
Date: Fri, 20 Nov 2015 13:57:31 +0000
Subject: WIP Add support for multiple sources per chunk

TODO:
- Add API to the cache server to retrieve a submodule commit from a
  given path.
- Fix cross-bootstrap command.

Change-Id: I3475c2bcb648a272fee33bc878a521f79d4e6581
---
 morphlib/sourceresolver.py | 95 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 78 insertions(+), 17 deletions(-)

(limited to 'morphlib/sourceresolver.py')

diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py
index 5d04ece9..a56b6594 100644
--- a/morphlib/sourceresolver.py
+++ b/morphlib/sourceresolver.py
@@ -153,7 +153,7 @@ class SourceResolver(object):
     The third layer of caching is a simple commit SHA1 -> tree SHA mapping.
     It turns out that even if all repos are available locally, running
     'git rev-parse' on hundreds of repos requires a lot of IO and can take
-    several minutes. Likewise, on a slow network connection it is time
+    several minutes. Likewise, on a slow network connection it is time
     consuming to keep querying the remote repo cache. This third layer of
     caching works around both of those issues.
 
@@ -259,8 +259,59 @@ class SourceResolver(object):
 
         return morph
 
+    def _resolve_subtrees(self, parent_reponame, parent_ref, extra_sources,
+                          resolved_trees):
+
+        subtrees = []
+
+        def resolve_extra_refs(reponame, ref, extra_sources, base_path):
+            for extra_source in extra_sources:
+                if (extra_source['repo'], extra_source['path']) in (
+                        resolved_trees):
+                    tree = resolved_trees[(extra_source['repo'],
+                                           extra_source['path'])]
+                else:
+                    subref = extra_source.get('ref')
+                    if not subref:
+                        if self.lrc.has_repo(reponame):
+                            repo = self.lrc.get_repo(reponame)
+                            if self.update and (
+                                    repo.requires_update_for_ref(ref)):
+                                self.status(msg='Updating cached git '
+                                                'repository %(reponame)s for '
+                                                'ref %(ref)s',
+                                            reponame=reponame, ref=ref)
+                                repo.update()
+                        else:
+                            # TODO Add support to the cache server for
+                            # retrieving the submodule commit from a path
+                            self.status(msg='Updating cached git repository '
+                                            '%(reponame)s for ref %(ref)s',
+                                        reponame=reponame, ref=ref)
+                            repo = self.lrc.get_updated_repo(reponame, ref)
+                        subref = repo.get_submodule_commit(
+                            ref, extra_source['path'])
+                    _, tree = self._resolve_ref(resolved_trees,
+                                                extra_source['repo'], subref)
+                    resolved_trees[(extra_source['repo'],
+                                    extra_source['path'])] = tree
+                path = os.path.normpath(os.path.join(base_path,
+                                                     extra_source['path']))
+                subtrees.append({'path': path, 'tree': tree})
+                if 'extra-sources' in extra_source:
+                    resolve_extra_refs(extra_source['repo'],
+                                       extra_source['ref'],
+                                       extra_source['extra-sources'],
+                                       path)
+
+        if extra_sources:
+            resolve_extra_refs(parent_reponame, parent_ref,
+                               extra_sources, '.')
+        return subtrees
+
     def _process_definitions_with_children(self, resolved_morphologies,
+                                           resolved_trees,
                                            definitions_checkout_dir,
                                            definitions_repo,
                                            definitions_ref,
@@ -274,7 +325,7 @@ class SourceResolver(object):
         # We don't need system's filename, so use 'None'
         definitions_queue = collections.deque((None, f)
                                               for f in system_filenames)
-        chunk_queue = set()
+        chunk_queue = []
 
         def get_morphology(filename):
             return self._get_morphology(resolved_morphologies,
@@ -291,7 +342,7 @@ class SourceResolver(object):
 
             visit(definitions_repo, definitions_ref, filename,
                   definitions_absref, definitions_tree, morphology,
-                  predefined_split_rules)
+                  predefined_split_rules, [])
 
             if morphology['kind'] == 'cluster':
                 raise cliapp.AppException(
@@ -313,6 +364,9 @@ class SourceResolver(object):
                     sanitise_morphology_path(s['morph']))
                 for s in morphology['build-depends'])
             for c in morphology['chunks']:
+                extra_sources = c.get('extra-sources')
+                subtrees = self._resolve_subtrees(c['repo'], c['ref'],
+                                                  extra_sources, resolved_trees)
                 if 'morph' in c:
                     # Now, does this path actually exist?
                     path = c['morph']
@@ -322,15 +376,16 @@ class SourceResolver(object):
                         raise MorphologyReferenceNotFoundError(
                             path, filename)
 
-                    chunk_queue.add((c['name'], c['repo'], c['ref'],
-                                     path, None))
+                    chunk_queue.append((c['name'], c['repo'], c['ref'],
+                                        path, None, subtrees))
                 else:
                     # We invent a filename here, so that the rest of the
                     # Morph code doesn't need to know about the predefined
                     # build instructions.
                     chunk_filename = c['name'] + '.morph'
-                    chunk_queue.add((c["name"], c['repo'], c['ref'],
-                                     chunk_filename, c['build-system']))
+                    chunk_queue.append((c['name'], c['repo'], c['ref'],
+                                        chunk_filename, c['build-system'],
+                                        subtrees))
 
         return chunk_queue
 
@@ -345,7 +400,8 @@ class SourceResolver(object):
     def process_chunk(self, resolved_morphologies, resolved_trees,
                       definitions_checkout_dir, morph_loader, chunk_name,
                       chunk_repo, chunk_ref, filename, chunk_buildsystem,
-                      visit, predefined_build_systems, predefined_split_rules):
+                      subtrees, visit, predefined_build_systems,
+                      predefined_split_rules):
         absref, tree = self._resolve_ref(resolved_trees, chunk_repo, chunk_ref)
 
         if chunk_buildsystem is None:
@@ -373,10 +429,10 @@ class SourceResolver(object):
                 morph_loader, buildsystem, chunk_name)
 
         visit(chunk_repo, chunk_ref, filename, absref, tree, morphology,
-              predefined_split_rules)
+              predefined_split_rules, subtrees)
 
     def traverse_morphs(self, definitions_repo, definitions_ref,
-                        system_filenames,
+                        system_filenames, pool,
                         visit=lambda rn, rf, fn, arf, m: None,
                         definitions_original_ref=None):
 
@@ -403,6 +459,7 @@ class SourceResolver(object):
 
             definitions_version = self._check_version_file(
                 definitions_checkout_dir)
+            pool.definitions_version = definitions_version
 
             predefined_build_systems, predefined_split_rules = \
                 self._get_defaults(
@@ -415,19 +472,23 @@ class SourceResolver(object):
             # will all live in the same Git repository, and will point to
             # various chunk morphologies.
             chunk_queue = self._process_definitions_with_children(
-                resolved_morphologies, definitions_checkout_dir,
-                definitions_repo, definitions_ref, definitions_absref,
+                resolved_morphologies, resolved_trees,
+                definitions_checkout_dir, definitions_repo,
+                definitions_ref, definitions_absref,
                 definitions_tree, morph_loader, system_filenames, visit,
                 predefined_split_rules)
 
             # Now process all the chunks involved in the build.
-            for name, repo, ref, filename, buildsystem in chunk_queue:
+            for name, repo, ref, filename, buildsystem, extra_sources in (
+                    chunk_queue):
                 self.process_chunk(resolved_morphologies, resolved_trees,
                                    definitions_checkout_dir, morph_loader, name,
                                    repo, ref, filename, buildsystem,
-                                   visit, predefined_build_systems,
+                                   extra_sources, visit,
+                                   predefined_build_systems,
                                    predefined_split_rules)
 
+
 class DuplicateChunkError(morphlib.Error):
 
     def _make_msg(self, (name, sources)):  # pragma: no cover
@@ -478,7 +539,7 @@ def create_source_pool(repo_cache, repo, ref, filenames,
     pool = morphlib.sourcepool.SourcePool()
 
     def add_to_pool(reponame, ref, filename, absref, tree, morphology,
-                    predefined_split_rules):
+                    predefined_split_rules, subtrees):
        # If there are duplicate chunks which have the same 'name' and the
        # same build instructions, we might cause a stack overflow in
        # cachekeycomputer.py when trying to hash the build graph. The
@@ -493,7 +554,7 @@ def create_source_pool(repo_cache, repo, ref, filenames,
 
         sources = morphlib.source.make_sources(
             reponame, ref, filename, absref, tree, morphology,
-            predefined_split_rules)
+            predefined_split_rules, subtrees)
         for source in sources:
             pool.add(source)
 
@@ -503,7 +564,7 @@ def create_source_pool(repo_cache, repo, ref, filenames,
 
     resolver = SourceResolver(repo_cache, tree_cache_manager, status_cb)
     resolver.traverse_morphs(repo, ref, filenames,
-                             visit=add_to_pool,
+                             pool, visit=add_to_pool,
                              definitions_original_ref=original_ref)
 
     # No two chunks may have the same name
--
cgit v1.2.1
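
Note on the definitions data this patch reads: a chunk's 'extra-sources' field
is treated as a list of dictionaries, each with 'repo' and 'path' keys, an
optional 'ref', and optionally its own nested 'extra-sources'. The example
below is hypothetical, reconstructed only from the keys accessed in the code
above; the chunk name, repository aliases, paths and refs are invented.

# Hypothetical chunk entry, written as the Python dict the resolver sees
# once the definitions have been loaded.
chunk = {
    'name': 'example-chunk',                       # invented
    'repo': 'upstream:example',                    # invented repo alias
    'ref': 'master',
    'build-system': 'autotools',
    'extra-sources': [
        {
            'repo': 'upstream:example-submodule',  # invented
            'path': 'bundled/submodule',           # path inside the chunk checkout
            # 'ref' may be omitted; _resolve_subtrees then asks the cached
            # git repository for the submodule commit recorded at this path
            # in the parent's tree.
            'extra-sources': [                     # nesting is supported
                {'repo': 'upstream:nested',        # invented
                 'path': 'deps/nested',
                 'ref': 'master'},
            ],
        },
    ],
}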
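
The core of _resolve_subtrees is a depth-first walk that turns nested
'extra-sources' entries into a flat list of {'path': ..., 'tree': ...}
records, joining each nested path onto its parent's path. Below is a minimal
standalone sketch of just that traversal, with the repo caching and
submodule-commit lookup stubbed out behind a caller-supplied function; the
names here are placeholders for illustration, not morphlib APIs.

import os


def flatten_extra_sources(extra_sources, lookup_tree, base_path='.'):
    # Return a flat list of {'path': ..., 'tree': ...} dicts for a nested
    # 'extra-sources' list. 'lookup_tree' stands in for the resolver's
    # (repo, ref) -> tree SHA1 resolution and submodule-commit lookup.
    subtrees = []
    for source in extra_sources or []:
        # Each path is interpreted relative to the parent source's checkout.
        path = os.path.normpath(os.path.join(base_path, source['path']))
        tree = lookup_tree(source['repo'], source.get('ref'))
        subtrees.append({'path': path, 'tree': tree})
        # Nested submodules recurse with the joined path as the new base.
        subtrees.extend(flatten_extra_sources(
            source.get('extra-sources'), lookup_tree, base_path=path))
    return subtrees


if __name__ == '__main__':
    # Fake lookup: real code would resolve the ref via the repo caches.
    fake_lookup = lambda repo, ref: 'tree-of-' + repo
    print(flatten_extra_sources(
        [{'repo': 'upstream:sub', 'path': 'bundled/sub',
          'extra-sources': [{'repo': 'upstream:nested',
                             'path': 'deps/nested'}]}],
        fake_lookup))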