From 2a9c29502a76a54aba3cc92c6e617abbe41ba1ee Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Thu, 23 Oct 2014 15:16:07 +0100 Subject: Move create_source_pool code into new 'sourceresolver' module This code is an essential part of 'morph build'. It's quite complex and really shouldn't be mixed in with the base Application class. Given a dedicated class we can store some state in the object and avoid functions with seven parameters, too. --- morphlib/sourceresolver.py | 176 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 morphlib/sourceresolver.py (limited to 'morphlib/sourceresolver.py') diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py new file mode 100644 index 00000000..dee16ea1 --- /dev/null +++ b/morphlib/sourceresolver.py @@ -0,0 +1,176 @@ +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +import cliapp + +import collections +import logging + +import morphlib + + +class SourceResolver(object): + '''Provides a way of resolving the set of sources for a given system.''' + + def __init__(self, lrc, rrc, status_cb): + self.lrc = lrc + self.rrc = rrc + + self.status = status_cb + + def resolve_ref(self, reponame, ref, update=True): + '''Resolves commit and tree sha1s of the ref in a repo and returns it. 
+ + If update is True then this has the side-effect of updating + or cloning the repository into the local repo cache. + ''' + absref = None + + if self.lrc.has_repo(reponame): + repo = self.lrc.get_repo(reponame) + if update and repo.requires_update_for_ref(ref): + self.status(msg='Updating cached git repository %(reponame)s ' + 'for ref %(ref)s', reponame=reponame, ref=ref) + repo.update() + # If the user passed --no-git-update, and the ref is a SHA1 not + # available locally, this call will raise an exception. + absref, tree = repo.resolve_ref(ref) + elif self.rrc is not None: + try: + absref, tree = self.rrc.resolve_ref(reponame, ref) + if absref is not None: + self.status(msg='Resolved %(reponame)s %(ref)s via remote ' + 'repo cache', + reponame=reponame, + ref=ref, + chatty=True) + except BaseException, e: + logging.warning('Caught (and ignored) exception: %s' % str(e)) + if absref is None: + if update: + self.status(msg='Caching git repository %(reponame)s', + reponame=reponame) + repo = self.lrc.cache_repo(reponame) + repo.update() + else: + repo = self.lrc.get_repo(reponame) + absref, tree = repo.resolve_ref(ref) + return absref, tree + + def traverse_morphs(self, definitions_repo, definitions_ref, + system_filenames, update=True, + visit=lambda rn, rf, fn, arf, m: None, + definitions_original_ref=None): + morph_factory = morphlib.morphologyfactory.MorphologyFactory( + self.lrc, self.rrc, self.status) + definitions_queue = collections.deque(system_filenames) + chunk_in_definitions_repo_queue = [] + chunk_in_source_repo_queue = [] + resolved_refs = {} + resolved_morphologies = {} + + # Resolve the (repo, ref) pair for the definitions repo, cache result. 
+ definitions_absref, definitions_tree = self.resolve_ref( + definitions_repo, definitions_ref, update) + + if definitions_original_ref: + definitions_ref = definitions_original_ref + + while definitions_queue: + filename = definitions_queue.popleft() + + key = (definitions_repo, definitions_absref, filename) + if not key in resolved_morphologies: + resolved_morphologies[key] = morph_factory.get_morphology(*key) + morphology = resolved_morphologies[key] + + visit(definitions_repo, definitions_ref, filename, + definitions_absref, definitions_tree, morphology) + if morphology['kind'] == 'cluster': + raise cliapp.AppException( + "Cannot build a morphology of type 'cluster'.") + elif morphology['kind'] == 'system': + definitions_queue.extend( + morphlib.util.sanitise_morphology_path(s['morph']) + for s in morphology['strata']) + elif morphology['kind'] == 'stratum': + if morphology['build-depends']: + definitions_queue.extend( + morphlib.util.sanitise_morphology_path(s['morph']) + for s in morphology['build-depends']) + for c in morphology['chunks']: + if 'morph' not in c: + path = morphlib.util.sanitise_morphology_path( + c.get('morph', c['name'])) + chunk_in_source_repo_queue.append( + (c['repo'], c['ref'], path)) + continue + chunk_in_definitions_repo_queue.append( + (c['repo'], c['ref'], c['morph'])) + + for repo, ref, filename in chunk_in_definitions_repo_queue: + if (repo, ref) not in resolved_refs: + resolved_refs[repo, ref] = self.resolve_ref(repo, ref, update) + absref, tree = resolved_refs[repo, ref] + key = (definitions_repo, definitions_absref, filename) + if not key in resolved_morphologies: + resolved_morphologies[key] = morph_factory.get_morphology(*key) + morphology = resolved_morphologies[key] + visit(repo, ref, filename, absref, tree, morphology) + + for repo, ref, filename in chunk_in_source_repo_queue: + if (repo, ref) not in resolved_refs: + resolved_refs[repo, ref] = self.resolve_ref(repo, ref, update) + absref, tree = resolved_refs[repo, ref] + 
key = (repo, absref, filename) + if key not in resolved_morphologies: + resolved_morphologies[key] = morph_factory.get_morphology(*key) + morphology = resolved_morphologies[key] + visit(repo, ref, filename, absref, tree, morphology) + + +def create_source_pool(lrc, rrc, repo, ref, filename, + original_ref=None, update_repos=True, + status_cb=None): + '''Find all the sources involved in building a given system. + + Given a system morphology, this function will traverse the tree of stratum + and chunk morphologies that the system points to and create appropriate + Source objects. These are added to a new SourcePool object, which is + returned. + + Note that Git submodules are not considered 'sources' in the current + implementation, and so they must be handled separately. + + The 'lrc' and 'rrc' parameters specify the local and remote Git repository + caches used for resolving the sources. + + ''' + pool = morphlib.sourcepool.SourcePool() + + def add_to_pool(reponame, ref, filename, absref, tree, morphology): + sources = morphlib.source.make_sources(reponame, ref, + filename, absref, + tree, morphology) + for source in sources: + pool.add(source) + + resolver = SourceResolver(lrc, rrc, status_cb) + resolver.traverse_morphs(repo, ref, [filename], + update=update_repos, + visit=add_to_pool, + definitions_original_ref=original_ref) + return pool -- cgit v1.2.1 From 561dbc28fe03c30ddbe2c5a0af6ad481c0aad2d2 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Thu, 23 Oct 2014 15:48:43 +0100 Subject: Set the 'update' flag in the SourceResolver constructor --- morphlib/sourceresolver.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'morphlib/sourceresolver.py') diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py index dee16ea1..4e1ff8d6 100644 --- a/morphlib/sourceresolver.py +++ b/morphlib/sourceresolver.py @@ -25,13 +25,16 @@ import morphlib class SourceResolver(object): '''Provides a way of resolving the set 
of sources for a given system.''' - def __init__(self, lrc, rrc, status_cb): - self.lrc = lrc - self.rrc = rrc + def __init__(self, local_repo_cache, remote_repo_cache, update_repos, + status_cb=None): + self.lrc = local_repo_cache + self.rrc = remote_repo_cache + + self.update = update_repos self.status = status_cb - def resolve_ref(self, reponame, ref, update=True): + def resolve_ref(self, reponame, ref): '''Resolves commit and tree sha1s of the ref in a repo and returns it. If update is True then this has the side-effect of updating @@ -41,7 +44,7 @@ class SourceResolver(object): if self.lrc.has_repo(reponame): repo = self.lrc.get_repo(reponame) - if update and repo.requires_update_for_ref(ref): + if self.update and repo.requires_update_for_ref(ref): self.status(msg='Updating cached git repository %(reponame)s ' 'for ref %(ref)s', reponame=reponame, ref=ref) repo.update() @@ -60,7 +63,7 @@ class SourceResolver(object): except BaseException, e: logging.warning('Caught (and ignored) exception: %s' % str(e)) if absref is None: - if update: + if self.update: self.status(msg='Caching git repository %(reponame)s', reponame=reponame) repo = self.lrc.cache_repo(reponame) @@ -71,7 +74,7 @@ class SourceResolver(object): return absref, tree def traverse_morphs(self, definitions_repo, definitions_ref, - system_filenames, update=True, + system_filenames, visit=lambda rn, rf, fn, arf, m: None, definitions_original_ref=None): morph_factory = morphlib.morphologyfactory.MorphologyFactory( @@ -84,7 +87,7 @@ class SourceResolver(object): # Resolve the (repo, ref) pair for the definitions repo, cache result. 
definitions_absref, definitions_tree = self.resolve_ref( - definitions_repo, definitions_ref, update) + definitions_repo, definitions_ref) if definitions_original_ref: definitions_ref = definitions_original_ref @@ -123,7 +126,7 @@ class SourceResolver(object): for repo, ref, filename in chunk_in_definitions_repo_queue: if (repo, ref) not in resolved_refs: - resolved_refs[repo, ref] = self.resolve_ref(repo, ref, update) + resolved_refs[repo, ref] = self.resolve_ref(repo, ref) absref, tree = resolved_refs[repo, ref] key = (definitions_repo, definitions_absref, filename) if not key in resolved_morphologies: @@ -133,7 +136,7 @@ class SourceResolver(object): for repo, ref, filename in chunk_in_source_repo_queue: if (repo, ref) not in resolved_refs: - resolved_refs[repo, ref] = self.resolve_ref(repo, ref, update) + resolved_refs[repo, ref] = self.resolve_ref(repo, ref) absref, tree = resolved_refs[repo, ref] key = (repo, absref, filename) if key not in resolved_morphologies: @@ -168,9 +171,8 @@ def create_source_pool(lrc, rrc, repo, ref, filename, for source in sources: pool.add(source) - resolver = SourceResolver(lrc, rrc, status_cb) + resolver = SourceResolver(lrc, rrc, update_repos, status_cb) resolver.traverse_morphs(repo, ref, [filename], - update=update_repos, visit=add_to_pool, definitions_original_ref=original_ref) return pool -- cgit v1.2.1 From e6f97c2057a027dc99128f653e84036e8247bedf Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Thu, 6 Nov 2014 16:29:26 +0000 Subject: Add docstring to SourceResolver class. 
--- morphlib/sourceresolver.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'morphlib/sourceresolver.py') diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py index 4e1ff8d6..8c9fd8b8 100644 --- a/morphlib/sourceresolver.py +++ b/morphlib/sourceresolver.py @@ -23,7 +23,28 @@ import morphlib class SourceResolver(object): - '''Provides a way of resolving the set of sources for a given system.''' + '''Provides a way of resolving the set of sources for a given system. + + There are two levels of caching involved in resolving the sources to build. + + The canonical repository for each source is specified in the build-command + (for strata and systems) or in the stratum morphology (for chunks). It will + be either a normal URL, or a keyed URL using a repo-alias like + 'baserock:baserock/definitions'. + + The 'remote repo cache' is a Baserock Trove system. It functions as a + normal Git server, but in addition it runs a service on port 8080 called + 'morph-cache-server' which can resolve refs, list their contents and read + specific files from the repos it holds. This allows the SourceResolver to + work out how to build something without cloning the whole repo. (If a local + build of that source ends up being necessary then it will get cloned into + the local cache later on.) + + The second layer of caching is the local repository cache, which mirrors + entire repositories in $cachedir/gits. If a repo is not in the remote repo + cache then it must be present in the local repo cache. + + ''' def __init__(self, local_repo_cache, remote_repo_cache, update_repos, status_cb=None): -- cgit v1.2.1