summaryrefslogtreecommitdiff
path: root/morphlib/sourceresolver.py
diff options
context:
space:
mode:
authorSam Thursfield <sam.thursfield@codethink.co.uk>2016-03-02 17:11:34 +0000
committerSam Thursfield <sam.thursfield@codethink.co.uk>2016-03-16 19:11:21 +0000
commitd58d8e8f7a4ec03ff14021a4515c8283dad52573 (patch)
tree5ece2d0524e4423bb953e6140831c9fde93b7219 /morphlib/sourceresolver.py
parent014a029ade9a045a839ca86c35690b218098ea33 (diff)
downloadmorph-d58d8e8f7a4ec03ff14021a4515c8283dad52573.tar.gz
Unify local and remote repo cache modules
There's not really any reason you'd want to use the RemoteRepoCache class except as a workaround for the slow speed of some LocalRepoCache operations, so I can't see this ruining anyone's day. The main reason for doing this is so we can simply the sourceresolver code. One reason that the sourceresolver class is so hopelessly complicated is that it right now has to use two incompatible interfaces for Git repo caches. I've taken the opportunity to detangle the RepoCache class from the App class. Now all of the configuration for the RepoCache class is passed into the constructor explicitly. This makes the class usable from outside Morph: resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases=[]) repo_cache = morphlib.repocache.RepoCache('/src/cache/gits', resolver) Change-Id: I596c81d7645b67504c88e555172a8c238f4f8a66
Diffstat (limited to 'morphlib/sourceresolver.py')
-rw-r--r--morphlib/sourceresolver.py80
1 files changed, 28 insertions, 52 deletions
diff --git a/morphlib/sourceresolver.py b/morphlib/sourceresolver.py
index c6f77cf9..f8dac8b1 100644
--- a/morphlib/sourceresolver.py
+++ b/morphlib/sourceresolver.py
@@ -138,17 +138,17 @@ class SourceResolver(object):
be either a normal URL, or a keyed URL using a repo-alias like
'baserock:baserock/definitions'.
- The 'remote repo cache' is a Baserock Trove system. It functions as a
- normal Git server, but in addition it runs a service on port 8080 called
- 'morph-cache-server' which can resolve refs, list their contents and read
- specific files from the repos it holds. This allows the SourceResolver to
- work out how to build something without cloning the whole repo. (If a local
- build of that source ends up being necessary then it will get cloned into
- the local cache later on).
-
- The second layer of caching is the local repository cache, which mirrors
- entire repositories in $cachedir/gits. If a repo is not in the remote repo
- cache then it must be present in the local repo cache.
+ Each commit used in a build is resolved to a tree SHA1, which means that
+ merge commits and changes to commit messages don't affect the cache
+ identity of a chunk. This does mean we need to query every repo in the
+ build graph, though.
+
+ All requests for information on a repo use the 'repocache' module. This
+ maintains a local copy of all the Git repos we need to work with. A repo
+ cache can also use a remote 'morph-cache-server' instance, if available,
+ to query certain information about a repo without cloning it locally.
+ Using this we can resolve commits to trees without having to clone every
+ repo locally, which is a huge performance improvement in some cases.
The third layer of caching is a simple commit SHA1 -> tree SHA mapping. It
turns out that even if all repos are available locally, running
@@ -168,14 +168,11 @@ class SourceResolver(object):
'''
- def __init__(self, local_repo_cache, remote_repo_cache,
- tree_cache_manager, update_repos,
- status_cb=None):
- self.lrc = local_repo_cache
- self.rrc = remote_repo_cache
+ def __init__(self, repo_cache, tree_cache_manager, status_cb=None):
+ self.repo_cache = repo_cache
self.tree_cache_manager = tree_cache_manager
- self.update = update_repos
+ self.update = repo_cache.update_gits
self.status = status_cb
def _resolve_ref(self, resolved_trees, reponame, ref):
@@ -184,9 +181,6 @@ class SourceResolver(object):
If update is True then this has the side-effect of updating or cloning
the repository into the local repo cache.
- This function is complex due to the 3 layers of caching described in
- the SourceResolver docstring.
-
'''
# The Baserock reference definitions use absolute refs so, and, if the
@@ -198,29 +192,8 @@ class SourceResolver(object):
logging.debug('tree (%s, %s) not in cache', reponame, ref)
- absref = None
- if self.lrc.has_repo(reponame):
- repo = self.lrc.get_updated_repo(reponame, ref)
- # If the user passed --no-git-update, and the ref is a SHA1 not
- # available locally, this call will raise an exception.
- absref = repo.resolve_ref_to_commit(ref)
- tree = repo.resolve_ref_to_tree(absref)
- elif self.rrc is not None:
- try:
- absref, tree = self.rrc.resolve_ref(reponame, ref)
- if absref is not None:
- self.status(msg='Resolved %(reponame)s %(ref)s via remote '
- 'repo cache',
- reponame=reponame,
- ref=ref,
- chatty=True)
- except BaseException as e:
- logging.warning('Caught (and ignored) exception: %s' % str(e))
-
- if absref is None:
- repo = self.lrc.get_updated_repo(reponame, ref)
- absref = repo.resolve_ref_to_commit(ref)
- tree = repo.resolve_ref_to_tree(absref)
+ absref, tree = self.repo_cache.resolve_ref_to_commit_and_tree(reponame,
+ ref)
logging.debug('Writing tree to cache with ref (%s, %s)',
reponame, absref)
@@ -430,7 +403,7 @@ class SourceResolver(object):
if definitions_original_ref:
definitions_ref = definitions_original_ref
- definitions_cached_repo = self.lrc.get_updated_repo(
+ definitions_cached_repo = self.repo_cache.get_updated_repo(
repo_name=definitions_repo, ref=definitions_absref)
definitions_cached_repo.extract_commit(
definitions_absref, definitions_checkout_dir)
@@ -489,9 +462,8 @@ def _find_duplicate_chunks(sourcepool): #pragma: no cover
return {k: v for (k, v) in chunk_sources_by_name.iteritems() if len(v) > 1}
-def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
- original_ref=None, update_repos=True,
- status_cb=None):
+def create_source_pool(repo_cache, repo, ref, filenames,
+ original_ref=None, status_cb=None):
'''Find all the sources involved in building a given system.
Given a system morphology, this function will traverse the tree of stratum
@@ -502,8 +474,12 @@ def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
Note that Git submodules are not considered 'sources' in the current
implementation, and so they must be handled separately.
- The 'lrc' and 'rrc' parameters specify the local and remote Git repository
- caches used for resolving the sources.
+ The 'repo_cache' parameter specifies a repo cache which is used when
+ accessing the source repos. If a git_resolve_cache_server is set for this
+ repo cache, and all repos in the build are known to it, then this function
+ will only need the definitions.git repo available locally. If not, then all
+ repos must be cloned in order to resolve the refs to tree SHA1s, which is
+ a slow process!
'''
pool = morphlib.sourcepool.SourcePool()
@@ -529,10 +505,10 @@ def create_source_pool(lrc, rrc, repo, ref, filenames, cachedir,
pool.add(source)
tree_cache_manager = PickleCacheManager(
- os.path.join(cachedir, tree_cache_filename), tree_cache_size)
+ os.path.join(repo_cache.cachedir, tree_cache_filename),
+ tree_cache_size)
- resolver = SourceResolver(lrc, rrc, tree_cache_manager, update_repos,
- status_cb)
+ resolver = SourceResolver(repo_cache, tree_cache_manager, status_cb)
resolver.traverse_morphs(repo, ref, filenames,
visit=add_to_pool,
definitions_original_ref=original_ref)