diff options
author | Jannis Pohlmann <jannis.pohlmann@codethink.co.uk> | 2013-01-07 14:45:21 +0000 |
---|---|---|
committer | Jannis Pohlmann <jannis.pohlmann@codethink.co.uk> | 2013-01-07 18:19:38 +0000 |
commit | 451e2ab7ea3d595476ec5e57f33da8eef1661773 (patch) | |
tree | ff213d807a5db38d2b1dbc7471fa148f54554b7a | |
parent | 24577c3e4a2b8e08ef0fcf23a4664bc38530ef2b (diff) | |
download | morph-jannispohlmann/resolve-refs-and-morphologies-in-batches.tar.gz |
Resolve stratum and chunk refs in batches (branch: jannispohlmann/resolve-refs-and-morphologies-in-batches)
This uses the functionality introduced in the previous commit to
reduce the number of HTTP requests made during builds and generally
while creating source pools.
I noticed a weird thing in resolve_ref() while I wrote the
corresponding batch function resolve_refs(): unless I'm misreading
the code, the repository caching at the end of resolve_ref() clones
repositories from the remote repo cache that already failed to
resolve the needed ref remotely. How would they be useful after
cloning? Anyway, resolve_refs() is implemented in the same way,
but I added a FIXME note.
-rwxr-xr-x | morphlib/app.py | 97 | ||||
-rw-r--r-- | morphlib/remoterepocache.py | 8 |
2 files changed, 98 insertions, 7 deletions
diff --git a/morphlib/app.py b/morphlib/app.py index 9239bf31..b194911e 100755 --- a/morphlib/app.py +++ b/morphlib/app.py @@ -280,6 +280,66 @@ class Morph(cliapp.Application): absref, tree = repo.resolve_ref(ref) return absref, tree + def resolve_refs(self, refs, lrc, rrc, update=True): + resolved = {} + + # First resolve refs in all repositories that are already cached. + local_references = [x for x in refs if lrc.has_repo(x[0])] + for reponame, ref in local_references: + repo = lrc.get_repo(reponame) + if update: + self.status(msg='Updating cached git repository %(reponame)s', + reponame=reponame) + repo.update() + absref, tree = repo.resolve_ref(ref) + resolved[(reponame, ref)] = { + 'repo': reponame, + 'repo-url': repo.url, + 'ref': ref, + 'sha1': absref, + 'tree': tree + } + + # Then, if we have a remote repo cache, resolve refs in all + # repositories that we haven't cached locally yet. + if rrc: + remote_references = [x for x in refs if not x in local_references] + if remote_references: + self.status(msg='Resolving %(count)i references via ' + 'remote repository cache', + count=len(remote_references)) + resolved_remote_refs = rrc.resolve_refs(remote_references) + for reponame, ref in remote_references: + for reference in resolved_remote_refs.keys(): + del resolved_remote_refs[reference] + resolved.update(resolved_remote_refs) + + # Lastly, attempt to cache repositories for any ref that has not + # been resolved successfully so far. + # + # FIXME Doesn't this only ever cache repositories from the remote + # repo cache that don't have the ref anyway? It is the same that + # the resolve_ref() method does though... 
+ uncached_references = [x for x in refs if not x in resolved] + for reponame, ref in uncached_references: + if update: + self.status(msg='Caching git repository %(reponame)s', + reponame=reponame) + repo = lrc.cache_repo(reponame) + repo.update() + else: + repo = lrc.get_repo(reponame) + absref, tree = repo.resolve_ref(ref) + resolved[(reponame, ref)] = { + 'repo': reponame, + 'repo-url': repo.url, + 'ref': ref, + 'sha1': absref, + 'tree': tree + } + + return resolved + def traverse_morphs(self, triplets, lrc, rrc, update=True, visit=lambda rn, rf, fn, arf, m: None): morph_factory = morphlib.morphologyfactory.MorphologyFactory(lrc, rrc, @@ -289,6 +349,32 @@ class Morph(cliapp.Application): resolved_refs = {} resolved_morphologies = {} + def resolve_refs(morphology, *fields): + # Resolve the references used in morphology at once. + refs = [] + for field in fields: + if field in morphology and morphology[field]: + refs.extend([(s['repo'], s['ref']) + for s in morphology[field]]) + sha1s = self.resolve_refs(refs, lrc, rrc, update) + + # Mark them all as resolved so they are not resolved twice. + for info in sha1s.itervalues(): + if 'error' in info: + raise cliapp.AppException( + 'Failed to resolve reference "%s" ' + 'in repository %s' % (info['ref'], info['repo'])) + else: + reference = (info['repo'], info['ref']) + resolved_refs[reference] = (info['sha1'], info['tree']) + + def load_morphology(reponame, absref, filename): + reference = (reponame, absref, filename) + if not reference in resolved_morphologies: + resolved_morphologies[reference] = \ + morph_factory.get_morphology(*reference) + return resolved_morphologies[reference] + while queue: reponame, ref, filename = queue.popleft() update_repo = update and reponame not in updated_repos @@ -303,17 +389,18 @@ class Morph(cliapp.Application): updated_repos.add(reponame) # Fetch the (repo, ref, filename) morphology, cache result. 
- reference = (reponame, absref, filename) - if not reference in resolved_morphologies: - resolved_morphologies[reference] = \ - morph_factory.get_morphology(reponame, absref, filename) - morphology = resolved_morphologies[reference] + morphology = load_morphology(reponame, absref, filename) visit(reponame, ref, filename, absref, tree, morphology) + + # Resolve the refs of all strata and/or chunks in the + # morphology at once. if morphology['kind'] == 'system': + resolve_refs(morphology, 'strata') queue.extend((s['repo'], s['ref'], '%s.morph' % s['morph']) for s in morphology['strata']) elif morphology['kind'] == 'stratum': + resolve_refs(morphology, 'build-depends', 'chunks') if morphology['build-depends']: queue.extend((s['repo'], s['ref'], '%s.morph' % s['morph']) for s in morphology['build-depends']) diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py index 4a735187..2d2d3f4d 100644 --- a/morphlib/remoterepocache.py +++ b/morphlib/remoterepocache.py @@ -94,7 +94,9 @@ class RemoteRepoCache(object): request_data = [] for n in xrange(0, len(tuples)): request_data.append({'repo': urls[n], 'ref': tuples[n][1]}) + request_data = json.dumps(request_data) response_data = self._make_post_request('sha1s', request_data) + response_data = json.loads(response_data) data = {} for n in xrange(0, len(tuples)): data[tuples[n]] = { @@ -102,7 +104,7 @@ class RemoteRepoCache(object): 'repo-url': response_data[n]['repo'], 'ref': response_data[n]['ref'], } - if 'error' in resonse_data[n]: + if 'error' in response_data[n]: data[tuples[n]]['error'] = response_data[n]['error'] else: data[tuples[n]]['sha1'] = response_data[n]['sha1'] @@ -130,5 +132,7 @@ class RemoteRepoCache(object): if not server_url.endswith('/'): server_url += '/' url = urlparse.urljoin(server_url, '/1.0/%s' % path) - handle = urllib2.urlopen(url, data) + request = urllib2.Request( + url, data, {'Content-Type': 'application/json'}) + handle = urllib2.urlopen(request, data) return handle.read() |