From 451e2ab7ea3d595476ec5e57f33da8eef1661773 Mon Sep 17 00:00:00 2001
From: Jannis Pohlmann
Date: Mon, 7 Jan 2013 14:45:21 +0000
Subject: Resolve stratum and chunk refs in batches

This uses the functionality introduced in the previous commit to
reduce the number of HTTP requests made during builds and generally
while creating source pools.

I noticed a weird thing in resolve_ref() while I wrote the
corresponding batch function resolve_refs(): unless I'm misreading the
code the repository caching at the end of resolve_ref() clones
repositories from the remote repo cache that already failed to resolve
the needed ref remotely. How would they be useful after cloning? Anyway,
resolve_refs() is implemented in the same way but I added a FIXME note.
---
 morphlib/app.py             | 113 ++++++++++++++++++++++++++++++++++++++++---
 morphlib/remoterepocache.py |   8 +++-
 2 files changed, 114 insertions(+), 7 deletions(-)

diff --git a/morphlib/app.py b/morphlib/app.py
index 9239bf31..b194911e 100755
--- a/morphlib/app.py
+++ b/morphlib/app.py
@@ -280,6 +280,82 @@ class Morph(cliapp.Application):
             absref, tree = repo.resolve_ref(ref)
         return absref, tree
 
+    def resolve_refs(self, refs, lrc, rrc, update=True):
+        '''Resolve a batch of (reponame, ref) pairs.
+
+        Refs in repositories that lrc already has are resolved locally,
+        after updating the repository if update is true. If rrc is set,
+        the remaining refs are passed to rrc.resolve_refs() in one go.
+        Anything still unresolved is resolved in a repository obtained
+        from lrc, which is cached and updated first if update is true.
+
+        Return a dict that maps each (reponame, ref) pair to a dict
+        describing it. Locally resolved entries have the keys 'repo',
+        'repo-url', 'ref', 'sha1' and 'tree'; remotely resolved entries
+        are passed through as returned by rrc.resolve_refs().
+
+        '''
+        resolved = {}
+
+        # First resolve refs in all repositories that are already cached.
+        local_references = [x for x in refs if lrc.has_repo(x[0])]
+        for reponame, ref in local_references:
+            repo = lrc.get_repo(reponame)
+            if update:
+                self.status(msg='Updating cached git repository %(reponame)s',
+                            reponame=reponame)
+                repo.update()
+            absref, tree = repo.resolve_ref(ref)
+            resolved[(reponame, ref)] = {
+                'repo': reponame,
+                'repo-url': repo.url,
+                'ref': ref,
+                'sha1': absref,
+                'tree': tree
+            }
+
+        # Then, if we have a remote repo cache, resolve refs in all
+        # repositories that we haven't cached locally yet.
+        if rrc:
+            remote_references = [x for x in refs if not x in local_references]
+            if remote_references:
+                self.status(msg='Resolving %(count)i references via '
+                                'remote repository cache',
+                            count=len(remote_references))
+                resolved_remote_refs = rrc.resolve_refs(remote_references)
+                # Drop refs the remote cache failed to resolve so that
+                # they fall through to the local caching step below.
+                for reference in resolved_remote_refs.keys():
+                    if 'error' in resolved_remote_refs[reference]:
+                        del resolved_remote_refs[reference]
+                resolved.update(resolved_remote_refs)
+
+        # Lastly, attempt to cache repositories for any ref that has not
+        # been resolved successfully so far.
+        #
+        # FIXME Doesn't this only ever cache repositories from the remote
+        # repo cache that don't have the ref anyway? It is the same that
+        # the resolve_ref() method does though...
+        uncached_references = [x for x in refs if not x in resolved]
+        for reponame, ref in uncached_references:
+            if update:
+                self.status(msg='Caching git repository %(reponame)s',
+                            reponame=reponame)
+                repo = lrc.cache_repo(reponame)
+                repo.update()
+            else:
+                repo = lrc.get_repo(reponame)
+            absref, tree = repo.resolve_ref(ref)
+            resolved[(reponame, ref)] = {
+                'repo': reponame,
+                'repo-url': repo.url,
+                'ref': ref,
+                'sha1': absref,
+                'tree': tree
+            }
+
+        return resolved
+
     def traverse_morphs(self, triplets, lrc, rrc, update=True,
                         visit=lambda rn, rf, fn, arf, m: None):
         morph_factory = morphlib.morphologyfactory.MorphologyFactory(lrc, rrc,
@@ -289,6 +365,32 @@ class Morph(cliapp.Application):
         resolved_refs = {}
         resolved_morphologies = {}
 
+        def resolve_refs(morphology, *fields):
+            # Resolve the references used in morphology at once.
+            refs = []
+            for field in fields:
+                if field in morphology and morphology[field]:
+                    refs.extend([(s['repo'], s['ref'])
+                                 for s in morphology[field]])
+            sha1s = self.resolve_refs(refs, lrc, rrc, update)
+
+            # Mark them all as resolved so they are not resolved twice.
+            for info in sha1s.itervalues():
+                if 'error' in info:
+                    raise cliapp.AppException(
+                            'Failed to resolve reference "%s" '
+                            'in repository %s' % (info['ref'], info['repo']))
+                else:
+                    reference = (info['repo'], info['ref'])
+                    resolved_refs[reference] = (info['sha1'], info['tree'])
+
+        def load_morphology(reponame, absref, filename):
+            reference = (reponame, absref, filename)
+            if not reference in resolved_morphologies:
+                resolved_morphologies[reference] = \
+                        morph_factory.get_morphology(*reference)
+            return resolved_morphologies[reference]
+
         while queue:
             reponame, ref, filename = queue.popleft()
             update_repo = update and reponame not in updated_repos
@@ -303,17 +405,18 @@ class Morph(cliapp.Application):
                 updated_repos.add(reponame)
 
             # Fetch the (repo, ref, filename) morphology, cache result.
-            reference = (reponame, absref, filename)
-            if not reference in resolved_morphologies:
-                resolved_morphologies[reference] = \
-                    morph_factory.get_morphology(reponame, absref, filename)
-            morphology = resolved_morphologies[reference]
+            morphology = load_morphology(reponame, absref, filename)
 
             visit(reponame, ref, filename, absref, tree, morphology)
+
+            # Resolve the refs of all strata and/or chunks in the
+            # morphology at once.
             if morphology['kind'] == 'system':
+                resolve_refs(morphology, 'strata')
                 queue.extend((s['repo'], s['ref'], '%s.morph' % s['morph'])
                              for s in morphology['strata'])
             elif morphology['kind'] == 'stratum':
+                resolve_refs(morphology, 'build-depends', 'chunks')
                 if morphology['build-depends']:
                     queue.extend((s['repo'], s['ref'], '%s.morph' % s['morph'])
                                  for s in morphology['build-depends'])
diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py
index 4a735187..2d2d3f4d 100644
--- a/morphlib/remoterepocache.py
+++ b/morphlib/remoterepocache.py
@@ -94,7 +94,9 @@ class RemoteRepoCache(object):
         request_data = []
         for n in xrange(0, len(tuples)):
             request_data.append({'repo': urls[n], 'ref': tuples[n][1]})
+        request_data = json.dumps(request_data)
         response_data = self._make_post_request('sha1s', request_data)
+        response_data = json.loads(response_data)
         data = {}
         for n in xrange(0, len(tuples)):
             data[tuples[n]] = {
@@ -102,7 +104,7 @@ class RemoteRepoCache(object):
                 'repo-url': response_data[n]['repo'],
                 'ref': response_data[n]['ref'],
             }
-            if 'error' in resonse_data[n]:
+            if 'error' in response_data[n]:
                 data[tuples[n]]['error'] = response_data[n]['error']
             else:
                 data[tuples[n]]['sha1'] = response_data[n]['sha1']
@@ -130,5 +132,7 @@ class RemoteRepoCache(object):
         if not server_url.endswith('/'):
             server_url += '/'
         url = urlparse.urljoin(server_url, '/1.0/%s' % path)
-        handle = urllib2.urlopen(url, data)
+        request = urllib2.Request(
+                url, data, {'Content-Type': 'application/json'})
+        handle = urllib2.urlopen(request)
         return handle.read()
-- 
cgit v1.2.1