summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jannis Pohlmann <jannis.pohlmann@codethink.co.uk>, 2013-01-07 14:45:21 +0000
committer: Jannis Pohlmann <jannis.pohlmann@codethink.co.uk>, 2013-01-07 18:19:38 +0000
commit: 451e2ab7ea3d595476ec5e57f33da8eef1661773 (patch)
tree: ff213d807a5db38d2b1dbc7471fa148f54554b7a
parent: 24577c3e4a2b8e08ef0fcf23a4664bc38530ef2b (diff)
download: morph-jannispohlmann/resolve-refs-and-morphologies-in-batches.tar.gz
Resolve stratum and chunk refs in batches (branch: jannispohlmann/resolve-refs-and-morphologies-in-batches)
This uses the functionality introduced in the previous commit to reduce the number of HTTP requests made during builds and, more generally, while creating source pools. I noticed something odd in resolve_ref() while writing the corresponding batch function, resolve_refs(): unless I'm misreading the code, the repository caching at the end of resolve_ref() clones exactly those repositories for which the remote repo cache has already failed to resolve the needed ref. How would they be useful after cloning? Anyway, resolve_refs() is implemented in the same way, but I added a FIXME note.
-rwxr-xr-x morphlib/app.py 97
-rw-r--r-- morphlib/remoterepocache.py 8
2 files changed, 98 insertions, 7 deletions
diff --git a/morphlib/app.py b/morphlib/app.py
index 9239bf31..b194911e 100755
--- a/morphlib/app.py
+++ b/morphlib/app.py
@@ -280,6 +280,66 @@ class Morph(cliapp.Application):
absref, tree = repo.resolve_ref(ref)
return absref, tree
+ def resolve_refs(self, refs, lrc, rrc, update=True):
+ resolved = {}
+
+ # First resolve refs in all repositories that are already cached.
+ local_references = [x for x in refs if lrc.has_repo(x[0])]
+ for reponame, ref in local_references:
+ repo = lrc.get_repo(reponame)
+ if update:
+ self.status(msg='Updating cached git repository %(reponame)s',
+ reponame=reponame)
+ repo.update()
+ absref, tree = repo.resolve_ref(ref)
+ resolved[(reponame, ref)] = {
+ 'repo': reponame,
+ 'repo-url': repo.url,
+ 'ref': ref,
+ 'sha1': absref,
+ 'tree': tree
+ }
+
+ # Then, if we have a remote repo cache, resolve refs in all
+ # repositories that we haven't cached locally yet.
+ if rrc:
+ remote_references = [x for x in refs if not x in local_references]
+ if remote_references:
+ self.status(msg='Resolving %(count)i references via '
+ 'remote repository cache',
+ count=len(remote_references))
+ resolved_remote_refs = rrc.resolve_refs(remote_references)
+ for reponame, ref in remote_references:
+ for reference in resolved_remote_refs.keys():
+ del resolved_remote_refs[reference]
+ resolved.update(resolved_remote_refs)
+
+ # Lastly, attempt to cache repositories for any ref that has not
+ # been resolved successfully so far.
+ #
+ # FIXME Doesn't this only ever cache repositories from the remote
+ # repo cache that don't have the ref anyway? It is the same that
+ # the resolve_ref() method does though...
+ uncached_references = [x for x in refs if not x in resolved]
+ for reponame, ref in uncached_references:
+ if update:
+ self.status(msg='Caching git repository %(reponame)s',
+ reponame=reponame)
+ repo = lrc.cache_repo(reponame)
+ repo.update()
+ else:
+ repo = lrc.get_repo(reponame)
+ absref, tree = repo.resolve_ref(ref)
+ resolved[(reponame, ref)] = {
+ 'repo': reponame,
+ 'repo-url': repo.url,
+ 'ref': ref,
+ 'sha1': absref,
+ 'tree': tree
+ }
+
+ return resolved
+
def traverse_morphs(self, triplets, lrc, rrc, update=True,
visit=lambda rn, rf, fn, arf, m: None):
morph_factory = morphlib.morphologyfactory.MorphologyFactory(lrc, rrc,
@@ -289,6 +349,32 @@ class Morph(cliapp.Application):
resolved_refs = {}
resolved_morphologies = {}
+ def resolve_refs(morphology, *fields):
+ # Resolve the references used in morphology at once.
+ refs = []
+ for field in fields:
+ if field in morphology and morphology[field]:
+ refs.extend([(s['repo'], s['ref'])
+ for s in morphology[field]])
+ sha1s = self.resolve_refs(refs, lrc, rrc, update)
+
+ # Mark them all as resolved so they are not resolved twice.
+ for info in sha1s.itervalues():
+ if 'error' in info:
+ raise cliapp.AppException(
+ 'Failed to resolve reference "%s" '
+ 'in repository %s' % (info['ref'], info['repo']))
+ else:
+ reference = (info['repo'], info['ref'])
+ resolved_refs[reference] = (info['sha1'], info['tree'])
+
+ def load_morphology(reponame, absref, filename):
+ reference = (reponame, absref, filename)
+ if not reference in resolved_morphologies:
+ resolved_morphologies[reference] = \
+ morph_factory.get_morphology(*reference)
+ return resolved_morphologies[reference]
+
while queue:
reponame, ref, filename = queue.popleft()
update_repo = update and reponame not in updated_repos
@@ -303,17 +389,18 @@ class Morph(cliapp.Application):
updated_repos.add(reponame)
# Fetch the (repo, ref, filename) morphology, cache result.
- reference = (reponame, absref, filename)
- if not reference in resolved_morphologies:
- resolved_morphologies[reference] = \
- morph_factory.get_morphology(reponame, absref, filename)
- morphology = resolved_morphologies[reference]
+ morphology = load_morphology(reponame, absref, filename)
visit(reponame, ref, filename, absref, tree, morphology)
+
+ # Resolve the refs of all strata and/or chunks in the
+ # morphology at once.
if morphology['kind'] == 'system':
+ resolve_refs(morphology, 'strata')
queue.extend((s['repo'], s['ref'], '%s.morph' % s['morph'])
for s in morphology['strata'])
elif morphology['kind'] == 'stratum':
+ resolve_refs(morphology, 'build-depends', 'chunks')
if morphology['build-depends']:
queue.extend((s['repo'], s['ref'], '%s.morph' % s['morph'])
for s in morphology['build-depends'])
diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py
index 4a735187..2d2d3f4d 100644
--- a/morphlib/remoterepocache.py
+++ b/morphlib/remoterepocache.py
@@ -94,7 +94,9 @@ class RemoteRepoCache(object):
request_data = []
for n in xrange(0, len(tuples)):
request_data.append({'repo': urls[n], 'ref': tuples[n][1]})
+ request_data = json.dumps(request_data)
response_data = self._make_post_request('sha1s', request_data)
+ response_data = json.loads(response_data)
data = {}
for n in xrange(0, len(tuples)):
data[tuples[n]] = {
@@ -102,7 +104,7 @@ class RemoteRepoCache(object):
'repo-url': response_data[n]['repo'],
'ref': response_data[n]['ref'],
}
- if 'error' in resonse_data[n]:
+ if 'error' in response_data[n]:
data[tuples[n]]['error'] = response_data[n]['error']
else:
data[tuples[n]]['sha1'] = response_data[n]['sha1']
@@ -130,5 +132,7 @@ class RemoteRepoCache(object):
if not server_url.endswith('/'):
server_url += '/'
url = urlparse.urljoin(server_url, '/1.0/%s' % path)
- handle = urllib2.urlopen(url, data)
+ request = urllib2.Request(
+ url, data, {'Content-Type': 'application/json'})
+ handle = urllib2.urlopen(request, data)
return handle.read()