summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sam Thursfield <sam@afuera.me.uk>, 2014-06-14 18:53:30 +0100
committer: Sam Thursfield <sam@afuera.me.uk>, 2014-08-29 15:15:57 +0000
commit: 4f73763ad1ff0bcb109f685afafcc6d1dffcfb0e (patch)
tree: 3b2928ee966eb3619e281c694436b8db6977efdc
parent: 0a8b830cf19c1b0c290c73a0100f15620a7e6c48 (diff)
download: morph-4f73763ad1ff0bcb109f685afafcc6d1dffcfb0e.tar.gz
Read remote morphologies in batches rather than one at a time
This should be a useful performance boost.
-rw-r--r-- morphlib/app.py | 79
-rw-r--r-- morphlib/morphologyfactory.py | 70
-rw-r--r-- morphlib/remoterepocache.py | 23
3 files changed, 134 insertions, 38 deletions
diff --git a/morphlib/app.py b/morphlib/app.py
index c7fe237d..19f61de9 100644
--- a/morphlib/app.py
+++ b/morphlib/app.py
@@ -359,43 +359,52 @@ class Morph(cliapp.Application):
resolved_refs = {}
resolved_morphologies = {}
+ def fetch_morphologies(triplets):
+ morph_factory.get_morphologies(resolved_refs, resolved_morphologies, triplets)
+
while queue:
- reponame, ref, filename = queue.popleft()
- update_repo = update and reponame not in updated_repos
-
- # Resolve the (repo, ref) reference, cache result.
- reference = (reponame, ref)
- if not reference in resolved_refs:
- resolved_refs[reference] = self.resolve_ref(
- lrc, rrc, reponame, ref, update_repo)
- absref, tree = resolved_refs[reference]
-
- updated_repos.add(reponame)
-
- # Fetch the (repo, ref, filename) morphology, cache result.
- reference = (reponame, absref, filename)
- if not reference in resolved_morphologies:
- resolved_morphologies[reference] = \
- morph_factory.get_morphology(reponame, absref, filename)
- morphology = resolved_morphologies[reference]
-
- visit(reponame, ref, filename, absref, tree, morphology)
- if morphology['kind'] == 'cluster':
- raise cliapp.AppException(
- "Cannot build a morphology of type 'cluster'.")
- elif morphology['kind'] == 'system':
- queue.extend((s.get('repo') or reponame,
- s.get('ref') or ref,
- '%s.morph' % s['morph'])
- for s in morphology['strata'])
- elif morphology['kind'] == 'stratum':
- if morphology['build-depends']:
+ to_fetch = set()
+ while queue:
+ reponame, ref, filename = queue.popleft()
+ update_repo = update and reponame not in updated_repos
+
+ # Resolve the (repo, ref) reference, cache result.
+ reference = (reponame, ref)
+ if not reference in resolved_refs:
+ resolved_refs[reference] = self.resolve_ref(
+ lrc, rrc, reponame, ref, update_repo)
+ absref, tree = resolved_refs[reference]
+
+ updated_repos.add(reponame)
+ print 'resolved: %s %s %s' % ((reponame, ref, filename))
+ to_fetch.add((reponame, ref, filename))
+ print 'to_fetch: %s' % to_fetch
+
+ to_visit = to_fetch
+ if len(to_fetch) > 0:
+ fetch_morphologies(to_fetch)
+
+ while to_visit:
+ reponame, ref, filename = to_visit.pop()
+ absref, tree = resolved_refs[(reponame, ref)]
+ morphology = resolved_morphologies[(reponame, ref, filename)]
+ visit(reponame, ref, filename, absref, tree, morphology)
+ if morphology['kind'] == 'cluster':
+ raise cliapp.AppException(
+ "Cannot build a morphology of type 'cluster'.")
+ elif morphology['kind'] == 'system':
queue.extend((s.get('repo') or reponame,
- s.get('ref') or ref,
- '%s.morph' % s['morph'])
- for s in morphology['build-depends'])
- queue.extend((c['repo'], c['ref'], '%s.morph' % c['morph'])
- for c in morphology['chunks'])
+ s.get('ref') or ref,
+ '%s.morph' % s['morph'])
+ for s in morphology['strata'])
+ elif morphology['kind'] == 'stratum':
+ if morphology['build-depends']:
+ queue.extend((s.get('repo') or reponame,
+ s.get('ref') or ref,
+ '%s.morph' % s['morph'])
+ for s in morphology['build-depends'])
+ queue.extend((c['repo'], c['ref'], '%s.morph' % c['morph'])
+ for c in morphology['chunks'])
def cache_repo_and_submodules(self, cache, url, ref, done):
subs_to_process = set()
diff --git a/morphlib/morphologyfactory.py b/morphlib/morphologyfactory.py
index 1de42d57..cebc382f 100644
--- a/morphlib/morphologyfactory.py
+++ b/morphlib/morphologyfactory.py
@@ -14,6 +14,9 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import base64
+import logging
+
import morphlib
import cliapp
@@ -141,6 +144,73 @@ class MorphologyFactory(object):
text)
return morphology
+ def get_morphologies(self, resolved_refs, resolved_morphologies, triplets):
+ '''Fetch morphologies that are not already resolved.
+
+ The resolved_morphologies dict is updated with the new morphologies.
+
+ All refs in the list of triplets are assumed to be stored in the
+ resolved_refs dict already.
+
+ '''
+ text_dict = {}
+ file_list_dict = {}
+ to_read = dict()
+ to_autodetect = set()
+ for triplet in triplets:
+ if triplet in resolved_morphologies:
+ continue
+
+ reponame, ref, filename = triplet
+ absref, tree = resolved_refs[(reponame, ref)]
+
+ if self._lrc.has_repo(reponame):
+ text = self._read_local_repo(
+ reponame, absref, filename)
+ text_dict[(reponame, ref, filename)] = text
+ if self._rrc is None:
+ raise NotcachedError(reponame)
+ else:
+ repourl = self._rrc._resolver.pull_url(reponame)
+ remote_triplet = (repourl, absref, filename)
+ #print 'to_read[%s,%s,%s] set' % remote_triplet
+ to_read[remote_triplet] = triplet
+
+ if len(to_read) > 0:
+ self.status(msg='Fetching %i morphologies from remote repo cache' %
+ len(to_read))
+ result = self._rrc.cat_file_multiple(to_read.keys())
+ for item in result:
+ remote_triplet = (item['repo'], item['ref'], item['filename'])
+ triplet = to_read[remote_triplet]
+ if 'data' in item:
+ text = base64.decodestring(item['data'])
+ text_dict[triplet] = text
+ else:
+ logging.debug('Remote cache: %s', item)
+ to_autodetect.add(triplet)
+
+ for triplet in to_autodetect:
+ reponame, ref, filename = triplet
+ self.status(msg='Fetching file list for %s from remote repo cache'
+ % reponame, chatty=True)
+ absref, tree = resolved_refs[(reponame, ref)]
+ file_list = self._rrc.ls_tree(reponame, absref)
+ assert file_list is not None
+ file_list_dict[triplet] = file_list
+
+ for triplet in triplets:
+ if triplet in resolved_morphologies:
+ continue
+ reponame, ref, filename = triplet
+ absref, tree = resolved_refs[(reponame, ref)]
+ file_list = file_list_dict.get(triplet, [])
+ text = text_dict.get(triplet, None)
+ assert text or file_list
+ morphology = self._parse_or_generate_morphology(
+ reponame, absref, filename, file_list, text)
+ resolved_morphologies[triplet] = morphology
+
def _check_and_tweak_system(self, morphology, reponame, sha1, filename):
'''Check and tweak a system morphology.'''
diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py
index b1544b03..f790862e 100644
--- a/morphlib/remoterepocache.py
+++ b/morphlib/remoterepocache.py
@@ -55,7 +55,7 @@ class RemoteRepoCache(object):
repo_url = self._resolver.pull_url(repo_name)
try:
return self._resolve_ref_for_repo_url(repo_url, ref)
- except BaseException, e:
+ except urllib2.URLError, e:
logging.error('Caught exception: %s' % str(e))
raise ResolveRefError(repo_name, ref)
@@ -76,6 +76,18 @@ class RemoteRepoCache(object):
logging.error('Caught exception: %s' % str(e))
raise LsTreeError(repo_name, ref)
+ def cat_file_multiple(self, triplets):
+ if len(triplets) == 0:
+ return
+ request = []
+ for repo_name, ref, filename in triplets:
+ repo_url = self._resolver.pull_url(repo_name)
+ request.append(
+ dict(repo=repo_url, ref=ref, filename=filename))
+ result = self._make_request(
+ 'files', json_post_data=json.dumps(request))
+ return json.loads(result)
+
def _resolve_ref_for_repo_url(self, repo_url, ref): # pragma: no cover
data = self._make_request(
'sha1s?repo=%s&ref=%s' % self._quote_strings(repo_url, ref))
@@ -95,10 +107,15 @@ class RemoteRepoCache(object):
def _quote_strings(self, *args): # pragma: no cover
return tuple(urllib.quote(string) for string in args)
- def _make_request(self, path): # pragma: no cover
+ def _make_request(self, path, json_post_data=None): # pragma: no cover
server_url = self.server_url
if not server_url.endswith('/'):
server_url += '/'
url = urlparse.urljoin(server_url, '/1.0/%s' % path)
- handle = urllib2.urlopen(url)
+ if json_post_data is None:
+ headers = {}
+ else:
+ headers = {'Content-type': 'application/json'}
+ request = urllib2.Request(url, data=json_post_data, headers=headers)
+ handle = urllib2.urlopen(request)
return handle.read()