From b0d4fbb4ef9abc01a1b2f23282dd70ff8bd66528 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Sun, 15 Jun 2014 15:17:50 +0100 Subject: Resolve refs using remote cache in batches, not individually There is significant overhead involved in making an HTTP request, so it's worth the extra code to batch these requests as much as possible. --- morphlib/app.py | 101 ++++++++++++++++++++++++-------------------- morphlib/remoterepocache.py | 12 ++++++ 2 files changed, 68 insertions(+), 45 deletions(-) diff --git a/morphlib/app.py b/morphlib/app.py index 43567360..33393478 100644 --- a/morphlib/app.py +++ b/morphlib/app.py @@ -300,55 +300,66 @@ class Morph(cliapp.Application): visit=add_to_pool) return pool - def resolve_ref(self, lrc, rrc, reponame, ref, update=True): - '''Resolves commit and tree sha1s of the ref in a repo and returns it. + def resolve_refs(self, lrc, rrc, update, updated_repos, resolved_refs, + references): + '''Find commit and tree SHA1s for a given set of refs. - If update is True then this has the side-effect of updating - or cloning the repository into the local repo cache. - ''' - absref = None + This may clone or update the repo in the local repository cache, if + 'update' is True and there is no remote repo cache configured. + ''' def is_floating_ref(ref): # This code actually detects if the ref is a valid SHA1. Is there a # better way to discover if a ref is a named ref or not? sha1_match = re.match('[A-Fa-f0-9]{40}', ref) return True if sha1_match is None else False - if lrc.has_repo(reponame): - repo = lrc.get_repo(reponame) - if is_floating_ref(ref) or not repo.ref_exists(ref): - if update: - self.status( - msg='Updating cached git repository %(reponame)s for ' - 'ref %(ref)s', reponame=reponame, ref=ref) - repo.update() - else: - # If the ref is a SHA1 that is not available locally, the - # user will receive an error from repo.resolve_ref().
If - # it's a named ref that is # available locally that is - # updated in the remote repo, they will not get the update. - pass - absref, tree = repo.resolve_ref(ref) - elif rrc is not None: - try: - absref, tree = rrc.resolve_ref(reponame, ref) - self.status(msg='Resolved %(reponame)s %(ref)s via remote ' - 'repo cache', - reponame=reponame, - ref=ref, - chatty=True) - except urllib2.URLError as e: - logging.warning('Caught (and ignored) exception: %s' % str(e)) - if absref is None: + to_read = {} + + for reponame, ref in references: + if lrc.has_repo(reponame): + repo = lrc.get_repo(reponame) + if is_floating_ref(ref) or not repo.ref_exists(ref): + if update and reponame not in updated_repos: + self.status( + msg='Updating cached git repository %(reponame)s ' + 'for ref %(ref)s', reponame=reponame, ref=ref) + repo.update() + updated_repos.add(reponame) + else: + # If the ref is a SHA1 that is not available locally, + # the user will receive an error from + # repo.resolve_ref(). If it's a named ref that is + # available locally that is updated in the remote repo, + # they will not get the update. 
+ pass + absref, tree = repo.resolve_ref(ref) + resolved_refs[(reponame, ref)] = (absref, tree) + elif rrc is not None: + repourl = rrc._resolver.pull_url(reponame) + to_read[(repourl, ref)] = (reponame, ref) + + if rrc is not None and len(to_read) > 0: + self.status(msg='Resolving %i refs from remote repo cache' % + len(to_read)) + result = rrc.resolve_ref_batch(to_read.keys()) + for item in result: + reponame, ref = to_read[(item['repo'], item['ref'])] + if 'error' in item: + logging.debug('Remote cache: %s', item) + raise morphlib.remoterepocache.ResolveRefError( + reponame, ref) + resolved_refs[(reponame, ref)] = (item['sha1'], item['tree']) + elif rrc is None: if update: - self.status(msg='Caching git repository %(reponame)s', - reponame=reponame) + self.status(msg='Caching git repository %(reponame)s for ref ' + '%(ref)s', reponame=reponame, ref=ref) repo = lrc.cache_repo(reponame) repo.update() else: - repo = lrc.get_repo(reponame) + raise morphlib.localrepocache.NotCached(reponame) absref, tree = repo.resolve_ref(ref) - return absref, tree + resolved_refs[(reponame, ref)] = (absref, tree) def traverse_morphs(self, triplets, lrc, rrc, update=True, visit=lambda rn, rf, fn, arf, m: None): @@ -360,22 +371,18 @@ class Morph(cliapp.Application): resolved_morphologies = {} def fetch_morphologies(triplets): - morph_factory.get_morphologies(resolved_refs, resolved_morphologies, triplets) + morph_factory.get_morphologies(resolved_refs, + resolved_morphologies, triplets) while queue: + to_resolve = set() to_fetch = set() while queue: reponame, ref, filename = queue.popleft() - update_repo = update and reponame not in updated_repos - # Resolve the (repo, ref) reference, cache result. 
reference = (reponame, ref) - if not reference in resolved_refs: - resolved_refs[reference] = self.resolve_ref( - lrc, rrc, reponame, ref, update_repo) - absref, tree = resolved_refs[reference] - - updated_repos.add(reponame) + if reference not in resolved_refs: + to_resolve.add(reference) #print 'resolved: %s %s %s' % ((reponame, ref, filename)) triplet = (reponame, ref, filename) @@ -383,6 +390,10 @@ class Morph(cliapp.Application): to_fetch.add(triplet) #print 'to_fetch: %s' % to_fetch + if len(to_resolve) > 0: + self.resolve_refs(lrc, rrc, update, updated_repos, + resolved_refs, to_resolve) + to_visit = to_fetch if len(to_fetch) > 0: fetch_morphologies(to_fetch) diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py index f790862e..afef4f99 100644 --- a/morphlib/remoterepocache.py +++ b/morphlib/remoterepocache.py @@ -59,6 +59,18 @@ class RemoteRepoCache(object): logging.error('Caught exception: %s' % str(e)) raise ResolveRefError(repo_name, ref) + def resolve_ref_batch(self, references): + if len(references) == 0: + return + request = [] + for repo_name, ref in references: + repo_url = self._resolver.pull_url(repo_name) + request.append( + dict(repo=repo_url, ref=ref)) + result = self._make_request( + 'sha1s', json_post_data=json.dumps(request)) + return json.loads(result) + def cat_file(self, repo_name, ref, filename): repo_url = self._resolver.pull_url(repo_name) try: -- cgit v1.2.1