From 0855c357e74e6dd7aa7921d863e6144b89fc662b Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Tue, 10 Feb 2015 17:06:59 +0000 Subject: Only update Git submodules in cache when necessary This saves a duplicate `git remote update origin` that was being run as part of each chunk build. For any repos that have submodules, it also avoids updating repos if the SHA1 we need to build is already present locally. As well as speeding up builds slightly, this means Morph can now build without being connected to a network, as long as the local Git cache all of the necessary repos and commits in the build, without needing the '--no-git-update' option. The code is also now in a more logical place than before. --- morphlib/app.py | 20 ------------ morphlib/buildcommand.py | 35 ++------------------- morphlib/localrepocache.py | 78 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 66 insertions(+), 67 deletions(-) diff --git a/morphlib/app.py b/morphlib/app.py index 0c87f814..b8bae850 100644 --- a/morphlib/app.py +++ b/morphlib/app.py @@ -297,26 +297,6 @@ class Morph(cliapp.Application): morphlib.util.sanitise_morphology_path(args[2])) args = args[3:] - def cache_repo_and_submodules(self, cache, url, ref, done): - subs_to_process = set() - subs_to_process.add((url, ref)) - while subs_to_process: - url, ref = subs_to_process.pop() - done.add((url, ref)) - cached_repo = cache.cache_repo(url) - cached_repo.update() - - try: - submodules = morphlib.git.Submodules(self, cached_repo.path, - ref) - submodules.load() - except morphlib.git.NoModulesFileError: - pass - else: - for submod in submodules: - if (submod.url, submod.commit) not in done: - subs_to_process.add((submod.url, submod.commit)) - def _write_status(self, text): timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) self.output.write('%s %s\n' % (timestamp, text)) diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py index a22e689b..6959c455 100644 --- a/morphlib/buildcommand.py +++ b/morphlib/buildcommand.py @@ -386,39 +386,8 @@ class BuildCommand(object): '''Update the local git repository cache with the sources.''' repo_name = source.repo_name - if self.app.settings['no-git-update']: - self.app.status(msg='Not updating existing git repository ' - '%(repo_name)s ' - 'because of no-git-update being set', - chatty=True, - repo_name=repo_name) - source.repo = self.lrc.get_repo(repo_name) - return - - if self.lrc.has_repo(repo_name): - source.repo = self.lrc.get_repo(repo_name) - try: - sha1 = source.sha1 - source.repo.resolve_ref_to_commit(sha1) - self.app.status(msg='Not updating git repository ' - '%(repo_name)s because it ' - 'already contains sha1 %(sha1)s', - chatty=True, repo_name=repo_name, - sha1=sha1) - except morphlib.gitdir.InvalidRefError: - self.app.status(msg='Updating %(repo_name)s', - repo_name=repo_name) - source.repo.update() - else: - self.app.status(msg='Cloning %(repo_name)s', - repo_name=repo_name) - source.repo = self.lrc.cache_repo(repo_name) - - # Update submodules. - done = set() - self.app.cache_repo_and_submodules( - self.lrc, source.repo.url, - source.sha1, done) + source.repo = self.lrc.get_updated_repo(repo_name, ref=source.sha1) + self.lrc.ensure_submodules(source.repo, source.sha1) def cache_artifacts_locally(self, artifacts): '''Get artifacts missing from local cache from remote cache.''' diff --git a/morphlib/localrepocache.py b/morphlib/localrepocache.py index 9bccb20b..ac59f5c8 100644 --- a/morphlib/localrepocache.py +++ b/morphlib/localrepocache.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,10 +14,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import logging import os -import re -import urllib2 import urlparse import string import sys @@ -244,15 +241,68 @@ class LocalRepoCache(object): return repo raise NotCached(reponame) - def get_updated_repo(self, reponame): # pragma: no cover - '''Return object representing cached repository, which is updated.''' + def get_updated_repo(self, repo_name, ref=None): # pragma: no cover + '''Return object representing cached repository. - if not self._app.settings['no-git-update']: - cached_repo = self.cache_repo(reponame) - self._app.status( - msg='Updating git repository %s in cache' % reponame) - cached_repo.update() - else: - cached_repo = self.get_repo(reponame) - return cached_repo + If 'ref' is None, the repo will be updated unless + app.settings['no-git-update'] is set. + + If 'ref' is set to a SHA1, the repo will only be updated if 'ref' isn't + already available locally. + ''' + + if self._app.settings['no-git-update']: + self._app.status(msg='Not updating existing git repository ' + '%(repo_name)s ' + 'because of no-git-update being set', + chatty=True, + repo_name=repo_name) + return self.get_repo(repo_name) + + if self.has_repo(repo_name): + repo = self.get_repo(repo_name) + if ref and morphlib.git.is_valid_sha1(ref): + try: + repo.resolve_ref_to_commit(ref) + self._app.status(msg='Not updating git repository ' + '%(repo_name)s because it ' + 'already contains sha1 %(sha1)s', + chatty=True, repo_name=repo_name, + sha1=ref) + return repo + except morphlib.gitdir.InvalidRefError: + pass + + self._app.status(msg='Updating %(repo_name)s', + repo_name=repo_name) + repo.update() + return repo + else: + self._app.status(msg='Cloning %(repo_name)s', + repo_name=repo_name) + return self.cache_repo(repo_name) + + def ensure_submodules(self, toplevel_repo, + toplevel_ref): # pragma: no cover + '''Ensure any submodules of a given repo are cached and up to date.''' + + def submodules_for_repo(repo_path, ref): + try: + submodules = morphlib.git.Submodules(self._app, repo_path, ref) + submodules.load() + return [(submod.url, submod.commit) for submod in submodules] + except morphlib.git.NoModulesFileError: + return [] + + done = set() + subs_to_process = submodules_for_repo(toplevel_repo.path, toplevel_ref) + while subs_to_process: + url, ref = subs_to_process.pop() + done.add((url, ref)) + + cached_repo = self.get_updated_repo(url, ref=ref) + + for submod in submodules_for_repo(cached_repo.path, ref): + if (submod.url, submod.commit) not in done: + subs_to_process.add((submod.url, submod.commit)) -- cgit v1.2.1