summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Firth <dan.firth@codethink.co.uk> 2016-12-02 16:18:04 +0000
committer: Daniel Firth <dan.firth@codethink.co.uk> 2016-12-02 16:18:04 +0000
commit: e3b9e3d0b0d2145ed7c55aa98ac81a8e1b2fedde (patch)
tree: fc0cdd665df7719917feb82230ac22479aa3e198
parent: ce7f83c35e4fe4124db90a3d6b5a3cf56e955024 (diff)
parent: 9f59a7e59b9537a33cbc066f51c2ccf058568fe5 (diff)
download: ybd-e3b9e3d0b0d2145ed7c55aa98ac81a8e1b2fedde.tar.gz
Merge branch 'lc/recursive_submodules_backport' into lc/staging/future [lc/staging/future]
-rw-r--r--  ybd/repos.py  286
1 files changed, 125 insertions, 161 deletions
diff --git a/ybd/repos.py b/ybd/repos.py
index c52adcc..8e16fc1 100644
--- a/ybd/repos.py
+++ b/ybd/repos.py
@@ -37,20 +37,18 @@ else:
def get_repo_url(repo):
- if repo:
- for alias, url in app.config.get('aliases', {}).items():
- repo = repo.replace(alias, url)
- if repo[:4] == "http" and not repo.endswith('.git'):
- repo = repo + '.git'
+ '''Takes a repository url and returns a normalized version with the
+ aliases replaced with their values.'''
+ for alias, url in app.config['aliases'].items():
+ repo = repo.replace(alias, url)
+ if repo.startswith("http") and not repo.endswith('.git'):
+ repo = repo + '.git'
return repo
def get_repo_name(repo):
- ''' Convert URIs to strings that only contain digits, letters, _ and %.
-
- NOTE: this naming scheme is based on what lorry uses
-
- '''
+ '''Takes a repository url and returns a string suitable to use as a
+ directory name for storing on a filesystem'''
def transl(x):
return x if x in valid_chars else '_'
@@ -61,6 +59,17 @@ def get_repo_name(repo):
return ''.join([transl(x) for x in url])
+def get_transport_info(repo):
+ '''Helper function to generate a dictionary of three useful values from
+ a given repository url: the getd url, the normalized name, and
+ the actual location of the repository in the gits directory for this
+ GitMachine.'''
+ i = {'url': get_repo_url(repo),
+ 'name': get_repo_name(repo)}
+ i.update({'dir': os.path.join(app.config['gits'], i['name'])})
+ return i
+
+
def get_version(gitdir, ref='HEAD'):
try:
with app.chdir(gitdir), open(os.devnull, "w") as fnull:
@@ -104,15 +113,9 @@ def get_tree(dn):
if app.config.get('tree-server'):
app.log(dn, 'WARNING: no tree from tree-server for', ref)
- mirror(dn['name'], dn['repo'])
+ mirror(dn['repo'])
with app.chdir(gitdir), open(os.devnull, "w") as fnull:
- if call(['git', 'rev-parse', ref + '^{object}'], stdout=fnull,
- stderr=fnull):
- # can't resolve ref. is it upstream?
- app.log(dn, 'Fetching from upstream to resolve %s' % ref)
- update_mirror(dn['name'], dn['repo'], gitdir)
-
try:
tree = check_output(['git', 'rev-parse', ref + '^{tree}'],
universal_newlines=True)[0:-1]
@@ -124,171 +127,132 @@ def get_tree(dn):
app.log(dn, 'No tree for ref', (ref, gitdir), exit=True)
-def mirror(name, repo):
- tempfile.tempdir = app.config['tmp']
- tmpdir = tempfile.mkdtemp()
- repo_url = get_repo_url(repo)
- try:
- tar_file = get_repo_name(repo_url) + '.tar'
- app.log(name, 'Try fetching tarball %s' % tar_file)
- # try tarball first
- with app.chdir(tmpdir), open(os.devnull, "w") as fnull:
- call(['wget', os.path.join(app.config['tar-url'], tar_file)],
- stdout=fnull, stderr=fnull)
- call(['tar', 'xf', tar_file], stderr=fnull)
- call(['git', 'config', 'gc.autodetach', 'false'], stderr=fnull)
- os.remove(tar_file)
- update_mirror(name, repo, tmpdir)
- except:
- app.log(name, 'Try git clone from', repo_url)
- with open(os.devnull, "w") as fnull:
- if call(['git', 'clone', '--mirror', '-n', repo_url, tmpdir]):
- app.log(name, 'Failed to clone', repo, exit=True)
-
- with app.chdir(tmpdir):
- if call(['git', 'rev-parse']):
- app.log(name, 'Problem mirroring git repo at', tmpdir, exit=True)
-
- gitdir = os.path.join(app.config['gits'], get_repo_name(repo))
- try:
- shutil.move(tmpdir, gitdir)
- app.log(name, 'Git repo is mirrored at', gitdir)
- except:
- pass
-
-
-def fetch(repo):
- with app.chdir(repo), open(os.devnull, "w") as fnull:
- call(['git', 'fetch', 'origin'], stdout=fnull, stderr=fnull)
-
-
def mirror_has_ref(gitdir, ref):
with app.chdir(gitdir), open(os.devnull, "w") as fnull:
out = call(['git', 'cat-file', '-t', ref], stdout=fnull, stderr=fnull)
return out == 0
-def update_mirror(name, repo, gitdir):
- with app.chdir(gitdir), open(os.devnull, "w") as fnull:
- app.log(name, 'Refreshing mirror for %s' % repo)
- repo_url = get_repo_url(repo)
- if call(['git', 'fetch', repo_url, '+refs/*:refs/*', '--prune'],
+def mirror(repo):
+ '''Mirror the repository at the url given into the gits directory
+ using the fields provided by get_transport_info.'''
+ it = get_transport_info(repo)
+ tar_file = it['name'] + '.tar'
+ if not os.path.exists(it['dir']) and \
+ app.config.get('tar-url', None) is not None:
+ try:
+ mpdir = tempfile.mkdtemp()
+ with app.chdir(tmpdir), open(os.devnull, "w") as fnull:
+ t = os.path.join(app.config['tar-url'], tar_file)
+ print "Getting %s" % t
+ call(['wget', t], stdout=fnull, stderr=fnull)
+ call(['tar', 'xf', tar_file], stderr=fnull)
+ call(['git', 'config', 'gc.autodetach', 'false'], stderr=fnull)
+ os.remove(tar_file)
+ shutil.move(tmpdir, it['dir'])
+ except:
+ pass
+
+ if os.path.exists(it['dir']):
+ with app.chdir(it['dir']):
+ if call(['git', 'fetch', it['url'], '+refs/*:refs/*', '--prune']):
+ raise Exception("Failed to update mirror for %s" % it['url'])
+ else:
+ tmpdir = tempfile.mkdtemp()
+ print("Cloning %s" % it['url'])
+ if call(['git', 'clone', '--mirror', '-n', it['url'], tmpdir]):
+ raise Exception("Failed to clone %s" % it['url'])
+ shutil.move(tmpdir, it['dir'])
+
+
+def _local_checkout(fromdir, todir, commit):
+ '''Clone locally from fromdir into todir and checkout at the commit.'''
+ with open(os.devnull, "w") as fnull:
+ if call(['git', 'clone', '--quiet', '--no-hardlinks', fromdir, todir],
stdout=fnull, stderr=fnull):
- app.log(name, 'Git update mirror failed', repo, exit=True)
+ raise Exception('Git clone failed for %s' % todir)
+ with app.chdir(todir):
+ if call(['git', 'checkout', '--force', commit],
+ stdout=fnull, stderr=fnull):
+ raise Exception('Git checkout failed for %s at %s' %
+ (todir, commit))
def checkout(dn):
- _checkout(dn['name'], dn['repo'], dn['ref'], dn['checkout'])
-
- with app.chdir(dn['checkout']):
- if os.path.exists('.gitmodules') or dn.get('submodules'):
- checkout_submodules(dn)
+ arrange_into_folder(dn['repo'], dn['ref'],
+ app.config['aliases'], dn['checkout'])
utils.set_mtime_recursively(dn['checkout'])
-def _checkout(name, repo, ref, checkout):
- gitdir = os.path.join(app.config['gits'], get_repo_name(repo))
- if not os.path.exists(gitdir):
- mirror(name, repo)
- elif not mirror_has_ref(gitdir, ref):
- update_mirror(name, repo, gitdir)
- # checkout the required version from git
- with open(os.devnull, "w") as fnull:
- # We need to pass '--no-hardlinks' because right now there's nothing to
- # stop the build from overwriting the files in the .git directory
- # inside the sandbox. If they were hardlinks, it'd be possible for a
- # build to corrupt the repo cache. I think it would be faster if we
- # removed --no-hardlinks, though.
- if call(['git', 'clone', '--no-hardlinks', gitdir, checkout],
- stdout=fnull, stderr=fnull):
- app.log(name, 'Git clone failed for', gitdir, exit=True)
-
- with app.chdir(checkout):
- if call(['git', 'checkout', '--force', ref], stdout=fnull,
- stderr=fnull):
- app.log(name, 'Git checkout failed for', ref, exit=True)
+def arrange_into_folder(repo, ref, submodule_mask, folder):
+ '''Takes a repository url and ref, a submodule mask, and a target
+ folder, and checks out the repository into the folder translating
+ urls via the mask.'''
+ tree = [{'path': '', 'url': repo, 'commit': ref}] + \
+ list(submodule_info(repo, ref, submodule_mask))
+ if not os.path.exists(folder):
+ os.makedirs(folder)
+ for x in tree:
+ it = get_transport_info(x['url'])
+ co = os.path.join(folder, x['path'])
+ _local_checkout(it['dir'], co, x['commit'])
- app.log(name, 'Git checkout %s in %s' % (repo, checkout))
- app.log(name, 'Upstream version %s' % get_version(checkout, ref))
-
-def source_date_epoch(checkout):
+def get_submodule_details(checkout):
+ '''Takes a path to a checkout of a repository and reads the
+ .gitmodules file.'''
with app.chdir(checkout):
- return check_output(['git', 'log', '-1', '--pretty=%ct'])[:-1]
-
-
-def extract_commit(name, repo, ref, target_dir):
- '''Check out a single commit (or tree) from a Git repo.
- The checkout() function actually clones the entire repo, so this
- function is much quicker when you don't need to copy the whole repo into
- target_dir.
- '''
- gitdir = os.path.join(app.config['gits'], get_repo_name(repo))
- if not os.path.exists(gitdir):
- mirror(name, repo)
- elif not mirror_has_ref(gitdir, ref):
- update_mirror(name, repo, gitdir)
-
- with tempfile.NamedTemporaryFile() as git_index_file:
- git_env = os.environ.copy()
- git_env['GIT_INDEX_FILE'] = git_index_file.name
- git_env['GIT_WORK_TREE'] = target_dir
-
- app.log(name, 'Extracting commit', ref)
- if call(['git', 'read-tree', ref], env=git_env, cwd=gitdir):
- app.log(name, 'git read-tree failed for', ref, exit=True)
- app.log(name, 'Then checkout index', ref)
- if call(['git', 'checkout-index', '--all'], env=git_env, cwd=gitdir):
- app.log(name, 'Git checkout-index failed for', ref, exit=True)
- app.log(name, 'Done', ref)
-
- utils.set_mtime_recursively(target_dir)
-
-
-def checkout_submodules(dn):
- app.log(dn, 'Checking git submodules')
- with open('.gitmodules', "r") as gitfile:
- # drop indentation in sections, as RawConfigParser cannot handle it
- content = '\n'.join([l.strip() for l in gitfile.read().splitlines()])
- io = StringIO(content)
- parser = RawConfigParser()
- parser.readfp(io)
-
- for section in parser.sections():
- # validate section name against the 'submodule "foo"' pattern
- submodule = re.sub(r'submodule "(.*)"', r'\1', section)
- path = parser.get(section, 'path')
- try:
- url = dn['submodules'][path]['url']
- app.log(dn, 'Processing submodule %s from' % path, url)
- except:
- url = parser.get(section, 'url')
- app.log(dn, 'WARNING: fallback to submodule %s from' % path, url)
-
- try:
- # list objects in the parent repo tree to find the commit
- # object that corresponds to the submodule
- commit = check_output(['git', 'ls-tree', dn['ref'], path])
-
- # read the commit hash from the output
- fields = commit.split()
+ if os.path.exists('.gitmodules'):
+ with open('.gitmodules', "r") as gmfile:
+ content = '\n'.join(
+ [l.strip() for l in gmfile.read().splitlines()])
+ io = StringIO(content)
+ parser = RawConfigParser()
+ parser.readfp(io)
+ return parser
+
+
+def submodule_info(repo, ref, submodule_mask={}, acc=''):
+ '''A generator for returning submodule info for a repository at a given
+ ref against a submodule mask. This will loop through child
+ submodules recursively, returning a dictionary containing the full
+ path of that submodule from the top of the initial repository, as
+ well as the url and the commit.'''
+ mirror(repo)
+ try:
+ tmpdir = tempfile.mkdtemp()
+ it = get_transport_info(repo)
+ _local_checkout(it['dir'], tmpdir, ref)
+ p = get_submodule_details(tmpdir)
+ if not p:
+ return
+ for x in p.sections():
+ submodule = re.sub(r'submodule "(.*)"', r'\1', x)
+ path = p.get(x, 'path')
+ try:
+ url = submodule_mask[path]['url']
+ except:
+ url = p.get(x, 'url')
+
+ with app.chdir(tmpdir):
+ commstr = check_output(['git', 'ls-tree', ref, path]).split()
+ fields = list(map(lambda x: x.decode('unicode-escape'), commstr))
if len(fields) >= 2 and fields[1] == 'commit':
- submodule_commit = commit.split()[2]
-
- # fail if the commit hash is invalid
- if len(submodule_commit) != 40:
- raise Exception
+ commit = fields[2]
- fulldir = os.path.join(os.getcwd(), path)
- _checkout(dn['name'], url, submodule_commit, fulldir)
+ yield {'path': os.path.join(acc, path),
+ 'url': url, 'commit': commit}
+ subs = submodule_mask.get(path, {}).get('submodules', {})
+ for z in submodule_info(url, commit, subs, path):
+ yield z
+ finally:
+ shutil.rmtree(tmpdir)
- else:
- app.log(dn, 'Skipping submodule %s, not a commit:' % path,
- fields)
- except:
- app.log(dn, "Git submodules problem", exit=True)
+def source_date_epoch(checkout):
+ with app.chdir(checkout):
+ return check_output(['git', 'log', '-1', '--pretty=%ct'])[:-1]
@contextlib.contextmanager