diff options
author | Daniel Firth <dan.firth@codethink.co.uk> | 2016-12-02 16:18:04 +0000 |
---|---|---|
committer | Daniel Firth <dan.firth@codethink.co.uk> | 2016-12-02 16:18:04 +0000 |
commit | e3b9e3d0b0d2145ed7c55aa98ac81a8e1b2fedde (patch) | |
tree | fc0cdd665df7719917feb82230ac22479aa3e198 | |
parent | ce7f83c35e4fe4124db90a3d6b5a3cf56e955024 (diff) | |
parent | 9f59a7e59b9537a33cbc066f51c2ccf058568fe5 (diff) | |
download | ybd-e3b9e3d0b0d2145ed7c55aa98ac81a8e1b2fedde.tar.gz |
Merge branch 'lc/recursive_submodules_backport' into lc/staging/future
-rw-r--r-- | ybd/repos.py | 286 |
1 files changed, 125 insertions, 161 deletions
diff --git a/ybd/repos.py b/ybd/repos.py index c52adcc..8e16fc1 100644 --- a/ybd/repos.py +++ b/ybd/repos.py @@ -37,20 +37,18 @@ else: def get_repo_url(repo): - if repo: - for alias, url in app.config.get('aliases', {}).items(): - repo = repo.replace(alias, url) - if repo[:4] == "http" and not repo.endswith('.git'): - repo = repo + '.git' + '''Takes a repository url and returns a normalized version with the + aliases replaced with their values.''' + for alias, url in app.config['aliases'].items(): + repo = repo.replace(alias, url) + if repo.startswith("http") and not repo.endswith('.git'): + repo = repo + '.git' return repo def get_repo_name(repo): - ''' Convert URIs to strings that only contain digits, letters, _ and %. - - NOTE: this naming scheme is based on what lorry uses - - ''' + '''Takes a repository url and returns a string suitable to use as a + directory name for storing on a filesystem''' def transl(x): return x if x in valid_chars else '_' @@ -61,6 +59,17 @@ def get_repo_name(repo): return ''.join([transl(x) for x in url]) +def get_transport_info(repo): + '''Helper function to generate a dictionary of three useful values from + a given repository url: the getd url, the normalized name, and + the actual location of the repository in the gits directory for this + GitMachine.''' + i = {'url': get_repo_url(repo), + 'name': get_repo_name(repo)} + i.update({'dir': os.path.join(app.config['gits'], i['name'])}) + return i + + def get_version(gitdir, ref='HEAD'): try: with app.chdir(gitdir), open(os.devnull, "w") as fnull: @@ -104,15 +113,9 @@ def get_tree(dn): if app.config.get('tree-server'): app.log(dn, 'WARNING: no tree from tree-server for', ref) - mirror(dn['name'], dn['repo']) + mirror(dn['repo']) with app.chdir(gitdir), open(os.devnull, "w") as fnull: - if call(['git', 'rev-parse', ref + '^{object}'], stdout=fnull, - stderr=fnull): - # can't resolve ref. is it upstream? 
- app.log(dn, 'Fetching from upstream to resolve %s' % ref) - update_mirror(dn['name'], dn['repo'], gitdir) - try: tree = check_output(['git', 'rev-parse', ref + '^{tree}'], universal_newlines=True)[0:-1] @@ -124,171 +127,132 @@ def get_tree(dn): app.log(dn, 'No tree for ref', (ref, gitdir), exit=True) -def mirror(name, repo): - tempfile.tempdir = app.config['tmp'] - tmpdir = tempfile.mkdtemp() - repo_url = get_repo_url(repo) - try: - tar_file = get_repo_name(repo_url) + '.tar' - app.log(name, 'Try fetching tarball %s' % tar_file) - # try tarball first - with app.chdir(tmpdir), open(os.devnull, "w") as fnull: - call(['wget', os.path.join(app.config['tar-url'], tar_file)], - stdout=fnull, stderr=fnull) - call(['tar', 'xf', tar_file], stderr=fnull) - call(['git', 'config', 'gc.autodetach', 'false'], stderr=fnull) - os.remove(tar_file) - update_mirror(name, repo, tmpdir) - except: - app.log(name, 'Try git clone from', repo_url) - with open(os.devnull, "w") as fnull: - if call(['git', 'clone', '--mirror', '-n', repo_url, tmpdir]): - app.log(name, 'Failed to clone', repo, exit=True) - - with app.chdir(tmpdir): - if call(['git', 'rev-parse']): - app.log(name, 'Problem mirroring git repo at', tmpdir, exit=True) - - gitdir = os.path.join(app.config['gits'], get_repo_name(repo)) - try: - shutil.move(tmpdir, gitdir) - app.log(name, 'Git repo is mirrored at', gitdir) - except: - pass - - -def fetch(repo): - with app.chdir(repo), open(os.devnull, "w") as fnull: - call(['git', 'fetch', 'origin'], stdout=fnull, stderr=fnull) - - def mirror_has_ref(gitdir, ref): with app.chdir(gitdir), open(os.devnull, "w") as fnull: out = call(['git', 'cat-file', '-t', ref], stdout=fnull, stderr=fnull) return out == 0 -def update_mirror(name, repo, gitdir): - with app.chdir(gitdir), open(os.devnull, "w") as fnull: - app.log(name, 'Refreshing mirror for %s' % repo) - repo_url = get_repo_url(repo) - if call(['git', 'fetch', repo_url, '+refs/*:refs/*', '--prune'], +def mirror(repo): + '''Mirror the 
repository at the url given into the gits directory + using the fields provided by get_transport_info.''' + it = get_transport_info(repo) + tar_file = it['name'] + '.tar' + if not os.path.exists(it['dir']) and \ + app.config.get('tar-url', None) is not None: + try: + mpdir = tempfile.mkdtemp() + with app.chdir(tmpdir), open(os.devnull, "w") as fnull: + t = os.path.join(app.config['tar-url'], tar_file) + print "Getting %s" % t + call(['wget', t], stdout=fnull, stderr=fnull) + call(['tar', 'xf', tar_file], stderr=fnull) + call(['git', 'config', 'gc.autodetach', 'false'], stderr=fnull) + os.remove(tar_file) + shutil.move(tmpdir, it['dir']) + except: + pass + + if os.path.exists(it['dir']): + with app.chdir(it['dir']): + if call(['git', 'fetch', it['url'], '+refs/*:refs/*', '--prune']): + raise Exception("Failed to update mirror for %s" % it['url']) + else: + tmpdir = tempfile.mkdtemp() + print("Cloning %s" % it['url']) + if call(['git', 'clone', '--mirror', '-n', it['url'], tmpdir]): + raise Exception("Failed to clone %s" % it['url']) + shutil.move(tmpdir, it['dir']) + + +def _local_checkout(fromdir, todir, commit): + '''Clone locally from fromdir into todir and checkout at the commit.''' + with open(os.devnull, "w") as fnull: + if call(['git', 'clone', '--quiet', '--no-hardlinks', fromdir, todir], stdout=fnull, stderr=fnull): - app.log(name, 'Git update mirror failed', repo, exit=True) + raise Exception('Git clone failed for %s' % todir) + with app.chdir(todir): + if call(['git', 'checkout', '--force', commit], + stdout=fnull, stderr=fnull): + raise Exception('Git checkout failed for %s at %s' % + (todir, commit)) def checkout(dn): - _checkout(dn['name'], dn['repo'], dn['ref'], dn['checkout']) - - with app.chdir(dn['checkout']): - if os.path.exists('.gitmodules') or dn.get('submodules'): - checkout_submodules(dn) + arrange_into_folder(dn['repo'], dn['ref'], + app.config['aliases'], dn['checkout']) utils.set_mtime_recursively(dn['checkout']) -def _checkout(name, repo, 
ref, checkout): - gitdir = os.path.join(app.config['gits'], get_repo_name(repo)) - if not os.path.exists(gitdir): - mirror(name, repo) - elif not mirror_has_ref(gitdir, ref): - update_mirror(name, repo, gitdir) - # checkout the required version from git - with open(os.devnull, "w") as fnull: - # We need to pass '--no-hardlinks' because right now there's nothing to - # stop the build from overwriting the files in the .git directory - # inside the sandbox. If they were hardlinks, it'd be possible for a - # build to corrupt the repo cache. I think it would be faster if we - # removed --no-hardlinks, though. - if call(['git', 'clone', '--no-hardlinks', gitdir, checkout], - stdout=fnull, stderr=fnull): - app.log(name, 'Git clone failed for', gitdir, exit=True) - - with app.chdir(checkout): - if call(['git', 'checkout', '--force', ref], stdout=fnull, - stderr=fnull): - app.log(name, 'Git checkout failed for', ref, exit=True) +def arrange_into_folder(repo, ref, submodule_mask, folder): + '''Takes a repository url and ref, a submodule mask, and a target + folder, and checks out the repository into the folder translating + urls via the mask.''' + tree = [{'path': '', 'url': repo, 'commit': ref}] + \ + list(submodule_info(repo, ref, submodule_mask)) + if not os.path.exists(folder): + os.makedirs(folder) + for x in tree: + it = get_transport_info(x['url']) + co = os.path.join(folder, x['path']) + _local_checkout(it['dir'], co, x['commit']) - app.log(name, 'Git checkout %s in %s' % (repo, checkout)) - app.log(name, 'Upstream version %s' % get_version(checkout, ref)) - -def source_date_epoch(checkout): +def get_submodule_details(checkout): + '''Takes a path to a checkout of a repository and reads the + .gitmodules file.''' with app.chdir(checkout): - return check_output(['git', 'log', '-1', '--pretty=%ct'])[:-1] - - -def extract_commit(name, repo, ref, target_dir): - '''Check out a single commit (or tree) from a Git repo. 
- The checkout() function actually clones the entire repo, so this - function is much quicker when you don't need to copy the whole repo into - target_dir. - ''' - gitdir = os.path.join(app.config['gits'], get_repo_name(repo)) - if not os.path.exists(gitdir): - mirror(name, repo) - elif not mirror_has_ref(gitdir, ref): - update_mirror(name, repo, gitdir) - - with tempfile.NamedTemporaryFile() as git_index_file: - git_env = os.environ.copy() - git_env['GIT_INDEX_FILE'] = git_index_file.name - git_env['GIT_WORK_TREE'] = target_dir - - app.log(name, 'Extracting commit', ref) - if call(['git', 'read-tree', ref], env=git_env, cwd=gitdir): - app.log(name, 'git read-tree failed for', ref, exit=True) - app.log(name, 'Then checkout index', ref) - if call(['git', 'checkout-index', '--all'], env=git_env, cwd=gitdir): - app.log(name, 'Git checkout-index failed for', ref, exit=True) - app.log(name, 'Done', ref) - - utils.set_mtime_recursively(target_dir) - - -def checkout_submodules(dn): - app.log(dn, 'Checking git submodules') - with open('.gitmodules', "r") as gitfile: - # drop indentation in sections, as RawConfigParser cannot handle it - content = '\n'.join([l.strip() for l in gitfile.read().splitlines()]) - io = StringIO(content) - parser = RawConfigParser() - parser.readfp(io) - - for section in parser.sections(): - # validate section name against the 'submodule "foo"' pattern - submodule = re.sub(r'submodule "(.*)"', r'\1', section) - path = parser.get(section, 'path') - try: - url = dn['submodules'][path]['url'] - app.log(dn, 'Processing submodule %s from' % path, url) - except: - url = parser.get(section, 'url') - app.log(dn, 'WARNING: fallback to submodule %s from' % path, url) - - try: - # list objects in the parent repo tree to find the commit - # object that corresponds to the submodule - commit = check_output(['git', 'ls-tree', dn['ref'], path]) - - # read the commit hash from the output - fields = commit.split() + if os.path.exists('.gitmodules'): + with 
open('.gitmodules', "r") as gmfile: + content = '\n'.join( + [l.strip() for l in gmfile.read().splitlines()]) + io = StringIO(content) + parser = RawConfigParser() + parser.readfp(io) + return parser + + +def submodule_info(repo, ref, submodule_mask={}, acc=''): + '''A generator for returning submodule info for a repository at a given + ref against a submodule mask. This will loop through child + submodules recursively, returning a dictionary containing the full + path of that submodule from the top of the initial repository, as + well as the url and the commit.''' + mirror(repo) + try: + tmpdir = tempfile.mkdtemp() + it = get_transport_info(repo) + _local_checkout(it['dir'], tmpdir, ref) + p = get_submodule_details(tmpdir) + if not p: + return + for x in p.sections(): + submodule = re.sub(r'submodule "(.*)"', r'\1', x) + path = p.get(x, 'path') + try: + url = submodule_mask[path]['url'] + except: + url = p.get(x, 'url') + + with app.chdir(tmpdir): + commstr = check_output(['git', 'ls-tree', ref, path]).split() + fields = list(map(lambda x: x.decode('unicode-escape'), commstr)) if len(fields) >= 2 and fields[1] == 'commit': - submodule_commit = commit.split()[2] - - # fail if the commit hash is invalid - if len(submodule_commit) != 40: - raise Exception + commit = fields[2] - fulldir = os.path.join(os.getcwd(), path) - _checkout(dn['name'], url, submodule_commit, fulldir) + yield {'path': os.path.join(acc, path), + 'url': url, 'commit': commit} + subs = submodule_mask.get(path, {}).get('submodules', {}) + for z in submodule_info(url, commit, subs, path): + yield z + finally: + shutil.rmtree(tmpdir) - else: - app.log(dn, 'Skipping submodule %s, not a commit:' % path, - fields) - except: - app.log(dn, "Git submodules problem", exit=True) +def source_date_epoch(checkout): + with app.chdir(checkout): + return check_output(['git', 'log', '-1', '--pretty=%ct'])[:-1] @contextlib.contextmanager |