#!/usr/bin/env python
# Copyright (C) 2016 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.


# THIS MIGRATION REQUIRES NETWORK ACCESS TO A BASEROCK GIT CACHE SERVER! If
# you do not have your own Trove, or don't know what a Trove is, it should
# work as-is, provided you have internet access that allows access to
# http://git.baserock.org:8080/.
#
# If you do have your own Trove, change the value of TROVE_HOST below to
# point to it.
#

'''Migration to Baserock Definitions format version 8.

In definitions version 8, submodules must be declared explicitly for all chunks
that contain a .gitmodules file in their root. This is so that mirrored source
repositories don't need to maintain branches that point to the mirrored
submodules, and can instead translate these at build time.

'''

import requests
import string
import logging
import re
import sys
import warnings

import migrations

# The original Python 2 module names are tried first; the fallbacks only kick
# in on interpreters where those modules no longer exist.
try:
    from ConfigParser import RawConfigParser
except ImportError:
    from configparser import RawConfigParser

try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO


TROVE_HOST = 'git.baserock.org'

REPO_ALIASES = {
    'baserock:': 'git://%s/baserock/' % TROVE_HOST,
    'freedesktop:': 'git://anongit.freedesktop.org/',
    'github:': 'git://github.com/',
    'gnome:': 'git://git.gnome.org/',
    'upstream:': 'git://%s/delta/' % TROVE_HOST,
}

GIT_CACHE_SERVER_URL = 'http://%s:8080/' % TROVE_HOST

FAIL_ON_REMOTE_CACHE_ERRORS = False


TO_VERSION = 8


# From ybd.git file repos.py at commit eb3bf397ba729387f0d4145a8df8d3c1f9eb707f

def get_repo_url(repo):
    '''Expand any alias from REPO_ALIASES in 'repo' and drop a '.git' suffix.'''
    for alias, url in REPO_ALIASES.items():
        repo = repo.replace(alias, url)
    if repo.endswith('.git'):
        repo = repo[:-4]
    return repo


def get_repo_name(repo):
    '''Convert URIs to strings that only contain digits, letters, _ and %.

    NOTE: this naming scheme is based on what lorry uses

    '''
    valid_chars = string.digits + string.ascii_letters + '%_'
    return ''.join(
        x if x in valid_chars else '_' for x in get_repo_url(repo))

## End of code based on ybd repos.py


# This function is taken from morphlib/git.py.
def is_valid_sha1(ref):
    '''Checks whether a string is a valid SHA1.'''

    return len(ref) == 40 and all(x in string.hexdigits for x in ref)


def resolve_ref_to_commit(url, ref):
    '''Resolve ref to a SHA1, using the remote morph-cache server.

    This is only useful for repos hosted on a Baserock Trove, but it does
    work without needing to clone the whole repo.

    Raises RuntimeError if the server returns an error status or a body
    that does not contain a 'sha1' entry. Connection-level errors from
    'requests' are not translated here and propagate to the caller.

    '''
    response = requests.get(
        GIT_CACHE_SERVER_URL + '1.0/sha1s',
        params={'repo': url, 'ref': ref},
        headers={'Accept': 'application/json'},
        timeout=9)
    logging.debug("Got response: %s", response)
    try:
        response.raise_for_status()
        commit = response.json()['sha1']
    except Exception as e:
        # Format the exception itself: 'e.message' is deprecated, may be
        # empty, and is not available at all on Python 3.
        raise RuntimeError(
            "Unexpected response from server %s for repo %s: %s" %
            (GIT_CACHE_SERVER_URL, url, e))
    return commit


def get_toplevel_file_list_from_repo(url, commit):
    '''Try to list the set of files in the root directory of the repo at 'url'.

    This is only useful for repos hosted on a Baserock Trove, but it does
    work without needing to clone the whole repo.

    Returns the keys of the 'tree' entry in the server's JSON response.
    Raises RuntimeError if the server cannot be reached or returns an
    unexpected response.

    '''
    try:
        response = requests.get(
            GIT_CACHE_SERVER_URL + '1.0/trees',
            params={'repo': url, 'ref': commit},
            headers={'Accept': 'application/json'},
            timeout=9)
        logging.debug("Got response: %s", response)
        try:
            response.raise_for_status()
            toplevel_tree = response.json()['tree']
        except Exception as e:
            raise RuntimeError(
                "Unexpected response from server %s for repo %s: %s" %
                (GIT_CACHE_SERVER_URL, url, e))
        toplevel_filenames = toplevel_tree.keys()
    except requests.exceptions.ConnectionError as e:
        raise RuntimeError("Unable to connect to cache server %s while trying "
                           "to query file list of repo %s. Error was: %s" %
                           (GIT_CACHE_SERVER_URL, url, e))
    return toplevel_filenames


def validate_chunk_refs(contents, filename):
    '''Check that chunks without a 'morph' entry specify 'repo' and 'ref'.

    Emits a warning for each missing field. Returns True if no problem was
    found, False otherwise.

    '''
    assert contents['kind'] == 'stratum'

    valid = True
    for chunk_ref in contents.get('chunks', []):
        if chunk_ref.get('morph') is None:
            # No chunk .morph file -- this stratum was relying on build-system
            # autodetection here.

            if 'repo' not in chunk_ref:
                warnings.warn("%s: Chunk %s doesn't specify a source repo." %
                              (filename, chunk_ref.get('name')))
                valid = False

            if 'ref' not in chunk_ref:
                warnings.warn("%s: Chunk %s doesn't specify a source ref." %
                              (filename, chunk_ref.get('name')))
                valid = False

    return valid


def move_dict_entry_last(dict_object, key, error_if_missing=False):
    '''Move an entry in a ordered dict to the end.

    If 'key' is absent, nothing happens unless 'error_if_missing' is true,
    in which case KeyError is raised.

    '''
    # This is a hack, I couldn't find a method on the 'CommentedMap' type dict
    # that we receive from ruamel.yaml that would allow doing this neatly.
    if key in dict_object:
        value = dict_object[key]
        del dict_object[key]
        dict_object[key] = value
    else:
        if error_if_missing:
            raise KeyError(key)


def submodules_to_dict(url, ref):
    '''Fetch '.gitmodules' of repo 'url' at 'ref' and parse it into a dict.

    The file is requested from the git cache server, and each of its config
    sections becomes one entry in the result, shaped as:

        {submodule_name: {'url': submodule_url}}

    Raises RuntimeError if the server cannot be reached or returns an error
    status.

    '''
    try:
        response = requests.get(
            GIT_CACHE_SERVER_URL + '1.0/files',
            params={'repo': url, 'ref': ref, 'filename': '.gitmodules'},
            headers={'Accept': 'application/json'},
            timeout=9)
        logging.debug("Got response: %s", response)
        try:
            response.raise_for_status()
        except Exception as e:
            raise RuntimeError(
                "Unexpected response from server %s for repo %s: %s" %
                (GIT_CACHE_SERVER_URL, url, e))
    except requests.exceptions.ConnectionError as e:
        raise RuntimeError("Unable to connect to cache server %s while trying "
                           "to read file '.gitmodules' of repo %s. Error "
                           "was: %s" % (GIT_CACHE_SERVER_URL, url, e))

    # Strip each line before parsing, so that indented 'key = value' lines
    # are not taken for continuation lines by the config parser.
    gitmodules = ''.join(
        "%s\n" % line.strip() for line in response.text.splitlines())

    parser = RawConfigParser()
    # 'readfp' is the only spelling on Python 2; newer interpreters provide
    # 'read_file' and eventually dropped 'readfp'.
    read_config = getattr(parser, 'read_file', None) or parser.readfp
    read_config(StringIO(gitmodules))

    submodules = {}
    for section in parser.sections():
        # Section headers look like: submodule "name"
        name = re.sub(r'submodule "(.*)"', r'\1', section)
        # Use a separate local so the 'url' parameter is not rebound.
        submodule_url = parser.get(section, 'url')
        submodules[name] = {'url': submodule_url}

    return submodules


def add_submodules_to_strata(contents, filename):
    '''Add a 'submodules' entry to chunks whose repo has a .gitmodules file.

    Chunks that already have a 'submodules' entry are left alone. For the
    others, the chunk's ref is resolved to a commit (unless it is a SHA1
    already) and the top-level file list is fetched from the git cache
    server.

    If that lookup fails, the chunk is skipped with a warning, or
    RuntimeError is raised when FAIL_ON_REMOTE_CACHE_ERRORS is set. A
    failure while reading the .gitmodules file itself always raises
    RuntimeError.

    Returns True if 'contents' was modified.

    '''
    assert contents['kind'] == 'stratum'

    changed = False
    for chunk_ref in contents.get('chunks', []):
        chunk_git_url = get_repo_url(chunk_ref['repo'])
        chunk_git_ref = chunk_ref['ref']

        if 'submodules' in chunk_ref:
            continue

        try:
            if is_valid_sha1(chunk_git_ref):
                chunk_git_commit = chunk_git_ref
            else:
                chunk_git_commit = resolve_ref_to_commit(
                    chunk_git_url, chunk_git_ref)
            toplevel_file_list = get_toplevel_file_list_from_repo(
                chunk_git_url, chunk_git_commit)
        except Exception as e:
            logging.debug(e)
            message = (
                "Unable to look up repo %s on remote Git server %s. Check "
                "that the repo URL is correct." % (chunk_git_url, TROVE_HOST))
            warning = (
                "If you are using a Trove that is not %s, please edit the "
                "TROVE_HOST constant in this script and run it again." %
                TROVE_HOST)
            if FAIL_ON_REMOTE_CACHE_ERRORS:
                raise RuntimeError(message + " " + warning)
            else:
                warnings.warn(message)
                warnings.warn(warning)
                continue

        logging.debug(
            "%s: got file list %s", chunk_git_url, toplevel_file_list)

        if u'.gitmodules' in toplevel_file_list:
            submodules = submodules_to_dict(chunk_git_url, chunk_git_commit)
            if submodules:
                chunk_ref['submodules'] = submodules
                changed = True

    return changed


# Script entry: run the migration only when the definitions are at the
# previous version, and report the outcome through the exit code.
try:
    if migrations.check_definitions_version(TO_VERSION - 1):
        success = migrations.process_definitions(
            kinds=['stratum'],
            validate_cb=validate_chunk_refs,
            modify_cb=add_submodules_to_strata)
        if not success:
            sys.stderr.write(
                "Migration failed due to one or more warnings.\n")
            sys.exit(1)
        else:
            migrations.set_definitions_version(TO_VERSION)
            sys.stderr.write("Migration completed successfully.\n")
            sys.exit(0)
    else:
        sys.stderr.write("Nothing to do.\n")
        sys.exit(0)
except RuntimeError as e:
    sys.stderr.write("Error: %s\n" % e)
    sys.exit(1)