# -*- coding: utf-8 -*-
#
# Copyright © 2014, 2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import cliapp
import morphlib
import networkx

import json
import itertools
import logging
import os
import tempfile
import time

import baserockimport


class BaserockImportException(cliapp.AppException):
    '''Base exception for errors raised during an import.'''
    pass


def find(iterable, match):
    '''Return the first item of 'iterable' for which match() is true.

    Returns None if no item matches.

    '''
    return next((x for x in iterable if match(x)), None)


def run_extension(filename, args, env=os.environ):
    '''Run the import extension 'filename' with the given arguments.

    Returns the output written by the extension to its stdout.

    If the extension subprocess returns an error code (any value other than
    zero) then BaserockImportException will be raised, with the contents of
    stderr stored in its .message attribute.

    Note that the stdout and strerr processing expects each line to be
    terminated with '\n' (newline character). Any output beyond the last \n
    character will be ignored.

    '''
    output = []
    errors = []

    # Each extension gets its own logger, named after the extension, so
    # its output can be filtered separately from the main program's.
    ext_logger = logging.getLogger(filename)

    def report_extension_stdout(line):
        output.append(line)

    def report_extension_stderr(line):
        logging.debug('Received "%s" on stderr' % line)
        errors.append(line)

    def report_extension_logger(line):
        ext_logger.debug(line)

    ext = morphlib.extensions.ExtensionSubprocess(
        report_stdout=report_extension_stdout,
        report_stderr=report_extension_stderr,
        report_logger=report_extension_logger,
    )

    def extensions_dir():
        # Extensions live in the 'exts' subdirectory of this package.
        module_dir = os.path.dirname(baserockimport.__file__)
        return os.path.join(module_dir, 'exts')

    extension_path = os.path.join(extensions_dir(), filename)

    logging.debug("Running %s %s" % (extension_path, args))

    cwd = '.'
    returncode = ext.run(extension_path, args, cwd, env,
                         separate_mount_namespace=False)

    if returncode == 0:
        ext_logger.info('succeeded')
    else:
        # Surface everything the extension wrote to stderr before failing.
        for line in errors:
            ext_logger.error(line)
        message = '%s failed with code %s: %s' % (
            filename, returncode, '\n'.join(errors))
        raise BaserockImportException(message)

    return '\n'.join(output)


class ImportLoop(object):
    '''Import a package and all of its dependencies into Baserock.

    This class holds the state for the processing loop.

    '''

    def __init__(self, app, goal_kind, goal_name, goal_version,
                 generate_chunk_morphs=True, ignore_version_field=False):
        '''Set up an ImportLoop to process dependencies of one goal package.'''
        self.app = app
        self.goal_kind = goal_kind
        self.goal_name = goal_name
        self.goal_version = goal_version
        self.generate_chunk_morphs = generate_chunk_morphs
        self.ignore_version_field = ignore_version_field

        self.lorry_set = baserockimport.lorryset.LorrySet(
            self.app.settings['lorries-dir'])
        self.morph_set = baserockimport.morphsetondisk.MorphologySetOnDisk(
            self.app.settings['definitions-dir'])

        self.morphloader = morphlib.morphloader.MorphologyLoader()

        # Maps importer kind -> {'extra_args': ..., 'kwargs': ...};
        # populated by enable_importer().
        self.importers = {}

    def enable_importer(self, kind, extra_args=None, **kwargs):
        '''Enable an importer extension in this ImportLoop instance.

        At least one importer extension must be enabled for the loop to do
        anything.

        Enabling more than one extension is handy for packaging systems which
        can list dependencies in other package universes: for example, Omnibus
        software components can depend on other Omnibus software components,
        but also on RubyGems.

        '''
        assert kind not in self.importers
        # Avoid a mutable default argument: callers previously got a shared
        # [] instance; an omitted extra_args still behaves as an empty list.
        self.importers[kind] = {
            'extra_args': extra_args if extra_args is not None else [],
            'kwargs': kwargs
        }

    def run(self):
        '''Process the goal package and all of its dependencies.'''
        start_time = time.time()
        start_displaytime = time.strftime('%x %X %Z', time.localtime())

        self.app.status(
            '%s: Import of %s %s started', start_displaytime, self.goal_kind,
            self.goal_name)

        if not self.app.settings['update-existing']:
            self.app.status(
                'Not updating existing Git checkouts or existing definitions')

        chunk_dir = os.path.join(self.morph_set.path, 'strata',
                                 self.goal_name)
        if not os.path.exists(chunk_dir):
            os.makedirs(chunk_dir)

        goal = baserockimport.package.Package(
            self.goal_kind, self.goal_name, self.goal_version,
            ignore_version_field=self.ignore_version_field)
        to_process = [goal]

        # Every Package object is added as a node in the 'processed' graph.
        # The set of nodes in graph corresponds to the set of packages needed
        # at runtime for the goal package to function. The edges in the graph
        # correspond to build-time dependencies between packages. This format
        # is convenient when we need to construct a suitable stratum morphology
        # for the goal package.
        processed = networkx.DiGraph()

        # Maps failed Package -> the BaserockImportException it raised.
        errors = {}

        # This is the main processing loop of an import!
        while len(to_process) > 0:
            current_item = to_process.pop()

            try:
                self._process_package(current_item)
                error = False
            except BaserockImportException as e:
                self.app.status('%s', e, error=True)
                errors[current_item] = e
                error = True

            if not error:
                self._update_queue_and_graph(
                    current_item, current_item.dependencies, to_process,
                    processed, errors)

        self._maybe_generate_stratum(processed, errors, self.goal_name)

        duration = time.time() - start_time
        end_displaytime = time.strftime('%x %X %Z', time.localtime())

        self.app.status(
            '%s: Import of %s %s ended (took %i seconds)', end_displaytime,
            self.goal_kind, self.goal_name, duration)

        # BUGFIX: previously this tested 'error', which only reflects the
        # outcome of the *last* package processed; earlier failures were
        # silently omitted from the summary. Test the accumulated dict.
        if len(errors) > 0:
            self.app.status('Errors encountered during import', error=True)
            for e in errors:
                self.app.status('\t%s', e, error=True)

    def _process_package(self, package):
        '''Process a single package.'''
        kind = package.kind
        name = package.name
        version = package.version

        parent = package.parent
        parent_metadata_path = None
        if parent:
            # Metadata written when the parent was processed; passed to the
            # import extensions via the IMPORT_METAPATH environment variable.
            parent_metadata_filename = ('strata/%s/%s.foreign-dependencies' %
                                        (self.goal_name, parent))
            parent_metadata_path = os.path.join(
                self.app.settings['definitions-dir'],
                parent_metadata_filename)

        # 1. Make the source code available.

        lorry = self._find_or_create_lorry_file(kind, name, version,
                                                parent_metadata_path)

        source_repo, repo_url = self._fetch_or_update_source(lorry)

        checked_out_version, ref = self._checkout_source_version_for_package(
            source_repo, package)

        logging.debug('Checked out version: %s\tRef: %s',
                      checked_out_version, ref)

        package.version_in_use = checked_out_version

        if not self.generate_chunk_morphs:
            package.detect_build_system(source_repo.list_files())

        with morphlib.util.temp_dir() as td:
            source_repo.clone_into(td, ref=ref)
            temp_repo = morphlib.gitdir.GitDirectory(td)
            repo_path = temp_repo.dirname
            logging.debug('%s cloned to temporary repo at %s',
                          source_repo.dirname, repo_path)

            if morphlib.git.is_valid_sha1(ref):
                self.app.status("%s %s: using %s commit %s",
                                name, version, source_repo.dirname, ref)
            else:
                self.app.status("%s %s: using %s ref %s (commit %s)",
                                name, version, source_repo.dirname, ref,
                                temp_repo.resolve_ref_to_commit(ref))

            # 2. Create a chunk morphology with build instructions.

            sha1 = temp_repo.resolve_ref_to_commit(ref)

            chunk_morph = None
            if self.generate_chunk_morphs:
                chunk_morph = self._find_or_create_chunk_morph(
                    kind, name, checked_out_version, temp_repo, repo_url,
                    sha1)

            if self.app.settings['use-local-sources']:
                package.repo_url = 'file://' + source_repo.dirname
            else:
                reponame = lorry.keys()[0]
                package.repo_url = 'upstream:%s' % reponame

            package.ref = sha1
            package.named_ref = ref
            package.morphology = chunk_morph

            # 3. Calculate the dependencies of this package.

            dependencies = self._find_or_create_dependency_list(
                kind, name, checked_out_version, temp_repo,
                parent_metadata_path)

            package.dependencies = dependencies

    def _update_queue_and_graph(self, current_item, dependencies, to_process,
                                processed, errors):
        '''Mark current_item as processed and enqueue any new dependencies.'''
        processed.add_node(current_item)

        for kind, kind_deps in dependencies.iteritems():
            build_deps = kind_deps['build-dependencies']
            for name, version in build_deps.iteritems():
                self._update_queue_and_graph_with_dependency(
                    current_item, kind, name, version, True, to_process,
                    processed, errors)

            runtime_deps = kind_deps['runtime-dependencies']
            for name, version in runtime_deps.iteritems():
                self._update_queue_and_graph_with_dependency(
                    current_item, kind, name, version, False, to_process,
                    processed, errors)

    def _update_queue_and_graph_with_dependency(self, current_item, kind,
                                                name, version, is_build_dep,
                                                to_process, processed,
                                                errors):
        '''Add one dependency of current_item to the queue and graph.

        Skips dependencies that already failed, reuses already-processed or
        already-queued Package objects, and records the build-dependency edge.

        '''
        failed_dep_package = find(errors,
                                  lambda pkg: pkg.match(kind, name, version))
        if failed_dep_package:
            logging.debug(
                "Ignoring %s as it failed earlier.", failed_dep_package)
            return

        dep_package = find(processed,
                           lambda pkg: pkg.match(kind, name, version))

        if dep_package is None:
            # Not yet processed
            queue_item = find(to_process,
                              lambda pkg: pkg.match(kind, name, version))
            if queue_item is None:
                queue_item = baserockimport.package.Package(
                    kind, name, version,
                    ignore_version_field=self.ignore_version_field)
                to_process.append(queue_item)
            dep_package = queue_item

        dep_package.add_required_by(current_item)

        if is_build_dep or current_item.is_build_dep:
            # A runtime dep of a build dep becomes a build dep itself.
            dep_package.is_build_dep = True
            processed.add_edge(dep_package, current_item)

    def _find_or_create_lorry_file(self, kind, name, version,
                                   parent_metadata_path):
        '''Return the lorry entry for 'name', generating one if needed.'''
        # Note that the lorry file may already exist for 'name', but lorry
        # files are named for project name rather than package name. In this
        # case we will generate the lorry, and try to add it to the set, at
        # which point LorrySet will notice the existing one and merge the two.
        if 'package_comp_callback' in self.importers[kind]['kwargs']:
            comp = self.importers[kind]['kwargs']['package_comp_callback']
        else:
            comp = lambda x, y: x == y

        lorry = self.lorry_set.find_lorry_for_package(kind, name, comp)

        if lorry is None:
            lorry = self._generate_lorry_for_package(kind, name, version,
                                                     parent_metadata_path)

            if len(lorry) != 1:
                raise Exception(
                    'Expected generated lorry file with one entry.')

            lorry_filename = lorry.keys()[0]

            if '/' in lorry_filename:
                # We try to be a bit clever and guess that if there's a prefix
                # in the name, e.g. 'ruby-gems/chef' then it should go in a
                # mega-lorry file, such as ruby-gems.lorry.
                parts = lorry_filename.split('/', 1)
                lorry_filename = parts[0]

            if lorry_filename == '':
                raise cliapp.AppException(
                    'Invalid lorry data for %s: %s' % (name, lorry))

            self.lorry_set.add(lorry_filename, lorry)
        else:
            lorry_filename = lorry.keys()[0]
            logging.info(
                'Found existing lorry file for %s: %s', name, lorry_filename)

        return lorry

    def _generate_lorry_for_package(self, kind, name, version,
                                    parent_metadata_path):
        '''Run the <kind>.to_lorry extension and parse its JSON output.'''
        logging.debug('Generating lorry for name: %s version: %s', name,
                      version)
        tool = '%s.to_lorry' % kind
        if kind not in self.importers:
            raise Exception('Importer for %s was not enabled.' % kind)
        extra_args = self.importers[kind]['extra_args']
        self.app.status(
            '%s: calling %s to generate lorry', name, tool)

        args = extra_args + [name]
        # 'master' means "no specific version requested", so it is not
        # passed on to the extension.
        if version != 'master':
            args.append(version)

        new_env = None
        if parent_metadata_path:
            new_env = os.environ.copy()
            new_env['IMPORT_METAPATH'] = parent_metadata_path

        lorry_text = run_extension(tool, args, new_env or os.environ)

        try:
            lorry = json.loads(lorry_text)
            logging.debug('Got %s from lorry ext', lorry)
        except ValueError:
            raise cliapp.AppException(
                'Invalid output from %s: %s' % (tool, lorry_text))
        return lorry

    def _run_lorry(self, lorry):
        '''Run the 'lorry' tool on one lorry entry, written to a temp file.'''
        with tempfile.NamedTemporaryFile() as f:
            logging.debug(json.dumps(lorry))
            json.dump(lorry, f)
            f.flush()
            cliapp.runcmd([
                'lorry', '--working-area',
                self.app.settings['lorry-working-dir'], '--pull-only',
                '--bundle', 'never', '--tarball', 'never', f.name])

    def _fetch_or_update_source(self, lorry):
        '''Mirror the repo described by 'lorry' and check it out locally.

        Returns (GitDirectory of the local checkout, upstream URL).

        '''
        assert len(lorry) == 1
        lorry_name, lorry_entry = lorry.items()[0]

        url = lorry_entry['url']
        reponame = '_'.join(lorry_name.split('/'))
        repopath = os.path.join(
            self.app.settings['lorry-working-dir'], reponame, 'git')

        checkoutpath = os.path.join(
            self.app.settings['checkouts-dir'], reponame)

        try:
            already_lorried = os.path.exists(repopath)
            if already_lorried:
                if self.app.settings['update-existing']:
                    self.app.status('Updating lorry of %s', url)
                    self._run_lorry(lorry)
            else:
                self.app.status('Lorrying %s', url)
                self._run_lorry(lorry)

            if os.path.exists(checkoutpath):
                repo = morphlib.gitdir.GitDirectory(checkoutpath)
                repo.update_remotes()
            else:
                if already_lorried:
                    logging.warning(
                        'Expected %s to exist, but will recreate it',
                        checkoutpath)
                cliapp.runcmd(['git', 'clone', repopath, checkoutpath])
                repo = morphlib.gitdir.GitDirectory(checkoutpath)
        except cliapp.AppException as e:
            raise BaserockImportException(e.msg.rstrip())

        return repo, url

    def _get_potential_tags(self, tag_formats, name, version):
        '''Yield candidate tag names for 'version' in each format.

        Tries the version as given, with a trailing '.0' removed, and with
        '.0' appended (e.g. '1.2.0' is also tried as '1.2', and '1.2' also
        as '1.2.0').

        '''
        # BUGFIX: this used version.rstrip('.0'), which strips *characters*
        # from the set {'.', '0'}, mangling e.g. '2.10' -> '2.1' and
        # '1.0.0' -> '1'. Only a single trailing '.0' suffix is intended.
        if version.endswith('.0'):
            stripped = version[:-len('.0')]
        else:
            stripped = version
        tags = (map(lambda s: s.format(name=name, version=v), tag_formats)
                for v in (version, stripped, '%s.0' % version))
        return itertools.chain.from_iterable(tags)

    def _checkout_source_version_for_package(self, source_repo, package):
        '''Check out the tag matching the package's version, if any.

        Returns (version actually in use, git ref that was checked out).
        Falls back to 'master' when allowed by settings, otherwise raises
        BaserockImportException.

        '''
        name = package.name
        version = package.version
        tag_formats = ('{version}', 'v{version}', '{name}-{version}')

        for tag in self._get_potential_tags(tag_formats, name, version):
            logging.debug('Checking whether tag %s exists', tag)
            if source_repo.ref_exists(tag):
                source_repo.checkout(tag)
                ref = tag
                break
        else:
            if self.app.settings['use-master-if-no-tag']:
                logging.warning(
                    "Couldn't find tag %s in repo %s. Using 'master'.",
                    tag, source_repo)
                source_repo.checkout('master')
                ref = version = 'master'
            else:
                raise BaserockImportException(
                    'Could not find ref for %s.' % package)

        return version, ref

    def _find_or_create_chunk_morph(self, kind, name, version, source_repo,
                                    repo_url, sha1):
        '''Load the chunk morphology for a package, generating if needed.'''
        morphology_filename = 'strata/%s/%s-%s.morph' % (
            self.goal_name, name, version)

        def generate_morphology():
            morphology = self._generate_chunk_morph_for_package(
                source_repo, kind, name, version, morphology_filename)
            self.morph_set.save_morphology(morphology_filename, morphology)
            return morphology

        if self.app.settings['update-existing']:
            morphology = generate_morphology()
        else:
            morphology = self.morph_set.get_morphology(
                repo_url, sha1, morphology_filename)

            if morphology is None:
                # Existing chunk morphologies loaded from disk don't contain
                # the repo and ref information. That's stored in the stratum
                # morph. So the first time we touch a chunk morph we need to
                # set this info.
                logging.debug("Didn't find morphology for %s|%s|%s",
                              repo_url, sha1, morphology_filename)
                morphology = self.morph_set.get_morphology(
                    None, None, morphology_filename)

                if morphology is None:
                    logging.debug("Didn't find morphology for None|None|%s",
                                  morphology_filename)
                    morphology = generate_morphology()

        return morphology

    def _generate_chunk_morph_for_package(self, source_repo, kind, name,
                                          version, filename):
        '''Run the <kind>.to_chunk extension and parse its morphology.'''
        tool = '%s.to_chunk' % kind

        if kind not in self.importers:
            raise Exception('Importer for %s was not enabled.' % kind)
        extra_args = self.importers[kind]['extra_args']

        self.app.status(
            '%s %s: calling %s to generate chunk morph', name, version, tool)

        args = extra_args + [source_repo.dirname, name]
        if version != 'master':
            args.append(version)

        text = run_extension(tool, args)

        return self.morphloader.load_from_string(text, filename)

    def _find_or_create_dependency_list(self, kind, name, version,
                                        source_repo, parent_metadata_path):
        '''Load the dependency list for a package, calculating if needed.

        The list is cached on disk as a .foreign-dependencies JSON file.

        '''
        depends_filename = 'strata/%s/%s-%s.foreign-dependencies' % (
            self.goal_name, name, version)
        depends_path = os.path.join(
            self.app.settings['definitions-dir'], depends_filename)

        def calculate_dependencies():
            dependencies = self._calculate_dependencies_for_package(
                source_repo, kind, name, version, parent_metadata_path)
            with open(depends_path, 'w') as f:
                json.dump(dependencies, f)
            return dependencies

        if self.app.settings['update-existing']:
            dependencies = calculate_dependencies()
        elif os.path.exists(depends_path):
            with open(depends_path) as f:
                dependencies = json.load(f)
        else:
            logging.debug("Didn't find %s", depends_path)
            dependencies = calculate_dependencies()

        return dependencies

    def _calculate_dependencies_for_package(self, source_repo, kind, name,
                                            version, parent_metadata_path):
        '''Run the <kind>.find_deps extension and parse its JSON output.'''
        tool = '%s.find_deps' % kind

        if kind not in self.importers:
            raise Exception('Importer for %s was not enabled.' % kind)
        extra_args = self.importers[kind]['extra_args']

        self.app.status(
            '%s %s: calling %s to calculate dependencies', name, version,
            tool)

        new_env = None
        if parent_metadata_path:
            new_env = os.environ.copy()
            new_env['IMPORT_METAPATH'] = parent_metadata_path

        args = extra_args + [source_repo.dirname, name]
        if version != 'master':
            args.append(version)

        text = run_extension(tool, args, env=new_env or os.environ)

        logging.debug("Got '%s' from %s", text, tool)

        return json.loads(text)

    def _sort_chunks_by_build_order(self, graph):
        '''Return the packages in 'graph' sorted into build order.

        Raises cliapp.AppException describing the cycle(s) if the build
        graph is not a DAG.

        '''
        # Sorting the nbunch makes the topological sort deterministic.
        order = reversed(sorted(graph.nodes()))
        try:
            return networkx.topological_sort(graph, nbunch=order)
        except networkx.NetworkXUnfeasible:
            # Cycle detected!
            loop_subgraphs = networkx.strongly_connected_component_subgraphs(
                graph, copy=False)
            all_loops_str = []
            # Renamed from 'graph' to avoid shadowing the parameter.
            for subgraph in loop_subgraphs:
                if subgraph.number_of_nodes() > 1:
                    loops_str = '->'.join(
                        str(node) for node in subgraph.nodes())
                    all_loops_str.append(loops_str)
            raise cliapp.AppException(
                'One or more cycles detected in build graph: %s' %
                (', '.join(all_loops_str)))

    def _maybe_generate_stratum(self, graph, errors, goal_name):
        '''Generate the goal stratum morphology, unless errors prevent it.'''
        filename = os.path.join(
            self.app.settings['definitions-dir'], 'strata',
            '%s.morph' % goal_name)
        update_existing = self.app.settings['update-existing']

        if self.app.settings['force-stratum-generation']:
            self._generate_stratum(
                graph, goal_name, filename, ignore_errors=True)
        elif len(errors) > 0:
            self.app.status(
                '\nErrors encountered, not generating a stratum morphology.')
            self.app.status(
                'See the README files for guidance.')
        elif os.path.exists(filename) and not update_existing:
            self.app.status(
                msg='Found stratum morph for %s at %s, not overwriting' %
                (goal_name, filename))
        else:
            self._generate_stratum(graph, goal_name, filename)

    def _generate_stratum(self, graph, goal_name, filename,
                          ignore_errors=False):
        '''Write a stratum morphology covering every package in 'graph'.

        NOTE(review): the 'ignore_errors' parameter is accepted (and passed
        by _maybe_generate_stratum) but not currently acted on here.

        '''
        self.app.status(msg='Generating stratum morph for %s' % goal_name)

        def get_build_deps(kind, deps):
            return deps[kind].get('build-dependencies', {})

        chunk_entries = []
        for package in self._sort_chunks_by_build_order(graph):
            morphology = package.morphology

            entry = {
                'name': package.name,
                'repo': package.repo_url,
                'ref': package.ref,
                'unpetrify-ref': package.named_ref,
            }

            if package.build_system is not None:
                entry['build-system'] = package.build_system
            if morphology is not None:
                entry['morph'] = morphology.filename

            # Build-dependencies are aggregated across every enabled
            # importer kind. (Removed a dead 'build_depends = []' that was
            # immediately overwritten.)
            ds = package.dependencies
            build_depends = list(
                itertools.chain.from_iterable(
                    get_build_deps(kind, ds).keys()
                    for kind in self.importers)
                if ds is not None else [])
            if build_depends:
                entry['build-depends'] = build_depends

            chunk_entries.append(entry)

        strata = itertools.chain.from_iterable(
            self.importers[kind]['kwargs'].get('strata', [])
            for kind in self.importers)
        stratum_build_depends = [{'morph': stratum} for stratum in strata]
        assert stratum_build_depends

        stratum_name = goal_name
        stratum = {
            'name': stratum_name,
            'kind': 'stratum',
            'description': 'Autogenerated by Baserock import tool',
            'build-depends': stratum_build_depends,
            'chunks': chunk_entries,
        }

        morphology = morphlib.morphology.Morphology(stratum)
        morphology.filename = filename
        self.morphloader.save_to_file(filename, morphology)