diff options
author | Adam Coldrick <adam.coldrick@codethink.co.uk> | 2015-03-10 10:13:15 +0000 |
---|---|---|
committer | Adam Coldrick <adam.coldrick@codethink.co.uk> | 2015-03-10 15:13:34 +0000 |
commit | 65b28f8bbeba0ad46dbd1a07159a7e87e3970909 (patch) | |
tree | 10ebd5f65e8a3df9823211bd83aeafdd0d9733a0 | |
parent | aca17a1fdbb38ec505854688cf87a5d89925ad64 (diff) | |
download | morph-65b28f8bbeba0ad46dbd1a07159a7e87e3970909.tar.gz |
WIP: Make serialisation serialise less data
-rw-r--r-- | distbuild/serialise.py | 261 |
1 files changed, 108 insertions, 153 deletions
diff --git a/distbuild/serialise.py b/distbuild/serialise.py index a7c6c4b9..32ed334f 100644 --- a/distbuild/serialise.py +++ b/distbuild/serialise.py @@ -1,6 +1,6 @@ # distbuild/serialise.py -- (de)serialise Artifact object graphs # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014, 2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,100 +17,119 @@ import json +import logging import yaml import morphlib -import logging -def serialise_artifact(artifact): - '''Serialise an Artifact object and its dependencies into string form.''' +class ArtifactReference(object): - def encode_morphology(morphology): - result = {} - for key in morphology.keys(): - result[key] = morphology[key] - return result - - def encode_source(source, prune_leaf=False): - source_dic = { - 'name': source.name, - 'repo': None, - 'repo_name': source.repo_name, - 'original_ref': source.original_ref, - 'sha1': source.sha1, - 'tree': source.tree, - 'morphology': id(source.morphology), - 'filename': source.filename, - 'artifact_ids': [], - 'cache_id': source.cache_id, - 'cache_key': source.cache_key, - 'dependencies': [], - } - if not prune_leaf: - source_dic['artifact_ids'].extend(id(artifact) for (_, artifact) - in source.artifacts.iteritems()) - source_dic['dependencies'].extend(id(d) - for d in source.dependencies) + '''Container for some basic information about an artifact.''' - if source.morphology['kind'] == 'chunk': - source_dic['build_mode'] = source.build_mode - source_dic['prefix'] = source.prefix - return source_dic + def __init__(self, basename, encoded): + self._basename = basename + self._dict = encoded + def __getattr__(self, name): + if not name.startswith('_'): + return self._dict.get(name) + else: + super(ArtifactReference, self).__getattr(name) + + def __setattr__(self, name, val): + if not name.startswith('_'): + self._dict[name] = val + else: + super(ArtifactReference, self).__setattr__(name, val) + + def __repr__(self): + return 'REF: %s' % self.basename() + +# ret = ['\n-----', self.basename()] +# for key, val in self._dict.iteritems(): +# if key == 'dependencies': +# ret.append('dependencies:') +# for dep in self.dependencies: +# ret.append('* %s - %s' % (type(dep), dep.basename())) +# elif key == 'source_artifacts': +# ret.append('source_artifacts:') +# for dep in self.source_artifacts: +# ret.append('* %s' % dep) +# else: +# ret.append('%s: %s' % (key, val)) +# ret.append('\n-----') +# return '\n'.join(ret) + + def basename(self): + return self._basename + + +def serialise_artifact(artifact, repo, ref): + '''Serialise an Artifact object and its dependencies into string form.''' + + log = open('/home/adam/serialise-%s' % artifact.basename(), 'w+') def encode_artifact(a): + log.write('\nSERIALISE: serialising %s\n' % a.basename()) if artifact.source.morphology['kind'] == 'system': # pragma: no cover arch = artifact.source.morphology['arch'] else: arch = artifact.arch - return { - 'source_id': id(a.source), - 'name': a.name, + a_dict = { + 'root_filename': artifact.source.filename, 'arch': arch, - 'dependents': [id(d) - for d in a.dependents], + 'cache_key': a.source.cache_key, + 'basename': a.basename(), + 'filename': a.source.filename, + 'name': a.name, + 'kind': a.source.morphology['kind'], + 'repo': repo, + 'ref': ref, + 'dependencies': [], + 'source_repo': a.source.repo_name, + 'source_ref': a.source.sha1, + 'source_artifacts': [] } - + for dep in a.source.dependencies: + a_dict['dependencies'].append(dep.basename()) + for other in a.source.artifacts: + a_dict['source_artifacts'].append(other) + return a_dict + + def encode_artifact_reference(a): + log.write('SERIALISE: serialising reference: %s\n' % a.basename()) + a_dict = dict(a._dict) + a_dict['dependencies'] = [dep.basename() for dep in a.dependencies] + a_dict['source_artifacts'] = [str(sa) for sa in a.source_artifacts] + return a_dict + + def handle_artifact_reference(a): + if a.basename() in encoded_artifacts: + return + visited_artifacts[a.basename()] = a + for dep in a.dependencies: + handle_artifact_reference(dep) + encoded_artifacts[a.basename()] = encode_artifact_reference(a) + + log.write('\n\nSERIALISE: %s\n' % artifact.basename()) encoded_artifacts = {} - encoded_sources = {} - encoded_morphologies = {} visited_artifacts = {} - for a in artifact.walk(): - if id(a.source) not in encoded_sources: - for sa in a.source.artifacts.itervalues(): - if id(sa) not in encoded_artifacts: - visited_artifacts[id(sa)] = sa - encoded_artifacts[id(sa)] = encode_artifact(sa) - encoded_morphologies[id(a.source.morphology)] = \ - encode_morphology(a.source.morphology) - encoded_sources[id(a.source)] = encode_source(a.source) - - if id(a) not in encoded_artifacts: # pragma: no cover - visited_artifacts[id(a)] = a - encoded_artifacts[id(a)] = encode_artifact(a) - - # Include one level of dependents above encoded artifacts, as we need - # them to be able to tell whether two sources are in the same stratum. - for a in visited_artifacts.itervalues(): - for source in a.dependents: # pragma: no cover - if id(source) not in encoded_sources: - encoded_morphologies[id(source.morphology)] = \ - encode_morphology(source.morphology) - encoded_sources[id(source)] = \ - encode_source(source, prune_leaf=True) + if isinstance(artifact, morphlib.artifact.Artifact): + for a in artifact.walk(): + if a.basename() not in encoded_artifacts: # pragma: no cover + encoded_artifacts[a.basename()] = encode_artifact(a) + visited_artifacts[artifact.basename()] = artifact + elif isinstance(artifact, ArtifactReference): +# handle_artifact_reference(artifact) + encoded_artifacts[artifact.basename()] = encode_artifact_reference(artifact) content = { - 'sources': encoded_sources, - 'artifacts': encoded_artifacts, - 'morphologies': encoded_morphologies, - 'root_artifact': id(artifact), - 'default_split_rules': { - 'chunk': morphlib.artifactsplitrule.DEFAULT_CHUNK_RULES, - 'stratum': morphlib.artifactsplitrule.DEFAULT_STRATUM_RULES, - }, + 'root-artifact': artifact.basename(), + 'artifacts': encoded_artifacts } + log.write('\n\nSERIALISE: done %s\n' % artifact.basename()) return json.dumps(yaml.dump(content)) @@ -124,94 +143,30 @@ def deserialise_artifact(encoded): ''' - def decode_morphology(le_dict): - '''Convert a dict into something that kinda acts like a Morphology. - - As it happens, we don't need the full Morphology so we cheat. - Cheating is good. - - ''' - - return morphlib.morphology.Morphology(le_dict) - - def decode_source(le_dict, morphology, split_rules): - '''Convert a dict into a Source object.''' - - source = morphlib.source.Source(le_dict['name'], - le_dict['repo_name'], - le_dict['original_ref'], - le_dict['sha1'], - le_dict['tree'], - morphology, - le_dict['filename'], - split_rules) - - if morphology['kind'] == 'chunk': - source.build_mode = le_dict['build_mode'] - source.prefix = le_dict['prefix'] - source.cache_id = le_dict['cache_id'] - source.cache_key = le_dict['cache_key'] - return source - - def decode_artifact(artifact_dict, source): + def decode_artifact(artifact_dict): '''Convert dict into an Artifact object. Do not set dependencies, that will be dealt with later. ''' - - artifact = morphlib.artifact.Artifact(source, artifact_dict['name']) - artifact.arch = artifact_dict['arch'] - artifact.source = source - + artifact = ArtifactReference(artifact_dict) return artifact - le_dicts = yaml.load(json.loads(encoded)) - artifacts_dict = le_dicts['artifacts'] - sources_dict = le_dicts['sources'] - morphologies_dict = le_dicts['morphologies'] - root_artifact = le_dicts['root_artifact'] - assert root_artifact in artifacts_dict + content = yaml.load(json.loads(encoded)) + root = content['root-artifact'] + encoded_artifacts = content['artifacts'] artifacts = {} - sources = {} - morphologies = {id: decode_morphology(d) - for (id, d) in morphologies_dict.iteritems()} - - # Decode sources - for source_id, source_dict in sources_dict.iteritems(): - morphology = morphologies[source_dict['morphology']] - kind = morphology['kind'] - ruler = getattr(morphlib.artifactsplitrule, 'unify_%s_matches' % kind) - if kind in ('chunk', 'stratum'): - rules = ruler(morphology, le_dicts['default_split_rules'][kind]) - else: # pragma: no cover - rules = ruler(morphology) - sources[source_id] = decode_source(source_dict, morphology, rules) # decode artifacts - for artifact_id, artifact_dict in artifacts_dict.iteritems(): - source_id = artifact_dict['source_id'] - source = sources[source_id] - artifact = decode_artifact(artifact_dict, source) - artifacts[artifact_id] = artifact - - # add source artifacts reference - for source_id, source in sources.iteritems(): - source_dict = sources_dict[source_id] - source.artifacts = {artifacts[a].name: artifacts[a] - for a in source_dict['artifact_ids']} - - # add source dependencies - for source_id, source_dict in sources_dict.iteritems(): - source = sources[source_id] - source.dependencies = [artifacts[aid] - for aid in source_dict['dependencies']] - - # add artifact dependents - for artifact_id, artifact in artifacts.iteritems(): - artifact_dict = artifacts_dict[artifact_id] - artifact.dependents = [sources[sid] - for sid in artifact_dict['dependents']] - - return artifacts[root_artifact] + for basename, artifact_dict in encoded_artifacts.iteritems(): + artifact = ArtifactReference(basename, artifact_dict) + artifacts[basename] = artifact + + # add dependencies + for basename, a_dict in encoded_artifacts.iteritems(): + artifact = artifacts[basename] + artifact.dependencies = [artifacts.get(dep) + for dep in a_dict['dependencies']] + + return artifacts[root] |