diff options
author | Richard Ipsum <richard.ipsum@codethink.co.uk> | 2014-03-14 16:33:21 +0000 |
---|---|---|
committer | Richard Ipsum <richard.ipsum@codethink.co.uk> | 2014-03-26 21:03:51 +0000 |
commit | 2632a9ac870177f6ec743fdd96e40dc1d71314a8 (patch) | |
tree | 1b9939442c311b03585b146db8d9917835b3a16a /distbuild | |
parent | 4e1153649e5d531b7017ac2a1b7791f9ad3c774b (diff) | |
download | morph-2632a9ac870177f6ec743fdd96e40dc1d71314a8.tar.gz |
Make serialise work with artifact splitting
Serialisation was simple when we only had 1 artifact per source.
However, to allow smaller systems, we need artifact splitting to produce
multiple artifacts per chunk source.
So now the new serialisation format has a separate list of artifacts
and sources, rather than the Source being generated from the artifact's
serialisation.
Python's id() function is used to encode the references between the
various Sources and Artifacts; these are replaced with a reference to
the new object after deserialisation.
Previously the cache-key was used, but this is no longer sufficient to
uniquely identify an Artifact.
The resultant build graph after deserialisation is a little different
to what went in: Strata end up with a different Source per Artifact,
so it _is_ a 1 to 1 mapping, as opposed to Chunks, where it's many to 1.
We serialise strata and chunks differently because stratum artifacts
from the same source can have different dependencies, for example
core-devel can have different dependencies to core-runtime.
Without intervention we would serialise core-devel and core-devel's
dependencies without including core-runtime's dependencies.
To solve this we've decided to encode stratum artifacts completely
independently: each stratum artifact has its own source. This is safe
because stratum artifacts can be constructed independently,
as opposed to Chunks where all the Artifacts for a Source
are produced together.
This is a little hacky in its current form, but it simplifies matters
later in distbuild with regards to how it handles expressing that
every Artifact that shares a Source is built together.
Arguably, this should be the output of producing the build graph
anyway, since it more helpfully represents which Artifacts are built
together than checking the morphology kind all the time, but more
assumptions need checking in morph before it's safe to make this
change across the whole of the morph codebase.
Diffstat (limited to 'distbuild')
-rw-r--r-- | distbuild/serialise.py | 144 |
1 files changed, 118 insertions, 26 deletions
diff --git a/distbuild/serialise.py b/distbuild/serialise.py index 060833b1..cd871042 100644 --- a/distbuild/serialise.py +++ b/distbuild/serialise.py @@ -19,6 +19,7 @@ import json import morphlib +import logging morphology_attributes = [ @@ -36,6 +37,16 @@ def serialise_artifact(artifact): for x in morphology_attributes: result['__%s' % x] = getattr(morphology, x) return result + + def encode_artifact(artifact): + return { + 'name': artifact.name, + 'cache_id': artifact.cache_id, + 'cache_key': artifact.cache_key, + 'dependencies': artifact.dependencies, + 'dependents': artifact.dependents, + 'metadata_version': artifact.metadata_version, + } def encode_source(source): source_dic = { @@ -46,25 +57,35 @@ def serialise_artifact(artifact): 'tree': source.tree, 'morphology': encode_morphology(source.morphology), 'filename': source.filename, + + # dict keys are converted to strings by json + # so we encode the artifact ids as strings + 'artifact_ids': [str(id(artifact)) for (_, artifact) + in source.artifacts.iteritems()], } + if source.morphology['kind'] == 'chunk': source_dic['build_mode'] = source.build_mode source_dic['prefix'] = source.prefix return source_dic - def encode_single_artifact(a, encoded): + def encode_single_artifact(a, artifacts, source_id): if artifact.source.morphology['kind'] == 'system': arch = artifact.source.morphology['arch'] else: arch = artifact.arch + + logging.debug('encode_single_artifact dependencies: %s' + % str([('id: %s' % str(id(d)), d.name) for d in a.dependencies])) + return { - 'source': encode_source(a.source), + 'source_id': source_id, 'name': a.name, 'cache_id': a.cache_id, 'cache_key': a.cache_key, - 'dependencies': [encoded[d.cache_key]['cache_key'] - for d in a.dependencies], - 'arch': arch, + 'dependencies': [str(id(artifacts[id(d)])) + for d in a.dependencies], + 'arch': arch } visited = set() @@ -77,13 +98,46 @@ def serialise_artifact(artifact): yield ret yield a - encoded = {} + + artifacts = {} + encoded_artifacts = {} 
+ encoded_sources = {} + for a in traverse(artifact): - if a.cache_key not in encoded: - encoded[a.cache_key] = encode_single_artifact(a, encoded) + logging.debug('traversing artifacts at %s' % a.name) + + if id(a.source) not in encoded_sources: + if a.source.morphology['kind'] == 'chunk': + for (_, sa) in a.source.artifacts.iteritems(): + if id(sa) not in artifacts: + logging.debug('encoding source artifact %s' % sa.name) + artifacts[id(sa)] = sa + encoded_artifacts[id(sa)] = encode_single_artifact(sa, + artifacts, id(a.source)) + else: + # We create separate sources for strata and systems, + # this is a bit of a hack, but needed to allow + # us to build strata and systems independently + + s = a.source + t = morphlib.source.Source(s.repo_name, s.original_ref, + s.sha1, s.tree, s.morphology, s.filename) + + t.artifacts = {a.name: a} + a.source = t + + encoded_sources[id(a.source)] = encode_source(a.source) + + if id(a) not in artifacts: + artifacts[id(a)] = a + logging.debug('encoding artifact %s' % a.name) + encoded_artifacts[id(a)] = encode_single_artifact(a, artifacts, + id(a.source)) - encoded['_root'] = artifact.cache_key - return json.dumps(encoded) + encoded_artifacts['_root'] = str(id(artifact)) + + return json.dumps({'sources': encoded_sources, + 'artifacts': encoded_artifacts}) def deserialise_artifact(encoded): @@ -121,7 +175,17 @@ def deserialise_artifact(encoded): setattr(morphology, x, le_dict['__%s' % x]) del morphology['__%s' % x] return morphology - + + def unserialise_source_artifacts(source, artifacts_dict): + '''Convert this dict into a list of artifacts''' + return {a['name']: Artifact(source, + a['name'], + a['cache_id'], + a['cache_key'], + a['dependencies'], + a['dependents'], + a['metadata_version']) for a in artifacts_dict} + def unserialise_source(le_dict): '''Convert a dict into a Source object.''' @@ -132,35 +196,63 @@ def deserialise_artifact(encoded): le_dict['tree'], morphology, le_dict['filename']) + if morphology['kind'] == 
'chunk': source.build_mode = le_dict['build_mode'] source.prefix = le_dict['prefix'] return source - def unserialise_single_artifact(le_dict): + def unserialise_single_artifact(artifact_dict, source): '''Convert dict into an Artifact object. Do not set dependencies, that will be dealt with later. ''' - source = unserialise_source(le_dict['source']) - artifact = morphlib.artifact.Artifact(source, le_dict['name']) - artifact.cache_id = le_dict['cache_id'] - artifact.cache_key = le_dict['cache_key'] - artifact.arch = le_dict['arch'] + artifact = morphlib.artifact.Artifact(source, artifact_dict['name']) + artifact.cache_id = artifact_dict['cache_id'] + artifact.cache_key = artifact_dict['cache_key'] + artifact.arch = artifact_dict['arch'] + artifact.source = source + return artifact le_dicts = json.loads(encoded) - cache_keys = [k for k in le_dicts.keys() if k != '_root'] + artifacts_dict = le_dicts['artifacts'] + sources_dict = le_dicts['sources'] + + artifact_ids = ([artifacts_dict['_root']] + + filter(lambda k: k != '_root', artifacts_dict.keys())) + + source_ids = [sid for sid in sources_dict.keys()] + artifacts = {} - for cache_key in cache_keys: - le_dict = le_dicts[cache_key] - artifacts[cache_key] = unserialise_single_artifact(le_dict) - for cache_key in cache_keys: - le_dict = le_dicts[cache_key] - artifact = artifacts[cache_key] - artifact.dependencies = [artifacts[k] for k in le_dict['dependencies']] + sources = {} + + for source_id in source_ids: + source_dict = sources_dict[source_id] + sources[source_id] = unserialise_source(source_dict) + + # clear the source artifacts that get automatically generated + # we want to add the ones that were sent to us + sources[source_id].artifacts = {} + source_artifacts = source_dict['artifact_ids'] + + for artifact_id in source_artifacts: + if artifact_id not in artifacts: + artifact_dict = artifacts_dict[artifact_id] + artifact = unserialise_single_artifact(artifact_dict, + sources[source_id]) + + 
artifacts[artifact_id] = artifact + + key = artifacts[artifact_id].name + sources[source_id].artifacts[key] = artifacts[artifact_id] - return artifacts[le_dicts['_root']] + # now add the dependencies + for artifact_id in artifact_ids: + artifact = artifacts[artifact_id] + artifact.dependencies = [artifacts[aid] for aid in + artifacts_dict[artifact_id]['dependencies']] + return artifacts[artifacts_dict['_root']] |