author     Adam Coldrick <adam.coldrick@codethink.co.uk>  2015-03-10 10:13:15 +0000
committer  Baserock Gerrit <gerrit@baserock.org>          2015-04-24 10:22:57 +0000
commit     5239f5190ce326b891f4b69555f91a69b1a1a2ba (patch)
tree       2e78c8dab4aeb107f6fead89e9d56ad55bf8e8f1
parent     aa6dfcbb70c03dfeb3f9af02283aa1ab83667162 (diff)
download   morph-5239f5190ce326b891f4b69555f91a69b1a1a2ba.tar.gz
Don't serialise the entire build graph
The controller no longer needs to know everything about an artifact as the
workers can calculate the build graph themselves quickly. This reduces the
amount of data which needs to be serialised by serialise-artifact, making
the yaml dump quicker.

Change-Id: I6bd0bed14c2efb2f499e9d6f0a97e6188353121a
-rw-r--r--  distbuild/serialise.py        273
-rw-r--r--  distbuild/serialise_tests.py   77
2 files changed, 131 insertions, 219 deletions
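For illustration, here is a minimal sketch of the payload shape the reworked
serialise_artifact emits after this change. The field names mirror
encode_artifact and encode_source in the diff below, and the final
json.dumps(yaml.dump(...)) step matches the patched return statement; all
concrete values (cache key, names, repo URL, refs) are hypothetical
placeholders.

    import json
    import yaml

    # Hypothetical example values; only the structure follows the new format.
    cache_key = 'abc123'
    basename = '%s.chunk.foo' % cache_key   # <cache_key>.<kind>.<name>

    content = {
        'root-artifact': basename,
        'root-filename': 'foo.morph',
        'artifacts': {
            basename: {
                'arch': 'x86_64',
                'cache_key': cache_key,
                'name': 'foo',
                'repo': 'git://example.example/foo',
                'ref': 'master',
            },
        },
        'sources': {
            cache_key: {
                'filename': 'foo.morph',
                'kind': 'chunk',
                'source_name': 'foo',
                'source_repo': 'git://example.example/foo',
                'source_ref': 'master',
                'source_sha1': 'a' * 40,
                'source_artifacts': ['foo'],
                'dependencies': [],
            },
        },
    }

    encoded = json.dumps(yaml.dump(content))

Because only these flat dictionaries are shipped, the workers rebuild the
full build graph locally instead of receiving it over the wire.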
diff --git a/distbuild/serialise.py b/distbuild/serialise.py
index 3e39e684..5e83ffc2 100644
--- a/distbuild/serialise.py
+++ b/distbuild/serialise.py
@@ -16,46 +16,68 @@
import json
+import logging
import yaml
import morphlib
-import logging
-def serialise_artifact(artifact):
+class ArtifactReference(object): # pragma: no cover
+
+ '''Container for some basic information about an artifact.'''
+
+ def __init__(self, basename, encoded):
+ self._basename = basename
+ self._dict = encoded
+
+ def __getattr__(self, name):
+ if not name.startswith('_'):
+ return self._dict[name]
+ else:
+ super(ArtifactReference, self).__getattr__(name)
+
+ def __setattr__(self, name, val):
+ if not name.startswith('_'):
+ self._dict[name] = val
+ else:
+ super(ArtifactReference, self).__setattr__(name, val)
+
+ def basename(self):
+ return self._basename
+
+ def walk(self):
+ done = set()
+
+ def depth_first(a):
+ if a not in done:
+ done.add(a)
+ for dep in a.dependencies:
+ for ret in depth_first(dep):
+ yield ret
+ yield a
+
+ return list(depth_first(self))
+
+
+def serialise_artifact(artifact, repo, ref):
'''Serialise an Artifact object and its dependencies into string form.'''
- def encode_morphology(morphology):
- result = {}
- for key in morphology.keys():
- result[key] = morphology[key]
- return result
-
- def encode_source(source, prune_leaf=False):
- source_dic = {
- 'name': source.name,
- 'repo': None,
- 'repo_name': source.repo_name,
- 'original_ref': source.original_ref,
- 'sha1': source.sha1,
- 'tree': source.tree,
- 'morphology': id(source.morphology),
+ def encode_source(source):
+ s_dict = {
'filename': source.filename,
- 'artifact_ids': [],
- 'cache_id': source.cache_id,
- 'cache_key': source.cache_key,
- 'dependencies': [],
+ 'kind': source.morphology['kind'],
+ 'source_name': source.name,
+ 'source_repo': source.repo_name,
+ 'source_ref': source.original_ref,
+ 'source_sha1': source.sha1,
+ 'source_artifacts': [],
+ 'dependencies': []
}
- if not prune_leaf:
- source_dic['artifact_ids'].extend(id(artifact) for (_, artifact)
- in source.artifacts.iteritems())
- source_dic['dependencies'].extend(id(d)
- for d in source.dependencies)
-
- if source.morphology['kind'] == 'chunk':
- source_dic['build_mode'] = source.build_mode
- source_dic['prefix'] = source.prefix
- return source_dic
+ for dep in source.dependencies:
+ s_dict['dependencies'].append(dep.basename())
+ for sa in source.artifacts:
+ s_dict['source_artifacts'].append(sa)
+ return s_dict
def encode_artifact(a):
if artifact.source.morphology['kind'] == 'system': # pragma: no cover
@@ -63,53 +85,61 @@ def serialise_artifact(artifact):
else:
arch = artifact.arch
- return {
- 'source_id': id(a.source),
- 'name': a.name,
+ a_dict = {
'arch': arch,
- 'dependents': [id(d)
- for d in a.dependents],
+ 'cache_key': a.source.cache_key,
+ 'name': a.name,
+ 'repo': repo,
+ 'ref': ref,
+ }
+ return a_dict
+
+ def encode_artifact_reference(a): # pragma: no cover
+ a_dict = {
+ 'arch': a.arch,
+ 'cache_key': a.cache_key,
+ 'name': a.name,
+ 'repo': a.repo,
+ 'ref': a.ref
+ }
+ s_dict = {
+ 'filename': a.filename,
+ 'kind': a.kind,
+ 'source_name': a.source_name,
+ 'source_repo': a.source_repo,
+ 'source_ref': a.source_ref,
+ 'source_sha1': a.source_sha1,
+ 'source_artifacts': [],
+ 'dependencies': []
}
+ for dep in a.dependencies:
+ s_dict['dependencies'].append(dep.basename())
+ for sa in a.source_artifacts:
+ s_dict['source_artifacts'].append(sa)
+ return a_dict, s_dict
encoded_artifacts = {}
encoded_sources = {}
- encoded_morphologies = {}
- visited_artifacts = {}
-
- for a in artifact.walk():
- if id(a.source) not in encoded_sources:
- for sa in a.source.artifacts.itervalues():
- if id(sa) not in encoded_artifacts:
- visited_artifacts[id(sa)] = sa
- encoded_artifacts[id(sa)] = encode_artifact(sa)
- encoded_morphologies[id(a.source.morphology)] = \
- encode_morphology(a.source.morphology)
- encoded_sources[id(a.source)] = encode_source(a.source)
-
- if id(a) not in encoded_artifacts: # pragma: no cover
- visited_artifacts[id(a)] = a
- encoded_artifacts[id(a)] = encode_artifact(a)
-
- # Include one level of dependents above encoded artifacts, as we need
- # them to be able to tell whether two sources are in the same stratum.
- for a in visited_artifacts.itervalues():
- for source in a.dependents: # pragma: no cover
- if id(source) not in encoded_sources:
- encoded_morphologies[id(source.morphology)] = \
- encode_morphology(source.morphology)
- encoded_sources[id(source)] = \
- encode_source(source, prune_leaf=True)
+
+ if isinstance(artifact, ArtifactReference): # pragma: no cover
+ root_filename = artifact.root_filename
+ a_dict, s_dict = encode_artifact_reference(artifact)
+ encoded_artifacts[artifact.basename()] = a_dict
+ encoded_sources[artifact.cache_key] = s_dict
+ else:
+ root_filename = artifact.source.filename
+ for a in artifact.walk():
+ if a.basename() not in encoded_artifacts: # pragma: no cover
+ encoded_artifacts[a.basename()] = encode_artifact(a)
+ encoded_sources[a.source.cache_key] = encode_source(a.source)
content = {
- 'sources': encoded_sources,
+ 'root-artifact': artifact.basename(),
+ 'root-filename': root_filename,
'artifacts': encoded_artifacts,
- 'morphologies': encoded_morphologies,
- 'root_artifact': id(artifact),
- 'default_split_rules': {
- 'chunk': morphlib.artifactsplitrule.DEFAULT_CHUNK_RULES,
- 'stratum': morphlib.artifactsplitrule.DEFAULT_STRATUM_RULES,
- },
+ 'sources': encoded_sources
}
+
return json.dumps(yaml.dump(content))
@@ -122,95 +152,24 @@ def deserialise_artifact(encoded):
purposes, by Morph.
'''
-
- def decode_morphology(le_dict):
- '''Convert a dict into something that kinda acts like a Morphology.
-
- As it happens, we don't need the full Morphology so we cheat.
- Cheating is good.
-
- '''
-
- return morphlib.morphology.Morphology(le_dict)
-
- def decode_source(le_dict, morphology, split_rules):
- '''Convert a dict into a Source object.'''
-
- source = morphlib.source.Source(le_dict['name'],
- le_dict['repo_name'],
- le_dict['original_ref'],
- le_dict['sha1'],
- le_dict['tree'],
- morphology,
- le_dict['filename'],
- split_rules)
-
- if morphology['kind'] == 'chunk':
- source.build_mode = le_dict['build_mode']
- source.prefix = le_dict['prefix']
- source.cache_id = le_dict['cache_id']
- source.cache_key = le_dict['cache_key']
- return source
-
- def decode_artifact(artifact_dict, source):
- '''Convert dict into an Artifact object.
-
- Do not set dependencies, that will be dealt with later.
-
- '''
-
- artifact = morphlib.artifact.Artifact(source, artifact_dict['name'])
- artifact.arch = artifact_dict['arch']
- artifact.source = source
-
- return artifact
-
- le_dicts = yaml.load(json.loads(encoded))
- artifacts_dict = le_dicts['artifacts']
- sources_dict = le_dicts['sources']
- morphologies_dict = le_dicts['morphologies']
- root_artifact = le_dicts['root_artifact']
- assert root_artifact in artifacts_dict
+ content = yaml.load(json.loads(encoded))
+ root = content['root-artifact']
+ encoded_artifacts = content['artifacts']
+ encoded_sources = content['sources']
artifacts = {}
- sources = {}
- morphologies = {id: decode_morphology(d)
- for (id, d) in morphologies_dict.iteritems()}
-
- # Decode sources
- for source_id, source_dict in sources_dict.iteritems():
- morphology = morphologies[source_dict['morphology']]
- kind = morphology['kind']
- ruler = getattr(morphlib.artifactsplitrule, 'unify_%s_matches' % kind)
- if kind in ('chunk', 'stratum'):
- rules = ruler(morphology, le_dicts['default_split_rules'][kind])
- else: # pragma: no cover
- rules = ruler(morphology)
- sources[source_id] = decode_source(source_dict, morphology, rules)
# decode artifacts
- for artifact_id, artifact_dict in artifacts_dict.iteritems():
- source_id = artifact_dict['source_id']
- source = sources[source_id]
- artifact = decode_artifact(artifact_dict, source)
- artifacts[artifact_id] = artifact
-
- # add source artifacts reference
- for source_id, source in sources.iteritems():
- source_dict = sources_dict[source_id]
- source.artifacts = {artifacts[a].name: artifacts[a]
- for a in source_dict['artifact_ids']}
-
- # add source dependencies
- for source_id, source_dict in sources_dict.iteritems():
- source = sources[source_id]
- source.dependencies = [artifacts[aid]
- for aid in source_dict['dependencies']]
-
- # add artifact dependents
- for artifact_id, artifact in artifacts.iteritems():
- artifact_dict = artifacts_dict[artifact_id]
- artifact.dependents = [sources[sid]
- for sid in artifact_dict['dependents']]
-
- return artifacts[root_artifact]
+ for basename, artifact_dict in encoded_artifacts.iteritems():
+ artifact_dict.update(encoded_sources[artifact_dict['cache_key']])
+ artifact = ArtifactReference(basename, artifact_dict)
+ artifact.root_filename = content['root-filename']
+ artifacts[basename] = artifact
+
+ # add dependencies
+ for basename, a_dict in encoded_artifacts.iteritems():
+ artifact = artifacts[basename]
+ artifact.dependencies = [artifacts.get(dep)
+ for dep in artifact.dependencies]
+
+ return artifacts[root]
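As a usage sketch, reusing the hypothetical encoded payload from the example
above the diffstat and the distbuild entry points exercised by the tests
below, the decoded object is a thin ArtifactReference wrapper over the merged
artifact/source dictionary:

    import distbuild

    decoded = distbuild.deserialise_artifact(encoded)   # an ArtifactReference
    assert decoded.basename() == 'abc123.chunk.foo'     # key of the root artifact
    assert decoded.cache_key == 'abc123'                # __getattr__ reads the merged dict
    ordered = [a.name for a in decoded.walk()]          # dependencies first, root last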
diff --git a/distbuild/serialise_tests.py b/distbuild/serialise_tests.py
index a0ad78f8..2de3ab85 100644
--- a/distbuild/serialise_tests.py
+++ b/distbuild/serialise_tests.py
@@ -20,32 +20,6 @@ import unittest
import distbuild
-class MockMorphology(object):
-
- def __init__(self, name, kind):
- self.dict = {
- 'name': '%s.morphology.name' % name,
- 'kind': kind,
- 'chunks': [],
- 'products': [
- {
- 'artifact': name,
- 'include': [r'.*'],
- },
- ],
- }
-
- @property
- def needs_artifact_metadata_cached(self):
- return self.dict['kind'] == 'stratum'
-
- def keys(self):
- return self.dict.keys()
-
- def __getitem__(self, key):
- return self.dict[key]
-
-
class MockSource(object):
build_mode = 'staging'
@@ -57,7 +31,7 @@ class MockSource(object):
self.original_ref = '%s.source.original_ref' % name
self.sha1 = '%s.source.sha1' % name
self.tree = '%s.source.tree' % name
- self.morphology = MockMorphology(name, kind)
+ self.morphology = {'kind': kind}
self.filename = '%s.source.filename' % name
self.dependencies = []
self.cache_id = {
@@ -78,6 +52,11 @@ class MockArtifact(object):
self.name = name
self.dependents = []
+ def basename(self):
+ return '%s.%s.%s' % (self.source.cache_key,
+ self.source.morphology['kind'],
+ self.name)
+
def walk(self): # pragma: no cover
done = set()
@@ -100,53 +79,28 @@ class SerialisationTests(unittest.TestCase):
self.art3 = MockArtifact('name3', 'chunk')
self.art4 = MockArtifact('name4', 'chunk')
- def assertEqualMorphologies(self, a, b):
- self.assertEqual(sorted(a.keys()), sorted(b.keys()))
- keys = sorted(a.keys())
- a_values = [a[k] for k in keys]
- b_values = [b[k] for k in keys]
- self.assertEqual(a_values, b_values)
- self.assertEqual(a.needs_artifact_metadata_cached,
- b.needs_artifact_metadata_cached)
-
- def assertEqualSources(self, a, b):
- self.assertEqual(a.repo, b.repo)
- self.assertEqual(a.repo_name, b.repo_name)
- self.assertEqual(a.original_ref, b.original_ref)
- self.assertEqual(a.sha1, b.sha1)
- self.assertEqual(a.tree, b.tree)
- self.assertEqualMorphologies(a.morphology, b.morphology)
- self.assertEqual(a.filename, b.filename)
-
- def assertEqualArtifacts(self, a, b):
- self.assertEqualSources(a.source, b.source)
- self.assertEqual(a.name, b.name)
- self.assertEqual(a.source.cache_id, b.source.cache_id)
- self.assertEqual(a.source.cache_key, b.source.cache_key)
- self.assertEqual(len(a.source.dependencies),
- len(b.source.dependencies))
- for i in range(len(a.source.dependencies)):
- self.assertEqualArtifacts(a.source.dependencies[i],
- b.source.dependencies[i])
-
def verify_round_trip(self, artifact):
- encoded = distbuild.serialise_artifact(artifact)
+ encoded = distbuild.serialise_artifact(artifact,
+ artifact.source.repo_name,
+ artifact.source.sha1)
decoded = distbuild.deserialise_artifact(encoded)
- self.assertEqualArtifacts(artifact, decoded)
+ self.assertEqual(artifact.basename(), decoded.basename())
objs = {}
queue = [decoded]
while queue:
obj = queue.pop()
- k = obj.source.cache_key
+ k = obj.cache_key
if k in objs:
self.assertTrue(obj is objs[k])
else:
objs[k] = obj
- queue.extend(obj.source.dependencies)
+ queue.extend(obj.dependencies)
def test_returns_string(self):
- encoded = distbuild.serialise_artifact(self.art1)
+ encoded = distbuild.serialise_artifact(self.art1,
+ self.art1.source.repo_name,
+ self.art1.source.sha1)
self.assertEqual(type(encoded), str)
def test_works_without_dependencies(self):
@@ -170,4 +124,3 @@ class SerialisationTests(unittest.TestCase):
self.art3.source.dependencies = [self.art4]
self.art1.source.dependencies = [self.art2, self.art3]
self.verify_round_trip(self.art1)
-