From 9cc12c9cd75ad98d18cbd9c277cd31bf5ed55adf Mon Sep 17 00:00:00 2001 From: Adam Coldrick Date: Tue, 5 May 2015 15:27:43 +0000 Subject: Clean up artifact serialisation We no longer serialise whole artifacts, so it doesn't make sense for things to still refer to serialise-artifact and similar. Change-Id: Id4d563a07041bbce77f13ac71dc3f7de39df5e23 --- distbuild/__init__.py | 4 +- distbuild/artifact_reference.py | 188 ++++++++++++++++++++++++++++++++++ distbuild/artifact_reference_tests.py | 126 +++++++++++++++++++++++ distbuild/build_controller.py | 4 +- distbuild/serialise.py | 175 ------------------------------- distbuild/serialise_tests.py | 126 ----------------------- distbuild/worker_build_scheduler.py | 8 +- 7 files changed, 322 insertions(+), 309 deletions(-) create mode 100644 distbuild/artifact_reference.py create mode 100644 distbuild/artifact_reference_tests.py delete mode 100644 distbuild/serialise.py delete mode 100644 distbuild/serialise_tests.py (limited to 'distbuild') diff --git a/distbuild/__init__.py b/distbuild/__init__.py index bb9ddc6e..dc5ff153 100644 --- a/distbuild/__init__.py +++ b/distbuild/__init__.py @@ -27,7 +27,9 @@ from mainloop import MainLoop from sockserv import ListenServer from jm import JsonMachine, JsonNewMessage, JsonEof -from serialise import serialise_artifact, deserialise_artifact +from artifact_reference import (encode_artifact, + encode_artifact_reference, + decode_artifact_reference) from idgen import IdentifierGenerator from route_map import RouteMap from timer_event_source import TimerEventSource, Timer diff --git a/distbuild/artifact_reference.py b/distbuild/artifact_reference.py new file mode 100644 index 00000000..bdcfbf23 --- /dev/null +++ b/distbuild/artifact_reference.py @@ -0,0 +1,188 @@ +# distbuild/artifact_reference.py -- Decode/encode ArtifactReference objects +# +# Copyright (C) 2012, 2014-2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms 
of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. + + +import json +import logging +import yaml + +import morphlib + + +class ArtifactReference(object): # pragma: no cover + + '''Container for some basic information about an artifact.''' + + def __init__(self, basename, encoded): + self._basename = basename + self._dict = encoded + + def __getattr__(self, name): + if not name.startswith('_'): + return self._dict[name] + else: + super(ArtifactReference, self).__getattr__(name) + + def __setattr__(self, name, val): + if not name.startswith('_'): + self._dict[name] = val + else: + super(ArtifactReference, self).__setattr__(name, val) + + def basename(self): + return self._basename + + def walk(self): + done = set() + + def depth_first(a): + if a not in done: + done.add(a) + for dep in a.dependencies: + for ret in depth_first(dep): + yield ret + yield a + + return list(depth_first(self)) + + +def encode_artifact(artifact, repo, ref): + '''Encode part of an Artifact object and dependencies into string form.''' + + def get_source_dict(source): + source_dict = { + 'filename': source.filename, + 'kind': source.morphology['kind'], + 'source_name': source.name, + 'source_repo': source.repo_name, + 'source_ref': source.original_ref, + 'source_sha1': source.sha1, + 'source_artifact_names': [], + 'dependencies': [] + } + for dependency in source.dependencies: + source_dict['dependencies'].append(dependency.basename()) + for source_artifact_name in source.artifacts: + source_dict['source_artifact_names'].append(source_artifact_name) + return source_dict + + def 
get_artifact_dict(a): + if artifact.source.morphology['kind'] == 'system': # pragma: no cover + arch = artifact.source.morphology['arch'] + else: + arch = artifact.arch + + a_dict = { + 'arch': arch, + 'cache_key': a.source.cache_key, + 'name': a.name, + 'repo': repo, + 'ref': ref, + } + return a_dict + + encoded_artifacts = {} + encoded_sources = {} + + root_filename = artifact.source.filename + for a in artifact.walk(): + if a.basename() not in encoded_artifacts: # pragma: no cover + encoded_artifacts[a.basename()] = get_artifact_dict(a) + encoded_sources[a.source.cache_key] = get_source_dict(a.source) + + content = { + 'root-artifact': artifact.basename(), + 'root-filename': root_filename, + 'artifacts': encoded_artifacts, + 'sources': encoded_sources + } + + return json.dumps(yaml.dump(content)) + + +def encode_artifact_reference(artifact): # pragma: no cover + '''Encode an ArtifactReference object into string form. + + The ArtifactReference object is encoded such that it can be recreated by + ``decode_artifact_reference``. 
+ + ''' + artifact_dict = { + 'arch': artifact.arch, + 'cache_key': artifact.cache_key, + 'name': artifact.name, + 'repo': artifact.repo, + 'ref': artifact.ref + } + source_dict = { + 'filename': artifact.filename, + 'kind': artifact.kind, + 'source_name': artifact.source_name, + 'source_repo': artifact.source_repo, + 'source_ref': artifact.source_ref, + 'source_sha1': artifact.source_sha1, + 'source_artifact_names': [], + 'dependencies': [] + } + + for dependency in artifact.dependencies: + source_dict['dependencies'].append(dependency.basename()) + + for source_artifact_name in artifact.source_artifact_names: + source_dict['source_artifact_names'].append(source_artifact_name) + + content = { + 'root-artifact': artifact.basename(), + 'root-filename': artifact.root_filename, + 'artifacts': {artifact.basename(): artifact_dict}, + 'sources': {artifact.cache_key: source_dict} + } + + return json.dumps(yaml.dump(content)) + + +def decode_artifact_reference(encoded): + '''Decode an ArtifactReference object from `encoded`. + + The argument should be a string returned by ``encode_artifact`` + or ``encode_artifact_reference``. The decoded ArtifactReference + object will be sufficient to represent a build graph and contain + enough information to allow `morph worker-build` to calculate a + build graph and find the original Artifact object it needs to + build. 
+ + ''' + content = yaml.load(json.loads(encoded)) + root = content['root-artifact'] + encoded_artifacts = content['artifacts'] + encoded_sources = content['sources'] + + artifacts = {} + + # decode artifacts + for basename, artifact_dict in encoded_artifacts.iteritems(): + artifact_dict.update(encoded_sources[artifact_dict['cache_key']]) + artifact = ArtifactReference(basename, artifact_dict) + artifact.root_filename = content['root-filename'] + artifacts[basename] = artifact + + # add dependencies + for basename, a_dict in encoded_artifacts.iteritems(): + artifact = artifacts[basename] + artifact.dependencies = [artifacts.get(dep) + for dep in artifact.dependencies] + + return artifacts[root] diff --git a/distbuild/artifact_reference_tests.py b/distbuild/artifact_reference_tests.py new file mode 100644 index 00000000..e21918a1 --- /dev/null +++ b/distbuild/artifact_reference_tests.py @@ -0,0 +1,126 @@ +# distbuild/artifact_reference_tests.py -- unit tests for Artifact encoding +# +# Copyright (C) 2012, 2014-2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. 
+ + +import unittest + +import distbuild + + +class MockSource(object): + + build_mode = 'staging' + prefix = '/usr' + def __init__(self, name, kind): + self.name = name + self.repo = None + self.repo_name = '%s.source.repo_name' % name + self.original_ref = '%s.source.original_ref' % name + self.sha1 = '%s.source.sha1' % name + self.tree = '%s.source.tree' % name + self.morphology = {'kind': kind} + self.filename = '%s.source.filename' % name + self.dependencies = [] + self.cache_id = { + 'blip': '%s.blip' % name, + 'integer': 42, + } + self.cache_key = '%s.cache_key' % name + self.artifacts = {} + + +class MockArtifact(object): + + arch = 'testarch' + + def __init__(self, name, kind): + self.source = MockSource(name, kind) + self.source.artifacts = {name: self} + self.name = name + self.dependents = [] + + def basename(self): + return '%s.%s.%s' % (self.source.cache_key, + self.source.morphology['kind'], + self.name) + + def walk(self): # pragma: no cover + done = set() + + def depth_first(a): + if a not in done: + done.add(a) + for dep in a.source.dependencies: + for ret in depth_first(dep): + yield ret + yield a + + return list(depth_first(self)) + + +class ArtifactReferenceTests(unittest.TestCase): + + def setUp(self): + self.art1 = MockArtifact('name1', 'stratum') + self.art2 = MockArtifact('name2', 'chunk') + self.art3 = MockArtifact('name3', 'chunk') + self.art4 = MockArtifact('name4', 'chunk') + + def verify_round_trip(self, artifact): + encoded = distbuild.encode_artifact(artifact, + artifact.source.repo_name, + artifact.source.sha1) + decoded = distbuild.decode_artifact_reference(encoded) + self.assertEqual(artifact.basename(), decoded.basename()) + + objs = {} + queue = [decoded] + while queue: + obj = queue.pop() + k = obj.cache_key + if k in objs: + self.assertTrue(obj is objs[k]) + else: + objs[k] = obj + queue.extend(obj.dependencies) + + def test_returns_string(self): + encoded = distbuild.encode_artifact(self.art1, + self.art1.source.repo_name, + 
self.art1.source.sha1) + self.assertEqual(type(encoded), str) + + def test_works_without_dependencies(self): + self.verify_round_trip(self.art1) + + def test_works_with_single_dependency(self): + self.art1.source.dependencies = [self.art2] + self.verify_round_trip(self.art1) + + def test_works_with_two_dependencies(self): + self.art1.source.dependencies = [self.art2, self.art3] + self.verify_round_trip(self.art1) + + def test_works_with_two_levels_of_dependencies(self): + self.art2.source.dependencies = [self.art4] + self.art1.source.dependencies = [self.art2, self.art3] + self.verify_round_trip(self.art1) + + def test_works_with_dag(self): + self.art2.source.dependencies = [self.art4] + self.art3.source.dependencies = [self.art4] + self.art1.source.dependencies = [self.art2, self.art3] + self.verify_round_trip(self.art1) diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index 5f281682..3a099b82 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -335,7 +335,7 @@ class BuildController(distbuild.StateMachine): self._artifact_error = distbuild.StringBuffer() argv = [ self._morph_instance, - 'serialise-artifact', + 'calculate-build-graph', '--quiet', self._request['repo'], self._request['ref'], @@ -380,7 +380,7 @@ class BuildController(distbuild.StateMachine): text = self._artifact_data.peek() try: - artifact = distbuild.deserialise_artifact(text) + artifact = distbuild.decode_artifact_reference(text) except ValueError as e: logging.error(traceback.format_exc()) self.fail('Failed to compute build graph: %s' % e) diff --git a/distbuild/serialise.py b/distbuild/serialise.py deleted file mode 100644 index 5e83ffc2..00000000 --- a/distbuild/serialise.py +++ /dev/null @@ -1,175 +0,0 @@ -# distbuild/serialise.py -- (de)serialise Artifact object graphs -# -# Copyright (C) 2012, 2014-2015 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General 
Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. - - -import json -import logging -import yaml - -import morphlib - - -class ArtifactReference(object): # pragma: no cover - - '''Container for some basic information about an artifact.''' - - def __init__(self, basename, encoded): - self._basename = basename - self._dict = encoded - - def __getattr__(self, name): - if not name.startswith('_'): - return self._dict[name] - else: - super(ArtifactReference, self).__getattr__(name) - - def __setattr__(self, name, val): - if not name.startswith('_'): - self._dict[name] = val - else: - super(ArtifactReference, self).__setattr__(name, val) - - def basename(self): - return self._basename - - def walk(self): - done = set() - - def depth_first(a): - if a not in done: - done.add(a) - for dep in a.dependencies: - for ret in depth_first(dep): - yield ret - yield a - - return list(depth_first(self)) - - -def serialise_artifact(artifact, repo, ref): - '''Serialise an Artifact object and its dependencies into string form.''' - - def encode_source(source): - s_dict = { - 'filename': source.filename, - 'kind': source.morphology['kind'], - 'source_name': source.name, - 'source_repo': source.repo_name, - 'source_ref': source.original_ref, - 'source_sha1': source.sha1, - 'source_artifacts': [], - 'dependencies': [] - } - for dep in source.dependencies: - s_dict['dependencies'].append(dep.basename()) - for sa in source.artifacts: - s_dict['source_artifacts'].append(sa) - return s_dict - - def encode_artifact(a): - if artifact.source.morphology['kind'] == 'system': # pragma: no cover - arch = 
artifact.source.morphology['arch'] - else: - arch = artifact.arch - - a_dict = { - 'arch': arch, - 'cache_key': a.source.cache_key, - 'name': a.name, - 'repo': repo, - 'ref': ref, - } - return a_dict - - def encode_artifact_reference(a): # pragma: no cover - a_dict = { - 'arch': a.arch, - 'cache_key': a.cache_key, - 'name': a.name, - 'repo': a.repo, - 'ref': a.ref - } - s_dict = { - 'filename': a.filename, - 'kind': a.kind, - 'source_name': a.source_name, - 'source_repo': a.source_repo, - 'source_ref': a.source_ref, - 'source_sha1': a.source_sha1, - 'source_artifacts': [], - 'dependencies': [] - } - for dep in a.dependencies: - s_dict['dependencies'].append(dep.basename()) - for sa in a.source_artifacts: - s_dict['source_artifacts'].append(sa) - return a_dict, s_dict - - encoded_artifacts = {} - encoded_sources = {} - - if isinstance(artifact, ArtifactReference): # pragma: no cover - root_filename = artifact.root_filename - a_dict, s_dict = encode_artifact_reference(artifact) - encoded_artifacts[artifact.basename()] = a_dict - encoded_sources[artifact.cache_key] = s_dict - else: - root_filename = artifact.source.filename - for a in artifact.walk(): - if a.basename() not in encoded_artifacts: # pragma: no cover - encoded_artifacts[a.basename()] = encode_artifact(a) - encoded_sources[a.source.cache_key] = encode_source(a.source) - - content = { - 'root-artifact': artifact.basename(), - 'root-filename': root_filename, - 'artifacts': encoded_artifacts, - 'sources': encoded_sources - } - - return json.dumps(yaml.dump(content)) - - -def deserialise_artifact(encoded): - '''Re-construct the Artifact object (and dependencies). - - The argument should be a string returned by ``serialise_artifact``. - The reconstructed Artifact objects will be sufficiently like the - originals that they can be used as a build graph, and other such - purposes, by Morph. 
- - ''' - content = yaml.load(json.loads(encoded)) - root = content['root-artifact'] - encoded_artifacts = content['artifacts'] - encoded_sources = content['sources'] - - artifacts = {} - - # decode artifacts - for basename, artifact_dict in encoded_artifacts.iteritems(): - artifact_dict.update(encoded_sources[artifact_dict['cache_key']]) - artifact = ArtifactReference(basename, artifact_dict) - artifact.root_filename = content['root-filename'] - artifacts[basename] = artifact - - # add dependencies - for basename, a_dict in encoded_artifacts.iteritems(): - artifact = artifacts[basename] - artifact.dependencies = [artifacts.get(dep) - for dep in artifact.dependencies] - - return artifacts[root] diff --git a/distbuild/serialise_tests.py b/distbuild/serialise_tests.py deleted file mode 100644 index 2de3ab85..00000000 --- a/distbuild/serialise_tests.py +++ /dev/null @@ -1,126 +0,0 @@ -# distbuild/serialise_tests.py -- unit tests for Artifact serialisation -# -# Copyright (C) 2012, 2014-2015 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. 
- - -import unittest - -import distbuild - - -class MockSource(object): - - build_mode = 'staging' - prefix = '/usr' - def __init__(self, name, kind): - self.name = name - self.repo = None - self.repo_name = '%s.source.repo_name' % name - self.original_ref = '%s.source.original_ref' % name - self.sha1 = '%s.source.sha1' % name - self.tree = '%s.source.tree' % name - self.morphology = {'kind': kind} - self.filename = '%s.source.filename' % name - self.dependencies = [] - self.cache_id = { - 'blip': '%s.blip' % name, - 'integer': 42, - } - self.cache_key = '%s.cache_key' % name - self.artifacts = {} - - -class MockArtifact(object): - - arch = 'testarch' - - def __init__(self, name, kind): - self.source = MockSource(name, kind) - self.source.artifacts = {name: self} - self.name = name - self.dependents = [] - - def basename(self): - return '%s.%s.%s' % (self.source.cache_key, - self.source.morphology['kind'], - self.name) - - def walk(self): # pragma: no cover - done = set() - - def depth_first(a): - if a not in done: - done.add(a) - for dep in a.source.dependencies: - for ret in depth_first(dep): - yield ret - yield a - - return list(depth_first(self)) - - -class SerialisationTests(unittest.TestCase): - - def setUp(self): - self.art1 = MockArtifact('name1', 'stratum') - self.art2 = MockArtifact('name2', 'chunk') - self.art3 = MockArtifact('name3', 'chunk') - self.art4 = MockArtifact('name4', 'chunk') - - def verify_round_trip(self, artifact): - encoded = distbuild.serialise_artifact(artifact, - artifact.source.repo_name, - artifact.source.sha1) - decoded = distbuild.deserialise_artifact(encoded) - self.assertEqual(artifact.basename(), decoded.basename()) - - objs = {} - queue = [decoded] - while queue: - obj = queue.pop() - k = obj.cache_key - if k in objs: - self.assertTrue(obj is objs[k]) - else: - objs[k] = obj - queue.extend(obj.dependencies) - - def test_returns_string(self): - encoded = distbuild.serialise_artifact(self.art1, - self.art1.source.repo_name, - 
self.art1.source.sha1) - self.assertEqual(type(encoded), str) - - def test_works_without_dependencies(self): - self.verify_round_trip(self.art1) - - def test_works_with_single_dependency(self): - self.art1.source.dependencies = [self.art2] - self.verify_round_trip(self.art1) - - def test_works_with_two_dependencies(self): - self.art1.source.dependencies = [self.art2, self.art3] - self.verify_round_trip(self.art1) - - def test_works_with_two_levels_of_dependencies(self): - self.art2.source.dependencies = [self.art4] - self.art1.source.dependencies = [self.art2, self.art3] - self.verify_round_trip(self.art1) - - def test_works_with_dag(self): - self.art2.source.dependencies = [self.art4] - self.art3.source.dependencies = [self.art4] - self.art1.source.dependencies = [self.art2, self.art3] - self.verify_round_trip(self.art1) diff --git a/distbuild/worker_build_scheduler.py b/distbuild/worker_build_scheduler.py index 71e1c3ef..9397d5c9 100644 --- a/distbuild/worker_build_scheduler.py +++ b/distbuild/worker_build_scheduler.py @@ -527,9 +527,7 @@ class WorkerConnection(distbuild.StateMachine): msg = distbuild.message('exec-request', id=job.artifact.basename(), argv=argv, - stdin_contents=distbuild.serialise_artifact(job.artifact, - job.artifact.repo, - job.artifact.ref), + stdin_contents=distbuild.encode_artifact_reference(job.artifact), ) self._jm.send(msg) @@ -610,9 +608,9 @@ class WorkerConnection(distbuild.StateMachine): kind = job.artifact.kind if kind == 'chunk': - source_artifacts = job.artifact.source_artifacts + artifact_names = job.artifact.source_artifact_names - suffixes = ['%s.%s' % (kind, name) for name in source_artifacts] + suffixes = ['%s.%s' % (kind, name) for name in artifact_names] suffixes.append('build-log') else: filename = '%s.%s' % (kind, job.artifact.name) -- cgit v1.2.1