summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sam Thursfield <sam.thursfield@codethink.co.uk> 2014-05-07 16:59:22 +0000
committer: Sam Thursfield <sam.thursfield@codethink.co.uk> 2014-05-08 09:29:07 +0000
commit: b8d3fd29bfe34ae58d2e750408d3b4178a489931 (patch)
tree: 9ca199d2d5dc0c4c6ad61c38c928bf475465e326
parent: 0564aadd75aaf961190eba278ab89ab079568bd7 (diff)
download: morph-b8d3fd29bfe34ae58d2e750408d3b4178a489931.tar.gz
Write checksums alongside artifacts in cache.
-rw-r--r-- morphlib/bins.py | 34
-rw-r--r-- morphlib/builder2.py | 33
-rw-r--r-- morphlib/builder2_tests.py | 6
3 files changed, 58 insertions, 15 deletions
diff --git a/morphlib/bins.py b/morphlib/bins.py
index eb783ae2..28542962 100644
--- a/morphlib/bins.py
+++ b/morphlib/bins.py
@@ -29,6 +29,7 @@ import errno
import stat
import shutil
import tarfile
+import zlib
import morphlib
@@ -50,11 +51,34 @@ def safe_makefile(self, tarinfo, targetpath):
tarfile.TarFile.makefile = safe_makefile
+class ChecksummingOutputStream(object):
+ '''Wrap a stream object and checksum all data that is written.
+
+ The checksum used is Adler32. It is very fast. It's not suited for data
+ under 1KB and does not guard against intentional modifications much,
+ but for detecting corruption in the stored artifacts it is useful.
+
+ '''
+ def __init__(self, f):
+ self.f = f
+ self.checksum = 0
+
+ def read(self, *args, **kwargs):
+ raise NotImplementedError(
+ 'Attempted to read from a write-only stream.')
+
+ def write(self, data, *args, **kwargs):
+ self.f.write(data, *args, **kwargs)
+ self.checksum = (self.checksum + zlib.adler32(data)) & 0xFFFFFFFF
+
+
def create_chunk(rootdir, f, include, dump_memory_profile=None):
'''Create a chunk from the contents of a directory.
``f`` is an open file handle, to which the tar file is written.
+ This function returns a checksum of the resulting file.
+
'''
dump_memory_profile = dump_memory_profile or (lambda msg: None)
@@ -69,7 +93,8 @@ def create_chunk(rootdir, f, include, dump_memory_profile=None):
path_pairs = [(relname, os.path.join(rootdir, relname))
for relname in include]
- with tarfile.open(fileobj=f, mode='w') as tar:
+ stream = ChecksummingOutputStream(f)
+ with tarfile.open(fileobj=stream, mode='w|') as tar:
for relname, filename in path_pairs:
# Normalize mtime for everything.
tarinfo = tar.gettarinfo(filename,
@@ -89,6 +114,8 @@ def create_chunk(rootdir, f, include, dump_memory_profile=None):
os.remove(filename)
dump_memory_profile('after removing in create_chunks')
+ return stream.checksum
+
def create_system(rootdir, f, name):
'''Create a system artifact from the contents of a directory.
@@ -103,9 +130,12 @@ def create_system(rootdir, f, name):
info.linkname = os.path.relpath(info.linkname, unslashy_root)
return info
- with tarfile.open(fileobj=f, mode="w", name=name) as tar:
+ stream = ChecksummingOutputStream(f)
+ with tarfile.open(fileobj=stream, mode="w|", name=name) as tar:
tar.add(rootdir, recursive=True, filter=uproot_info)
+ return stream.checksum
+
def unpack_binary_from_file(f, dirname): # pragma: no cover
'''Unpack a binary into a directory.
diff --git a/morphlib/builder2.py b/morphlib/builder2.py
index e3b7df07..7f19d246 100644
--- a/morphlib/builder2.py
+++ b/morphlib/builder2.py
@@ -217,10 +217,15 @@ class BuilderBase(object):
self.build_watch = morphlib.stopwatch.Stopwatch()
self.setup_mounts = setup_mounts
- def save_build_times(self):
- '''Write the times captured by the stopwatch'''
+ def save_build_info(self, artifact_checksums=None):
+ '''Write the build information.
+
+ This consists of the artifact checksums, and build times.
+
+ '''
meta = {
- 'build-times': {}
+ 'build-times': {},
+ 'checksums': artifact_checksums
}
for stage in self.build_watch.ticks.iterkeys():
meta['build-times'][stage] = {
@@ -358,9 +363,9 @@ class ChunkBuilder(BuilderBase):
self.staging_area.abort()
raise
self.staging_area.chroot_close()
- built_artifacts = self.assemble_chunk_artifacts(destdir)
+ built_artifacts, checksums = self.assemble_chunk_artifacts(destdir)
- self.save_build_times()
+ self.save_build_info(checksums)
return built_artifacts
@@ -453,6 +458,7 @@ class ChunkBuilder(BuilderBase):
def assemble_chunk_artifacts(self, destdir): # pragma: no cover
built_artifacts = []
+ checksums = {}
filenames = []
source = self.artifact.source
split_rules = source.split_rules
@@ -503,14 +509,17 @@ class ChunkBuilder(BuilderBase):
self.app.status(msg='Creating chunk artifact %(name)s',
name=chunk_artifact_name)
- morphlib.bins.create_chunk(destdir, f, parented_paths)
+ checksum = morphlib.bins.create_chunk(destdir, f,
+ parented_paths)
+
built_artifacts.append(chunk_artifact)
+ checksums[chunk_artifact_name] = checksum
for dirname, subdirs, files in os.walk(destdir):
if files:
raise Exception('DESTDIR %s is not empty: %s' %
(destdir, files))
- return built_artifacts
+ return built_artifacts, checksums
def get_sources(self, srcdir): # pragma: no cover
s = self.artifact.source
@@ -559,7 +568,7 @@ class StratumBuilder(BuilderBase):
json.dump(meta, f, indent=4, sort_keys=True)
with self.local_artifact_cache.put(self.artifact) as f:
json.dump([c.basename() for c in constituents], f)
- self.save_build_times()
+ self.save_build_info()
return [self.artifact]
@@ -588,11 +597,13 @@ class SystemBuilder(BuilderBase): # pragma: no cover
self.unpack_strata(fs_root)
self.write_metadata(fs_root, rootfs_name)
self.run_system_integration_commands(fs_root)
+ # FIXME: this copy of the kernel escapes the CRC check so far!!!
self.copy_kernel_into_artifact_cache(fs_root)
artiname = self.artifact.source.morphology['name']
self.app.status(msg='Constructing tarball of root filesystem',
chatty=True)
- morphlib.bins.create_system(fs_root, handle, artiname)
+ checksum = morphlib.bins.create_system(fs_root, handle,
+ artiname)
except BaseException, e:
logging.error(traceback.format_exc())
self.app.status(msg='Error while building system',
@@ -602,7 +613,9 @@ class SystemBuilder(BuilderBase): # pragma: no cover
handle.close()
- self.save_build_times()
+ checksums = { artiname: checksum }
+ self.save_build_info(checksums)
+
return [self.artifact]
def unpack_one_stratum(self, stratum_artifact, target):
diff --git a/morphlib/builder2_tests.py b/morphlib/builder2_tests.py
index d0d56b17..015f1caa 100644
--- a/morphlib/builder2_tests.py
+++ b/morphlib/builder2_tests.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2012-2013 Codethink Limited
+# Copyright (C) 2012-2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -204,7 +204,7 @@ class BuilderBaseTests(unittest.TestCase):
def test_writes_build_times(self):
with self.builder.build_watch('nothing'):
pass
- self.builder.save_build_times()
+ self.builder.save_build_info()
self.assertTrue(self.artifact_cache.has_source_metadata(
self.artifact.source, self.artifact.cache_key, 'meta'))
@@ -213,7 +213,7 @@ class BuilderBaseTests(unittest.TestCase):
for event in events:
with self.builder.build_watch(event):
pass
- self.builder.save_build_times()
+ self.builder.save_build_info()
meta = json.load(self.artifact_cache.get_source_metadata(
self.artifact.source, self.artifact.cache_key, 'meta'))
self.assertEqual(sorted(events),