diff options
author | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2014-05-07 16:59:22 +0000 |
---|---|---|
committer | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2014-05-08 09:29:07 +0000 |
commit | b8d3fd29bfe34ae58d2e750408d3b4178a489931 (patch) | |
tree | 9ca199d2d5dc0c4c6ad61c38c928bf475465e326 | |
parent | 0564aadd75aaf961190eba278ab89ab079568bd7 (diff) | |
download | morph-b8d3fd29bfe34ae58d2e750408d3b4178a489931.tar.gz |
Write checksums alongside artifacts in cache.
-rw-r--r-- | morphlib/bins.py | 34 |
-rw-r--r-- | morphlib/builder2.py | 33 |
-rw-r--r-- | morphlib/builder2_tests.py | 6 |
3 files changed, 58 insertions, 15 deletions
diff --git a/morphlib/bins.py b/morphlib/bins.py index eb783ae2..28542962 100644 --- a/morphlib/bins.py +++ b/morphlib/bins.py @@ -29,6 +29,7 @@ import errno import stat import shutil import tarfile +import zlib import morphlib @@ -50,11 +51,34 @@ def safe_makefile(self, tarinfo, targetpath): tarfile.TarFile.makefile = safe_makefile +class ChecksummingOutputStream(object): + '''Wrap a stream object and checksum all data that is written. + + The checksum used is Adler32. It is very fast. It's not suited for data + under 1KB and does not guard against intentional modifications much, + but for detecting corruption in the stored artifacts it is useful. + + ''' + def __init__(self, f): + self.f = f + self.checksum = 0 + + def read(self, *args, **kwargs): + raise NotImplementedError( + 'Attempted to read from a write-only stream.') + + def write(self, data, *args, **kwargs): + self.f.write(data, *args, **kwargs) + self.checksum = (self.checksum + zlib.adler32(data)) & 0xFFFFFFFF + + def create_chunk(rootdir, f, include, dump_memory_profile=None): '''Create a chunk from the contents of a directory. ``f`` is an open file handle, to which the tar file is written. + This function returns a checksum of the resulting file. + ''' dump_memory_profile = dump_memory_profile or (lambda msg: None) @@ -69,7 +93,8 @@ def create_chunk(rootdir, f, include, dump_memory_profile=None): path_pairs = [(relname, os.path.join(rootdir, relname)) for relname in include] - with tarfile.open(fileobj=f, mode='w') as tar: + stream = ChecksummingOutputStream(f) + with tarfile.open(fileobj=stream, mode='w|') as tar: for relname, filename in path_pairs: # Normalize mtime for everything. tarinfo = tar.gettarinfo(filename, @@ -89,6 +114,8 @@ def create_chunk(rootdir, f, include, dump_memory_profile=None): os.remove(filename) dump_memory_profile('after removing in create_chunks') + return stream.checksum + def create_system(rootdir, f, name): '''Create a system artifact from the contents of a directory. 
@@ -103,9 +130,12 @@ def create_system(rootdir, f, name): info.linkname = os.path.relpath(info.linkname, unslashy_root) return info - with tarfile.open(fileobj=f, mode="w", name=name) as tar: + stream = ChecksummingOutputStream(f) + with tarfile.open(fileobj=stream, mode="w|", name=name) as tar: tar.add(rootdir, recursive=True, filter=uproot_info) + return stream.checksum + def unpack_binary_from_file(f, dirname): # pragma: no cover '''Unpack a binary into a directory. diff --git a/morphlib/builder2.py b/morphlib/builder2.py index e3b7df07..7f19d246 100644 --- a/morphlib/builder2.py +++ b/morphlib/builder2.py @@ -217,10 +217,15 @@ class BuilderBase(object): self.build_watch = morphlib.stopwatch.Stopwatch() self.setup_mounts = setup_mounts - def save_build_times(self): - '''Write the times captured by the stopwatch''' + def save_build_info(self, artifact_checksums=None): + '''Write the build information. + + This consists of the artifact checksums, and build times. + + ''' meta = { - 'build-times': {} + 'build-times': {}, + 'checksums': artifact_checksums } for stage in self.build_watch.ticks.iterkeys(): meta['build-times'][stage] = { @@ -358,9 +363,9 @@ class ChunkBuilder(BuilderBase): self.staging_area.abort() raise self.staging_area.chroot_close() - built_artifacts = self.assemble_chunk_artifacts(destdir) + built_artifacts, checksums = self.assemble_chunk_artifacts(destdir) - self.save_build_times() + self.save_build_info(checksums) return built_artifacts @@ -453,6 +458,7 @@ class ChunkBuilder(BuilderBase): def assemble_chunk_artifacts(self, destdir): # pragma: no cover built_artifacts = [] + checksums = {} filenames = [] source = self.artifact.source split_rules = source.split_rules @@ -503,14 +509,17 @@ class ChunkBuilder(BuilderBase): self.app.status(msg='Creating chunk artifact %(name)s', name=chunk_artifact_name) - morphlib.bins.create_chunk(destdir, f, parented_paths) + checksum = morphlib.bins.create_chunk(destdir, f, + parented_paths) + 
built_artifacts.append(chunk_artifact) + checksums[chunk_artifact_name] = checksum for dirname, subdirs, files in os.walk(destdir): if files: raise Exception('DESTDIR %s is not empty: %s' % (destdir, files)) - return built_artifacts + return built_artifacts, checksums def get_sources(self, srcdir): # pragma: no cover s = self.artifact.source @@ -559,7 +568,7 @@ class StratumBuilder(BuilderBase): json.dump(meta, f, indent=4, sort_keys=True) with self.local_artifact_cache.put(self.artifact) as f: json.dump([c.basename() for c in constituents], f) - self.save_build_times() + self.save_build_info() return [self.artifact] @@ -588,11 +597,13 @@ class SystemBuilder(BuilderBase): # pragma: no cover self.unpack_strata(fs_root) self.write_metadata(fs_root, rootfs_name) self.run_system_integration_commands(fs_root) + # FIXME: this copy of the kernel escapes the CRC check so far!!! self.copy_kernel_into_artifact_cache(fs_root) artiname = self.artifact.source.morphology['name'] self.app.status(msg='Constructing tarball of root filesystem', chatty=True) - morphlib.bins.create_system(fs_root, handle, artiname) + checksum = morphlib.bins.create_system(fs_root, handle, + artiname) except BaseException, e: logging.error(traceback.format_exc()) self.app.status(msg='Error while building system', @@ -602,7 +613,9 @@ class SystemBuilder(BuilderBase): # pragma: no cover handle.close() - self.save_build_times() + checksums = { artiname: checksum } + self.save_build_info(checksums) + return [self.artifact] def unpack_one_stratum(self, stratum_artifact, target): diff --git a/morphlib/builder2_tests.py b/morphlib/builder2_tests.py index d0d56b17..015f1caa 100644 --- a/morphlib/builder2_tests.py +++ b/morphlib/builder2_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2013 Codethink Limited +# Copyright (C) 2012-2014 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -204,7 +204,7 @@ 
class BuilderBaseTests(unittest.TestCase): def test_writes_build_times(self): with self.builder.build_watch('nothing'): pass - self.builder.save_build_times() + self.builder.save_build_info() self.assertTrue(self.artifact_cache.has_source_metadata( self.artifact.source, self.artifact.cache_key, 'meta')) @@ -213,7 +213,7 @@ class BuilderBaseTests(unittest.TestCase): for event in events: with self.builder.build_watch(event): pass - self.builder.save_build_times() + self.builder.save_build_info() meta = json.load(self.artifact_cache.get_source_metadata( self.artifact.source, self.artifact.cache_key, 'meta')) self.assertEqual(sorted(events), |