diff options
-rw-r--r-- | morphlib/bins.py | 61 | ||||
-rw-r--r-- | morphlib/localartifactcache.py | 119 | ||||
-rw-r--r-- | morphlib/plugins/cache_check_plugin.py | 9 |
3 files changed, 179 insertions, 10 deletions
diff --git a/morphlib/bins.py b/morphlib/bins.py
index 28542962..7dca8172 100644
--- a/morphlib/bins.py
+++ b/morphlib/bins.py
@@ -29,6 +29,7 @@ import errno
 import stat
 import shutil
 import tarfile
+import functools
 import zlib
 
 import morphlib
@@ -117,22 +118,64 @@ def create_chunk(rootdir, f, include, dump_memory_profile=None):
     return stream.checksum
 
 
+def make_tarinfo_path_relative_to(root, info):
+    '''Strip rootdir from a file's path before adding to a tarfile.'''
+
+    # tar.gettarinfo() makes all paths relative, we must follow that.
+    root = root.lstrip('/')
+    info.name = os.path.relpath(info.name, root)
+    if info.islnk():
+        info.linkname = os.path.relpath(info.linkname, root)
+    return info
+
+
+def create_chunk_2(rootdir, f, name, include):
+    '''Create a chunk artifact, new way.
+
+    Output should be identical to create_chunk(), but it doesn't delete the
+    files after creating the chunk. The caller still has to work out which
+    paths go in (passed as 'include'), because chunk splitting decides
+    that. Each path is stored relative to rootdir with a fixed timestamp.
+    '''
+
+    # This timestamp is used to normalize the mtime for every file in
+    # chunk artifact. This is useful to avoid problems from smallish
+    # clock skew. It needs to be recent enough, however, that GNU tar
+    # does not complain about an implausibly old timestamp.
+    normalized_timestamp = 683074800
+
+    stream = ChecksummingOutputStream(f)
+    with tarfile.open(fileobj=stream, mode='w|') as tar:
+        for filepath in sorted(include):
+            if filepath == rootdir:
+                # I'm not sure how the ChunkBuilder.assemble_chunk_artifact()
+                # code path manages to avoid adding '.' to the tarfile, but it
+                # does
+                continue
+            # Normalize mtime for everything.
+            tarinfo = tar.gettarinfo(filepath)
+            tarinfo = make_tarinfo_path_relative_to(rootdir, tarinfo)
+            tarinfo.ctime = normalized_timestamp
+            tarinfo.mtime = normalized_timestamp
+            if tarinfo.isreg():
+                # FIXME: why this?
+                with open(filepath, 'rb') as src:
+                    tar.addfile(tarinfo, fileobj=src)
+            else:
+                tar.addfile(tarinfo)
+
+    return stream.checksum
+
 def create_system(rootdir, f, name):
     '''Create a system artifact from the contents of a directory.
 
     '''
 
-    unslashy_root = rootdir[1:]
-    def uproot_info(info):
-        '''Strip rootdir from a file's path before adding to a tarfile.'''
-        info.name = os.path.relpath(info.name, unslashy_root)
-        if info.islnk():
-            info.linkname = os.path.relpath(info.linkname, unslashy_root)
-        return info
-
     stream = ChecksummingOutputStream(f)
+
+    path_filter = functools.partial(make_tarinfo_path_relative_to, rootdir)
     with tarfile.open(fileobj=stream, mode="w|", name=name) as tar:
-        tar.add(rootdir, recursive=True, filter=uproot_info)
+        tar.add(rootdir, recursive=True, filter=path_filter)
     return stream.checksum
 
 
diff --git a/morphlib/localartifactcache.py b/morphlib/localartifactcache.py
index 10ddd638..faef27f4 100644
--- a/morphlib/localartifactcache.py
+++ b/morphlib/localartifactcache.py
@@ -15,8 +15,12 @@
 
 
 import collections
+import json
+import logging
 import os
+import tempfile
 import time
+import zlib
 
 import morphlib
 
@@ -138,3 +142,118 @@ class LocalArtifactCache(object):
         for filename in (x for x in self.cachefs.walkfiles()
                          if x.startswith(cachekey)):
             self.cachefs.remove(filename)
+
+    def _calculate_checksum(self, artifact_filename):
+        # FIXME: pick a block size
+        block_size = 10 * 1024 * 1024  # 10MB
+        checksum = 1  # Adler-32 seed, so chained == one-shot adler32()
+        with open(artifact_filename, 'rb') as f:
+            for block in iter(lambda: f.read(block_size), b''):
+                checksum = zlib.adler32(block, checksum) & 0xFFFFFFFF
+        return checksum
+
+    def _calculate_unpacked_chunk_checksum(self, chunk_dir):
+        # create a chunk artifact from the unpacked chunk and return the
+        # checksum. It should be identical, right ??
+        #
+        # This code is not the same code used in builder2.ChunkBuilder.
+        # It's actually much better and as soon as I've checked that it
+        # produces identical results it should be used in builder2 too.
+        # I'm especially confused why bins.create_chunk() removes files,
+        # instead of leaving it up to the ChunkBuilder code.
+
+        def filepaths(destdir):
+            for dirname, subdirs, basenames in os.walk(destdir):
+                subdirsymlinks = [os.path.join(dirname, x) for x in subdirs
+                                  if os.path.islink(os.path.join(dirname, x))]
+                filenames = [os.path.join(dirname, x) for x in basenames]
+                for path in [dirname] + subdirsymlinks + filenames:
+                    yield path
+        paths = filepaths(chunk_dir)
+
+        with tempfile.NamedTemporaryFile() as f:
+            logging.debug('Repacking %s into %s' % (chunk_dir, f.name))
+            checksum = morphlib.bins.create_chunk_2(
+                chunk_dir, f, name=None, include=paths)
+
+        return checksum
+
+    def validate(self, unpacked_chunk_cache_dir):
+        '''Check for corruption in all cached binary artifacts.'''
+        cache_key = None
+        errors = {}
+
+        n_artifacts = 0
+        n_checksummed_artifacts = 0
+
+        def error(msg):
+            errors[cache_key] = errors.get(cache_key, '') + '\n' + msg
+            logging.error(
+                'Error in locally cached build %s. %s' % (cache_key, msg))
+
+        for cache_key, artifacts, last_used in self.list_contents():
+            if len(cache_key) < 64:
+                # Morph itself leaves junk temporary files around in the
+                # artifact cache directory, as does the user. Ignore it.
+                logging.info('Ignoring %s' % cache_key)
+                continue
+
+            binary_artifacts = list(artifacts - {'build-log', 'meta'})
+            kind = binary_artifacts[0].split('.', 1)[0]
+
+            if kind == 'stratum':
+                continue
+
+            logging.info(
+                msg='Checking artifacts for %s %s' % (kind, cache_key))
+
+            n_artifacts += len(artifacts)
+
+            filename = self._source_metadata_filename(None, cache_key, 'meta')
+            try:
+                with open(filename) as f:
+                    build_info = json.load(f)
+            except (IOError, OSError, ValueError) as e:
+                error('Unable to read source metadata: %s' % e)
+                continue
+
+            if 'checksums' not in build_info:
+                # This is the case for artifacts created by old versions of
+                # Morph. We don't raise an error, for compatibility.
+                logging.warning(
+                    'No checksums for build %s %s.' % (kind, cache_key))
+                continue
+
+            for artifact in binary_artifacts:
+                if '.' not in artifact:
+                    logging.warning('Invalid artifact name %s' % artifact)
+                    continue
+
+                _, artifact_name = artifact.split('.', 1)
+                expected_checksum = build_info['checksums'].get(artifact_name)
+
+                if expected_checksum is None:
+                    error('Checksum missing for artifact %s!' % artifact_name)
+                    continue
+
+                artifact_filename = self.cachefs.getsyspath(
+                    '%s.%s' % (cache_key, artifact))
+                checksum = self._calculate_checksum(artifact_filename)
+
+                if checksum != expected_checksum:
+                    error('Artifact %s has checksum 0x%x, expected 0x%x' %
+                          (artifact, checksum, expected_checksum))
+
+                n_checksummed_artifacts += 1
+
+                # Check for an unpacked version now.
+                cached_name = '%s.%s.d' % (cache_key, artifact)
+                cached_path = os.path.join(unpacked_chunk_cache_dir,
+                                           cached_name)
+                if os.path.exists(cached_path):
+                    checksum = self._calculate_unpacked_chunk_checksum(
+                        cached_path)
+
+                    if checksum != expected_checksum:
+                        error('Unpacked chunk artifact %s has checksum 0x%x, expected 0x%x' %
+                              (artifact, checksum, expected_checksum))
diff --git a/morphlib/plugins/cache_check_plugin.py b/morphlib/plugins/cache_check_plugin.py
index 621d9d8a..4315c265 100644
--- a/morphlib/plugins/cache_check_plugin.py
+++ b/morphlib/plugins/cache_check_plugin.py
@@ -16,6 +16,7 @@
 
 import cliapp
 import contextlib
+import os
 import uuid
 
 import morphlib
@@ -71,5 +72,11 @@ class CacheCheckPlugin(cliapp.Plugin):
         artifacts which are used at build time.
 
         '''
+        self.app.status(
+            msg='Checking all locally cached build artifacts for corruption')
+
         lac, rac = morphlib.util.new_artifact_caches(self.app.settings)
-        lac.validate()
+        unpacked_chunk_cache_dir = os.path.join(self.app.settings['tempdir'], 'chunks')
+        lac.validate(unpacked_chunk_cache_dir)
+
+        # FIXME: ccache is not validated! don't use ccache, perhaps!