summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--morphlib/bins.py61
-rw-r--r--morphlib/localartifactcache.py119
-rw-r--r--morphlib/plugins/cache_check_plugin.py9
3 files changed, 179 insertions, 10 deletions
diff --git a/morphlib/bins.py b/morphlib/bins.py
index 28542962..7dca8172 100644
--- a/morphlib/bins.py
+++ b/morphlib/bins.py
@@ -29,6 +29,7 @@ import errno
import stat
import shutil
import tarfile
+import functools
import zlib
import morphlib
@@ -117,22 +118,64 @@ def create_chunk(rootdir, f, include, dump_memory_profile=None):
return stream.checksum
+def make_tarinfo_path_relative_to(root, info):
+    '''Rewrite the paths in a TarInfo so they are relative to ``root``.
+
+    ``info`` is modified in place and also returned, which makes this
+    function usable as a ``filter`` callback for tarfile once ``root``
+    has been bound.
+    '''
+
+    # tar.gettarinfo() makes all paths relative, so the root must lose
+    # its leading slashes before the two can be compared.
+    relative_root = root.lstrip('/')
+    info.name = os.path.relpath(info.name, relative_root)
+    if info.islnk():
+        # Hard links name their target by its path inside the archive,
+        # so the target needs the same treatment as the member name.
+        info.linkname = os.path.relpath(info.linkname, relative_root)
+    return info
+
+
+def create_chunk_2(rootdir, f, name, include):
+    '''Create a chunk artifact from a list of paths, without deleting them.
+
+    Writes an uncompressed tar stream to ``f`` containing every path in
+    ``include``, in sorted order, with member names made relative to
+    ``rootdir``. The ``rootdir`` entry itself is skipped so that no '.'
+    member is written. Unlike create_chunk(), the files are left on
+    disk once the chunk has been created.
+
+    ``name`` is accepted but not used.
+
+    Returns the checksum accumulated by ChecksummingOutputStream over
+    the tar stream that was written.
+
+    NOTE(review): the output is intended to be identical to that of
+    create_chunk(); this still needs to be confirmed.
+    '''
+
+    # This timestamp is used to normalize the mtime for every file in
+    # chunk artifact. This is useful to avoid problems from smallish
+    # clock skew. It needs to be recent enough, however, that GNU tar
+    # does not complain about an implausibly old timestamp.
+    normalized_timestamp = 683074800
+
+    stream = ChecksummingOutputStream(f)
+    with tarfile.open(fileobj=stream, mode='w|') as tar:
+        for filepath in sorted(include):
+            if filepath == rootdir:
+                # The ChunkBuilder.assemble_chunk_artifact() code path
+                # does not add '.' to the tarfile, so skip the root
+                # directory here to match it.
+                continue
+            tarinfo = tar.gettarinfo(filepath)
+            tarinfo = make_tarinfo_path_relative_to(rootdir, tarinfo)
+            # Normalize the timestamps for everything.
+            tarinfo.ctime = normalized_timestamp
+            tarinfo.mtime = normalized_timestamp
+            if tarinfo.isreg():
+                # addfile() only writes file contents when it is given
+                # an open file object; without one a regular file would
+                # be stored as a header with no data.
+                with open(filepath, 'rb') as source:
+                    tar.addfile(tarinfo, fileobj=source)
+            else:
+                tar.addfile(tarinfo)
+
+    return stream.checksum
+
def create_system(rootdir, f, name):
'''Create a system artifact from the contents of a directory.
'''
- unslashy_root = rootdir[1:]
- def uproot_info(info):
- '''Strip rootdir from a file's path before adding to a tarfile.'''
- info.name = os.path.relpath(info.name, unslashy_root)
- if info.islnk():
- info.linkname = os.path.relpath(info.linkname, unslashy_root)
- return info
-
stream = ChecksummingOutputStream(f)
+
+ # Bind rootdir up front: tarfile calls the filter with only the TarInfo.
+ path_filter = functools.partial(make_tarinfo_path_relative_to, rootdir)
with tarfile.open(fileobj=stream, mode="w|", name=name) as tar:
- tar.add(rootdir, recursive=True, filter=uproot_info)
+ tar.add(rootdir, recursive=True, filter=path_filter)
return stream.checksum
diff --git a/morphlib/localartifactcache.py b/morphlib/localartifactcache.py
index 10ddd638..faef27f4 100644
--- a/morphlib/localartifactcache.py
+++ b/morphlib/localartifactcache.py
@@ -15,8 +15,12 @@
import collections
+import json
+import logging
import os
+import tempfile
import time
+import zlib
import morphlib
@@ -138,3 +142,118 @@ class LocalArtifactCache(object):
for filename in (x for x in self.cachefs.walkfiles()
if x.startswith(cachekey)):
self.cachefs.remove(filename)
+
+ def _calculate_checksum(self, artifact_filename):
+     '''Return the Adler-32 checksum of a whole file.
+
+     The file is read in fixed-size blocks so that large artifacts do
+     not have to be held in memory at once. The result is masked to an
+     unsigned 32-bit integer, and equals zlib.adler32() of the complete
+     file contents.
+     '''
+     # FIXME: pick a block size
+     block_size = 10 * 1024 * 1024  # 10MB
+     # 1 is the Adler-32 starting value, i.e. the checksum of no data.
+     checksum = 1
+     with open(artifact_filename, 'rb') as f:
+         while True:
+             block = f.read(block_size)
+             if not block:
+                 break
+             # Feed the running value back in so every block counts
+             # towards the checksum, not only the first one.
+             checksum = zlib.adler32(block, checksum) & 0xFFFFFFFF
+     return checksum
+
+ def _calculate_unpacked_chunk_checksum(self, chunk_dir):
+     '''Return the checksum of a chunk artifact rebuilt from ``chunk_dir``.
+
+     The unpacked chunk is packed again with
+     morphlib.bins.create_chunk_2() and the checksum of the resulting
+     tar stream is returned, so that it can be compared with the
+     checksum recorded for the packed artifact.
+
+     NOTE(review): this is not the code that builder2.ChunkBuilder uses
+     to create chunks, so the two have not yet been shown to produce
+     identical output.
+     '''
+
+     def filepaths(destdir):
+         # Yield each directory, each symlink listed among the
+         # subdirectories, and each file found below destdir.
+         for dirname, subdirs, basenames in os.walk(destdir):
+             subdirpaths = [os.path.join(dirname, x) for x in subdirs]
+             # os.walk() reports symlinks to directories in subdirs but
+             # does not descend into them by default, so they must be
+             # yielded here. The link test needs the joined path: a
+             # bare name would be resolved against the current
+             # working directory.
+             subdirsymlinks = [x for x in subdirpaths
+                               if os.path.islink(x)]
+             filenames = [os.path.join(dirname, x) for x in basenames]
+             for path in [dirname] + subdirsymlinks + filenames:
+                 yield path
+     paths = filepaths(chunk_dir)
+
+     # Only the checksum is needed, so let the temporary artifact be
+     # removed when the file is closed instead of leaving a copy of
+     # every chunk behind.
+     with tempfile.NamedTemporaryFile() as f:
+         checksum = morphlib.bins.create_chunk_2(
+             chunk_dir, f, name=None, include=paths)
+
+     return checksum
+
+ def validate(self, unpacked_chunk_cache_dir):
+     '''Check for corruption in all cached binary artifacts.
+
+     For every build in the cache that has a 'meta' file containing
+     checksums, each binary artifact is checksummed and compared with
+     the recorded value. If ``unpacked_chunk_cache_dir`` contains a
+     directory named '<cache_key>.<artifact>.d', that unpacked copy is
+     checksummed and compared as well. Strata are skipped.
+
+     Problems are logged with logging.error() and collected per cache
+     key; nothing is returned.
+     '''
+     cache_key = None
+     errors = {}
+
+     # NOTE(review): these counters are maintained but not reported
+     # anywhere yet.
+     n_artifacts = 0
+     n_checksummed_artifacts = 0
+
+     def error(msg):
+         # Uses whichever cache_key the loop below is currently on.
+         errors[cache_key] = errors.get(cache_key, '') + '\n' + msg
+         logging.error(
+             'Error in locally cached build %s. %s' % (cache_key, msg))
+
+     for cache_key, artifacts, last_used in self.list_contents():
+         if len(cache_key) < 64:
+             # Morph itself leaves junk temporary files around in the
+             # artifact cache directory, as does the user. Ignore it.
+             logging.info('Ignoring %s' % cache_key)
+             continue
+
+         binary_artifacts = list(artifacts - {'build-log', 'meta'})
+         if not binary_artifacts:
+             # Only a build log and/or metadata is cached for this
+             # key, so there is nothing to checksum and no artifact
+             # name to take the kind from.
+             logging.info(
+                 'Ignoring %s: no binary artifacts' % cache_key)
+             continue
+
+         kind = binary_artifacts[0].split('.', 1)[0]
+
+         if kind == 'stratum':
+             continue
+
+         logging.info(
+             'Checking artifacts for %s %s' % (kind, cache_key))
+
+         n_artifacts += len(artifacts)
+
+         filename = self._source_metadata_filename(None, cache_key, 'meta')
+         try:
+             with open(filename) as f:
+                 build_info = json.load(f)
+         except (IOError, OSError, ValueError) as e:
+             error('Unable to read source metadata: %s' % e)
+             continue
+
+         if 'checksums' not in build_info:
+             # This is the case for artifacts created by old versions of
+             # Morph. We don't raise an error, for compatibility.
+             logging.warning(
+                 'No checksums for build %s %s.' % (kind, cache_key))
+             continue
+
+         for artifact in binary_artifacts:
+             if '.' not in artifact:
+                 logging.warning('Invalid artifact name %s' % artifact)
+                 continue
+
+             _, artifact_name = artifact.split('.', 1)
+             expected_checksum = build_info['checksums'].get(artifact_name)
+
+             if expected_checksum is None:
+                 error('Checksum missing for artifact %s!' % artifact_name)
+                 continue
+
+             artifact_filename = self.cachefs.getsyspath(
+                 '%s.%s' % (cache_key, artifact))
+             checksum = self._calculate_checksum(artifact_filename)
+
+             if checksum != expected_checksum:
+                 error('Artifact %s has checksum 0x%x, expected 0x%x' %
+                       (artifact, checksum, expected_checksum))
+
+             n_checksummed_artifacts += 1
+
+             # Check for an unpacked version now.
+             cached_name = '%s.%s.d' % (cache_key, artifact)
+             cached_path = os.path.join(unpacked_chunk_cache_dir,
+                                        cached_name)
+             if os.path.exists(cached_path):
+                 checksum = self._calculate_unpacked_chunk_checksum(
+                     cached_path)
+
+                 if checksum != expected_checksum:
+                     error('Unpacked chunk artifact %s has checksum 0x%x, expected 0x%x' %
+                           (artifact, checksum, expected_checksum))
diff --git a/morphlib/plugins/cache_check_plugin.py b/morphlib/plugins/cache_check_plugin.py
index 621d9d8a..4315c265 100644
--- a/morphlib/plugins/cache_check_plugin.py
+++ b/morphlib/plugins/cache_check_plugin.py
@@ -16,6 +16,7 @@
import cliapp
import contextlib
+import os
import uuid
import morphlib
@@ -71,5 +72,11 @@ class CacheCheckPlugin(cliapp.Plugin):
artifacts which are used at build time.
'''
+ self.app.status(
+ msg='Checking all locally cached build artifacts for corruption')
+
lac, rac = morphlib.util.new_artifact_caches(self.app.settings)
- lac.validate()
+ unpacked_chunk_cache_dir = os.path.join(self.app.settings['tempdir'], 'chunks')
+ lac.validate(unpacked_chunk_cache_dir)
+
+ # FIXME: ccache is not validated! don't use ccache, perhaps!