author     Sam Thursfield <sam.thursfield@codethink.co.uk>  2014-05-09 15:18:50 +0000
committer  Sam Thursfield <sam.thursfield@codethink.co.uk>  2014-05-09 15:18:50 +0000
commit     42f597be112e1b6c67ab38d9b31e5b8700d9ae02 (patch)
tree       93e4539b6610ac72f06ac11a1b7f0132617f6206
parent     c6da8346b44920aa8fe3af37d30748bb915a68d0 (diff)
download   morph-42f597be112e1b6c67ab38d9b31e5b8700d9ae02.tar.gz
fixes to artifact cache validation
-rw-r--r--  morphlib/localartifactcache.py          91
-rw-r--r--  morphlib/plugins/cache_check_plugin.py  30
2 files changed, 69 insertions, 52 deletions
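In outline, the patch replaces the adler32-based artifact checksum with a SHA-1 digest computed over the whole file in 10 MB blocks, and changes LocalArtifactCache.validate() to return its findings instead of only logging them. A minimal standalone sketch of the block-wise hashing the patch adopts (sha1_of_file is an illustrative name, not part of the Morph codebase):

    import hashlib

    def sha1_of_file(path, block_size=10 * 1024 * 1024):
        # Read the file in 10 MB blocks so large artifacts are never held
        # in memory at once; mirrors _calculate_checksum() in the diff below.
        hasher = hashlib.sha1()
        with open(path, 'rb') as f:
            while True:
                data = f.read(block_size)
                if not data:
                    break
                hasher.update(data)
        return hasher.hexdigest()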
diff --git a/morphlib/localartifactcache.py b/morphlib/localartifactcache.py
index faef27f4..0631a512 100644
--- a/morphlib/localartifactcache.py
+++ b/morphlib/localartifactcache.py
@@ -15,12 +15,12 @@
import collections
+import hashlib
import json
import logging
import os
import tempfile
import time
-import zlib
import morphlib
@@ -146,11 +146,14 @@ class LocalArtifactCache(object):
def _calculate_checksum(self, artifact_filename):
# FIXME: pick a block size
block_size = 10 * 1024 * 1024 # 10MB
- checksum = 0
+ hasher = hashlib.sha1()
with open(artifact_filename, 'rb') as f:
- block = f.read(block_size)
- checksum = (checksum + zlib.adler32(block)) & 0xFFFFFFFF
- return checksum
+ while True:
+ data = f.read(block_size)
+ if len(data) == 0:
+ break
+ hasher.update(data)
+ return hasher.hexdigest()
def _calculate_unpacked_chunk_checksum(self, chunk_dir):
# create a chunk artifact from the unpacked chunk and return the
@@ -169,10 +172,9 @@ class LocalArtifactCache(object):
filenames = [os.path.join(dirname, x) for x in basenames]
for path in [dirname] + subdirsymlinks + filenames:
yield path
- paths = filepaths(rootdir)
+ paths = filepaths(chunk_dir)
- with tempfile.NamedTemporaryFile(delete=False) as f:
- print ">>>> Filename: %s" % f.name
+ with tempfile.TemporaryFile() as f:
checksum = morphlib.bins.create_chunk_2(
chunk_dir, f, name=None, include=paths)
@@ -180,51 +182,55 @@ class LocalArtifactCache(object):
def validate(self, unpacked_chunk_cache_dir):
'''Check for corruption in all cached binary artifacts.'''
- cache_key = None
- errors = {}
-
- n_artifacts = 0
- n_checksummed_artifacts = 0
+ bad = {}
+ no_checksum = []
- def error(msg):
- errors[cache_key] = errors.get(cache_key, '') + '\n' + msg
- logging.error(
- 'Error in locally cached build %s. %s' % (cache_key, msg))
+ def error(msg, *args):
+ msg = msg % args
+ bad[cache_key] = bad.get(cache_key, []) + [msg]
+ logging.error('Found artifact corruption: %s' % msg)
for cache_key, artifacts, last_used in self.list_contents():
- if len(cache_key) < 64:
+ binary_artifacts = list(artifacts - {'build-log', 'meta'})
+
+ if len(cache_key) < 64 or len(binary_artifacts) == 0:
# Morph itself leaves junk temporary files around in the
# artifact cache directory, as does the user. Ignore it.
logging.info('Ignoring %s' % cache_key)
continue
- binary_artifacts = list(artifacts - {'build-log', 'meta'})
kind = binary_artifacts[0].split('.', 1)[0]
if kind == 'stratum':
continue
- logging.info(
- msg='Checking artifacts for %s %s' % (kind, cache_key))
-
- n_artifacts += len(artifacts)
+ display_name = '%s %s' % (kind, cache_key)
+ logging.info('Checking artifacts for %s' % display_name)
filename = self._source_metadata_filename(None, cache_key, 'meta')
+ if not os.path.exists(filename):
+ # This artifact was downloaded from Trove. :(
+ logging.warning('No source metadata for %s.' % display_name)
+ continue
+
try:
with open(filename) as f:
build_info = json.load(f)
except (IOError, OSError, ValueError) as e:
- error('Unable to read source metadata: %s' % e)
+ error(
+ '%s: unable to read source metadata: %s', display_name, e)
continue
if 'checksums' not in build_info:
# This is the case for artifacts created by old versions of
# Morph. We don't raise an error, for compatibility.
- logging.warning(
- 'No checksums for build %s %s.' % (kind, cache_key))
+ logging.warning('No checksums for %s.', display_name)
+ no_checksum.append(display_name)
continue
for artifact in binary_artifacts:
+ display_name = '%s.%s' % (cache_key, artifact)
+
if '.' not in artifact:
logging.warning('Invalid artifact name %s' % artifact)
continue
@@ -233,7 +239,7 @@ class LocalArtifactCache(object):
expected_checksum = build_info['checksums'].get(artifact_name)
if expected_checksum == None:
- error('Checksum missing for artifact %s!' % artifact_name)
+ error('%s: checksum missing!', display_name)
continue
artifact_filename = self.cachefs.getsyspath(
@@ -241,19 +247,18 @@ class LocalArtifactCache(object):
checksum = self._calculate_checksum(artifact_filename)
if checksum != expected_checksum:
- error('Artifact %s has checksum 0x%x, expected 0x%x' %
- (artifact, checksum, expected_checksum))
-
- n_checksummed_artifacts += 1
-
- # Check for an unpacked version now.
- cached_name = '%s.%s.d' % (cache_key, artifact)
- cached_path = os.path.join(unpacked_chunk_cache_dir,
- cached_name)
- if os.path.exists(cached_path):
- checksum = self._calculate_unpacked_chunk_checksum(
- cached_path)
-
- if checksum != expected_checksum:
- error('Unpacked chunk artifact %s has checksum 0x%x, expected 0x%x' %
- (artifact, checksum, expected_checksum))
+ error('%s has bad checksum %s', display_name, checksum)
+
+ if kind == 'chunk':
+ unpacked_name = '%s.%s.d' % (cache_key, artifact)
+ unpacked_path = os.path.join(
+ unpacked_chunk_cache_dir, unpacked_name)
+ if os.path.exists(unpacked_path):
+ checksum = self._calculate_unpacked_chunk_checksum(
+ unpacked_path)
+
+ if checksum != expected_checksum:
+ error('unpacked chunk %s has bad checksum %s',
+ unpacked_path, checksum)
+
+ return bad, no_checksum
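With this change, validate() returns a pair: a dict mapping each corrupt cache key to a list of error messages, and a list of builds whose metadata predates checksums. A hedged sketch of how a caller might consume that pair (lac and unpacked_chunk_cache_dir stand in for the objects used by the plugin below; Python 2 iteritems(), as in the rest of the codebase):

    bad, no_checksum = lac.validate(unpacked_chunk_cache_dir)

    # Report corrupt builds, one line per recorded error.
    for cache_key, messages in bad.iteritems():
        for msg in messages:
            print('  %s' % msg)

    # Builds made by older Morph versions carry no checksums and are skipped.
    if no_checksum:
        print('%i builds have no checksums' % len(no_checksum))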
diff --git a/morphlib/plugins/cache_check_plugin.py b/morphlib/plugins/cache_check_plugin.py
index 4315c265..0c4206c5 100644
--- a/morphlib/plugins/cache_check_plugin.py
+++ b/morphlib/plugins/cache_check_plugin.py
@@ -15,9 +15,7 @@
import cliapp
-import contextlib
import os
-import uuid
import morphlib
@@ -56,14 +54,13 @@ class CacheCheckPlugin(cliapp.Plugin):
git_errors = lrc.validate()
self.app.output.write(
- 'Found corruption in %i cached git repos.\n' % len(git_errors))
+ 'Found corruption in %i cached git repo(s).\n' % len(git_errors))
for repo_dir, error_text in git_errors.iteritems():
self.app.output.write(' %s\n' % repo_dir)
- if self.app.settings['verbose']:
- error_text_indented = '\n'.join(
- [' ' % line for line in error_text.split('\n')])
- self.app.output.write(" %s\n" % error_text_indented)
+ error_text_indented = '\n'.join(
+ [' ' + line for line in error_text.split('\n')])
+ self.app.output.write(" %s\n" % error_text_indented)
def check_artifact_cache(self, args):
'''Check for corruption in the local cache of built artifacts.
@@ -76,7 +73,22 @@ class CacheCheckPlugin(cliapp.Plugin):
msg='Checking all locally cached build artifacts for corruption')
lac, rac = morphlib.util.new_artifact_caches(self.app.settings)
- unpacked_chunk_cache_dir = os.path.join(self.app.settings['tempdir'], 'chunks')
- lac.validate(unpacked_chunk_cache_dir)
+ unpacked_chunk_cache_dir = os.path.join(
+ self.app.settings['tempdir'], 'chunks')
+ errors, no_checksum = lac.validate(unpacked_chunk_cache_dir)
+
+ if len(no_checksum) > 0:
+ self.app.output.write(
+ '%i builds do not have checksums' % len(no_checksum))
+ if self.app.settings['verbose']:
+ for name in no_checksum:
+ self.app.output.write(' %s\n' % name)
+
+ self.app.output.write(
'Found corruption in %i cached build(s).\n' % len(errors))
+
+ for cache_key, error_list in errors.iteritems():
+ for line in error_list:
+ self.app.output.write(" %s\n" % line)
# FIXME: ccache is not validated! don't use ccache, perhaps!