diff options
author | Richard Maw <richard.maw@codethink.co.uk> | 2012-06-14 16:12:40 +0000 |
---|---|---|
committer | Richard Maw <richard.maw@codethink.co.uk> | 2012-06-14 16:12:40 +0000 |
commit | 79cb73283231dca550a647edcfeff5466037d776 (patch) | |
tree | e22e1597256f36e4fb4821f096b2f0bf888437da /morphlib | |
parent | 945d85def198d417de3f5436800e7efa3e9a540e (diff) | |
parent | d9c8acc730ecc7ea203fb0950a7f461f2822ef4f (diff) | |
download | morph-79cb73283231dca550a647edcfeff5466037d776.tar.gz |
Merge branch 'richardmaw/strata-chunk-lists'
Diffstat (limited to 'morphlib')
-rw-r--r-- | morphlib/__init__.py | 1 | ||||
-rw-r--r-- | morphlib/artifactcachereference.py | 37 | ||||
-rw-r--r-- | morphlib/bins.py | 8 | ||||
-rw-r--r-- | morphlib/bins_tests.py | 27 | ||||
-rw-r--r-- | morphlib/builder2.py | 167 | ||||
-rw-r--r-- | morphlib/builder2_tests.py | 36 | ||||
-rw-r--r-- | morphlib/cachekeycomputer.py | 2 |
7 files changed, 190 insertions, 88 deletions
diff --git a/morphlib/__init__.py b/morphlib/__init__.py index baf4c253..4d9ad22e 100644 --- a/morphlib/__init__.py +++ b/morphlib/__init__.py @@ -26,6 +26,7 @@ class Error(cliapp.AppException): import artifact +import artifactcachereference import artifactresolver import bins import buildenvironment diff --git a/morphlib/artifactcachereference.py b/morphlib/artifactcachereference.py new file mode 100644 index 00000000..169bf96d --- /dev/null +++ b/morphlib/artifactcachereference.py @@ -0,0 +1,37 @@ +# Copyright (C) 2012 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +class ArtifactCacheReference(object): + + '''Represent the information needed to retrieve an artifact + + The artifact cache doesn't need to know the dependencies or the + morphology of an artifact, it just needs to know the basename + + The basename could be generated, from the name, cache_key and kind, + but if the algorithm changes then morph wouldn't be able to find + old artifacts with a saved ArtifactCacheReference. + + Conversely if it generated the basename then old strata wouldn't be + able to refer to new chunks, but strata change more often than the chunks. + ''' + def __init__(self, basename): + self._basename = basename + + def basename(self): + return self._basename + + def metadata_basename(self, metadata_name): + return '%s.%s' % (self._basename, metadata_name) diff --git a/morphlib/bins.py b/morphlib/bins.py index 93aa7b15..0c9ecadf 100644 --- a/morphlib/bins.py +++ b/morphlib/bins.py @@ -95,14 +95,6 @@ def create_chunk(rootdir, f, regexps, dump_memory_profile=None): dump_memory_profile('after removing in create_chunks') -def create_stratum(rootdir, f): - '''Create a stratum from the contents of a directory.''' - logging.debug('Creating stratum file %s from %s' % (f.name, rootdir)) - tar = tarfile.open(fileobj=f, mode='w:gz') - tar.add(rootdir, arcname='.') - tar.close() - - def unpack_binary_from_file(f, dirname): # pragma: no cover '''Unpack a binary into a directory. diff --git a/morphlib/bins_tests.py b/morphlib/bins_tests.py index 2da5d047..544e9013 100644 --- a/morphlib/bins_tests.py +++ b/morphlib/bins_tests.py @@ -132,30 +132,3 @@ class ChunkTests(BinsTest): self.assertEqual([x for x,y in self.recursive_lstat(self.instdir)], ['.', 'lib', 'lib/libfoo.so']) - -class StratumTests(BinsTest): - - def setUp(self): - self.tempdir = tempfile.mkdtemp() - self.instdir = os.path.join(self.tempdir, 'inst') - self.stratum_file = os.path.join(self.tempdir, 'stratum') - self.stratum_f = open(self.stratum_file, 'wb') - self.unpacked = os.path.join(self.tempdir, 'unpacked') - - def tearDown(self): - self.stratum_f.close() - shutil.rmtree(self.tempdir) - - def populate_instdir(self): - os.mkdir(self.instdir) - os.mkdir(os.path.join(self.instdir, 'bin')) - - def test_creates_and_unpacks_stratum_exactly(self): - self.populate_instdir() - morphlib.bins.create_stratum(self.instdir, self.stratum_f) - self.stratum_f.flush() - os.mkdir(self.unpacked) - morphlib.bins.unpack_binary(self.stratum_file, self.unpacked) - self.assertEqual(self.recursive_lstat(self.instdir), - self.recursive_lstat(self.unpacked)) - diff --git a/morphlib/builder2.py b/morphlib/builder2.py index caf0d8e1..4191c32a 100644 --- a/morphlib/builder2.py +++ b/morphlib/builder2.py @@ -23,6 +23,7 @@ from collections import defaultdict import tarfile import morphlib +from morphlib.artifactcachereference import ArtifactCacheReference def ldconfig(runcmd, rootdir): # pragma: no cover @@ -60,31 +61,74 @@ def ldconfig(runcmd, rootdir): # pragma: no cover else: logging.debug('No %s, not running ldconfig' % conf) - -def check_overlap(artifact, constituents, lac): #pragma: no cover +def download_depends(constituents, lac, rac, metadatas=None): + for constituent in constituents: + if not lac.has(constituent): + source = rac.get(constituent) + target = lac.put(constituent) + shutil.copyfileobj(source, target) + target.close() + source.close() + if metadatas is not None: + for metadata in metadatas: + if (not lac.has_artifact_metadata(constituent, metadata) + and rac.has_artifact_metadata(constituent, metadata)): + src = rac.get_artifact_metadata(constituent, metadata) + dst = lac.put_artifact_metadata(constituent, metadata) + shutil.copyfileobj(src, dst) + dst.close() + src.close() + +def get_chunk_files(f): # pragma: no cover + tar = tarfile.open(fileobj=f) + for member in tar.getmembers(): + if member.type is not tarfile.DIRTYPE: + yield member.name + tar.close() + +def get_stratum_files(f, lac): # pragma: no cover + for ca in (ArtifactCacheReference(a) for a in json.load(f)): + cf = lac.get(ca) + for filename in get_chunk_files(cf): + yield filename + cf.close() + +def get_overlaps(artifact, constituents, lac): #pragma: no cover # check whether strata overlap installed = defaultdict(set) for dep in constituents: handle = lac.get(dep) - tar = tarfile.open(fileobj=handle) - for member in tar.getmembers(): - if member.type is not tarfile.DIRTYPE: - installed[member.name].add(dep) - tar.close() + if artifact.source.morphology['kind'] == 'stratum': + for filename in get_chunk_files(handle): + installed[filename].add(dep) + elif artifact.source.morphology['kind'] == 'system': + for filename in get_stratum_files(handle, lac): + installed[filename].add(dep) handle.close() overlaps = defaultdict(set) for filename, artifacts in installed.iteritems(): if len(artifacts) > 1: overlaps[frozenset(artifacts)].add(filename) - if len(overlaps) > 0: - logging.warning('Overlaps in artifact %s detected' % artifact.name) - for overlapping, files in sorted(overlaps.iteritems()): - logging.warning(' Artifacts %s overlap with files:' % - ', '.join(sorted(a.name for a in overlapping)) - ) - for filename in sorted(files): - logging.warning(' %s' % filename) - + return overlaps + +def log_overlaps(overlaps): #pragma: no cover + for overlapping, files in sorted(overlaps.iteritems()): + logging.warning(' Artifacts %s overlap with files:' % + ', '.join(sorted(a.name for a in overlapping)) + ) + for filename in sorted(files): + logging.warning(' %s' % filename) + +def write_overlap_metadata(artifact, overlaps, lac): #pragma: no cover + f = lac.put_artifact_metadata(artifact, 'overlaps') + # the big list comprehension is because json can't serialize + # artifacts, sets or dicts with non-string keys + json.dump([ + [ + [a.name for a in afs], list(files) + ] for afs, files in overlaps.iteritems() + ], f, indent=4) + f.close() class BuilderBase(object): @@ -339,40 +383,35 @@ class StratumBuilder(BuilderBase): def build_and_cache(self): # pragma: no cover with self.build_watch('overall-build'): - destdir = self.staging_area.destdir(self.artifact.source) - constituents = [dependency for dependency in self.artifact.dependencies if dependency.source.morphology['kind'] == 'chunk'] - with self.build_watch('unpack-chunks'): + # the only reason the StratumBuilder has to download chunks is to + # check for overlap now that strata are lists of chunks + with self.build_watch('check-chunks'): # download the chunk artifact if necessary - for chunk_artifact in constituents: - if not self.local_artifact_cache.has(chunk_artifact): - source = self.remote_artifact_cache.get(chunk_artifact) - target = self.local_artifact_cache.put(chunk_artifact) - shutil.copyfileobj(source, target) - target.close() - source.close() - + download_depends(constituents, + self.local_artifact_cache, + self.remote_artifact_cache) # check for chunk overlaps - check_overlap(self.artifact, constituents, - self.local_artifact_cache) - - # unpack it from the local artifact cache - for chunk_artifact in constituents: - logging.debug('unpacking chunk %s into stratum %s' % - (chunk_artifact.basename(), - self.artifact.basename())) - f = self.local_artifact_cache.get(chunk_artifact) - morphlib.bins.unpack_binary_from_file(f, destdir) - f.close() - - with self.build_watch('create-binary'): + overlaps = get_overlaps(self.artifact, constituents, + self.local_artifact_cache) + if len(overlaps) > 0: + logging.warning('Overlaps in stratum artifact %s detected' + % self.artifact.name) + log_overlaps(overlaps) + write_overlap_metadata(self.artifact, overlaps, + self.local_artifact_cache) + + with self.build_watch('create-chunk-list'): + lac = self.local_artifact_cache artifact_name = self.artifact.source.morphology['name'] - self.write_metadata(destdir, artifact_name) artifact = self.new_artifact(artifact_name) + meta = self.create_metadata(artifact_name) + with lac.put_artifact_metadata(artifact, 'meta') as f: + json.dump(meta, f, indent=4, sort_keys=True) with self.local_artifact_cache.put(artifact) as f: - morphlib.bins.create_stratum(destdir, f) + json.dump([c.basename() for c in constituents], f) self.save_build_times() @@ -481,24 +520,48 @@ class SystemBuilder(BuilderBase): # pragma: no cover def _unpack_strata(self, path): logging.debug('Unpacking strata to %s' % path) with self.build_watch('unpack-strata'): - # download the stratum artifact if necessary + # download the stratum artifacts if necessary + download_depends(self.artifact.dependencies, + self.local_artifact_cache, + self.remote_artifact_cache, + ('meta',)) + + # download the chunk artifacts if necessary for stratum_artifact in self.artifact.dependencies: - if not self.local_artifact_cache.has(stratum_artifact): - source = self.remote_artifact_cache.get(stratum_artifact) - target = self.local_artifact_cache.put(stratum_artifact) - shutil.copyfileobj(source, target) - target.close() - source.close() + f = self.local_artifact_cache.get(stratum_artifact) + chunks = [ArtifactCacheReference(a) for a in json.load(f)] + download_depends(chunks, + self.local_artifact_cache, + self.remote_artifact_cache) + f.close() # check whether the strata overlap - check_overlap(self.artifact, self.artifact.dependencies, - self.local_artifact_cache) + overlaps = get_overlaps(self.artifact, self.artifact.dependencies, + self.local_artifact_cache) + if len(overlaps) > 0: + logging.warning('Overlaps in system artifact %s detected' % + self.artifact.name) + log_overlaps(overlaps) + write_overlap_metadata(self.artifact, overlaps, + self.local_artifact_cache) # unpack it from the local artifact cache for stratum_artifact in self.artifact.dependencies: f = self.local_artifact_cache.get(stratum_artifact) - morphlib.bins.unpack_binary_from_file(f, path) + for chunk in (ArtifactCacheReference(a) for a in json.load(f)): + chunk_handle = self.local_artifact_cache.get(chunk) + morphlib.bins.unpack_binary_from_file(chunk_handle, path) + chunk_handle.close() f.close() + meta = self.local_artifact_cache.get_artifact_metadata( + stratum_artifact, 'meta') + dst = morphlib.savefile.SaveFile( + os.path.join(path, 'baserock', + '%s.meta' % stratum_artifact.name), 'w') + shutil.copyfileobj(meta, dst) + dst.close() + meta.close() + ldconfig(self.app.runcmd, path) def _create_fstab(self, path): diff --git a/morphlib/builder2_tests.py b/morphlib/builder2_tests.py index 9730b59e..6214891d 100644 --- a/morphlib/builder2_tests.py +++ b/morphlib/builder2_tests.py @@ -80,9 +80,15 @@ class FakeFileHandle(object): def __enter__(self): return self - def __exit__(self, type, value, traceback): + def _writeback(self): self._cache._cached[self._key] = self._string + def __exit__(self, type, value, traceback): + self._writeback() + + def close(self): + self._writeback() + def write(self, string): self._string += string @@ -209,6 +215,34 @@ class BuilderBaseTests(unittest.TestCase): self.assertEqual(sorted(events), sorted(meta['build-times'].keys())) + def test_downloads_depends(self): + lac = FakeArtifactCache() + rac = FakeArtifactCache() + afacts = [FakeArtifact(name) for name in ('a', 'b', 'c')] + for a in afacts: + fh = rac.put(a) + fh.write(a.name) + fh.close() + morphlib.builder2.download_depends(afacts, lac, rac) + self.assertTrue(all(lac.has(a) for a in afacts)) + + def test_downloads_depends_metadata(self): + lac = FakeArtifactCache() + rac = FakeArtifactCache() + afacts = [FakeArtifact(name) for name in ('a', 'b', 'c')] + for a in afacts: + fh = rac.put(a) + fh.write(a.name) + fh.close() + fh = rac.put_artifact_metadata(a, 'meta') + fh.write('metadata') + fh.close() + morphlib.builder2.download_depends(afacts, lac, rac, ('meta',)) + self.assertTrue(all(lac.has(a) for a in afacts)) + self.assertTrue(all(lac.has_artifact_metadata(a, 'meta') + for a in afacts)) + + class ChunkBuilderTests(unittest.TestCase): def setUp(self): diff --git a/morphlib/cachekeycomputer.py b/morphlib/cachekeycomputer.py index e8cbed2c..720ec4c2 100644 --- a/morphlib/cachekeycomputer.py +++ b/morphlib/cachekeycomputer.py @@ -94,6 +94,8 @@ class CacheKeyComputer(object): checksum = hashlib.sha1() self._hash_thing(checksum, le_dict) keys['morphology-sha1'] = checksum.hexdigest() + if kind == 'stratum': + keys['stratum-format-version'] = 1 return keys |