diff options
-rw-r--r-- | morphlib/bins.py | 26 |
-rw-r--r-- | morphlib/localartifactcache.py | 19 |
2 files changed, 24 insertions, 21 deletions
diff --git a/morphlib/bins.py b/morphlib/bins.py index 1ce5a0eb..956a9939 100644 --- a/morphlib/bins.py +++ b/morphlib/bins.py @@ -80,8 +80,7 @@ class HashedOutputStream(object): self.f.write(data, *args, **kwargs) self.hasher.update(data) - # FIXME: rename (hash is a builtin) - def hash(self): + def hexdigest(self): return self.hasher.hexdigest() @@ -127,7 +126,7 @@ def create_chunk(rootdir, f, include, dump_memory_profile=None): os.remove(filename) dump_memory_profile('after removing in create_chunks') - return stream.hash() + return stream.hexdigest() def make_tarinfo_path_relative_to(root, info): @@ -144,10 +143,10 @@ def make_tarinfo_path_relative_to(root, info): def create_chunk_2(rootdir, f, name, include): '''Create a chunk artifact, new way. - Output should be identical to create_chunk(), but it doesn't delete the - files after creating the chunk, and doesn't require the caller to work - out all the files that should go in. (But it does that because of chunk - splitting!!! *OH*.....) + Output is identical to create_chunk(), but it doesn't delete the files + after creating the chunk (which we don't want when verifying an unpacked + chunk artifact). + ''' # This timestamp is used to normalize the mtime for every file in @@ -160,11 +159,12 @@ def create_chunk_2(rootdir, f, name, include): with tarfile.open(fileobj=stream, mode='w|') as tar: for filepath in sorted(include): if filepath == rootdir: - # I'm not sure how the ChunkBuilder.assemble_chunk_artifact() - # code path manages to avoid adding '.' to the tarfile, but it - # does + # Avoid '.' in the tarfile. This makes us compatible with + # most existing chunk artifacts, but generally the -misc + # artifact actually *does* contain '.'. We should prevent + # that so that packed chunks can be fully reproduced from + # unpacked ones. continue - # Normalize mtime for everything. 
tarinfo = tar.gettarinfo(filepath) tarinfo = make_tarinfo_path_relative_to(rootdir, tarinfo) tarinfo.ctime = normalized_timestamp @@ -176,7 +176,7 @@ def create_chunk_2(rootdir, f, name, include): else: tar.addfile(tarinfo) - return stream.hash() + return stream.hexdigest() def create_system(rootdir, f, name): @@ -190,7 +190,7 @@ def create_system(rootdir, f, name): with tarfile.open(fileobj=stream, mode="w|", name=name) as tar: tar.add(rootdir, recursive=True, filter=path_filter) - return stream.hash + return stream.hexdigest() def unpack_binary_from_file(f, dirname): # pragma: no cover diff --git a/morphlib/localartifactcache.py b/morphlib/localartifactcache.py index 0631a512..91faadae 100644 --- a/morphlib/localartifactcache.py +++ b/morphlib/localartifactcache.py @@ -156,14 +156,17 @@ class LocalArtifactCache(object): return hasher.hexdigest() def _calculate_unpacked_chunk_checksum(self, chunk_dir): - # create a chunk artifact from the unpacked chunk and return the - # checksum. It should be identical, right ?? - # - # This code is not the same code used in builder2.ChunkBuilder. - # It's actually much better and as soon as I've checked that it - # produces identical results it should be used in builder2 too. - # I'm especially confused why bins.create_chunk() removes files, - # instead of leaving it up to the ChunkBuilder code. + '''Calculate checksum of an unpacked chunk artifact. + + The repacked chunk artifact should be identical to the original. This + is not true for -misc artifacts, currently, due to '.' being one of + the files. + + Currently a different code path is used compared to + builder2.ChunkBuilder. This one is much simpler and the old one + in ChunkBuilder should be replaced, I think. + + ''' def filepaths(destdir): for dirname, subdirs, basenames in os.walk(destdir): |