summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Maw <richard.maw@codethink.co.uk>2013-12-18 17:21:41 +0000
committerRichard Maw <richard.maw@codethink.co.uk>2014-01-16 16:58:24 +0000
commitc5c2583dcf6f3dffad06b1e635ed3e9c5fd6b6b2 (patch)
treedf427e481e5afd279c7da2fdafac404f7d175f1d
parent137645ee210637ce7fee2f1a5a6ad17f4e6674c0 (diff)
downloadmorph-c5c2583dcf6f3dffad06b1e635ed3e9c5fd6b6b2.tar.gz
Split chunk morphologies according to new rules
Filenames are now matched before chunks are constructed, so bins.create_chunk now takes a list of relative file names. bins.chunk_contents is gone, since this is now handled by passing source.split_rules.partition the file names. We now don't consider it to be a problem for directories to remain in the DESTDIR after artifacts have been removed, since we need to handle file matches implying their parent directories, and explicit matches against directories. NOTE: The bins_tests were broken in this patch, and are fixed in the next. This was done to try and aid readability of the patch series. Full functionality is still broken until stratum splitting is fixed.
-rw-r--r--morphlib/bins.py70
-rw-r--r--morphlib/builder2.py64
2 files changed, 49 insertions, 85 deletions
diff --git a/morphlib/bins.py b/morphlib/bins.py
index 6fb7dc5a..23e3b812 100644
--- a/morphlib/bins.py
+++ b/morphlib/bins.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2011-2013 Codethink Limited
+# Copyright (C) 2011-2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -50,60 +50,7 @@ def safe_makefile(self, tarinfo, targetpath):
tarfile.TarFile.makefile = safe_makefile
-def _chunk_filenames(rootdir, regexps, dump_memory_profile=None):
-
- '''Return the filenames for a chunk from the contents of a directory.
-
- Only files and directories that match at least one of the regular
- expressions are accepted. The regular expressions are implicitly
- anchored to the beginning of the string, but not the end. The
- filenames are relative to rootdir.
-
- '''
-
- dump_memory_profile = dump_memory_profile or (lambda msg: None)
-
- def matches(filename):
- return any(x.match(filename) for x in compiled)
-
- def names_to_root(filename):
- yield filename
- while filename != rootdir:
- filename = os.path.dirname(filename)
- yield filename
-
- compiled = [re.compile(x) for x in regexps]
- include = set()
- for dirname, subdirs, basenames in os.walk(rootdir):
- subdirpaths = [os.path.join(dirname, x) for x in subdirs]
- subdirsymlinks = [x for x in subdirpaths if os.path.islink(x)]
- filenames = [os.path.join(dirname, x) for x in basenames]
- for filename in [dirname] + subdirsymlinks + filenames:
- if matches(os.path.relpath(filename, rootdir)):
- for name in names_to_root(filename):
- if name not in include:
- include.add(name)
- else:
- logging.debug('regexp MISMATCH: %s' % filename)
- dump_memory_profile('after walking')
-
- return sorted(include) # get dirs before contents
-
-
-def chunk_contents(rootdir, regexps):
- ''' Return the list of files in a chunk, with the rootdir
- stripped off.
-
- '''
-
- filenames = _chunk_filenames(rootdir, regexps)
- # The first entry is the rootdir directory, which we don't need
- filenames.pop(0)
- contents = [str[len(rootdir):] for str in filenames]
- return contents
-
-
-def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
+def create_chunk(rootdir, f, include, dump_memory_profile=None):
'''Create a chunk from the contents of a directory.
``f`` is an open file handle, to which the tar file is written.
@@ -118,14 +65,15 @@ def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
# does not complain about an implausibly old timestamp.
normalized_timestamp = 683074800
- include = _chunk_filenames(rootdir, regexps, dump_memory_profile)
dump_memory_profile('at beginning of create_chunk')
+ path_pairs = [(relname, os.path.join(rootdir, relname))
+ for relname in include]
tar = tarfile.open(fileobj=f, mode='w')
- for filename in include:
+ for relname, filename in path_pairs:
# Normalize mtime for everything.
tarinfo = tar.gettarinfo(filename,
- arcname=os.path.relpath(filename, rootdir))
+ arcname=relname)
tarinfo.ctime = normalized_timestamp
tarinfo.mtime = normalized_timestamp
if tarinfo.isreg():
@@ -135,11 +83,9 @@ def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
tar.addfile(tarinfo)
tar.close()
- include.remove(rootdir)
- for filename in reversed(include):
+ for relname, filename in reversed(path_pairs):
if os.path.isdir(filename) and not os.path.islink(filename):
- if not os.listdir(filename):
- os.rmdir(filename)
+ continue
else:
os.remove(filename)
dump_memory_profile('after removing in create_chunks')
diff --git a/morphlib/builder2.py b/morphlib/builder2.py
index 34ebaa81..594786e6 100644
--- a/morphlib/builder2.py
+++ b/morphlib/builder2.py
@@ -422,33 +422,51 @@ class ChunkBuilder(BuilderBase):
def assemble_chunk_artifacts(self, destdir): # pragma: no cover
built_artifacts = []
filenames = []
- with self.build_watch('create-chunks'):
- specs = self.artifact.source.morphology['products']
- if len(specs) == 0:
- specs = {
- self.artifact.source.morphology['name']: ['.'],
- }
- names = specs.keys()
- names.sort(key=lambda name: [ord(c) for c in name])
- for artifact_name in names:
- artifact = self.new_artifact(artifact_name)
- patterns = specs[artifact_name]
- patterns += [r'baserock/%s\.' % artifact_name]
+ source = self.artifact.source
+ split_rules = source.split_rules
+
+ def filepaths(destdir):
+ for dirname, subdirs, basenames in os.walk(destdir):
+ subdirsymlinks = [os.path.join(dirname, x) for x in subdirs
+ if os.path.islink(x)]
+ filenames = [os.path.join(dirname, x) for x in basenames]
+ for relpath in (os.path.relpath(x, destdir) for x in
+ [dirname] + subdirsymlinks + filenames):
+ yield relpath
+
+ with self.build_watch('determine-splits'):
+ matches, overlaps, unmatched = \
+ split_rules.partition(filepaths(destdir))
- with self.local_artifact_cache.put(artifact) as f:
- contents = morphlib.bins.chunk_contents(destdir, patterns)
- self.write_metadata(destdir, artifact_name, contents)
+ with self.build_watch('create-chunks'):
+ for chunk_artifact_name, chunk_artifact \
+ in source.artifacts.iteritems():
+ file_paths = matches[chunk_artifact_name]
+ chunk_artifact = source.artifacts[chunk_artifact_name]
+
+ def all_parents(path):
+ while path != '':
+ yield path
+ path = os.path.dirname(path)
+ def parentify(filenames):
+ names = set()
+ for name in filenames:
+ names.update(all_parents(name))
+ return sorted(names)
+ parented_paths = \
+ parentify(file_paths +
+ ['baserock/%s.meta' % chunk_artifact_name])
+
+ with self.local_artifact_cache.put(chunk_artifact) as f:
+ self.write_metadata(destdir, chunk_artifact_name,
+ parented_paths)
- self.app.status(msg='assembling chunk %s' % artifact_name,
- chatty=True)
- self.app.status(msg='assembling into %s' % f.name,
- chatty=True)
self.app.status(msg='Creating chunk artifact %(name)s',
- name=artifact.name)
- morphlib.bins.create_chunk(destdir, f, patterns)
- built_artifacts.append(artifact)
+ name=chunk_artifact_name)
+ morphlib.bins.create_chunk(destdir, f, parented_paths)
+ built_artifacts.append(chunk_artifact)
- files = os.listdir(destdir)
+ for dirname, subdirs, files in os.walk(destdir):
if files:
raise Exception('DESTDIR %s is not empty: %s' %
(destdir, files))