summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- morphlib/bins.py 70
-rw-r--r-- morphlib/builder2.py 64
2 files changed, 49 insertions, 85 deletions
diff --git a/morphlib/bins.py b/morphlib/bins.py
index 6fb7dc5a..23e3b812 100644
--- a/morphlib/bins.py
+++ b/morphlib/bins.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2011-2013 Codethink Limited
+# Copyright (C) 2011-2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -50,60 +50,7 @@ def safe_makefile(self, tarinfo, targetpath):
tarfile.TarFile.makefile = safe_makefile
-def _chunk_filenames(rootdir, regexps, dump_memory_profile=None):
-
- '''Return the filenames for a chunk from the contents of a directory.
-
- Only files and directories that match at least one of the regular
- expressions are accepted. The regular expressions are implicitly
- anchored to the beginning of the string, but not the end. The
- filenames are relative to rootdir.
-
- '''
-
- dump_memory_profile = dump_memory_profile or (lambda msg: None)
-
- def matches(filename):
- return any(x.match(filename) for x in compiled)
-
- def names_to_root(filename):
- yield filename
- while filename != rootdir:
- filename = os.path.dirname(filename)
- yield filename
-
- compiled = [re.compile(x) for x in regexps]
- include = set()
- for dirname, subdirs, basenames in os.walk(rootdir):
- subdirpaths = [os.path.join(dirname, x) for x in subdirs]
- subdirsymlinks = [x for x in subdirpaths if os.path.islink(x)]
- filenames = [os.path.join(dirname, x) for x in basenames]
- for filename in [dirname] + subdirsymlinks + filenames:
- if matches(os.path.relpath(filename, rootdir)):
- for name in names_to_root(filename):
- if name not in include:
- include.add(name)
- else:
- logging.debug('regexp MISMATCH: %s' % filename)
- dump_memory_profile('after walking')
-
- return sorted(include) # get dirs before contents
-
-
-def chunk_contents(rootdir, regexps):
- ''' Return the list of files in a chunk, with the rootdir
- stripped off.
-
- '''
-
- filenames = _chunk_filenames(rootdir, regexps)
- # The first entry is the rootdir directory, which we don't need
- filenames.pop(0)
- contents = [str[len(rootdir):] for str in filenames]
- return contents
-
-
-def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
+def create_chunk(rootdir, f, include, dump_memory_profile=None):
'''Create a chunk from the contents of a directory.
``f`` is an open file handle, to which the tar file is written.
@@ -118,14 +65,15 @@ def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
# does not complain about an implausibly old timestamp.
normalized_timestamp = 683074800
- include = _chunk_filenames(rootdir, regexps, dump_memory_profile)
dump_memory_profile('at beginning of create_chunk')
+ path_pairs = [(relname, os.path.join(rootdir, relname))
+ for relname in include]
tar = tarfile.open(fileobj=f, mode='w')
- for filename in include:
+ for relname, filename in path_pairs:
# Normalize mtime for everything.
tarinfo = tar.gettarinfo(filename,
- arcname=os.path.relpath(filename, rootdir))
+ arcname=relname)
tarinfo.ctime = normalized_timestamp
tarinfo.mtime = normalized_timestamp
if tarinfo.isreg():
@@ -135,11 +83,9 @@ def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
tar.addfile(tarinfo)
tar.close()
- include.remove(rootdir)
- for filename in reversed(include):
+ for relname, filename in reversed(path_pairs):
if os.path.isdir(filename) and not os.path.islink(filename):
- if not os.listdir(filename):
- os.rmdir(filename)
+ continue
else:
os.remove(filename)
dump_memory_profile('after removing in create_chunks')
diff --git a/morphlib/builder2.py b/morphlib/builder2.py
index 34ebaa81..594786e6 100644
--- a/morphlib/builder2.py
+++ b/morphlib/builder2.py
@@ -422,33 +422,51 @@ class ChunkBuilder(BuilderBase):
def assemble_chunk_artifacts(self, destdir): # pragma: no cover
built_artifacts = []
filenames = []
- with self.build_watch('create-chunks'):
- specs = self.artifact.source.morphology['products']
- if len(specs) == 0:
- specs = {
- self.artifact.source.morphology['name']: ['.'],
- }
- names = specs.keys()
- names.sort(key=lambda name: [ord(c) for c in name])
- for artifact_name in names:
- artifact = self.new_artifact(artifact_name)
- patterns = specs[artifact_name]
- patterns += [r'baserock/%s\.' % artifact_name]
+ source = self.artifact.source
+ split_rules = source.split_rules
+
+ def filepaths(destdir):
+ for dirname, subdirs, basenames in os.walk(destdir):
+ subdirsymlinks = [os.path.join(dirname, x) for x in subdirs
+ if os.path.islink(x)]
+ filenames = [os.path.join(dirname, x) for x in basenames]
+ for relpath in (os.path.relpath(x, destdir) for x in
+ [dirname] + subdirsymlinks + filenames):
+ yield relpath
+
+ with self.build_watch('determine-splits'):
+ matches, overlaps, unmatched = \
+ split_rules.partition(filepaths(destdir))
- with self.local_artifact_cache.put(artifact) as f:
- contents = morphlib.bins.chunk_contents(destdir, patterns)
- self.write_metadata(destdir, artifact_name, contents)
+ with self.build_watch('create-chunks'):
+ for chunk_artifact_name, chunk_artifact \
+ in source.artifacts.iteritems():
+ file_paths = matches[chunk_artifact_name]
+ chunk_artifact = source.artifacts[chunk_artifact_name]
+
+ def all_parents(path):
+ while path != '':
+ yield path
+ path = os.path.dirname(path)
+ def parentify(filenames):
+ names = set()
+ for name in filenames:
+ names.update(all_parents(name))
+ return sorted(names)
+ parented_paths = \
+ parentify(file_paths +
+ ['baserock/%s.meta' % chunk_artifact_name])
+
+ with self.local_artifact_cache.put(chunk_artifact) as f:
+ self.write_metadata(destdir, chunk_artifact_name,
+ parented_paths)
- self.app.status(msg='assembling chunk %s' % artifact_name,
- chatty=True)
- self.app.status(msg='assembling into %s' % f.name,
- chatty=True)
self.app.status(msg='Creating chunk artifact %(name)s',
- name=artifact.name)
- morphlib.bins.create_chunk(destdir, f, patterns)
- built_artifacts.append(artifact)
+ name=chunk_artifact_name)
+ morphlib.bins.create_chunk(destdir, f, parented_paths)
+ built_artifacts.append(chunk_artifact)
- files = os.listdir(destdir)
+ for dirname, subdirs, files in os.walk(destdir):
if files:
raise Exception('DESTDIR %s is not empty: %s' %
(destdir, files))