summary | refs | log | tree | commit | diff
path: root/morphlib/bins.py
diff options
context:
space:
mode:
Diffstat (limited to 'morphlib/bins.py')
-rw-r--r-- morphlib/bins.py 70
1 files changed, 8 insertions, 62 deletions
diff --git a/morphlib/bins.py b/morphlib/bins.py
index 6fb7dc5a..23e3b812 100644
--- a/morphlib/bins.py
+++ b/morphlib/bins.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2011-2013 Codethink Limited
+# Copyright (C) 2011-2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -50,60 +50,7 @@ def safe_makefile(self, tarinfo, targetpath):
tarfile.TarFile.makefile = safe_makefile
-def _chunk_filenames(rootdir, regexps, dump_memory_profile=None):
-
- '''Return the filenames for a chunk from the contents of a directory.
-
- Only files and directories that match at least one of the regular
- expressions are accepted. The regular expressions are implicitly
- anchored to the beginning of the string, but not the end. The
- filenames are relative to rootdir.
-
- '''
-
- dump_memory_profile = dump_memory_profile or (lambda msg: None)
-
- def matches(filename):
- return any(x.match(filename) for x in compiled)
-
- def names_to_root(filename):
- yield filename
- while filename != rootdir:
- filename = os.path.dirname(filename)
- yield filename
-
- compiled = [re.compile(x) for x in regexps]
- include = set()
- for dirname, subdirs, basenames in os.walk(rootdir):
- subdirpaths = [os.path.join(dirname, x) for x in subdirs]
- subdirsymlinks = [x for x in subdirpaths if os.path.islink(x)]
- filenames = [os.path.join(dirname, x) for x in basenames]
- for filename in [dirname] + subdirsymlinks + filenames:
- if matches(os.path.relpath(filename, rootdir)):
- for name in names_to_root(filename):
- if name not in include:
- include.add(name)
- else:
- logging.debug('regexp MISMATCH: %s' % filename)
- dump_memory_profile('after walking')
-
- return sorted(include) # get dirs before contents
-
-
-def chunk_contents(rootdir, regexps):
- ''' Return the list of files in a chunk, with the rootdir
- stripped off.
-
- '''
-
- filenames = _chunk_filenames(rootdir, regexps)
- # The first entry is the rootdir directory, which we don't need
- filenames.pop(0)
- contents = [str[len(rootdir):] for str in filenames]
- return contents
-
-
-def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
+def create_chunk(rootdir, f, include, dump_memory_profile=None):
'''Create a chunk from the contents of a directory.
``f`` is an open file handle, to which the tar file is written.
@@ -118,14 +65,15 @@ def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
# does not complain about an implausibly old timestamp.
normalized_timestamp = 683074800
- include = _chunk_filenames(rootdir, regexps, dump_memory_profile)
dump_memory_profile('at beginning of create_chunk')
+ path_pairs = [(relname, os.path.join(rootdir, relname))
+ for relname in include]
tar = tarfile.open(fileobj=f, mode='w')
- for filename in include:
+ for relname, filename in path_pairs:
# Normalize mtime for everything.
tarinfo = tar.gettarinfo(filename,
- arcname=os.path.relpath(filename, rootdir))
+ arcname=relname)
tarinfo.ctime = normalized_timestamp
tarinfo.mtime = normalized_timestamp
if tarinfo.isreg():
@@ -135,11 +83,9 @@ def create_chunk(rootdir, f, regexps, dump_memory_profile=None):
tar.addfile(tarinfo)
tar.close()
- include.remove(rootdir)
- for filename in reversed(include):
+ for relname, filename in reversed(path_pairs):
if os.path.isdir(filename) and not os.path.islink(filename):
- if not os.listdir(filename):
- os.rmdir(filename)
+ continue
else:
os.remove(filename)
dump_memory_profile('after removing in create_chunks')