From e59ade699e91764ae02d63b853ae7f3bc6c1c3d4 Mon Sep 17 00:00:00 2001 From: Richard Maw Date: Tue, 13 Aug 2013 11:08:17 +0100 Subject: fsutils: add invert_paths function This will list all the paths generated by the walker generator function that aren't in the specified set. It removes directories from those returned by the walker, since with os.walk(topdown=True) this culls the search space. In the set of provided paths and the set of returned paths, if a directory is given, then its contents are virtually part of the set. This oddly specific behaviour is because invert_paths is to be used with linux-user-chroot to mark subtrees as read-only, when it only has a set of paths it wants to keep writable. It takes a walker, rather than being given a path and using os.walk, so that it is a pure function, so is easier to unit test. --- morphlib/fsutils.py | 82 ++++++++++++++++++++++++++++++++++++++++++----- morphlib/fsutils_tests.py | 74 ++++++++++++++++++++++++++++++++++++++++++ without-test-modules | 1 - 3 files changed, 148 insertions(+), 9 deletions(-) create mode 100644 morphlib/fsutils_tests.py diff --git a/morphlib/fsutils.py b/morphlib/fsutils.py index 9786e387..84884be8 100644 --- a/morphlib/fsutils.py +++ b/morphlib/fsutils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012 Codethink Limited +# Copyright (C) 2012-2013 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,18 +17,18 @@ import os import re -def create_image(runcmd, image_name, size): +def create_image(runcmd, image_name, size): # pragma: no cover # FIXME a pure python implementation may be better runcmd(['dd', 'if=/dev/zero', 'of=' + image_name, 'bs=1', 'seek=%d' % size, 'count=0']) -def partition_image(runcmd, image_name): +def partition_image(runcmd, image_name): # pragma: no cover # FIXME make this more flexible with partitioning options runcmd(['sfdisk', image_name], feed_stdin='1,,83,*\n') -def setup_device_mapping(runcmd, image_name): +def setup_device_mapping(runcmd, image_name): # pragma: no cover findstart = re.compile(r"start=\s+(\d+),") out = runcmd(['sfdisk', '-d', image_name]) for line in out.splitlines(): @@ -43,23 +43,89 @@ def setup_device_mapping(runcmd, image_name): return device.strip() -def create_fs(runcmd, partition): +def create_fs(runcmd, partition): # pragma: no cover runcmd(['mkfs.btrfs', '-L', 'baserock', partition]) -def mount(runcmd, partition, mount_point): +def mount(runcmd, partition, mount_point): # pragma: no cover if not os.path.exists(mount_point): os.mkdir(mount_point) runcmd(['mount', partition, mount_point]) -def unmount(runcmd, mount_point): +def unmount(runcmd, mount_point): # pragma: no cover runcmd(['umount', mount_point]) -def undo_device_mapping(runcmd, image_name): +def undo_device_mapping(runcmd, image_name): # pragma: no cover out = runcmd(['losetup', '-j', image_name]) for line in out.splitlines(): i = line.find(':') device = line[:i] runcmd(['losetup', '-d', device]) + + +def invert_paths(tree_walker, paths): + '''List paths from `tree_walker` that are not in `paths`. + + Given a traversal of a tree and a set of paths separated by os.sep, + return the files and directories that are not part of the set of + paths, culling directories that do not need to be recursed into, + if the traversal supports this. + + `tree_walker` is expected to follow similar behaviour to `os.walk()`. + + This function will remove directores from the ones listed, to avoid + traversing into these subdirectories, if it doesn't need to. + + As such, if a directory is returned, it is implied that its contents + are also not in the set of paths. + + If the tree walker does not support culling the traversal this way, + such as `os.walk(root, topdown=False)`, then the contents will also + be returned. + + The purpose for this is to list the directories that can be made + read-only, such that it would leave everything in paths writable. + + Each path in `paths` is expected to begin with the same path as + yielded by the tree walker. + + ''' + + def is_subpath(prefix, path): + prefix_components = prefix.split(os.sep) + path_components = path.split(os.sep) + return path_components[:len(prefix_components)] == prefix_components + + for dirpath, dirnames, filenames in tree_walker: + + dn_copy = list(dirnames) + for subdir in dn_copy: + subdirpath = os.path.join(dirpath, subdir) + for p in paths: + # Subdir is an exact match for a given path + # Don't recurse into it, so remove from list + # Also don't yield it as we're inverting + if subdirpath == p: + dirnames.remove(subdir) + break + # This directory is a parent directory of one + # of our paths, recurse into it, but don't yield it + elif is_subpath(subdirpath, p): + break + else: + dirnames.remove(subdir) + yield subdirpath + + for filename in filenames: + fullpath = os.path.join(dirpath, filename) + for p in paths: + # The file path is a child of one of the paths + # or is equal. Don't yield because either it is + # one of the specified paths, or is a file in a + # directory specified by a path + if is_subpath(p, fullpath): + break + else: + yield fullpath diff --git a/morphlib/fsutils_tests.py b/morphlib/fsutils_tests.py new file mode 100644 index 00000000..f49e6f89 --- /dev/null +++ b/morphlib/fsutils_tests.py @@ -0,0 +1,74 @@ +# Copyright (C) 2013 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +import os +import unittest + +import morphlib + + +def dummy_top_down_walker(root, treedict): + '''Something that imitates os.walk, but with a dict''' + + subdirs = [k for k in treedict if isinstance(treedict[k], dict)] + files = [k for k in treedict if not isinstance(treedict[k], dict)] + yield root, subdirs, files + for subdir in subdirs: + subwalker = dummy_top_down_walker(os.path.join(root, subdir), + treedict[subdir]) + for result in subwalker: + yield result + + +class InvertPathsTests(unittest.TestCase): + + def setUp(self): + self.flat_tree = {"foo": None, "bar": None, "baz": None} + self.nested_tree = { + "foo": { + "bar": None, + "baz": None, + }, + "fs": { + "btrfs": None, + "ext2": None, + "ext3": None, + "ext4": None, + "nfs": None, + }, + } + + def test_flat_lists_single_files(self): + walker = dummy_top_down_walker('.', self.flat_tree) + self.assertEqual(sorted(["./foo", "./bar", "./baz"]), + sorted(morphlib.fsutils.invert_paths(walker, []))) + + def test_flat_excludes_listed_files(self): + walker = dummy_top_down_walker('.', self.flat_tree) + self.assertTrue( + "./bar" not in morphlib.fsutils.invert_paths(walker, ["./bar"])) + + def test_nested_excludes_listed_files(self): + walker = dummy_top_down_walker('.', self.nested_tree) + excludes = ["./foo/bar", "./fs/nfs"] + found = frozenset(morphlib.fsutils.invert_paths(walker, excludes)) + self.assertTrue(all(path not in found for path in excludes)) + + def test_nested_excludes_whole_dir(self): + walker = dummy_top_down_walker('.', self.nested_tree) + found = frozenset(morphlib.fsutils.invert_paths(walker, ["./foo"])) + unexpected = ("./foo", "./foo/bar", "./foo/baz") + self.assertTrue(all(path not in found for path in unexpected)) diff --git a/without-test-modules b/without-test-modules index 9e2f39aa..baac8bd5 100644 --- a/without-test-modules +++ b/without-test-modules @@ -3,7 +3,6 @@ morphlib/artifactcachereference.py morphlib/builddependencygraph.py morphlib/tester.py morphlib/git.py -morphlib/fsutils.py morphlib/app.py morphlib/mountableimage.py morphlib/extractedtarball.py -- cgit v1.2.1 From b43a0b8ed9a26ff8a39189eb1b4534a1418326a9 Mon Sep 17 00:00:00 2001 From: Richard Maw Date: Tue, 13 Aug 2013 16:30:06 +0000 Subject: stagingarea: use invert_paths to bind-mount ro This improves upon the logic by allowing subdirectories to be marked as writable. This is not needed in its state here, but it will be built upon. It also does not attempt to make symlinks read-only, since the symlink resolution is done before chrooting, so there will be dangling links, which cause linux-user-chroot to fail during the bootstrap. This also uses the --chdir option of linux-user-chroot instead of running a shell script to cd and run the command. --- morphlib/stagingarea.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/morphlib/stagingarea.py b/morphlib/stagingarea.py index 3d756df7..63d145c3 100644 --- a/morphlib/stagingarea.py +++ b/morphlib/stagingarea.py @@ -287,18 +287,18 @@ class StagingArea(object): else: cwd = '/' - do_not_mount_dirs = [self.builddirname, self.destdirname, - 'dev', 'proc', 'tmp'] - - real_argv = ['linux-user-chroot'] - for d in os.listdir(self.dirname): - if d not in do_not_mount_dirs: - if os.path.isdir(os.path.join(self.dirname, d)): - real_argv += ['--mount-readonly', '/'+d] + do_not_mount_dirs = [os.path.join(self.dirname, d) + for d in (self.builddirname, + self.destdirname, + 'dev', 'proc', 'tmp')] + + real_argv = ['linux-user-chroot', '--chdir', cwd] + for d in morphlib.fsutils.invert_paths(os.walk(self.dirname), + do_not_mount_dirs): + if not os.path.islink(d): + real_argv += ['--mount-readonly', self.relative(d)] real_argv += [self.dirname] - real_argv += ['sh', '-c', 'cd "$1" && shift && exec "$@"', '--', - cwd] real_argv += argv try: -- cgit v1.2.1 From 0569173401faa85d044c77ce9f8b1a247456c878 Mon Sep 17 00:00:00 2001 From: Richard Maw Date: Wed, 14 Aug 2013 09:08:20 +0000 Subject: stagingarea: use linux-user-chroot in bootstraps This changes the semantics of the use_chroot flag to instead mean chrooting to / --- morphlib/stagingarea.py | 58 +++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/morphlib/stagingarea.py b/morphlib/stagingarea.py index 63d145c3..ba6fd53a 100644 --- a/morphlib/stagingarea.py +++ b/morphlib/stagingarea.py @@ -252,8 +252,8 @@ class StagingArea(object): builddir = self.builddir(source) destdir = self.destdir(source) - self.builddirname = self.relative(builddir).lstrip('/') - self.destdirname = self.relative(destdir).lstrip('/') + self.builddirname = builddir + self.destdirname = destdir self.do_mounts(setup_mounts) @@ -279,36 +279,38 @@ class StagingArea(object): kwargs['env'].update(kwargs['extra_env']) del kwargs['extra_env'] - if self.use_chroot: - cwd = kwargs.get('cwd') or '/' - if 'cwd' in kwargs: - cwd = kwargs['cwd'] - del kwargs['cwd'] - else: - cwd = '/' + cwd = kwargs.get('cwd') or '/' + if 'cwd' in kwargs: + cwd = kwargs['cwd'] + del kwargs['cwd'] + else: + cwd = '/' - do_not_mount_dirs = [os.path.join(self.dirname, d) - for d in (self.builddirname, - self.destdirname, - 'dev', 'proc', 'tmp')] + chroot_dir = self.dirname if self.use_chroot else '/' - real_argv = ['linux-user-chroot', '--chdir', cwd] - for d in morphlib.fsutils.invert_paths(os.walk(self.dirname), - do_not_mount_dirs): - if not os.path.islink(d): - real_argv += ['--mount-readonly', self.relative(d)] - real_argv += [self.dirname] + do_not_mount_dirs = [os.path.join(self.dirname, d) + for d in (self.builddirname, + self.destdirname, + 'dev', 'proc', 'tmp')] - real_argv += argv + logging.debug("Not mounting dirs %r" % do_not_mount_dirs) - try: - return self._app.runcmd(real_argv, **kwargs) - except cliapp.AppException as e: - raise cliapp.AppException('In staging area %s: running ' - 'command \'%s\' failed.' % - (self.dirname, ' '.join(argv))) - else: - return self._app.runcmd(argv, **kwargs) + real_argv = ['linux-user-chroot', '--chdir', cwd] + for d in morphlib.fsutils.invert_paths(os.walk(chroot_dir), + do_not_mount_dirs): + if not os.path.islink(d): + real_argv += ['--mount-readonly', self.relative(d)] + + real_argv += [chroot_dir] + + real_argv += argv + + try: + return self._app.runcmd(real_argv, **kwargs) + except cliapp.AppException as e: + raise cliapp.AppException('In staging area %s: running ' + 'command \'%s\' failed.' % + (self.dirname, ' '.join(argv))) def abort(self): # pragma: no cover '''Handle what to do with a staging area in the case of failure. -- cgit v1.2.1 From ec1665b75f646eec5db6078a190dab36096fe213 Mon Sep 17 00:00:00 2001 From: Richard Maw Date: Fri, 16 Aug 2013 13:09:04 +0100 Subject: Make the tempdir writable in bootstrap mode. Somehow this was working on x86 even though it had no ability to write to tempdirs, but on ARM it wasn't working. --- morphlib/stagingarea.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/morphlib/stagingarea.py b/morphlib/stagingarea.py index ba6fd53a..3c6d2d88 100644 --- a/morphlib/stagingarea.py +++ b/morphlib/stagingarea.py @@ -287,11 +287,15 @@ class StagingArea(object): cwd = '/' chroot_dir = self.dirname if self.use_chroot else '/' + temp_dir = kwargs["env"].get("TMPDIR", "/tmp") + staging_dirs = [self.builddirname, self.destdirname] + if self.use_chroot: + staging_dirs += ["dev", "proc", temp_dir.lstrip('/')] do_not_mount_dirs = [os.path.join(self.dirname, d) - for d in (self.builddirname, - self.destdirname, - 'dev', 'proc', 'tmp')] + for d in staging_dirs] + if not self.use_chroot: + do_not_mount_dirs += [temp_dir] logging.debug("Not mounting dirs %r" % do_not_mount_dirs) -- cgit v1.2.1