From 99e1be4580244193149d1f123f6d948b0e0604a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Fri, 1 Feb 2019 16:21:09 +0100 Subject: local.py: Do not follow symlinks in local directories isdir() follows symlinks on the host, resulting in potential host contamination. This change reorders the file checks to avoid this issue. --- buildstream/plugins/sources/local.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/buildstream/plugins/sources/local.py b/buildstream/plugins/sources/local.py index 55cdc14d3..ea2f6e013 100644 --- a/buildstream/plugins/sources/local.py +++ b/buildstream/plugins/sources/local.py @@ -133,11 +133,11 @@ def unique_key(filename): # Return some hard coded things for files which # have no content to calculate a key for - if os.path.isdir(filename): - return "0" - elif os.path.islink(filename): + if os.path.islink(filename): # For a symbolic link, use the link target as its unique identifier return os.readlink(filename) + elif os.path.isdir(filename): + return "0" return utils.sha256sum(filename) -- cgit v1.2.1 From f95e222e49e7dd6f05634e209a03856960332374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 28 Jan 2019 21:38:54 +0000 Subject: sandbox/sandbox.py: Do not follow symlinks in _has_command() This is required to ensure symlinks are not resolved on the host. --- buildstream/sandbox/sandbox.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildstream/sandbox/sandbox.py b/buildstream/sandbox/sandbox.py index cb6f43314..2159c0fef 100644 --- a/buildstream/sandbox/sandbox.py +++ b/buildstream/sandbox/sandbox.py @@ -525,11 +525,11 @@ class Sandbox(): # (bool): Whether a command exists inside the sandbox. def _has_command(self, command, env=None): if os.path.isabs(command): - return os.path.exists(os.path.join( + return os.path.lexists(os.path.join( self._root, command.lstrip(os.sep))) for path in env.get('PATH').split(':'): - if os.path.exists(os.path.join( + if os.path.lexists(os.path.join( self._root, path.lstrip(os.sep), command)): return True -- cgit v1.2.1 From 89973fb3721a6b02ba1b1b7be46e34fbce137472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Fri, 1 Feb 2019 09:57:59 +0000 Subject: utils.py: Remove list_dirs parameter from list_relative_paths() list_dirs was always True in the BuildStream code base. There was also a bug in the list_dirs=False code path as it did not return symlinks in `dirnames`. This is an API break, however, there are no known external callers. --- buildstream/plugins/sources/local.py | 2 +- buildstream/utils.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/buildstream/plugins/sources/local.py b/buildstream/plugins/sources/local.py index ea2f6e013..d4965cc9e 100644 --- a/buildstream/plugins/sources/local.py +++ b/buildstream/plugins/sources/local.py @@ -97,7 +97,7 @@ class LocalSource(Source): with self.timed_activity("Staging local files at {}".format(self.path)): if os.path.isdir(self.fullpath): - files = list(utils.list_relative_paths(self.fullpath, list_dirs=True)) + files = list(utils.list_relative_paths(self.fullpath)) utils.copy_files(self.fullpath, directory, files=files) else: destfile = os.path.join(directory, os.path.basename(self.path)) diff --git a/buildstream/utils.py b/buildstream/utils.py index c8d79c95a..224ac6ee8 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -111,7 +111,7 @@ class FileListResult(): return ret -def list_relative_paths(directory, *, list_dirs=True): +def list_relative_paths(directory): """A generator for walking directory relative paths This generator is useful for checking the full manifest of @@ -125,7 +125,6 @@ def list_relative_paths(directory, *, list_dirs=True): Args: directory (str): The directory to list files in - list_dirs (bool): Whether to list directories Yields: Relative filenames in `directory` @@ -152,16 +151,15 @@ def list_relative_paths(directory, *, list_dirs=True): # subdirectories in the walked `dirpath`, so we extract # these symlinks from `dirnames` # - if list_dirs: - for d in dirnames: - fullpath = os.path.join(dirpath, d) - if os.path.islink(fullpath): - yield os.path.join(basepath, d) + for d in dirnames: + fullpath = os.path.join(dirpath, d) + if os.path.islink(fullpath): + yield os.path.join(basepath, d) # We've decended into an empty directory, in this case we # want to include the directory itself, but not in any other # case. - if list_dirs and not filenames: + if not filenames: yield relpath # List the filenames in the walked directory -- cgit v1.2.1 From d1da3fb085507a9bc1a576d37008ed5b18319aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Fri, 1 Feb 2019 10:45:30 +0000 Subject: utils.py: Fix sorting of symlinks to directories os.walk() resolves symlinks to check whether they point to a directory even when followlinks is set to False. We already work around that broken behavior by extracting symlinks from `dirnames`. However, the sort order was still incorrect as we returned symlinks in dirnames before files and other symlinks. This change fixes this, sorting all files and symlinks in a single list. --- buildstream/utils.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/buildstream/utils.py b/buildstream/utils.py index 224ac6ee8..76f95637e 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -131,17 +131,6 @@ def list_relative_paths(directory): """ for (dirpath, dirnames, filenames) in os.walk(directory): - # Modifying the dirnames directly ensures that the os.walk() generator - # allows us to specify the order in which they will be iterated. - dirnames.sort() - filenames.sort() - - relpath = os.path.relpath(dirpath, directory) - - # We don't want "./" pre-pended to all the entries in the root of - # `directory`, prefer to have no prefix in that case. - basepath = relpath if relpath != '.' and dirpath != directory else '' - # os.walk does not decend into symlink directories, which # makes sense because otherwise we might have redundant # directories, or end up descending into directories outside @@ -149,12 +138,23 @@ def list_relative_paths(directory): # # But symlinks to directories are still identified as # subdirectories in the walked `dirpath`, so we extract - # these symlinks from `dirnames` + # these symlinks from `dirnames` and add them to `filenames`. # for d in dirnames: fullpath = os.path.join(dirpath, d) if os.path.islink(fullpath): - yield os.path.join(basepath, d) + filenames.append(d) + + # Modifying the dirnames directly ensures that the os.walk() generator + # allows us to specify the order in which they will be iterated. + dirnames.sort() + filenames.sort() + + relpath = os.path.relpath(dirpath, directory) + + # We don't want "./" pre-pended to all the entries in the root of + # `directory`, prefer to have no prefix in that case. + basepath = relpath if relpath != '.' and dirpath != directory else '' # We've decended into an empty directory, in this case we # want to include the directory itself, but not in any other -- cgit v1.2.1 From 7cf67ed3f05dc0754d19a6313501ae0f85fd6eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Fri, 1 Feb 2019 11:02:46 +0000 Subject: _casbaseddirectory.py: Do not mimic os.walk() in list_relative_paths() This matches the change in utils.list_relative_paths() that now sorts all symlinks as files, instead of following the broken behavior of os.walk(). --- buildstream/storage/_casbaseddirectory.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py index d88222273..09c8c9875 100644 --- a/buildstream/storage/_casbaseddirectory.py +++ b/buildstream/storage/_casbaseddirectory.py @@ -795,24 +795,11 @@ class CasBasedDirectory(Directory): Return value: List(str) - list of all paths """ - symlink_list = filter(lambda i: isinstance(i[1].pb_object, remote_execution_pb2.SymlinkNode), - self.index.items()) - file_list = list(filter(lambda i: isinstance(i[1].pb_object, remote_execution_pb2.FileNode), + file_list = list(filter(lambda i: not isinstance(i[1].buildstream_object, CasBasedDirectory), self.index.items())) directory_list = filter(lambda i: isinstance(i[1].buildstream_object, CasBasedDirectory), self.index.items()) - # We need to mimic the behaviour of os.walk, in which symlinks - # to directories count as directories and symlinks to file or - # broken symlinks count as files. os.walk doesn't follow - # symlinks, so we don't recurse. - for (k, v) in sorted(symlink_list): - target = self._resolve(k, absolute_symlinks_resolve=True) - if isinstance(target, CasBasedDirectory): - yield os.path.join(relpath, k) - else: - file_list.append((k, v)) - if file_list == [] and relpath != "": yield relpath else: -- cgit v1.2.1 From 7ec0bb5e06c83646611954b4a15da87442b73d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 11 Feb 2019 06:44:20 +0100 Subject: tests/sources/local.py: Add directory symlink test --- tests/sources/local.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/sources/local.py b/tests/sources/local.py index f7c1f4bd2..0da9e8cc1 100644 --- a/tests/sources/local.py +++ b/tests/sources/local.py @@ -136,3 +136,23 @@ def test_stage_file_exists(cli, tmpdir, datafiles): result = cli.run(project=project, args=['build', 'target.bst']) result.assert_main_error(ErrorDomain.STREAM, None) result.assert_task_error(ErrorDomain.SOURCE, 'ensure-stage-dir-fail') + + +@pytest.mark.datafiles(os.path.join(DATA_DIR, 'directory')) +def test_stage_directory_symlink(cli, tmpdir, datafiles): + project = os.path.join(datafiles.dirname, datafiles.basename) + checkoutdir = os.path.join(str(tmpdir), "checkout") + + symlink = os.path.join(project, 'files', 'symlink-to-subdir') + os.symlink('subdir', symlink) + + # Build, checkout + result = cli.run(project=project, args=['build', 'target.bst']) + result.assert_success() + result = cli.run(project=project, args=['artifact', 'checkout', 'target.bst', '--directory', checkoutdir]) + result.assert_success() + + # Check that the checkout contains the expected directory and directory symlink + assert(os.path.exists(os.path.join(checkoutdir, 'subdir', 'anotherfile.txt'))) + assert(os.path.exists(os.path.join(checkoutdir, 'symlink-to-subdir', 'anotherfile.txt'))) + assert(os.path.islink(os.path.join(checkoutdir, 'symlink-to-subdir'))) -- cgit v1.2.1