diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2019-07-02 15:03:46 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2019-07-02 15:03:46 +0000 |
commit | 2219874cdca548dd7f164ef3a0f77f5affeea2b0 (patch) | |
tree | ea7d64ec5ec39b8391e8bdb316ee952566516111 | |
parent | 7893f514404c6f18e40b94961f89b756f5041000 (diff) | |
parent | 86a76c29c09d92e58280c5a9fc1736f5efe1e88f (diff) | |
download | buildstream-2219874cdca548dd7f164ef3a0f77f5affeea2b0.tar.gz |
Merge branch 'tar-target-renaming' into 'master'
tar.py: Make link target renaming work between base-dirs
Closes #1052
See merge request BuildStream/buildstream!1431
7 files changed, 132 insertions, 9 deletions
diff --git a/src/buildstream/plugins/sources/tar.py b/src/buildstream/plugins/sources/tar.py index c90de74ea..e9e0fda27 100644 --- a/src/buildstream/plugins/sources/tar.py +++ b/src/buildstream/plugins/sources/tar.py @@ -114,7 +114,7 @@ class TarSource(DownloadableFileSource): base_dir = self._find_base_dir(tar, self.base_dir) if base_dir: - tar.extractall(path=directory, members=self._extract_members(tar, base_dir)) + tar.extractall(path=directory, members=self._extract_members(tar, base_dir, directory)) else: tar.extractall(path=directory) @@ -122,7 +122,28 @@ class TarSource(DownloadableFileSource): raise SourceError("{}: Error staging source: {}".format(self, e)) from e # Override and translate which filenames to extract - def _extract_members(self, tar, base_dir): + def _extract_members(self, tar, base_dir, target_dir): + + # Assert that a tarfile is safe to extract; specifically, make + # sure that we don't do anything outside of the target + # directory (this is possible, if, say, someone engineered a + # tarfile to contain paths that start with ..). + def assert_safe(member): + final_path = os.path.abspath(os.path.join(target_dir, member.path)) + if not final_path.startswith(target_dir): + raise SourceError("{}: Tarfile attempts to extract outside the staging area: " + "{} -> {}".format(self, member.path, final_path)) + + if member.islnk(): + linked_path = os.path.abspath(os.path.join(target_dir, member.linkname)) + if not linked_path.startswith(target_dir): + raise SourceError("{}: Tarfile attempts to hardlink outside the staging area: " + "{} -> {}".format(self, member.path, final_path)) + + # Don't need to worry about symlinks because they're just + # files here and won't be able to do much harm once we are + # in a sandbox. + if not base_dir.endswith(os.sep): base_dir = base_dir + os.sep @@ -132,21 +153,27 @@ class TarSource(DownloadableFileSource): # First, ensure that a member never starts with `./` if member.path.startswith('./'): member.path = member.path[2:] + if member.islnk() and member.linkname.startswith('./'): + member.linkname = member.linkname[2:] # Now extract only the paths which match the normalized path if member.path.startswith(base_dir): - - # If it's got a link name, give it the same treatment, we - # need the link targets to match up with what we are staging + # Hardlinks are smart and collapse into the "original" + # when their counterpart doesn't exist. This means we + # only need to modify links to files whose location we + # change. # - # NOTE: Its possible this is not perfect, we may need to - # consider links which point outside of the chosen - # base directory. + # Since we assert that we're not linking to anything + # outside the target directory, this should only ever + # be able to link to things inside the target + # directory, so we should cover all bases doing this. # - if member.type == tarfile.LNKTYPE: + if member.islnk() and member.linkname.startswith(base_dir): member.linkname = member.linkname[L:] member.path = member.path[L:] + + assert_safe(member) yield member # We want to iterate over all paths of a tarball, but getmembers() diff --git a/tests/sources/tar.py b/tests/sources/tar.py index a6c1a4d9f..07108ccbb 100644 --- a/tests/sources/tar.py +++ b/tests/sources/tar.py @@ -407,3 +407,86 @@ def test_homeless_environment(cli, tmpdir, datafiles): # Use a track, make sure the plugin tries to find a ~/.netrc result = cli.run(project=project, args=['source', 'track', 'target.bst'], env={'HOME': None}) result.assert_success() + + +@pytest.mark.datafiles(os.path.join(DATA_DIR, 'out-of-basedir-hardlinks')) +def test_out_of_basedir_hardlinks(cli, tmpdir, datafiles): + def ensure_link(member): + # By default, python will simply duplicate files - we want + # hardlinks! + if member.path == "contents/to_extract/a": + member.type = tarfile.LNKTYPE + member.linkname = "contents/elsewhere/a" + return member + + project = str(datafiles) + generate_project(project, tmpdir) + checkoutdir = os.path.join(str(tmpdir), "checkout") + + # Create a tarball with an odd hardlink + src_tar = os.path.join(str(tmpdir), "contents.tar.gz") + old_dir = os.getcwd() + os.chdir(str(tmpdir)) + with tarfile.open(src_tar, "w:gz") as tar: + tar.add("contents", filter=ensure_link) + os.chdir(old_dir) + + # Make sure our tarfile is actually created with the desired + # attributes set + with tarfile.open(src_tar, "r:gz") as tar: + assert any(member.islnk() and + member.path == "contents/to_extract/a" and + member.linkname == "contents/elsewhere/a" + for member in tar.getmembers()) + + # Assert that we will actually create a singular copy of the file + result = cli.run(project=project, args=['source', 'track', 'target.bst']) + result.assert_success() + result = cli.run(project=project, args=['source', 'fetch', 'target.bst']) + result.assert_success() + result = cli.run(project=project, args=['build', 'target.bst']) + result.assert_success() + result = cli.run(project=project, args=['artifact', 'checkout', 'target.bst', '--directory', checkoutdir]) + result.assert_success() + + original_dir = os.path.join(str(datafiles), 'contents', 'to_extract') + original_contents = list_dir_contents(original_dir) + checkout_contents = list_dir_contents(checkoutdir) + assert checkout_contents == original_contents + + +@pytest.mark.datafiles(os.path.join(DATA_DIR, 'out-of-basedir-hardlinks')) +def test_malicious_out_of_basedir_hardlinks(cli, tmpdir, datafiles): + project = str(datafiles) + generate_project(project, tmpdir) + + # Create a maliciously-hardlinked tarball + def ensure_link(member): + # By default, python will simply duplicate files - we want + # hardlinks! + if member.path == "contents/elsewhere/malicious": + member.type = tarfile.LNKTYPE + # This should not be allowed + member.linkname = "../../../malicious_target.bst" + return member + + src_tar = os.path.join(str(tmpdir), "contents.tar.gz") + old_dir = os.getcwd() + os.chdir(str(tmpdir)) + with tarfile.open(src_tar, "w:gz") as tar: + tar.add("contents", filter=ensure_link) + os.chdir(old_dir) + + # Make sure our tarfile is actually created with the desired + # attributes set + with tarfile.open(src_tar, "r:gz") as tar: + assert any(member.islnk() and + member.path == "contents/elsewhere/malicious" and + member.linkname == "../../../malicious_target.bst" + for member in tar.getmembers()) + + # Try to execute the exploit + result = cli.run(project=project, args=['source', 'track', 'malicious_target.bst']) + result.assert_success() + result = cli.run(project=project, args=['source', 'fetch', 'malicious_target.bst']) + result.assert_main_error(ErrorDomain.STREAM, None) diff --git a/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/a b/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/a new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/a @@ -0,0 +1 @@ +hello diff --git a/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/malicious b/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/malicious new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/malicious diff --git a/tests/sources/tar/out-of-basedir-hardlinks/contents/to_extract/a b/tests/sources/tar/out-of-basedir-hardlinks/contents/to_extract/a new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/tests/sources/tar/out-of-basedir-hardlinks/contents/to_extract/a @@ -0,0 +1 @@ +hello diff --git a/tests/sources/tar/out-of-basedir-hardlinks/malicious_target.bst b/tests/sources/tar/out-of-basedir-hardlinks/malicious_target.bst new file mode 100644 index 000000000..b9debe961 --- /dev/null +++ b/tests/sources/tar/out-of-basedir-hardlinks/malicious_target.bst @@ -0,0 +1,5 @@ +kind: import +description: The kind of this element is irrelevant. +sources: +- kind: tar + url: tmpdir:/contents.tar.gz diff --git a/tests/sources/tar/out-of-basedir-hardlinks/target.bst b/tests/sources/tar/out-of-basedir-hardlinks/target.bst new file mode 100644 index 000000000..4f07e2e4c --- /dev/null +++ b/tests/sources/tar/out-of-basedir-hardlinks/target.bst @@ -0,0 +1,6 @@ +kind: import +description: The kind of this element is irrelevant. +sources: +- kind: tar + url: tmpdir:/contents.tar.gz + base-dir: contents/to_extract |