summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTristan Maat <tristan.maat@codethink.co.uk>2019-06-24 17:35:23 +0100
committerTristan Maat <tristan.maat@codethink.co.uk>2019-06-28 16:55:30 +0100
commita2781fb48af5f92a00a3e24d305739fc715f6dd5 (patch)
treef507f91dc3c82852d9e32f7ff379cac22a285f42
parent248786e481bed8dc4c21f70ce42b29f520bee770 (diff)
downloadbuildstream-tar-target-renaming.tar.gz
tar.py: Make link target renaming work between base-dirstar-target-renaming
Fixes #1052
-rw-r--r--src/buildstream/plugins/sources/tar.py45
-rw-r--r--tests/sources/tar.py83
-rw-r--r--tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/a1
-rw-r--r--tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/malicious0
-rw-r--r--tests/sources/tar/out-of-basedir-hardlinks/contents/to_extract/a1
-rw-r--r--tests/sources/tar/out-of-basedir-hardlinks/malicious_target.bst5
-rw-r--r--tests/sources/tar/out-of-basedir-hardlinks/target.bst6
7 files changed, 132 insertions, 9 deletions
diff --git a/src/buildstream/plugins/sources/tar.py b/src/buildstream/plugins/sources/tar.py
index c90de74ea..e9e0fda27 100644
--- a/src/buildstream/plugins/sources/tar.py
+++ b/src/buildstream/plugins/sources/tar.py
@@ -114,7 +114,7 @@ class TarSource(DownloadableFileSource):
base_dir = self._find_base_dir(tar, self.base_dir)
if base_dir:
- tar.extractall(path=directory, members=self._extract_members(tar, base_dir))
+ tar.extractall(path=directory, members=self._extract_members(tar, base_dir, directory))
else:
tar.extractall(path=directory)
@@ -122,7 +122,28 @@ class TarSource(DownloadableFileSource):
raise SourceError("{}: Error staging source: {}".format(self, e)) from e
# Override and translate which filenames to extract
- def _extract_members(self, tar, base_dir):
+ def _extract_members(self, tar, base_dir, target_dir):
+
+ # Assert that a tarfile is safe to extract; specifically, make
+ # sure that we don't do anything outside of the target
+ # directory (this is possible, if, say, someone engineered a
+ # tarfile to contain paths that start with ..).
+ def assert_safe(member):
+ final_path = os.path.abspath(os.path.join(target_dir, member.path))
+ if not final_path.startswith(target_dir):
+ raise SourceError("{}: Tarfile attempts to extract outside the staging area: "
+ "{} -> {}".format(self, member.path, final_path))
+
+ if member.islnk():
+ linked_path = os.path.abspath(os.path.join(target_dir, member.linkname))
+ if not linked_path.startswith(target_dir):
+ raise SourceError("{}: Tarfile attempts to hardlink outside the staging area: "
+ "{} -> {}".format(self, member.path, final_path))
+
+ # Don't need to worry about symlinks because they're just
+ # files here and won't be able to do much harm once we are
+ # in a sandbox.
+
if not base_dir.endswith(os.sep):
base_dir = base_dir + os.sep
@@ -132,21 +153,27 @@ class TarSource(DownloadableFileSource):
# First, ensure that a member never starts with `./`
if member.path.startswith('./'):
member.path = member.path[2:]
+ if member.islnk() and member.linkname.startswith('./'):
+ member.linkname = member.linkname[2:]
# Now extract only the paths which match the normalized path
if member.path.startswith(base_dir):
-
- # If it's got a link name, give it the same treatment, we
- # need the link targets to match up with what we are staging
+ # Hardlinks are smart and collapse into the "original"
+ # when their counterpart doesn't exist. This means we
+ # only need to modify links to files whose location we
+ # change.
#
- # NOTE: Its possible this is not perfect, we may need to
- # consider links which point outside of the chosen
- # base directory.
+ # Since we assert that we're not linking to anything
+ # outside the target directory, this should only ever
+ # be able to link to things inside the target
+ # directory, so we should cover all bases doing this.
#
- if member.type == tarfile.LNKTYPE:
+ if member.islnk() and member.linkname.startswith(base_dir):
member.linkname = member.linkname[L:]
member.path = member.path[L:]
+
+ assert_safe(member)
yield member
# We want to iterate over all paths of a tarball, but getmembers()
diff --git a/tests/sources/tar.py b/tests/sources/tar.py
index a6c1a4d9f..07108ccbb 100644
--- a/tests/sources/tar.py
+++ b/tests/sources/tar.py
@@ -407,3 +407,86 @@ def test_homeless_environment(cli, tmpdir, datafiles):
# Use a track, make sure the plugin tries to find a ~/.netrc
result = cli.run(project=project, args=['source', 'track', 'target.bst'], env={'HOME': None})
result.assert_success()
+
+
+@pytest.mark.datafiles(os.path.join(DATA_DIR, 'out-of-basedir-hardlinks'))
+def test_out_of_basedir_hardlinks(cli, tmpdir, datafiles):
+ def ensure_link(member):
+ # By default, python will simply duplicate files - we want
+ # hardlinks!
+ if member.path == "contents/to_extract/a":
+ member.type = tarfile.LNKTYPE
+ member.linkname = "contents/elsewhere/a"
+ return member
+
+ project = str(datafiles)
+ generate_project(project, tmpdir)
+ checkoutdir = os.path.join(str(tmpdir), "checkout")
+
+ # Create a tarball with an odd hardlink
+ src_tar = os.path.join(str(tmpdir), "contents.tar.gz")
+ old_dir = os.getcwd()
+ os.chdir(str(tmpdir))
+ with tarfile.open(src_tar, "w:gz") as tar:
+ tar.add("contents", filter=ensure_link)
+ os.chdir(old_dir)
+
+ # Make sure our tarfile is actually created with the desired
+ # attributes set
+ with tarfile.open(src_tar, "r:gz") as tar:
+ assert any(member.islnk() and
+ member.path == "contents/to_extract/a" and
+ member.linkname == "contents/elsewhere/a"
+ for member in tar.getmembers())
+
+ # Assert that we will actually create a singular copy of the file
+ result = cli.run(project=project, args=['source', 'track', 'target.bst'])
+ result.assert_success()
+ result = cli.run(project=project, args=['source', 'fetch', 'target.bst'])
+ result.assert_success()
+ result = cli.run(project=project, args=['build', 'target.bst'])
+ result.assert_success()
+ result = cli.run(project=project, args=['artifact', 'checkout', 'target.bst', '--directory', checkoutdir])
+ result.assert_success()
+
+ original_dir = os.path.join(str(datafiles), 'contents', 'to_extract')
+ original_contents = list_dir_contents(original_dir)
+ checkout_contents = list_dir_contents(checkoutdir)
+ assert checkout_contents == original_contents
+
+
+@pytest.mark.datafiles(os.path.join(DATA_DIR, 'out-of-basedir-hardlinks'))
+def test_malicious_out_of_basedir_hardlinks(cli, tmpdir, datafiles):
+ project = str(datafiles)
+ generate_project(project, tmpdir)
+
+ # Create a maliciously-hardlinked tarball
+ def ensure_link(member):
+ # By default, python will simply duplicate files - we want
+ # hardlinks!
+ if member.path == "contents/elsewhere/malicious":
+ member.type = tarfile.LNKTYPE
+ # This should not be allowed
+ member.linkname = "../../../malicious_target.bst"
+ return member
+
+ src_tar = os.path.join(str(tmpdir), "contents.tar.gz")
+ old_dir = os.getcwd()
+ os.chdir(str(tmpdir))
+ with tarfile.open(src_tar, "w:gz") as tar:
+ tar.add("contents", filter=ensure_link)
+ os.chdir(old_dir)
+
+ # Make sure our tarfile is actually created with the desired
+ # attributes set
+ with tarfile.open(src_tar, "r:gz") as tar:
+ assert any(member.islnk() and
+ member.path == "contents/elsewhere/malicious" and
+ member.linkname == "../../../malicious_target.bst"
+ for member in tar.getmembers())
+
+ # Try to execute the exploit
+ result = cli.run(project=project, args=['source', 'track', 'malicious_target.bst'])
+ result.assert_success()
+ result = cli.run(project=project, args=['source', 'fetch', 'malicious_target.bst'])
+ result.assert_main_error(ErrorDomain.STREAM, None)
diff --git a/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/a b/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/a
new file mode 100644
index 000000000..ce0136250
--- /dev/null
+++ b/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/a
@@ -0,0 +1 @@
+hello
diff --git a/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/malicious b/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/malicious
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/sources/tar/out-of-basedir-hardlinks/contents/elsewhere/malicious
diff --git a/tests/sources/tar/out-of-basedir-hardlinks/contents/to_extract/a b/tests/sources/tar/out-of-basedir-hardlinks/contents/to_extract/a
new file mode 100644
index 000000000..ce0136250
--- /dev/null
+++ b/tests/sources/tar/out-of-basedir-hardlinks/contents/to_extract/a
@@ -0,0 +1 @@
+hello
diff --git a/tests/sources/tar/out-of-basedir-hardlinks/malicious_target.bst b/tests/sources/tar/out-of-basedir-hardlinks/malicious_target.bst
new file mode 100644
index 000000000..b9debe961
--- /dev/null
+++ b/tests/sources/tar/out-of-basedir-hardlinks/malicious_target.bst
@@ -0,0 +1,5 @@
+kind: import
+description: The kind of this element is irrelevant.
+sources:
+- kind: tar
+ url: tmpdir:/contents.tar.gz
diff --git a/tests/sources/tar/out-of-basedir-hardlinks/target.bst b/tests/sources/tar/out-of-basedir-hardlinks/target.bst
new file mode 100644
index 000000000..4f07e2e4c
--- /dev/null
+++ b/tests/sources/tar/out-of-basedir-hardlinks/target.bst
@@ -0,0 +1,6 @@
+kind: import
+description: The kind of this element is irrelevant.
+sources:
+- kind: tar
+ url: tmpdir:/contents.tar.gz
+ base-dir: contents/to_extract