summaryrefslogtreecommitdiff
path: root/src/buildstream/plugins/sources/tar.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/buildstream/plugins/sources/tar.py')
-rw-r--r--src/buildstream/plugins/sources/tar.py253
1 files changed, 0 insertions, 253 deletions
diff --git a/src/buildstream/plugins/sources/tar.py b/src/buildstream/plugins/sources/tar.py
deleted file mode 100644
index fc4f5736a..000000000
--- a/src/buildstream/plugins/sources/tar.py
+++ /dev/null
@@ -1,253 +0,0 @@
-#
-# Copyright (C) 2017 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library. If not, see <http://www.gnu.org/licenses/>.
-#
-# Authors:
-# Jonathan Maw <jonathan.maw@codethink.co.uk>
-
-"""
-tar - stage files from tar archives
-===================================
-
-**Host dependencies:**
-
- * lzip (for .tar.lz files)
-
-**Usage:**
-
-.. code:: yaml
-
- # Specify the tar source kind
- kind: tar
-
- # Specify the tar url. Using an alias defined in your project
- # configuration is encouraged. 'bst source track' will update the
- # sha256sum in 'ref' to the downloaded file's sha256sum.
- url: upstream:foo.tar
-
- # Specify the ref. It's a sha256sum of the file you download.
- ref: 6c9f6f68a131ec6381da82f2bff978083ed7f4f7991d931bfa767b7965ebc94b
-
- # Specify a glob pattern to indicate the base directory to extract
- # from the tarball. The first matching directory will be used.
- #
- # Note that this is '*' by default since most standard release
- # tarballs contain a self named subdirectory at the root which
- # contains the files one normally wants to extract to build.
- #
- # To extract the root of the tarball directly, this can be set
- # to an empty string.
- base-dir: '*'
-
-See :ref:`built-in functionality doumentation <core_source_builtins>` for
-details on common configuration options for sources.
-"""
-
-import os
-import tarfile
-from contextlib import contextmanager
-from tempfile import TemporaryFile
-
-from buildstream import SourceError
-from buildstream import utils
-
-from ._downloadablefilesource import DownloadableFileSource
-
-
-class ReadableTarInfo(tarfile.TarInfo):
- """
- The goal is to override `TarFile`'s `extractall` semantics by ensuring that on extraction, the
- files are readable by the owner of the file. This is done by overriding the accessor for the
- `mode` attribute in `TarInfo`, the class that encapsulates the internal meta-data of the tarball,
- so that the owner-read bit is always set.
- """
-
- @property
- def mode(self):
- # Respect umask instead of the file mode stored in the archive.
- # The only bit used from the embedded mode is the executable bit for files.
- umask = utils.get_umask()
- if self.isdir() or bool(self.__permission | 0o100):
- return 0o777 & ~umask
- else:
- return 0o666 & ~umask
-
- @mode.setter
- def mode(self, permission):
- self.__permission = permission # pylint: disable=attribute-defined-outside-init
-
-
-class TarSource(DownloadableFileSource):
- # pylint: disable=attribute-defined-outside-init
-
- def configure(self, node):
- super().configure(node)
-
- self.base_dir = node.get_str("base-dir", "*")
- node.validate_keys(DownloadableFileSource.COMMON_CONFIG_KEYS + ["base-dir"])
-
- def preflight(self):
- self.host_lzip = None
- if self.url.endswith(".lz"):
- self.host_lzip = utils.get_host_tool("lzip")
-
- def get_unique_key(self):
- return super().get_unique_key() + [self.base_dir]
-
- @contextmanager
- def _run_lzip(self):
- assert self.host_lzip
- with TemporaryFile() as lzip_stdout:
- with open(self._get_mirror_file(), "r") as lzip_file:
- self.call([self.host_lzip, "-d"], stdin=lzip_file, stdout=lzip_stdout)
-
- lzip_stdout.seek(0, 0)
- yield lzip_stdout
-
- @contextmanager
- def _get_tar(self):
- if self.url.endswith(".lz"):
- with self._run_lzip() as lzip_dec:
- with tarfile.open(fileobj=lzip_dec, mode="r:", tarinfo=ReadableTarInfo) as tar:
- yield tar
- else:
- with tarfile.open(self._get_mirror_file(), tarinfo=ReadableTarInfo) as tar:
- yield tar
-
- def stage(self, directory):
- try:
- with self._get_tar() as tar:
- base_dir = None
- if self.base_dir:
- base_dir = self._find_base_dir(tar, self.base_dir)
-
- if base_dir:
- tar.extractall(path=directory, members=self._extract_members(tar, base_dir, directory))
- else:
- tar.extractall(path=directory)
-
- except (tarfile.TarError, OSError) as e:
- raise SourceError("{}: Error staging source: {}".format(self, e)) from e
-
- # Override and translate which filenames to extract
- def _extract_members(self, tar, base_dir, target_dir):
-
- # Assert that a tarfile is safe to extract; specifically, make
- # sure that we don't do anything outside of the target
- # directory (this is possible, if, say, someone engineered a
- # tarfile to contain paths that start with ..).
- def assert_safe(member):
- final_path = os.path.abspath(os.path.join(target_dir, member.path))
- if not final_path.startswith(target_dir):
- raise SourceError(
- "{}: Tarfile attempts to extract outside the staging area: "
- "{} -> {}".format(self, member.path, final_path)
- )
-
- if member.islnk():
- linked_path = os.path.abspath(os.path.join(target_dir, member.linkname))
- if not linked_path.startswith(target_dir):
- raise SourceError(
- "{}: Tarfile attempts to hardlink outside the staging area: "
- "{} -> {}".format(self, member.path, final_path)
- )
-
- # Don't need to worry about symlinks because they're just
- # files here and won't be able to do much harm once we are
- # in a sandbox.
-
- if not base_dir.endswith(os.sep):
- base_dir = base_dir + os.sep
-
- L = len(base_dir)
- for member in tar.getmembers():
-
- # First, ensure that a member never starts with `./`
- if member.path.startswith("./"):
- member.path = member.path[2:]
- if member.islnk() and member.linkname.startswith("./"):
- member.linkname = member.linkname[2:]
-
- # Now extract only the paths which match the normalized path
- if member.path.startswith(base_dir):
- # Hardlinks are smart and collapse into the "original"
- # when their counterpart doesn't exist. This means we
- # only need to modify links to files whose location we
- # change.
- #
- # Since we assert that we're not linking to anything
- # outside the target directory, this should only ever
- # be able to link to things inside the target
- # directory, so we should cover all bases doing this.
- #
- if member.islnk() and member.linkname.startswith(base_dir):
- member.linkname = member.linkname[L:]
-
- member.path = member.path[L:]
-
- assert_safe(member)
- yield member
-
- # We want to iterate over all paths of a tarball, but getmembers()
- # is not enough because some tarballs simply do not contain the leading
- # directory paths for the archived files.
- def _list_tar_paths(self, tar):
-
- visited = set()
- for member in tar.getmembers():
-
- # Remove any possible leading './', offer more consistent behavior
- # across tarballs encoded with or without a leading '.'
- member_name = member.name.lstrip("./")
-
- if not member.isdir():
-
- # Loop over the components of a path, for a path of a/b/c/d
- # we will first visit 'a', then 'a/b' and then 'a/b/c', excluding
- # the final component
- components = member_name.split("/")
- for i in range(len(components) - 1):
- dir_component = "/".join([components[j] for j in range(i + 1)])
- if dir_component not in visited:
- visited.add(dir_component)
- try:
- # Dont yield directory members which actually do
- # exist in the archive
- _ = tar.getmember(dir_component)
- except KeyError:
- if dir_component != ".":
- yield dir_component
-
- continue
-
- # Avoid considering the '.' directory, if any is included in the archive
- # this is to avoid the default 'base-dir: *' value behaving differently
- # depending on whether the tarball was encoded with a leading '.' or not
- if member_name == ".":
- continue
-
- yield member_name
-
- def _find_base_dir(self, tar, pattern):
- paths = self._list_tar_paths(tar)
- matches = sorted(list(utils.glob(paths, pattern)))
- if not matches:
- raise SourceError("{}: Could not find base directory matching pattern: {}".format(self, pattern))
-
- return matches[0]
-
-
-def setup():
- return TarSource