summaryrefslogtreecommitdiff
path: root/src/buildstream/sandbox/sandbox.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/buildstream/sandbox/sandbox.py')
-rw-r--r--src/buildstream/sandbox/sandbox.py717
1 files changed, 717 insertions, 0 deletions
diff --git a/src/buildstream/sandbox/sandbox.py b/src/buildstream/sandbox/sandbox.py
new file mode 100644
index 000000000..c96ccb57b
--- /dev/null
+++ b/src/buildstream/sandbox/sandbox.py
@@ -0,0 +1,717 @@
+#
+# Copyright (C) 2017 Codethink Limited
+# Copyright (C) 2018 Bloomberg Finance LP
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Andrew Leeming <andrew.leeming@codethink.co.uk>
+# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
+"""
+Sandbox - The build sandbox
+===========================
+:class:`.Element` plugins which want to interface with the sandbox
+need only understand this interface, while it may be given a different
+sandbox implementation, any sandbox implementation it is given will
+conform to this interface.
+
+See also: :ref:`sandboxing`.
+"""
+
+import os
+import shlex
+import contextlib
+from contextlib import contextmanager
+
+from .._exceptions import ImplError, BstError, SandboxError
+from .._message import Message, MessageType
+from ..storage._filebaseddirectory import FileBasedDirectory
+from ..storage._casbaseddirectory import CasBasedDirectory
+
+
+class SandboxFlags():
+ """Flags indicating how the sandbox should be run.
+ """
+
+ NONE = 0
+ """Use default sandbox configuration.
+ """
+
+ ROOT_READ_ONLY = 0x01
+ """The root filesystem is read only.
+
+ This is normally true except when running integration commands
+ on staged dependencies, where we have to update caches and run
+ things such as ldconfig.
+ """
+
+ NETWORK_ENABLED = 0x02
+ """Whether to expose host network.
+
+ This should not be set when running builds, but can
+ be allowed for running a shell in a sandbox.
+ """
+
+ INTERACTIVE = 0x04
+ """Whether to run the sandbox interactively
+
+ This determines if the sandbox should attempt to connect
+ the terminal through to the calling process, or detach
+ the terminal entirely.
+ """
+
+ INHERIT_UID = 0x08
+ """Whether to use the user id and group id from the host environment
+
+ This determines if processes in the sandbox should run with the
+ same user id and group id as BuildStream itself. By default,
+ processes run with user id and group id 0, protected by a user
+ namespace where available.
+ """
+
+
+class SandboxCommandError(SandboxError):
+ """Raised by :class:`.Sandbox` implementations when a command fails.
+
+ Args:
+ message (str): The error message to report to the user
+ detail (str): The detailed error string
+ collect (str): An optional directory containing partial install contents
+ """
+ def __init__(self, message, *, detail=None, collect=None):
+ super().__init__(message, detail=detail, reason='command-failed')
+
+ self.collect = collect
+
+
+class Sandbox():
+ """Sandbox()
+
+ Sandbox programming interface for :class:`.Element` plugins.
+ """
+
+ # Minimal set of devices for the sandbox
+ DEVICES = [
+ '/dev/urandom',
+ '/dev/random',
+ '/dev/zero',
+ '/dev/null'
+ ]
+
+ def __init__(self, context, project, directory, **kwargs):
+ self.__context = context
+ self.__project = project
+ self.__directories = []
+ self.__cwd = None
+ self.__env = None
+ self.__mount_sources = {}
+ self.__allow_real_directory = kwargs['allow_real_directory']
+ self.__allow_run = True
+
+ # Plugin ID for logging
+ plugin = kwargs.get('plugin', None)
+ if plugin:
+ self.__plugin_id = plugin._unique_id
+ else:
+ self.__plugin_id = None
+
+ # Configuration from kwargs common to all subclasses
+ self.__config = kwargs['config']
+ self.__stdout = kwargs['stdout']
+ self.__stderr = kwargs['stderr']
+ self.__bare_directory = kwargs['bare_directory']
+
+ # Setup the directories. Root and output_directory should be
+ # available to subclasses, hence being single-underscore. The
+ # others are private to this class.
+ # If the directory is bare, it probably doesn't need scratch
+ if self.__bare_directory:
+ self._root = directory
+ self.__scratch = None
+ os.makedirs(self._root, exist_ok=True)
+ else:
+ self._root = os.path.join(directory, 'root')
+ self.__scratch = os.path.join(directory, 'scratch')
+ for directory_ in [self._root, self.__scratch]:
+ os.makedirs(directory_, exist_ok=True)
+
+ self._output_directory = None
+ self._build_directory = None
+ self._build_directory_always = None
+ self._vdir = None
+ self._usebuildtree = False
+
+ # This is set if anyone requests access to the underlying
+ # directory via get_directory.
+ self._never_cache_vdirs = False
+
+ # Pending command batch
+ self.__batch = None
+
+ def get_directory(self):
+ """Fetches the sandbox root directory
+
+ The root directory is where artifacts for the base
+ runtime environment should be staged. Only works if
+ BST_VIRTUAL_DIRECTORY is not set.
+
+ Returns:
+ (str): The sandbox root directory
+
+ """
+ if self.__allow_real_directory:
+ self._never_cache_vdirs = True
+ return self._root
+ else:
+ raise BstError("You can't use get_directory")
+
+ def get_virtual_directory(self):
+ """Fetches the sandbox root directory as a virtual Directory.
+
+ The root directory is where artifacts for the base
+ runtime environment should be staged.
+
+ Use caution if you use get_directory and
+ get_virtual_directory. If you alter the contents of the
+ directory returned by get_directory, all objects returned by
+ get_virtual_directory or derived from them are invalid and you
+ must call get_virtual_directory again to get a new copy.
+
+ Returns:
+ (Directory): The sandbox root directory
+
+ """
+ if self._vdir is None or self._never_cache_vdirs:
+ if self._use_cas_based_directory():
+ cascache = self.__context.get_cascache()
+ self._vdir = CasBasedDirectory(cascache)
+ else:
+ self._vdir = FileBasedDirectory(self._root)
+ return self._vdir
+
+ def _set_virtual_directory(self, virtual_directory):
+ """ Sets virtual directory. Useful after remote execution
+ has rewritten the working directory.
+ """
+ self._vdir = virtual_directory
+
+ def set_environment(self, environment):
+ """Sets the environment variables for the sandbox
+
+ Args:
+ environment (dict): The environment variables to use in the sandbox
+ """
+ self.__env = environment
+
+ def set_work_directory(self, directory):
+ """Sets the work directory for commands run in the sandbox
+
+ Args:
+ directory (str): An absolute path within the sandbox
+ """
+ self.__cwd = directory
+
+ def set_output_directory(self, directory):
+ """Sets the output directory - the directory which is preserved
+ as an artifact after assembly.
+
+ Args:
+ directory (str): An absolute path within the sandbox
+ """
+ self._output_directory = directory
+
+ def mark_directory(self, directory, *, artifact=False):
+ """Marks a sandbox directory and ensures it will exist
+
+ Args:
+ directory (str): An absolute path within the sandbox to mark
+ artifact (bool): Whether the content staged at this location
+ contains artifacts
+
+ .. note::
+ Any marked directories will be read-write in the sandboxed
+ environment, only the root directory is allowed to be readonly.
+ """
+ self.__directories.append({
+ 'directory': directory,
+ 'artifact': artifact
+ })
+
+ def run(self, command, flags, *, cwd=None, env=None, label=None):
+ """Run a command in the sandbox.
+
+ If this is called outside a batch context, the command is immediately
+ executed.
+
+ If this is called in a batch context, the command is added to the batch
+ for later execution. If the command fails, later commands will not be
+ executed. Command flags must match batch flags.
+
+ Args:
+ command (list): The command to run in the sandboxed environment, as a list
+ of strings starting with the binary to run.
+ flags (:class:`.SandboxFlags`): The flags for running this command.
+ cwd (str): The sandbox relative working directory in which to run the command.
+ env (dict): A dictionary of string key, value pairs to set as environment
+ variables inside the sandbox environment.
+ label (str): An optional label for the command, used for logging. (*Since: 1.4*)
+
+ Returns:
+ (int|None): The program exit code, or None if running in batch context.
+
+ Raises:
+ (:class:`.ProgramNotFoundError`): If a host tool which the given sandbox
+ implementation requires is not found.
+
+ .. note::
+
+ The optional *cwd* argument will default to the value set with
+ :func:`~buildstream.sandbox.Sandbox.set_work_directory` and this
+ function must make sure the directory will be created if it does
+ not exist yet, even if a workspace is being used.
+ """
+
+ if not self.__allow_run:
+ raise SandboxError("Sandbox.run() has been disabled")
+
+ # Fallback to the sandbox default settings for
+ # the cwd and env.
+ #
+ cwd = self._get_work_directory(cwd=cwd)
+ env = self._get_environment(cwd=cwd, env=env)
+
+ # Convert single-string argument to a list
+ if isinstance(command, str):
+ command = [command]
+
+ if self.__batch:
+ assert flags == self.__batch.flags, \
+ "Inconsistent sandbox flags in single command batch"
+
+ batch_command = _SandboxBatchCommand(command, cwd=cwd, env=env, label=label)
+
+ current_group = self.__batch.current_group
+ current_group.append(batch_command)
+ return None
+ else:
+ return self._run(command, flags, cwd=cwd, env=env)
+
+ @contextmanager
+ def batch(self, flags, *, label=None, collect=None):
+ """Context manager for command batching
+
+ This provides a batch context that defers execution of commands until
+ the end of the context. If a command fails, the batch will be aborted
+ and subsequent commands will not be executed.
+
+ Command batches may be nested. Execution will start only when the top
+ level batch context ends.
+
+ Args:
+ flags (:class:`.SandboxFlags`): The flags for this command batch.
+ label (str): An optional label for the batch group, used for logging.
+ collect (str): An optional directory containing partial install contents
+ on command failure.
+
+ Raises:
+ (:class:`.SandboxCommandError`): If a command fails.
+
+ *Since: 1.4*
+ """
+
+ group = _SandboxBatchGroup(label=label)
+
+ if self.__batch:
+ # Nested batch
+ assert flags == self.__batch.flags, \
+ "Inconsistent sandbox flags in single command batch"
+
+ parent_group = self.__batch.current_group
+ parent_group.append(group)
+ self.__batch.current_group = group
+ try:
+ yield
+ finally:
+ self.__batch.current_group = parent_group
+ else:
+ # Top-level batch
+ batch = self._create_batch(group, flags, collect=collect)
+
+ self.__batch = batch
+ try:
+ yield
+ finally:
+ self.__batch = None
+
+ batch.execute()
+
+ #####################################################
+ # Abstract Methods for Sandbox implementations #
+ #####################################################
+
+ # _run()
+ #
+ # Abstract method for running a single command
+ #
+ # Args:
+ # command (list): The command to run in the sandboxed environment, as a list
+ # of strings starting with the binary to run.
+ # flags (:class:`.SandboxFlags`): The flags for running this command.
+ # cwd (str): The sandbox relative working directory in which to run the command.
+ # env (dict): A dictionary of string key, value pairs to set as environment
+ # variables inside the sandbox environment.
+ #
+ # Returns:
+ # (int): The program exit code.
+ #
+ def _run(self, command, flags, *, cwd, env):
+ raise ImplError("Sandbox of type '{}' does not implement _run()"
+ .format(type(self).__name__))
+
+ # _create_batch()
+ #
+ # Abstract method for creating a batch object. Subclasses can override
+ # this method to instantiate a subclass of _SandboxBatch.
+ #
+ # Args:
+ # main_group (:class:`_SandboxBatchGroup`): The top level batch group.
+ # flags (:class:`.SandboxFlags`): The flags for commands in this batch.
+ # collect (str): An optional directory containing partial install contents
+ # on command failure.
+ #
+ def _create_batch(self, main_group, flags, *, collect=None):
+ return _SandboxBatch(self, main_group, flags, collect=collect)
+
+ # _use_cas_based_directory()
+ #
+ # Whether to use CasBasedDirectory as sandbox root. If this returns `False`,
+ # FileBasedDirectory will be used.
+ #
+ # Returns:
+ # (bool): Whether to use CasBasedDirectory
+ #
+ def _use_cas_based_directory(self):
+ # Use CasBasedDirectory as sandbox root if neither Sandbox.get_directory()
+ # nor Sandbox.run() are required. This allows faster staging.
+ if not self.__allow_real_directory and not self.__allow_run:
+ return True
+
+ return 'BST_CAS_DIRECTORIES' in os.environ
+
+ ################################################
+ # Private methods #
+ ################################################
+ # _get_context()
+ #
+ # Fetches the context BuildStream was launched with.
+ #
+ # Returns:
+ # (Context): The context of this BuildStream invocation
+ def _get_context(self):
+ return self.__context
+
+ # _get_project()
+ #
+ # Fetches the Project this sandbox was created to build for.
+ #
+ # Returns:
+ # (Project): The project this sandbox was created for.
+ def _get_project(self):
+ return self.__project
+
+ # _get_marked_directories()
+ #
+ # Fetches the marked directories in the sandbox
+ #
+ # Returns:
+ # (list): A list of directory mark objects.
+ #
+ # The returned objects are dictionaries with the following attributes:
+ # directory: The absolute path within the sandbox
+ # artifact: Whether the path will contain artifacts or not
+ #
+ def _get_marked_directories(self):
+ return self.__directories
+
+ # _get_mount_source()
+ #
+ # Fetches the list of mount sources
+ #
+ # Returns:
+ # (dict): A dictionary where keys are mount points and values are the mount sources
+ def _get_mount_sources(self):
+ return self.__mount_sources
+
+ # _set_mount_source()
+ #
+ # Sets the mount source for a given mountpoint
+ #
+ # Args:
+ # mountpoint (str): The absolute mountpoint path inside the sandbox
+ # mount_source (str): the host path to be mounted at the mount point
+ def _set_mount_source(self, mountpoint, mount_source):
+ self.__mount_sources[mountpoint] = mount_source
+
+ # _get_environment()
+ #
+ # Fetches the environment variables for running commands
+ # in the sandbox.
+ #
+ # Args:
+ # cwd (str): The working directory the command has been requested to run in, if any.
+ # env (str): The environment the command has been requested to run in, if any.
+ #
+ # Returns:
+ # (str): The sandbox work directory
+ def _get_environment(self, *, cwd=None, env=None):
+ cwd = self._get_work_directory(cwd=cwd)
+ if env is None:
+ env = self.__env
+
+ # Naive getcwd implementations can break when bind-mounts to different
+ # paths on the same filesystem are present. Letting the command know
+ # what directory it is in makes it unnecessary to call the faulty
+ # getcwd.
+ env = dict(env)
+ env['PWD'] = cwd
+
+ return env
+
+ # _get_work_directory()
+ #
+ # Fetches the working directory for running commands
+ # in the sandbox.
+ #
+ # Args:
+ # cwd (str): The working directory the command has been requested to run in, if any.
+ #
+ # Returns:
+ # (str): The sandbox work directory
+ def _get_work_directory(self, *, cwd=None):
+ return cwd or self.__cwd or '/'
+
+ # _get_scratch_directory()
+ #
+ # Fetches the sandbox scratch directory, this directory can
+ # be used by the sandbox implementation to cache things or
+ # redirect temporary fuse mounts.
+ #
+ # The scratch directory is guaranteed to be on the same
+ # filesystem as the root directory.
+ #
+ # Returns:
+ # (str): The sandbox scratch directory
+ def _get_scratch_directory(self):
+ assert not self.__bare_directory, "Scratch is not going to work with bare directories"
+ return self.__scratch
+
+ # _get_output()
+ #
+ # Fetches the stdout & stderr
+ #
+ # Returns:
+ # (file): The stdout, or None to inherit
+ # (file): The stderr, or None to inherit
+ def _get_output(self):
+ return (self.__stdout, self.__stderr)
+
+ # _get_config()
+ #
+ # Fetches the sandbox configuration object.
+ #
+ # Returns:
+ # (SandboxConfig): An object containing the configuration
+ # data passed in during construction.
+ def _get_config(self):
+ return self.__config
+
+ # _has_command()
+ #
+ # Tests whether a command exists inside the sandbox
+ #
+ # Args:
+ # command (list): The command to test.
+ # env (dict): A dictionary of string key, value pairs to set as environment
+ # variables inside the sandbox environment.
+ # Returns:
+ # (bool): Whether a command exists inside the sandbox.
+ def _has_command(self, command, env=None):
+ if os.path.isabs(command):
+ return os.path.lexists(os.path.join(
+ self._root, command.lstrip(os.sep)))
+
+ for path in env.get('PATH').split(':'):
+ if os.path.lexists(os.path.join(
+ self._root, path.lstrip(os.sep), command)):
+ return True
+
+ return False
+
+ # _get_plugin_id()
+ #
+ # Get the plugin's unique identifier
+ #
+ def _get_plugin_id(self):
+ return self.__plugin_id
+
+ # _callback()
+ #
+ # If this is called outside a batch context, the specified function is
+ # invoked immediately.
+ #
+ # If this is called in a batch context, the function is added to the batch
+ # for later invocation.
+ #
+ # Args:
+ # callback (callable): The function to invoke
+ #
+ def _callback(self, callback):
+ if self.__batch:
+ batch_call = _SandboxBatchCall(callback)
+
+ current_group = self.__batch.current_group
+ current_group.append(batch_call)
+ else:
+ callback()
+
+ # _disable_run()
+ #
+ # Raise exception if `Sandbox.run()` is called. This enables use of
+ # CasBasedDirectory for faster staging when command execution is not
+ # required.
+ #
+ def _disable_run(self):
+ self.__allow_run = False
+
+ # _set_build_directory()
+ #
+ # Sets the build directory - the directory which may be preserved as
+ # buildtree in the artifact.
+ #
+ # Args:
+ # directory (str): An absolute path within the sandbox
+ # always (bool): True if the build directory should always be downloaded,
+ # False if it should be downloaded only on failure
+ #
+ def _set_build_directory(self, directory, *, always):
+ self._build_directory = directory
+ self._build_directory_always = always
+
+
+# _SandboxBatch()
+#
+# A batch of sandbox commands.
+#
+class _SandboxBatch():
+
+ def __init__(self, sandbox, main_group, flags, *, collect=None):
+ self.sandbox = sandbox
+ self.main_group = main_group
+ self.current_group = main_group
+ self.flags = flags
+ self.collect = collect
+
+ def execute(self):
+ self.main_group.execute(self)
+
+ def execute_group(self, group):
+ if group.label:
+ context = self.sandbox._get_context()
+ cm = context.timed_activity(group.label, unique_id=self.sandbox._get_plugin_id())
+ else:
+ cm = contextlib.suppress()
+
+ with cm:
+ group.execute_children(self)
+
+ def execute_command(self, command):
+ if command.label:
+ context = self.sandbox._get_context()
+ message = Message(self.sandbox._get_plugin_id(), MessageType.STATUS,
+ 'Running command', detail=command.label)
+ context.message(message)
+
+ exitcode = self.sandbox._run(command.command, self.flags, cwd=command.cwd, env=command.env)
+ if exitcode != 0:
+ cmdline = ' '.join(shlex.quote(cmd) for cmd in command.command)
+ label = command.label or cmdline
+ raise SandboxCommandError("Command failed with exitcode {}".format(exitcode),
+ detail=label, collect=self.collect)
+
+ def execute_call(self, call):
+ call.callback()
+
+
+# _SandboxBatchItem()
+#
+# An item in a command batch.
+#
+class _SandboxBatchItem():
+
+ def __init__(self, *, label=None):
+ self.label = label
+
+
+# _SandboxBatchCommand()
+#
+# A command item in a command batch.
+#
+class _SandboxBatchCommand(_SandboxBatchItem):
+
+ def __init__(self, command, *, cwd, env, label=None):
+ super().__init__(label=label)
+
+ self.command = command
+ self.cwd = cwd
+ self.env = env
+
+ def execute(self, batch):
+ batch.execute_command(self)
+
+
+# _SandboxBatchGroup()
+#
+# A group in a command batch.
+#
+class _SandboxBatchGroup(_SandboxBatchItem):
+
+ def __init__(self, *, label=None):
+ super().__init__(label=label)
+
+ self.children = []
+
+ def append(self, item):
+ self.children.append(item)
+
+ def execute(self, batch):
+ batch.execute_group(self)
+
+ def execute_children(self, batch):
+ for item in self.children:
+ item.execute(batch)
+
+
+# _SandboxBatchCall()
+#
+# A call item in a command batch.
+#
+class _SandboxBatchCall(_SandboxBatchItem):
+
+ def __init__(self, callback):
+ super().__init__()
+
+ self.callback = callback
+
+ def execute(self, batch):
+ batch.execute_call(self)