# # Copyright (C) 2016 Codethink Limited # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library. If not, see . # # Authors: # Andrew Leeming # Tristan Van Berkom import os import sys import time import errno import signal import subprocess import shutil from contextlib import ExitStack import psutil from .._exceptions import SandboxError from .. import utils, _signals from ._mount import MountMap from . import Sandbox, SandboxFlags # SandboxBwrap() # # Default bubblewrap based sandbox implementation. # class SandboxBwrap(Sandbox): # Minimal set of devices for the sandbox DEVICES = [ '/dev/full', '/dev/null', '/dev/urandom', '/dev/random', '/dev/zero' ] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.user_ns_available = kwargs['user_ns_available'] self.die_with_parent_available = kwargs['die_with_parent_available'] def run(self, command, flags, *, cwd=None, env=None): stdout, stderr = self._get_output() # Allowable access to underlying storage as we're part of the sandbox root_directory = self.get_virtual_directory()._get_underlying_directory() # Fallback to the sandbox default settings for # the cwd and env. # if cwd is None: cwd = self._get_work_directory() if env is None: env = self._get_environment() if cwd is None: cwd = '/' # Naive getcwd implementations can break when bind-mounts to different # paths on the same filesystem are present. Letting the command know # what directory it is in makes it unnecessary to call the faulty # getcwd. env['PWD'] = cwd if not self._has_command(command[0], env): raise SandboxError("Staged artifacts do not provide command " "'{}'".format(command[0]), reason='missing-command') # We want command args as a list of strings if isinstance(command, str): command = [command] # Create the mount map, this will tell us where # each mount point needs to be mounted from and to mount_map = MountMap(self, flags & SandboxFlags.ROOT_READ_ONLY) root_mount_source = mount_map.get_mount_source('/') # Grab the full path of the bwrap binary bwrap_command = [utils.get_host_tool('bwrap')] for k, v in env.items(): bwrap_command += ['--setenv', k, v] for k in os.environ.keys() - env.keys(): bwrap_command += ['--unsetenv', k] # Create a new pid namespace, this also ensures that any subprocesses # are cleaned up when the bwrap process exits. bwrap_command += ['--unshare-pid'] # Ensure subprocesses are cleaned up when the bwrap parent dies. if self.die_with_parent_available: bwrap_command += ['--die-with-parent'] # Add in the root filesystem stuff first. # # The rootfs is mounted as RW initially so that further mounts can be # placed on top. If a RO root is required, after all other mounts are # complete, root is remounted as RO bwrap_command += ["--bind", root_mount_source, "/"] if not flags & SandboxFlags.NETWORK_ENABLED: bwrap_command += ['--unshare-net'] bwrap_command += ['--unshare-uts', '--hostname', 'buildstream'] bwrap_command += ['--unshare-ipc'] if cwd is not None: bwrap_command += ['--chdir', cwd] # Give it a proc and tmpfs bwrap_command += [ '--proc', '/proc', '--tmpfs', '/tmp' ] # In interactive mode, we want a complete devpts inside # the container, so there is a /dev/console and such. In # the regular non-interactive sandbox, we want to hand pick # a minimal set of devices to expose to the sandbox. # if flags & SandboxFlags.INTERACTIVE: bwrap_command += ['--dev', '/dev'] else: for device in self.DEVICES: bwrap_command += ['--dev-bind', device, device] # Add bind mounts to any marked directories marked_directories = self._get_marked_directories() mount_source_overrides = self._get_mount_sources() for mark in marked_directories: mount_point = mark['directory'] if mount_point in mount_source_overrides: mount_source = mount_source_overrides[mount_point] else: mount_source = mount_map.get_mount_source(mount_point) # Use --dev-bind for all mounts, this is simply a bind mount which does # not restrictive about devices. # # While it's important for users to be able to mount devices # into the sandbox for `bst shell` testing purposes, it is # harmless to do in a build environment where the directories # we mount just never contain device files. # bwrap_command += ['--dev-bind', mount_source, mount_point] if flags & SandboxFlags.ROOT_READ_ONLY: bwrap_command += ["--remount-ro", "/"] # Set UID and GUI if self.user_ns_available: bwrap_command += ['--unshare-user'] if not flags & SandboxFlags.INHERIT_UID: uid = self._get_config().build_uid gid = self._get_config().build_gid bwrap_command += ['--uid', str(uid), '--gid', str(gid)] # Add the command bwrap_command += command # bwrap might create some directories while being suid # and may give them to root gid, if it does, we'll want # to clean them up after, so record what we already had # there just in case so that we can safely cleanup the debris. # existing_basedirs = { directory: os.path.exists(os.path.join(root_directory, directory)) for directory in ['tmp', 'dev', 'proc'] } # Use the MountMap context manager to ensure that any redirected # mounts through fuse layers are in context and ready for bwrap # to mount them from. # with ExitStack() as stack: stack.enter_context(mount_map.mounted(self)) # Ensure the cwd exists if cwd is not None: workdir = os.path.join(root_mount_source, cwd.lstrip(os.sep)) os.makedirs(workdir, exist_ok=True) # If we're interactive, we want to inherit our stdin, # otherwise redirect to /dev/null, ensuring process # disconnected from terminal. if flags & SandboxFlags.INTERACTIVE: stdin = sys.stdin else: stdin = stack.enter_context(open(os.devnull, "r")) # Run bubblewrap ! exit_code = self.run_bwrap(bwrap_command, stdin, stdout, stderr, (flags & SandboxFlags.INTERACTIVE)) # Cleanup things which bwrap might have left behind, while # everything is still mounted because bwrap can be creating # the devices on the fuse mount, so we should remove it there. if not flags & SandboxFlags.INTERACTIVE: for device in self.DEVICES: device_path = os.path.join(root_mount_source, device.lstrip('/')) # This will remove the device in a loop, allowing some # retries in case the device file leaked by bubblewrap is still busy self.try_remove_device(device_path) # Remove /tmp, this is a bwrap owned thing we want to be sure # never ends up in an artifact for basedir in ['tmp', 'dev', 'proc']: # Skip removal of directories which already existed before # launching bwrap if existing_basedirs[basedir]: continue base_directory = os.path.join(root_mount_source, basedir) if flags & SandboxFlags.INTERACTIVE: # Be more lenient in interactive mode here. # # In interactive mode; it's possible that the project shell # configuration has mounted some things below the base # directories, such as /dev/dri, and in this case it's less # important to consider cleanup, as we wont be collecting # this build result and creating an artifact. # # Note: Ideally; we should instead fix upstream bubblewrap to # cleanup any debris it creates at startup time, and do # the same ourselves for any directories we explicitly create. # shutil.rmtree(base_directory, ignore_errors=True) else: try: os.rmdir(base_directory) except FileNotFoundError: # ignore this, if bwrap cleaned up properly then it's not a problem. # # If the directory was not empty on the other hand, then this is clearly # a bug, bwrap mounted a tempfs here and when it exits, that better be empty. pass self._vdir._mark_changed() return exit_code def run_bwrap(self, argv, stdin, stdout, stderr, interactive): # Wrapper around subprocess.Popen() with common settings. # # This function blocks until the subprocess has terminated. # # It then returns a tuple of (exit code, stdout output, stderr output). # If stdout was not equal to subprocess.PIPE, stdout will be None. Same for # stderr. # Fetch the process actually launched inside the bwrap sandbox, or the # intermediat control bwrap processes. # # NOTE: # The main bwrap process itself is setuid root and as such we cannot # send it any signals. Since we launch bwrap with --unshare-pid, it's # direct child is another bwrap process which retains ownership of the # pid namespace. This is the right process to kill when terminating. # # The grandchild is the binary which we asked bwrap to launch on our # behalf, whatever this binary is, it is the right process to use # for suspending and resuming. In the case that this is a shell, the # shell will be group leader and all build scripts will stop/resume # with that shell. # def get_user_proc(bwrap_pid, grand_child=False): bwrap_proc = psutil.Process(bwrap_pid) bwrap_children = bwrap_proc.children() if bwrap_children: if grand_child: bwrap_grand_children = bwrap_children[0].children() if bwrap_grand_children: return bwrap_grand_children[0] else: return bwrap_children[0] return None def terminate_bwrap(): if process: user_proc = get_user_proc(process.pid) if user_proc: user_proc.kill() def suspend_bwrap(): if process: user_proc = get_user_proc(process.pid, grand_child=True) if user_proc: group_id = os.getpgid(user_proc.pid) os.killpg(group_id, signal.SIGSTOP) def resume_bwrap(): if process: user_proc = get_user_proc(process.pid, grand_child=True) if user_proc: group_id = os.getpgid(user_proc.pid) os.killpg(group_id, signal.SIGCONT) with ExitStack() as stack: # We want to launch bwrap in a new session in non-interactive # mode so that we handle the SIGTERM and SIGTSTP signals separately # from the nested bwrap process, but in interactive mode this # causes launched shells to lack job control (we dont really # know why that is). # if interactive: new_session = False else: new_session = True stack.enter_context(_signals.suspendable(suspend_bwrap, resume_bwrap)) stack.enter_context(_signals.terminator(terminate_bwrap)) process = subprocess.Popen( argv, # The default is to share file descriptors from the parent process # to the subprocess, which is rarely good for sandboxing. close_fds=True, stdin=stdin, stdout=stdout, stderr=stderr, start_new_session=new_session ) # Wait for the child process to finish, ensuring that # a SIGINT has exactly the effect the user probably # expects (i.e. let the child process handle it). try: while True: try: _, status = os.waitpid(process.pid, 0) # If the process exits due to a signal, we # brutally murder it to avoid zombies if not os.WIFEXITED(status): user_proc = get_user_proc(process.pid) if user_proc: utils._kill_process_tree(user_proc.pid) # If we receive a KeyboardInterrupt we continue # waiting for the process since we are in the same # process group and it should also have received # the SIGINT. except KeyboardInterrupt: continue break # If we can't find the process, it has already died of its # own accord, and therefore we don't need to check or kill # anything. except psutil.NoSuchProcess: pass # Return the exit code - see the documentation for # os.WEXITSTATUS to see why this is required. if os.WIFEXITED(status): exit_code = os.WEXITSTATUS(status) else: exit_code = -1 if interactive and stdin.isatty(): # Make this process the foreground process again, otherwise the # next read() on stdin will trigger SIGTTIN and stop the process. # This is required because the sandboxed process does not have # permission to do this on its own (running in separate PID namespace). # # tcsetpgrp() will trigger SIGTTOU when called from a background # process, so ignore it temporarily. handler = signal.signal(signal.SIGTTOU, signal.SIG_IGN) os.tcsetpgrp(0, os.getpid()) signal.signal(signal.SIGTTOU, handler) return exit_code def try_remove_device(self, device_path): # Put some upper limit on the tries here max_tries = 1000 tries = 0 while True: try: os.unlink(device_path) except OSError as e: if e.errno == errno.EBUSY: # This happens on some machines, seems there is a race sometimes # after bubblewrap returns and the device files it bind-mounted did # not finish unmounting. # if tries < max_tries: tries += 1 time.sleep(1 / 100) continue else: # We've reached the upper limit of tries, bail out now # because something must have went wrong # raise elif e.errno == errno.ENOENT: # Bubblewrap cleaned it up for us, no problem if we cant remove it break else: # Something unexpected, reraise this error raise else: # Successfully removed the symlink break