diff options
author | Jim MacArthur <jim+gitlab@mode7.co.uk> | 2018-08-01 13:16:04 +0000 |
---|---|---|
committer | Jim MacArthur <jim+gitlab@mode7.co.uk> | 2018-08-01 13:16:04 +0000 |
commit | eabc38998ea35f5cd07846e40460b35453a995a8 (patch) | |
tree | fb3d5480e8566c79679d19f6874cfe01308ecd40 | |
parent | d10e4668e79155c406fbe726be9458261083f3d7 (diff) | |
parent | 02c6c84682af1d6ee4a27588105e15c599085850 (diff) | |
download | buildstream-eabc38998ea35f5cd07846e40460b35453a995a8.tar.gz |
Merge branch 'jmac/virtual_directories' into 'master'
Abstract directory class and filesystem-backed implementation
See merge request BuildStream/buildstream!445
-rw-r--r-- | buildstream/_exceptions.py | 1 | ||||
-rw-r--r-- | buildstream/_stream.py | 52 | ||||
-rw-r--r-- | buildstream/element.py | 120 | ||||
-rw-r--r-- | buildstream/plugins/elements/compose.py | 45 | ||||
-rw-r--r-- | buildstream/plugins/elements/import.py | 23 | ||||
-rw-r--r-- | buildstream/plugins/elements/stack.py | 11 | ||||
-rw-r--r-- | buildstream/sandbox/_mount.py | 3 | ||||
-rw-r--r-- | buildstream/sandbox/_sandboxbwrap.py | 4 | ||||
-rw-r--r-- | buildstream/sandbox/_sandboxchroot.py | 2 | ||||
-rw-r--r-- | buildstream/sandbox/sandbox.py | 39 | ||||
-rw-r--r-- | buildstream/scriptelement.py | 9 | ||||
-rw-r--r-- | buildstream/storage/__init__.py | 22 | ||||
-rw-r--r-- | buildstream/storage/_filebaseddirectory.py | 216 | ||||
-rw-r--r-- | buildstream/storage/directory.py | 155 | ||||
-rw-r--r-- | buildstream/utils.py | 18 |
15 files changed, 554 insertions, 166 deletions
diff --git a/buildstream/_exceptions.py b/buildstream/_exceptions.py index c86b6780c..e55d942fd 100644 --- a/buildstream/_exceptions.py +++ b/buildstream/_exceptions.py @@ -88,6 +88,7 @@ class ErrorDomain(Enum): ELEMENT = 11 APP = 12 STREAM = 13 + VIRTUAL_FS = 14 # BstError is an internal base exception class for BuildSream diff --git a/buildstream/_stream.py b/buildstream/_stream.py index dcefc64f1..bd4b2ccf0 100644 --- a/buildstream/_stream.py +++ b/buildstream/_stream.py @@ -407,15 +407,16 @@ class Stream(): integrate=integrate) as sandbox: # Copy or move the sandbox to the target directory - sandbox_root = sandbox.get_directory() + sandbox_vroot = sandbox.get_virtual_directory() + if not tar: with target.timed_activity("Checking out files in '{}'" .format(location)): try: if hardlinks: - self._checkout_hardlinks(sandbox_root, location) + self._checkout_hardlinks(sandbox_vroot, location) else: - utils.copy_files(sandbox_root, location) + sandbox_vroot.export_files(location) except OSError as e: raise StreamError("Failed to checkout files: '{}'" .format(e)) from e @@ -424,14 +425,12 @@ class Stream(): with target.timed_activity("Creating tarball"): with os.fdopen(sys.stdout.fileno(), 'wb') as fo: with tarfile.open(fileobj=fo, mode="w|") as tf: - Stream._add_directory_to_tarfile( - tf, sandbox_root, '.') + sandbox_vroot.export_to_tar(tf, '.') else: with target.timed_activity("Creating tarball '{}'" .format(location)): with tarfile.open(location, "w:") as tf: - Stream._add_directory_to_tarfile( - tf, sandbox_root, '.') + sandbox_vroot.export_to_tar(tf, '.') except BstError as e: raise StreamError("Error while staging dependencies into a sandbox" @@ -1050,46 +1049,13 @@ class Stream(): # Helper function for checkout() # - def _checkout_hardlinks(self, sandbox_root, directory): + def _checkout_hardlinks(self, sandbox_vroot, directory): try: - removed = utils.safe_remove(directory) + utils.safe_remove(directory) except OSError as e: raise StreamError("Failed to remove 
checkout directory: {}".format(e)) from e - if removed: - # Try a simple rename of the sandbox root; if that - # doesnt cut it, then do the regular link files code path - try: - os.rename(sandbox_root, directory) - except OSError: - os.makedirs(directory, exist_ok=True) - utils.link_files(sandbox_root, directory) - else: - utils.link_files(sandbox_root, directory) - - # Add a directory entry deterministically to a tar file - # - # This function takes extra steps to ensure the output is deterministic. - # First, it sorts the results of os.listdir() to ensure the ordering of - # the files in the archive is the same. Second, it sets a fixed - # timestamp for each entry. See also https://bugs.python.org/issue24465. - @staticmethod - def _add_directory_to_tarfile(tf, dir_name, dir_arcname, mtime=0): - for filename in sorted(os.listdir(dir_name)): - name = os.path.join(dir_name, filename) - arcname = os.path.join(dir_arcname, filename) - - tarinfo = tf.gettarinfo(name, arcname) - tarinfo.mtime = mtime - - if tarinfo.isreg(): - with open(name, "rb") as f: - tf.addfile(tarinfo, f) - elif tarinfo.isdir(): - tf.addfile(tarinfo) - Stream._add_directory_to_tarfile(tf, name, arcname, mtime) - else: - tf.addfile(tarinfo) + sandbox_vroot.export_files(directory, can_link=True, can_destroy=True) # Write the element build script to the given directory def _write_element_script(self, directory, element): diff --git a/buildstream/element.py b/buildstream/element.py index 4260d32a5..2218ef94b 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -80,7 +80,6 @@ from collections import Mapping, OrderedDict from contextlib import contextmanager from enum import Enum import tempfile -import time import shutil from . import _yaml @@ -97,6 +96,9 @@ from . 
import _site from ._platform import Platform from .sandbox._config import SandboxConfig +from .storage.directory import Directory +from .storage._filebaseddirectory import FileBasedDirectory, VirtualDirectoryError + # _KeyStrength(): # @@ -195,6 +197,13 @@ class Element(Plugin): *Since: 1.2* """ + BST_VIRTUAL_DIRECTORY = False + """Whether to raise exceptions if an element uses Sandbox.get_directory + instead of Sandbox.get_virtual_directory. + + *Since: 1.4* + """ + def __init__(self, context, project, artifacts, meta, plugin_conf): self.__cache_key_dict = None # Dict for cache key calculation @@ -627,10 +636,10 @@ class Element(Plugin): # Hard link it into the staging area # - basedir = sandbox.get_directory() - stagedir = basedir \ + vbasedir = sandbox.get_virtual_directory() + vstagedir = vbasedir \ if path is None \ - else os.path.join(basedir, path.lstrip(os.sep)) + else vbasedir.descend(path.lstrip(os.sep).split(os.sep)) files = list(self.__compute_splits(include, exclude, orphans)) @@ -642,15 +651,8 @@ class Element(Plugin): link_files = files copy_files = [] - link_result = utils.link_files(artifact, stagedir, files=link_files, - report_written=True) - copy_result = utils.copy_files(artifact, stagedir, files=copy_files, - report_written=True) - - cur_time = time.time() - - for f in copy_result.files_written: - os.utime(os.path.join(stagedir, f), times=(cur_time, cur_time)) + link_result = vstagedir.import_files(artifact, files=link_files, report_written=True, can_link=True) + copy_result = vstagedir.import_files(artifact, files=copy_files, report_written=True, update_utimes=True) return link_result.combine(copy_result) @@ -1359,40 +1361,45 @@ class Element(Plugin): sandbox._set_mount_source(directory, workspace.get_absolute_path()) # Stage all sources that need to be copied - sandbox_root = sandbox.get_directory() - host_directory = os.path.join(sandbox_root, directory.lstrip(os.sep)) - self._stage_sources_at(host_directory, 
mount_workspaces=mount_workspaces) + sandbox_vroot = sandbox.get_virtual_directory() + host_vdirectory = sandbox_vroot.descend(directory.lstrip(os.sep).split(os.sep), create=True) + self._stage_sources_at(host_vdirectory, mount_workspaces=mount_workspaces) # _stage_sources_at(): # # Stage this element's sources to a directory # # Args: - # directory (str): An absolute path to stage the sources at + # vdirectory (:class:`.storage.Directory`): A virtual directory object to stage sources into. # mount_workspaces (bool): mount workspaces if True, copy otherwise # - def _stage_sources_at(self, directory, mount_workspaces=True): + def _stage_sources_at(self, vdirectory, mount_workspaces=True): with self.timed_activity("Staging sources", silent_nested=True): - if os.path.isdir(directory) and os.listdir(directory): - raise ElementError("Staging directory '{}' is not empty".format(directory)) - - workspace = self._get_workspace() - if workspace: - # If mount_workspaces is set and we're doing incremental builds, - # the workspace is already mounted into the sandbox. - if not (mount_workspaces and self.__can_build_incrementally()): - with self.timed_activity("Staging local files at {}".format(workspace.path)): - workspace.stage(directory) - else: - # No workspace, stage directly - for source in self.sources(): - source._stage(directory) - + if not isinstance(vdirectory, Directory): + vdirectory = FileBasedDirectory(vdirectory) + if not vdirectory.is_empty(): + raise ElementError("Staging directory '{}' is not empty".format(vdirectory)) + + with tempfile.TemporaryDirectory() as temp_staging_directory: + + workspace = self._get_workspace() + if workspace: + # If mount_workspaces is set and we're doing incremental builds, + # the workspace is already mounted into the sandbox. 
+ if not (mount_workspaces and self.__can_build_incrementally()): + with self.timed_activity("Staging local files at {}".format(workspace.path)): + workspace.stage(temp_staging_directory) + else: + # No workspace, stage directly + for source in self.sources(): + source._stage(temp_staging_directory) + + vdirectory.import_files(temp_staging_directory) # Ensure deterministic mtime of sources at build time - utils._set_deterministic_mtime(directory) + vdirectory.set_deterministic_mtime() # Ensure deterministic owners of sources at build time - utils._set_deterministic_user(directory) + vdirectory.set_deterministic_user() # _set_required(): # @@ -1508,7 +1515,7 @@ class Element(Plugin): with _signals.terminator(cleanup_rootdir), \ self.__sandbox(rootdir, output_file, output_file, self.__sandbox_config) as sandbox: # nopep8 - sandbox_root = sandbox.get_directory() + sandbox_vroot = sandbox.get_virtual_directory() # By default, the dynamic public data is the same as the static public data. # The plugin's assemble() method may modify this, though. 
@@ -1540,11 +1547,11 @@ class Element(Plugin): # workspace = self._get_workspace() if workspace and self.__staged_sources_directory: - sandbox_root = sandbox.get_directory() - sandbox_path = os.path.join(sandbox_root, - self.__staged_sources_directory.lstrip(os.sep)) + sandbox_vroot = sandbox.get_virtual_directory() + path_components = self.__staged_sources_directory.lstrip(os.sep).split(os.sep) + sandbox_vpath = sandbox_vroot.descend(path_components) try: - utils.copy_files(workspace.path, sandbox_path) + sandbox_vpath.import_files(workspace.path) except UtilError as e: self.warn("Failed to preserve workspace state for failed build sysroot: {}" .format(e)) @@ -1556,7 +1563,11 @@ class Element(Plugin): raise finally: if collect is not None: - collectdir = os.path.join(sandbox_root, collect.lstrip(os.sep)) + try: + collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep)) + except VirtualDirectoryError: + # No collect directory existed + collectvdir = None # Create artifact directory structure assembledir = os.path.join(rootdir, 'artifact') @@ -1565,20 +1576,26 @@ class Element(Plugin): metadir = os.path.join(assembledir, 'meta') buildtreedir = os.path.join(assembledir, 'buildtree') os.mkdir(assembledir) - if collect is not None and os.path.exists(collectdir): + if collect is not None and collectvdir is not None: os.mkdir(filesdir) os.mkdir(logsdir) os.mkdir(metadir) os.mkdir(buildtreedir) # Hard link files from collect dir to files directory - if collect is not None and os.path.exists(collectdir): - utils.link_files(collectdir, filesdir) - - sandbox_build_dir = os.path.join(sandbox_root, self.get_variable('build-root').lstrip(os.sep)) - # Hard link files from build-root dir to buildtreedir directory - if os.path.isdir(sandbox_build_dir): - utils.link_files(sandbox_build_dir, buildtreedir) + if collect is not None and collectvdir is not None: + collectvdir.export_files(filesdir, can_link=True) + + try: + sandbox_build_dir = sandbox_vroot.descend( + 
self.get_variable('build-root').lstrip(os.sep).split(os.sep)) + # Hard link files from build-root dir to buildtreedir directory + sandbox_build_dir.export_files(buildtreedir) + except VirtualDirectoryError: + # Directory could not be found. Pre-virtual + # directory behaviour was to continue silently + # if the directory could not be found. + pass # Copy build log log_filename = context.get_log_filename() @@ -1626,7 +1643,7 @@ class Element(Plugin): self.__artifact_size = utils._get_dir_size(assembledir) self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit()) - if collect is not None and not os.path.exists(collectdir): + if collect is not None and collectvdir is None: raise ElementError( "Directory '{}' was not found inside the sandbox, " "unable to collect artifact contents" @@ -2126,7 +2143,8 @@ class Element(Plugin): directory, stdout=stdout, stderr=stderr, - config=config) + config=config, + allow_real_directory=not self.BST_VIRTUAL_DIRECTORY) yield sandbox else: diff --git a/buildstream/plugins/elements/compose.py b/buildstream/plugins/elements/compose.py index 44a760215..a206dd6d7 100644 --- a/buildstream/plugins/elements/compose.py +++ b/buildstream/plugins/elements/compose.py @@ -34,7 +34,6 @@ The default configuration and possible options are as such: """ import os -from buildstream import utils from buildstream import Element, Scope @@ -56,6 +55,9 @@ class ComposeElement(Element): # added, to reduce the potential for confusion BST_FORBID_SOURCES = True + # This plugin has been modified to avoid the use of Sandbox.get_directory + BST_VIRTUAL_DIRECTORY = True + def configure(self, node): self.node_validate(node, [ 'integrate', 'include', 'exclude', 'include-orphans' @@ -104,7 +106,8 @@ class ComposeElement(Element): orphans=self.include_orphans) manifest.update(files) - basedir = sandbox.get_directory() + # Make a snapshot of all the files. 
+ vbasedir = sandbox.get_virtual_directory() modified_files = set() removed_files = set() added_files = set() @@ -116,38 +119,24 @@ class ComposeElement(Element): if require_split: # Make a snapshot of all the files before integration-commands are run. - snapshot = { - f: getmtime(os.path.join(basedir, f)) - for f in utils.list_relative_paths(basedir) - } + snapshot = set(vbasedir.list_relative_paths()) + vbasedir.mark_unmodified() for dep in self.dependencies(Scope.BUILD): dep.integrate(sandbox) if require_split: - # Calculate added, modified and removed files - basedir_contents = set(utils.list_relative_paths(basedir)) + post_integration_snapshot = vbasedir.list_relative_paths() + modified_files = set(vbasedir.list_modified_paths()) + basedir_contents = set(post_integration_snapshot) for path in manifest: - if path in basedir_contents: - if path in snapshot: - preintegration_mtime = snapshot[path] - if preintegration_mtime != getmtime(os.path.join(basedir, path)): - modified_files.add(path) - else: - # If the path appears in the manifest but not the initial snapshot, - # it may be a file staged inside a directory symlink. In this case - # the path we got from the manifest won't show up in the snapshot - # because utils.list_relative_paths() doesn't recurse into symlink - # directories. - pass - elif path in snapshot: + if path in snapshot and path not in basedir_contents: removed_files.add(path) for path in basedir_contents: if path not in snapshot: added_files.add(path) - self.info("Integration modified {}, added {} and removed {} files" .format(len(modified_files), len(added_files), len(removed_files))) @@ -166,8 +155,7 @@ class ComposeElement(Element): # instead of into a subdir. The element assemble() method should # support this in some way. 
# - installdir = os.path.join(basedir, 'buildstream', 'install') - os.makedirs(installdir, exist_ok=True) + installdir = vbasedir.descend(['buildstream', 'install'], create=True) # We already saved the manifest for created files in the integration phase, # now collect the rest of the manifest. @@ -191,19 +179,12 @@ class ComposeElement(Element): with self.timed_activity("Creating composition", detail=detail, silent_nested=True): self.info("Composing {} files".format(len(manifest))) - utils.link_files(basedir, installdir, files=manifest) + installdir.import_files(vbasedir, files=manifest, can_link=True) # And we're done return os.path.join(os.sep, 'buildstream', 'install') -# Like os.path.getmtime(), but doesnt explode on symlinks -# -def getmtime(path): - stat = os.lstat(path) - return stat.st_mtime - - # Plugin entry point def setup(): return ComposeElement diff --git a/buildstream/plugins/elements/import.py b/buildstream/plugins/elements/import.py index 93594b623..0eca2a902 100644 --- a/buildstream/plugins/elements/import.py +++ b/buildstream/plugins/elements/import.py @@ -31,7 +31,6 @@ The empty configuration is as such: """ import os -import shutil from buildstream import Element, BuildElement, ElementError @@ -39,6 +38,9 @@ from buildstream import Element, BuildElement, ElementError class ImportElement(BuildElement): # pylint: disable=attribute-defined-outside-init + # This plugin has been modified to avoid the use of Sandbox.get_directory + BST_VIRTUAL_DIRECTORY = True + def configure(self, node): self.source = self.node_subst_member(node, 'source') self.target = self.node_subst_member(node, 'target') @@ -68,27 +70,22 @@ class ImportElement(BuildElement): # Do not mount workspaces as the files are copied from outside the sandbox self._stage_sources_in_sandbox(sandbox, 'input', mount_workspaces=False) - rootdir = sandbox.get_directory() - inputdir = os.path.join(rootdir, 'input') - outputdir = os.path.join(rootdir, 'output') + rootdir = 
sandbox.get_virtual_directory() + inputdir = rootdir.descend(['input']) + outputdir = rootdir.descend(['output'], create=True) # The directory to grab - inputdir = os.path.join(inputdir, self.source.lstrip(os.sep)) - inputdir = inputdir.rstrip(os.sep) + inputdir = inputdir.descend(self.source.strip(os.sep).split(os.sep)) # The output target directory - outputdir = os.path.join(outputdir, self.target.lstrip(os.sep)) - outputdir = outputdir.rstrip(os.sep) - - # Ensure target directory parent - os.makedirs(os.path.dirname(outputdir), exist_ok=True) + outputdir = outputdir.descend(self.target.strip(os.sep).split(os.sep), create=True) - if not os.path.exists(inputdir): + if inputdir.is_empty(): raise ElementError("{}: No files were found inside directory '{}'" .format(self, self.source)) # Move it over - shutil.move(inputdir, outputdir) + outputdir.import_files(inputdir) # And we're done return '/output' diff --git a/buildstream/plugins/elements/stack.py b/buildstream/plugins/elements/stack.py index 087d4dac0..138afedf7 100644 --- a/buildstream/plugins/elements/stack.py +++ b/buildstream/plugins/elements/stack.py @@ -24,13 +24,15 @@ Stack elements are simply a symbolic element used for representing a logical group of elements. """ -import os from buildstream import Element # Element implementation for the 'stack' kind. class StackElement(Element): + # This plugin has been modified to avoid the use of Sandbox.get_directory + BST_VIRTUAL_DIRECTORY = True + def configure(self, node): pass @@ -52,7 +54,7 @@ class StackElement(Element): # Just create a dummy empty artifact, its existence is a statement # that all this stack's dependencies are built. - rootdir = sandbox.get_directory() + vrootdir = sandbox.get_virtual_directory() # XXX FIXME: This is currently needed because the artifact # cache wont let us commit an empty artifact. 
@@ -61,10 +63,7 @@ class StackElement(Element): # the actual artifact data in a subdirectory, then we # will be able to store some additional state in the # artifact cache, and we can also remove this hack. - outputdir = os.path.join(rootdir, 'output', 'bst') - - # Ensure target directory parent - os.makedirs(os.path.dirname(outputdir), exist_ok=True) + vrootdir.descend(['output', 'bst'], create=True) # And we're done return '/output' diff --git a/buildstream/sandbox/_mount.py b/buildstream/sandbox/_mount.py index 1540d9d4f..0f96a92b7 100644 --- a/buildstream/sandbox/_mount.py +++ b/buildstream/sandbox/_mount.py @@ -32,7 +32,8 @@ from .._fuse import SafeHardlinks class Mount(): def __init__(self, sandbox, mount_point, safe_hardlinks): scratch_directory = sandbox._get_scratch_directory() - root_directory = sandbox.get_directory() + # Getting external_directory here is acceptable as we're part of the sandbox code. + root_directory = sandbox.get_virtual_directory().external_directory self.mount_point = mount_point self.safe_hardlinks = safe_hardlinks diff --git a/buildstream/sandbox/_sandboxbwrap.py b/buildstream/sandbox/_sandboxbwrap.py index 9ed677620..010e4791d 100644 --- a/buildstream/sandbox/_sandboxbwrap.py +++ b/buildstream/sandbox/_sandboxbwrap.py @@ -56,7 +56,9 @@ class SandboxBwrap(Sandbox): def run(self, command, flags, *, cwd=None, env=None): stdout, stderr = self._get_output() - root_directory = self.get_directory() + + # Allowable access to underlying storage as we're part of the sandbox + root_directory = self.get_virtual_directory().external_directory # Fallback to the sandbox default settings for # the cwd and env. diff --git a/buildstream/sandbox/_sandboxchroot.py b/buildstream/sandbox/_sandboxchroot.py index 8788c3031..de4eb46e2 100644 --- a/buildstream/sandbox/_sandboxchroot.py +++ b/buildstream/sandbox/_sandboxchroot.py @@ -90,7 +90,7 @@ class SandboxChroot(Sandbox): # Nonetheless a better solution could perhaps be found. 
rootfs = stack.enter_context(utils._tempdir(dir='/var/run/buildstream')) - stack.enter_context(self.create_devices(self.get_directory(), flags)) + stack.enter_context(self.create_devices(self._root, flags)) stack.enter_context(self.mount_dirs(rootfs, flags, stdout, stderr)) if flags & SandboxFlags.INTERACTIVE: diff --git a/buildstream/sandbox/sandbox.py b/buildstream/sandbox/sandbox.py index 7e1e32b65..9fe1194bb 100644 --- a/buildstream/sandbox/sandbox.py +++ b/buildstream/sandbox/sandbox.py @@ -29,7 +29,8 @@ See also: :ref:`sandboxing`. """ import os -from .._exceptions import ImplError +from .._exceptions import ImplError, BstError +from ..storage._filebaseddirectory import FileBasedDirectory class SandboxFlags(): @@ -90,28 +91,50 @@ class Sandbox(): self.__cwd = None self.__env = None self.__mount_sources = {} + self.__allow_real_directory = kwargs['allow_real_directory'] + # Configuration from kwargs common to all subclasses self.__config = kwargs['config'] self.__stdout = kwargs['stdout'] self.__stderr = kwargs['stderr'] - # Setup the directories + # Setup the directories. Root should be available to subclasses, hence + # being single-underscore. The others are private to this class. + self._root = os.path.join(directory, 'root') self.__directory = directory - self.__root = os.path.join(self.__directory, 'root') self.__scratch = os.path.join(self.__directory, 'scratch') - for directory_ in [self.__root, self.__scratch]: + for directory_ in [self._root, self.__scratch]: os.makedirs(directory_, exist_ok=True) def get_directory(self): """Fetches the sandbox root directory The root directory is where artifacts for the base - runtime environment should be staged. + runtime environment should be staged. Only works if + BST_VIRTUAL_DIRECTORY is not set. 
Returns: (str): The sandbox root directory + + """ + if self.__allow_real_directory: + return self._root + else: + raise BstError("You can't use get_directory") + + def get_virtual_directory(self): + """Fetches the sandbox root as a virtual directory + + The root directory is where artifacts for the base + runtime environment should be staged. Unlike + get_directory(), this is always permitted. + + Returns: + (Directory): A virtual directory for the sandbox root + """ - return self.__root + # For now, just create a new Directory every time we're asked + return FileBasedDirectory(self._root) def set_environment(self, environment): """Sets the environment variables for the sandbox @@ -293,11 +316,11 @@ class Sandbox(): def _has_command(self, command, env=None): if os.path.isabs(command): return os.path.exists(os.path.join( - self.get_directory(), command.lstrip(os.sep))) + self._root, command.lstrip(os.sep))) for path in env.get('PATH').split(':'): if os.path.exists(os.path.join( - self.get_directory(), path.lstrip(os.sep), command)): + self._root, path.lstrip(os.sep), command)): return True return False diff --git a/buildstream/scriptelement.py b/buildstream/scriptelement.py index 145dc2648..212402058 100644 --- a/buildstream/scriptelement.py +++ b/buildstream/scriptelement.py @@ -243,9 +243,8 @@ class ScriptElement(Element): with self.timed_activity("Staging {} at {}" .format(element.name, item['destination']), silent_nested=True): - real_dstdir = os.path.join(sandbox.get_directory(), - item['destination'].lstrip(os.sep)) - os.makedirs(os.path.dirname(real_dstdir), exist_ok=True) + virtual_dstdir = sandbox.get_virtual_directory() + virtual_dstdir.descend(item['destination'].lstrip(os.sep).split(os.sep), create=True) element.stage_dependency_artifacts(sandbox, Scope.RUN, path=item['destination']) for item in self.__layout: @@ -263,8 +262,8 @@ class ScriptElement(Element): for dep in element.dependencies(Scope.RUN): dep.integrate(sandbox) - os.makedirs(os.path.join(sandbox.get_directory(), 
self.__install_root.lstrip(os.sep)), - exist_ok=True) + install_root_path_components = self.__install_root.lstrip(os.sep).split(os.sep) + sandbox.get_virtual_directory().descend(install_root_path_components, create=True) def assemble(self, sandbox): diff --git a/buildstream/storage/__init__.py b/buildstream/storage/__init__.py new file mode 100644 index 000000000..33424ac8d --- /dev/null +++ b/buildstream/storage/__init__.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2018 Bloomberg Finance LP +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Authors: +# Jim MacArthur <jim.macarthur@codethink.co.uk> + +from ._filebaseddirectory import FileBasedDirectory +from .directory import Directory diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py new file mode 100644 index 000000000..5b3da28f4 --- /dev/null +++ b/buildstream/storage/_filebaseddirectory.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2018 Bloomberg Finance LP +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Authors: +# Jim MacArthur <jim.macarthur@codethink.co.uk> + +""" +FileBasedDirectory +================== + +Implementation of the Directory class which backs onto a normal POSIX filing system. + +See also: :ref:`sandboxing`. +""" + +import os +import time +from .._exceptions import BstError, ErrorDomain +from .directory import Directory +from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp +from ..utils import _set_deterministic_user, _set_deterministic_mtime + + +class VirtualDirectoryError(BstError): + """Raised by Directory functions when system calls fail. + This will be handled internally by the BuildStream core, + if you need to handle this error, then it should be reraised, + or either of the :class:`.ElementError` or :class:`.SourceError` + exceptions should be raised from this error. + """ + def __init__(self, message, reason=None): + super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason) + + +# FileBasedDirectory intentionally doesn't call its superclass constructor, +# which is meant to be unimplemented. +# pylint: disable=super-init-not-called + + +class _FileObject(): + """A description of a file in a virtual directory. The contents of + this class are never used, but there needs to be something present + for files so is_empty() works correctly. 
+ + """ + def __init__(self, virtual_directory: Directory, filename: str): + self.directory = virtual_directory + self.filename = filename + + +class FileBasedDirectory(Directory): + def __init__(self, external_directory=None): + self.external_directory = external_directory + self.index = {} + self._directory_read = False + + def _populate_index(self): + if self._directory_read: + return + for entry in os.listdir(self.external_directory): + if os.path.isdir(os.path.join(self.external_directory, entry)): + self.index[entry] = FileBasedDirectory(os.path.join(self.external_directory, entry)) + else: + self.index[entry] = _FileObject(self, entry) + self._directory_read = True + + def descend(self, subdirectory_spec, create=False): + """ See superclass Directory for arguments """ + # It's very common to send a directory name instead of a list and this causes + # bizarre errors, so check for it here + if not isinstance(subdirectory_spec, list): + subdirectory_spec = [subdirectory_spec] + + # Because of the way split works, it's common to get a list which begins with + # an empty string. Detect these and remove them. 
+ while subdirectory_spec and subdirectory_spec[0] == "": + subdirectory_spec.pop(0) + + if not subdirectory_spec: + return self + + self._populate_index() + if subdirectory_spec[0] in self.index: + entry = self.index[subdirectory_spec[0]] + if isinstance(entry, FileBasedDirectory): + new_path = os.path.join(self.external_directory, subdirectory_spec[0]) + return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create) + else: + error = "Cannot descend into {}, which is a '{}' in the directory {}" + raise VirtualDirectoryError(error.format(subdirectory_spec[0], + type(entry).__name__, + self.external_directory)) + else: + if create: + new_path = os.path.join(self.external_directory, subdirectory_spec[0]) + os.makedirs(new_path, exist_ok=True) + return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create) + else: + error = "No entry called '{}' found in the directory rooted at {}" + raise VirtualDirectoryError(error.format(subdirectory_spec[0], self.external_directory)) + + def import_files(self, external_pathspec, *, files=None, + report_written=True, update_utimes=False, + can_link=False): + """ See superclass Directory for arguments """ + + if isinstance(external_pathspec, Directory): + source_directory = external_pathspec.external_directory + else: + source_directory = external_pathspec + + if can_link and not update_utimes: + import_result = link_files(source_directory, self.external_directory, files=files, + ignore_missing=False, report_written=report_written) + else: + import_result = copy_files(source_directory, self.external_directory, files=files, + ignore_missing=False, report_written=report_written) + if update_utimes: + cur_time = time.time() + + for f in import_result.files_written: + os.utime(os.path.join(self.external_directory, f), times=(cur_time, cur_time)) + return import_result + + def set_deterministic_mtime(self): + _set_deterministic_mtime(self.external_directory) + + def set_deterministic_user(self): + 
_set_deterministic_user(self.external_directory) + + def export_files(self, to_directory, *, can_link=False, can_destroy=False): + if can_destroy: + # Try a simple rename of the sandbox root; if that + # doesnt cut it, then do the regular link files code path + try: + os.rename(self.external_directory, to_directory) + return + except OSError: + # Proceed using normal link/copy + pass + + os.makedirs(to_directory, exist_ok=True) + if can_link: + link_files(self.external_directory, to_directory) + else: + copy_files(self.external_directory, to_directory) + + # Add a directory entry deterministically to a tar file + # + # This function takes extra steps to ensure the output is deterministic. + # First, it sorts the results of os.listdir() to ensure the ordering of + # the files in the archive is the same. Second, it sets a fixed + # timestamp for each entry. See also https://bugs.python.org/issue24465. + def export_to_tar(self, tf, dir_arcname, mtime=0): + # We need directories here, including non-empty ones, + # so list_relative_paths is not used. + for filename in sorted(os.listdir(self.external_directory)): + source_name = os.path.join(self.external_directory, filename) + arcname = os.path.join(dir_arcname, filename) + tarinfo = tf.gettarinfo(source_name, arcname) + tarinfo.mtime = mtime + + if tarinfo.isreg(): + with open(source_name, "rb") as f: + tf.addfile(tarinfo, f) + elif tarinfo.isdir(): + tf.addfile(tarinfo) + self.descend(filename.split(os.path.sep)).export_to_tar(tf, arcname, mtime) + else: + tf.addfile(tarinfo) + + def is_empty(self): + self._populate_index() + return len(self.index) == 0 + + def mark_unmodified(self): + """ Marks all files in this directory (recursively) as unmodified. + """ + _set_deterministic_mtime(self.external_directory) + + def list_modified_paths(self): + """Provide a list of relative paths which have been modified since the + last call to mark_unmodified. 
+ + Return value: List(str) - list of modified paths + """ + return [f for f in list_relative_paths(self.external_directory) + if _get_link_mtime(os.path.join(self.external_directory, f)) != _magic_timestamp] + + def list_relative_paths(self): + """Provide a list of all relative paths. + + Return value: List(str) - list of all paths + """ + + return list_relative_paths(self.external_directory) + + def __str__(self): + # This returns the whole path (since we don't know where the directory started) + # which exposes the sandbox directory; we will have to assume for the time being + # that people will not abuse __str__. + return self.external_directory diff --git a/buildstream/storage/directory.py b/buildstream/storage/directory.py new file mode 100644 index 000000000..40a895acc --- /dev/null +++ b/buildstream/storage/directory.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2018 Bloomberg Finance LP +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Authors: +# Jim MacArthur <jim.macarthur@codethink.co.uk> + +""" +Directory +========= + +This is a virtual Directory class to isolate the rest of BuildStream +from the backing store implementation. Sandboxes are allowed to read +from and write to the underlying storage, but all others must use this +Directory class to access files and directories in the sandbox. + +See also: :ref:`sandboxing`. 
+ +""" + + +class Directory(): + def __init__(self, external_directory=None): + raise NotImplementedError() + + def descend(self, subdirectory_spec, create=False): + """Descend one or more levels of directory hierarchy and return a new + Directory object for that directory. + + Args: + subdirectory_spec (list of str): A list of strings which are all directory + names. + create (boolean): If this is true, the directories will be created if + they don't already exist. + + Returns: + A Directory object representing the found directory. + + Raises: + VirtualDirectoryError: if any of the components in subdirectory_spec + cannot be found, or are files, or symlinks to files. + + """ + raise NotImplementedError() + + # Import and export of files and links + def import_files(self, external_pathspec, *, files=None, + report_written=True, update_utimes=False, + can_link=False): + """Imports some or all files from external_pathspec into this directory. + + Args: + external_pathspec: Either a string containing a pathname, or a + Directory object, to use as the source. + files (list of str): A list of all the files relative to + the external_pathspec to copy. If 'None' is supplied, all + files are copied. + report_written (bool): Return the full list of files + written. Defaults to true. If false, only a list of + overwritten files is returned. + update_utimes (bool): Update the access and modification time + of each file copied to the current time. + can_link (bool): Whether it's OK to create a hard link to the + original content, meaning the stored copy will change when the + original files change. Setting this doesn't guarantee hard + links will be made. can_link will never be used if + update_utimes is set. + + Returns: + (FileListResult) - A report of files imported and overwritten. + + """ + + raise NotImplementedError() + + def export_files(self, to_directory, *, can_link=False, can_destroy=False): + """Copies everything from this into to_directory. 
+ + Args: + to_directory (string): a path outside this directory object + where the contents will be copied to. + can_link (bool): Whether we can create hard links in to_directory + instead of copying. Setting this does not guarantee hard links will be used. + can_destroy (bool): Can we destroy the data already in this + directory when exporting? If set, this may allow data to be + moved rather than copied which will be quicker. + """ + + raise NotImplementedError() + + def export_to_tar(self, tarfile, destination_dir, mtime=0): + """ Exports this directory into the given tar file. + + Args: + tarfile (TarFile): A Python TarFile object to export into. + destination_dir (str): The prefix for all filenames inside the archive. + mtime (int): mtimes of all files in the archive are set to this. + """ + raise NotImplementedError() + + # Convenience functions + def is_empty(self): + """ Return true if this directory has no files, subdirectories or links in it. + """ + raise NotImplementedError() + + def set_deterministic_mtime(self): + """ Sets a static modification time for all regular files in this directory. + The magic number for timestamps is 2011-11-11 11:11:11. + """ + raise NotImplementedError() + + def set_deterministic_user(self): + """ Sets all files in this directory to the current user's euid/egid. + """ + raise NotImplementedError() + + def mark_unmodified(self): + """ Marks all files in this directory (recursively) as unmodified. + """ + raise NotImplementedError() + + def list_modified_paths(self): + """Provide a list of relative paths which have been modified since the + last call to mark_unmodified. Includes directories only if + they are empty. + + Returns: + (List(str)) - list of all modified files with relative paths. + + """ + raise NotImplementedError() + + def list_relative_paths(self): + """Provide a list of all relative paths in this directory. Includes + directories only if they are empty. 
+ + Yields: + (List(str)) - list of all files with relative paths. + + """ + raise NotImplementedError() diff --git a/buildstream/utils.py b/buildstream/utils.py index 68f99b9a3..93ab6fb0e 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -41,6 +41,9 @@ import psutil from . import _signals from ._exceptions import BstError, ErrorDomain +# The magic number for timestamps: 2011-11-11 11:11:11 +_magic_timestamp = calendar.timegm([2011, 11, 11, 11, 11, 11]) + # The separator we use for user specified aliases _ALIAS_SEPARATOR = ':' @@ -909,9 +912,6 @@ def _set_deterministic_user(directory): # directory (str): The directory to recursively set the mtime on # def _set_deterministic_mtime(directory): - # The magic number for timestamps: 2011-11-11 11:11:11 - magic_timestamp = calendar.timegm([2011, 11, 11, 11, 11, 11]) - for dirname, _, filenames in os.walk(directory.encode("utf-8"), topdown=False): for filename in filenames: pathname = os.path.join(dirname, filename) @@ -930,9 +930,9 @@ def _set_deterministic_mtime(directory): # However, nowadays it is possible at least on gnuish systems # with with the lutimes glibc function. if not os.path.islink(pathname): - os.utime(pathname, (magic_timestamp, magic_timestamp)) + os.utime(pathname, (_magic_timestamp, _magic_timestamp)) - os.utime(dirname, (magic_timestamp, magic_timestamp)) + os.utime(dirname, (_magic_timestamp, _magic_timestamp)) # _tempdir() @@ -1159,3 +1159,11 @@ def _deduplicate(iterable, key=None): if k not in seen: seen_add(k) yield element + + +# Like os.path.getmtime(), but returns the mtime of a link rather than +# the target, if the filesystem supports that. +# +def _get_link_mtime(path): + path_stat = os.lstat(path) + return path_stat.st_mtime |