summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim MacArthur <jim+gitlab@mode7.co.uk>2018-08-15 17:03:26 +0000
committerJim MacArthur <jim+gitlab@mode7.co.uk>2018-08-15 17:03:26 +0000
commit1292004636c659769ffa76009539957783aebbc3 (patch)
treeaffbb2d0ec2695f9960413dfd926e3b32c9033fa
parent007624429f22972a9d2d2620cbbbad18411ff4c9 (diff)
parent136deb2e75a18e2a7c4e79e07ddaf3de9965b397 (diff)
downloadbuildstream-1292004636c659769ffa76009539957783aebbc3.tar.gz
Merge branch 'jmac/cas_virtual_directory' into 'master'
CAS-backed virtual directory implementation See merge request BuildStream/buildstream!481
-rw-r--r--buildstream/__init__.py3
-rw-r--r--buildstream/_artifactcache/artifactcache.py2
-rw-r--r--buildstream/element.py39
-rw-r--r--buildstream/element_enums.py61
-rw-r--r--buildstream/sandbox/_mount.py6
-rw-r--r--buildstream/sandbox/_sandboxbwrap.py3
-rw-r--r--buildstream/sandbox/_sandboxchroot.py1
-rw-r--r--buildstream/sandbox/sandbox.py12
-rw-r--r--buildstream/storage/_casbaseddirectory.py563
-rw-r--r--buildstream/storage/_filebaseddirectory.py29
-rw-r--r--buildstream/storage/directory.py23
-rw-r--r--tests/sandboxes/storage-test/original/bin/bash1
-rw-r--r--tests/sandboxes/storage-test/original/bin/hello1
-rw-r--r--tests/sandboxes/storage-test/overlay/bin/bash1
-rw-r--r--tests/sandboxes/storage-tests.py57
15 files changed, 743 insertions, 59 deletions
diff --git a/buildstream/__init__.py b/buildstream/__init__.py
index 895adc60f..0f6efb0da 100644
--- a/buildstream/__init__.py
+++ b/buildstream/__init__.py
@@ -30,6 +30,7 @@ if "_BST_COMPLETION" not in os.environ:
from .sandbox import Sandbox, SandboxFlags
from .plugin import Plugin
from .source import Source, SourceError, Consistency, SourceFetcher
- from .element import Element, ElementError, Scope
+ from .element import Element, ElementError
+ from .element_enums import Scope
from .buildelement import BuildElement
from .scriptelement import ScriptElement
diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py
index a7af94719..d98c291f9 100644
--- a/buildstream/_artifactcache/artifactcache.py
+++ b/buildstream/_artifactcache/artifactcache.py
@@ -21,7 +21,7 @@ import os
import string
from collections import Mapping, namedtuple
-from ..element import _KeyStrength
+from ..element_enums import _KeyStrength
from .._exceptions import ArtifactError, ImplError, LoadError, LoadErrorReason
from .._message import Message, MessageType
from .. import utils
diff --git a/buildstream/element.py b/buildstream/element.py
index bb205c777..40cac47cd 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -78,7 +78,6 @@ import stat
import copy
from collections import Mapping, OrderedDict
from contextlib import contextmanager
-from enum import Enum
import tempfile
import shutil
@@ -98,41 +97,9 @@ from .plugin import CoreWarnings
from .sandbox._config import SandboxConfig
from .storage.directory import Directory
-from .storage._filebaseddirectory import FileBasedDirectory, VirtualDirectoryError
-
-
-# _KeyStrength():
-#
-# Strength of cache key
-#
-class _KeyStrength(Enum):
-
- # Includes strong cache keys of all build dependencies and their
- # runtime dependencies.
- STRONG = 1
-
- # Includes names of direct build dependencies but does not include
- # cache keys of dependencies.
- WEAK = 2
-
-
-class Scope(Enum):
- """Types of scope for a given element"""
-
- ALL = 1
- """All elements which the given element depends on, following
- all elements required for building. Including the element itself.
- """
-
- BUILD = 2
- """All elements required for building the element, including their
- respective run dependencies. Not including the given element itself.
- """
-
- RUN = 3
- """All elements required for running the element. Including the element
- itself.
- """
+from .storage._filebaseddirectory import FileBasedDirectory
+from .storage.directory import VirtualDirectoryError
+from .element_enums import _KeyStrength, Scope
class ElementError(BstError):
diff --git a/buildstream/element_enums.py b/buildstream/element_enums.py
new file mode 100644
index 000000000..2f2fb54d2
--- /dev/null
+++ b/buildstream/element_enums.py
@@ -0,0 +1,61 @@
+#
+# Copyright (C) 2018 Bloomberg LP
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
+# Jim MacArthur <jim.macarthur@codethink.co.uk>
+
+"""
+Element - Globally visible enumerations
+=======================================
+
+"""
+
+from enum import Enum
+
+
+# _KeyStrength():
+#
+# Strength of cache key
+#
+class _KeyStrength(Enum):
+
+ # Includes strong cache keys of all build dependencies and their
+ # runtime dependencies.
+ STRONG = 1
+
+ # Includes names of direct build dependencies but does not include
+ # cache keys of dependencies.
+ WEAK = 2
+
+
+class Scope(Enum):
+ """Types of scope for a given element"""
+
+ ALL = 1
+ """All elements which the given element depends on, following
+ all elements required for building. Including the element itself.
+ """
+
+ BUILD = 2
+ """All elements required for building the element, including their
+ respective run dependencies. Not including the given element itself.
+ """
+
+ RUN = 3
+ """All elements required for running the element. Including the element
+ itself.
+ """
diff --git a/buildstream/sandbox/_mount.py b/buildstream/sandbox/_mount.py
index 0f96a92b7..49068fe92 100644
--- a/buildstream/sandbox/_mount.py
+++ b/buildstream/sandbox/_mount.py
@@ -32,8 +32,10 @@ from .._fuse import SafeHardlinks
class Mount():
def __init__(self, sandbox, mount_point, safe_hardlinks):
scratch_directory = sandbox._get_scratch_directory()
- # Getting external_directory here is acceptable as we're part of the sandbox code.
- root_directory = sandbox.get_virtual_directory().external_directory
+ # Getting _get_underlying_directory() here is acceptable as
+ # we're part of the sandbox code. This will fail if our
+ # directory is CAS-based.
+ root_directory = sandbox.get_virtual_directory()._get_underlying_directory()
self.mount_point = mount_point
self.safe_hardlinks = safe_hardlinks
diff --git a/buildstream/sandbox/_sandboxbwrap.py b/buildstream/sandbox/_sandboxbwrap.py
index 3ef3d4cb9..ea7254c1b 100644
--- a/buildstream/sandbox/_sandboxbwrap.py
+++ b/buildstream/sandbox/_sandboxbwrap.py
@@ -58,7 +58,7 @@ class SandboxBwrap(Sandbox):
stdout, stderr = self._get_output()
# Allowable access to underlying storage as we're part of the sandbox
- root_directory = self.get_virtual_directory().external_directory
+ root_directory = self.get_virtual_directory()._get_underlying_directory()
# Fallback to the sandbox default settings for
# the cwd and env.
@@ -248,6 +248,7 @@ class SandboxBwrap(Sandbox):
# a bug, bwrap mounted a tempfs here and when it exits, that better be empty.
pass
+ self._vdir._mark_changed()
return exit_code
def run_bwrap(self, argv, stdin, stdout, stderr, interactive):
diff --git a/buildstream/sandbox/_sandboxchroot.py b/buildstream/sandbox/_sandboxchroot.py
index de4eb46e2..a902f22ad 100644
--- a/buildstream/sandbox/_sandboxchroot.py
+++ b/buildstream/sandbox/_sandboxchroot.py
@@ -106,6 +106,7 @@ class SandboxChroot(Sandbox):
status = self.chroot(rootfs, command, stdin, stdout,
stderr, cwd, env, flags)
+ self._vdir._mark_changed()
return status
# chroot()
diff --git a/buildstream/sandbox/sandbox.py b/buildstream/sandbox/sandbox.py
index 9fe1194bb..87a2fb9c9 100644
--- a/buildstream/sandbox/sandbox.py
+++ b/buildstream/sandbox/sandbox.py
@@ -31,6 +31,7 @@ See also: :ref:`sandboxing`.
import os
from .._exceptions import ImplError, BstError
from ..storage._filebaseddirectory import FileBasedDirectory
+from ..storage._casbaseddirectory import CasBasedDirectory
class SandboxFlags():
@@ -105,6 +106,7 @@ class Sandbox():
self.__scratch = os.path.join(self.__directory, 'scratch')
for directory_ in [self._root, self.__scratch]:
os.makedirs(directory_, exist_ok=True)
+ self._vdir = None
def get_directory(self):
"""Fetches the sandbox root directory
@@ -133,8 +135,14 @@ class Sandbox():
(str): The sandbox root directory
"""
- # For now, just create a new Directory every time we're asked
- return FileBasedDirectory(self._root)
+ if not self._vdir:
+ # BST_CAS_DIRECTORIES is a deliberately hidden environment variable which
+ # can be used to switch on CAS-based directories for testing.
+ if 'BST_CAS_DIRECTORIES' in os.environ:
+ self._vdir = CasBasedDirectory(self.__context, ref=None)
+ else:
+ self._vdir = FileBasedDirectory(self._root)
+ return self._vdir
def set_environment(self, environment):
"""Sets the environment variables for the sandbox
diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
new file mode 100644
index 000000000..5ca100793
--- /dev/null
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -0,0 +1,563 @@
+#
+# Copyright (C) 2018 Bloomberg LP
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Jim MacArthur <jim.macarthur@codethink.co.uk>
+
+"""
+CasBasedDirectory
+=================
+
+Implementation of the Directory class which backs onto a Merkle-tree based content
+addressable storage system.
+
+See also: :ref:`sandboxing`.
+"""
+
+from collections import OrderedDict
+
+import os
+import tempfile
+import stat
+
+from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
+from .._exceptions import BstError
+from .directory import Directory, VirtualDirectoryError
+from ._filebaseddirectory import FileBasedDirectory
+from ..utils import FileListResult, safe_copy, list_relative_paths
+from .._artifactcache.cascache import CASCache
+
+
+class IndexEntry():
+ """ Used in our index of names to objects to store the 'modified' flag
+ for directory entries. Because we need both the remote_execution_pb2 object
+ and our own Directory object for directory entries, we store both. For files
+ and symlinks, only pb_object is used. """
+ def __init__(self, pb_object, buildstream_object=None, modified=False):
+ self.pb_object = pb_object # Short for 'protocol buffer object'
+ self.buildstream_object = buildstream_object
+ self.modified = modified
+
+
+# CasBasedDirectory intentionally doesn't call its superclass constructor,
+# which is meant to be unimplemented.
+# pylint: disable=super-init-not-called
+
+class CasBasedDirectory(Directory):
+ """
+ CAS-based directories can have two names; one is a 'common name' which has no effect
+ on functionality, and the 'filename'. If a CasBasedDirectory has a parent, then 'filename'
+ must be the name of an entry in the parent directory's index which points to this object.
+ This is used to inform a parent directory that it must update the given hash for this
+ object when this object changes.
+
+ Typically a top-level CasBasedDirectory will have a common_name and no filename, and
+ subdirectories will have a filename and no common_name. common_name can be used to identify
+ CasBasedDirectory objects in a log file, since they have no unique position in a file
+ system.
+ """
+
+ # Two constants which define the separators used by the remote execution API.
+ _pb2_path_sep = "/"
+ _pb2_absolute_path_prefix = "/"
+
+ def __init__(self, context, ref=None, parent=None, common_name="untitled", filename=None):
+ self.context = context
+ self.cas_directory = os.path.join(context.artifactdir, 'cas')
+ self.filename = filename
+ self.common_name = common_name
+ self.pb2_directory = remote_execution_pb2.Directory()
+ self.cas_cache = CASCache(context)
+ if ref:
+ with open(self.cas_cache.objpath(ref), 'rb') as f:
+ self.pb2_directory.ParseFromString(f.read())
+
+ self.ref = ref
+ self.index = OrderedDict()
+ self.parent = parent
+ self._directory_read = False
+ self._populate_index()
+
+ def _populate_index(self):
+ if self._directory_read:
+ return
+ for entry in self.pb2_directory.directories:
+ buildStreamDirectory = CasBasedDirectory(self.context, ref=entry.digest,
+ parent=self, filename=entry.name)
+ self.index[entry.name] = IndexEntry(entry, buildstream_object=buildStreamDirectory)
+ for entry in self.pb2_directory.files:
+ self.index[entry.name] = IndexEntry(entry)
+ for entry in self.pb2_directory.symlinks:
+ self.index[entry.name] = IndexEntry(entry)
+ self._directory_read = True
+
+ def _recalculate_recursing_up(self, caller=None):
+ """Recalculate the hash for this directory and store the results in
+ the cache. If this directory has a parent, tell it to
+ recalculate (since changing this directory changes an entry in
+ the parent).
+
+ """
+ self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
+ if caller:
+ old_dir = self._find_pb2_entry(caller.filename)
+ self.cas_cache.add_object(digest=old_dir.digest, buffer=caller.pb2_directory.SerializeToString())
+ if self.parent:
+ self.parent._recalculate_recursing_up(self)
+
+ def _recalculate_recursing_down(self, parent=None):
+ """Recalculate the hash for this directory and any
+ subdirectories. Hashes for subdirectories should be calculated
+ and stored after a significant operation (e.g. an
+ import_files() call) but not after adding each file, as that
+ is extremely wasteful.
+
+ """
+ for entry in self.pb2_directory.directories:
+ self.index[entry.name].buildstream_object._recalculate_recursing_down(entry)
+
+ if parent:
+ self.ref = self.cas_cache.add_object(digest=parent.digest, buffer=self.pb2_directory.SerializeToString())
+ else:
+ self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
+ # We don't need to do anything more than that; files were already added earlier, and symlinks are
+ # part of the directory structure.
+
+ def _find_pb2_entry(self, name):
+ if name in self.index:
+ return self.index[name].pb_object
+ return None
+
+ def _find_self_in_parent(self):
+ assert self.parent is not None
+ parent = self.parent
+ for (k, v) in parent.index.items():
+ if v.buildstream_object == self:
+ return k
+ return None
+
+ def _add_directory(self, name):
+ if name in self.index:
+ newdir = self.index[name].buildstream_object
+ if not isinstance(newdir, CasBasedDirectory):
+ # TODO: This may not be an actual error; it may actually overwrite it
+ raise VirtualDirectoryError("New directory {} in {} would overwrite existing non-directory of type {}"
+ .format(name, str(self), type(newdir)))
+ dirnode = self._find_pb2_entry(name)
+ else:
+ newdir = CasBasedDirectory(self.context, parent=self, filename=name)
+ dirnode = self.pb2_directory.directories.add()
+
+ dirnode.name = name
+
+ # Calculate the hash for an empty directory
+ new_directory = remote_execution_pb2.Directory()
+ self.cas_cache.add_object(digest=dirnode.digest, buffer=new_directory.SerializeToString())
+ self.index[name] = IndexEntry(dirnode, buildstream_object=newdir)
+ return newdir
+
+ def _add_new_file(self, basename, filename):
+ filenode = self.pb2_directory.files.add()
+ filenode.name = filename
+ self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
+ is_executable = os.access(os.path.join(basename, filename), os.X_OK)
+ filenode.is_executable = is_executable
+ self.index[filename] = IndexEntry(filenode, modified=(filename in self.index))
+
+ def _add_new_link(self, basename, filename):
+ existing_link = self._find_pb2_entry(filename)
+ if existing_link:
+ symlinknode = existing_link
+ else:
+ symlinknode = self.pb2_directory.symlinks.add()
+ symlinknode.name = filename
+ # A symlink node has no digest.
+ symlinknode.target = os.readlink(os.path.join(basename, filename))
+ self.index[filename] = IndexEntry(symlinknode, modified=(existing_link is not None))
+
+ def delete_entry(self, name):
+ for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
+ if name in collection:
+ collection.remove(name)
+ if name in self.index:
+ del self.index[name]
+
+ def descend(self, subdirectory_spec, create=False):
+ """Descend one or more levels of directory hierarchy and return a new
+ Directory object for that directory.
+
+ Arguments:
+ * subdirectory_spec (list of strings): A list of strings which are all directory
+ names.
+ * create (boolean): If this is true, the directories will be created if
+ they don't already exist.
+
+ Note: At the moment, creating a directory by descending does
+ not update this object in the CAS cache. However, performing
+ an import_files() into a subdirectory of any depth obtained by
+ descending from this object *will* cause this directory to be
+ updated and stored.
+
+ """
+
+ # It's very common to send a directory name instead of a list and this causes
+ # bizarre errors, so check for it here
+ if not isinstance(subdirectory_spec, list):
+ subdirectory_spec = [subdirectory_spec]
+
+ # Because of the way split works, it's common to get a list which begins with
+ # an empty string. Detect these and remove them.
+ while subdirectory_spec and subdirectory_spec[0] == "":
+ subdirectory_spec.pop(0)
+
+ # Descending into [] returns the same directory.
+ if not subdirectory_spec:
+ return self
+
+ if subdirectory_spec[0] in self.index:
+ entry = self.index[subdirectory_spec[0]].buildstream_object
+ if isinstance(entry, CasBasedDirectory):
+ return entry.descend(subdirectory_spec[1:], create)
+ else:
+ error = "Cannot descend into {}, which is a '{}' in the directory {}"
+ raise VirtualDirectoryError(error.format(subdirectory_spec[0],
+ type(entry).__name__,
+ self))
+ else:
+ if create:
+ newdir = self._add_directory(subdirectory_spec[0])
+ return newdir.descend(subdirectory_spec[1:], create)
+ else:
+ error = "No entry called '{}' found in {}. There are directories called {}."
+ directory_list = ",".join([entry.name for entry in self.pb2_directory.directories])
+ raise VirtualDirectoryError(error.format(subdirectory_spec[0], str(self),
+ directory_list))
+ return None
+
+ def find_root(self):
+ """ Finds the root of this directory tree by following 'parent' until there is
+ no parent. """
+ if self.parent:
+ return self.parent.find_root()
+ else:
+ return self
+
+ def _resolve_symlink_or_directory(self, name):
+ """Used only by _import_files_from_directory. Tries to resolve a
+ directory name or symlink name. 'name' must be an entry in this
+ directory. It must be a single symlink or directory name, not a path
+ separated by path separators. If it's an existing directory name, it
+ just returns the Directory object for that. If it's a symlink, it will
+ attempt to find the target of the symlink and return that as a
+ Directory object.
+
+ If a symlink target doesn't exist, it will attempt to create it
+ as a directory as long as it's within this directory tree.
+ """
+
+ if isinstance(self.index[name].buildstream_object, Directory):
+ return self.index[name].buildstream_object
+ # OK then, it's a symlink
+ symlink = self._find_pb2_entry(name)
+ absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
+ if absolute:
+ root = self.find_root()
+ else:
+ root = self
+ directory = root
+ components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
+ for c in components:
+ if c == "..":
+ directory = directory.parent
+ else:
+ directory = directory.descend(c, create=True)
+ return directory
+
+ def _check_replacement(self, name, path_prefix, fileListResult):
+ """ Checks whether 'name' exists, and if so, whether we can overwrite it.
+ If we can, add the name to 'overwritten_files' and delete the existing entry.
+ Returns 'True' if the import should go ahead.
+ fileListResult.overwritten and fileListResult.ignored are updated depending
+ on the result. """
+ existing_entry = self._find_pb2_entry(name)
+ relative_pathname = os.path.join(path_prefix, name)
+ if existing_entry is None:
+ return True
+ if (isinstance(existing_entry,
+ (remote_execution_pb2.FileNode, remote_execution_pb2.SymlinkNode))):
+ fileListResult.overwritten.append(relative_pathname)
+ return True
+ elif isinstance(existing_entry, remote_execution_pb2.DirectoryNode):
+ # If 'name' maps to a DirectoryNode, then there must be an entry in index
+ # pointing to another Directory.
+ if self.index[name].buildstream_object.is_empty():
+ self.delete_entry(name)
+ fileListResult.overwritten.append(relative_pathname)
+ return True
+ else:
+ # We can't overwrite a non-empty directory, so we just ignore it.
+ fileListResult.ignored.append(relative_pathname)
+ return False
+ assert False, ("Entry '{}' is not a recognised file/link/directory and not None; it is {}"
+ .format(name, type(existing_entry)))
+ return False # In case asserts are disabled
+
+ def _import_directory_recursively(self, directory_name, source_directory, remaining_path, path_prefix):
+ """ _import_directory_recursively and _import_files_from_directory will be called alternately
+ as a directory tree is descended. """
+ if directory_name in self.index:
+ subdir = self._resolve_symlink_or_directory(directory_name)
+ else:
+ subdir = self._add_directory(directory_name)
+ new_path_prefix = os.path.join(path_prefix, directory_name)
+ subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
+ [os.path.sep.join(remaining_path)],
+ path_prefix=new_path_prefix)
+ return subdir_result
+
+ def _import_files_from_directory(self, source_directory, files, path_prefix=""):
+ """ Imports files from a traditional directory """
+ result = FileListResult()
+ for entry in sorted(files):
+ split_path = entry.split(os.path.sep)
+ # The actual file on the FS we're importing
+ import_file = os.path.join(source_directory, entry)
+ # The destination filename, relative to the root where the import started
+ relative_pathname = os.path.join(path_prefix, entry)
+ if len(split_path) > 1:
+ directory_name = split_path[0]
+ # Hand this off to the importer for that subdir. This will only do one file -
+ # a better way would be to hand off all the files in this subdir at once.
+ subdir_result = self._import_directory_recursively(directory_name, source_directory,
+ split_path[1:], path_prefix)
+ result.combine(subdir_result)
+ elif os.path.islink(import_file):
+ if self._check_replacement(entry, path_prefix, result):
+ self._add_new_link(source_directory, entry)
+ result.files_written.append(relative_pathname)
+ elif os.path.isdir(import_file):
+ # A plain directory which already exists isn't a problem; just ignore it.
+ if entry not in self.index:
+ self._add_directory(entry)
+ elif os.path.isfile(import_file):
+ if self._check_replacement(entry, path_prefix, result):
+ self._add_new_file(source_directory, entry)
+ result.files_written.append(relative_pathname)
+ return result
+
+ def import_files(self, external_pathspec, *, files=None,
+ report_written=True, update_utimes=False,
+ can_link=False):
+ """Imports some or all files from external_pathspec into this directory.
+
+ Keyword arguments: external_pathspec: Either a string
+ containing a pathname, or a Directory object, to use as the
+ source.
+
+ files (list of strings): A list of all the files relative to
+ the external_pathspec to copy. If 'None' is supplied, all
+ files are copied.
+
+ report_written (bool): Return the full list of files
+ written. Defaults to true. If false, only a list of
+ overwritten files is returned.
+
+ update_utimes (bool): Currently ignored, since CAS does not store utimes.
+
+ can_link (bool): Ignored, since hard links do not have any meaning within CAS.
+ """
+ if isinstance(external_pathspec, FileBasedDirectory):
+ source_directory = external_pathspec._get_underlying_directory()
+ elif isinstance(external_pathspec, CasBasedDirectory):
+ # TODO: This transfers from one CAS to another via the
+ # filesystem, which is very inefficient. Alter this so it
+ # transfers refs across directly.
+ with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
+ external_pathspec.export_files(tmpdir)
+ if files is None:
+ files = list_relative_paths(tmpdir)
+ result = self._import_files_from_directory(tmpdir, files=files)
+ return result
+ else:
+ source_directory = external_pathspec
+
+ if files is None:
+ files = list_relative_paths(source_directory)
+
+ # TODO: No notice is taken of report_written, update_utimes or can_link.
+ # Current behaviour is to fully populate the report, which is inefficient,
+ # but still correct.
+ result = self._import_files_from_directory(source_directory, files=files)
+
+ # We need to recalculate and store the hashes of all directories both
+ # up and down the tree; we have changed our directory by importing files
+ # which changes our hash and all our parents' hashes of us. The trees
+ # lower down need to be stored in the CAS as they are not automatically
+ # added during construction.
+ self._recalculate_recursing_down()
+ if self.parent:
+ self.parent._recalculate_recursing_up(self)
+ return result
+
+ def set_deterministic_mtime(self):
+ """ Sets a static modification time for all regular files in this directory.
+ Since we don't store any modification time, we don't need to do anything.
+ """
+ pass
+
+ def set_deterministic_user(self):
+ """ Sets all files in this directory to the current user's euid/egid.
+ We also don't store user data, so this can be ignored.
+ """
+ pass
+
+ def export_files(self, to_directory, *, can_link=False, can_destroy=False):
+ """Copies everything from this into to_directory, which must be the name
+ of a traditional filesystem directory.
+
+ Arguments:
+
+ to_directory (string): a path outside this directory object
+ where the contents will be copied to.
+
+ can_link (bool): Whether we can create hard links in to_directory
+ instead of copying.
+
+ can_destroy (bool): Whether we can destroy elements in this
+ directory to export them (e.g. by renaming them as the
+ target).
+
+ """
+
+ if not os.path.exists(to_directory):
+ os.mkdir(to_directory)
+
+ for entry in self.pb2_directory.directories:
+ if entry.name not in self.index:
+ raise VirtualDirectoryError("CasDir {} contained {} in directories but not in the index"
+ .format(str(self), entry.name))
+ if not self._directory_read:
+ raise VirtualDirectoryError("CasDir {} has not been indexed yet".format(str(self)))
+ dest_dir = os.path.join(to_directory, entry.name)
+ if not os.path.exists(dest_dir):
+ os.mkdir(dest_dir)
+ target = self.descend([entry.name])
+ target.export_files(dest_dir)
+ for entry in self.pb2_directory.files:
+ # Extract the entry to a single file
+ dest_name = os.path.join(to_directory, entry.name)
+ src_name = self.cas_cache.objpath(entry.digest)
+ safe_copy(src_name, dest_name)
+ if entry.is_executable:
+ os.chmod(dest_name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
+ stat.S_IRGRP | stat.S_IXGRP |
+ stat.S_IROTH | stat.S_IXOTH)
+ for entry in self.pb2_directory.symlinks:
+ src_name = os.path.join(to_directory, entry.name)
+ target_name = entry.target
+ try:
+ os.symlink(target_name, src_name)
+ except FileExistsError as e:
+ raise BstError(("Cannot create a symlink named {} pointing to {}." +
+ " The original error was: {}").
+ format(src_name, entry.target, e))
+
+ def export_to_tar(self, tarfile, destination_dir, mtime=0):
+ raise NotImplementedError()
+
+ def mark_changed(self):
+ """ It should not be possible to externally modify a CAS-based
+ directory at the moment."""
+ raise NotImplementedError()
+
+ def is_empty(self):
+ """ Return true if this directory has no files, subdirectories or links in it.
+ """
+ return len(self.index) == 0
+
+ def _mark_directory_unmodified(self):
+ # Marks all entries in this directory and all child directories as unmodified.
+ for i in self.index.values():
+ i.modified = False
+ if isinstance(i.buildstream_object, CasBasedDirectory):
+ i.buildstream_object._mark_directory_unmodified()
+
+ def _mark_entry_unmodified(self, name):
+ # Marks an entry as unmodified. If the entry is a directory, it will
+ # recursively mark all its tree as unmodified.
+ self.index[name].modified = False
+ if self.index[name].buildstream_object:
+ self.index[name].buildstream_object._mark_directory_unmodified()
+
+ def mark_unmodified(self):
+ """ Marks all files in this directory (recursively) as unmodified.
+ If we have a parent, we mark our own entry as unmodified in that parent's
+ index.
+ """
+ if self.parent:
+ self.parent._mark_entry_unmodified(self._find_self_in_parent())
+ else:
+ self._mark_directory_unmodified()
+
+ def list_modified_paths(self):
+ """Provide a list of relative paths which have been modified since the
+ last call to mark_unmodified.
+
+ Return value: List(str) - list of modified paths
+ """
+
+ filelist = []
+ for (k, v) in self.index.items():
+ if isinstance(v.buildstream_object, CasBasedDirectory):
+ filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_modified_paths()])
+ elif isinstance(v.pb_object, remote_execution_pb2.FileNode) and v.modified:
+ filelist.append(k)
+ return filelist
+
+ def list_relative_paths(self):
+ """Provide a list of all relative paths.
+
+ NOTE: This list is not in the same order as utils.list_relative_paths.
+
+ Return value: List(str) - list of all paths
+ """
+
+ filelist = []
+ for (k, v) in self.index.items():
+ if isinstance(v.buildstream_object, CasBasedDirectory):
+ filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_relative_paths()])
+ elif isinstance(v.pb_object, remote_execution_pb2.FileNode):
+ filelist.append(k)
+ return filelist
+
+ def _get_identifier(self):
+ path = ""
+ if self.parent:
+ path = self.parent._get_identifier()
+ if self.filename:
+ path += "/" + self.filename
+ else:
+ path += "/" + self.common_name
+ return path
+
+ def __str__(self):
+ return "[CAS:{}]".format(self._get_identifier())
+
+ def _get_underlying_directory(self):
+ """ There is no underlying directory for a CAS-backed directory, so
+ throw an exception. """
+ raise VirtualDirectoryError("_get_underlying_directory was called on a CAS-backed directory," +
+ " which has no underlying directory.")
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
index 5b3da28f4..735179cb6 100644
--- a/buildstream/storage/_filebaseddirectory.py
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -29,25 +29,12 @@ See also: :ref:`sandboxing`.
import os
import time
-from .._exceptions import BstError, ErrorDomain
-from .directory import Directory
+from .directory import Directory, VirtualDirectoryError
from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
from ..utils import _set_deterministic_user, _set_deterministic_mtime
-
-class VirtualDirectoryError(BstError):
- """Raised by Directory functions when system calls fail.
- This will be handled internally by the BuildStream core,
- if you need to handle this error, then it should be reraised,
- or either of the :class:`.ElementError` or :class:`.SourceError`
- exceptions should be raised from this error.
- """
- def __init__(self, message, reason=None):
- super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason)
-
-
# FileBasedDirectory intentionally doesn't call its superclass constuctor,
-# which is mean to be unimplemented.
+# which is meant to be unimplemented.
# pylint: disable=super-init-not-called
@@ -108,7 +95,8 @@ class FileBasedDirectory(Directory):
if create:
new_path = os.path.join(self.external_directory, subdirectory_spec[0])
os.makedirs(new_path, exist_ok=True)
- return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
+ self.index[subdirectory_spec[0]] = FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
+ return self.index[subdirectory_spec[0]]
else:
error = "No entry called '{}' found in the directory rooted at {}"
raise VirtualDirectoryError(error.format(subdirectory_spec[0], self.external_directory))
@@ -134,8 +122,12 @@ class FileBasedDirectory(Directory):
for f in import_result.files_written:
os.utime(os.path.join(self.external_directory, f), times=(cur_time, cur_time))
+ self._mark_changed()
return import_result
+ def _mark_changed(self):
+ self._directory_read = False
+
def set_deterministic_mtime(self):
_set_deterministic_mtime(self.external_directory)
@@ -214,3 +206,8 @@ class FileBasedDirectory(Directory):
# which exposes the sandbox directory; we will have to assume for the time being
# that people will not abuse __str__.
return self.external_directory
+
+ def _get_underlying_directory(self) -> str:
+ """ Returns the underlying (real) file system directory this
+ object refers to. """
+ return self.external_directory
diff --git a/buildstream/storage/directory.py b/buildstream/storage/directory.py
index 40a895acc..838741231 100644
--- a/buildstream/storage/directory.py
+++ b/buildstream/storage/directory.py
@@ -31,6 +31,19 @@ See also: :ref:`sandboxing`.
"""
+from .._exceptions import BstError, ErrorDomain
+
+
+class VirtualDirectoryError(BstError):
+ """Raised by Directory functions when an operation fails, e.g. a system call.
+
+ This will be handled internally by the BuildStream core. If you
+ need to handle this error yourself, either reraise it or raise an
+ :class:`.ElementError` or :class:`.SourceError` from it.
+ """
+ def __init__(self, message, reason=None):
+ super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason)
+
class Directory():
def __init__(self, external_directory=None):
@@ -153,3 +166,13 @@ class Directory():
"""
raise NotImplementedError()
+
+ def _mark_changed(self):
+ """Internal function to mark this directory as having been changed
+ outside this API. Such changes normally only happen when the
+ Sandbox's `run` method is called. This does *not* mark everything as
+ modified (i.e. list_modified_paths will not necessarily return the
+ same results as list_relative_paths after calling this).
+
+ """
+ raise NotImplementedError()
diff --git a/tests/sandboxes/storage-test/original/bin/bash b/tests/sandboxes/storage-test/original/bin/bash
new file mode 100644
index 000000000..a221b564c
--- /dev/null
+++ b/tests/sandboxes/storage-test/original/bin/bash
@@ -0,0 +1 @@
+This is the original /bin/bash.
diff --git a/tests/sandboxes/storage-test/original/bin/hello b/tests/sandboxes/storage-test/original/bin/hello
new file mode 100644
index 000000000..5524e9677
--- /dev/null
+++ b/tests/sandboxes/storage-test/original/bin/hello
@@ -0,0 +1 @@
+This is the original /bin/hello.
diff --git a/tests/sandboxes/storage-test/overlay/bin/bash b/tests/sandboxes/storage-test/overlay/bin/bash
new file mode 100644
index 000000000..b639d94ec
--- /dev/null
+++ b/tests/sandboxes/storage-test/overlay/bin/bash
@@ -0,0 +1 @@
+This is the replacement /bin/bash.
diff --git a/tests/sandboxes/storage-tests.py b/tests/sandboxes/storage-tests.py
new file mode 100644
index 000000000..553da2ba7
--- /dev/null
+++ b/tests/sandboxes/storage-tests.py
@@ -0,0 +1,57 @@
+import os
+import pytest
+
+from buildstream._exceptions import ErrorDomain
+
+from buildstream._context import Context
+from buildstream.storage._casbaseddirectory import CasBasedDirectory
+from buildstream.storage._filebaseddirectory import FileBasedDirectory
+
+DATA_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)),
+ "storage-test"
+)
+
+
+def setup_backend(backend_class, tmpdir):
+ if backend_class == FileBasedDirectory:
+ return backend_class(os.path.join(tmpdir, "vdir"))
+ else:
+ context = Context()
+ context.artifactdir = os.path.join(tmpdir, "cas")
+ return backend_class(context)
+
+
+@pytest.mark.parametrize("backend", [
+ FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_import(tmpdir, datafiles, backend):
+ original = os.path.join(str(datafiles), "original")
+
+ c = setup_backend(backend, str(tmpdir))
+
+ c.import_files(original)
+
+ assert("bin/bash" in c.list_relative_paths())
+ assert("bin/hello" in c.list_relative_paths())
+
+
+@pytest.mark.parametrize("backend", [
+ FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_modified_file_list(tmpdir, datafiles, backend):
+ original = os.path.join(str(datafiles), "original")
+ overlay = os.path.join(str(datafiles), "overlay")
+
+ c = setup_backend(backend, str(tmpdir))
+
+ c.import_files(original)
+
+ c.mark_unmodified()
+
+ c.import_files(overlay)
+
+ print("List of all paths in imported results: {}".format(c.list_relative_paths()))
+ assert("bin/bash" in c.list_relative_paths())
+ assert("bin/bash" in c.list_modified_paths())
+ assert("bin/hello" not in c.list_modified_paths())