summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbst-marge-bot <marge-bot@buildstream.build>2020-04-27 09:27:58 +0000
committerbst-marge-bot <marge-bot@buildstream.build>2020-04-27 09:27:58 +0000
commit8db4a7d8e6ccde5a43c6999a2fea05f225b7ffa1 (patch)
tree37d47664614fd3a35ed69ea110dbb3cd3e947f9a
parent1f3752804419849aa196ac72e47396910e86376c (diff)
parent95d0fdaa448f6d8d998e80ac0798e0d1a796fcf6 (diff)
downloadbuildstream-8db4a7d8e6ccde5a43c6999a2fea05f225b7ffa1.tar.gz
Merge branch 'juerg/vdirectory' into 'master'
storage: Improve Directory API See merge request BuildStream/buildstream!1878
-rw-r--r--src/buildstream/element.py2
-rw-r--r--src/buildstream/storage/_casbaseddirectory.py130
-rw-r--r--src/buildstream/storage/_filebaseddirectory.py80
-rw-r--r--src/buildstream/storage/directory.py98
-rw-r--r--tests/internals/storage.py96
5 files changed, 367 insertions, 39 deletions
diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index 8563eee3c..9efe97c54 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -659,7 +659,7 @@ class Element(Plugin):
# Hard link it into the staging area
#
vbasedir = sandbox.get_virtual_directory()
- vstagedir = vbasedir if path is None else vbasedir.descend(*path.lstrip(os.sep).split(os.sep))
+ vstagedir = vbasedir if path is None else vbasedir.descend(*path.lstrip(os.sep).split(os.sep), create=True)
split_filter = self.__split_filter_func(include, exclude, orphans)
diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py
index e33bdc3d7..c83918e4d 100644
--- a/src/buildstream/storage/_casbaseddirectory.py
+++ b/src/buildstream/storage/_casbaseddirectory.py
@@ -289,24 +289,47 @@ class CasBasedDirectory(Directory):
# We can't iterate and remove entries at the same time
to_remove = [entry for entry in dir_a.index.values() if entry.name not in dir_b.index]
for entry in to_remove:
- self.delete_entry(entry.name)
+ self.remove(entry.name, recursive=True)
self.__invalidate_digest()
- def _copy_link_from_filesystem(self, basename, filename):
- self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
-
def _add_new_link_direct(self, name, target):
self.index[name] = IndexEntry(name, _FileType.SYMLINK, target=target, modified=name in self.index)
self.__invalidate_digest()
- def delete_entry(self, name):
- if name in self.index:
- del self.index[name]
+ def remove(self, *path, recursive=False):
+ if len(path) > 1:
+ # Delegate remove to subdirectory
+ subdir = self.descend(*path[:-1])
+ subdir.remove(path[-1], recursive=recursive)
+ return
+
+ name = path[0]
+ self.__validate_path_component(name)
+ entry = self.index.get(name)
+ if not entry:
+ raise FileNotFoundError("{} not found in {}".format(name, str(self)))
+
+ if entry.type == _FileType.DIRECTORY and not recursive:
+ subdir = entry.get_directory(self)
+ if not subdir.is_empty():
+ raise VirtualDirectoryError("{} is not empty".format(str(subdir)))
+ del self.index[name]
self.__invalidate_digest()
+ def rename(self, src, dest):
+ srcdir = self.descend(*src[:-1])
+ entry = srcdir._entry_from_path(src[-1])
+
+ destdir = self.descend(*dest[:-1])
+ self.__validate_path_component(dest[-1])
+
+ srcdir.remove(src[-1], recursive=True)
+ entry.name = dest[-1]
+ destdir._add_entry(entry)
+
def descend(self, *paths, create=False, follow_symlinks=False):
"""Descend one or more levels of directory hierarchy and return a new
Directory object for that directory.
@@ -332,6 +355,8 @@ class CasBasedDirectory(Directory):
if not path:
continue
+ self.__validate_path_component(path)
+
entry = current_dir.index.get(path)
if entry:
@@ -379,7 +404,7 @@ class CasBasedDirectory(Directory):
# pointing to another Directory.
subdir = existing_entry.get_directory(self)
if subdir.is_empty():
- self.delete_entry(name)
+ self.remove(name)
fileListResult.overwritten.append(relative_pathname)
return True
else:
@@ -387,7 +412,7 @@ class CasBasedDirectory(Directory):
fileListResult.ignored.append(relative_pathname)
return False
else:
- self.delete_entry(name)
+ self.remove(name)
fileListResult.overwritten.append(relative_pathname)
return True
@@ -448,7 +473,7 @@ class CasBasedDirectory(Directory):
if filter_callback and not filter_callback(relative_pathname):
if is_dir and create_subdir and dest_subdir.is_empty():
# Complete subdirectory has been filtered out, remove it
- self.delete_entry(name)
+ self.remove(name)
# Entry filtered out, move to next
continue
@@ -729,12 +754,13 @@ class CasBasedDirectory(Directory):
@contextmanager
def open_file(self, *path: str, mode: str = "r"):
subdir = self.descend(*path[:-1])
+ self.__validate_path_component(path[-1])
entry = subdir.index.get(path[-1])
if entry and entry.type != _FileType.REGULAR_FILE:
raise VirtualDirectoryError("{} in {} is not a file".format(path[-1], str(subdir)))
- if mode not in ["r", "rb", "w", "wb", "x", "xb"]:
+ if mode not in ["r", "rb", "w", "wb", "w+", "w+b", "x", "xb", "x+", "x+b"]:
raise ValueError("Unsupported mode: `{}`".format(mode))
if "b" in mode:
@@ -825,23 +851,75 @@ class CasBasedDirectory(Directory):
return self.__digest
+ def _entry_from_path(self, *path, follow_symlinks=False):
+ subdir = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
+ self.__validate_path_component(path[-1])
+ target = subdir.index.get(path[-1])
+ if target is None:
+ raise FileNotFoundError("{} not found in {}".format(path[-1], str(subdir)))
+
+ if follow_symlinks and target.type == _FileType.SYMLINK:
+ linklocation = target.target
+ newpath = linklocation.split(os.path.sep)
+ if os.path.isabs(linklocation):
+ return subdir._find_root()._entry_from_path(*newpath, follow_symlinks=True)
+ return subdir._entry_from_path(*newpath, follow_symlinks=True)
+ else:
+ return target
+
def exists(self, *path, follow_symlinks=False):
try:
- subdir = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
- target = subdir.index.get(path[-1])
- if target is not None:
- if follow_symlinks and target.type == _FileType.SYMLINK:
- linklocation = target.target
- newpath = linklocation.split(os.path.sep)
- if os.path.isabs(linklocation):
- return subdir._find_root().exists(*newpath, follow_symlinks=True)
- return subdir.exists(*newpath, follow_symlinks=True)
- else:
- return True
- return False
- except VirtualDirectoryError:
+ self._entry_from_path(*path, follow_symlinks=follow_symlinks)
+ return True
+ except (VirtualDirectoryError, FileNotFoundError):
return False
+ def stat(self, *path, follow_symlinks=False):
+ entry = self._entry_from_path(*path, follow_symlinks=follow_symlinks)
+
+ st_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
+ st_nlink = 1
+ st_mtime = BST_ARBITRARY_TIMESTAMP
+
+ if entry.type == _FileType.REGULAR_FILE:
+ st_mode |= stat.S_IFREG
+ st_size = entry.get_digest().size_bytes
+ elif entry.type == _FileType.DIRECTORY:
+ st_mode |= stat.S_IFDIR
+ st_size = 0
+ elif entry.type == _FileType.SYMLINK:
+ st_mode |= stat.S_IFLNK
+ st_size = len(entry.target)
+ else:
+ raise VirtualDirectoryError("Unsupported file type {}".format(entry.type))
+
+ if entry.type == _FileType.DIRECTORY or entry.is_executable:
+ st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+
+ if entry.node_properties:
+ for prop in entry.node_properties:
+ if prop.name == "MTime" and prop.value:
+ st_mtime = utils._parse_timestamp(prop.value)
+
+ return os.stat_result((st_mode, 0, 0, st_nlink, 0, 0, st_size, st_mtime, st_mtime, st_mtime))
+
+ def file_digest(self, *path):
+ entry = self._entry_from_path(*path)
+ if entry.type != _FileType.REGULAR_FILE:
+ raise VirtualDirectoryError("Unsupported file type for digest: {}".format(entry.type))
+
+ return entry.digest.hash
+
+ def readlink(self, *path):
+ entry = self._entry_from_path(*path)
+ if entry.type != _FileType.SYMLINK:
+ raise VirtualDirectoryError("Unsupported file type for readlink: {}".format(entry.type))
+
+ return entry.target
+
+ def __iter__(self):
+ yield from self.index.keys()
+
def _set_subtree_read_only(self, read_only):
self.__node_properties = list(filter(lambda prop: prop.name != "SubtreeReadOnly", self.__node_properties))
node_property = remote_execution_pb2.NodeProperty()
@@ -867,3 +945,7 @@ class CasBasedDirectory(Directory):
subdir.__add_files_to_result(path_prefix=relative_pathname, result=result)
else:
result.files_written.append(relative_pathname)
+
+ def __validate_path_component(self, path):
+ if "/" in path:
+ raise VirtualDirectoryError("Invalid path component: '{}'".format(path))
diff --git a/src/buildstream/storage/_filebaseddirectory.py b/src/buildstream/storage/_filebaseddirectory.py
index c492c41cd..12312299d 100644
--- a/src/buildstream/storage/_filebaseddirectory.py
+++ b/src/buildstream/storage/_filebaseddirectory.py
@@ -236,28 +236,50 @@ class FileBasedDirectory(Directory):
def get_size(self):
return utils._get_dir_size(self.external_directory)
+ def stat(self, *path, follow_symlinks=False):
+ subdir = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
+ newpath = os.path.join(subdir.external_directory, path[-1])
+ st = os.lstat(newpath)
+ if follow_symlinks and stat.S_ISLNK(st.st_mode):
+ linklocation = os.readlink(newpath)
+ newpath = linklocation.split(os.path.sep)
+ if os.path.isabs(linklocation):
+ return subdir._find_root().stat(*newpath, follow_symlinks=True)
+ return subdir.stat(*newpath, follow_symlinks=True)
+ else:
+ return st
+
def exists(self, *path, follow_symlinks=False):
try:
- subdir = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
- newpath = os.path.join(subdir.external_directory, path[-1])
- st = os.lstat(newpath)
- if follow_symlinks and stat.S_ISLNK(st.st_mode):
- linklocation = os.readlink(newpath)
- newpath = linklocation.split(os.path.sep)
- if os.path.isabs(linklocation):
- return subdir._find_root().exists(*newpath, follow_symlinks=True)
- return subdir.exists(*newpath, follow_symlinks=True)
- else:
- return True
+ self.stat(*path, follow_symlinks=follow_symlinks)
+ return True
except (VirtualDirectoryError, FileNotFoundError):
return False
+ def file_digest(self, *path):
+ # Use descend() to avoid following symlinks (potentially escaping the sandbox)
+ subdir = self.descend(*path[:-1])
+ if subdir.exists(path[-1]) and not subdir.isfile(path[-1]):
+ raise VirtualDirectoryError("Unsupported file type for digest")
+
+ newpath = os.path.join(subdir.external_directory, path[-1])
+ return utils.sha256sum(newpath)
+
+ def readlink(self, *path):
+ # Use descend() to avoid following symlinks (potentially escaping the sandbox)
+ subdir = self.descend(*path[:-1])
+ if subdir.exists(path[-1]) and not subdir.islink(path[-1]):
+ raise VirtualDirectoryError("Unsupported file type for readlink")
+
+ newpath = os.path.join(subdir.external_directory, path[-1])
+ return os.readlink(newpath)
+
def open_file(self, *path: str, mode: str = "r"):
# Use descend() to avoid following symlinks (potentially escaping the sandbox)
subdir = self.descend(*path[:-1])
newpath = os.path.join(subdir.external_directory, path[-1])
- if mode not in ["r", "rb", "w", "wb", "x", "xb"]:
+ if mode not in ["r", "rb", "w", "wb", "w+", "w+b", "x", "xb", "x+", "x+b"]:
raise ValueError("Unsupported mode: `{}`".format(mode))
if "b" in mode:
@@ -268,8 +290,42 @@ class FileBasedDirectory(Directory):
if "r" in mode:
return open(newpath, mode=mode, encoding=encoding)
else:
+ if "x" in mode:
+ # This check is not atomic, however, we're operating with a
+ # single thread in a private directory tree.
+ if subdir.exists(path[-1]):
+ raise FileExistsError("{} already exists in {}".format(path[-1], str(subdir)))
+ mode = "w" + mode[1:]
+
return utils.save_file_atomic(newpath, mode=mode, encoding=encoding)
+ def remove(self, *path, recursive=False):
+ # Use descend() to avoid following symlinks (potentially escaping the sandbox)
+ subdir = self.descend(*path[:-1])
+ newpath = os.path.join(subdir.external_directory, path[-1])
+
+ if subdir._get_filetype(path[-1]) == _FileType.DIRECTORY:
+ if recursive:
+ shutil.rmtree(newpath)
+ else:
+ os.rmdir(newpath)
+ else:
+ os.unlink(newpath)
+
+ def rename(self, src, dest):
+ # Use descend() to avoid following symlinks (potentially escaping the sandbox)
+ srcdir = self.descend(*src[:-1])
+ destdir = self.descend(*dest[:-1])
+ srcpath = os.path.join(srcdir.external_directory, src[-1])
+ destpath = os.path.join(destdir.external_directory, dest[-1])
+
+ if destdir.exists(dest[-1]):
+ destdir.remove(dest[-1])
+ os.rename(srcpath, destpath)
+
+ def __iter__(self):
+ yield from os.listdir(self.external_directory)
+
def __str__(self):
# This returns the whole path (since we don't know where the directory started)
# which exposes the sandbox directory; we will have to assume for the time being
diff --git a/src/buildstream/storage/directory.py b/src/buildstream/storage/directory.py
index bb9d78f7e..f9ea4044a 100644
--- a/src/buildstream/storage/directory.py
+++ b/src/buildstream/storage/directory.py
@@ -32,6 +32,8 @@ See also: :ref:`sandboxing`.
"""
+import os
+import stat
from typing import Callable, Optional, Union, List
from .._exceptions import BstError
@@ -208,6 +210,66 @@ class Directory:
"""
raise NotImplementedError()
+ def stat(self, *paths: str, follow_symlinks: bool = False) -> os.stat_result:
+ """ Get the status of a file.
+
+ Args:
+ *paths: A list of strings which are all path components.
+ follow_symlinks: True to follow symlinks.
+
+ Returns:
+ A `os.stat_result` object.
+ """
+ raise NotImplementedError()
+
+ def isfile(self, *paths: str, follow_symlinks: bool = False) -> bool:
+ """ Check whether the specified path is an existing regular file.
+
+ Args:
+ *paths: A list of strings which are all path components.
+ follow_symlinks: True to follow symlinks.
+
+ Returns:
+ True if the path is an existing regular file, False otherwise.
+ """
+ try:
+ st = self.stat(*paths, follow_symlinks=follow_symlinks)
+ return stat.S_ISREG(st.st_mode)
+ except (VirtualDirectoryError, FileNotFoundError):
+ return False
+
+ def isdir(self, *paths: str, follow_symlinks: bool = False) -> bool:
+ """ Check whether the specified path is an existing directory.
+
+ Args:
+ *paths: A list of strings which are all path components.
+ follow_symlinks: True to follow symlinks.
+
+ Returns:
+ True if the path is an existing directory, False otherwise.
+ """
+ try:
+ st = self.stat(*paths, follow_symlinks=follow_symlinks)
+ return stat.S_ISDIR(st.st_mode)
+ except (VirtualDirectoryError, FileNotFoundError):
+ return False
+
+ def islink(self, *paths: str, follow_symlinks: bool = False) -> bool:
+ """ Check whether the specified path is an existing symlink.
+
+ Args:
+ *paths: A list of strings which are all path components.
+ follow_symlinks: True to follow symlinks.
+
+ Returns:
+ True if the path is an existing symlink, False otherwise.
+ """
+ try:
+ st = self.stat(*paths, follow_symlinks=follow_symlinks)
+ return stat.S_ISLNK(st.st_mode)
+ except (VirtualDirectoryError, FileNotFoundError):
+ return False
+
def open_file(self, *paths: str, mode: str = "r"):
""" Open file and return a corresponding file object. In text mode,
UTF-8 is used as encoding.
@@ -218,6 +280,42 @@ class Directory:
"""
raise NotImplementedError()
+ def file_digest(self, *paths: str) -> str:
+ """ Return a digest of a file. The digest algorithm is implementation-
+ defined.
+
+ Args:
+ *paths: A list of strings which are all path components.
+ """
+ raise NotImplementedError()
+
+ def readlink(self, *paths: str) -> str:
+ """ Return a string representing the path to which the symbolic link points.
+
+ Args:
+ *paths: A list of strings which are all path components.
+ """
+ raise NotImplementedError()
+
+ def remove(self, *paths: str, recursive: bool = False):
+ """ Remove a file, symlink or directory. Symlinks are not followed.
+
+ Args:
+ *paths: A list of strings which are all path components.
+ recursive: True to delete non-empty directories.
+ """
+ raise NotImplementedError()
+
+ def rename(self, src: List[str], dest: List[str]):
+ """ Rename a file, symlink or directory. If destination path exists
+ already and is a file or empty directory, it will be replaced.
+
+ Args:
+ *src: Source path components.
+ *dest: Destination path components.
+ """
+ raise NotImplementedError()
+
def _create_empty_file(self, *paths):
with self.open_file(*paths, mode="w"):
pass
diff --git a/tests/internals/storage.py b/tests/internals/storage.py
index 2e636cc53..ea3f59b50 100644
--- a/tests/internals/storage.py
+++ b/tests/internals/storage.py
@@ -2,7 +2,9 @@ from contextlib import contextmanager
import os
import pprint
import shutil
+import stat
import glob
+import hashlib
from pathlib import Path
from typing import List, Optional
@@ -11,7 +13,7 @@ import pytest
from buildstream._cas import CASCache
from buildstream.storage._casbaseddirectory import CasBasedDirectory
from buildstream.storage._filebaseddirectory import FileBasedDirectory
-from buildstream.storage.directory import _FileType
+from buildstream.storage.directory import _FileType, VirtualDirectoryError
DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "storage")
@@ -19,7 +21,9 @@ DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "storage")
@contextmanager
def setup_backend(backend_class, tmpdir):
if backend_class == FileBasedDirectory:
- yield backend_class(os.path.join(tmpdir, "vdir"))
+ path = os.path.join(tmpdir, "vdir")
+ os.mkdir(path)
+ yield backend_class(path)
else:
cas_cache = CASCache(os.path.join(tmpdir, "cas"), log_directory=os.path.join(tmpdir, "logs"))
try:
@@ -216,6 +220,94 @@ def _test_merge_dirs(
)
+@pytest.mark.parametrize("backend", [FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_file_types(tmpdir, datafiles, backend):
+ with setup_backend(backend, str(tmpdir)) as c:
+ c.import_files(os.path.join(str(datafiles), "merge-link"))
+
+ # Test __iter__
+ assert set(c) == {"link", "root-file", "subdirectory"}
+
+ assert c.exists("root-file")
+ assert c.isfile("root-file")
+ assert not c.isdir("root-file")
+ assert not c.islink("root-file")
+
+ st = c.stat("root-file")
+ assert stat.S_ISREG(st.st_mode)
+
+ assert c.exists("link")
+ assert c.islink("link")
+ assert not c.isfile("link")
+ assert c.readlink("link") == "root-file"
+
+ st = c.stat("link")
+ assert stat.S_ISLNK(st.st_mode)
+
+ assert c.exists("subdirectory")
+ assert c.isdir("subdirectory")
+ assert not c.isfile("subdirectory")
+ subdir = c.descend("subdirectory")
+ assert set(subdir) == {"subdir-file"}
+
+ st = c.stat("subdirectory")
+ assert stat.S_ISDIR(st.st_mode)
+
+
+@pytest.mark.parametrize("backend", [FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_open_file(tmpdir, datafiles, backend):
+ with setup_backend(backend, str(tmpdir)) as c:
+ assert not c.isfile("hello")
+
+ with c.open_file("hello", mode="w") as f:
+ f.write("world")
+ assert c.isfile("hello")
+
+ assert c.file_digest("hello") == hashlib.sha256(b"world").hexdigest()
+
+ with c.open_file("hello", mode="r") as f:
+ assert f.read() == "world"
+
+
+@pytest.mark.parametrize("backend", [FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_remove(tmpdir, datafiles, backend):
+ with setup_backend(backend, str(tmpdir)) as c:
+ c.import_files(os.path.join(str(datafiles), "merge-link"))
+
+ with pytest.raises((OSError, VirtualDirectoryError)):
+ c.remove("subdirectory")
+
+ with pytest.raises(FileNotFoundError):
+ c.remove("subdirectory", "does-not-exist")
+
+ # Check that `remove()` doesn't follow symlinks
+ c.remove("link")
+ assert not c.exists("link")
+ assert c.exists("root-file")
+
+ c.remove("subdirectory", recursive=True)
+ assert not c.exists("subdirectory")
+
+ # Removing an empty directory does not require recursive=True
+ c.descend("empty-directory", create=True)
+ c.remove("empty-directory")
+
+
+@pytest.mark.parametrize("backend", [FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_rename(tmpdir, datafiles, backend):
+ with setup_backend(backend, str(tmpdir)) as c:
+ c.import_files(os.path.join(str(datafiles), "original"))
+
+ c.rename(["bin", "hello"], ["bin", "hello2"])
+ c.rename(["bin"], ["bin2"])
+
+ assert c.isfile("bin2", "hello2")
+
+
# This is purely for error output; lists relative paths and
# their digests so differences are human-grokkable
def list_relative_paths(directory):