summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jürg Billeter <j@bitron.ch>, 2019-02-27 20:21:34 +0000
committer: Jürg Billeter <j@bitron.ch>, 2019-02-27 20:21:34 +0000
commit: 153f81a10dcf558fcc4a7a796d1a3f48c21d9be9 (patch)
tree: 2c61bf9c22ce8cbb5df140275c8f812cccddec1a
parent: 2c492ff8a6e9a9c0d17654cc3d3d509828a4e1fd (diff)
parent: 4cc7eaf59bdeef5bb10694ec9a0a336105036fa3 (diff)
download: buildstream-153f81a10dcf558fcc4a7a796d1a3f48c21d9be9.tar.gz
Merge branch 'juerg/directory-import' into 'master'
Directory.import_files() improvements

See merge request BuildStream/buildstream!1183
-rw-r--r-- buildstream/storage/_casbaseddirectory.py | 199
-rw-r--r-- buildstream/storage/_filebaseddirectory.py | 110
2 files changed, 175 insertions, 134 deletions
diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 537feade2..2c1f465c3 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -32,7 +32,7 @@ import os
from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from .directory import Directory, VirtualDirectoryError, _FileType
from ._filebaseddirectory import FileBasedDirectory
-from ..utils import FileListResult, list_relative_paths, _magic_timestamp
+from ..utils import FileListResult, _magic_timestamp
class IndexEntry():
@@ -302,151 +302,106 @@ class CasBasedDirectory(Directory):
fileListResult.overwritten.append(relative_pathname)
return True
- def _import_files_from_directory(self, source_directory, files, path_prefix=""):
- """ Imports files from a traditional directory. """
+ def _import_files_from_directory(self, source_directory, filter_callback, *, path_prefix="", result):
+ """ Import files from a traditional directory. """
- def _import_directory_recursively(directory_name, source_directory, remaining_path, path_prefix):
- """ _import_directory_recursively and _import_files_from_directory will be called alternately
- as a directory tree is descended. """
- subdir = self.descend(directory_name, create=True)
- new_path_prefix = os.path.join(path_prefix, directory_name)
- subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
- [os.path.sep.join(remaining_path)],
- path_prefix=new_path_prefix)
- return subdir_result
-
- result = FileListResult()
- for entry in files:
- split_path = entry.split(os.path.sep)
- # The actual file on the FS we're importing
- import_file = os.path.join(source_directory, entry)
+ for direntry in sorted(os.scandir(source_directory), key=lambda e: e.name):
# The destination filename, relative to the root where the import started
- relative_pathname = os.path.join(path_prefix, entry)
- if len(split_path) > 1:
- directory_name = split_path[0]
- # Hand this off to the importer for that subdir.
-
- # It would be advantageous to batch these together by
- # directory_name. However, we can't do it out of
- # order, since importing symlinks affects the results
- # of other imports.
- subdir_result = _import_directory_recursively(directory_name, source_directory,
- split_path[1:], path_prefix)
- result.combine(subdir_result)
- elif os.path.islink(import_file):
- if self._check_replacement(entry, path_prefix, result):
- self._copy_link_from_filesystem(source_directory, entry)
+ relative_pathname = os.path.join(path_prefix, direntry.name)
+
+ is_dir = direntry.is_dir(follow_symlinks=False)
+
+ if is_dir:
+ src_subdir = os.path.join(source_directory, direntry.name)
+
+ try:
+ create_subdir = direntry.name not in self.index
+ dest_subdir = self.descend(direntry.name, create=create_subdir)
+ except VirtualDirectoryError:
+ filetype = self.index[direntry.name].type
+ raise VirtualDirectoryError('Destination is a {}, not a directory: /{}'
+ .format(filetype, relative_pathname))
+
+ dest_subdir._import_files_from_directory(src_subdir, filter_callback,
+ path_prefix=relative_pathname, result=result)
+
+ if filter_callback and not filter_callback(relative_pathname):
+ if is_dir and create_subdir and dest_subdir.is_empty():
+ # Complete subdirectory has been filtered out, remove it
+ self.delete_entry(direntry.name)
+
+ # Entry filtered out, move to next
+ continue
+
+ if direntry.is_file(follow_symlinks=False):
+ if self._check_replacement(direntry.name, path_prefix, result):
+ self._add_file(source_directory, direntry.name, modified=relative_pathname in result.overwritten)
result.files_written.append(relative_pathname)
- elif os.path.isdir(import_file):
- # A plain directory which already exists isn't a problem; just ignore it.
- if entry not in self.index:
- self._add_directory(entry)
- elif os.path.isfile(import_file):
- if self._check_replacement(entry, path_prefix, result):
- self._add_file(source_directory, entry, modified=relative_pathname in result.overwritten)
+ elif direntry.is_symlink():
+ if self._check_replacement(direntry.name, path_prefix, result):
+ self._copy_link_from_filesystem(source_directory, direntry.name)
result.files_written.append(relative_pathname)
- return result
- @staticmethod
- def _files_in_subdir(sorted_files, dirname):
- """Filters sorted_files and returns only the ones which have
- 'dirname' as a prefix, with that prefix removed.
+ def _partial_import_cas_into_cas(self, source_directory, filter_callback, *, path_prefix="", result):
+ """ Import files from a CAS-based directory. """
- """
- if not dirname.endswith(os.path.sep):
- dirname += os.path.sep
- return [f[len(dirname):] for f in sorted_files if f.startswith(dirname)]
-
- def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
- """ Import only the files and symlinks listed in 'files' from source_directory to this one.
- Args:
- source_directory (:class:`.CasBasedDirectory`): The directory to import from
- files ([str]): List of pathnames to import. Must be a list, not a generator.
- path_prefix (str): Prefix used to add entries to the file list result.
- file_list_required: Whether to update the file list while processing.
- """
- result = FileListResult()
- processed_directories = set()
- for f in files:
- fullname = os.path.join(path_prefix, f)
- components = f.split(os.path.sep)
- if len(components) > 1:
- # We are importing a thing which is in a subdirectory. We may have already seen this dirname
- # for a previous file.
- dirname = components[0]
- if dirname not in processed_directories:
- # Now strip off the first directory name and import files recursively.
- subcomponents = CasBasedDirectory._files_in_subdir(files, dirname)
- # We will fail at this point if there is a file or symlink called 'dirname'.
- dest_subdir = self.descend(dirname, create=True)
- src_subdir = source_directory.descend(dirname)
- import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents,
- path_prefix=fullname,
- file_list_required=file_list_required)
- result.combine(import_result)
- processed_directories.add(dirname)
- elif source_directory.index[f].type == _FileType.DIRECTORY:
- # The thing in the input file list is a directory on
- # its own. We don't need to do anything other than create it if it doesn't exist.
- # If we already have an entry with the same name that isn't a directory, that
- # will be dealt with when importing files in this directory.
- if f not in self.index:
- self.descend(f, create=True)
- else:
- # We're importing a file or symlink - replace anything with the same name.
- importable = self._check_replacement(f, path_prefix, result)
- if importable:
- entry = source_directory.index[f]
+ for name, entry in sorted(source_directory.index.items()):
+ # The destination filename, relative to the root where the import started
+ relative_pathname = os.path.join(path_prefix, name)
+
+ is_dir = entry.type == _FileType.DIRECTORY
+
+ if is_dir:
+ src_subdir = source_directory.descend(name)
+
+ try:
+ create_subdir = name not in self.index
+ dest_subdir = self.descend(name, create=create_subdir)
+ except VirtualDirectoryError:
+ filetype = self.index[name].type
+ raise VirtualDirectoryError('Destination is a {}, not a directory: /{}'
+ .format(filetype, relative_pathname))
+
+ dest_subdir._partial_import_cas_into_cas(src_subdir, filter_callback,
+ path_prefix=relative_pathname, result=result)
+
+ if filter_callback and not filter_callback(relative_pathname):
+ if is_dir and create_subdir and dest_subdir.is_empty():
+ # Complete subdirectory has been filtered out, remove it
+ self.delete_entry(name)
+
+ # Entry filtered out, move to next
+ continue
+
+ if not is_dir:
+ if self._check_replacement(name, path_prefix, result):
item = entry.pb_object
if entry.type == _FileType.REGULAR_FILE:
- filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
+ filenode = self.pb2_directory.files.add(digest=item.digest, name=name,
is_executable=item.is_executable)
- self.index[f] = IndexEntry(filenode, _FileType.REGULAR_FILE, modified=True)
+ self.index[name] = IndexEntry(filenode, _FileType.REGULAR_FILE, modified=True)
else:
assert entry.type == _FileType.SYMLINK
- self._add_new_link_direct(name=f, target=item.target)
- result.files_written.append(os.path.join(path_prefix, f))
- else:
- result.ignored.append(os.path.join(path_prefix, f))
- return result
+ self._add_new_link_direct(name=name, target=item.target)
+ result.files_written.append(relative_pathname)
def import_files(self, external_pathspec, *,
filter_callback=None,
report_written=True, update_mtime=False,
can_link=False):
- """Imports some or all files from external_path into this directory.
+ """ See superclass Directory for arguments """
- Keyword arguments: external_pathspec: Either a string
- containing a pathname, or a Directory object, to use as the
- source.
-
- report_written (bool): Return the full list of files
- written. Defaults to true. If false, only a list of
- overwritten files is returned.
-
- update_mtime (bool): Currently ignored, since CAS does not store mtimes.
-
- can_link (bool): Ignored, since hard links do not have any meaning within CAS.
- """
-
- if isinstance(external_pathspec, str):
- files = list_relative_paths(external_pathspec)
- else:
- assert isinstance(external_pathspec, Directory)
- files = external_pathspec.list_relative_paths()
-
- if filter_callback:
- files = [path for path in files if filter_callback(path)]
+ result = FileListResult()
if isinstance(external_pathspec, FileBasedDirectory):
source_directory = external_pathspec._get_underlying_directory()
- result = self._import_files_from_directory(source_directory, files=files)
+ self._import_files_from_directory(source_directory, filter_callback, result=result)
elif isinstance(external_pathspec, str):
source_directory = external_pathspec
- result = self._import_files_from_directory(source_directory, files=files)
+ self._import_files_from_directory(source_directory, filter_callback, result=result)
else:
assert isinstance(external_pathspec, CasBasedDirectory)
- result = self._partial_import_cas_into_cas(external_pathspec, files=list(files))
+ self._partial_import_cas_into_cas(external_pathspec, filter_callback, result=result)
# TODO: No notice is taken of report_written, update_mtime or can_link.
# Current behaviour is to fully populate the report, which is inefficient,
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
index dfb310ae1..61827f19c 100644
--- a/buildstream/storage/_filebaseddirectory.py
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -30,10 +30,12 @@ See also: :ref:`sandboxing`.
import os
import stat
import time
-from .directory import Directory, VirtualDirectoryError
+
+from .directory import Directory, VirtualDirectoryError, _FileType
from .. import utils
from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
from ..utils import _set_deterministic_user, _set_deterministic_mtime
+from ..utils import FileListResult
# FileBasedDirectory intentionally doesn't call its superclass constuctor,
# which is meant to be unimplemented.
@@ -80,19 +82,31 @@ class FileBasedDirectory(Directory):
can_link=False):
""" See superclass Directory for arguments """
- if isinstance(external_pathspec, Directory):
- source_directory = external_pathspec.external_directory
- else:
- source_directory = external_pathspec
+ from ._casbaseddirectory import CasBasedDirectory
+
+ if isinstance(external_pathspec, CasBasedDirectory):
+ if can_link and not update_mtime:
+ actionfunc = utils.safe_link
+ else:
+ actionfunc = utils.safe_copy
- if can_link and not update_mtime:
- import_result = link_files(source_directory, self.external_directory,
- filter_callback=filter_callback,
- ignore_missing=False, report_written=report_written)
+ import_result = FileListResult()
+ self._import_files_from_cas(external_pathspec, actionfunc, filter_callback, result=import_result)
else:
- import_result = copy_files(source_directory, self.external_directory,
- filter_callback=filter_callback,
- ignore_missing=False, report_written=report_written)
+ if isinstance(external_pathspec, Directory):
+ source_directory = external_pathspec.external_directory
+ else:
+ source_directory = external_pathspec
+
+ if can_link and not update_mtime:
+ import_result = link_files(source_directory, self.external_directory,
+ filter_callback=filter_callback,
+ ignore_missing=False, report_written=report_written)
+ else:
+ import_result = copy_files(source_directory, self.external_directory,
+ filter_callback=filter_callback,
+ ignore_missing=False, report_written=report_written)
+
if update_mtime:
cur_time = time.time()
@@ -189,3 +203,75 @@ class FileBasedDirectory(Directory):
""" Returns the underlying (real) file system directory this
object refers to. """
return self.external_directory
+
+ def _get_filetype(self, name=None):
+ path = self.external_directory
+
+ if name:
+ path = os.path.join(path, name)
+
+ st = os.lstat(path)
+ if stat.S_ISDIR(st.st_mode):
+ return _FileType.DIRECTORY
+ elif stat.S_ISLNK(st.st_mode):
+ return _FileType.SYMLINK
+ elif stat.S_ISREG(st.st_mode):
+ return _FileType.REGULAR_FILE
+ else:
+ return _FileType.SPECIAL_FILE
+
+ def _import_files_from_cas(self, source_directory, actionfunc, filter_callback, *, path_prefix="", result):
+ """ Import files from a CAS-based directory. """
+
+ for name, entry in source_directory.index.items():
+ # The destination filename, relative to the root where the import started
+ relative_pathname = os.path.join(path_prefix, name)
+
+ # The full destination path
+ dest_path = os.path.join(self.external_directory, name)
+
+ is_dir = entry.type == _FileType.DIRECTORY
+
+ if is_dir:
+ src_subdir = source_directory.descend(name)
+
+ try:
+ create_subdir = not os.path.lexists(dest_path)
+ dest_subdir = self.descend(name, create=create_subdir)
+ except VirtualDirectoryError:
+ filetype = self._get_filetype(name)
+ raise VirtualDirectoryError('Destination is a {}, not a directory: /{}'
+ .format(filetype, relative_pathname))
+
+ dest_subdir._import_files_from_cas(src_subdir, actionfunc, filter_callback,
+ path_prefix=relative_pathname, result=result)
+
+ if filter_callback and not filter_callback(relative_pathname):
+ if is_dir and create_subdir and dest_subdir.is_empty():
+ # Complete subdirectory has been filtered out, remove it
+ os.rmdir(dest_subdir.external_directory)
+
+ # Entry filtered out, move to next
+ continue
+
+ if not is_dir:
+ if os.path.lexists(dest_path):
+ # Collect overlaps
+ if not os.path.isdir(dest_path):
+ result.overwritten.append(relative_pathname)
+
+ if not utils.safe_remove(dest_path):
+ result.ignored.append(relative_pathname)
+ continue
+
+ item = entry.pb_object
+ if entry.type == _FileType.REGULAR_FILE:
+ src_path = source_directory.cas_cache.objpath(item.digest)
+ actionfunc(src_path, dest_path, result=result)
+ if item.is_executable:
+ os.chmod(dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
+ stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
+ else:
+ assert entry.type == _FileType.SYMLINK
+ os.symlink(item.target, dest_path)
+ result.files_written.append(relative_pathname)