From 3c20e79e02651c0c86ef04b17eafb384ec46bbbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Wed, 27 Feb 2019 16:33:50 +0100 Subject: _casbaseddirectory.py: Remove docstring for import_files() The docstring in the superclass should be used as reference. This matches FileBasedDirectory.import_files(). --- buildstream/storage/_casbaseddirectory.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py index 537feade2..6e993c011 100644 --- a/buildstream/storage/_casbaseddirectory.py +++ b/buildstream/storage/_casbaseddirectory.py @@ -414,20 +414,7 @@ class CasBasedDirectory(Directory): filter_callback=None, report_written=True, update_mtime=False, can_link=False): - """Imports some or all files from external_path into this directory. - - Keyword arguments: external_pathspec: Either a string - containing a pathname, or a Directory object, to use as the - source. - - report_written (bool): Return the full list of files - written. Defaults to true. If false, only a list of - overwritten files is returned. - - update_mtime (bool): Currently ignored, since CAS does not store mtimes. - - can_link (bool): Ignored, since hard links do not have any meaning within CAS. - """ + """ See superclass Directory for arguments """ if isinstance(external_pathspec, str): files = list_relative_paths(external_pathspec) -- cgit v1.2.1 From 73706d3aa70c75e7c37d6fa47cd0ff666ce0a711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 25 Feb 2019 18:18:02 +0100 Subject: _casbaseddirectory.py: Add result parameter to import methods This eliminates the need to combine subdir results. --- buildstream/storage/_casbaseddirectory.py | 40 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py index 6e993c011..501e245f3 100644 --- a/buildstream/storage/_casbaseddirectory.py +++ b/buildstream/storage/_casbaseddirectory.py @@ -302,20 +302,19 @@ class CasBasedDirectory(Directory): fileListResult.overwritten.append(relative_pathname) return True - def _import_files_from_directory(self, source_directory, files, path_prefix=""): + def _import_files_from_directory(self, source_directory, files, *, path_prefix="", result): """ Imports files from a traditional directory. """ - def _import_directory_recursively(directory_name, source_directory, remaining_path, path_prefix): + def _import_directory_recursively(directory_name, source_directory, remaining_path, path_prefix, result): """ _import_directory_recursively and _import_files_from_directory will be called alternately as a directory tree is descended. """ subdir = self.descend(directory_name, create=True) new_path_prefix = os.path.join(path_prefix, directory_name) - subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name), - [os.path.sep.join(remaining_path)], - path_prefix=new_path_prefix) - return subdir_result + subdir._import_files_from_directory(os.path.join(source_directory, directory_name), + [os.path.sep.join(remaining_path)], + path_prefix=new_path_prefix, + result=result) - result = FileListResult() for entry in files: split_path = entry.split(os.path.sep) # The actual file on the FS we're importing @@ -330,9 +329,8 @@ class CasBasedDirectory(Directory): # directory_name. However, we can't do it out of # order, since importing symlinks affects the results # of other imports. - subdir_result = _import_directory_recursively(directory_name, source_directory, - split_path[1:], path_prefix) - result.combine(subdir_result) + _import_directory_recursively(directory_name, source_directory, + split_path[1:], path_prefix, result) elif os.path.islink(import_file): if self._check_replacement(entry, path_prefix, result): self._copy_link_from_filesystem(source_directory, entry) @@ -345,7 +343,6 @@ class CasBasedDirectory(Directory): if self._check_replacement(entry, path_prefix, result): self._add_file(source_directory, entry, modified=relative_pathname in result.overwritten) result.files_written.append(relative_pathname) - return result @staticmethod def _files_in_subdir(sorted_files, dirname): @@ -357,7 +354,8 @@ class CasBasedDirectory(Directory): dirname += os.path.sep return [f[len(dirname):] for f in sorted_files if f.startswith(dirname)] - def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True): + def _partial_import_cas_into_cas(self, source_directory, files, *, + path_prefix="", file_list_required=True, result): """ Import only the files and symlinks listed in 'files' from source_directory to this one. Args: source_directory (:class:`.CasBasedDirectory`): The directory to import from @@ -365,7 +363,6 @@ class CasBasedDirectory(Directory): path_prefix (str): Prefix used to add entries to the file list result. file_list_required: Whether to update the file list while processing. """ - result = FileListResult() processed_directories = set() for f in files: fullname = os.path.join(path_prefix, f) @@ -380,10 +377,10 @@ class CasBasedDirectory(Directory): # We will fail at this point if there is a file or symlink called 'dirname'. dest_subdir = self.descend(dirname, create=True) src_subdir = source_directory.descend(dirname) - import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents, - path_prefix=fullname, - file_list_required=file_list_required) - result.combine(import_result) + dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents, + path_prefix=fullname, + file_list_required=file_list_required, + result=result) processed_directories.add(dirname) elif source_directory.index[f].type == _FileType.DIRECTORY: # The thing in the input file list is a directory on @@ -408,7 +405,6 @@ class CasBasedDirectory(Directory): result.files_written.append(os.path.join(path_prefix, f)) else: result.ignored.append(os.path.join(path_prefix, f)) - return result def import_files(self, external_pathspec, *, filter_callback=None, @@ -425,15 +421,17 @@ class CasBasedDirectory(Directory): if filter_callback: files = [path for path in files if filter_callback(path)] + result = FileListResult() + if isinstance(external_pathspec, FileBasedDirectory): source_directory = external_pathspec._get_underlying_directory() - result = self._import_files_from_directory(source_directory, files=files) + self._import_files_from_directory(source_directory, files=files, result=result) elif isinstance(external_pathspec, str): source_directory = external_pathspec - result = self._import_files_from_directory(source_directory, files=files) + self._import_files_from_directory(source_directory, files=files, result=result) else: assert isinstance(external_pathspec, CasBasedDirectory) - result = self._partial_import_cas_into_cas(external_pathspec, files=list(files)) + self._partial_import_cas_into_cas(external_pathspec, files=list(files), result=result) # TODO: No notice is taken of report_written, update_mtime or can_link. # Current behaviour is to fully populate the report, which is inefficient, -- cgit v1.2.1 From cc067bd72d80f678aa7c1109c123581ff0fede77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Sun, 24 Feb 2019 09:45:11 +0100 Subject: _casbaseddirectory.py: Replace file list with filter callback --- buildstream/storage/_casbaseddirectory.py | 182 +++++++++++++----------------- 1 file changed, 76 insertions(+), 106 deletions(-) diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py index 501e245f3..2c1f465c3 100644 --- a/buildstream/storage/_casbaseddirectory.py +++ b/buildstream/storage/_casbaseddirectory.py @@ -32,7 +32,7 @@ import os from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2 from .directory import Directory, VirtualDirectoryError, _FileType from ._filebaseddirectory import FileBasedDirectory -from ..utils import FileListResult, list_relative_paths, _magic_timestamp +from ..utils import FileListResult, _magic_timestamp class IndexEntry(): @@ -302,109 +302,88 @@ class CasBasedDirectory(Directory): fileListResult.overwritten.append(relative_pathname) return True - def _import_files_from_directory(self, source_directory, files, *, path_prefix="", result): - """ Imports files from a traditional directory. """ - - def _import_directory_recursively(directory_name, source_directory, remaining_path, path_prefix, result): - """ _import_directory_recursively and _import_files_from_directory will be called alternately - as a directory tree is descended. """ - subdir = self.descend(directory_name, create=True) - new_path_prefix = os.path.join(path_prefix, directory_name) - subdir._import_files_from_directory(os.path.join(source_directory, directory_name), - [os.path.sep.join(remaining_path)], - path_prefix=new_path_prefix, - result=result) - - for entry in files: - split_path = entry.split(os.path.sep) - # The actual file on the FS we're importing - import_file = os.path.join(source_directory, entry) + def _import_files_from_directory(self, source_directory, filter_callback, *, path_prefix="", result): + """ Import files from a traditional directory. """ + + for direntry in sorted(os.scandir(source_directory), key=lambda e: e.name): # The destination filename, relative to the root where the import started - relative_pathname = os.path.join(path_prefix, entry) - if len(split_path) > 1: - directory_name = split_path[0] - # Hand this off to the importer for that subdir. - - # It would be advantageous to batch these together by - # directory_name. However, we can't do it out of - # order, since importing symlinks affects the results - # of other imports. - _import_directory_recursively(directory_name, source_directory, - split_path[1:], path_prefix, result) - elif os.path.islink(import_file): - if self._check_replacement(entry, path_prefix, result): - self._copy_link_from_filesystem(source_directory, entry) + relative_pathname = os.path.join(path_prefix, direntry.name) + + is_dir = direntry.is_dir(follow_symlinks=False) + + if is_dir: + src_subdir = os.path.join(source_directory, direntry.name) + + try: + create_subdir = direntry.name not in self.index + dest_subdir = self.descend(direntry.name, create=create_subdir) + except VirtualDirectoryError: + filetype = self.index[direntry.name].type + raise VirtualDirectoryError('Destination is a {}, not a directory: /{}' + .format(filetype, relative_pathname)) + + dest_subdir._import_files_from_directory(src_subdir, filter_callback, + path_prefix=relative_pathname, result=result) + + if filter_callback and not filter_callback(relative_pathname): + if is_dir and create_subdir and dest_subdir.is_empty(): + # Complete subdirectory has been filtered out, remove it + self.delete_entry(direntry.name) + + # Entry filtered out, move to next + continue + + if direntry.is_file(follow_symlinks=False): + if self._check_replacement(direntry.name, path_prefix, result): + self._add_file(source_directory, direntry.name, modified=relative_pathname in result.overwritten) result.files_written.append(relative_pathname) - elif os.path.isdir(import_file): - # A plain directory which already exists isn't a problem; just ignore it. - if entry not in self.index: - self._add_directory(entry) - elif os.path.isfile(import_file): - if self._check_replacement(entry, path_prefix, result): - self._add_file(source_directory, entry, modified=relative_pathname in result.overwritten) + elif direntry.is_symlink(): + if self._check_replacement(direntry.name, path_prefix, result): + self._copy_link_from_filesystem(source_directory, direntry.name) result.files_written.append(relative_pathname) - @staticmethod - def _files_in_subdir(sorted_files, dirname): - """Filters sorted_files and returns only the ones which have - 'dirname' as a prefix, with that prefix removed. + def _partial_import_cas_into_cas(self, source_directory, filter_callback, *, path_prefix="", result): + """ Import files from a CAS-based directory. """ - """ - if not dirname.endswith(os.path.sep): - dirname += os.path.sep - return [f[len(dirname):] for f in sorted_files if f.startswith(dirname)] - - def _partial_import_cas_into_cas(self, source_directory, files, *, - path_prefix="", file_list_required=True, result): - """ Import only the files and symlinks listed in 'files' from source_directory to this one. - Args: - source_directory (:class:`.CasBasedDirectory`): The directory to import from - files ([str]): List of pathnames to import. Must be a list, not a generator. - path_prefix (str): Prefix used to add entries to the file list result. - file_list_required: Whether to update the file list while processing. - """ - processed_directories = set() - for f in files: - fullname = os.path.join(path_prefix, f) - components = f.split(os.path.sep) - if len(components) > 1: - # We are importing a thing which is in a subdirectory. We may have already seen this dirname - # for a previous file. - dirname = components[0] - if dirname not in processed_directories: - # Now strip off the first directory name and import files recursively. - subcomponents = CasBasedDirectory._files_in_subdir(files, dirname) - # We will fail at this point if there is a file or symlink called 'dirname'. - dest_subdir = self.descend(dirname, create=True) - src_subdir = source_directory.descend(dirname) - dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents, - path_prefix=fullname, - file_list_required=file_list_required, - result=result) - processed_directories.add(dirname) - elif source_directory.index[f].type == _FileType.DIRECTORY: - # The thing in the input file list is a directory on - # its own. We don't need to do anything other than create it if it doesn't exist. - # If we already have an entry with the same name that isn't a directory, that - # will be dealt with when importing files in this directory. - if f not in self.index: - self.descend(f, create=True) - else: - # We're importing a file or symlink - replace anything with the same name. - importable = self._check_replacement(f, path_prefix, result) - if importable: - entry = source_directory.index[f] + for name, entry in sorted(source_directory.index.items()): + # The destination filename, relative to the root where the import started + relative_pathname = os.path.join(path_prefix, name) + + is_dir = entry.type == _FileType.DIRECTORY + + if is_dir: + src_subdir = source_directory.descend(name) + + try: + create_subdir = name not in self.index + dest_subdir = self.descend(name, create=create_subdir) + except VirtualDirectoryError: + filetype = self.index[name].type + raise VirtualDirectoryError('Destination is a {}, not a directory: /{}' + .format(filetype, relative_pathname)) + + dest_subdir._partial_import_cas_into_cas(src_subdir, filter_callback, + path_prefix=relative_pathname, result=result) + + if filter_callback and not filter_callback(relative_pathname): + if is_dir and create_subdir and dest_subdir.is_empty(): + # Complete subdirectory has been filtered out, remove it + self.delete_entry(name) + + # Entry filtered out, move to next + continue + + if not is_dir: + if self._check_replacement(name, path_prefix, result): item = entry.pb_object if entry.type == _FileType.REGULAR_FILE: - filenode = self.pb2_directory.files.add(digest=item.digest, name=f, + filenode = self.pb2_directory.files.add(digest=item.digest, name=name, is_executable=item.is_executable) - self.index[f] = IndexEntry(filenode, _FileType.REGULAR_FILE, modified=True) + self.index[name] = IndexEntry(filenode, _FileType.REGULAR_FILE, modified=True) else: assert entry.type == _FileType.SYMLINK - self._add_new_link_direct(name=f, target=item.target) - result.files_written.append(os.path.join(path_prefix, f)) - else: - result.ignored.append(os.path.join(path_prefix, f)) + self._add_new_link_direct(name=name, target=item.target) + result.files_written.append(relative_pathname) def import_files(self, external_pathspec, *, filter_callback=None, @@ -412,26 +391,17 @@ class CasBasedDirectory(Directory): can_link=False): """ See superclass Directory for arguments """ - if isinstance(external_pathspec, str): - files = list_relative_paths(external_pathspec) - else: - assert isinstance(external_pathspec, Directory) - files = external_pathspec.list_relative_paths() - - if filter_callback: - files = [path for path in files if filter_callback(path)] - result = FileListResult() if isinstance(external_pathspec, FileBasedDirectory): source_directory = external_pathspec._get_underlying_directory() - self._import_files_from_directory(source_directory, files=files, result=result) + self._import_files_from_directory(source_directory, filter_callback, result=result) elif isinstance(external_pathspec, str): source_directory = external_pathspec - self._import_files_from_directory(source_directory, files=files, result=result) + self._import_files_from_directory(source_directory, filter_callback, result=result) else: assert isinstance(external_pathspec, CasBasedDirectory) - self._partial_import_cas_into_cas(external_pathspec, files=list(files), result=result) + self._partial_import_cas_into_cas(external_pathspec, filter_callback, result=result) # TODO: No notice is taken of report_written, update_mtime or can_link. # Current behaviour is to fully populate the report, which is inefficient, -- cgit v1.2.1 From b6541ea945620f8db0b3875e09eb462b85e37623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Sun, 24 Feb 2019 18:59:48 +0100 Subject: _filebaseddirectory.py: Add _get_filetype() method --- buildstream/storage/_filebaseddirectory.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py index dfb310ae1..67c0ec630 100644 --- a/buildstream/storage/_filebaseddirectory.py +++ b/buildstream/storage/_filebaseddirectory.py @@ -189,3 +189,19 @@ class FileBasedDirectory(Directory): """ Returns the underlying (real) file system directory this object refers to. """ return self.external_directory + + def _get_filetype(self, name=None): + path = self.external_directory + + if name: + path = os.path.join(path, name) + + st = os.lstat(path) + if stat.S_ISDIR(st.st_mode): + return _FileType.DIRECTORY + elif stat.S_ISLNK(st.st_mode): + return _FileType.SYMLINK + elif stat.S_ISREG(st.st_mode): + return _FileType.REGULAR_FILE + else: + return _FileType.SPECIAL_FILE -- cgit v1.2.1 From 4cc7eaf59bdeef5bb10694ec9a0a336105036fa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Sun, 24 Feb 2019 11:29:05 +0100 Subject: _filebaseddirectory.py: Support importing files from CAS --- buildstream/storage/_filebaseddirectory.py | 94 ++++++++++++++++++++++++++---- 1 file changed, 82 insertions(+), 12 deletions(-) diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py index 67c0ec630..61827f19c 100644 --- a/buildstream/storage/_filebaseddirectory.py +++ b/buildstream/storage/_filebaseddirectory.py @@ -30,10 +30,12 @@ See also: :ref:`sandboxing`. import os import stat import time -from .directory import Directory, VirtualDirectoryError + +from .directory import Directory, VirtualDirectoryError, _FileType from .. import utils from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp from ..utils import _set_deterministic_user, _set_deterministic_mtime +from ..utils import FileListResult # FileBasedDirectory intentionally doesn't call its superclass constuctor, # which is meant to be unimplemented. @@ -80,19 +82,31 @@ class FileBasedDirectory(Directory): can_link=False): """ See superclass Directory for arguments """ - if isinstance(external_pathspec, Directory): - source_directory = external_pathspec.external_directory - else: - source_directory = external_pathspec + from ._casbaseddirectory import CasBasedDirectory - if can_link and not update_mtime: - import_result = link_files(source_directory, self.external_directory, - filter_callback=filter_callback, - ignore_missing=False, report_written=report_written) + if isinstance(external_pathspec, CasBasedDirectory): + if can_link and not update_mtime: + actionfunc = utils.safe_link + else: + actionfunc = utils.safe_copy + + import_result = FileListResult() + self._import_files_from_cas(external_pathspec, actionfunc, filter_callback, result=import_result) else: - import_result = copy_files(source_directory, self.external_directory, - filter_callback=filter_callback, - ignore_missing=False, report_written=report_written) + if isinstance(external_pathspec, Directory): + source_directory = external_pathspec.external_directory + else: + source_directory = external_pathspec + + if can_link and not update_mtime: + import_result = link_files(source_directory, self.external_directory, + filter_callback=filter_callback, + ignore_missing=False, report_written=report_written) + else: + import_result = copy_files(source_directory, self.external_directory, + filter_callback=filter_callback, + ignore_missing=False, report_written=report_written) + if update_mtime: cur_time = time.time() @@ -205,3 +219,59 @@ class FileBasedDirectory(Directory): return _FileType.REGULAR_FILE else: return _FileType.SPECIAL_FILE + + def _import_files_from_cas(self, source_directory, actionfunc, filter_callback, *, path_prefix="", result): + """ Import files from a CAS-based directory. """ + + for name, entry in source_directory.index.items(): + # The destination filename, relative to the root where the import started + relative_pathname = os.path.join(path_prefix, name) + + # The full destination path + dest_path = os.path.join(self.external_directory, name) + + is_dir = entry.type == _FileType.DIRECTORY + + if is_dir: + src_subdir = source_directory.descend(name) + + try: + create_subdir = not os.path.lexists(dest_path) + dest_subdir = self.descend(name, create=create_subdir) + except VirtualDirectoryError: + filetype = self._get_filetype(name) + raise VirtualDirectoryError('Destination is a {}, not a directory: /{}' + .format(filetype, relative_pathname)) + + dest_subdir._import_files_from_cas(src_subdir, actionfunc, filter_callback, + path_prefix=relative_pathname, result=result) + + if filter_callback and not filter_callback(relative_pathname): + if is_dir and create_subdir and dest_subdir.is_empty(): + # Complete subdirectory has been filtered out, remove it + os.rmdir(dest_subdir.external_directory) + + # Entry filtered out, move to next + continue + + if not is_dir: + if os.path.lexists(dest_path): + # Collect overlaps + if not os.path.isdir(dest_path): + result.overwritten.append(relative_pathname) + + if not utils.safe_remove(dest_path): + result.ignored.append(relative_pathname) + continue + + item = entry.pb_object + if entry.type == _FileType.REGULAR_FILE: + src_path = source_directory.cas_cache.objpath(item.digest) + actionfunc(src_path, dest_path, result=result) + if item.is_executable: + os.chmod(dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | + stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH) + else: + assert entry.type == _FileType.SYMLINK + os.symlink(item.target, dest_path) + result.files_written.append(relative_pathname) -- cgit v1.2.1