diff options
author | Jürg Billeter <j@bitron.ch> | 2019-02-26 14:01:02 +0000 |
---|---|---|
committer | Jürg Billeter <j@bitron.ch> | 2019-02-26 14:01:02 +0000 |
commit | 5a1a5814e77152bf2195f917147d5dc46f7201d7 (patch) | |
tree | d6ac7868be57ed368ed0aaae06b053472b5062aa | |
parent | ee9c1bf522d1f5ee05210c9267af82a9abc54806 (diff) | |
parent | b04c1cbf222b70888bd55c4410fc80dcaff66b8a (diff) | |
download | buildstream-5a1a5814e77152bf2195f917147d5dc46f7201d7.tar.gz |
Merge branch 'juerg/import-filter' into 'master'
Replace file lists with filter callback for file import
See merge request BuildStream/buildstream!1182
-rw-r--r-- | buildstream/element.py | 129 |
-rw-r--r-- | buildstream/plugins/elements/compose.py | 5 |
-rw-r--r-- | buildstream/plugins/sources/local.py | 2 |
-rw-r--r-- | buildstream/storage/_casbaseddirectory.py | 21 |
-rw-r--r-- | buildstream/storage/_filebaseddirectory.py | 9 |
-rw-r--r-- | buildstream/storage/directory.py | 10 |
-rw-r--r-- | buildstream/utils.py | 54 |
7 files changed, 144 insertions, 86 deletions
diff --git a/buildstream/element.py b/buildstream/element.py index cb252fe25..365931e27 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -80,6 +80,7 @@ from collections import OrderedDict from collections.abc import Mapping import contextlib from contextlib import contextmanager +from functools import partial import tempfile import shutil import string @@ -672,20 +673,29 @@ class Element(Plugin): if path is None \ else vbasedir.descend(path.lstrip(os.sep).split(os.sep)) - files = list(self.__compute_splits(include, exclude, orphans)) + split_filter = self.__split_filter_func(include, exclude, orphans) # We must not hardlink files whose mtimes we want to update if update_mtimes: - link_files = [f for f in files if f not in update_mtimes] - copy_files = [f for f in files if f in update_mtimes] + def link_filter(path): + return ((split_filter is None or split_filter(path)) and + path not in update_mtimes) + + def copy_filter(path): + return ((split_filter is None or split_filter(path)) and + path in update_mtimes) else: - link_files = files - copy_files = [] + link_filter = split_filter + + result = vstagedir.import_files(artifact, filter_callback=link_filter, + report_written=True, can_link=True) - link_result = vstagedir.import_files(artifact, files=link_files, report_written=True, can_link=True) - copy_result = vstagedir.import_files(artifact, files=copy_files, report_written=True, update_mtime=True) + if update_mtimes: + copy_result = vstagedir.import_files(artifact, filter_callback=copy_filter, + report_written=True, update_mtime=True) + result = result.combine(copy_result) - return link_result.combine(copy_result) + return result def stage_dependency_artifacts(self, sandbox, scope, *, path=None, include=None, exclude=None, orphans=True): @@ -2557,15 +2567,61 @@ class Element(Plugin): for domain, rules in self.node_items(splits) } - def __compute_splits(self, include=None, exclude=None, orphans=True): - artifact_base, _ = self.__extract() - 
basedir = os.path.join(artifact_base, 'files') + # __split_filter(): + # + # Returns True if the file with the specified `path` is included in the + # specified split domains. This is used by `__split_filter_func()` to create + # a filter callback. + # + # Args: + # element_domains (list): All domains for this element + # include (list): A list of domains to include files from + # exclude (list): A list of domains to exclude files from + # orphans (bool): Whether to include files not spoken for by split domains + # path (str): The relative path of the file + # + # Returns: + # (bool): Whether to include the specified file + # + def __split_filter(self, element_domains, include, exclude, orphans, path): + # Absolute path is required for matching + filename = os.path.join(os.sep, path) + + include_file = False + exclude_file = False + claimed_file = False + + for domain in element_domains: + if self.__splits[domain].match(filename): + claimed_file = True + if domain in include: + include_file = True + if domain in exclude: + exclude_file = True + + if orphans and not claimed_file: + include_file = True + + return include_file and not exclude_file - # No splitting requested, just report complete artifact + # __split_filter_func(): + # + # Returns callable split filter function for use with `copy_files()`, + # `link_files()` or `Directory.import_files()`. + # + # Args: + # include (list): An optional list of domains to include files from + # exclude (list): An optional list of domains to exclude files from + # orphans (bool): Whether to include files not spoken for by split domains + # + # Returns: + # (callable): Filter callback that returns True if the file is included + # in the specified split domains. 
+ # + def __split_filter_func(self, include=None, exclude=None, orphans=True): + # No splitting requested, no filter needed if orphans and not (include or exclude): - for filename in utils.list_relative_paths(basedir): - yield filename - return + return None if not self.__splits: self.__init_splits() @@ -2581,33 +2637,30 @@ class Element(Plugin): include = [domain for domain in include if domain in element_domains] exclude = [domain for domain in exclude if domain in element_domains] + # The arguments element_domains, include, exclude, and orphans are + # the same for all files. Use `partial` to create a function with + # the required callback signature: a single `path` parameter. + return partial(self.__split_filter, element_domains, include, exclude, orphans) + + def __compute_splits(self, include=None, exclude=None, orphans=True): + filter_func = self.__split_filter_func(include=include, exclude=exclude, orphans=orphans) + + artifact_base, _ = self.__extract() + basedir = os.path.join(artifact_base, 'files') + # FIXME: Instead of listing the paths in an extracted artifact, # we should be using a manifest loaded from the artifact # metadata. 
# - element_files = [ - os.path.join(os.sep, filename) - for filename in utils.list_relative_paths(basedir) - ] - - for filename in element_files: - include_file = False - exclude_file = False - claimed_file = False - - for domain in element_domains: - if self.__splits[domain].match(filename): - claimed_file = True - if domain in include: - include_file = True - if domain in exclude: - exclude_file = True - - if orphans and not claimed_file: - include_file = True - - if include_file and not exclude_file: - yield filename.lstrip(os.sep) + element_files = utils.list_relative_paths(basedir) + + if not filter_func: + # No splitting requested, just report complete artifact + yield from element_files + else: + for filename in element_files: + if filter_func(filename): + yield filename def __file_is_whitelisted(self, path): # Considered storing the whitelist regex for re-use, but public data diff --git a/buildstream/plugins/elements/compose.py b/buildstream/plugins/elements/compose.py index d61a324cc..12520ce4c 100644 --- a/buildstream/plugins/elements/compose.py +++ b/buildstream/plugins/elements/compose.py @@ -178,9 +178,12 @@ class ComposeElement(Element): detail = "\n".join(lines) + def import_filter(path): + return path in manifest + with self.timed_activity("Creating composition", detail=detail, silent_nested=True): self.info("Composing {} files".format(len(manifest))) - installdir.import_files(vbasedir, files=manifest, can_link=True) + installdir.import_files(vbasedir, filter_callback=import_filter, can_link=True) # And we're done return os.path.join(os.sep, 'buildstream', 'install') diff --git a/buildstream/plugins/sources/local.py b/buildstream/plugins/sources/local.py index d4965cc9e..bdda91e2f 100644 --- a/buildstream/plugins/sources/local.py +++ b/buildstream/plugins/sources/local.py @@ -98,7 +98,7 @@ class LocalSource(Source): if os.path.isdir(self.fullpath): files = list(utils.list_relative_paths(self.fullpath)) - utils.copy_files(self.fullpath, directory, 
files=files) + utils.copy_files(self.fullpath, directory) else: destfile = os.path.join(directory, os.path.basename(self.path)) files = [os.path.basename(self.path)] diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py index 3477eeb3a..537feade2 100644 --- a/buildstream/storage/_casbaseddirectory.py +++ b/buildstream/storage/_casbaseddirectory.py @@ -410,7 +410,8 @@ class CasBasedDirectory(Directory): result.ignored.append(os.path.join(path_prefix, f)) return result - def import_files(self, external_pathspec, *, files=None, + def import_files(self, external_pathspec, *, + filter_callback=None, report_written=True, update_mtime=False, can_link=False): """Imports some or all files from external_path into this directory. @@ -419,10 +420,6 @@ class CasBasedDirectory(Directory): containing a pathname, or a Directory object, to use as the source. - files (list of strings): A list of all the files relative to - the external_pathspec to copy. If 'None' is supplied, all - files are copied. - report_written (bool): Return the full list of files written. Defaults to true. If false, only a list of overwritten files is returned. @@ -432,12 +429,14 @@ class CasBasedDirectory(Directory): can_link (bool): Ignored, since hard links do not have any meaning within CAS. 
""" - if files is None: - if isinstance(external_pathspec, str): - files = list_relative_paths(external_pathspec) - else: - assert isinstance(external_pathspec, Directory) - files = external_pathspec.list_relative_paths() + if isinstance(external_pathspec, str): + files = list_relative_paths(external_pathspec) + else: + assert isinstance(external_pathspec, Directory) + files = external_pathspec.list_relative_paths() + + if filter_callback: + files = [path for path in files if filter_callback(path)] if isinstance(external_pathspec, FileBasedDirectory): source_directory = external_pathspec._get_underlying_directory() diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py index 2872812b6..dfb310ae1 100644 --- a/buildstream/storage/_filebaseddirectory.py +++ b/buildstream/storage/_filebaseddirectory.py @@ -74,7 +74,8 @@ class FileBasedDirectory(Directory): return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create) - def import_files(self, external_pathspec, *, files=None, + def import_files(self, external_pathspec, *, + filter_callback=None, report_written=True, update_mtime=False, can_link=False): """ See superclass Directory for arguments """ @@ -85,10 +86,12 @@ class FileBasedDirectory(Directory): source_directory = external_pathspec if can_link and not update_mtime: - import_result = link_files(source_directory, self.external_directory, files=files, + import_result = link_files(source_directory, self.external_directory, + filter_callback=filter_callback, ignore_missing=False, report_written=report_written) else: - import_result = copy_files(source_directory, self.external_directory, files=files, + import_result = copy_files(source_directory, self.external_directory, + filter_callback=filter_callback, ignore_missing=False, report_written=report_written) if update_mtime: cur_time = time.time() diff --git a/buildstream/storage/directory.py b/buildstream/storage/directory.py index 9022b8770..70054f78c 100644 
--- a/buildstream/storage/directory.py +++ b/buildstream/storage/directory.py @@ -73,7 +73,8 @@ class Directory(): raise NotImplementedError() # Import and export of files and links - def import_files(self, external_pathspec, *, files=None, + def import_files(self, external_pathspec, *, + filter_callback=None, report_written=True, update_mtime=False, can_link=False): """Imports some or all files from external_path into this directory. @@ -81,9 +82,10 @@ class Directory(): Args: external_pathspec: Either a string containing a pathname, or a Directory object, to use as the source. - files (list of str): A list of all the files relative to - the external_pathspec to copy. If 'None' is supplied, all - files are copied. + filter_callback (callable): Optional filter callback. Called with the + relative path as argument for every file in the source directory. + The file is imported only if the callable returns True. + If no filter callback is specified, all files will be imported. report_written (bool): Return the full list of files written. Defaults to true. If false, only a list of overwritten files is returned. diff --git a/buildstream/utils.py b/buildstream/utils.py index 204afca0c..844153706 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -352,13 +352,16 @@ def safe_remove(path): .format(path, e)) -def copy_files(src, dest, *, files=None, ignore_missing=False, report_written=False): +def copy_files(src, dest, *, filter_callback=None, ignore_missing=False, report_written=False): """Copy files from source to destination. Args: src (str): The source file or directory dest (str): The destination directory - files (list): Optional list of files in `src` to copy + filter_callback (callable): Optional filter callback. Called with the relative path as + argument for every file in the source directory. The file is + copied only if the callable returns True. If no filter callback + is specified, all files will be copied. 
ignore_missing (bool): Dont raise any error if a source file is missing report_written (bool): Add to the result object the full list of files written @@ -376,28 +379,28 @@ def copy_files(src, dest, *, files=None, ignore_missing=False, report_written=Fa UNIX domain socket files from `src` are ignored. """ - presorted = False - if files is None: - files = list_relative_paths(src) - presorted = True - result = FileListResult() try: - _process_list(src, dest, files, safe_copy, result, ignore_missing=ignore_missing, - report_written=report_written, presorted=presorted) + _process_list(src, dest, safe_copy, result, + filter_callback=filter_callback, + ignore_missing=ignore_missing, + report_written=report_written) except OSError as e: raise UtilError("Failed to copy '{} -> {}': {}" .format(src, dest, e)) return result -def link_files(src, dest, *, files=None, ignore_missing=False, report_written=False): +def link_files(src, dest, *, filter_callback=None, ignore_missing=False, report_written=False): """Hardlink files from source to destination. Args: src (str): The source file or directory dest (str): The destination directory - files (list): Optional list of files in `src` to link + filter_callback (callable): Optional filter callback. Called with the relative path as + argument for every file in the source directory. The file is + hardlinked only if the callable returns True. If no filter + callback is specified, all files will be hardlinked. ignore_missing (bool): Dont raise any error if a source file is missing report_written (bool): Add to the result object the full list of files written @@ -420,15 +423,12 @@ def link_files(src, dest, *, files=None, ignore_missing=False, report_written=Fa UNIX domain socket files from `src` are ignored. 
""" - presorted = False - if files is None: - files = list_relative_paths(src) - presorted = True - result = FileListResult() try: - _process_list(src, dest, files, safe_link, result, ignore_missing=ignore_missing, - report_written=report_written, presorted=presorted) + _process_list(src, dest, safe_link, result, + filter_callback=filter_callback, + ignore_missing=ignore_missing, + report_written=report_written) except OSError as e: raise UtilError("Failed to link '{} -> {}': {}" .format(src, dest, e)) @@ -807,26 +807,24 @@ def _ensure_real_directory(root, path): # Args: # srcdir: The source base directory # destdir: The destination base directory -# filelist: List of relative file paths # actionfunc: The function to call for regular files # result: The FileListResult +# filter_callback: Optional callback to invoke for every directory entry # ignore_missing: Dont raise any error if a source file is missing -# presorted: Whether the passed list is known to be presorted # # -def _process_list(srcdir, destdir, filelist, actionfunc, result, - ignore_missing=False, report_written=False, - presorted=False): +def _process_list(srcdir, destdir, actionfunc, result, + filter_callback=None, + ignore_missing=False, report_written=False): # Keep track of directory permissions, since these need to be set # *after* files have been written. permissions = [] - # Sorting the list of files is necessary to ensure that we processes - # symbolic links which lead to directories before processing files inside - # those directories. - if not presorted: - filelist = sorted(filelist) + filelist = list_relative_paths(srcdir) + + if filter_callback: + filelist = [path for path in filelist if filter_callback(path)] # Now walk the list for path in filelist: |