summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jürg Billeter <j@bitron.ch> 2019-02-26 14:01:02 +0000
committer Jürg Billeter <j@bitron.ch> 2019-02-26 14:01:02 +0000
commit 5a1a5814e77152bf2195f917147d5dc46f7201d7 (patch)
tree d6ac7868be57ed368ed0aaae06b053472b5062aa
parent ee9c1bf522d1f5ee05210c9267af82a9abc54806 (diff)
parent b04c1cbf222b70888bd55c4410fc80dcaff66b8a (diff)
download buildstream-5a1a5814e77152bf2195f917147d5dc46f7201d7.tar.gz
Merge branch 'juerg/import-filter' into 'master'
Replace file lists with filter callback for file import. See merge request BuildStream/buildstream!1182.
-rw-r--r-- buildstream/element.py 129
-rw-r--r-- buildstream/plugins/elements/compose.py 5
-rw-r--r-- buildstream/plugins/sources/local.py 2
-rw-r--r-- buildstream/storage/_casbaseddirectory.py 21
-rw-r--r-- buildstream/storage/_filebaseddirectory.py 9
-rw-r--r-- buildstream/storage/directory.py 10
-rw-r--r-- buildstream/utils.py 54
7 files changed, 144 insertions, 86 deletions
diff --git a/buildstream/element.py b/buildstream/element.py
index cb252fe25..365931e27 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -80,6 +80,7 @@ from collections import OrderedDict
from collections.abc import Mapping
import contextlib
from contextlib import contextmanager
+from functools import partial
import tempfile
import shutil
import string
@@ -672,20 +673,29 @@ class Element(Plugin):
if path is None \
else vbasedir.descend(path.lstrip(os.sep).split(os.sep))
- files = list(self.__compute_splits(include, exclude, orphans))
+ split_filter = self.__split_filter_func(include, exclude, orphans)
# We must not hardlink files whose mtimes we want to update
if update_mtimes:
- link_files = [f for f in files if f not in update_mtimes]
- copy_files = [f for f in files if f in update_mtimes]
+ def link_filter(path):
+ return ((split_filter is None or split_filter(path)) and
+ path not in update_mtimes)
+
+ def copy_filter(path):
+ return ((split_filter is None or split_filter(path)) and
+ path in update_mtimes)
else:
- link_files = files
- copy_files = []
+ link_filter = split_filter
+
+ result = vstagedir.import_files(artifact, filter_callback=link_filter,
+ report_written=True, can_link=True)
- link_result = vstagedir.import_files(artifact, files=link_files, report_written=True, can_link=True)
- copy_result = vstagedir.import_files(artifact, files=copy_files, report_written=True, update_mtime=True)
+ if update_mtimes:
+ copy_result = vstagedir.import_files(artifact, filter_callback=copy_filter,
+ report_written=True, update_mtime=True)
+ result = result.combine(copy_result)
- return link_result.combine(copy_result)
+ return result
def stage_dependency_artifacts(self, sandbox, scope, *, path=None,
include=None, exclude=None, orphans=True):
@@ -2557,15 +2567,61 @@ class Element(Plugin):
for domain, rules in self.node_items(splits)
}
- def __compute_splits(self, include=None, exclude=None, orphans=True):
- artifact_base, _ = self.__extract()
- basedir = os.path.join(artifact_base, 'files')
+ # __split_filter():
+ #
+ # Returns True if the file with the specified `path` is included in the
+ # specified split domains. This is used by `__split_filter_func()` to create
+ # a filter callback.
+ #
+ # Args:
+ # element_domains (list): All domains for this element
+ # include (list): A list of domains to include files from
+ # exclude (list): A list of domains to exclude files from
+ # orphans (bool): Whether to include files not spoken for by split domains
+ # path (str): The relative path of the file
+ #
+ # Returns:
+ # (bool): Whether to include the specified file
+ #
+ def __split_filter(self, element_domains, include, exclude, orphans, path):
+ # Absolute path is required for matching
+ filename = os.path.join(os.sep, path)
+
+ include_file = False
+ exclude_file = False
+ claimed_file = False
+
+ for domain in element_domains:
+ if self.__splits[domain].match(filename):
+ claimed_file = True
+ if domain in include:
+ include_file = True
+ if domain in exclude:
+ exclude_file = True
+
+ if orphans and not claimed_file:
+ include_file = True
+
+ return include_file and not exclude_file
- # No splitting requested, just report complete artifact
+ # __split_filter_func():
+ #
+ # Returns callable split filter function for use with `copy_files()`,
+ # `link_files()` or `Directory.import_files()`.
+ #
+ # Args:
+ # include (list): An optional list of domains to include files from
+ # exclude (list): An optional list of domains to exclude files from
+ # orphans (bool): Whether to include files not spoken for by split domains
+ #
+ # Returns:
+ # (callable): Filter callback that returns True if the file is included
+ # in the specified split domains.
+ #
+ def __split_filter_func(self, include=None, exclude=None, orphans=True):
+ # No splitting requested, no filter needed
if orphans and not (include or exclude):
- for filename in utils.list_relative_paths(basedir):
- yield filename
- return
+ return None
if not self.__splits:
self.__init_splits()
@@ -2581,33 +2637,30 @@ class Element(Plugin):
include = [domain for domain in include if domain in element_domains]
exclude = [domain for domain in exclude if domain in element_domains]
+ # The arguments element_domains, include, exclude, and orphans are
+ # the same for all files. Use `partial` to create a function with
+ # the required callback signature: a single `path` parameter.
+ return partial(self.__split_filter, element_domains, include, exclude, orphans)
+
+ def __compute_splits(self, include=None, exclude=None, orphans=True):
+ filter_func = self.__split_filter_func(include=include, exclude=exclude, orphans=orphans)
+
+ artifact_base, _ = self.__extract()
+ basedir = os.path.join(artifact_base, 'files')
+
# FIXME: Instead of listing the paths in an extracted artifact,
# we should be using a manifest loaded from the artifact
# metadata.
#
- element_files = [
- os.path.join(os.sep, filename)
- for filename in utils.list_relative_paths(basedir)
- ]
-
- for filename in element_files:
- include_file = False
- exclude_file = False
- claimed_file = False
-
- for domain in element_domains:
- if self.__splits[domain].match(filename):
- claimed_file = True
- if domain in include:
- include_file = True
- if domain in exclude:
- exclude_file = True
-
- if orphans and not claimed_file:
- include_file = True
-
- if include_file and not exclude_file:
- yield filename.lstrip(os.sep)
+ element_files = utils.list_relative_paths(basedir)
+
+ if not filter_func:
+ # No splitting requested, just report complete artifact
+ yield from element_files
+ else:
+ for filename in element_files:
+ if filter_func(filename):
+ yield filename
def __file_is_whitelisted(self, path):
# Considered storing the whitelist regex for re-use, but public data
diff --git a/buildstream/plugins/elements/compose.py b/buildstream/plugins/elements/compose.py
index d61a324cc..12520ce4c 100644
--- a/buildstream/plugins/elements/compose.py
+++ b/buildstream/plugins/elements/compose.py
@@ -178,9 +178,12 @@ class ComposeElement(Element):
detail = "\n".join(lines)
+ def import_filter(path):
+ return path in manifest
+
with self.timed_activity("Creating composition", detail=detail, silent_nested=True):
self.info("Composing {} files".format(len(manifest)))
- installdir.import_files(vbasedir, files=manifest, can_link=True)
+ installdir.import_files(vbasedir, filter_callback=import_filter, can_link=True)
# And we're done
return os.path.join(os.sep, 'buildstream', 'install')
diff --git a/buildstream/plugins/sources/local.py b/buildstream/plugins/sources/local.py
index d4965cc9e..bdda91e2f 100644
--- a/buildstream/plugins/sources/local.py
+++ b/buildstream/plugins/sources/local.py
@@ -98,7 +98,7 @@ class LocalSource(Source):
if os.path.isdir(self.fullpath):
files = list(utils.list_relative_paths(self.fullpath))
- utils.copy_files(self.fullpath, directory, files=files)
+ utils.copy_files(self.fullpath, directory)
else:
destfile = os.path.join(directory, os.path.basename(self.path))
files = [os.path.basename(self.path)]
diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 3477eeb3a..537feade2 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -410,7 +410,8 @@ class CasBasedDirectory(Directory):
result.ignored.append(os.path.join(path_prefix, f))
return result
- def import_files(self, external_pathspec, *, files=None,
+ def import_files(self, external_pathspec, *,
+ filter_callback=None,
report_written=True, update_mtime=False,
can_link=False):
"""Imports some or all files from external_path into this directory.
@@ -419,10 +420,6 @@ class CasBasedDirectory(Directory):
containing a pathname, or a Directory object, to use as the
source.
- files (list of strings): A list of all the files relative to
- the external_pathspec to copy. If 'None' is supplied, all
- files are copied.
-
report_written (bool): Return the full list of files
written. Defaults to true. If false, only a list of
overwritten files is returned.
@@ -432,12 +429,14 @@ class CasBasedDirectory(Directory):
can_link (bool): Ignored, since hard links do not have any meaning within CAS.
"""
- if files is None:
- if isinstance(external_pathspec, str):
- files = list_relative_paths(external_pathspec)
- else:
- assert isinstance(external_pathspec, Directory)
- files = external_pathspec.list_relative_paths()
+ if isinstance(external_pathspec, str):
+ files = list_relative_paths(external_pathspec)
+ else:
+ assert isinstance(external_pathspec, Directory)
+ files = external_pathspec.list_relative_paths()
+
+ if filter_callback:
+ files = [path for path in files if filter_callback(path)]
if isinstance(external_pathspec, FileBasedDirectory):
source_directory = external_pathspec._get_underlying_directory()
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
index 2872812b6..dfb310ae1 100644
--- a/buildstream/storage/_filebaseddirectory.py
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -74,7 +74,8 @@ class FileBasedDirectory(Directory):
return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
- def import_files(self, external_pathspec, *, files=None,
+ def import_files(self, external_pathspec, *,
+ filter_callback=None,
report_written=True, update_mtime=False,
can_link=False):
""" See superclass Directory for arguments """
@@ -85,10 +86,12 @@ class FileBasedDirectory(Directory):
source_directory = external_pathspec
if can_link and not update_mtime:
- import_result = link_files(source_directory, self.external_directory, files=files,
+ import_result = link_files(source_directory, self.external_directory,
+ filter_callback=filter_callback,
ignore_missing=False, report_written=report_written)
else:
- import_result = copy_files(source_directory, self.external_directory, files=files,
+ import_result = copy_files(source_directory, self.external_directory,
+ filter_callback=filter_callback,
ignore_missing=False, report_written=report_written)
if update_mtime:
cur_time = time.time()
diff --git a/buildstream/storage/directory.py b/buildstream/storage/directory.py
index 9022b8770..70054f78c 100644
--- a/buildstream/storage/directory.py
+++ b/buildstream/storage/directory.py
@@ -73,7 +73,8 @@ class Directory():
raise NotImplementedError()
# Import and export of files and links
- def import_files(self, external_pathspec, *, files=None,
+ def import_files(self, external_pathspec, *,
+ filter_callback=None,
report_written=True, update_mtime=False,
can_link=False):
"""Imports some or all files from external_path into this directory.
@@ -81,9 +82,10 @@ class Directory():
Args:
external_pathspec: Either a string containing a pathname, or a
Directory object, to use as the source.
- files (list of str): A list of all the files relative to
- the external_pathspec to copy. If 'None' is supplied, all
- files are copied.
+ filter_callback (callable): Optional filter callback. Called with the
+ relative path as argument for every file in the source directory.
+ The file is imported only if the callable returns True.
+ If no filter callback is specified, all files will be imported.
report_written (bool): Return the full list of files
written. Defaults to true. If false, only a list of
overwritten files is returned.
diff --git a/buildstream/utils.py b/buildstream/utils.py
index 204afca0c..844153706 100644
--- a/buildstream/utils.py
+++ b/buildstream/utils.py
@@ -352,13 +352,16 @@ def safe_remove(path):
.format(path, e))
-def copy_files(src, dest, *, files=None, ignore_missing=False, report_written=False):
+def copy_files(src, dest, *, filter_callback=None, ignore_missing=False, report_written=False):
"""Copy files from source to destination.
Args:
src (str): The source file or directory
dest (str): The destination directory
- files (list): Optional list of files in `src` to copy
+ filter_callback (callable): Optional filter callback. Called with the relative path as
+ argument for every file in the source directory. The file is
+ copied only if the callable returns True. If no filter callback
+ is specified, all files will be copied.
ignore_missing (bool): Don't raise any error if a source file is missing
report_written (bool): Add to the result object the full list of files written
@@ -376,28 +379,28 @@ def copy_files(src, dest, *, files=None, ignore_missing=False, report_written=Fa
UNIX domain socket files from `src` are ignored.
"""
- presorted = False
- if files is None:
- files = list_relative_paths(src)
- presorted = True
-
result = FileListResult()
try:
- _process_list(src, dest, files, safe_copy, result, ignore_missing=ignore_missing,
- report_written=report_written, presorted=presorted)
+ _process_list(src, dest, safe_copy, result,
+ filter_callback=filter_callback,
+ ignore_missing=ignore_missing,
+ report_written=report_written)
except OSError as e:
raise UtilError("Failed to copy '{} -> {}': {}"
.format(src, dest, e))
return result
-def link_files(src, dest, *, files=None, ignore_missing=False, report_written=False):
+def link_files(src, dest, *, filter_callback=None, ignore_missing=False, report_written=False):
"""Hardlink files from source to destination.
Args:
src (str): The source file or directory
dest (str): The destination directory
- files (list): Optional list of files in `src` to link
+ filter_callback (callable): Optional filter callback. Called with the relative path as
+ argument for every file in the source directory. The file is
+ hardlinked only if the callable returns True. If no filter
+ callback is specified, all files will be hardlinked.
ignore_missing (bool): Don't raise any error if a source file is missing
report_written (bool): Add to the result object the full list of files written
@@ -420,15 +423,12 @@ def link_files(src, dest, *, files=None, ignore_missing=False, report_written=Fa
UNIX domain socket files from `src` are ignored.
"""
- presorted = False
- if files is None:
- files = list_relative_paths(src)
- presorted = True
-
result = FileListResult()
try:
- _process_list(src, dest, files, safe_link, result, ignore_missing=ignore_missing,
- report_written=report_written, presorted=presorted)
+ _process_list(src, dest, safe_link, result,
+ filter_callback=filter_callback,
+ ignore_missing=ignore_missing,
+ report_written=report_written)
except OSError as e:
raise UtilError("Failed to link '{} -> {}': {}"
.format(src, dest, e))
@@ -807,26 +807,24 @@ def _ensure_real_directory(root, path):
# Args:
# srcdir: The source base directory
# destdir: The destination base directory
-# filelist: List of relative file paths
# actionfunc: The function to call for regular files
# result: The FileListResult
+# filter_callback: Optional callback to invoke for every directory entry
# ignore_missing: Don't raise any error if a source file is missing
-# presorted: Whether the passed list is known to be presorted
#
#
-def _process_list(srcdir, destdir, filelist, actionfunc, result,
- ignore_missing=False, report_written=False,
- presorted=False):
+def _process_list(srcdir, destdir, actionfunc, result,
+ filter_callback=None,
+ ignore_missing=False, report_written=False):
# Keep track of directory permissions, since these need to be set
# *after* files have been written.
permissions = []
- # Sorting the list of files is necessary to ensure that we processes
- # symbolic links which lead to directories before processing files inside
- # those directories.
- if not presorted:
- filelist = sorted(filelist)
+ filelist = list_relative_paths(srcdir)
+
+ if filter_callback:
+ filelist = [path for path in filelist if filter_callback(path)]
# Now walk the list
for path in filelist: