diff options
author | Tristan van Berkom <tristan@codethink.co.uk> | 2020-09-23 16:44:36 +0900 |
---|---|---|
committer | Tristan van Berkom <tristan@codethink.co.uk> | 2020-09-24 18:09:58 +0900 |
commit | b583317c7954d29fce60728486537e945e0d9c28 (patch) | |
tree | 03e6792ee6357e2fbebcb994d4a529f4b54b85bd /src/buildstream/plugins | |
parent | ea753a4ee9dc12d37ce86d50f4c01ef9fcb2d874 (diff) | |
download | buildstream-b583317c7954d29fce60728486537e945e0d9c28.tar.gz |
source.py: Remove BST_KEY_REQUIRES_STAGE
Refactored this to remove unneeded complexity in the code base,
as described here:
https://lists.apache.org/thread.html/r4b9517742433f07c79379ba5b67932cfe997c1e64965a9f1a2b613fc%40%3Cdev.buildstream.apache.org%3E
Changes:
* source.py: Added private Source._cache_directory() context manager
We also move the assertion about which nodes are safe to write to
further down in Source._set_ref(), as it was being made unnecessarily early.
When tracking a workspace, the ref will be None both before and after
tracking, so it is not a problem that a workspace's node is a
synthetic one, as tracking will never affect it.
* local plugin: Implement get_unique_key() and stage() using
the new context manager in order to optimize staging and
cache key calculations here.
* workspace plugin: Implement get_unique_key() and stage() using
the new context manager in order to optimize staging and
cache key calculations here.
* trackqueue.py: No special casing with Source._is_trackable()
Diffstat (limited to 'src/buildstream/plugins')
-rw-r--r-- | src/buildstream/plugins/sources/local.py | 57 | ||||
-rw-r--r-- | src/buildstream/plugins/sources/workspace.py | 53 |
2 files changed, 87 insertions, 23 deletions
diff --git a/src/buildstream/plugins/sources/local.py b/src/buildstream/plugins/sources/local.py index ffcae4993..15962b4eb 100644 --- a/src/buildstream/plugins/sources/local.py +++ b/src/buildstream/plugins/sources/local.py @@ -37,19 +37,16 @@ details on common configuration options for sources. """ import os -from buildstream.storage.directory import Directory -from buildstream import Source, SourceError +from buildstream import Source, SourceError, Directory class LocalSource(Source): # pylint: disable=attribute-defined-outside-init BST_MIN_VERSION = "2.0" - BST_STAGE_VIRTUAL_DIRECTORY = True - BST_KEY_REQUIRES_STAGE = True - __unique_key = None + __digest = None def configure(self, node): node.validate_keys(["path", *Source.COMMON_CONFIG_KEYS]) @@ -65,6 +62,21 @@ class LocalSource(Source): def is_cached(self): return True + def get_unique_key(self): + # + # As a core plugin, we use some private API to optimize file hashing. + # + # * Use Source._cache_directory() to prepare a Directory + # * Do the regular staging activity into the Directory + # * Use the hash of the cached digest as the unique key + # + if not self.__digest: + with self._cache_directory() as directory: + self.__do_stage(directory) + self.__digest = directory._get_digest() + + return self.__digest.hash + # We dont have a ref, we're a local file... def load_ref(self, node): pass @@ -80,8 +92,38 @@ class LocalSource(Source): pass # pragma: nocover def stage(self, directory): - # directory should always be a Directory object + # + # We've already prepared the CAS while resolving the cache key which + # will happen before staging. + # + # Now just retrieve the previously cached content to stage. 
+ # assert isinstance(directory, Directory) + assert self.__digest is not None + with self._cache_directory(digest=self.__digest) as cached_directory: + directory.import_files(cached_directory) + + def init_workspace(self, directory): + # + # FIXME: We should be able to stage the workspace from the content + # cached in CAS instead of reimporting from the filesystem + # to the new workspace directory with this special case, but + # for some reason the writable bits are getting lost on regular + # files through the transition. + # + self.__do_stage(directory) + + # As a core element, we speed up some scenarios when this is used for + # a junction, by providing the local path to this content directly. + # + def _get_local_path(self): + return self.fullpath + + # Staging is implemented internally, we preemptively put it in the CAS + # as a side effect of resolving the cache key, at stage time we just + # do an internal CAS stage. + # + def __do_stage(self, directory): with self.timed_activity("Staging local files into CAS"): if os.path.isdir(self.fullpath) and not os.path.islink(self.fullpath): result = directory.import_files(self.fullpath) @@ -93,9 +135,6 @@ class LocalSource(Source): "Failed to stage source: files clash with existing directory", reason="ensure-stage-dir-fail" ) - def _get_local_path(self): - return self.fullpath - # Plugin entry point def setup(): diff --git a/src/buildstream/plugins/sources/workspace.py b/src/buildstream/plugins/sources/workspace.py index 44d0889b3..df24abb91 100644 --- a/src/buildstream/plugins/sources/workspace.py +++ b/src/buildstream/plugins/sources/workspace.py @@ -37,30 +37,21 @@ workspace. 
The node constructed would be specified as follows: import os -from buildstream.storage.directory import Directory -from buildstream import Source, SourceError +from buildstream import Source, SourceError, Directory, MappingNode from buildstream.types import SourceRef -from buildstream.node import MappingNode class WorkspaceSource(Source): # pylint: disable=attribute-defined-outside-init BST_MIN_VERSION = "2.0" - BST_STAGE_VIRTUAL_DIRECTORY = True - BST_KEY_REQUIRES_STAGE = True - # Cached unique key - __unique_key = None # the digest of the Directory following the import of the workspace __digest = None # the cache key of the last workspace build __last_build = None - def track(self) -> SourceRef: # pylint: disable=arguments-differ - return None - def configure(self, node: MappingNode) -> None: node.validate_keys(["path", "last_build", "kind"]) self.path = node.get_str("path") @@ -75,6 +66,21 @@ class WorkspaceSource(Source): def is_resolved(self): return os.path.exists(self._get_local_path()) + def get_unique_key(self): + # + # As a core plugin, we use some private API to optimize file hashing. + # + # * Use Source._cache_directory() to prepare a Directory + # * Do the regular staging activity into the Directory + # * Use the hash of the cached digest as the unique key + # + if not self.__digest: + with self._cache_directory() as directory: + self.__do_stage(directory) + self.__digest = directory._get_digest() + + return self.__digest.hash + def get_ref(self) -> None: return None @@ -93,7 +99,29 @@ class WorkspaceSource(Source): def fetch(self) -> None: # pylint: disable=arguments-differ pass # pragma: nocover - def stage(self, directory: Directory) -> None: + def stage(self, directory): + # + # We've already prepared the CAS while resolving the cache key which + # will happen before staging. + # + # Now just retrieve the previously cached content to stage. 
+ # + assert isinstance(directory, Directory) + assert self.__digest is not None + with self._cache_directory(digest=self.__digest) as cached_directory: + directory.import_files(cached_directory) + + # As a core element, we speed up some scenarios when this is used for + # a junction, by providing the local path to this content directly. + # + def _get_local_path(self) -> str: + return self.path + + # Staging is implemented internally, we preemptively put it in the CAS + # as a side effect of resolving the cache key, at stage time we just + # do an internal CAS stage. + # + def __do_stage(self, directory: Directory) -> None: assert isinstance(directory, Directory) with self.timed_activity("Staging local files"): result = directory.import_files(self.path, properties=["mtime"]) @@ -103,9 +131,6 @@ class WorkspaceSource(Source): "Failed to stage source: files clash with existing directory", reason="ensure-stage-dir-fail" ) - def _get_local_path(self) -> str: - return self.path - # Plugin entry point def setup() -> WorkspaceSource: |