diff options
author | Tristan van Berkom <tristan@codethink.co.uk> | 2020-12-01 20:37:17 +0900 |
---|---|---|
committer | Tristan van Berkom <tristan@codethink.co.uk> | 2020-12-07 17:51:46 +0900 |
commit | 965da0ad04af8649f48cfeffd90174e147052494 (patch) | |
tree | 411fc37d0f8aa5b8a2fa92ee2e170d0496c301d6 /src | |
parent | 548b23d767e11f4ea49f4934a5d9d99ce41342f3 (diff) | |
download | buildstream-965da0ad04af8649f48cfeffd90174e147052494.tar.gz |
_artifact.py: Store additional metadata on the artifact
This commit enriches the metadata we store on artifacts in the
new detached low/high diversity metadata files:
* The SandboxConfig is now stored in the artifact, allowing
one to perform activities such as launching sandboxes on
artifacts downloaded via artifact name (without backing
project data).
* The environment variables are now stored in the artifact,
similarly allowing one to shell into downloaded artifacts
which are unrelated to a loaded project.
* The element variables are now stored in the artifact, allowing
more flexibility in what the core can do with a downloaded
ArtifactElement
* The element's strict key
All of these of course can additionally enhance traceability
in the UI with commands such as `bst artifact show`.
Summary of changes:
* _artifact.py:
- Store new data in the new proto digests.
- Added new accessors to extract these new aspects from loaded artifacts.
- Bump the proto version number for compatibility
* _artifactcache.py: Adjusted to push and pull the new blobs and digests.
* element.py:
- Call Artifact.cache() with new parameters
- Expect the strict key from Artifact.get_meta_keys()
- Always specify the strict key when constructing an Artifact
instance which will later be used to cache the artifact
(i.e. the self.__artifact Artifact).
* _versions.py: Bump the global artifact version number, as this breaks
the artifact format.
* tests/cachekey: Updated cache key test for new keys.
Diffstat (limited to 'src')
-rw-r--r-- | src/buildstream/_artifact.py | 128 | ||||
-rw-r--r-- | src/buildstream/_artifactcache.py | 8 | ||||
-rw-r--r-- | src/buildstream/_versions.py | 2 | ||||
-rw-r--r-- | src/buildstream/element.py | 25 |
4 files changed, 145 insertions, 18 deletions
diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py index c110e57f0..1cea03092 100644 --- a/src/buildstream/_artifact.py +++ b/src/buildstream/_artifact.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2019 Codethink Limited +# Copyright (C) 2020 Codethink Limited # Copyright (C) 2019 Bloomberg Finance LP # # This program is free software; you can redistribute it and/or @@ -29,13 +29,16 @@ artifact composite interaction away from Element class """ import os +from typing import Dict, Tuple from ._protos.buildstream.v2.artifact_pb2 import Artifact as ArtifactProto from . import _yaml from . import utils +from .node import Node from .types import _Scope from .storage._casbaseddirectory import CasBasedDirectory - +from .sandbox._config import SandboxConfig +from ._variables import Variables # An Artifact class to abstract artifact operations # from the Element class @@ -44,23 +47,25 @@ from .storage._casbaseddirectory import CasBasedDirectory # element (Element): The Element object # context (Context): The BuildStream context # strong_key (str): The elements strong cache key, dependent on context +# strict_key (str): The elements strict cache key # weak_key (str): The elements weak cache key # class Artifact: - version = 0 + version = 1 - def __init__(self, element, context, *, strong_key=None, weak_key=None): + def __init__(self, element, context, *, strong_key=None, strict_key=None, weak_key=None): self._element = element self._context = context self._cache_key = strong_key + self._strict_key = strict_key self._weak_cache_key = weak_key self._artifactdir = context.artifactdir self._cas = context.get_cascache() self._tmpdir = context.tmpdir self._proto = None - self._metadata_keys = None # Strong and weak key tuple extracted from the artifact + self._metadata_keys = None # Strong, strict and weak key tuple extracted from the artifact self._metadata_dependencies = None # Dictionary of dependency strong keys from the artifact self._metadata_workspaced = None # 
Boolean of whether it's a workspaced artifact self._metadata_workspaced_dependencies = None # List of which dependencies are workspaced from the artifact @@ -137,11 +142,25 @@ class Artifact: # sourcesvdir (Directory): Virtual Directoy object for the staged sources # buildresult (tuple): bool, short desc and detailed desc of result # publicdata (dict): dict of public data to commit to artifact metadata + # variables (Variables): The element's Variables + # environment (dict): dict of the element's environment variables + # sandboxconfig (SandboxConfig): The element's SandboxConfig # # Returns: # (int): The size of the newly cached artifact # - def cache(self, sandbox_build_dir, collectvdir, sourcesvdir, buildresult, publicdata): + def cache( + self, + *, + sandbox_build_dir, + collectvdir, + sourcesvdir, + buildresult, + publicdata, + variables, + environment, + sandboxconfig, + ): context = self._context element = self._element @@ -161,6 +180,7 @@ class Artifact: # Store keys artifact.strong_key = self._cache_key + artifact.strict_key = self._strict_key artifact.weak_key = self._weak_cache_key artifact.was_workspaced = bool(element._get_workspace()) @@ -180,6 +200,34 @@ class Artifact: artifact.public_data.CopyFrom(public_data_digest) size += public_data_digest.size_bytes + # Store low diversity metadata, this metadata must have a high + # probability of deduplication, such as environment variables + # and SandboxConfig. 
+ # + with utils._tempnamedfile_name(dir=self._tmpdir) as tmpname: + sandbox_dict = sandboxconfig.to_dict() + low_diversity_dict = {"environment": environment, "sandbox-config": sandbox_dict} + low_diversity_node = Node.from_dict(low_diversity_dict) + + _yaml.roundtrip_dump(low_diversity_node, tmpname) + low_diversity_meta_digest = self._cas.add_object(path=tmpname, link_directly=True) + artifact.low_diversity_meta.CopyFrom(low_diversity_meta_digest) + size += low_diversity_meta_digest.size_bytes + + # Store high diversity metadata, this metadata is expected to diverge + # for every element and as such cannot be deduplicated. + # + with utils._tempnamedfile_name(dir=self._tmpdir) as tmpname: + # The Variables object supports being converted directly to a dictionary + variables_dict = dict(variables) + high_diversity_dict = {"variables": variables_dict} + high_diversity_node = Node.from_dict(high_diversity_dict) + + _yaml.roundtrip_dump(high_diversity_node, tmpname) + high_diversity_meta_digest = self._cas.add_object(path=tmpname, link_directly=True) + artifact.high_diversity_meta.CopyFrom(high_diversity_meta_digest) + size += high_diversity_meta_digest.size_bytes + # store build dependencies for e in element._dependencies(_Scope.BUILD): new_build = artifact.build_deps.add() @@ -282,6 +330,64 @@ class Artifact: return data + # load_sandbox_config(): + # + # Loads the sandbox configuration from the cached artifact + # + # Returns: + # The stored SandboxConfig object + # + def load_sandbox_config(self) -> SandboxConfig: + + # Load the sandbox data from the artifact + artifact = self._get_proto() + meta_file = self._cas.objpath(artifact.low_diversity_meta) + data = _yaml.load(meta_file, shortname="low-diversity-meta.yaml") + + # Extract the sandbox data + config = data.get_mapping("sandbox-config") + + # Return a SandboxConfig + return SandboxConfig.new_from_node(config) + + # load_environment(): + # + # Loads the environment variables from the cached artifact + # + # 
Returns: + # The environment variables + # + def load_environment(self) -> Dict[str, str]: + + # Load the sandbox data from the artifact + artifact = self._get_proto() + meta_file = self._cas.objpath(artifact.low_diversity_meta) + data = _yaml.load(meta_file, shortname="low-diversity-meta.yaml") + + # Extract the environment + config = data.get_mapping("environment") + + # Return the environment + return config.strip_node_info() + + # load_variables(): + # + # Loads the element variables from the cached artifact + # + # Returns: + # The element variables + # + def load_variables(self) -> Variables: + + # Load the sandbox data from the artifact + artifact = self._get_proto() + meta_file = self._cas.objpath(artifact.high_diversity_meta) + data = _yaml.load(meta_file, shortname="high-diversity-meta.yaml") + + # Extract the variables node and return the new Variables instance + variables_node = data.get_mapping("variables") + return Variables(variables_node) + # load_build_result(): # # Load the build result from the cached artifact @@ -303,10 +409,11 @@ class Artifact: # Retrieve the strong and weak keys from the given artifact. 
# # Returns: - # (str): The strong key - # (str): The weak key + # The strong key + # The strict key + # The weak key # - def get_metadata_keys(self): + def get_metadata_keys(self) -> Tuple[str, str, str]: if self._metadata_keys is not None: return self._metadata_keys @@ -315,9 +422,10 @@ class Artifact: artifact = self._get_proto() strong_key = artifact.strong_key + strict_key = artifact.strict_key weak_key = artifact.weak_key - self._metadata_keys = (strong_key, weak_key) + self._metadata_keys = (strong_key, strict_key, weak_key) return self._metadata_keys diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py index 5ccdf8fd8..c3d96166c 100644 --- a/src/buildstream/_artifactcache.py +++ b/src/buildstream/_artifactcache.py @@ -404,7 +404,7 @@ class ArtifactCache(AssetCache): except FileNotFoundError: pass - digests = [artifact_digest] + digests = [artifact_digest, artifact_proto.low_diversity_meta, artifact_proto.high_diversity_meta] if str(artifact_proto.public_data): digests.append(artifact_proto.public_data) @@ -470,7 +470,9 @@ class ArtifactCache(AssetCache): if artifact_proto.sources: referenced_directories.append(artifact_proto.sources) - referenced_blobs = [log_file.digest for log_file in artifact_proto.logs] + referenced_blobs = [artifact_proto.low_diversity_meta, artifact_proto.high_diversity_meta] + [ + log_file.digest for log_file in artifact_proto.logs + ] try: remote.push_blob( @@ -530,7 +532,7 @@ class ArtifactCache(AssetCache): if pull_buildtrees and str(artifact.buildtree): __pull_digest(artifact.buildtree) - digests = [] + digests = [artifact.low_diversity_meta, artifact.high_diversity_meta] if str(artifact.public_data): digests.append(artifact.public_data) diff --git a/src/buildstream/_versions.py b/src/buildstream/_versions.py index f97560b4d..37a7a0acd 100644 --- a/src/buildstream/_versions.py +++ b/src/buildstream/_versions.py @@ -24,4 +24,4 @@ # or if buildstream was changed in a way which can cause # the same 
cache key to produce something that is no longer # the same. -BST_CORE_ARTIFACT_VERSION = 9 +BST_CORE_ARTIFACT_VERSION = 10 diff --git a/src/buildstream/element.py b/src/buildstream/element.py index 595017052..c09bb2d18 100644 --- a/src/buildstream/element.py +++ b/src/buildstream/element.py @@ -1840,7 +1840,16 @@ class Element(Plugin): assert self.__artifact._cache_key is not None with self.timed_activity("Caching artifact"): - artifact_size = self.__artifact.cache(sandbox_build_dir, collectvdir, sourcesvdir, buildresult, publicdata) + artifact_size = self.__artifact.cache( + sandbox_build_dir=sandbox_build_dir, + collectvdir=collectvdir, + sourcesvdir=sourcesvdir, + buildresult=buildresult, + publicdata=publicdata, + variables=self.__variables, + environment=self.__environment, + sandboxconfig=self.__sandbox_config, + ) if collect is not None and collectvdir is None: raise ElementError( @@ -3208,11 +3217,19 @@ class Element(Plugin): context = self._get_context() - strict_artifact = Artifact(self, context, strong_key=self.__strict_cache_key, weak_key=self.__weak_cache_key) + strict_artifact = Artifact( + self, + context, + strong_key=self.__strict_cache_key, + strict_key=self.__strict_cache_key, + weak_key=self.__weak_cache_key, + ) if context.get_strict() or strict_artifact.cached(): self.__artifact = strict_artifact else: - self.__artifact = Artifact(self, context, weak_key=self.__weak_cache_key) + self.__artifact = Artifact( + self, context, strict_key=self.__strict_cache_key, weak_key=self.__weak_cache_key + ) if not context.get_strict() and self.__artifact.cached(): # In non-strict mode, strong cache key becomes available when @@ -3247,7 +3264,7 @@ class Element(Plugin): pass elif self._cached(): # Load the strong cache key from the artifact - strong_key, _ = self.__artifact.get_metadata_keys() + strong_key, _, _ = self.__artifact.get_metadata_keys() self.__cache_key = strong_key elif self.__assemble_scheduled or self.__assemble_done: # Artifact will or has 
been built, not downloaded |