summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTristan van Berkom <tristan@codethink.co.uk>2020-12-01 20:37:17 +0900
committerTristan van Berkom <tristan@codethink.co.uk>2020-12-07 17:51:46 +0900
commit965da0ad04af8649f48cfeffd90174e147052494 (patch)
tree411fc37d0f8aa5b8a2fa92ee2e170d0496c301d6 /src
parent548b23d767e11f4ea49f4934a5d9d99ce41342f3 (diff)
downloadbuildstream-965da0ad04af8649f48cfeffd90174e147052494.tar.gz
_artifact.py: Store additional metadata on the artifact
This commit enriches the metadata we store on artifacts in the new detached low/high diversity metadata files: * The SandboxConfig is now stored in the artifact, allowing one to perform activities such as launching sandboxes on artifacts downloaded via artifact name (without backing project data). * The environment variables are now stored in the artifact, similarly allowing one to shell into downloaded artifacts which are unrelated to a loaded project. * The element variables are now stored in the artifact, allowing more flexibility in what the core can do with a downloaded ArtifactElement * The element's strict key All of these of course can additionally enhance traceability in the UI with commands such as `bst artifact show`. Summary of changes: * _artifact.py: - Store new data in the new proto digests. - Added new accessors to extract these new aspects from loaded artifacts. - Bump the proto version number for compatibility * _artifactcache.py: Adjusted to push and pull the new blobs and digests. * element.py: - Call Artifact.cache() with new parameters - Expect the strict key from Artifact.get_meta_keys() - Always specify the strict key when constructing an Artifact instance which will later be used to cache the artifact (i.e. the self.__artifact Artifact). * _versions.py: Bump the global artifact version number, as this breaks the artifact format. * tests/cachekey: Updated cache key test for new keys.
Diffstat (limited to 'src')
-rw-r--r--src/buildstream/_artifact.py128
-rw-r--r--src/buildstream/_artifactcache.py8
-rw-r--r--src/buildstream/_versions.py2
-rw-r--r--src/buildstream/element.py25
4 files changed, 145 insertions, 18 deletions
diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py
index c110e57f0..1cea03092 100644
--- a/src/buildstream/_artifact.py
+++ b/src/buildstream/_artifact.py
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2019 Codethink Limited
+# Copyright (C) 2020 Codethink Limited
# Copyright (C) 2019 Bloomberg Finance LP
#
# This program is free software; you can redistribute it and/or
@@ -29,13 +29,16 @@ artifact composite interaction away from Element class
"""
import os
+from typing import Dict, Tuple
from ._protos.buildstream.v2.artifact_pb2 import Artifact as ArtifactProto
from . import _yaml
from . import utils
+from .node import Node
from .types import _Scope
from .storage._casbaseddirectory import CasBasedDirectory
-
+from .sandbox._config import SandboxConfig
+from ._variables import Variables
# An Artifact class to abstract artifact operations
# from the Element class
@@ -44,23 +47,25 @@ from .storage._casbaseddirectory import CasBasedDirectory
# element (Element): The Element object
# context (Context): The BuildStream context
# strong_key (str): The elements strong cache key, dependent on context
+# strict_key (str): The elements strict cache key
# weak_key (str): The elements weak cache key
#
class Artifact:
- version = 0
+ version = 1
- def __init__(self, element, context, *, strong_key=None, weak_key=None):
+ def __init__(self, element, context, *, strong_key=None, strict_key=None, weak_key=None):
self._element = element
self._context = context
self._cache_key = strong_key
+ self._strict_key = strict_key
self._weak_cache_key = weak_key
self._artifactdir = context.artifactdir
self._cas = context.get_cascache()
self._tmpdir = context.tmpdir
self._proto = None
- self._metadata_keys = None # Strong and weak key tuple extracted from the artifact
+ self._metadata_keys = None # Strong, strict and weak key tuple extracted from the artifact
self._metadata_dependencies = None # Dictionary of dependency strong keys from the artifact
self._metadata_workspaced = None # Boolean of whether it's a workspaced artifact
self._metadata_workspaced_dependencies = None # List of which dependencies are workspaced from the artifact
@@ -137,11 +142,25 @@ class Artifact:
# sourcesvdir (Directory): Virtual Directoy object for the staged sources
# buildresult (tuple): bool, short desc and detailed desc of result
# publicdata (dict): dict of public data to commit to artifact metadata
+ # variables (Variables): The element's Variables
+ # environment (dict): dict of the element's environment variables
+ # sandboxconfig (SandboxConfig): The element's SandboxConfig
#
# Returns:
# (int): The size of the newly cached artifact
#
- def cache(self, sandbox_build_dir, collectvdir, sourcesvdir, buildresult, publicdata):
+ def cache(
+ self,
+ *,
+ sandbox_build_dir,
+ collectvdir,
+ sourcesvdir,
+ buildresult,
+ publicdata,
+ variables,
+ environment,
+ sandboxconfig,
+ ):
context = self._context
element = self._element
@@ -161,6 +180,7 @@ class Artifact:
# Store keys
artifact.strong_key = self._cache_key
+ artifact.strict_key = self._strict_key
artifact.weak_key = self._weak_cache_key
artifact.was_workspaced = bool(element._get_workspace())
@@ -180,6 +200,34 @@ class Artifact:
artifact.public_data.CopyFrom(public_data_digest)
size += public_data_digest.size_bytes
+ # Store low diversity metadata, this metadata must have a high
+ # probability of deduplication, such as environment variables
+ # and SandboxConfig.
+ #
+ with utils._tempnamedfile_name(dir=self._tmpdir) as tmpname:
+ sandbox_dict = sandboxconfig.to_dict()
+ low_diversity_dict = {"environment": environment, "sandbox-config": sandbox_dict}
+ low_diversity_node = Node.from_dict(low_diversity_dict)
+
+ _yaml.roundtrip_dump(low_diversity_node, tmpname)
+ low_diversity_meta_digest = self._cas.add_object(path=tmpname, link_directly=True)
+ artifact.low_diversity_meta.CopyFrom(low_diversity_meta_digest)
+ size += low_diversity_meta_digest.size_bytes
+
+ # Store high diversity metadata, this metadata is expected to diverge
+ # for every element and as such cannot be deduplicated.
+ #
+ with utils._tempnamedfile_name(dir=self._tmpdir) as tmpname:
+ # The Variables object supports being converted directly to a dictionary
+ variables_dict = dict(variables)
+ high_diversity_dict = {"variables": variables_dict}
+ high_diversity_node = Node.from_dict(high_diversity_dict)
+
+ _yaml.roundtrip_dump(high_diversity_node, tmpname)
+ high_diversity_meta_digest = self._cas.add_object(path=tmpname, link_directly=True)
+ artifact.high_diversity_meta.CopyFrom(high_diversity_meta_digest)
+ size += high_diversity_meta_digest.size_bytes
+
# store build dependencies
for e in element._dependencies(_Scope.BUILD):
new_build = artifact.build_deps.add()
@@ -282,6 +330,64 @@ class Artifact:
return data
+ # load_sandbox_config():
+ #
+ # Loads the sandbox configuration from the cached artifact
+ #
+ # Returns:
+ # The stored SandboxConfig object
+ #
+ def load_sandbox_config(self) -> SandboxConfig:
+
+ # Load the sandbox data from the artifact
+ artifact = self._get_proto()
+ meta_file = self._cas.objpath(artifact.low_diversity_meta)
+ data = _yaml.load(meta_file, shortname="low-diversity-meta.yaml")
+
+ # Extract the sandbox data
+ config = data.get_mapping("sandbox-config")
+
+ # Return a SandboxConfig
+ return SandboxConfig.new_from_node(config)
+
+ # load_environment():
+ #
+ # Loads the environment variables from the cached artifact
+ #
+ # Returns:
+ # The environment variables
+ #
+ def load_environment(self) -> Dict[str, str]:
+
+ # Load the sandbox data from the artifact
+ artifact = self._get_proto()
+ meta_file = self._cas.objpath(artifact.low_diversity_meta)
+ data = _yaml.load(meta_file, shortname="low-diversity-meta.yaml")
+
+ # Extract the environment
+ config = data.get_mapping("environment")
+
+ # Return the environment
+ return config.strip_node_info()
+
+ # load_variables():
+ #
+ # Loads the element variables from the cached artifact
+ #
+ # Returns:
+ # The element variables
+ #
+ def load_variables(self) -> Variables:
+
+ # Load the sandbox data from the artifact
+ artifact = self._get_proto()
+ meta_file = self._cas.objpath(artifact.high_diversity_meta)
+ data = _yaml.load(meta_file, shortname="high-diversity-meta.yaml")
+
+ # Extract the variables node and return the new Variables instance
+ variables_node = data.get_mapping("variables")
+ return Variables(variables_node)
+
# load_build_result():
#
# Load the build result from the cached artifact
@@ -303,10 +409,11 @@ class Artifact:
# Retrieve the strong and weak keys from the given artifact.
#
# Returns:
- # (str): The strong key
- # (str): The weak key
+ # The strong key
+ # The strict key
+ # The weak key
#
- def get_metadata_keys(self):
+ def get_metadata_keys(self) -> Tuple[str, str, str]:
if self._metadata_keys is not None:
return self._metadata_keys
@@ -315,9 +422,10 @@ class Artifact:
artifact = self._get_proto()
strong_key = artifact.strong_key
+ strict_key = artifact.strict_key
weak_key = artifact.weak_key
- self._metadata_keys = (strong_key, weak_key)
+ self._metadata_keys = (strong_key, strict_key, weak_key)
return self._metadata_keys
diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index 5ccdf8fd8..c3d96166c 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -404,7 +404,7 @@ class ArtifactCache(AssetCache):
except FileNotFoundError:
pass
- digests = [artifact_digest]
+ digests = [artifact_digest, artifact_proto.low_diversity_meta, artifact_proto.high_diversity_meta]
if str(artifact_proto.public_data):
digests.append(artifact_proto.public_data)
@@ -470,7 +470,9 @@ class ArtifactCache(AssetCache):
if artifact_proto.sources:
referenced_directories.append(artifact_proto.sources)
- referenced_blobs = [log_file.digest for log_file in artifact_proto.logs]
+ referenced_blobs = [artifact_proto.low_diversity_meta, artifact_proto.high_diversity_meta] + [
+ log_file.digest for log_file in artifact_proto.logs
+ ]
try:
remote.push_blob(
@@ -530,7 +532,7 @@ class ArtifactCache(AssetCache):
if pull_buildtrees and str(artifact.buildtree):
__pull_digest(artifact.buildtree)
- digests = []
+ digests = [artifact.low_diversity_meta, artifact.high_diversity_meta]
if str(artifact.public_data):
digests.append(artifact.public_data)
diff --git a/src/buildstream/_versions.py b/src/buildstream/_versions.py
index f97560b4d..37a7a0acd 100644
--- a/src/buildstream/_versions.py
+++ b/src/buildstream/_versions.py
@@ -24,4 +24,4 @@
# or if buildstream was changed in a way which can cause
# the same cache key to produce something that is no longer
# the same.
-BST_CORE_ARTIFACT_VERSION = 9
+BST_CORE_ARTIFACT_VERSION = 10
diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index 595017052..c09bb2d18 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -1840,7 +1840,16 @@ class Element(Plugin):
assert self.__artifact._cache_key is not None
with self.timed_activity("Caching artifact"):
- artifact_size = self.__artifact.cache(sandbox_build_dir, collectvdir, sourcesvdir, buildresult, publicdata)
+ artifact_size = self.__artifact.cache(
+ sandbox_build_dir=sandbox_build_dir,
+ collectvdir=collectvdir,
+ sourcesvdir=sourcesvdir,
+ buildresult=buildresult,
+ publicdata=publicdata,
+ variables=self.__variables,
+ environment=self.__environment,
+ sandboxconfig=self.__sandbox_config,
+ )
if collect is not None and collectvdir is None:
raise ElementError(
@@ -3208,11 +3217,19 @@ class Element(Plugin):
context = self._get_context()
- strict_artifact = Artifact(self, context, strong_key=self.__strict_cache_key, weak_key=self.__weak_cache_key)
+ strict_artifact = Artifact(
+ self,
+ context,
+ strong_key=self.__strict_cache_key,
+ strict_key=self.__strict_cache_key,
+ weak_key=self.__weak_cache_key,
+ )
if context.get_strict() or strict_artifact.cached():
self.__artifact = strict_artifact
else:
- self.__artifact = Artifact(self, context, weak_key=self.__weak_cache_key)
+ self.__artifact = Artifact(
+ self, context, strict_key=self.__strict_cache_key, weak_key=self.__weak_cache_key
+ )
if not context.get_strict() and self.__artifact.cached():
# In non-strict mode, strong cache key becomes available when
@@ -3247,7 +3264,7 @@ class Element(Plugin):
pass
elif self._cached():
# Load the strong cache key from the artifact
- strong_key, _ = self.__artifact.get_metadata_keys()
+ strong_key, _, _ = self.__artifact.get_metadata_keys()
self.__cache_key = strong_key
elif self.__assemble_scheduled or self.__assemble_done:
# Artifact will or has been built, not downloaded