summaryrefslogtreecommitdiff
path: root/buildstream
diff options
context:
space:
mode:
authorTristan Van Berkom <tristan.vanberkom@codethink.co.uk>2018-09-14 20:03:28 +0900
committerTristan Van Berkom <tristan.vanberkom@codethink.co.uk>2018-09-14 21:11:30 +0900
commit95630260b82dcd7a5ddfee7a73bcb75d75838839 (patch)
tree26d54f12514ff2f3cc0e60f1706735736a9c1574 /buildstream
parent32d0ce664093a3b5256b7af449a047cc4ffe0a87 (diff)
downloadbuildstream-95630260b82dcd7a5ddfee7a73bcb75d75838839.tar.gz
_artifactcache/artifactcache.py: Changes to mark_required_elements()
This was previously append_required_artifacts(), which presumed that we knew at startup time what the cache keys of all elements in the loaded pipeline would be. This fixes unexpected deletions of required artifacts when dynamic tracking is enabled with `bst build --track-all target.bst`
Diffstat (limited to 'buildstream')
-rw-r--r--buildstream/_artifactcache/artifactcache.py57
-rw-r--r--buildstream/_stream.py9
-rw-r--r--buildstream/element.py19
3 files changed, 53 insertions, 32 deletions
diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py
index ead44c4ff..aa11d92a9 100644
--- a/buildstream/_artifactcache/artifactcache.py
+++ b/buildstream/_artifactcache/artifactcache.py
@@ -87,7 +87,7 @@ class ArtifactCache():
self.global_remote_specs = []
self.project_remote_specs = {}
- self._required_artifacts = set() # The artifacts required for this session
+ self._required_elements = set() # The elements required for this session
self._cache_size = None # The current cache size, sometimes it's an estimate
self._cache_quota = None # The cache quota
self._cache_lower_threshold = None # The target cache size for a cleanup
@@ -189,33 +189,40 @@ class ArtifactCache():
(str(provenance)))
return cache_specs
- # append_required_artifacts():
+ # mark_required_elements():
#
- # Append to the list of elements whose artifacts are required for
- # the current run. Artifacts whose elements are in this list will
- # be locked by the artifact cache and not touched for the duration
- # of the current pipeline.
+ # Mark elements whose artifacts are required for the current run.
+ #
+ # Artifacts whose elements are in this list will be locked by the artifact
+ # cache and not touched for the duration of the current pipeline.
#
# Args:
# elements (iterable): A set of elements to mark as required
#
- def append_required_artifacts(self, elements):
- # We lock both strong and weak keys - deleting one but not the
- # other won't save space in most cases anyway, but would be a
- # user inconvenience.
+ def mark_required_elements(self, elements):
+
+ # We risk calling this function with a generator, so we
+ # better consume it first.
+ #
+ elements = list(elements)
+
+ # Mark the elements as required. We cannot know that we know the
+ # cache keys yet, so we only check that later when deleting.
+ #
+ self._required_elements.update(elements)
+ # For the cache keys which were resolved so far, we bump
+ # the atime of them.
+ #
+ # This is just in case we have concurrent instances of
+ # BuildStream running with the same artifact cache, it will
+ # reduce the likelyhood of one instance deleting artifacts
+ # which are required by the other.
for element in elements:
strong_key = element._get_cache_key(strength=_KeyStrength.STRONG)
weak_key = element._get_cache_key(strength=_KeyStrength.WEAK)
-
for key in (strong_key, weak_key):
- if key and key not in self._required_artifacts:
- self._required_artifacts.add(key)
-
- # We also update the usage times of any artifacts
- # we will be using, which helps preventing a
- # buildstream process that runs in parallel with
- # this one from removing artifacts in-use.
+ if key:
try:
self.update_atime(key)
except ArtifactError:
@@ -231,6 +238,18 @@ class ArtifactCache():
def clean(self):
artifacts = self.list_artifacts()
+ # Build a set of the cache keys which are required
+ # based on the required elements at cleanup time
+ #
+ # We lock both strong and weak keys - deleting one but not the
+ # other won't save space, but would be a user inconvenience.
+ required_artifacts = set()
+ for element in self._required_elements:
+ required_artifacts.update([
+ element._get_cache_key(strength=_KeyStrength.STRONG),
+ element._get_cache_key(strength=_KeyStrength.WEAK)
+ ])
+
# Do a real computation of the cache size once, just in case
self.compute_cache_size()
@@ -256,7 +275,7 @@ class ArtifactCache():
break
key = to_remove.rpartition('/')[2]
- if key not in self._required_artifacts:
+ if key not in required_artifacts:
# Remove the actual artifact, if it's not required.
size = self.remove(to_remove)
diff --git a/buildstream/_stream.py b/buildstream/_stream.py
index 252bed26e..5d862de91 100644
--- a/buildstream/_stream.py
+++ b/buildstream/_stream.py
@@ -937,13 +937,10 @@ class Stream():
# Set the "required" artifacts that should not be removed
# while this pipeline is active
#
- # FIXME: The set of required artifacts is only really needed
- # for build and pull tasks.
+ # It must include all the artifacts which are required by the
+ # final product. Note that this is a superset of the build plan.
#
- # It must include all the artifacts which are required by the
- # final product. Note that this is a superset of the build plan.
- #
- self._artifacts.append_required_artifacts((e for e in self._pipeline.dependencies(elements, Scope.ALL)))
+ self._artifacts.mark_required_elements(self._pipeline.dependencies(elements, Scope.ALL))
if selection == PipelineSelection.PLAN and dynamic_plan:
# We use a dynamic build plan, only request artifacts of top-level targets,
diff --git a/buildstream/element.py b/buildstream/element.py
index e6b330492..4f7fc0564 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -1434,15 +1434,20 @@ class Element(Plugin):
workspace.clear_running_files()
self._get_context().get_workspaces().save_config()
- # We also need to update the required artifacts, since
- # workspaced dependencies do not have a fixed cache key
- # when the build starts.
+ # This element will have already been marked as
+ # required, but we bump the atime again, in case
+ # we did not know the cache key until now.
#
- # This does *not* cause a race condition, because
- # _assemble_done is called before a cleanup job may be
- # launched.
+ # FIXME: This is not exactly correct, we should be
+ # doing this at the time which we have discovered
+ # a new cache key, this just happens to be the
+ # last place where that can happen.
#
- self.__artifacts.append_required_artifacts([self])
+ # Ultimately, we should be refactoring
+ # Element._update_state() such that we know
+ # when a cache key is actually discovered.
+ #
+ self.__artifacts.mark_required_elements([self])
# _assemble():
#