author     Tristan Maat <tristan.maat@codethink.co.uk>   2018-07-16 18:03:23 +0100
committer  Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>   2018-07-18 14:45:59 +0900
commit     1ec5c7b1207f212e127b15da4094ffe99504301b (patch)
tree       cd274404844a72abfc8525558d71ab7721d3dccd
parent     339844487ef9ea6c897254bfe8f35a6648b28a5e (diff)
download   buildstream-1ec5c7b1207f212e127b15da4094ffe99504301b.tar.gz
Make elements keep track of their built artifact size
-rw-r--r--  buildstream/_artifactcache/artifactcache.py  70
-rw-r--r--  buildstream/_scheduler/jobs/elementjob.py      5
-rw-r--r--  buildstream/_scheduler/queues/queue.py         2
-rw-r--r--  buildstream/element.py                        31
4 files changed, 108 insertions(+), 0 deletions(-)
diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py
index 4aa7ec555..3541f244e 100644
--- a/buildstream/_artifactcache/artifactcache.py
+++ b/buildstream/_artifactcache/artifactcache.py
@@ -78,6 +78,9 @@ class ArtifactCache():
def __init__(self, context):
self.context = context
self.extractdir = os.path.join(context.artifactdir, 'extract')
+ self.max_size = context.cache_quota
+ self.estimated_size = None
+
self.global_remote_specs = []
self.project_remote_specs = {}
@@ -179,6 +182,35 @@ class ArtifactCache():
(str(provenance)))
return cache_specs
+ # get_approximate_cache_size()
+ #
+ # A cheap method that returns an approximation intended to serve
+ # as an upper bound on the artifact cache size.
+ #
+ # The size reported here will normally be larger than the real
+ # cache size, since it is calculated from pre-commit artifact
+ # sizes; for very small artifacts in certain caches, however,
+ # additional overhead could make the report slightly smaller
+ # than, but close to, the actual size.
+ #
+ # Nonetheless, in practice this should be safe to use as an upper
+ # bound on the cache size.
+ #
+ # Caches with built-in constant-time size reporting should
+ # override this method with a more accurate implementation.
+ #
+ # Returns:
+ #     (int): An approximation of the artifact cache size.
+ #
+ def get_approximate_cache_size(self):
+ # If we don't currently have an estimate, figure out the real
+ # cache size.
+ if self.estimated_size is None:
+ self.estimated_size = self.calculate_cache_size()
+
+ return self.estimated_size
+
################################################
# Abstract methods for subclasses to implement #
################################################
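The estimate above is lazily initialized: nothing is calculated until the first caller asks. A minimal sketch of how a caller might consult it for a quota check, assuming an ArtifactCache instance; the helper below is illustrative and not part of this commit:

    def over_quota(artifacts):
        # Cheap: reuses the running estimate, and only falls back to
        # a full calculation when no estimate exists yet.
        return artifacts.get_approximate_cache_size() > artifacts.max_size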
@@ -328,6 +360,20 @@ class ArtifactCache():
raise ImplError("Cache '{kind}' does not implement link_key()"
.format(kind=type(self).__name__))
+ # calculate_cache_size()
+ #
+ # Calculate and return the real artifact cache size.
+ #
+ # Implementations should also update self.estimated_size as a
+ # side effect of the calculation.
+ #
+ # Returns:
+ #     (int): The size of the artifact cache.
+ #
+ def calculate_cache_size(self):
+ raise ImplError("Cache '{kind}' does not implement calculate_cache_size()"
+ .format(kind=type(self).__name__))
+
################################################
# Local Private Methods #
################################################
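For concreteness, a hypothetical subclass might satisfy the abstract method above as follows; this is only a sketch of the contract (measure the real size, refresh estimated_size, return it), not the implementation of any actual BuildStream cache:

    import subprocess

    class DuBackedCache(ArtifactCache):  # hypothetical subclass
        def calculate_cache_size(self):
            # `du -s -B1` (GNU coreutils) prints the total size of the
            # tree in bytes; the first field of the output is the size.
            output = subprocess.check_output(
                ['du', '-s', '-B1', self.context.artifactdir])
            size = int(output.split()[0])
            # Refresh the estimate, as the docstring above requires
            self.estimated_size = size
            return size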
@@ -369,6 +415,30 @@ class ArtifactCache():
with self.context.timed_activity("Initializing remote caches", silent_nested=True):
self.initialize_remotes(on_failure=remote_failed)
+ # _add_artifact_size()
+ #
+ # Since we cannot track the cache size across the scheduler's
+ # child processes, the main process calls this method every time
+ # a job that added something to the cache finishes.
+ #
+ # The reported artifact size is then added to
+ # ArtifactCache.estimated_size.
+ #
+ def _add_artifact_size(self, artifact_size):
+ if self.estimated_size is None:
+ self.estimated_size = self.calculate_cache_size()
+
+ self.estimated_size += artifact_size
+
+ # _set_cache_size()
+ #
+ # Similar to the above method: when the actual cache size is
+ # calculated in a child process it cannot be recorded there, so
+ # the value is passed back to the main process and set here.
+ #
+ def _set_cache_size(self, cache_size):
+ self.estimated_size = cache_size
+
# _configured_remote_artifact_cache_specs():
#
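Taken together, the two private methods above give the main process two ways to keep the estimate current: an incremental bump when a job reports what it added, and an absolute reset when a child has measured the real size. A hedged sketch of the intended call pattern; the driver function here is hypothetical:

    def update_cache_size(artifacts, child_data):
        # Prefer the authoritative size when the child measured one...
        if 'cache_size' in child_data:
            artifacts._set_cache_size(child_data['cache_size'])
        # ...otherwise just fold in the size of what was added.
        elif 'artifact_size' in child_data:
            artifacts._add_artifact_size(child_data['artifact_size'])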
diff --git a/buildstream/_scheduler/jobs/elementjob.py b/buildstream/_scheduler/jobs/elementjob.py
index e49dfb12f..3dfb9aa6e 100644
--- a/buildstream/_scheduler/jobs/elementjob.py
+++ b/buildstream/_scheduler/jobs/elementjob.py
@@ -156,8 +156,13 @@ class ElementJob(Job):
data = {}
workspace = self._element._get_workspace()
+ artifact_size = self._element._get_artifact_size()
+ cache_size = self._element._get_artifact_cache().calculate_cache_size()
if workspace is not None:
data['workspace'] = workspace.to_dict()
+ if artifact_size is not None:
+ data['artifact_size'] = artifact_size
+ data['cache_size'] = cache_size
return data
diff --git a/buildstream/_scheduler/queues/queue.py b/buildstream/_scheduler/queues/queue.py
index 8ca3ac063..ac20d3711 100644
--- a/buildstream/_scheduler/queues/queue.py
+++ b/buildstream/_scheduler/queues/queue.py
@@ -300,6 +300,8 @@ class Queue():
# Update values that need to be synchronized in the main task
# before calling any queue implementation
self._update_workspaces(element, job)
+ if job.child_data:
+ element._get_artifact_cache()._set_cache_size(job.child_data.get('cache_size'))
# Give the result of the job to the Queue implementor,
# and determine if it should be considered as processed
diff --git a/buildstream/element.py b/buildstream/element.py
index 0f0bf49d6..140c824ec 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -225,6 +225,7 @@ class Element(Plugin):
self.__staged_sources_directory = None # Location where Element.stage_sources() was called
self.__tainted = None # Whether the artifact is tainted and should not be shared
self.__required = False # Whether the artifact is required in the current session
+ self.__artifact_size = None # The size of data committed to the artifact cache
# hash tables of loaded artifact metadata, hashed by key
self.__metadata_keys = {} # Strong and weak keys for this key
@@ -1397,6 +1398,16 @@ class Element(Plugin):
workspace.clear_running_files()
self._get_context().get_workspaces().save_config()
+ # We also need to update the required artifacts, since
+ # workspaced dependencies do not have a fixed cache key
+ # when the build starts.
+ #
+ # This does *not* cause a race condition, because
+ # _assemble_done is called before a cleanup job may be
+ # launched.
+ #
+ self.__artifacts.append_required_artifacts([self])
+
# _assemble():
#
# Internal method for running the entire build phase.
@@ -1524,6 +1535,7 @@ class Element(Plugin):
}), os.path.join(metadir, 'workspaced-dependencies.yaml'))
with self.timed_activity("Caching artifact"):
+ self.__artifact_size = utils._get_dir_size(assembledir)
self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
# Finally cleanup the build dir
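utils._get_dir_size() is not shown in this diff; under the assumption that it behaves like a conventional directory-size helper, a sketch might walk the assembled tree and sum file sizes without following symlinks:

    import os

    def get_dir_size(path):  # illustrative stand-in for utils._get_dir_size
        total = 0
        for root, _, files in os.walk(path):
            for name in files:
                # lstat() so a symlink contributes its own size rather
                # than the size of whatever it points at.
                total += os.lstat(os.path.join(root, name)).st_size
        return total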
@@ -1763,6 +1775,25 @@ class Element(Plugin):
workspaces = self._get_context().get_workspaces()
return workspaces.get_workspace(self._get_full_name())
+ # _get_artifact_size()
+ #
+ # Get the size of the artifact produced by this element in the
+ # current pipeline; if this element has not been assembled or
+ # pulled, this will be None.
+ #
+ # Note that this is the size of the artifact *before* it is
+ # committed to the cache; the size on disk may differ. It can act
+ # as an approximate guide for when to do a proper size calculation.
+ #
+ # Returns:
+ # (int|None): The size of the artifact
+ #
+ def _get_artifact_size(self):
+ return self.__artifact_size
+
+ # _get_artifact_cache()
+ #
+ # Get the artifact cache of this element's context.
+ #
+ # Returns:
+ #     (ArtifactCache): The artifact cache
+ #
+ def _get_artifact_cache(self):
+ return self.__artifacts
+
# _write_script():
#
# Writes a script to the given directory.
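The element accessors added above tie the pieces together: the pre-commit size recorded during _assemble() can be fed back into the cache's running estimate from the main process. An illustrative fragment, not taken from this commit:

    size = element._get_artifact_size()
    if size is not None:
        # Fold the pre-commit size into the cache's running estimate
        element._get_artifact_cache()._add_artifact_size(size)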