From af0bfaeeda28186df3accebb688f208cf6036a29 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Sun, 9 Sep 2018 18:37:24 +0900 Subject: element.py: Remove _get_artifact_size() There is no justification to hold onto this state here. Instead, just make `Element._assemble()` return the size of the artifact it cached, and localize handling of that return value in the BuildQueue implementation where the value is observed. --- buildstream/_scheduler/jobs/elementjob.py | 3 --- buildstream/_scheduler/queues/buildqueue.py | 31 +++++++++++++++-------------- buildstream/element.py | 24 ++++++---------------- 3 files changed, 22 insertions(+), 36 deletions(-) diff --git a/buildstream/_scheduler/jobs/elementjob.py b/buildstream/_scheduler/jobs/elementjob.py index 36527794e..b3318302a 100644 --- a/buildstream/_scheduler/jobs/elementjob.py +++ b/buildstream/_scheduler/jobs/elementjob.py @@ -109,14 +109,11 @@ class ElementJob(Job): data = {} workspace = self._element._get_workspace() - artifact_size = self._element._get_artifact_size() artifacts = self._element._get_artifact_cache() cache_size = artifacts.compute_cache_size() if workspace is not None: data['workspace'] = workspace.to_dict() - if artifact_size is not None: - data['artifact_size'] = artifact_size data['cache_size'] = cache_size return data diff --git a/buildstream/_scheduler/queues/buildqueue.py b/buildstream/_scheduler/queues/buildqueue.py index e867ef010..2a07fdd6c 100644 --- a/buildstream/_scheduler/queues/buildqueue.py +++ b/buildstream/_scheduler/queues/buildqueue.py @@ -31,8 +31,7 @@ class BuildQueue(Queue): resources = [ResourceType.PROCESS, ResourceType.CACHE] def process(self, element): - element._assemble() - return element._get_unique_id() + return element._assemble() def status(self, element): # state of dependencies may have changed, recalculate element state @@ -51,18 +50,20 @@ class BuildQueue(Queue): return QueueStatus.READY - def _check_cache_size(self, job, element): - if not job.child_data: - return + def _check_cache_size(self, job, element, artifact_size): - artifact_size = job.child_data.get('artifact_size', False) + # After completing a build job, add the artifact size + # as returned from Element._assemble() to the estimated + # artifact cache size + # + cache = element._get_artifact_cache() + cache.add_artifact_size(artifact_size) - if artifact_size: - cache = element._get_artifact_cache() - cache.add_artifact_size(artifact_size) - - if cache.get_approximate_cache_size() > cache.cache_quota: - self._scheduler.check_cache_size() + # If the estimated size outgrows the quota, ask the scheduler + # to queue a job to actually check the real cache size. + # + if cache.get_approximate_cache_size() > cache.cache_quota: + self._scheduler.check_cache_size() def done(self, job, element, result, success): @@ -70,8 +71,8 @@ class BuildQueue(Queue): # Inform element in main process that assembly is done element._assemble_done() - # This has to be done after _assemble_done, such that the - # element may register its cache key as required - self._check_cache_size(job, element) + # This has to be done after _assemble_done, such that the + # element may register its cache key as required + self._check_cache_size(job, element, result) return True diff --git a/buildstream/element.py b/buildstream/element.py index 560712854..3093bc5e0 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -231,7 +231,6 @@ class Element(Plugin): self.__staged_sources_directory = None # Location where Element.stage_sources() was called self.__tainted = None # Whether the artifact is tainted and should not be shared self.__required = False # Whether the artifact is required in the current session - self.__artifact_size = None # The size of data committed to the artifact cache # hash tables of loaded artifact metadata, hashed by key self.__metadata_keys = {} # Strong and weak keys for this key @@ -1454,6 +1453,9 @@ class Element(Plugin): # - Call the public abstract methods for the build phase # - Cache the resulting artifact # + # Returns: + # (int): The size of the newly cached artifact + # def _assemble(self): # Assert call ordering @@ -1573,12 +1575,14 @@ class Element(Plugin): }), os.path.join(metadir, 'workspaced-dependencies.yaml')) with self.timed_activity("Caching artifact"): - self.__artifact_size = utils._get_dir_size(assembledir) + artifact_size = utils._get_dir_size(assembledir) self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit()) # Finally cleanup the build dir cleanup_rootdir() + return artifact_size + # _pull_pending() # # Check whether the artifact will be pulled. @@ -1817,22 +1821,6 @@ class Element(Plugin): workspaces = self._get_context().get_workspaces() return workspaces.get_workspace(self._get_full_name()) - # _get_artifact_size() - # - # Get the size of the artifact produced by this element in the - # current pipeline - if this element has not been assembled or - # pulled, this will be None. - # - # Note that this is the size of an artifact *before* committing it - # to the cache, the size on disk may differ. It can act as an - # approximate guide for when to do a proper size calculation. - # - # Returns: - # (int|None): The size of the artifact - # - def _get_artifact_size(self): - return self.__artifact_size - # _get_artifact_cache() # # Accessor for the artifact cache -- cgit v1.2.1