diff options
author | Raoul Hidalgo Charman <raoul.hidalgocharman@codethink.co.uk> | 2019-02-21 17:39:52 +0000 |
---|---|---|
committer | Jürg Billeter <j@bitron.ch> | 2019-03-14 07:12:34 +0000 |
commit | cbc5af53e7dc6b2d450728f6c17b9a79ff167990 (patch) | |
tree | 0b94e7418b3b48e3008e2adcaa0d0d61a018ab7d | |
parent | f4dbce44a8b8135daca1895772048b2e25fc8268 (diff) | |
download | buildstream-cbc5af53e7dc6b2d450728f6c17b9a79ff167990.tar.gz |
CASQuota: move clean method here
The clean method needs to be able to remove source refs as well as artifact refs, so it moves from ArtifactCache to CASQuota.
Part of #440
-rw-r--r-- | buildstream/_artifactcache.py | 117 | ||||
-rw-r--r-- | buildstream/_cas/cascache.py | 136 | ||||
-rw-r--r-- | buildstream/_scheduler/jobs/cleanupjob.py | 2 | ||||
-rw-r--r-- | tests/artifactcache/expiry.py | 6 |
4 files changed, 147 insertions, 114 deletions
diff --git a/buildstream/_artifactcache.py b/buildstream/_artifactcache.py index 3afa980d2..e04648bee 100644 --- a/buildstream/_artifactcache.py +++ b/buildstream/_artifactcache.py @@ -17,15 +17,11 @@ # Authors: # Tristan Maat <tristan.maat@codethink.co.uk> -import os - from ._basecache import BaseCache from .types import _KeyStrength from ._exceptions import ArtifactError, CASError -from ._message import MessageType -from . import utils -from ._cas import CASRemoteSpec, CASCacheUsage +from ._cas import CASRemoteSpec from .storage._casbaseddirectory import CasBasedDirectory @@ -61,6 +57,9 @@ class ArtifactCache(BaseCache): self._required_elements = set() # The elements required for this session + self.casquota.add_ref_callbacks(self.required_artifacts()) + self.casquota.add_remove_callbacks((lambda x: not x.startswith('sources/'), self.remove)) + # mark_required_elements(): # # Mark elements whose artifacts are required for the current run. @@ -102,117 +101,15 @@ class ArtifactCache(BaseCache): except CASError: pass - # clean(): - # - # Clean the artifact cache as much as possible. 
- # - # Args: - # progress (callable): A callback to call when a ref is removed - # - # Returns: - # (int): The size of the cache after having cleaned up - # - def clean(self, progress=None): - artifacts = self.list_artifacts() - context = self.context - - # Some accumulative statistics - removed_ref_count = 0 - space_saved = 0 - - # Start off with an announcement with as much info as possible - volume_size, volume_avail = self.casquota._get_cache_volume_size() - self._message(MessageType.STATUS, "Starting cache cleanup", - detail=("Elements required by the current build plan: {}\n" + - "User specified quota: {} ({})\n" + - "Cache usage: {}\n" + - "Cache volume: {} total, {} available") - .format(len(self._required_elements), - context.config_cache_quota, - utils._pretty_size(self.casquota._cache_quota, dec_places=2), - utils._pretty_size(self.casquota.get_cache_size(), dec_places=2), - utils._pretty_size(volume_size, dec_places=2), - utils._pretty_size(volume_avail, dec_places=2))) - + def required_artifacts(self): # Build a set of the cache keys which are required # based on the required elements at cleanup time # # We lock both strong and weak keys - deleting one but not the # other won't save space, but would be a user inconvenience. - required_artifacts = set() for element in self._required_elements: - required_artifacts.update([ - element._get_cache_key(strength=_KeyStrength.STRONG), - element._get_cache_key(strength=_KeyStrength.WEAK) - ]) - - # Do a real computation of the cache size once, just in case - self.casquota.compute_cache_size() - usage = CASCacheUsage(self.casquota) - self._message(MessageType.STATUS, "Cache usage recomputed: {}".format(usage)) - - while self.casquota.get_cache_size() >= self.casquota._cache_lower_threshold: - try: - to_remove = artifacts.pop(0) - except IndexError: - # If too many artifacts are required, and we therefore - # can't remove them, we have to abort the build. 
- # - # FIXME: Asking the user what to do may be neater - # - default_conf = os.path.join(os.environ['XDG_CONFIG_HOME'], - 'buildstream.conf') - detail = ("Aborted after removing {} refs and saving {} disk space.\n" - "The remaining {} in the cache is required by the {} elements in your build plan\n\n" - "There is not enough space to complete the build.\n" - "Please increase the cache-quota in {} and/or make more disk space." - .format(removed_ref_count, - utils._pretty_size(space_saved, dec_places=2), - utils._pretty_size(self.casquota.get_cache_size(), dec_places=2), - len(self._required_elements), - (context.config_origin or default_conf))) - - if self.full(): - raise ArtifactError("Cache too full. Aborting.", - detail=detail, - reason="cache-too-full") - else: - break - - key = to_remove.rpartition('/')[2] - if key not in required_artifacts: - - # Remove the actual artifact, if it's not required. - size = self.remove(to_remove) - - removed_ref_count += 1 - space_saved += size - - self._message(MessageType.STATUS, - "Freed {: <7} {}".format( - utils._pretty_size(size, dec_places=2), - to_remove)) - - # Remove the size from the removed size - self.casquota.set_cache_size(self.casquota._cache_size - size) - - # User callback - # - # Currently this process is fairly slow, but we should - # think about throttling this progress() callback if this - # becomes too intense. 
- if progress: - progress() - - # Informational message about the side effects of the cleanup - self._message(MessageType.INFO, "Cleanup completed", - detail=("Removed {} refs and saving {} disk space.\n" + - "Cache usage is now: {}") - .format(removed_ref_count, - utils._pretty_size(space_saved, dec_places=2), - utils._pretty_size(self.casquota.get_cache_size(), dec_places=2))) - - return self.casquota.get_cache_size() + yield element._get_cache_key(strength=_KeyStrength.STRONG) + yield element._get_cache_key(strength=_KeyStrength.WEAK) def full(self): return self.casquota.full() diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py index 02030bb68..bb65269ad 100644 --- a/buildstream/_cas/cascache.py +++ b/buildstream/_cas/cascache.py @@ -1041,6 +1041,7 @@ class CASCache(): class CASQuota: def __init__(self, context): + self.context = context self.cas = context.get_cascache() self.casdir = self.cas.casdir self._config_cache_quota = context.config_cache_quota @@ -1054,6 +1055,9 @@ class CASQuota: self._message = context.message + self._ref_callbacks = [] # Call backs to get required refs + self._remove_callbacks = [] # Call backs to remove refs + self._calculate_cache_quota() # compute_cache_size() @@ -1283,6 +1287,138 @@ class CASQuota: self._cache_quota = cache_quota - self._cache_quota_headroom self._cache_lower_threshold = self._cache_quota / 2 + # clean(): + # + # Clean the artifact cache as much as possible. 
+ # + # Args: + # progress (callable): A callback to call when a ref is removed + # + # Returns: + # (int): The size of the cache after having cleaned up + # + def clean(self, progress=None): + context = self.context + + # Some accumulative statistics + removed_ref_count = 0 + space_saved = 0 + + # get required refs + refs = self.cas.list_refs() + required_refs = set(itertools.chain.from_iterable(self._ref_callbacks)) + + # Start off with an announcement with as much info as possible + volume_size, volume_avail = self._get_cache_volume_size() + self._message(Message( + None, MessageType.STATUS, "Starting cache cleanup", + detail=("Elements required by the current build plan: {}\n" + + "User specified quota: {} ({})\n" + + "Cache usage: {}\n" + + "Cache volume: {} total, {} available") + .format(len(required_refs), + context.config_cache_quota, + utils._pretty_size(self._cache_quota, dec_places=2), + utils._pretty_size(self.get_cache_size(), dec_places=2), + utils._pretty_size(volume_size, dec_places=2), + utils._pretty_size(volume_avail, dec_places=2)))) + + # Do a real computation of the cache size once, just in case + self.compute_cache_size() + usage = CASCacheUsage(self) + self._message(Message(None, MessageType.STATUS, + "Cache usage recomputed: {}".format(usage))) + + while self.get_cache_size() >= self._cache_lower_threshold: + try: + to_remove = refs.pop(0) + except IndexError: + # If too many artifacts are required, and we therefore + # can't remove them, we have to abort the build. + # + # FIXME: Asking the user what to do may be neater + # + default_conf = os.path.join(os.environ['XDG_CONFIG_HOME'], + 'buildstream.conf') + detail = ("Aborted after removing {} refs and saving {} disk space.\n" + "The remaining {} in the cache is required by the {} elements in your build plan\n\n" + "There is not enough space to complete the build.\n" + "Please increase the cache-quota in {} and/or make more disk space." 
+ .format(removed_ref_count, + utils._pretty_size(space_saved, dec_places=2), + utils._pretty_size(self.get_cache_size(), dec_places=2), + len(required_refs), + (context.config_origin or default_conf))) + + if self.full(): + raise CASCacheError("Cache too full. Aborting.", + detail=detail, + reason="cache-too-full") + else: + break + + key = to_remove.rpartition('/')[2] + if key not in required_refs: + + # Remove the actual artifact, if it's not required. + size = 0 + removed_ref = False + for (pred, remove) in self._remove_callbacks: + if pred(to_remove): + size = remove(to_remove) + removed_ref = True + break + + if not removed_ref: + continue + + removed_ref_count += 1 + space_saved += size + + self._message(Message( + None, MessageType.STATUS, + "Freed {: <7} {}".format( + utils._pretty_size(size, dec_places=2), + to_remove))) + + # Remove the size from the removed size + self.set_cache_size(self._cache_size - size) + + # User callback + # + # Currently this process is fairly slow, but we should + # think about throttling this progress() callback if this + # becomes too intense. + if progress: + progress() + + # Informational message about the side effects of the cleanup + self._message(Message( + None, MessageType.INFO, "Cleanup completed", + detail=("Removed {} refs and saving {} disk space.\n" + + "Cache usage is now: {}") + .format(removed_ref_count, + utils._pretty_size(space_saved, dec_places=2), + utils._pretty_size(self.get_cache_size(), dec_places=2)))) + + return self.get_cache_size() + + # add_ref_callbacks() + # + # Args: + # callback (Iterator): function that gives list of required refs + def add_ref_callbacks(self, callback): + self._ref_callbacks.append(callback) + + # add_remove_callbacks() + # + # Args: + # callback (predicate, callback): The predicate says whether this is the + # correct type to remove given a ref and the callback does actual + # removing. 
+ def add_remove_callbacks(self, callback): + self._remove_callbacks.append(callback) + def _grouper(iterable, n): while True: diff --git a/buildstream/_scheduler/jobs/cleanupjob.py b/buildstream/_scheduler/jobs/cleanupjob.py index e016d4cd7..9610d53f8 100644 --- a/buildstream/_scheduler/jobs/cleanupjob.py +++ b/buildstream/_scheduler/jobs/cleanupjob.py @@ -32,7 +32,7 @@ class CleanupJob(Job): def progress(): self.send_message('update-cache-size', self._casquota.get_cache_size()) - return self._artifacts.clean(progress) + return self._casquota.clean(progress) def handle_message(self, message_type, message): # Update the cache size in the main process as we go, diff --git a/tests/artifactcache/expiry.py b/tests/artifactcache/expiry.py index 20e8cbda2..23dc61d0f 100644 --- a/tests/artifactcache/expiry.py +++ b/tests/artifactcache/expiry.py @@ -94,7 +94,7 @@ def test_artifact_too_large(cli, datafiles, size): create_element_size('target.bst', project, element_path, [], size) res = cli.run(project=project, args=['build', 'target.bst']) res.assert_main_error(ErrorDomain.STREAM, None) - res.assert_task_error(ErrorDomain.ARTIFACT, 'cache-too-full') + res.assert_task_error(ErrorDomain.CAS, 'cache-too-full') @pytest.mark.datafiles(DATA_DIR) @@ -224,7 +224,7 @@ def test_never_delete_required(cli, datafiles): # cache. Since all elements are required, the build should fail. res = cli.run(project=project, args=['build', 'target.bst']) res.assert_main_error(ErrorDomain.STREAM, None) - res.assert_task_error(ErrorDomain.ARTIFACT, 'cache-too-full') + res.assert_task_error(ErrorDomain.CAS, 'cache-too-full') # Only the first artifact fits in the cache, but we expect # that the first *two* artifacts will be cached. 
@@ -295,7 +295,7 @@ def test_never_delete_required_track(cli, datafiles): # res = cli.run(project=project, args=['build', '--track-all', 'target.bst']) res.assert_main_error(ErrorDomain.STREAM, None) - res.assert_task_error(ErrorDomain.ARTIFACT, 'cache-too-full') + res.assert_task_error(ErrorDomain.CAS, 'cache-too-full') # Expect the same result that we did in test_never_delete_required() # |