From 22b3a0c182916b8eacb666aa7bc1ea829cd92ca4 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Thu, 24 Jan 2019 16:02:34 -0500 Subject: _artifactcache.py: Don't require the quota to be available on disk. Instead only rely on the headroom to be enough to protect against out of space conditions. The headroom can become configurable as a separate step is required. The changes to achieve this are: * Rename ArtifactCache.has_quota_exceeded() to ArtifactCache.full(). * ArtifactCache.full() now also reports True if the available space on the artifact cache volume is smaller than the headroom. This ensures jobs get triggered to cleanup the cache when reaching the end of the disk. * When loading the artifact quota, it is now only an error if the quota exceeds the overall disk space, not if it does not fit in the available space. It is still a warning if the quota does not fit in the available space on the artifact cache volume. * Updated scheduler.py and buildqueue.py for the API rename * tests: Updated the artifactcache/expiry.py test for its expectations in this regard. Added a new test to test an error when quota was specified to exceed total disk space, and adjusted the existing tests to expect a warning when the quota does not fit in the available space. This fixes issue #733 and #869. --- buildstream/_artifactcache.py | 61 ++++++++++++++++++++--------- buildstream/_scheduler/queues/buildqueue.py | 2 +- buildstream/_scheduler/scheduler.py | 4 +- tests/artifactcache/expiry.py | 20 +++++++++- 4 files changed, 63 insertions(+), 24 deletions(-) diff --git a/buildstream/_artifactcache.py b/buildstream/_artifactcache.py index aa40f571a..48ab278c4 100644 --- a/buildstream/_artifactcache.py +++ b/buildstream/_artifactcache.py @@ -98,6 +98,7 @@ class ArtifactCache(): self._cache_size = None # The current cache size, sometimes it's an estimate self._cache_quota = None # The cache quota self._cache_quota_original = None # The cache quota as specified by the user, in bytes + self._cache_quota_headroom = None # The headroom in bytes before reaching the quota or full disk self._cache_lower_threshold = None # The target cache size for a cleanup self._remotes_setup = False # Check to prevent double-setup of remotes @@ -314,7 +315,7 @@ class ArtifactCache(): len(self._required_elements), (context.config_origin or default_conf))) - if self.has_quota_exceeded(): + if self.full(): raise ArtifactError("Cache too full. Aborting.", detail=detail, reason="cache-too-full") @@ -431,15 +432,25 @@ class ArtifactCache(): self._cache_size = cache_size self._write_cache_size(self._cache_size) - # has_quota_exceeded() + # full() # - # Checks if the current artifact cache size exceeds the quota. + # Checks if the artifact cache is full, either + # because the user configured quota has been exceeded + # or because the underlying disk is almost full. # # Returns: - # (bool): True of the quota is exceeded + # (bool): True if the artifact cache is full # - def has_quota_exceeded(self): - return self.get_cache_size() > self._cache_quota + def full(self): + + if self.get_cache_size() > self._cache_quota: + return True + + _, volume_avail = self._get_cache_volume_size() + if volume_avail < self._cache_quota_headroom: + return True + + return False # preflight(): # @@ -936,9 +947,9 @@ class ArtifactCache(): # is taken from the user requested cache_quota. # if 'BST_TEST_SUITE' in os.environ: - headroom = 0 + self._cache_quota_headroom = 0 else: - headroom = 2e9 + self._cache_quota_headroom = 2e9 try: cache_quota = utils._parse_size(self.context.config_cache_quota, @@ -960,27 +971,39 @@ class ArtifactCache(): # if cache_quota is None: # Infinity, set to max system storage cache_quota = cache_size + available_space - if cache_quota < headroom: # Check minimum + if cache_quota < self._cache_quota_headroom: # Check minimum raise LoadError(LoadErrorReason.INVALID_DATA, "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) + "BuildStream requires a minimum cache quota of 2G.") - elif cache_quota > cache_size + available_space: # Check maximum - if '%' in self.context.config_cache_quota: - available = (available_space / total_size) * 100 - available = '{}% of total disk space'.format(round(available, 1)) - else: - available = utils._pretty_size(available_space) - + elif cache_quota > total_size: + # A quota greater than the total disk size is certianly an error raise ArtifactError("Your system does not have enough available " + "space to support the cache quota specified.", detail=("You have specified a quota of {quota} total disk space.\n" + "The filesystem containing {local_cache_path} only " + - "has {available_size} available.") + "has {total_size} total disk space.") .format( quota=self.context.config_cache_quota, local_cache_path=self.context.artifactdir, - available_size=available), + total_size=utils._pretty_size(total_size)), reason='insufficient-storage-for-quota') + elif cache_quota > cache_size + available_space: + # The quota does not fit in the available space, this is a warning + if '%' in self.context.config_cache_quota: + available = (available_space / total_size) * 100 + available = '{}% of total disk space'.format(round(available, 1)) + else: + available = utils._pretty_size(available_space) + + self._message(MessageType.WARN, + "Your system does not have enough available " + + "space to support the cache quota specified.", + detail=("You have specified a quota of {quota} total disk space.\n" + + "The filesystem containing {local_cache_path} only " + + "has {available_size} available.") + .format(quota=self.context.config_cache_quota, + local_cache_path=self.context.artifactdir, + available_size=available)) # Place a slight headroom (2e9 (2GB) on the cache_quota) into # cache_quota to try and avoid exceptions. @@ -990,7 +1013,7 @@ class ArtifactCache(): # already really fuzzy. # self._cache_quota_original = cache_quota - self._cache_quota = cache_quota - headroom + self._cache_quota = cache_quota - self._cache_quota_headroom self._cache_lower_threshold = self._cache_quota / 2 # _get_cache_volume_size() diff --git a/buildstream/_scheduler/queues/buildqueue.py b/buildstream/_scheduler/queues/buildqueue.py index 66ec4c69b..60ec19ff4 100644 --- a/buildstream/_scheduler/queues/buildqueue.py +++ b/buildstream/_scheduler/queues/buildqueue.py @@ -100,7 +100,7 @@ class BuildQueue(Queue): # If the estimated size outgrows the quota, ask the scheduler # to queue a job to actually check the real cache size. # - if artifacts.has_quota_exceeded(): + if artifacts.full(): self._scheduler.check_cache_size() def done(self, job, element, result, status): diff --git a/buildstream/_scheduler/scheduler.py b/buildstream/_scheduler/scheduler.py index f9d627912..f35a85b23 100644 --- a/buildstream/_scheduler/scheduler.py +++ b/buildstream/_scheduler/scheduler.py @@ -303,7 +303,7 @@ class Scheduler(): # starts while we are checking the cache. # artifacts = self.context.artifactcache - if artifacts.has_quota_exceeded(): + if artifacts.full(): self._sched_cache_size_job(exclusive=True) # _spawn_job() @@ -338,7 +338,7 @@ class Scheduler(): context = self.context artifacts = context.artifactcache - if artifacts.has_quota_exceeded(): + if artifacts.full(): self._cleanup_scheduled = True # Callback for the cleanup job diff --git a/tests/artifactcache/expiry.py b/tests/artifactcache/expiry.py index 2230b70bd..2cc59e03c 100644 --- a/tests/artifactcache/expiry.py +++ b/tests/artifactcache/expiry.py @@ -317,6 +317,16 @@ def test_never_delete_required_track(cli, datafiles, tmpdir): # has 10K total disk space, and 6K of it is already in use (not # including any space used by the artifact cache). # +# Parameters: +# quota (str): A quota size configuration for the config file +# err_domain (str): An ErrorDomain, or 'success' or 'warning' +# err_reason (str): A reson to compare with an error domain +# +# If err_domain is 'success', then err_reason is unused. +# +# If err_domain is 'warning', then err_reason is asserted to +# be in the stderr. +# @pytest.mark.parametrize("quota,err_domain,err_reason", [ # Valid configurations ("1", 'success', None), @@ -328,9 +338,13 @@ def test_never_delete_required_track(cli, datafiles, tmpdir): ("-1", ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA), ("pony", ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA), ("200%", ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA), + + # Not enough space on disk even if you cleaned up + ("11K", ErrorDomain.ARTIFACT, 'insufficient-storage-for-quota'), + # Not enough space for these caches - ("7K", ErrorDomain.ARTIFACT, 'insufficient-storage-for-quota'), - ("70%", ErrorDomain.ARTIFACT, 'insufficient-storage-for-quota') + ("7K", 'warning', 'Your system does not have enough available'), + ("70%", 'warning', 'Your system does not have enough available') ]) @pytest.mark.datafiles(DATA_DIR) def test_invalid_cache_quota(cli, datafiles, tmpdir, quota, err_domain, err_reason): @@ -374,6 +388,8 @@ def test_invalid_cache_quota(cli, datafiles, tmpdir, quota, err_domain, err_reas if err_domain == 'success': res.assert_success() + elif err_domain == 'warning': + assert err_reason in res.stderr else: res.assert_main_error(err_domain, err_reason) -- cgit v1.2.1