diff options
author | Jonathan Maw <jonathan.maw@codethink.co.uk> | 2018-08-20 16:27:18 +0000 |
---|---|---|
committer | Jonathan Maw <jonathan.maw@codethink.co.uk> | 2018-08-20 16:27:18 +0000 |
commit | 92e34ccd2bfdb09f3c6d16e974d1c5fb78ac0516 (patch) | |
tree | e093c1ea9fa30eca781a14fa20348ac38e2bacb6 | |
parent | 372abed5f1b2202435d553b28fa807a913f8b5b9 (diff) | |
parent | a67906e695c027bdfa60a77a243bf3d6a664eec1 (diff) | |
download | buildstream-92e34ccd2bfdb09f3c6d16e974d1c5fb78ac0516.tar.gz |
Merge branch 'jonathan/cache-cache-size' into 'master'
Jonathan/cache cache size
See merge request BuildStream/buildstream!679
-rw-r--r-- | buildstream/_artifactcache/__init__.py | 2 | ||||
-rw-r--r-- | buildstream/_artifactcache/artifactcache.py | 120 | ||||
-rw-r--r-- | buildstream/_artifactcache/cascache.py | 4 | ||||
-rw-r--r-- | buildstream/_context.py | 74 | ||||
-rw-r--r-- | buildstream/_frontend/app.py | 6 | ||||
-rw-r--r-- | buildstream/_scheduler/queues/buildqueue.py | 2 | ||||
-rw-r--r-- | buildstream/_scheduler/scheduler.py | 4 | ||||
-rw-r--r-- | tests/artifactcache/cache_size.py | 62 | ||||
-rw-r--r-- | tests/testutils/artifactshare.py | 3 |
9 files changed, 195 insertions, 82 deletions
diff --git a/buildstream/_artifactcache/__init__.py b/buildstream/_artifactcache/__init__.py index 07ed52b4b..fad483a57 100644 --- a/buildstream/_artifactcache/__init__.py +++ b/buildstream/_artifactcache/__init__.py @@ -17,4 +17,4 @@ # Authors: # Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> -from .artifactcache import ArtifactCache, ArtifactCacheSpec +from .artifactcache import ArtifactCache, ArtifactCacheSpec, CACHE_SIZE_FILE diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py index d98c291f9..f28fe394f 100644 --- a/buildstream/_artifactcache/artifactcache.py +++ b/buildstream/_artifactcache/artifactcache.py @@ -28,6 +28,9 @@ from .. import utils from .. import _yaml +CACHE_SIZE_FILE = "cache_size" + + # An ArtifactCacheSpec holds the user configuration for a single remote # artifact cache. # @@ -82,7 +85,6 @@ class ArtifactCache(): self.extractdir = os.path.join(context.artifactdir, 'extract') self.tmpdir = os.path.join(context.artifactdir, 'tmp') - self.max_size = context.cache_quota self.estimated_size = None self.global_remote_specs = [] @@ -90,6 +92,8 @@ class ArtifactCache(): self._local = False self.cache_size = None + self.cache_quota = None + self.cache_lower_threshold = None os.makedirs(self.extractdir, exist_ok=True) os.makedirs(self.tmpdir, exist_ok=True) @@ -227,7 +231,7 @@ class ArtifactCache(): def clean(self): artifacts = self.list_artifacts() - while self.calculate_cache_size() >= self.context.cache_quota - self.context.cache_lower_threshold: + while self.calculate_cache_size() >= self.cache_quota - self.cache_lower_threshold: try: to_remove = artifacts.pop(0) except IndexError: @@ -241,7 +245,7 @@ class ArtifactCache(): "Please increase the cache-quota in {}." .format(self.context.config_origin or default_conf)) - if self.calculate_cache_size() > self.context.cache_quota: + if self.calculate_cache_size() > self.cache_quota: raise ArtifactError("Cache too full. Aborting.", detail=detail, reason="cache-too-full") @@ -282,7 +286,11 @@ class ArtifactCache(): # If we don't currently have an estimate, figure out the real # cache size. if self.estimated_size is None: - self.estimated_size = self.calculate_cache_size() + stored_size = self._read_cache_size() + if stored_size is not None: + self.estimated_size = stored_size + else: + self.estimated_size = self.calculate_cache_size() return self.estimated_size @@ -541,6 +549,7 @@ class ArtifactCache(): self.estimated_size = self.calculate_cache_size() self.estimated_size += artifact_size + self._write_cache_size(self.estimated_size) # _set_cache_size() # @@ -551,6 +560,109 @@ class ArtifactCache(): def _set_cache_size(self, cache_size): self.estimated_size = cache_size + # set_cache_size is called in cleanup, where it may set the cache to None + if self.estimated_size is not None: + self._write_cache_size(self.estimated_size) + + # _write_cache_size() + # + # Writes the given size of the artifact to the cache's size file + # + def _write_cache_size(self, size): + assert isinstance(size, int) + size_file_path = os.path.join(self.context.artifactdir, CACHE_SIZE_FILE) + with open(size_file_path, "w") as f: + f.write(str(size)) + + # _read_cache_size() + # + # Reads and returns the size of the artifact cache that's stored in the + # cache's size file + # + def _read_cache_size(self): + size_file_path = os.path.join(self.context.artifactdir, CACHE_SIZE_FILE) + + if not os.path.exists(size_file_path): + return None + + with open(size_file_path, "r") as f: + size = f.read() + + try: + num_size = int(size) + except ValueError as e: + raise ArtifactError("Size '{}' parsed from '{}' was not an integer".format( + size, size_file_path)) from e + + return num_size + + # _calculate_cache_quota() + # + # Calculates and sets the cache quota and lower threshold based on the + # quota set in Context. + # It checks that the quota is both a valid expression, and that there is + # enough disk space to satisfy that quota + # + def _calculate_cache_quota(self): + # Headroom intended to give BuildStream a bit of leeway. + # This acts as the minimum size of cache_quota and also + # is taken from the user requested cache_quota. + # + if 'BST_TEST_SUITE' in os.environ: + headroom = 0 + else: + headroom = 2e9 + + artifactdir_volume = self.context.artifactdir + while not os.path.exists(artifactdir_volume): + artifactdir_volume = os.path.dirname(artifactdir_volume) + + try: + cache_quota = utils._parse_size(self.context.config_cache_quota, artifactdir_volume) + except utils.UtilError as e: + raise LoadError(LoadErrorReason.INVALID_DATA, + "{}\nPlease specify the value in bytes or as a % of full disk space.\n" + "\nValid values are, for example: 800M 10G 1T 50%\n" + .format(str(e))) from e + + stat = os.statvfs(artifactdir_volume) + available_space = (stat.f_bsize * stat.f_bavail) + + cache_size = self.get_approximate_cache_size() + + # Ensure system has enough storage for the cache_quota + # + # If cache_quota is none, set it to the maximum it could possibly be. + # + # Also check that cache_quota is atleast as large as our headroom. + # + if cache_quota is None: # Infinity, set to max system storage + cache_quota = cache_size + available_space + if cache_quota < headroom: # Check minimum + raise LoadError(LoadErrorReason.INVALID_DATA, + "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) + + "BuildStream requires a minimum cache quota of 2G.") + elif cache_quota > cache_size + available_space: # Check maximum + raise LoadError(LoadErrorReason.INVALID_DATA, + ("Your system does not have enough available " + + "space to support the cache quota specified.\n" + + "You currently have:\n" + + "- {used} of cache in use at {local_cache_path}\n" + + "- {available} of available system storage").format( + used=utils._pretty_size(cache_size), + local_cache_path=self.context.artifactdir, + available=utils._pretty_size(available_space))) + + # Place a slight headroom (2e9 (2GB) on the cache_quota) into + # cache_quota to try and avoid exceptions. + # + # Of course, we might still end up running out during a build + # if we end up writing more than 2G, but hey, this stuff is + # already really fuzzy. + # + self.cache_quota = cache_quota - headroom + self.cache_lower_threshold = self.cache_quota / 2 + # _configured_remote_artifact_cache_specs(): # diff --git a/buildstream/_artifactcache/cascache.py b/buildstream/_artifactcache/cascache.py index 074899d81..d616e293a 100644 --- a/buildstream/_artifactcache/cascache.py +++ b/buildstream/_artifactcache/cascache.py @@ -61,6 +61,8 @@ class CASCache(ArtifactCache): os.makedirs(os.path.join(self.casdir, 'refs', 'heads'), exist_ok=True) os.makedirs(os.path.join(self.casdir, 'objects'), exist_ok=True) + self._calculate_cache_quota() + self._enable_push = enable_push # Per-project list of _CASRemote instances. @@ -330,7 +332,7 @@ class CASCache(ArtifactCache): request.write_offset = offset # max. 64 kB chunks request.data = f.read(chunk_size) - request.resource_name = resource_name + request.resource_name = resource_name # pylint: disable=cell-var-from-loop request.finish_write = remaining <= 0 yield request offset += chunk_size diff --git a/buildstream/_context.py b/buildstream/_context.py index 8ebb61d23..a94d374cf 100644 --- a/buildstream/_context.py +++ b/buildstream/_context.py @@ -64,12 +64,6 @@ class Context(): # The locations from which to push and pull prebuilt artifacts self.artifact_cache_specs = [] - # The artifact cache quota - self.cache_quota = None - - # The lower threshold to which we aim to reduce the cache size - self.cache_lower_threshold = None - # The directory to store build logs self.logdir = None @@ -124,6 +118,8 @@ class Context(): self._workspaces = None self._log_handle = None self._log_filename = None + self.config_cache_quota = 'infinity' + self.artifactdir_volume = None # load() # @@ -183,71 +179,7 @@ class Context(): cache = _yaml.node_get(defaults, Mapping, 'cache') _yaml.node_validate(cache, ['quota']) - artifactdir_volume = self.artifactdir - while not os.path.exists(artifactdir_volume): - artifactdir_volume = os.path.dirname(artifactdir_volume) - - # We read and parse the cache quota as specified by the user - cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity') - try: - cache_quota = utils._parse_size(cache_quota, artifactdir_volume) - except utils.UtilError as e: - raise LoadError(LoadErrorReason.INVALID_DATA, - "{}\nPlease specify the value in bytes or as a % of full disk space.\n" - "\nValid values are, for example: 800M 10G 1T 50%\n" - .format(str(e))) from e - - # Headroom intended to give BuildStream a bit of leeway. - # This acts as the minimum size of cache_quota and also - # is taken from the user requested cache_quota. - # - if 'BST_TEST_SUITE' in os.environ: - headroom = 0 - else: - headroom = 2e9 - - stat = os.statvfs(artifactdir_volume) - available_space = (stat.f_bsize * stat.f_bavail) - - # Again, the artifact directory may not yet have been created yet - # - if not os.path.exists(self.artifactdir): - cache_size = 0 - else: - cache_size = utils._get_dir_size(self.artifactdir) - - # Ensure system has enough storage for the cache_quota - # - # If cache_quota is none, set it to the maximum it could possibly be. - # - # Also check that cache_quota is atleast as large as our headroom. - # - if cache_quota is None: # Infinity, set to max system storage - cache_quota = cache_size + available_space - if cache_quota < headroom: # Check minimum - raise LoadError(LoadErrorReason.INVALID_DATA, - "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) + - "BuildStream requires a minimum cache quota of 2G.") - elif cache_quota > cache_size + available_space: # Check maximum - raise LoadError(LoadErrorReason.INVALID_DATA, - ("Your system does not have enough available " + - "space to support the cache quota specified.\n" + - "You currently have:\n" + - "- {used} of cache in use at {local_cache_path}\n" + - "- {available} of available system storage").format( - used=utils._pretty_size(cache_size), - local_cache_path=self.artifactdir, - available=utils._pretty_size(available_space))) - - # Place a slight headroom (2e9 (2GB) on the cache_quota) into - # cache_quota to try and avoid exceptions. - # - # Of course, we might still end up running out during a build - # if we end up writing more than 2G, but hey, this stuff is - # already really fuzzy. - # - self.cache_quota = cache_quota - headroom - self.cache_lower_threshold = self.cache_quota / 2 + self.config_cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity') # Load artifact share configuration self.artifact_cache_specs = ArtifactCache.specs_from_config_node(defaults) diff --git a/buildstream/_frontend/app.py b/buildstream/_frontend/app.py index 1550fbcb3..1e357f123 100644 --- a/buildstream/_frontend/app.py +++ b/buildstream/_frontend/app.py @@ -198,8 +198,10 @@ class App(): option_value = self._main_options.get(cli_option) if option_value is not None: setattr(self.context, context_attr, option_value) - - Platform.create_instance(self.context) + try: + Platform.create_instance(self.context) + except BstError as e: + self._error_exit(e, "Error instantiating platform") # Create the logger right before setting the message handler self.logger = LogLine(self.context, diff --git a/buildstream/_scheduler/queues/buildqueue.py b/buildstream/_scheduler/queues/buildqueue.py index 5967fbf76..2009fce97 100644 --- a/buildstream/_scheduler/queues/buildqueue.py +++ b/buildstream/_scheduler/queues/buildqueue.py @@ -97,7 +97,7 @@ class BuildQueue(Queue): cache = element._get_artifact_cache() cache._add_artifact_size(artifact_size) - if cache.get_approximate_cache_size() > self._scheduler.context.cache_quota: + if cache.get_approximate_cache_size() > cache.cache_quota: self._scheduler._check_cache_size_real() def done(self, job, element, result, success): diff --git a/buildstream/_scheduler/scheduler.py b/buildstream/_scheduler/scheduler.py index 3d1d79b61..38d38be48 100644 --- a/buildstream/_scheduler/scheduler.py +++ b/buildstream/_scheduler/scheduler.py @@ -29,6 +29,7 @@ from contextlib import contextmanager # Local imports from .resources import Resources, ResourceType from .jobs import CacheSizeJob, CleanupJob +from .._platform import Platform # A decent return code for Scheduler.run() @@ -316,7 +317,8 @@ class Scheduler(): self._sched() def _run_cleanup(self, cache_size): - if cache_size and cache_size < self.context.cache_quota: + platform = Platform.get_platform() + if cache_size and cache_size < platform.artifactcache.cache_quota: return job = CleanupJob(self, 'cleanup', 'cleanup', diff --git a/tests/artifactcache/cache_size.py b/tests/artifactcache/cache_size.py new file mode 100644 index 000000000..0d12cda8c --- /dev/null +++ b/tests/artifactcache/cache_size.py @@ -0,0 +1,62 @@ +import os +import pytest + +from buildstream import _yaml +from buildstream._artifactcache import CACHE_SIZE_FILE + +from tests.testutils import cli, create_element_size + +# XXX: Currently lacking: +# * A way to check whether it's faster to read cache size on +# successive invocations. +# * A way to check whether the cache size file has been read. + + +def create_project(project_dir): + project_file = os.path.join(project_dir, "project.conf") + project_conf = { + "name": "test" + } + _yaml.dump(project_conf, project_file) + element_name = "test.bst" + create_element_size(element_name, project_dir, ".", [], 1024) + + +def test_cache_size_roundtrip(cli, tmpdir): + # Builds (to put files in the cache), then invokes buildstream again + # to check nothing breaks + + # Create project + project_dir = str(tmpdir) + create_project(project_dir) + + # Build, to populate the cache + res = cli.run(project=project_dir, args=["build", "test.bst"]) + res.assert_success() + + # Show, to check that nothing breaks while reading cache size + res = cli.run(project=project_dir, args=["show", "test.bst"]) + res.assert_success() + + +def test_cache_size_write(cli, tmpdir): + # Builds (to put files in the cache), then checks a number is + # written to the cache size file. + + project_dir = str(tmpdir) + create_project(project_dir) + + # Artifact cache must be in a known place + artifactdir = os.path.join(project_dir, "artifacts") + cli.configure({"artifactdir": artifactdir}) + + # Build, to populate the cache + res = cli.run(project=project_dir, args=["build", "test.bst"]) + res.assert_success() + + # Inspect the artifact cache + sizefile = os.path.join(artifactdir, CACHE_SIZE_FILE) + assert os.path.isfile(sizefile) + with open(sizefile, "r") as f: + size_data = f.read() + size = int(size_data) diff --git a/tests/testutils/artifactshare.py b/tests/testutils/artifactshare.py index 76b729e33..05e87a499 100644 --- a/tests/testutils/artifactshare.py +++ b/tests/testutils/artifactshare.py @@ -140,6 +140,7 @@ class ArtifactShare(): return statvfs_result(f_blocks=self.total_space, f_bfree=self.free_space - repo_size, + f_bavail=self.free_space - repo_size, f_bsize=1) @@ -156,4 +157,4 @@ def create_artifact_share(directory, *, total_space=None, free_space=None): share.close() -statvfs_result = namedtuple('statvfs_result', 'f_blocks f_bfree f_bsize') +statvfs_result = namedtuple('statvfs_result', 'f_blocks f_bfree f_bsize f_bavail') |