summaryrefslogtreecommitdiff
path: root/buildstream/_cas/cascache.py
diff options
context:
space:
mode:
Diffstat (limited to 'buildstream/_cas/cascache.py')
-rw-r--r--buildstream/_cas/cascache.py290
1 files changed, 279 insertions, 11 deletions
diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py
index 792bf3eb9..fe25efce6 100644
--- a/buildstream/_cas/cascache.py
+++ b/buildstream/_cas/cascache.py
@@ -32,17 +32,53 @@ from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from .._protos.buildstream.v2 import buildstream_pb2
from .. import utils
-from .._exceptions import CASCacheError
+from .._exceptions import CASCacheError, LoadError, LoadErrorReason
+from .._message import Message, MessageType
from .casremote import BlobNotFound, _CASBatchRead, _CASBatchUpdate
_BUFFER_SIZE = 65536
+CACHE_SIZE_FILE = "cache_size"
+
+
+# CASCacheUsage
+#
+# A simple object to report the current CAS cache usage details.
+#
+# Note that this uses the user configured cache quota
+# rather than the internal quota with protective headroom
+# removed, to provide a more sensible value to display to
+# the user.
+#
+# Args:
+# casquota (CASQuota): The CAS quota object to get the usage status of
+#
+class CASCacheUsage():
+
+ def __init__(self, casquota):
+ self.quota_config = casquota._config_cache_quota # Configured quota
+ self.quota_size = casquota._cache_quota_original # Resolved cache quota in bytes
+ self.used_size = casquota.get_cache_size() # Size used by artifacts in bytes
+ self.used_percent = 0 # Percentage of the quota used
+ if self.quota_size is not None:
+ self.used_percent = int(self.used_size * 100 / self.quota_size)
+
+ # Formattable into a human readable string
+ #
+ def __str__(self):
+ return "{} / {} ({}%)" \
+ .format(utils._pretty_size(self.used_size, dec_places=1),
+ self.quota_config,
+ self.used_percent)
+
+
# A CASCache manages a CAS repository as specified in the Remote Execution API.
#
# Args:
# path (str): The root directory for the CAS repository
+# cache_quota (int): User configured cache quota
#
class CASCache():
@@ -459,16 +495,6 @@ class CASCache():
except FileNotFoundError as e:
raise CASCacheError("Attempt to access unavailable ref: {}".format(e)) from e
- # calculate_cache_size()
- #
- # Return the real disk usage of the CAS cache.
- #
- # Returns:
- # (int): The size of the cache.
- #
- def calculate_cache_size(self):
- return utils._get_dir_size(self.casdir)
-
# list_refs():
#
# List refs in Least Recently Modified (LRM) order.
@@ -1043,6 +1069,248 @@ class CASCache():
batch.send()
+class CASQuota:
+ def __init__(self, context):
+ self.cas = context.get_cascache()
+ self.casdir = self.cas.casdir
+ self._config_cache_quota = context.config_cache_quota
+ self._config_cache_quota_string = context.config_cache_quota_string
+ self._cache_size = None # The current cache size, sometimes it's an estimate
+ self._cache_quota = None # The cache quota
+ self._cache_quota_original = None # The cache quota as specified by the user, in bytes
+ self._cache_quota_headroom = None # The headroom in bytes before reaching the quota or full disk
+ self._cache_lower_threshold = None # The target cache size for a cleanup
+ self.available_space = None
+
+ self._message = context.message
+
+ self._calculate_cache_quota()
+
+ # compute_cache_size()
+ #
+ # Computes the real artifact cache size by calling
+ # calculate_cache_size() and records it as the current size.
+ #
+ # Returns:
+ # (int): The size of the artifact cache.
+ #
+ def compute_cache_size(self):
+ old_cache_size = self._cache_size
+ new_cache_size = self.calculate_cache_size()
+
+ if old_cache_size != new_cache_size:
+ self._cache_size = new_cache_size
+
+ return self._cache_size
+
+ # calculate_cache_size()
+ #
+ # Return the real disk usage of the CAS cache.
+ #
+ # Returns:
+ # (int): The size of the cache.
+ #
+ def calculate_cache_size(self):
+ return utils._get_dir_size(self.casdir)
+
+ # get_cache_size()
+ #
+ # Fetches the cached size of the cache. This is sometimes
+ # an estimate and is periodically adjusted to the real size
+ # when a cache size calculation job runs.
+ #
+ # When it is an estimate, the value is either correct, or
+ # it is greater than the actual cache size.
+ #
+ # Returns:
+ # (int) An approximation of the artifact cache size, in bytes.
+ #
+ def get_cache_size(self):
+
+ # If we don't currently have an estimate, figure out the real cache size.
+ if self._cache_size is None:
+ stored_size = self._read_cache_size()
+ if stored_size is not None:
+ self._cache_size = stored_size
+ else:
+ self._cache_size = self.compute_cache_size()
+
+ return self._cache_size
+
+ # set_cache_size()
+ #
+ # Forcefully set the overall cache size.
+ #
+ # This is used to update the size in the main process after
+ # having calculated it in a cleanup or a cache size calculation job.
+ #
+ # Args:
+ # cache_size (int): The size to set.
+ #
+ def set_cache_size(self, cache_size):
+
+ assert cache_size is not None
+
+ self._cache_size = cache_size
+ self._write_cache_size(self._cache_size)
+
+ # full()
+ #
+ # Checks if the artifact cache is full, either
+ # because the user configured quota has been exceeded
+ # or because the underlying disk is almost full.
+ #
+ # Returns:
+ # (bool): True if the artifact cache is full
+ #
+ def full(self):
+
+ if self.get_cache_size() > self._cache_quota:
+ return True
+
+ _, volume_avail = self._get_cache_volume_size()
+ if volume_avail < self._cache_quota_headroom:
+ return True
+
+ return False
+
+ ################################################
+ # Local Private Methods #
+ ################################################
+
+ # _read_cache_size()
+ #
+ # Reads and returns the size of the artifact cache that's stored in the
+ # cache's size file
+ #
+ # Returns:
+ # (int): The size of the artifact cache, as recorded in the file, or None if the size file does not exist
+ #
+ def _read_cache_size(self):
+ size_file_path = os.path.join(self.casdir, CACHE_SIZE_FILE)
+
+ if not os.path.exists(size_file_path):
+ return None
+
+ with open(size_file_path, "r") as f:
+ size = f.read()
+
+ try:
+ num_size = int(size)
+ except ValueError as e:
+ raise CASCacheError("Size '{}' parsed from '{}' was not an integer".format(
+ size, size_file_path)) from e
+
+ return num_size
+
+ # _write_cache_size()
+ #
+ # Writes the given size of the artifact cache to the cache's size file
+ #
+ # Args:
+ # size (int): The size of the artifact cache to record
+ #
+ def _write_cache_size(self, size):
+ assert isinstance(size, int)
+ size_file_path = os.path.join(self.casdir, CACHE_SIZE_FILE)
+ with utils.save_file_atomic(size_file_path, "w") as f:
+ f.write(str(size))
+
+ # _get_cache_volume_size()
+ #
+ # Get the available space and total space for the volume on
+ # which the artifact cache is located.
+ #
+ # Returns:
+ # (int): The total number of bytes on the volume
+ # (int): The number of available bytes on the volume
+ #
+ # NOTE: We use this stub to allow the test cases
+ # to override what an artifact cache thinks
+ # about its disk size and available bytes.
+ #
+ def _get_cache_volume_size(self):
+ return utils._get_volume_size(self.casdir)
+
+ # _calculate_cache_quota()
+ #
+ # Calculates and sets the cache quota and lower threshold based on the
+ # quota set in Context.
+ # It checks that the quota is both a valid expression, and that there is
+ # enough disk space to satisfy that quota
+ #
+ def _calculate_cache_quota(self):
+ # Headroom intended to give BuildStream a bit of leeway.
+ # This acts as the minimum size of cache_quota and also
+ # is taken from the user requested cache_quota.
+ #
+ if 'BST_TEST_SUITE' in os.environ:
+ self._cache_quota_headroom = 0
+ else:
+ self._cache_quota_headroom = 2e9
+
+ total_size, available_space = self._get_cache_volume_size()
+ cache_size = self.get_cache_size()
+ self.available_space = available_space
+
+ # Ensure system has enough storage for the cache_quota
+ #
+ # If cache_quota is none, set it to the maximum it could possibly be.
+ #
+ # Also check that cache_quota is at least as large as our headroom.
+ #
+ cache_quota = self._config_cache_quota
+ if cache_quota is None: # Infinity, set to max system storage
+ cache_quota = cache_size + available_space
+ if cache_quota < self._cache_quota_headroom: # Check minimum
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) +
+ "BuildStream requires a minimum cache quota of 2G.")
+ elif cache_quota > total_size:
+ # A quota greater than the total disk size is certainly an error
+ raise CASCacheError("Your system does not have enough available " +
+ "space to support the cache quota specified.",
+ detail=("You have specified a quota of {quota} total disk space.\n" +
+ "The filesystem containing {local_cache_path} only " +
+ "has {total_size} total disk space.")
+ .format(
+ quota=self._config_cache_quota,
+ local_cache_path=self.casdir,
+ total_size=utils._pretty_size(total_size)),
+ reason='insufficient-storage-for-quota')
+
+ elif cache_quota > cache_size + available_space:
+ # The quota does not fit in the available space, this is a warning
+ if '%' in self._config_cache_quota_string:
+ available = (available_space / total_size) * 100
+ available = '{}% of total disk space'.format(round(available, 1))
+ else:
+ available = utils._pretty_size(available_space)
+
+ self._message(Message(
+ None,
+ MessageType.WARN,
+ "Your system does not have enough available " +
+ "space to support the cache quota specified.",
+ detail=("You have specified a quota of {quota} total disk space.\n" +
+ "The filesystem containing {local_cache_path} only " +
+ "has {available_size} available.")
+ .format(quota=self._config_cache_quota,
+ local_cache_path=self.casdir,
+ available_size=available)))
+
+ # Subtract a slight headroom (2e9 bytes, i.e. 2GB, outside the test suite)
+ # from cache_quota to try and avoid exceptions.
+ #
+ # Of course, we might still end up running out during a build
+ # if we end up writing more than 2G, but hey, this stuff is
+ # already really fuzzy.
+ #
+ self._cache_quota_original = cache_quota
+ self._cache_quota = cache_quota - self._cache_quota_headroom
+ self._cache_lower_threshold = self._cache_quota / 2
+
+
def _grouper(iterable, n):
while True:
try: