summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTristan Maat <tristan.maat@codethink.co.uk>2018-07-16 18:00:41 +0100
committerTristan Van Berkom <tristan.vanberkom@codethink.co.uk>2018-07-18 14:45:59 +0900
commit339844487ef9ea6c897254bfe8f35a6648b28a5e (patch)
tree9aca6afbb82de4d4d8519a4adb45ed1c7c8b4cb5
parent249256346613dea52ad32d945dbd303f54bd245e (diff)
downloadbuildstream-339844487ef9ea6c897254bfe8f35a6648b28a5e.tar.gz
Add cache_quota to user config
-rw-r--r--buildstream/_context.py55
-rw-r--r--buildstream/data/userconfig.yaml8
-rw-r--r--buildstream/utils.py70
3 files changed, 133 insertions(+), 0 deletions(-)
diff --git a/buildstream/_context.py b/buildstream/_context.py
index 1a59af2b9..5cc7f434c 100644
--- a/buildstream/_context.py
+++ b/buildstream/_context.py
@@ -21,6 +21,7 @@ import os
import datetime
from collections import deque, Mapping
from contextlib import contextmanager
+from . import utils
from . import _cachekey
from . import _signals
from . import _site
@@ -62,6 +63,12 @@ class Context():
# The locations from which to push and pull prebuilt artifacts
self.artifact_cache_specs = []
+ # The artifact cache quota
+ self.cache_quota = None
+
+ # The lower threshold to which we aim to reduce the cache size
+ self.cache_lower_threshold = None
+
# The directory to store build logs
self.logdir = None
@@ -153,6 +160,7 @@ class Context():
_yaml.node_validate(defaults, [
'sourcedir', 'builddir', 'artifactdir', 'logdir',
'scheduler', 'artifacts', 'logging', 'projects',
+ 'cache'
])
for directory in ['sourcedir', 'builddir', 'artifactdir', 'logdir']:
@@ -165,6 +173,53 @@ class Context():
path = os.path.normpath(path)
setattr(self, directory, path)
+ # Load quota configuration
+ # We need to find the first existing directory in the path of
+ # our artifactdir - the artifactdir may not have been created
+ # yet.
+ cache = _yaml.node_get(defaults, Mapping, 'cache')
+ _yaml.node_validate(cache, ['quota'])
+
+ artifactdir_volume = self.artifactdir
+ while not os.path.exists(artifactdir_volume):
+ artifactdir_volume = os.path.dirname(artifactdir_volume)
+
+ # We read and parse the cache quota as specified by the user
+ cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity')
+ try:
+ cache_quota = utils._parse_size(cache_quota, artifactdir_volume)
+ except utils.UtilError as e:
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "{}\nPlease specify the value in bytes or as a % of full disk space.\n"
+ "\nValid values are, for example: 800M 10G 1T 50%\n"
+ .format(str(e))) from e
+
+ # If we are asked not to set a quota, we set it to the maximum
+ # disk space available minus a headroom of 2GB, such that we
+ # at least try to avoid raising Exceptions.
+ #
+ # Of course, we might still end up running out during a build
+ # if we end up writing more than 2G, but hey, this stuff is
+ # already really fuzzy.
+ #
+ if cache_quota is None:
+ stat = os.statvfs(artifactdir_volume)
+ # Again, the artifact directory may not yet have been
+ # created
+ if not os.path.exists(self.artifactdir):
+ cache_size = 0
+ else:
+ cache_size = utils._get_dir_size(self.artifactdir)
+ cache_quota = cache_size + stat.f_bsize * stat.f_bavail
+
+ if 'BST_TEST_SUITE' in os.environ:
+ headroom = 0
+ else:
+ headroom = 2e9
+
+ self.cache_quota = cache_quota - headroom
+ self.cache_lower_threshold = self.cache_quota / 2
+
# Load artifact share configuration
self.artifact_cache_specs = ArtifactCache.specs_from_config_node(defaults)
diff --git a/buildstream/data/userconfig.yaml b/buildstream/data/userconfig.yaml
index 6bb54ff9e..6f9f190a1 100644
--- a/buildstream/data/userconfig.yaml
+++ b/buildstream/data/userconfig.yaml
@@ -23,6 +23,14 @@ artifactdir: ${XDG_CACHE_HOME}/buildstream/artifacts
logdir: ${XDG_CACHE_HOME}/buildstream/logs
#
+# Cache
+#
+cache:
+ # Size of the artifact cache - BuildStream will attempt to keep the
+ # artifact cache within this size.
+ quota: infinity
+
+#
# Scheduler
#
scheduler:
diff --git a/buildstream/utils.py b/buildstream/utils.py
index 0bbc7a877..e8270d82f 100644
--- a/buildstream/utils.py
+++ b/buildstream/utils.py
@@ -538,6 +538,76 @@ def save_file_atomic(filename, mode='w', *, buffering=-1, encoding=None,
raise
+# _get_dir_size():
+#
+# Get the disk usage of a given directory in bytes.
+#
+# Arguments:
+#    path (str) The path whose size to check.
+#
+# Returns:
+# (int) The size on disk in bytes.
+#
+def _get_dir_size(path):
+ path = os.path.abspath(path)
+
+ def get_size(path):
+ total = 0
+
+ for f in os.scandir(path):
+ total += f.stat(follow_symlinks=False).st_size
+
+ if f.is_dir(follow_symlinks=False):
+ total += get_size(f.path)
+
+ return total
+
+ return get_size(path)
+
+
+# _parse_size():
+#
+# Convert a string representing data size to a number of
+# bytes. E.g. "2K" -> 2048.
+#
+# This uses the same format as systemd's
+# [resource-control](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#).
+#
+# Arguments:
+# size (str) The string to parse
+# volume (str) A path on the volume to consider for percentage
+# specifications
+#
+# Returns:
+# (int|None) The number of bytes, or None if 'infinity' was specified.
+#
+# Raises:
+# UtilError if the string is not a valid data size.
+#
+def _parse_size(size, volume):
+ if size == 'infinity':
+ return None
+
+ matches = re.fullmatch(r'([0-9]+\.?[0-9]*)([KMGT%]?)', size)
+ if matches is None:
+ raise UtilError("{} is not a valid data size.".format(size))
+
+ num, unit = matches.groups()
+
+ if unit == '%':
+ num = float(num)
+ if num > 100:
+ raise UtilError("{}% is not a valid percentage value.".format(num))
+
+ stat_ = os.statvfs(volume)
+ disk_size = stat_.f_blocks * stat_.f_bsize
+
+ return disk_size * (num / 100)
+
+ units = ('', 'K', 'M', 'G', 'T')
+ return int(num) * 1024**units.index(unit)
+
+
# A sentinel to be used as a default argument for functions that need
# to distinguish between a kwarg set to None and an unset kwarg.
_sentinel = object()