summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRaoul Hidalgo Charman <raoul.hidalgocharman@codethink.co.uk>2019-02-21 17:39:52 +0000
committerJürg Billeter <j@bitron.ch>2019-03-14 07:12:34 +0000
commitcbc5af53e7dc6b2d450728f6c17b9a79ff167990 (patch)
tree0b94e7418b3b48e3008e2adcaa0d0d61a018ab7d
parentf4dbce44a8b8135daca1895772048b2e25fc8268 (diff)
downloadbuildstream-cbc5af53e7dc6b2d450728f6c17b9a79ff167990.tar.gz
CASQuota: move clean method here
We need the clean method to be able to remove source refs as well. Part of #440
-rw-r--r--buildstream/_artifactcache.py117
-rw-r--r--buildstream/_cas/cascache.py136
-rw-r--r--buildstream/_scheduler/jobs/cleanupjob.py2
-rw-r--r--tests/artifactcache/expiry.py6
4 files changed, 147 insertions, 114 deletions
diff --git a/buildstream/_artifactcache.py b/buildstream/_artifactcache.py
index 3afa980d2..e04648bee 100644
--- a/buildstream/_artifactcache.py
+++ b/buildstream/_artifactcache.py
@@ -17,15 +17,11 @@
# Authors:
# Tristan Maat <tristan.maat@codethink.co.uk>
-import os
-
from ._basecache import BaseCache
from .types import _KeyStrength
from ._exceptions import ArtifactError, CASError
-from ._message import MessageType
-from . import utils
-from ._cas import CASRemoteSpec, CASCacheUsage
+from ._cas import CASRemoteSpec
from .storage._casbaseddirectory import CasBasedDirectory
@@ -61,6 +57,9 @@ class ArtifactCache(BaseCache):
self._required_elements = set() # The elements required for this session
+ self.casquota.add_ref_callbacks(self.required_artifacts())
+ self.casquota.add_remove_callbacks((lambda x: not x.startswith('sources/'), self.remove))
+
# mark_required_elements():
#
# Mark elements whose artifacts are required for the current run.
@@ -102,117 +101,15 @@ class ArtifactCache(BaseCache):
except CASError:
pass
- # clean():
- #
- # Clean the artifact cache as much as possible.
- #
- # Args:
- # progress (callable): A callback to call when a ref is removed
- #
- # Returns:
- # (int): The size of the cache after having cleaned up
- #
- def clean(self, progress=None):
- artifacts = self.list_artifacts()
- context = self.context
-
- # Some accumulative statistics
- removed_ref_count = 0
- space_saved = 0
-
- # Start off with an announcement with as much info as possible
- volume_size, volume_avail = self.casquota._get_cache_volume_size()
- self._message(MessageType.STATUS, "Starting cache cleanup",
- detail=("Elements required by the current build plan: {}\n" +
- "User specified quota: {} ({})\n" +
- "Cache usage: {}\n" +
- "Cache volume: {} total, {} available")
- .format(len(self._required_elements),
- context.config_cache_quota,
- utils._pretty_size(self.casquota._cache_quota, dec_places=2),
- utils._pretty_size(self.casquota.get_cache_size(), dec_places=2),
- utils._pretty_size(volume_size, dec_places=2),
- utils._pretty_size(volume_avail, dec_places=2)))
-
+ def required_artifacts(self):
# Build a set of the cache keys which are required
# based on the required elements at cleanup time
#
# We lock both strong and weak keys - deleting one but not the
# other won't save space, but would be a user inconvenience.
- required_artifacts = set()
for element in self._required_elements:
- required_artifacts.update([
- element._get_cache_key(strength=_KeyStrength.STRONG),
- element._get_cache_key(strength=_KeyStrength.WEAK)
- ])
-
- # Do a real computation of the cache size once, just in case
- self.casquota.compute_cache_size()
- usage = CASCacheUsage(self.casquota)
- self._message(MessageType.STATUS, "Cache usage recomputed: {}".format(usage))
-
- while self.casquota.get_cache_size() >= self.casquota._cache_lower_threshold:
- try:
- to_remove = artifacts.pop(0)
- except IndexError:
- # If too many artifacts are required, and we therefore
- # can't remove them, we have to abort the build.
- #
- # FIXME: Asking the user what to do may be neater
- #
- default_conf = os.path.join(os.environ['XDG_CONFIG_HOME'],
- 'buildstream.conf')
- detail = ("Aborted after removing {} refs and saving {} disk space.\n"
- "The remaining {} in the cache is required by the {} elements in your build plan\n\n"
- "There is not enough space to complete the build.\n"
- "Please increase the cache-quota in {} and/or make more disk space."
- .format(removed_ref_count,
- utils._pretty_size(space_saved, dec_places=2),
- utils._pretty_size(self.casquota.get_cache_size(), dec_places=2),
- len(self._required_elements),
- (context.config_origin or default_conf)))
-
- if self.full():
- raise ArtifactError("Cache too full. Aborting.",
- detail=detail,
- reason="cache-too-full")
- else:
- break
-
- key = to_remove.rpartition('/')[2]
- if key not in required_artifacts:
-
- # Remove the actual artifact, if it's not required.
- size = self.remove(to_remove)
-
- removed_ref_count += 1
- space_saved += size
-
- self._message(MessageType.STATUS,
- "Freed {: <7} {}".format(
- utils._pretty_size(size, dec_places=2),
- to_remove))
-
- # Remove the size from the removed size
- self.casquota.set_cache_size(self.casquota._cache_size - size)
-
- # User callback
- #
- # Currently this process is fairly slow, but we should
- # think about throttling this progress() callback if this
- # becomes too intense.
- if progress:
- progress()
-
- # Informational message about the side effects of the cleanup
- self._message(MessageType.INFO, "Cleanup completed",
- detail=("Removed {} refs and saving {} disk space.\n" +
- "Cache usage is now: {}")
- .format(removed_ref_count,
- utils._pretty_size(space_saved, dec_places=2),
- utils._pretty_size(self.casquota.get_cache_size(), dec_places=2)))
-
- return self.casquota.get_cache_size()
+ yield element._get_cache_key(strength=_KeyStrength.STRONG)
+ yield element._get_cache_key(strength=_KeyStrength.WEAK)
def full(self):
return self.casquota.full()
diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py
index 02030bb68..bb65269ad 100644
--- a/buildstream/_cas/cascache.py
+++ b/buildstream/_cas/cascache.py
@@ -1041,6 +1041,7 @@ class CASCache():
class CASQuota:
def __init__(self, context):
+ self.context = context
self.cas = context.get_cascache()
self.casdir = self.cas.casdir
self._config_cache_quota = context.config_cache_quota
@@ -1054,6 +1055,9 @@ class CASQuota:
self._message = context.message
+ self._ref_callbacks = [] # Call backs to get required refs
+ self._remove_callbacks = [] # Call backs to remove refs
+
self._calculate_cache_quota()
# compute_cache_size()
@@ -1283,6 +1287,138 @@ class CASQuota:
self._cache_quota = cache_quota - self._cache_quota_headroom
self._cache_lower_threshold = self._cache_quota / 2
+ # clean():
+ #
+ # Clean the cache as much as possible, removing refs that are not required.
+ #
+ # Args:
+ # progress (callable): A callback to call when a ref is removed
+ #
+ # Returns:
+ # (int): The size of the cache after having cleaned up
+ #
+ def clean(self, progress=None):
+ context = self.context
+
+ # Some accumulative statistics
+ removed_ref_count = 0
+ space_saved = 0
+
+ # get required refs
+ refs = self.cas.list_refs()
+ required_refs = set(itertools.chain.from_iterable(self._ref_callbacks))
+
+ # Start off with an announcement with as much info as possible
+ volume_size, volume_avail = self._get_cache_volume_size()
+ self._message(Message(
+ None, MessageType.STATUS, "Starting cache cleanup",
+ detail=("Elements required by the current build plan: {}\n" +
+ "User specified quota: {} ({})\n" +
+ "Cache usage: {}\n" +
+ "Cache volume: {} total, {} available")
+ .format(len(required_refs),
+ context.config_cache_quota,
+ utils._pretty_size(self._cache_quota, dec_places=2),
+ utils._pretty_size(self.get_cache_size(), dec_places=2),
+ utils._pretty_size(volume_size, dec_places=2),
+ utils._pretty_size(volume_avail, dec_places=2))))
+
+ # Do a real computation of the cache size once, just in case
+ self.compute_cache_size()
+ usage = CASCacheUsage(self)
+ self._message(Message(None, MessageType.STATUS,
+ "Cache usage recomputed: {}".format(usage)))
+
+ while self.get_cache_size() >= self._cache_lower_threshold:
+ try:
+ to_remove = refs.pop(0)
+ except IndexError:
+ # If too many artifacts are required, and we therefore
+ # can't remove them, we have to abort the build.
+ #
+ # FIXME: Asking the user what to do may be neater
+ #
+ default_conf = os.path.join(os.environ['XDG_CONFIG_HOME'],
+ 'buildstream.conf')
+ detail = ("Aborted after removing {} refs and saving {} disk space.\n"
+ "The remaining {} in the cache is required by the {} elements in your build plan\n\n"
+ "There is not enough space to complete the build.\n"
+ "Please increase the cache-quota in {} and/or make more disk space."
+ .format(removed_ref_count,
+ utils._pretty_size(space_saved, dec_places=2),
+ utils._pretty_size(self.get_cache_size(), dec_places=2),
+ len(required_refs),
+ (context.config_origin or default_conf)))
+
+ if self.full():
+ raise CASCacheError("Cache too full. Aborting.",
+ detail=detail,
+ reason="cache-too-full")
+ else:
+ break
+
+ key = to_remove.rpartition('/')[2]
+ if key not in required_refs:
+
+ # Remove the actual artifact, if it's not required.
+ size = 0
+ removed_ref = False
+ for (pred, remove) in self._remove_callbacks:
+ if pred(to_remove):
+ size = remove(to_remove)
+ removed_ref = True
+ break
+
+ if not removed_ref:
+ continue
+
+ removed_ref_count += 1
+ space_saved += size
+
+ self._message(Message(
+ None, MessageType.STATUS,
+ "Freed {: <7} {}".format(
+ utils._pretty_size(size, dec_places=2),
+ to_remove)))
+
+ # Subtract the freed size from the recorded cache size
+ self.set_cache_size(self._cache_size - size)
+
+ # User callback
+ #
+ # Currently this process is fairly slow, but we should
+ # think about throttling this progress() callback if this
+ # becomes too intense.
+ if progress:
+ progress()
+
+ # Informational message about the side effects of the cleanup
+ self._message(Message(
+ None, MessageType.INFO, "Cleanup completed",
+ detail=("Removed {} refs and saving {} disk space.\n" +
+ "Cache usage is now: {}")
+ .format(removed_ref_count,
+ utils._pretty_size(space_saved, dec_places=2),
+ utils._pretty_size(self.get_cache_size(), dec_places=2))))
+
+ return self.get_cache_size()
+
+ # add_ref_callbacks()
+ #
+ # Args:
+ # callback (Iterable): An iterable yielding the keys of the required refs
+ def add_ref_callbacks(self, callback):
+ self._ref_callbacks.append(callback)
+
+ # add_remove_callbacks()
+ #
+ # Args:
+ # callback (predicate, callback): A pair where the predicate takes a ref and
+ # says whether the callback handles that kind of ref, and the callback
+ # removes the ref and returns the size it freed.
+ def add_remove_callbacks(self, callback):
+ self._remove_callbacks.append(callback)
+
def _grouper(iterable, n):
while True:
diff --git a/buildstream/_scheduler/jobs/cleanupjob.py b/buildstream/_scheduler/jobs/cleanupjob.py
index e016d4cd7..9610d53f8 100644
--- a/buildstream/_scheduler/jobs/cleanupjob.py
+++ b/buildstream/_scheduler/jobs/cleanupjob.py
@@ -32,7 +32,7 @@ class CleanupJob(Job):
def progress():
self.send_message('update-cache-size',
self._casquota.get_cache_size())
- return self._artifacts.clean(progress)
+ return self._casquota.clean(progress)
def handle_message(self, message_type, message):
# Update the cache size in the main process as we go,
diff --git a/tests/artifactcache/expiry.py b/tests/artifactcache/expiry.py
index 20e8cbda2..23dc61d0f 100644
--- a/tests/artifactcache/expiry.py
+++ b/tests/artifactcache/expiry.py
@@ -94,7 +94,7 @@ def test_artifact_too_large(cli, datafiles, size):
create_element_size('target.bst', project, element_path, [], size)
res = cli.run(project=project, args=['build', 'target.bst'])
res.assert_main_error(ErrorDomain.STREAM, None)
- res.assert_task_error(ErrorDomain.ARTIFACT, 'cache-too-full')
+ res.assert_task_error(ErrorDomain.CAS, 'cache-too-full')
@pytest.mark.datafiles(DATA_DIR)
@@ -224,7 +224,7 @@ def test_never_delete_required(cli, datafiles):
# cache. Since all elements are required, the build should fail.
res = cli.run(project=project, args=['build', 'target.bst'])
res.assert_main_error(ErrorDomain.STREAM, None)
- res.assert_task_error(ErrorDomain.ARTIFACT, 'cache-too-full')
+ res.assert_task_error(ErrorDomain.CAS, 'cache-too-full')
# Only the first artifact fits in the cache, but we expect
# that the first *two* artifacts will be cached.
@@ -295,7 +295,7 @@ def test_never_delete_required_track(cli, datafiles):
#
res = cli.run(project=project, args=['build', '--track-all', 'target.bst'])
res.assert_main_error(ErrorDomain.STREAM, None)
- res.assert_task_error(ErrorDomain.ARTIFACT, 'cache-too-full')
+ res.assert_task_error(ErrorDomain.CAS, 'cache-too-full')
# Expect the same result that we did in test_never_delete_required()
#