summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorValentin David <valentin.david@codethink.co.uk>2018-11-15 14:49:56 +0100
committerValentin David <valentin.david@codethink.co.uk>2018-11-28 15:29:52 +0100
commita64f667db8cc8123b7a77c9871143fbe8d008aaf (patch)
tree4d310cdce44ee8443f6d3103a515d1cc28719447
parentb587953a9b502d15ff26b9957796207b1fa95fcf (diff)
downloadbuildstream-a64f667db8cc8123b7a77c9871143fbe8d008aaf.tar.gz
Move cas server from ref-based to object-based garbage collection.
-rw-r--r--buildstream/_artifactcache/cascache.py35
-rw-r--r--buildstream/_artifactcache/casserver.py14
2 files changed, 46 insertions, 3 deletions
diff --git a/buildstream/_artifactcache/cascache.py b/buildstream/_artifactcache/cascache.py
index 6b3bdb471..2ae36d22a 100644
--- a/buildstream/_artifactcache/cascache.py
+++ b/buildstream/_artifactcache/cascache.py
@@ -623,6 +623,41 @@ class CASCache():
# first ref of this list will be the file modified earliest.
return [ref for _, ref in sorted(zip(mtimes, refs))]
+ # list_objects():
+ #
+ # List cached objects in Least Recently Modified (LRM) order.
+ #
+ # Returns:
+ # (list) - A list of (mtime, object path) tuples in LRM order
+ #
+ def list_objects(self):
+ objs = []
+ mtimes = []
+
+ for root, _, files in os.walk(os.path.join(self.casdir, 'objects')):
+ for filename in files:
+ obj_path = os.path.join(root, filename)
+ try:
+ mtimes.append(os.path.getmtime(obj_path))
+ except FileNotFoundError:
+ pass
+ else:
+ objs.append(obj_path)
+
+ # NOTE: sorted() orders the (mtime, path) tuples by mtime, earliest first, thus the
+ # first element of this list will be the file modified earliest.
+ return sorted(zip(mtimes, objs))
+
+ def clean_up_refs_until(self, time):
+ ref_heads = os.path.join(self.casdir, 'refs', 'heads')
+
+ for root, _, files in os.walk(ref_heads):
+ for filename in files:
+ ref_path = os.path.join(root, filename)
+ # Obtain the mtime (the time a file was last modified)
+ if os.path.getmtime(ref_path) < time:
+ os.unlink(ref_path)
+
# remove():
#
# Removes the given symbolic ref from the repo.
diff --git a/buildstream/_artifactcache/casserver.py b/buildstream/_artifactcache/casserver.py
index e2344f0f7..3a6481fb2 100644
--- a/buildstream/_artifactcache/casserver.py
+++ b/buildstream/_artifactcache/casserver.py
@@ -462,12 +462,13 @@ def _clean_up_cache(cas, object_size):
return 0
# obtain a list of LRP artifacts
- LRP_artifacts = cas.list_refs()
+ LRP_objects = cas.list_objects()
removed_size = 0 # in bytes
+ last_mtime = 0
while object_size - removed_size > free_disk_space:
try:
- to_remove = LRP_artifacts.pop(0) # The first element in the list is the LRP artifact
+ last_mtime, to_remove = LRP_objects.pop(0) # The first element in the list is the LRP objects
except IndexError:
# This exception is caught if there are no more artifacts in the list
# LRP_artifacts. This means the the artifact is too large for the filesystem
@@ -476,7 +477,14 @@ def _clean_up_cache(cas, object_size):
"the filesystem which mounts the remote "
"cache".format(object_size))
- removed_size += cas.remove(to_remove, defer_prune=False)
+ try:
+ size = os.stat(to_remove).st_size
+ os.unlink(to_remove)
+ removed_size += size
+ except FileNotFoundError:
+ pass
+
+ cas.clean_up_refs_until(last_mtime)
if removed_size > 0:
logging.info("Successfully removed {} bytes from the cache".format(removed_size))