diff options
author | Valentin David <valentin.david@codethink.co.uk> | 2018-11-15 14:49:56 +0100 |
---|---|---|
committer | Valentin David <valentin.david@codethink.co.uk> | 2018-11-28 15:29:52 +0100 |
commit | a64f667db8cc8123b7a77c9871143fbe8d008aaf (patch) | |
tree | 4d310cdce44ee8443f6d3103a515d1cc28719447 | |
parent | b587953a9b502d15ff26b9957796207b1fa95fcf (diff) | |
download | buildstream-a64f667db8cc8123b7a77c9871143fbe8d008aaf.tar.gz |
Move cas server from ref-based to object-based garbage collection.
-rw-r--r-- | buildstream/_artifactcache/cascache.py | 35 | ||||
-rw-r--r-- | buildstream/_artifactcache/casserver.py | 14 |
2 files changed, 46 insertions, 3 deletions
diff --git a/buildstream/_artifactcache/cascache.py b/buildstream/_artifactcache/cascache.py index 6b3bdb471..2ae36d22a 100644 --- a/buildstream/_artifactcache/cascache.py +++ b/buildstream/_artifactcache/cascache.py @@ -623,6 +623,41 @@ class CASCache(): # first ref of this list will be the file modified earliest. return [ref for _, ref in sorted(zip(mtimes, refs))] + # list_objects(): + # + # List cached objects in Least Recently Modified (LRM) order. + # + # Returns: + # (list) - A list of objects and timestamps in LRM order + # + def list_objects(self): + objs = [] + mtimes = [] + + for root, _, files in os.walk(os.path.join(self.casdir, 'objects')): + for filename in files: + obj_path = os.path.join(root, filename) + try: + mtimes.append(os.path.getmtime(obj_path)) + except FileNotFoundError: + pass + else: + objs.append(obj_path) + + # NOTE: Sorted will sort from earliest to latest, thus the + # first element of this list will be the file modified earliest. + return sorted(zip(mtimes, objs)) + + def clean_up_refs_until(self, time): + ref_heads = os.path.join(self.casdir, 'refs', 'heads') + + for root, _, files in os.walk(ref_heads): + for filename in files: + ref_path = os.path.join(root, filename) + # Obtain the mtime (the time a file was last modified) + if os.path.getmtime(ref_path) < time: + os.unlink(ref_path) + # remove(): # # Removes the given symbolic ref from the repo. diff --git a/buildstream/_artifactcache/casserver.py b/buildstream/_artifactcache/casserver.py index e2344f0f7..3a6481fb2 100644 --- a/buildstream/_artifactcache/casserver.py +++ b/buildstream/_artifactcache/casserver.py @@ -462,12 +462,13 @@ def _clean_up_cache(cas, object_size): return 0 # obtain a list of LRP artifacts - LRP_artifacts = cas.list_refs() + LRP_objects = cas.list_objects() removed_size = 0 # in bytes + last_mtime = 0 while object_size - removed_size > free_disk_space: try: - to_remove = LRP_artifacts.pop(0) # The first element in the list is the LRP artifact + last_mtime, to_remove = LRP_objects.pop(0) # The first element in the list is the LRP objects except IndexError: # This exception is caught if there are no more artifacts in the list # LRP_artifacts. This means the the artifact is too large for the filesystem @@ -476,7 +477,14 @@ def _clean_up_cache(cas, object_size): "the filesystem which mounts the remote " "cache".format(object_size)) - removed_size += cas.remove(to_remove, defer_prune=False) + try: + size = os.stat(to_remove).st_size + os.unlink(to_remove) + removed_size += size + except FileNotFoundError: + pass + + cas.clean_up_refs_until(last_mtime) if removed_size > 0: logging.info("Successfully removed {} bytes from the cache".format(removed_size)) |