diff options
author | Valentin David <valentin.david@codethink.co.uk> | 2018-11-15 14:49:56 +0100 |
---|---|---|
committer | Valentin David <valentin.david@codethink.co.uk> | 2018-11-29 17:17:27 +0100 |
commit | d6b661e106c9a11edcc99dfe3a34bc50a74b9da0 (patch) | |
tree | 9ea20a894681d273718b81f62b8a5c7eba6c9e9b | |
parent | 2259dff16fdd4257edd3bc8ba3e4b0ca7b054d77 (diff) | |
download | buildstream-d6b661e106c9a11edcc99dfe3a34bc50a74b9da0.tar.gz |
Move cas server from ref-based to object-based garbage collection.
-rw-r--r-- | buildstream/_artifactcache/cascache.py | 35 | ||||
-rw-r--r-- | buildstream/_artifactcache/casserver.py | 14 |
2 files changed, 46 insertions, 3 deletions
diff --git a/buildstream/_artifactcache/cascache.py b/buildstream/_artifactcache/cascache.py
index 905a6cc7d..837363d9d 100644
--- a/buildstream/_artifactcache/cascache.py
+++ b/buildstream/_artifactcache/cascache.py
@@ -500,6 +500,41 @@ class CASCache(ArtifactCache):
         # first element of this list will be the file modified earliest.
         return [ref for _, ref in sorted(zip(mtimes, refs))]
 
+    # list_objects():
+    #
+    # List cached objects in Least Recently Modified (LRM) order.
+    #
+    # Returns:
+    #     (list) - A list of objects and timestamps in LRM order
+    #
+    def list_objects(self):
+        objs = []
+        mtimes = []
+
+        for root, _, files in os.walk(os.path.join(self.casdir, 'objects')):
+            for filename in files:
+                obj_path = os.path.join(root, filename)
+                try:
+                    mtimes.append(os.path.getmtime(obj_path))
+                except FileNotFoundError:
+                    pass
+                else:
+                    objs.append(obj_path)
+
+        # NOTE: Sorted will sort from earliest to latest, thus the
+        # first element of this list will be the file modified earliest.
+        return sorted(zip(mtimes, objs))
+
+    def clean_up_refs_until(self, time):
+        ref_heads = os.path.join(self.casdir, 'refs', 'heads')
+
+        for root, _, files in os.walk(ref_heads):
+            for filename in files:
+                ref_path = os.path.join(root, filename)
+                # Obtain the mtime (the time a file was last modified)
+                if os.path.getmtime(ref_path) < time:
+                    os.unlink(ref_path)
+
     # remove():
     #
     # Removes the given symbolic ref from the repo.
diff --git a/buildstream/_artifactcache/casserver.py b/buildstream/_artifactcache/casserver.py
index df9c4740c..503705d9e 100644
--- a/buildstream/_artifactcache/casserver.py
+++ b/buildstream/_artifactcache/casserver.py
@@ -465,12 +465,13 @@ def _clean_up_cache(cas, object_size):
         return 0
 
     # obtain a list of LRP artifacts
-    LRP_artifacts = cas.list_artifacts()
+    LRP_objects = cas.list_objects()
 
     removed_size = 0  # in bytes
+    last_mtime = 0
     while object_size - removed_size > free_disk_space:
         try:
-            to_remove = LRP_artifacts.pop(0)  # The first element in the list is the LRP artifact
+            last_mtime, to_remove = LRP_objects.pop(0)  # The first element in the list is the LRP objects
         except IndexError:
             # This exception is caught if there are no more artifacts in the list
             # LRP_artifacts. This means the the artifact is too large for the filesystem
@@ -479,7 +480,14 @@ def _clean_up_cache(cas, object_size):
                                             "the filesystem which mounts the remote "
                                             "cache".format(object_size))
 
-        removed_size += cas.remove(to_remove, defer_prune=False)
+        try:
+            size = os.stat(to_remove).st_size
+            os.unlink(to_remove)
+            removed_size += size
+        except FileNotFoundError:
+            pass
+
+    cas.clean_up_refs_until(last_mtime)
 
     if removed_size > 0:
         logging.info("Successfully removed {} bytes from the cache".format(removed_size))
|