summaryrefslogtreecommitdiff
path: root/morphlib
diff options
context:
space:
mode:
author Richard Maw <richard.maw@codethink.co.uk> 2013-06-09 18:35:40 +0000
committer Richard Maw <richard.maw@codethink.co.uk> 2013-06-09 18:35:40 +0000
commit 3b02eff30db5591ac43de6cd4638585ecc0cb5e2 (patch)
tree 13769c9a3121d850d36f20b68548d32045baff1c /morphlib
parent 174d5d734db09171342f60f4777a0cbe59a29df1 (diff)
parent f461727eeb43bdf1e9caf385997fd9fc860d8e1a (diff)
download morph-3b02eff30db5591ac43de6cd4638585ecc0cb5e2.tar.gz
Merge branch 'baserock/richardmaw/S7905-morph-gc-v3' of git://git.baserock.org/baserock/baserock/morph
There were no major objections to the design and it has been sitting in the merge queue long enough that it was decided to merge it. It has been reviewed and lightly tested by Jonathan Maw.
Diffstat (limited to 'morphlib')
-rw-r--r-- morphlib/localartifactcache.py 28
-rw-r--r-- morphlib/localartifactcache_tests.py 36
-rw-r--r-- morphlib/plugins/gc_plugin.py 158
-rw-r--r-- morphlib/util.py 33
4 files changed, 247 insertions, 8 deletions
diff --git a/morphlib/localartifactcache.py b/morphlib/localartifactcache.py
index b845cebf..76d085d1 100644
--- a/morphlib/localartifactcache.py
+++ b/morphlib/localartifactcache.py
@@ -14,6 +14,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import collections
import os
import morphlib
@@ -115,3 +116,30 @@ class LocalArtifactCache(object):
for basename in basenames:
os.remove(os.path.join(dirname, basename))
def list_contents(self):
    '''Return the set of sources cached and related information.

    Walks self.cachedir and groups every file found by the cache key
    at the start of its name.

    Returns an iterable of (cache_key, set(artifacts), last_used)
    tuples, one per cache key, where last_used is the newest st_mtime
    among that key's files. The iterable is a one-shot generator; wrap
    it in list() if it needs to be traversed more than once.

    '''
    # Filenames are assumed to be '<cache key>.<artifact name>' with a
    # 64-character key (the unit tests use 64-character keys) -- TODO
    # confirm against the code that names cached files.
    key_length = 64
    CacheInfo = collections.namedtuple('CacheInfo', ('artifacts', 'mtime'))
    contents = collections.defaultdict(lambda: CacheInfo(set(), 0))
    for dirpath, dirnames, filenames in os.walk(self.cachedir):
        for filename in filenames:
            # The key is the first 64 characters, index 64 holds the
            # separator, and the artifact name follows it. Slicing the
            # key at 63 would silently drop its last character.
            cachekey = filename[:key_length]
            artifact = filename[key_length + 1:]
            artifacts, max_mtime = contents[cachekey]
            artifacts.add(artifact)
            this_mtime = os.stat(os.path.join(dirpath, filename)).st_mtime
            contents[cachekey] = CacheInfo(artifacts,
                                           max(max_mtime, this_mtime))
    # items() rather than iteritems() so this works on Python 2 and 3.
    return ((cache_key, info.artifacts, info.mtime)
            for cache_key, info in contents.items())
+
+
def remove(self, cachekey):
    '''Delete every cached file whose name begins with cachekey.

    The whole tree below self.cachedir is searched, so matching files
    in subdirectories are removed as well. Directories themselves are
    left in place.

    '''
    for root, _subdirs, names in os.walk(self.cachedir):
        matching = [name for name in names if name.startswith(cachekey)]
        for name in matching:
            os.remove(os.path.join(root, name))
diff --git a/morphlib/localartifactcache_tests.py b/morphlib/localartifactcache_tests.py
index 36b5e891..082b926a 100644
--- a/morphlib/localartifactcache_tests.py
+++ b/morphlib/localartifactcache_tests.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2012 Codethink Limited
+# Copyright (C) 2012,2013 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -47,8 +47,10 @@ class LocalArtifactCacheTests(unittest.TestCase):
'repo', 'ref', 'sha1', 'tree', morph, 'chunk.morph')
self.runtime_artifact = morphlib.artifact.Artifact(
self.source, 'chunk-runtime')
+ self.runtime_artifact.cache_key = '0'*64
self.devel_artifact = morphlib.artifact.Artifact(
self.source, 'chunk-devel')
+ self.devel_artifact.cache_key = '0'*64
def tearDown(self):
self.tempdir.remove()
@@ -155,3 +157,35 @@ class LocalArtifactCacheTests(unittest.TestCase):
cache.clear()
self.assertFalse(cache.has(self.runtime_artifact))
def test_put_artifacts_and_list_them_afterwards(self):
    '''Artifacts that share a cache key are listed as a single entry.'''
    cache = morphlib.localartifactcache.LocalArtifactCache(
        self.tempdir.dirname)

    handle = cache.put(self.runtime_artifact)
    handle.write('runtime')
    handle.close()

    # assertEqual reports the actual count when the check fails.
    self.assertEqual(len(list(cache.list_contents())), 1)

    handle = cache.put(self.devel_artifact)
    handle.write('devel')
    handle.close()

    # Still one entry: both artifacts are given the same cache key.
    self.assertEqual(len(list(cache.list_contents())), 1)
+
def test_put_artifacts_and_remove_them_afterwards(self):
    '''Removing a listed cache key leaves the cache listing empty.'''
    cache = morphlib.localartifactcache.LocalArtifactCache(
        self.tempdir.dirname)

    handle = cache.put(self.runtime_artifact)
    handle.write('runtime')
    handle.close()

    handle = cache.put(self.devel_artifact)
    handle.write('devel')
    handle.close()

    # Each listed entry is (cache_key, artifacts, mtime); take the key
    # of the first entry.
    key = list(cache.list_contents())[0][0]
    cache.remove(key)

    # assertEqual reports the actual count when the check fails.
    self.assertEqual(len(list(cache.list_contents())), 0)
diff --git a/morphlib/plugins/gc_plugin.py b/morphlib/plugins/gc_plugin.py
new file mode 100644
index 00000000..cc82cae9
--- /dev/null
+++ b/morphlib/plugins/gc_plugin.py
@@ -0,0 +1,158 @@
+# Copyright (C) 2013 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import shutil
+import time
+
+import cliapp
+
+import morphlib
+
+
class GCPlugin(cliapp.Plugin):

    '''Plugin that adds the `gc` subcommand and its age settings.'''

    def enable(self):
        '''Register the gc subcommand and the two artifact-age settings.'''
        self.app.add_subcommand('gc', self.gc,
                                arg_synopsis='')
        self.app.settings.integer(['cachedir-artifact-delete-older-than'],
                                  'always delete artifacts older than this '
                                  'period in seconds, (default: 1 week)',
                                  metavar='PERIOD',
                                  group="Storage Options",
                                  default=(60*60*24*7))
        self.app.settings.integer(['cachedir-artifact-keep-younger-than'],
                                  'allow deletion of artifacts older than '
                                  'this period in seconds, (default: 1 day)',
                                  metavar='PERIOD',
                                  group="Storage Options",
                                  default=(60*60*24))

    def disable(self):
        '''Nothing to undo when the plugin is disabled.'''
        pass

    # NOTE(review): the gc docstring presumably doubles as the
    # subcommand's user-visible help text, so its wording is left
    # untouched -- confirm against cliapp.
    def gc(self, args):
        '''Make space by removing unused files.

        This removes all artifacts older than
        --cachedir-artifact-delete-older-than, and may delete artifacts
        older than --cachedir-artifact-keep-younger-than if it still
        needs to make space.

        This removes extracted chunks and staging areas for failed builds
        from the directory specified by --tempdir.

        '''

        tempdir = self.app.settings['tempdir']
        cachedir = self.app.settings['cachedir']
        tempdir_min_space, cachedir_min_space = \
            morphlib.util.unify_space_requirements(
                tempdir, self.app.settings['tempdir-min-space'],
                cachedir, self.app.settings['cachedir-min-space'])

        self.cleanup_tempdir(tempdir, tempdir_min_space)
        self.cleanup_cachedir(cachedir, cachedir_min_space)

    def cleanup_tempdir(self, temp_path, min_space):
        '''Remove the 'failed' and 'chunks' subdirectories of temp_path.

        Stops as soon as at least min_space bytes are free in temp_path,
        so later subdirectories are kept if earlier removals sufficed.

        '''
        self.app.status(msg='Cleaning up temp dir %(temp_path)s',
                        temp_path=temp_path, chatty=True)
        for subdir in ('failed', 'chunks'):
            if morphlib.util.get_bytes_free_in_path(temp_path) >= min_space:
                self.app.status(msg='Not Removing subdirectory '
                                    '%(subdir)s, enough space already cleared',
                                subdir=os.path.join(temp_path, subdir),
                                chatty=True)
                break
            # The message is passed as msg=, like every other status()
            # call in this class, rather than positionally.
            self.app.status(msg='Removing temp subdirectory: %(subdir)s',
                            subdir=subdir)
            path = os.path.join(temp_path, subdir)
            if os.path.exists(path):
                shutil.rmtree(path)

    def calculate_delete_range(self):
        '''Return (always_delete_age, may_delete_age) as timestamps.

        Each value is the current time minus the corresponding setting,
        i.e. a point in time to compare against file mtimes.

        '''
        now = time.time()
        always_delete_age = \
            now - self.app.settings['cachedir-artifact-delete-older-than']
        may_delete_age = \
            now - self.app.settings['cachedir-artifact-keep-younger-than']
        return always_delete_age, may_delete_age

    def find_deletable_artifacts(self, lac, max_age, min_age):
        '''Classify the cache keys in lac by modification time.

        Returns a pair (always, maybe):

        * always is the set of cache keys whose mtime is before max_age;
        * maybe is a list of cache keys whose mtime lies in
          [max_age, min_age), ordered oldest first.

        Keys with an mtime of min_age or later appear in neither.

        '''
        # list_contents() may be a one-shot iterable and is traversed
        # twice below, so materialise it first.
        contents = list(lac.list_contents())
        always = set(cachekey
                     for cachekey, artifacts, mtime in contents
                     if mtime < max_age)
        maybe = ((cachekey, mtime)
                 for cachekey, artifacts, mtime in contents
                 if max_age <= mtime < min_age)
        return always, [cachekey for cachekey, mtime
                        in sorted(maybe, key=lambda x: x[1])]

    def cleanup_cachedir(self, cache_path, min_space):
        '''Remove cached artifacts until min_space bytes are free.

        Does nothing if cache_path already has enough free space.
        Otherwise every key in the always-delete set is removed, then
        the may-delete keys are removed oldest first until enough space
        is free. An error status is reported if space is still short.

        '''
        def sufficient_free():
            free = morphlib.util.get_bytes_free_in_path(cache_path)
            return (free >= min_space)
        if sufficient_free():
            self.app.status(msg='Not cleaning up cachedir, '
                                'sufficient space already cleared',
                            chatty=True)
            return
        lac = morphlib.localartifactcache.LocalArtifactCache(cache_path)
        max_age, min_age = self.calculate_delete_range()
        logging.debug('Must remove artifacts older than timestamp %d',
                      max_age)
        always_delete, may_delete = \
            self.find_deletable_artifacts(lac, max_age, min_age)
        removed = 0
        source_count = len(always_delete) + len(may_delete)
        logging.debug('Must remove artifacts %s', repr(always_delete))
        logging.debug('Can remove artifacts %s', repr(may_delete))

        # Remove all old artifacts
        for cachekey in always_delete:
            self.app.status(msg='Removing source %(cachekey)s',
                            cachekey=cachekey, chatty=True)
            lac.remove(cachekey)
            removed += 1

        # Maybe remove remaining middle-aged artifacts
        for cachekey in may_delete:
            if sufficient_free():
                self.app.status(msg='Finished cleaning up cachedir with '
                                    '%(remaining)d old sources remaining',
                                remaining=(source_count - removed),
                                chatty=True)
                break
            self.app.status(msg='Removing source %(cachekey)s',
                            cachekey=cachekey, chatty=True)
            lac.remove(cachekey)
            removed += 1

        if sufficient_free():
            self.app.status(msg='Made sufficient space in %(cache_path)s '
                                'after removing %(removed)d sources',
                            removed=removed, cache_path=cache_path)
            return
        self.app.status(msg='Unable to clear enough space in %(cache_path)s '
                            'after removing %(removed)d sources. Please '
                            'reduce cachedir-artifact-keep-younger-than, '
                            'clear space from elsewhere, enlarge the disk '
                            'or reduce cachedir-min-space.',
                        cache_path=cache_path, removed=removed,
                        error=True)
diff --git a/morphlib/util.py b/morphlib/util.py
index 1a347da0..b56443e9 100644
--- a/morphlib/util.py
+++ b/morphlib/util.py
@@ -237,16 +237,29 @@ def on_same_filesystem(path_a, path_b): # pragma: no cover
# TODO: return true if one path is a subvolume of the other on btrfs?
return os.stat(path_a).st_dev == os.stat(path_b).st_dev
def unify_space_requirements(tmp_path, tmp_min_size,
                             cache_path, cache_min_size): # pragma: no cover
    """Combine the minimum sizes of two paths that share a filesystem.

    Returns a (tmp_min_size, cache_min_size) pair. If both paths are on
    the same filesystem, each element is the sum of the two requested
    sizes; otherwise the sizes are returned unchanged.

    """
    # TODO: make this work for variable number of (path, size) pairs as needed
    #       hint: try list.sort and itertools.groupby
    if on_same_filesystem(tmp_path, cache_path):
        combined = tmp_min_size + cache_min_size
        return combined, combined
    return tmp_min_size, cache_min_size
+
def check_disk_available(tmp_path, tmp_min_size,
cache_path, cache_min_size): # pragma: no cover
# if both are on the same filesystem, assume they share a storage pool,
# so the sum of the two sizes needs to be available
- # TODO: if we need to do this on any more than 2 filesystems
- # extend it to take a [(path, min)] and do some arcane mathematics
- # to split it into groups that share a filesystem
- # hint: try list.sort and itertools.groupby
- if on_same_filesystem(tmp_path, cache_path):
- tmp_min_size = cache_min_size = tmp_min_size + cache_min_size
+ # TODO: if we need to do this on any more than 2 paths
+ # extend it to take a [(path, min)]
+ tmp_min_size, cache_min_size = unify_space_requirements(
+ tmp_path, tmp_min_size, cache_path, cache_min_size)
tmp_size, cache_size = map(get_bytes_free_in_path, (tmp_path, cache_path))
errors = []
for path, min in [(tmp_path, tmp_min_size), (cache_path, cache_min_size)]:
@@ -256,4 +269,10 @@ def check_disk_available(tmp_path, tmp_min_size,
'has %(free)d' % locals())
if not errors:
return
- raise morphlib.Error('Insufficient space on disk:\n' + '\n'.join(errors))
+ raise morphlib.Error('Insufficient space on disk:\n' +
+ '\n'.join(errors) + '\n'
+ 'Please run `morph gc`. If the problem persists '
+ 'increase the disk size, manually clean up some '
+ 'space or reduce the disk space required by the '
+ 'tempdir-min-space and cachedir-min-space '
+ 'configuration options.')