summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorSam Thursfield <sam.thursfield@codethink.co.uk>2012-07-24 15:22:30 +0100
committerSam Thursfield <sam.thursfield@codethink.co.uk>2012-08-22 13:35:10 +0100
commitdabca9a5bf30c3eb9564e7130938c88bc7e4ab6b (patch)
tree9de0228d9ae770cba0501b593970cb52fc0b3702 /scripts
parent7ee8c1e4199d9b0c73cf52faad02822983b3d2d6 (diff)
downloadmorph-dabca9a5bf30c3eb9564e7130938c88bc7e4ab6b.tar.gz
Add scripts/clean-git-cache
This removes local repository clones that are not referenced by anything in the baserock:morphs repository. It would be useful for Morph to do this itself in the long term.
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/clean-git-cache238
1 files changed, 238 insertions, 0 deletions
diff --git a/scripts/clean-git-cache b/scripts/clean-git-cache
new file mode 100755
index 00000000..61e43e4e
--- /dev/null
+++ b/scripts/clean-git-cache
@@ -0,0 +1,238 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2012 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# Remove directories in Morph's git cache (the 'gits' subdirectory of the
+# configured cachedir, e.g. /src/cache/gits) that aren't used in any
+# morphologies. Stale directories can occur as a result of switching a chunk
+# from upstream git to a lorried repo, or from large-scale URI changes.
+
+import glob
+import json
+import logging
+import os
+import re
+import shutil
+import string
+import subprocess
+
+import cliapp
+
+### Copied and pasted from morphlib/repoaliasresolver.py !!!
+
class RepoAliasResolver(object):
    '''Expand aliased repository names into pull or push URLs.

    Each alias is a string of the form ``prefix=pullpattern#pushpattern``.
    A repository name of the form ``prefix:suffix`` whose prefix matches an
    alias is expanded by substituting ``suffix`` into the chosen pattern.
    Names without a prefix, or with a prefix that no alias defines, are
    returned unchanged.

    '''

    # Alias definition: prefix=pullpat#pushpat
    _ALIAS_RE = re.compile(
        r'^(?P<prefix>[a-z0-9]+)=(?P<pullpat>[^#]+)#(?P<pushpat>[^#]+)$')

    # Repository name: prefix:rest
    _REPONAME_RE = re.compile(r'^(?P<prefix>[a-z0-9]+):(?P<rest>.*)$')

    def __init__(self, aliases):
        '''Store the iterable of alias definition strings.'''
        self.aliases = aliases

    def pull_url(self, reponame):
        '''Expand a possibly shortened repo name to a pull url.'''
        return self._expand_reponame(reponame, 'pullpat')

    def push_url(self, reponame):
        '''Expand a possibly shortened repo name to a push url.'''
        return self._expand_reponame(reponame, 'pushpat')

    def _expand_reponame(self, reponame, patname):
        '''Expand reponame using the alias pattern group named patname.

        patname is 'pullpat' or 'pushpat'. The first alias whose prefix
        equals the prefix of reponame wins. If reponame has no prefix, or
        no alias defines it, reponame is returned unchanged.

        '''

        # Arguments are passed to logging unformatted so the message is
        # only built when debug logging is actually enabled.
        logging.debug('expanding: reponame=%s', reponame)
        logging.debug('expanding: patname=%s', patname)

        prefix, suffix = self._split_reponame(reponame)
        logging.debug('expanding: prefix=%s', prefix)
        logging.debug('expanding: suffix=%s', suffix)

        # There was no prefix.
        if prefix is None:
            logging.debug('expanding: no prefix')
            return reponame

        for alias in self.aliases:
            logging.debug('expanding: alias="%s"', alias)
            m = self._ALIAS_RE.match(alias)
            logging.debug('expanding: m=%r', m)
            if not m:
                continue
            logging.debug('expanding: prefix group=%s', m.group('prefix'))
            if m.group('prefix') == prefix:
                url_pattern = m.group(patname)
                logging.debug('expanding: pullpat=%s', url_pattern)
                return self._apply_url_pattern(url_pattern, suffix)

        # Unknown prefix. Which means it may be a real URL instead.
        # Let the caller deal with it.
        logging.debug('expanding: unknown prefix')
        return reponame

    def _split_reponame(self, reponame):
        '''Split reponame into prefix and suffix.

        The prefix is returned as None if there was no prefix.

        '''

        m = self._REPONAME_RE.match(reponame)
        if m:
            return m.group('prefix'), m.group('rest')
        return None, reponame

    def _apply_url_pattern(self, pattern, shortname):
        '''Substitute shortname into pattern.

        Every '%s' in pattern is replaced by shortname; a pattern without
        '%s' has shortname appended instead.

        '''

        if '%s' in pattern:
            return pattern.replace('%s', shortname)
        return pattern + shortname
+
+### Copied and pasted from morph/morphlib/localrepocache.py !!!
+
def quote_url(url):
    ''' Convert URIs to strings that only contain digits, letters, % and _.

    Every character that is not an ASCII digit, an ASCII letter, '%' or
    '_' is replaced by '_'.

    NOTE: When changing the code of this function, make sure to also apply
    the same to the quote_url() function of lorry. Otherwise the git bundles
    generated by lorry may no longer be found by morph.

    '''
    # string.ascii_letters rather than string.letters: the latter is
    # locale-dependent, which would make the resulting directory names
    # vary with the user's locale settings.
    valid_chars = frozenset(string.digits + string.ascii_letters + '%_')
    return ''.join([c if c in valid_chars else '_' for c in url])
+
+###
+
class CleanGitCache(cliapp.Application):
    '''List, and optionally remove, cached git clones no stratum refers to.

    Usage: clean-git-cache [--remove] REPO BRANCH

    The stratum morphologies found in BRANCH of REPO are scanned for chunk
    repositories. Any entry in the 'gits' subdirectory of Morph's cachedir
    that corresponds neither to the morphologies repository itself, nor to
    one of those chunk repositories, nor to a submodule of a repository
    that is in use, is reported (or removed when --remove is given).

    '''

    def add_settings(self):
        '''Register the --remove/-r boolean setting.'''
        self.settings.boolean(['remove', 'r'],
                              'don\'t just list unused gits, remove them too')

    def process_inputs(self, args):
        '''Find the unused cached repositories and list or remove them.

        args must be [REPO, BRANCH]; otherwise a usage message is printed
        and nothing else happens. The Morph executable used to read the
        configuration is taken from the MORPH environment variable and
        defaults to 'morph'.

        '''
        morph = os.environ.get('MORPH', 'morph')

        if len(args) != 2:
            print("Usage: clean-git-cache REPO BRANCH")
            print("")
            print("For example: clean-git-cache baserock:morphs master")
            print("")
            print("This will detect any repositories that are not referenced ")
            print("by the morphologies available in that repo/branch.")
            return

        print("Reading default config from '%s'" % morph)
        print("")

        if morph == 'morph':
            print("Note: if you use Morph from a git checkout instead of the ")
            print("systemwide version, set MORPH in the environment to point ")
            print("to that version or you may remove the wrong things, due to ")
            print("out of date configuration.")
            print("")

        config = self.runcmd([morph, '--dump-config'])

        cache_dir = self._get_config_value(config, 'cachedir')
        repo_alias = self._get_config_value(config, 'repo-alias').split(', ')

        # _get_unused_gits() reads self.resolver to expand repo names.
        self.resolver = RepoAliasResolver(repo_alias)

        morphs_repo, morphs_branch = args

        git_cache_dir = os.path.join(cache_dir, 'gits')

        unused_gits = self._get_unused_gits(morphs_repo,
                                            morphs_branch,
                                            git_cache_dir)

        if self.settings['remove']:
            print("Removing unused git repositories ...")

            for git in unused_gits:
                f = os.path.join(git_cache_dir, git)
                print(f)
                shutil.rmtree(f)
        else:
            print("Unused gits:")

            for git in unused_gits:
                print(os.path.join(git_cache_dir, git))

            print("")
            print("Run again with --remove to delete these repository caches.")

    def _get_config_value(self, config, key):
        '''Return the value of the 'key = value' line in dumped config text.

        Raises cliapp.AppException if the key is not present, instead of
        failing with an AttributeError on a missing regex match.

        '''
        m = re.search('%s = (.*)' % re.escape(key), config)
        if m is None:
            raise cliapp.AppException(
                "Could not find '%s' in the dumped Morph configuration" % key)
        return m.group(1)

    def _get_unused_gits(self, morphs_repo, morphs_branch, git_cache_dir):
        '''Return the names of entries in git_cache_dir that are not in use.

        An entry is in use if it is the cached clone of the morphologies
        repository, of a chunk repository named in the 'sources' of a
        stratum morphology at morphs_branch, or of a submodule URL listed
        in the .gitmodules file of one of those.

        '''
        morphs_repo = self.resolver.pull_url(morphs_repo)
        morphs_dirname = quote_url(morphs_repo)
        morphs_dir = os.path.join(git_cache_dir, morphs_dirname)

        refs = self._show_ref(morphs_dir, morphs_branch).split('\n')
        refs = [x.split() for x in refs if 'origin' in x]
        if not refs:
            raise cliapp.AppException(
                "No origin ref matching '%s' found in %s" %
                (morphs_branch, morphs_dir))
        morphs_ref = refs[0][0]

        # Cache entries are named after the quoted URL, so the morphologies
        # repository must be recorded in quoted form too; otherwise it
        # would never match its own cache directory and be reported unused.
        gits_in_use = set([morphs_dirname])

        for filename in self._ls_tree(morphs_dir, morphs_ref):
            if not filename.endswith('.morph'):
                continue

            tree = json.loads(self._cat_file(morphs_dir, morphs_ref, filename))

            if tree['kind'] != 'stratum' or 'sources' not in tree:
                continue

            for chunk in tree['sources']:
                repo = chunk['repo']
                gits_in_use.add(quote_url(self.resolver.pull_url(repo)))

        unused_dirs = []
        submodule_dirs = set()

        for git_dir in glob.glob(os.path.join(git_cache_dir, '*')):
            git_dir_base = os.path.basename(git_dir)
            if git_dir_base not in gits_in_use:
                unused_dirs.append(git_dir_base)
                continue

            submodules_file = os.path.join(git_dir, '.gitmodules')
            if os.path.exists(submodules_file):
                # Collect the individual names (not the list object), so
                # that the membership test below can actually match.
                submodule_dirs.update(self._get_submodules(submodules_file))

        return [d for d in unused_dirs if d not in submodule_dirs]

    def _get_submodules(self, gitmodules_file):
        '''Return the quoted form of every url value in a .gitmodules file.'''
        result = []
        with open(gitmodules_file) as f:
            for line in f:
                if line.strip().startswith('url'):
                    url = line[line.find('=') + 1:].strip()
                    result.append(quote_url(url))
        return result

    def _show_ref(self, cwd, ref):
        '''Return the output of 'git show-ref REF' run in cwd.'''
        return self.runcmd(['git', 'show-ref', ref], cwd=cwd)

    def _ls_tree(self, cwd, ref):
        '''Return the top-level names in the tree of ref, as a list.'''
        result = self.runcmd(['git', 'ls-tree', '--name-only', ref], cwd=cwd)
        return result.split('\n')

    def _cat_file(self, cwd, ref, filename):
        '''Return the contents of filename as stored at ref.'''
        return self.runcmd(['git', 'cat-file', 'blob',
                            '%s:%s' % (ref, filename)],
                           cwd=cwd)
+
# Only start the application when executed as a script, so that the
# definitions above can be imported without side effects.
if __name__ == '__main__':
    CleanGitCache().run()