diff options
-rwxr-xr-x | scripts/clean-git-cache | 238 |
1 files changed, 238 insertions, 0 deletions
diff --git a/scripts/clean-git-cache b/scripts/clean-git-cache new file mode 100755 index 00000000..61e43e4e --- /dev/null +++ b/scripts/clean-git-cache @@ -0,0 +1,238 @@ +#!/usr/bin/env python +# +# Copyright (C) 2012 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# Remove directories in /src/cache/gits that aren't used in any morphologies. +# Stale directories can occur as a result of switching a chunk from upstream +# git to a lorried repo, or from large scale URI changes. + +import glob +import json +import logging +import os +import re +import shutil +import string +import subprocess + +import cliapp + +### Copied and pasted from morphlib/repoaliasresolver.py !!! 
class RepoAliasResolver(object):

    '''Expand aliased repository names into full pull or push URLs.

    Each alias is a string of the form ``prefix=pullpattern#pushpattern``.
    A repository name of the form ``prefix:rest`` is expanded by
    substituting ``rest`` into the pattern registered for ``prefix``.
    Names without a prefix, or with a prefix that no alias defines, are
    returned unchanged so that real URLs pass straight through.

    '''

    # Compiled once at class creation; both are used on every lookup.
    _ALIAS_PATTERN = re.compile(
        r'^(?P<prefix>[a-z0-9]+)=(?P<pullpat>[^#]+)#(?P<pushpat>[^#]+)$')
    _REPONAME_PATTERN = re.compile(r'^(?P<prefix>[a-z0-9]+):(?P<rest>.*)$')

    def __init__(self, aliases):
        '''Store the iterable of alias definition strings.'''
        self.aliases = aliases

    def pull_url(self, reponame):
        '''Expand a possibly shortened repo name to a pull url.'''
        return self._expand_reponame(reponame, 'pullpat')

    def push_url(self, reponame):
        '''Expand a possibly shortened repo name to a push url.'''
        return self._expand_reponame(reponame, 'pushpat')

    def _expand_reponame(self, reponame, patname):
        '''Expand reponame using the alias group named by patname.

        patname is either 'pullpat' or 'pushpat'. Returns the expanded
        URL, or reponame itself if it has no prefix or the prefix is not
        defined by any alias.

        '''

        # Arguments are handed to logging unformatted so the string is
        # only built when debug output is actually enabled.
        logging.debug('expanding: reponame=%s', reponame)
        logging.debug('expanding: patname=%s', patname)

        prefix, suffix = self._split_reponame(reponame)
        logging.debug('expanding: prefix=%s', prefix)
        logging.debug('expanding: suffix=%s', suffix)

        # There was no prefix.
        if prefix is None:
            logging.debug('expanding: no prefix')
            return reponame

        for alias in self.aliases:
            logging.debug('expanding: alias="%s"', alias)
            m = self._ALIAS_PATTERN.match(alias)
            logging.debug('expanding: m=%s', repr(m))
            if m is None:
                # Malformed alias definitions are skipped, not fatal.
                continue
            logging.debug('expanding: prefix group=%s', m.group('prefix'))
            if m.group('prefix') == prefix:
                # This is the pull or the push pattern depending on
                # patname; the log text is kept as it always was.
                pattern = m.group(patname)
                logging.debug('expanding: pullpat=%s', pattern)
                return self._apply_url_pattern(pattern, suffix)

        # Unknown prefix. Which means it may be a real URL instead.
        # Let the caller deal with it.
        logging.debug('expanding: unknown prefix')
        return reponame

    def _split_reponame(self, reponame):
        '''Split reponame into prefix and suffix.

        The prefix is returned as None if there was no prefix.

        '''

        m = self._REPONAME_PATTERN.match(reponame)
        if m:
            return m.group('prefix'), m.group('rest')
        return None, reponame

    def _apply_url_pattern(self, pattern, shortname):
        '''Substitute shortname into pattern.

        Every '%s' in pattern is replaced by shortname; a pattern without
        '%s' simply gets shortname appended.

        '''
        if '%s' in pattern:
            return pattern.replace('%s', shortname)
        return pattern + shortname

### Copied and pasted from morph/morphlib/localrepocache.py !!!
def quote_url(url):
    ''' Convert URIs to strings that only contain digits, letters, % and _.

    NOTE: When changing the code of this function, make sure to also apply
    the same to the quote_url() function of lorry. Otherwise the git bundles
    generated by lorry may no longer be found by morph.

    '''
    # string.ascii_letters instead of string.letters: the latter depends on
    # the locale under Python 2 and does not exist under Python 3. With the
    # default locale both contain exactly the same characters, so the
    # generated names are unchanged.
    valid_chars = string.digits + string.ascii_letters + '%_'
    return ''.join(x if x in valid_chars else '_' for x in url)

###

class CleanGitCache(cliapp.Application):

    '''List, and optionally delete, cached gits that no stratum refers to.

    The set of repositories in use is computed from the stratum
    morphologies found in a given morphologies repo and branch, as they
    exist in the local git cache. Everything else below the cache's
    'gits' directory is reported, and removed when --remove is given.

    '''

    def add_settings(self):
        '''Register the --remove / -r boolean setting.'''
        self.settings.boolean(['remove', 'r'],
                              "don't just list unused gits, remove them too")

    def process_inputs(self, args):
        '''Entry point: args must be [REPO, BRANCH].

        Reads the cache directory and repo aliases from the output of
        '$MORPH --dump-config' (MORPH defaults to 'morph'), then lists or
        removes the unused git caches.

        '''
        # Each print call takes exactly one string argument so that the
        # output is identical under the Python 2 print statement.
        morph = os.environ.get('MORPH', 'morph')

        if len(args) != 2:
            print("Usage: clean-git-cache REPO BRANCH")
            print("")
            print("For example: clean-git-cache baserock:morphs master")
            print("")
            print("This will detect any repositories that are not referenced ")
            print("by the morphologies available in that repo/branch.")
            return

        print("Reading default config from '%s'" % morph)
        print("")

        if morph == 'morph':
            print("Note: if you use Morph from a git checkout instead of the ")
            print("systemwide version, set MORPH in the environment to point ")
            print("to that version or you may remove the wrong things, due to ")
            print("out of date configuration.")
            print("")

        config = self.runcmd([morph, '--dump-config'])

        cache_dir = self._config_value(config, 'cachedir')
        repo_alias = self._config_value(config, 'repo-alias').split(', ')

        self.resolver = RepoAliasResolver(repo_alias)

        morphs_repo, morphs_branch = args
        git_cache_dir = os.path.join(cache_dir, 'gits')

        unused_gits = self._get_unused_gits(morphs_repo,
                                            morphs_branch,
                                            git_cache_dir)

        if self.settings['remove']:
            print("Removing unused git repositories ...")

            for git in unused_gits:
                f = os.path.join(git_cache_dir, git)
                print(f)
                shutil.rmtree(f)
        else:
            print("Unused gits:")

            for git in unused_gits:
                print(os.path.join(git_cache_dir, git))

            print("")
            print("Run again with --remove to delete these repository caches.")

    def _config_value(self, config, key):
        '''Return the value of the 'key = value' line in dumped config.

        Raises cliapp.AppException if the key is not present, instead of
        failing with an AttributeError on a missing regex match.

        '''
        m = re.search('%s = (.*)' % re.escape(key), config)
        if m is None:
            raise cliapp.AppException(
                'Could not find "%s" in the dumped Morph configuration' % key)
        return m.group(1)

    def _get_unused_gits(self, morphs_repo, morphs_branch, git_cache_dir):
        '''Return the names of cache directories that are not in use.

        A directory is in use if it is the cache of the morphologies repo
        itself, of a repo named in the 'sources' of any stratum
        morphology on the given branch, or of a submodule URL listed in
        the .gitmodules file of such a repo. The returned names are
        relative to git_cache_dir.

        '''
        morphs_repo = self.resolver.pull_url(morphs_repo)
        morphs_dir_name = quote_url(morphs_repo)
        morphs_dir = os.path.join(git_cache_dir, morphs_dir_name)

        show_ref_output = self._show_ref(morphs_dir, morphs_branch)
        refs = [line.split()
                for line in show_ref_output.split('\n')
                if 'origin' in line]
        if not refs:
            raise cliapp.AppException(
                'No origin ref matching "%s" found in %s'
                % (morphs_branch, morphs_dir))
        morphs_ref = refs[0][0]

        # Cache directories are named after the quoted URL, so the
        # morphologies repo has to be recorded in quoted form as well;
        # otherwise it would be reported (and removed) as unused.
        gits_in_use = set([morphs_dir_name])

        for filename in self._ls_tree(morphs_dir, morphs_ref):
            if not filename.endswith('.morph'):
                continue

            tree = json.loads(
                self._cat_file(morphs_dir, morphs_ref, filename))

            if tree.get('kind') != 'stratum' or 'sources' not in tree:
                continue

            for chunk in tree['sources']:
                gits_in_use.add(quote_url(self.resolver.pull_url(chunk['repo'])))

        unused_dirs = []
        submodule_dirs = set()

        for git_dir in glob.glob(os.path.join(git_cache_dir, '*')):
            git_dir_base = os.path.basename(git_dir)
            if git_dir_base not in gits_in_use:
                unused_dirs.append(git_dir_base)
                continue

            submodules_file = os.path.join(git_dir, '.gitmodules')
            if os.path.exists(submodules_file):
                # Collect the individual names; storing the returned list
                # as a single element would make the filter below a no-op.
                submodule_dirs.update(self._get_submodules(submodules_file))

        return [d for d in unused_dirs if d not in submodule_dirs]

    def _get_submodules(self, gitmodules_file):
        '''Return the quoted form of every url value in a .gitmodules file.'''
        result = []
        with open(gitmodules_file) as f:
            for line in f:
                stripped = line.strip()
                if not stripped.startswith('url'):
                    continue
                # Everything after the first '=' is the URL; a line
                # without '=' carries no value and is ignored.
                _, sep, value = stripped.partition('=')
                if sep:
                    result.append(quote_url(value.strip()))
        return result

    def _show_ref(self, cwd, ref):
        '''Return the output of 'git show-ref REF' run in cwd.'''
        return self.runcmd(['git', 'show-ref', ref], cwd=cwd)

    def _ls_tree(self, cwd, ref):
        '''Return the top-level names in ref's tree, one list item per line.

        The list is the raw output split on newlines, so it may contain
        an empty string.

        '''
        result = self.runcmd(['git', 'ls-tree', '--name-only', ref], cwd=cwd)
        return result.split('\n')

    def _cat_file(self, cwd, ref, filename):
        '''Return the contents of filename as stored at ref in cwd.'''
        return self.runcmd(['git', 'cat-file', 'blob',
                            '%s:%s' % (ref, filename)],
                           cwd=cwd)

CleanGitCache().run()