# Copyright (C) 2011-2013  Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


'''Utility functions for morph.'''


import itertools
import os
import re

import fs.osfs

import morphlib

# It is intentional that if collections does not have OrderedDict, then
# simplejson is also used in preference to json, as OrderedDict became a
# member of collections in the same release that json got its
# object_pairs_hook.
try:  # pragma: no cover
    from collections import OrderedDict
    import json
except ImportError:  # pragma: no cover
    from ordereddict import OrderedDict
    import simplejson as json

try:
    from multiprocessing import cpu_count
except NotImplementedError:  # pragma: no cover
    cpu_count = lambda: 1


def indent(string, spaces=4):
    '''Return ``string`` indented by ``spaces`` spaces.

    The final line is not terminated by a newline. This makes it easy
    to use this function for indenting long text for logging: the
    logging library adds a newline, so not including it in the indented
    text avoids a spurious empty line in the log file.

    This also makes the result be a plain ASCII encoded string.

    '''

    if type(string) == unicode:  # pragma: no cover
        string = string.encode('utf-8')
    lines = string.splitlines()
    lines = ['%*s%s' % (spaces, '', line) for line in lines]
    return '\n'.join(lines)


def strip_morph_extension(morph_name):
    if morph_name.startswith('.'):
        raise morphlib.Error(
            'Invalid morphology name: %s' % morph_name)
    elif morph_name.endswith('.morph'):
        return morph_name[:-len('.morph')]
    return morph_name


def make_concurrency(cores=None):
    '''Return the number of concurrent jobs for make.

    This will be given to make as the -j argument.

    '''

    n = cpu_count() if cores is None else cores
    # Experimental results (ref. Kinnison) say a factor of 1.5
    # gives about the optimal result for build times, since much of
    # a build is I/O bound, not CPU bound.
    return max(int(n * 1.5 + 0.5), 1)
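

# Illustrative behaviour of the helpers above (doctest-style, for
# reading only; the make_concurrency() result depends on the host's CPU
# count unless cores is given explicitly):
#
#     >>> indent('foo\nbar', spaces=2)
#     '  foo\n  bar'
#     >>> strip_morph_extension('hello.morph')
#     'hello'
#     >>> make_concurrency(cores=4)  # int(4 * 1.5 + 0.5) == 6
#     6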


def create_cachedir(settings):  # pragma: no cover
    '''Return the cache directory, creating it if necessary.'''

    cachedir = settings['cachedir']
    if not os.path.exists(cachedir):
        os.mkdir(cachedir)
    return cachedir


def get_artifact_cache_server(settings):  # pragma: no cover
    if settings['artifact-cache-server']:
        return settings['artifact-cache-server']
    if settings['cache-server']:
        return settings['cache-server']
    return None


def get_git_resolve_cache_server(settings):  # pragma: no cover
    if settings['git-resolve-cache-server']:
        return settings['git-resolve-cache-server']
    if settings['cache-server']:
        return settings['cache-server']
    return None


def new_artifact_caches(settings):  # pragma: no cover
    '''Create new objects for local and remote artifact caches.

    This includes creating the directories on disk, if they are missing.

    '''

    cachedir = create_cachedir(settings)
    artifact_cachedir = os.path.join(cachedir, 'artifacts')
    if not os.path.exists(artifact_cachedir):
        os.mkdir(artifact_cachedir)

    lac = morphlib.localartifactcache.LocalArtifactCache(
        fs.osfs.OSFS(artifact_cachedir))

    rac_url = get_artifact_cache_server(settings)
    rac = None
    if rac_url:
        rac = morphlib.remoteartifactcache.RemoteArtifactCache(rac_url)
    return lac, rac


def combine_aliases(app):  # pragma: no cover
    '''Create a full repo-alias set from the app's settings.'''

    trove_host = app.settings['trove-host']
    trove_ids = app.settings['trove-id']
    repo_aliases = app.settings['repo-alias']
    repo_pat = (r'^(?P<prefix>[a-z][a-z0-9-]+)=(?P<pull>[^#]+)#'
                r'(?P<push>[^#]+)$')
    trove_pat = (r'^(?P<prefix>[a-z][a-z0-9-]+)=(?P<pull>[^#]+)#'
                 r'(?P<push>[^#]+)#(?P<path>[^#]+)$')
    alias_map = {}

    def _expand(protocol, path):
        if protocol == "git":
            return "git://%s/%s/%%s" % (trove_host, path)
        elif protocol == "ssh":
            return "ssh://git@%s/%s/%%s" % (trove_host, path)
        else:
            raise morphlib.Error(
                'Unknown protocol in trove_id: %s' % protocol)

    if trove_host:
        alias_map['baserock'] = "baserock=%s#%s" % (
            _expand('git', 'baserock'),
            _expand('ssh', 'baserock'))
        alias_map['upstream'] = "upstream=%s#%s" % (
            _expand('git', 'delta'),
            _expand('ssh', 'delta'))
        for trove_id in trove_ids:
            m = re.match(trove_pat, trove_id)
            if m:
                alias_map[m.group('prefix')] = "%s=%s#%s" % (
                    m.group('prefix'),
                    _expand(m.group('pull'), m.group('path')),
                    _expand(m.group('push'), m.group('path')))
            elif '=' not in trove_id:
                alias_map[trove_id] = "%s=%s#%s" % (
                    trove_id,
                    _expand('ssh', trove_id),
                    _expand('ssh', trove_id))

    for repo_alias in repo_aliases:
        m = re.match(repo_pat, repo_alias)
        if m:
            alias_map[m.group('prefix')] = repo_alias
        else:
            raise morphlib.Error(
                'Invalid repo-alias: %s' % repo_alias)

    return alias_map.values()
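

# Illustrative expansion by combine_aliases() above, assuming a
# hypothetical trove-host of 'git.example.com' and a plain trove-id of
# 'foo': the generated alias would be
#
#     foo=ssh://git@git.example.com/foo/%s#ssh://git@git.example.com/foo/%s
#
# where '%s' is a placeholder for the repository name, and the pull and
# push URLs (separated by '#') both go over ssh to the named Trove.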


def new_repo_caches(app):  # pragma: no cover
    '''Create new objects for the local and remote git repository caches.'''

    aliases = app.settings['repo-alias']
    cachedir = create_cachedir(app.settings)
    gits_dir = os.path.join(cachedir, 'gits')
    tarball_base_url = app.settings['tarball-server']
    repo_resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)
    lrc = morphlib.localrepocache.LocalRepoCache(
        app, gits_dir, repo_resolver, tarball_base_url=tarball_base_url)

    url = get_git_resolve_cache_server(app.settings)
    if url:
        rrc = morphlib.remoterepocache.RemoteRepoCache(url, repo_resolver)
    else:
        rrc = None

    return lrc, rrc


def log_dict_diff(app, cur, pre):  # pragma: no cover
    '''Log the differences between two dicts to the debug log.'''

    dictA = cur
    dictB = pre
    for key in dictA.keys():
        if key not in dictB:
            app.status(msg="New environment: %(key)s = %(value)s",
                       key=key, value=dictA[key], chatty=True)
        elif dictA[key] != dictB[key]:
            app.status(msg="Environment changed: "
                           "%(key)s = %(valA)s to %(key)s = %(valB)s",
                       key=key, valA=dictA[key], valB=dictB[key],
                       chatty=True)
    for key in dictB.keys():
        if key not in dictA:
            app.status(msg="Environment removed: %(key)s = %(value)s",
                       key=key, value=dictB[key], chatty=True)


# This is acquired from rdiff-backup, which is GPLv2+, plus a patch from
# 2011 which has not yet been merged, combined with a tad of tidying from us.
def copyfileobj(inputfp, outputfp, blocksize=1024 * 1024):  # pragma: no cover
    """Copy file inputfp to outputfp in blocksize intervals."""

    sparse = False
    buf = None
    while 1:
        inbuf = inputfp.read(blocksize)
        if not inbuf:
            break
        if not buf:
            buf = inbuf
        else:
            buf += inbuf

        # Combine "short" reads
        if (len(buf) < blocksize):
            continue

        buflen = len(buf)
        if buf == "\x00" * buflen:
            # All zeroes: seek past the block instead of writing it, so
            # the destination filesystem can leave a hole.
            outputfp.seek(buflen, os.SEEK_CUR)
            buf = None
            # Flag sparse=True: we seek()ed, but have not written yet.
            # The file size is wrong until we write.
            sparse = True
        else:
            outputfp.write(buf)
            buf = None
            # We wrote, so clear sparse.
            sparse = False

    if buf:
        outputfp.write(buf)
    elif sparse:
        # The file ended in a hole; write the final byte explicitly so
        # the destination file has the correct size.
        outputfp.seek(-1, os.SEEK_CUR)
        outputfp.write("\x00")


def get_bytes_free_in_path(path):  # pragma: no cover
    """Return the number of bytes free in the filesystem holding path."""
    fsinfo = os.statvfs(path)
    return fsinfo.f_bavail * fsinfo.f_bsize


def on_same_filesystem(path_a, path_b):  # pragma: no cover
    """Test whether both paths are on the same filesystem.

    Note that behaviour may be unexpected on btrfs, since subvolumes
    appear to be on a different device, but share a storage pool.

    """
    # TODO: return True if one path is a subvolume of the other on btrfs?
    return os.stat(path_a).st_dev == os.stat(path_b).st_dev


def unify_space_requirements(tmp_path, tmp_min_size,
                             cache_path, cache_min_size):  # pragma: no cover
    """Adjust minimum sizes when paths share a disk.

    Given pairs of path and minimum size, return the minimum sizes such
    that when the paths are on the same disk, the sizes are added
    together.

    """
    # TODO: make this work for a variable number of (path, size) pairs
    #       as needed; hint: try list.sort and itertools.groupby
    if not on_same_filesystem(tmp_path, cache_path):
        return tmp_min_size, cache_min_size
    unified_size = tmp_min_size + cache_min_size
    return unified_size, unified_size


def check_disk_available(tmp_path, tmp_min_size,
                         cache_path, cache_min_size):  # pragma: no cover
    # If both are on the same filesystem, assume they share a storage
    # pool, so the sum of the two sizes needs to be available.
    # TODO: if we need to do this on any more than 2 paths,
    #       extend it to take a [(path, min)]
    tmp_min_size, cache_min_size = unify_space_requirements(
        tmp_path, tmp_min_size, cache_path, cache_min_size)
    tmp_size, cache_size = map(get_bytes_free_in_path, (tmp_path, cache_path))
    errors = []
    for path, min, free in [(tmp_path, tmp_min_size, tmp_size),
                            (cache_path, cache_min_size, cache_size)]:
        if free < min:
            errors.append('\t%(path)s requires %(min)d bytes free, '
                          'has %(free)d' % locals())
    if not errors:
        return
    raise morphlib.Error('Insufficient space on disk:\n' +
                         '\n'.join(errors) + '\n'
                         'Please run `morph gc`. If the problem persists, '
                         'increase the disk size, manually clean up some '
                         'space, or reduce the disk space required by the '
                         'tempdir-min-space and cachedir-min-space '
                         'configuration options.')
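

# Sketch of how the space checks above combine (paths and sizes here are
# hypothetical): if the two paths live on the same filesystem,
# unify_space_requirements() makes check_disk_available() demand their
# combined minimum, so
#
#     check_disk_available('/tmp', 5 * 2 ** 30, '/cache', 10 * 2 ** 30)
#
# raises morphlib.Error unless the shared filesystem has at least 15 GiB
# free; if the paths are on separate filesystems, each one is checked on
# its own.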


def find_root(dirname, subdir_name):
    '''Find the parent of a directory, at or above a given directory.

    The sought-after directory is indicated by the existence of a
    subdirectory of the indicated name. For example, dirname might be
    the current working directory of the process, and subdir_name might
    be ".morph"; then the returned value would be the Morph workspace
    root directory, which has a subdirectory called ".morph".

    Return the path to the desired directory, or None if not found.

    '''

    dirname = os.path.normpath(os.path.abspath(dirname))
    while not os.path.isdir(os.path.join(dirname, subdir_name)):
        if dirname == '/':
            return None
        dirname = os.path.dirname(dirname)
    return dirname


def find_leaves(search_dir, subdir_name):
    '''This is like find_root, except it looks towards leaves.

    The directory tree, starting at search_dir, is traversed. If a
    directory has a subdirectory called subdir_name, then the directory
    is returned.

    It does not recurse into a leaf's subdirectories.

    '''

    for dirname, subdirs, filenames in os.walk(search_dir):
        if subdir_name in subdirs:
            del subdirs[:]
            yield dirname


def find_leaf(dirname, subdir_name):
    '''This is like find_root, except it looks towards leaves.

    If there are no matching subdirectories, or more than one, fail.

    '''

    leaves = list(find_leaves(dirname, subdir_name))
    if len(leaves) == 1:
        return leaves[0]
    return None


class EnvironmentAlreadySetError(morphlib.Error):

    def __init__(self, conflicts):
        self.conflicts = conflicts
        morphlib.Error.__init__(
            self, 'Keys %r are already set in the environment' % conflicts)


def parse_environment_pairs(env, pairs):
    '''Add key=value pairs to the environment dict.

    Given a dict and a list of strings of the form key=value, set
    dict[key] = value, unless key is already set in the environment,
    at which point raise an exception.

    This does not modify the passed-in dict.

    Returns the extended dict.

    '''

    extra_env = dict(p.split('=', 1) for p in pairs)
    conflicting = [k for k in extra_env if k in env]
    if conflicting:
        raise EnvironmentAlreadySetError(conflicting)

    # Return a dict that is the union of the two. This is not the most
    # performant approach, since it creates three unnecessary lists,
    # but it is the easiest to read. Using itertools.chain may be more
    # efficient.
    return dict(env.items() + extra_env.items())


def get_host_architecture():  # pragma: no cover
    '''Get the canonical Morph name for the host's architecture.'''

    machine = os.uname()[-1]

    table = {
        'x86_64': 'x86_64',
        'i386': 'x86_32',
        'i486': 'x86_32',
        'i586': 'x86_32',
        'i686': 'x86_32',
        'armv7l': 'armv7l',
        'armv7b': 'armv7b',
        'ppc64': 'ppc64'
    }

    if machine not in table:
        raise morphlib.Error('Unknown host architecture %s' % machine)

    return table[machine]


def sanitize_environment(env):
    for k in env:
        env[k] = str(env[k])


def iter_trickle(iterable, limit):
    '''Split an iterable up into `limit`-length chunks.'''

    it = iter(iterable)
    while True:
        buf = list(itertools.islice(it, limit))
        if len(buf) == 0:
            break
        yield buf
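

# Illustrative behaviour of parse_environment_pairs() and iter_trickle()
# (doctest-style, for reading only; dict ordering may vary):
#
#     >>> parse_environment_pairs({'A': '1'}, ['B=2'])
#     {'A': '1', 'B': '2'}
#     >>> list(iter_trickle([0, 1, 2, 3, 4], 2))
#     [[0, 1], [2, 3], [4]]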