diff options
author | Jonathan Maw <jonathan.maw@codethink.co.uk> | 2013-02-20 14:49:43 +0000 |
---|---|---|
committer | Jonathan Maw <jonathan.maw@codethink.co.uk> | 2013-02-28 11:35:20 +0000 |
commit | 5498cd4c3a503562530ea56529c952680d7d79c3 (patch) | |
tree | 948b45170b21414e3682d53491468375784ba2e0 | |
parent | 163d7e1dd8c84b93ddbb03b27f1a602cd660a402 (diff) | |
download | morph-5498cd4c3a503562530ea56529c952680d7d79c3.tar.gz |
Hardlink files into the staging-area
Instead of repeatedly unpacking tarballs into a succession of
staging-areas, morph now unpacks each tarball only once, then hardlinks
all of its files into the staging-area.
-rw-r--r-- | morphlib/builder2.py | 70 | ||||
-rw-r--r-- | morphlib/stagingarea.py | 153 | ||||
-rw-r--r-- | morphlib/stagingarea_tests.py | 26 |
3 files changed, 180 insertions, 69 deletions
diff --git a/morphlib/builder2.py b/morphlib/builder2.py index 68768699..14d73630 100644 --- a/morphlib/builder2.py +++ b/morphlib/builder2.py @@ -27,7 +27,6 @@ import traceback import subprocess import tempfile import gzip -from urlparse import urlparse import cliapp @@ -268,89 +267,32 @@ class ChunkBuilder(BuilderBase): def build_and_cache(self): # pragma: no cover with self.build_watch('overall-build'): - mounted = self.do_mounts() + + builddir, destdir = \ + self.staging_area.chroot_open(self.artifact.source, + self.setup_mounts) log_name = None try: - builddir = self.staging_area.builddir(self.artifact.source) self.get_sources(builddir) - destdir = self.staging_area.destdir(self.artifact.source) with self.local_artifact_cache.put_source_metadata( self.artifact.source, self.artifact.cache_key, 'build-log') as log: log_name = log.real_filename self.run_commands(builddir, destdir, log) except: - self.do_unmounts(mounted) + self.staging_area.chroot_close() if log_name: with open(log_name) as f: for line in f: logging.error('OUTPUT FROM FAILED BUILD: %s' % line.rstrip('\n')) raise - self.do_unmounts(mounted) + self.staging_area.chroot_close() built_artifacts = self.assemble_chunk_artifacts(destdir) self.save_build_times() return built_artifacts - to_mount = ( - ('proc', 'proc', 'none'), - ('dev/shm', 'tmpfs', 'none'), - ) - - def mount_ccachedir(self): #pragma: no cover - ccache_dir = self.app.settings['compiler-cache-dir'] - if not os.path.isdir(ccache_dir): - os.makedirs(ccache_dir) - # Get a path for the repo's ccache - ccache_url = self.artifact.source.repo.url - ccache_path = urlparse(ccache_url).path - ccache_repobase = os.path.basename(ccache_path) - if ':' in ccache_repobase: # the basename is a repo-alias - resolver = morphlib.repoaliasresolver.RepoAliasResolver( - self.app.settings['repo-alias']) - ccache_url = resolver.pull_url(ccache_repobase) - ccache_path = urlparse(ccache_url).path - ccache_repobase = os.path.basename(ccache_path) - if 
ccache_repobase.endswith('.git'): - ccache_repobase = ccache_repobase[:-len('.git')] - - ccache_repodir = os.path.join(ccache_dir, ccache_repobase) - # Make sure that directory exists - if not os.path.isdir(ccache_repodir): - os.mkdir(ccache_repodir) - # Get the destination path - ccache_destdir= os.path.join(self.staging_area.tempdir, - 'tmp', 'ccache') - # Make sure that the destination exists. We'll create /tmp if necessary - # to avoid breaking when faced with an empty staging area. - if not os.path.isdir(ccache_destdir): - os.makedirs(ccache_destdir) - # Mount it into the staging-area - self.app.runcmd(['mount', '--bind', ccache_repodir, - ccache_destdir]) - return ccache_destdir - - def do_mounts(self): # pragma: no cover - mounted = [] - if not self.setup_mounts: - return mounted - for mount_point, mount_type, source in ChunkBuilder.to_mount: - logging.debug('Mounting %s in staging area' % mount_point) - path = os.path.join(self.staging_area.dirname, mount_point) - if not os.path.exists(path): - os.makedirs(path) - self.app.runcmd(['mount', '-t', mount_type, source, path]) - mounted.append(path) - if not self.app.settings['no-ccache']: - mounted.append(self.mount_ccachedir()) - return mounted - - def do_unmounts(self, mounted): # pragma: no cover - for path in mounted: - logging.debug('Unmounting %s in staging area' % path) - morphlib.fsutils.unmount(self.app.runcmd, path) - def get_sources(self, srcdir): # pragma: no cover '''Get sources from git to a source directory, for building.''' diff --git a/morphlib/stagingarea.py b/morphlib/stagingarea.py index a87b45c3..ebfbb0b5 100644 --- a/morphlib/stagingarea.py +++ b/morphlib/stagingarea.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012 Codethink Limited +# Copyright (C) 2012,2013 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,6 +17,8 @@ import logging import os import shutil +import stat +from 
urlparse import urlparse import morphlib @@ -40,6 +42,10 @@ class StagingArea(object): self._app = app self.dirname = dirname self.tempdir = tempdir + self.builddirname = None + self.destdirname = None + self.mounted = None + self._bind_readonly_mount = None # Wrapper to be overridden by unit tests. def _mkdir(self, dirname): # pragma: no cover @@ -80,6 +86,52 @@ class StagingArea(object): assert filename.startswith(dirname) return filename[len(dirname) - 1:] # include leading slash + def hardlink_all_files(self, srcpath, destpath): # pragma: no cover + '''Hardlink every file in the path to the staging-area + + If an exception is raised, the staging-area is indeterminate. + + ''' + + file_stat = os.lstat(srcpath) + mode = file_stat.st_mode + + if stat.S_ISDIR(mode): + # Ensure directory exists in destination, then recurse. + if not os.path.exists(destpath): + os.makedirs(destpath) + dest_stat = os.stat(os.path.realpath(destpath)) + if not stat.S_ISDIR(dest_stat.st_mode): + raise IOError('Destination not a directory. source has %s' + ' destination has %s' % (srcpath, destpath)) + + for entry in os.listdir(srcpath): + self.hardlink_all_files(os.path.join(srcpath, entry), + os.path.join(destpath, entry)) + elif stat.S_ISLNK(mode): + # Copy the symlink. + if os.path.exists(destpath): + os.remove(destpath) + os.symlink(os.readlink(srcpath), destpath) + + elif stat.S_ISREG(mode): + # Hardlink the file. + if os.path.exists(destpath): + os.remove(destpath) + os.link(srcpath, destpath) + + elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode): + # Block or character device. Put contents of st_dev in a mknod. + if os.path.exists(destpath): + os.remove(destpath) + os.mknod(destpath, file_stat.st_mode, file_stat.st_rdev) + os.chmod(destpath, file_stat.st_mode) + + else: + # Unsupported type. + raise IOError('Cannot extract %s into staging-area. Unsupported' + ' type.' % srcpath) + def install_artifact(self, handle): '''Install a build artifact into the staging area. 
@@ -90,7 +142,19 @@ class StagingArea(object): logging.debug('Installing artifact %s' % getattr(handle, 'name', 'unknown name')) - morphlib.bins.unpack_binary_from_file(handle, self.dirname) + + unpacked_artifact = os.path.join( + self._app.settings['tempdir'], + os.path.basename(handle.name) + '.d') + if not os.path.exists(unpacked_artifact): + self._mkdir(unpacked_artifact) + morphlib.bins.unpack_binary_from_file( + handle, unpacked_artifact + '/') + + if not os.path.exists(self.dirname): + self._mkdir(self.dirname) + + self.hardlink_all_files(unpacked_artifact, self.dirname) def remove(self): '''Remove the entire staging area. @@ -103,14 +167,99 @@ class StagingArea(object): shutil.rmtree(self.dirname) + to_mount = ( + ('proc', 'proc', 'none'), + ('dev/shm', 'tmpfs', 'none'), + ) + + def mount_ccachedir(self, source): #pragma: no cover + ccache_dir = self._app.settings['compiler-cache-dir'] + if not os.path.isdir(ccache_dir): + os.makedirs(ccache_dir) + # Get a path for the repo's ccache + ccache_url = source.repo.url + ccache_path = urlparse(ccache_url).path + ccache_repobase = os.path.basename(ccache_path) + if ':' in ccache_repobase: # the basename is a repo-alias + resolver = morphlib.repoaliasresolver.RepoAliasResolver( + self._app.settings['repo-alias']) + ccache_url = resolver.pull_url(ccache_repobase) + ccache_path = urlparse(ccache_url).path + ccache_repobase = os.path.basename(ccache_path) + if ccache_repobase.endswith('.git'): + ccache_repobase = ccache_repobase[:-len('.git')] + + ccache_repodir = os.path.join(ccache_dir, ccache_repobase) + # Make sure that directory exists + if not os.path.isdir(ccache_repodir): + os.mkdir(ccache_repodir) + # Get the destination path + ccache_destdir= os.path.join(self.tempdir, + 'tmp', 'ccache') + # Make sure that the destination exists. We'll create /tmp if necessary + # to avoid breaking when faced with an empty staging area. 
+ if not os.path.isdir(ccache_destdir): + os.makedirs(ccache_destdir) + # Mount it into the staging-area + self._app.runcmd(['mount', '--bind', ccache_repodir, + ccache_destdir]) + return ccache_destdir + + def do_mounts(self, setup_mounts): # pragma: no cover + self.mounted = [] + if not setup_mounts: + return + for mount_point, mount_type, source in self.to_mount: + logging.debug('Mounting %s in staging area' % mount_point) + path = os.path.join(self.dirname, mount_point) + if not os.path.exists(path): + os.makedirs(path) + self._app.runcmd(['mount', '-t', mount_type, source, path]) + self.mounted.append(path) + return + + def do_unmounts(self): # pragma: no cover + for path in reversed(self.mounted): + logging.debug('Unmounting %s in staging area' % path) + morphlib.fsutils.unmount(self._app.runcmd, path) + + def chroot_open(self, source, setup_mounts): # pragma: no cover + '''Setup staging area for use as a chroot.''' + + assert self.builddirname == None and self.destdirname == None + + builddir = self.builddir(source) + destdir = self.destdir(source) + self.builddirname = self.relative(builddir).lstrip('/') + self.destdirname = self.relative(destdir).lstrip('/') + + self.do_mounts(setup_mounts) + + if not self._app.settings['no-ccache']: + self.mounted.append(self.mount_ccachedir(source)) + + return builddir, destdir + + def chroot_close(self): # pragma: no cover + '''Undo changes by chroot_open. + + This should be called after the staging area is no longer needed. 
+ + ''' + + self.do_unmounts() + def runcmd(self, argv, **kwargs): # pragma: no cover '''Run a command in a chroot in the staging area.''' + cwd = kwargs.get('cwd') or '/' if 'cwd' in kwargs: cwd = kwargs['cwd'] del kwargs['cwd'] else: cwd = '/' + real_argv = ['chroot', self.dirname, 'sh', '-c', 'cd "$1" && shift && exec "$@"', '--', cwd] + argv + return self._app.runcmd(real_argv, **kwargs) diff --git a/morphlib/stagingarea_tests.py b/morphlib/stagingarea_tests.py index 3230b9e3..313226d2 100644 --- a/morphlib/stagingarea_tests.py +++ b/morphlib/stagingarea_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012 Codethink Limited +# Copyright (C) 2012,2013 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,6 +14,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +import cliapp import os import shutil import tarfile @@ -31,14 +32,33 @@ class FakeSource(object): } +class FakeApplication(object): + + def __init__(self, cachedir, tempdir): + self.settings = { + 'cachedir': cachedir, + 'tempdir': tempdir, + } + + def runcmd(self, *args, **kwargs): + return cliapp.runcmd(*args, **kwargs) + + def runcmd_unchecked(self, *args, **kwargs): + return cliapp.runcmd_unchecked(*args, **kwargs) + + class StagingAreaTests(unittest.TestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() + self.cachedir = os.path.join(self.tempdir, 'cachedir') + os.mkdir(self.cachedir) + os.mkdir(os.path.join(self.cachedir, 'artifacts')) self.staging = os.path.join(self.tempdir, 'staging') self.created_dirs = [] - self.sa = morphlib.stagingarea.StagingArea(object(), self.staging, - self.staging) + self.sa = morphlib.stagingarea.StagingArea( + FakeApplication(self.cachedir, self.tempdir), + self.staging, self.staging) def tearDown(self): shutil.rmtree(self.tempdir) |