From a97d8b313d931bfc6d8c1341af91fc48a8250f33 Mon Sep 17 00:00:00 2001 From: Adam Coldrick Date: Fri, 20 Feb 2015 15:48:26 +0000 Subject: Add an artifact cache which uses OSTree Change-Id: Ie591875d0507b4f43da3e91d4ca63e0a4dc7cbf2 --- morphlib/__init__.py | 1 + morphlib/builder.py | 6 +- morphlib/ostreeartifactcache.py | 295 ++++++++++++++++++++++++++++++++++++++++ without-test-modules | 1 + 4 files changed, 298 insertions(+), 5 deletions(-) create mode 100644 morphlib/ostreeartifactcache.py diff --git a/morphlib/__init__.py b/morphlib/__init__.py index e2641402..79e829a4 100644 --- a/morphlib/__init__.py +++ b/morphlib/__init__.py @@ -73,6 +73,7 @@ import morphology import morphloader import morphset import ostree +import ostreeartifactcache import remoteartifactcache import remoterepocache import repoaliasresolver diff --git a/morphlib/builder.py b/morphlib/builder.py index 735e70aa..6fc871fc 100644 --- a/morphlib/builder.py +++ b/morphlib/builder.py @@ -125,11 +125,7 @@ def ldconfig(runcmd, rootdir): # pragma: no cover def download_depends(constituents, lac, rac, metadatas=None): for constituent in constituents: if not lac.has(constituent): - source = rac.get(constituent) - target = lac.put(constituent) - shutil.copyfileobj(source, target) - target.close() - source.close() + lac.copy_from_remote(constituent, rac) if metadatas is not None: for metadata in metadatas: if not lac.has_artifact_metadata(constituent, metadata): diff --git a/morphlib/ostreeartifactcache.py b/morphlib/ostreeartifactcache.py new file mode 100644 index 00000000..230460f8 --- /dev/null +++ b/morphlib/ostreeartifactcache.py @@ -0,0 +1,295 @@ +# Copyright (C) 2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import collections
import contextlib
import logging
import os
import stat
import shutil
import tarfile
import tempfile

import cliapp
from gi.repository import GLib

import morphlib
from morphlib.artifactcachereference import ArtifactCacheReference


class NotCachedError(morphlib.Error):
    """Raised by OSTreeArtifactCache.get() when a ref cannot be checked out."""

    def __init__(self, ref):
        self.msg = 'Failed to checkout %s from the artifact cache.' % ref


class OSTreeArtifactCache(object):
    """Class to provide the artifact cache API using an OSTree repo.

    Artifacts whose kind is 'stratum', and all metadata files, are kept as
    plain files directly inside `cachedir`.  Every other artifact is
    committed to the OSTree repository at `<cachedir>/repo` under the ref
    `<cache_key>-<suffix>` (see `_get_artifact_cache_name`).

    """

    def __init__(self, cachedir, mode='bare', status_cb=None):
        """Open the OSTree repo at `<cachedir>/repo` in the given mode.

        `status_cb`, if given, is called with the arguments passed to
        `status()`.

        """
        repo_dir = os.path.join(cachedir, 'repo')
        self.repo = morphlib.ostree.OSTreeRepo(repo_dir, mode=mode)
        self.cachedir = cachedir
        self.status_cb = status_cb

    def status(self, *args, **kwargs):
        """Forward a status message to `status_cb`, if one was supplied."""
        if self.status_cb is not None:
            self.status_cb(*args, **kwargs)

    @contextlib.contextmanager
    def _get_file_from_remote(self, artifact, remote, metadata_name=None):
        """Download an artifact or metadata file to a temporary file.

        This is a context manager which yields the path of a temporary
        file inside `cachedir` holding the downloaded data.  The file is
        deleted when the context exits.  If `metadata_name` is given the
        named metadata of `artifact` is fetched, otherwise the artifact
        itself is fetched.

        """
        if metadata_name:
            handle = remote.get_artifact_metadata(artifact, metadata_name)
            self.status(
                msg='Downloading %(name)s %(metadata_name)s as a file.',
                chatty=True, name=artifact.basename(),
                metadata_name=metadata_name)
        else:
            handle = remote.get(artifact)
            self.status(
                msg='Downloading %(name)s as a tarball.', chatty=True,
                name=artifact.basename())

        try:
            with tempfile.NamedTemporaryFile(dir=self.cachedir) as download:
                shutil.copyfileobj(handle, download)
                # Consumers re-open the file by name, so everything that
                # is still sitting in our write buffer must reach the
                # file before we hand the name out.
                download.flush()
                yield download.name
        finally:
            handle.close()

    def _get_artifact_cache_name(self, artifact):
        """Return the OSTree ref used to store `artifact`.

        The basename is expected to look like `<cache_key>.<kind>.<name>`;
        the ref is `<cache_key>-<suffix>` where the suffix is the text
        after the last '-' in `<name>`.  Raises ValueError if the basename
        has fewer than three dot-separated parts; callers use that to
        recognise metadata references.

        """
        cache_key, _, name = artifact.basename().split('.', 2)
        suffix = name.split('-')[-1]
        return '%s-%s' % (cache_key, suffix)

    def put(self, directory, artifact):
        """Commit the contents of 'directory' to the repo.

        This uses the artifact name and cache key to create the ref, so the
        contents of directory should be the contents of the artifact.

        Raises cliapp.AppException if OSTree fails to make the commit.

        """
        _, _, subject = artifact.basename().split('.', 2)
        ref = self._get_artifact_cache_name(artifact)
        try:
            self.status(
                msg='Committing %(subject)s to artifact cache at %(ref)s.',
                chatty=True, subject=subject, ref=ref)
            self.repo.commit(subject, directory, ref)
        except GLib.GError as e:
            logging.debug('OSTree raised an exception: %s', e)
            raise cliapp.AppException('Failed to commit %s to artifact '
                                      'cache.' % ref)

    def put_non_ostree_artifact(self, artifact, location, metadata_name=None):
        """Store a single file in the artifact cachedir.

        The file at `location` is copied to the artifact's filename, or to
        its metadata filename when `metadata_name` is given.

        """
        if metadata_name:
            filename = self._artifact_metadata_filename(artifact,
                                                        metadata_name)
        else:
            filename = self.artifact_filename(artifact)
        shutil.copy(location, filename)

    def _remove_device_nodes(self, path):
        """Delete every block and character device node found under path."""
        for dirpath, dirnames, filenames in os.walk(path):
            for f in filenames:
                filepath = os.path.join(dirpath, f)
                # lstat, so that a symlink to a device is left alone.
                mode = os.lstat(filepath).st_mode
                if stat.S_ISBLK(mode) or stat.S_ISCHR(mode):
                    logging.debug('Removing device node %s from artifact',
                                  filepath)
                    os.remove(filepath)

    def _copy_metadata_from_remote(self, artifact, remote):
        """Copy a metadata file from a remote cache.

        The basename of `artifact` is split at its first '.' into the
        reference name and the metadata name.

        """
        a, name = artifact.basename().split('.', 1)
        reference = ArtifactCacheReference(a)
        with self._get_file_from_remote(reference, remote, name) as location:
            self.put_non_ostree_artifact(reference, location, name)

    def copy_from_remote(self, artifact, remote):
        """Get 'artifact' from remote artifact cache and store it locally.

        This takes an Artifact object and a RemoteArtifactCache. Note that
        `remote` here is not the same as a `remote` for an OSTree repo.

        Only the 'tarball' and 'ostree' values of `remote.method` are
        handled; any other value results in nothing being copied.

        """
        if remote.method == 'tarball':
            self._copy_tarball_from_remote(artifact, remote)
        elif remote.method == 'ostree':
            self._pull_from_remote(artifact, remote)

    def _copy_tarball_from_remote(self, artifact, remote):
        """Fetch `artifact` from a remote which serves plain tarballs."""
        try:
            cache_key, kind, name = artifact.basename().split('.', 2)
        except ValueError:
            # We can't split the name properly, it must be metadata!
            # Decide this before downloading anything so that we do not
            # fetch the reference as a tarball only to throw it away.
            self._copy_metadata_from_remote(artifact, remote)
            return

        with self._get_file_from_remote(artifact, remote) as location:
            if kind == 'stratum':
                self.put_non_ostree_artifact(artifact, location)
                return

            # Create the directory outside the try block: if creation
            # fails there is nothing to clean up, and the cleanup code
            # must not run with `tempdir` unbound.
            tempdir = tempfile.mkdtemp(dir=self.cachedir)
            try:
                # NOTE(review): extractall() trusts the member paths in
                # the archive; this assumes the remote cache is trusted.
                # tarfile.ReadError propagates to the caller if the
                # download is not a valid tarball.
                with tarfile.open(name=location) as tf:
                    tf.extractall(path=tempdir)
                self._remove_device_nodes(tempdir)
                self.put(tempdir, artifact)
            finally:
                shutil.rmtree(tempdir)

    def _pull_from_remote(self, artifact, remote):
        """Fetch `artifact` from a remote which serves an OSTree repo.

        Raises cliapp.AppException if the OSTree pull fails.

        """
        self.status(msg='Pulling artifact for %(name)s from remote.',
                    chatty=True, name=artifact.basename())
        try:
            cache_key, kind, name = artifact.basename().split('.', 2)
        except ValueError:
            # if we can't split the name properly, we must want metadata
            self._copy_metadata_from_remote(artifact, remote)
            return

        if kind == 'stratum':
            # Strata are stored as plain files, not as OSTree commits.
            with self._get_file_from_remote(artifact, remote) as location:
                self.put_non_ostree_artifact(artifact, location)
            return

        ref = self._get_artifact_cache_name(artifact)
        try:
            if not self.repo.has_remote(remote.name):
                self.repo.add_remote(remote.name, remote.ostree_url)
            self.repo.pull([ref], remote.name)
        except GLib.GError as e:
            logging.debug('OSTree raised an exception: %s', e)
            raise cliapp.AppException('Failed to pull %s from remote '
                                      'cache.' % ref)

    def get(self, artifact, directory=None):
        """Checkout an artifact from the repo and return its location.

        For a stratum artifact the path of its file in the cachedir is
        returned.  Otherwise the artifact is checked out into `directory`
        (a new temporary directory if None) and that path is returned.

        Raises NotCachedError if the checkout fails.

        """
        cache_key, kind, name = artifact.basename().split('.', 2)
        if kind == 'stratum':
            return self.artifact_filename(artifact)

        created_here = directory is None
        if created_here:
            directory = tempfile.mkdtemp()
        ref = self._get_artifact_cache_name(artifact)
        try:
            self.repo.checkout(ref, directory)
            # We need to update the mtime and atime of the ref file in the
            # repository so that we can decide which refs were least recently
            # accessed when doing `morph gc`.
            self.repo.touch_ref(ref)
        except GLib.GError as e:
            logging.debug('OSTree raised an exception: %s', e)
            if created_here:
                # Nobody else knows about this directory, so do not
                # leave it behind when the checkout fails.
                shutil.rmtree(directory, ignore_errors=True)
            raise NotCachedError(ref)
        return directory

    def list_contents(self):
        """Return the set of sources cached and related information.

        Returns an iterable of (cache_key, set(artifacts), last_used)
        tuples, where last_used is the newest mtime of any ref file
        belonging to that cache key.

        """
        CacheInfo = collections.namedtuple('CacheInfo', ('artifacts', 'mtime'))
        contents = collections.defaultdict(lambda: CacheInfo(set(), 0))
        for ref in self.repo.list_refs():
            # Refs are built as '<cache_key>-<suffix>' and the suffix never
            # contains '-', so splitting at the last '-' recovers both
            # parts whatever the length of the cache key.
            cachekey, separator, artifact = ref.rpartition('-')
            if not separator:
                cachekey, artifact = ref, ''
            artifacts, max_mtime = contents[cachekey]
            artifacts.add(artifact)
            ref_filename = os.path.join(self.repo.refsdir(), ref)
            mtime = os.path.getmtime(ref_filename)
            contents[cachekey] = CacheInfo(artifacts, max(max_mtime, mtime))
        return ((cache_key, info.artifacts, info.mtime)
                for cache_key, info in contents.items())

    def remove(self, cachekey):
        """Remove all artifacts associated with the given cachekey."""
        for ref in self.repo.list_refs():
            if ref.startswith(cachekey):
                self.repo.delete_ref(ref)

    def prune(self):
        """Delete orphaned objects in the repo."""
        self.repo.prune()

    def has(self, artifact):
        """Return True if `artifact` is present in the local cache.

        A successful lookup also refreshes the timestamp of the file or
        ref that was found.

        """
        try:
            cachekey, kind, name = artifact.basename().split('.', 2)
        except ValueError:
            # We couldn't split the basename properly, we must want metadata
            # NOTE(review): _copy_metadata_from_remote stores metadata under
            # ArtifactCacheReference(cachekey) whereas this looks it up
            # under `artifact` itself -- confirm both give the same
            # filename.
            cachekey, name = artifact.basename().split('.', 1)
            return self.has_artifact_metadata(artifact, name)

        if kind == 'stratum':
            return self._has_file(self.artifact_filename(artifact))

        ref = self._get_artifact_cache_name(artifact)
        if self.repo.resolve_rev(ref):
            self.repo.touch_ref(ref)
            return True
        return False

    def get_artifact_metadata(self, artifact, name):
        """Open the named metadata file of `artifact` for reading.

        The file's access and modification times are updated first.

        """
        filename = self._artifact_metadata_filename(artifact, name)
        os.utime(filename, None)
        return open(filename)

    def get_source_metadata_filename(self, source, cachekey, name):
        """Return the path of the named source metadata file."""
        return self._source_metadata_filename(source, cachekey, name)

    def get_source_metadata(self, source, cachekey, name):
        """Open the named source metadata file for reading.

        The file's access and modification times are updated first.

        """
        filename = self._source_metadata_filename(source, cachekey, name)
        os.utime(filename, None)
        return open(filename)

    def artifact_filename(self, artifact):
        """Return the path in the cachedir used for a file artifact."""
        return os.path.join(self.cachedir, artifact.basename())

    def _artifact_metadata_filename(self, artifact, name):
        """Return the path in the cachedir of an artifact's metadata."""
        return os.path.join(self.cachedir, artifact.metadata_basename(name))

    def _source_metadata_filename(self, source, cachekey, name):
        """Return `<cachedir>/<cachekey>.<name>`; `source` is not used."""
        return os.path.join(self.cachedir, '%s.%s' % (cachekey, name))

    def put_artifact_metadata(self, artifact, name):
        """Return a SaveFile for writing the named artifact metadata."""
        filename = self._artifact_metadata_filename(artifact, name)
        return morphlib.savefile.SaveFile(filename, mode='w')

    def put_source_metadata(self, source, cachekey, name):
        """Return a SaveFile for writing the named source metadata."""
        filename = self._source_metadata_filename(source, cachekey, name)
        return morphlib.savefile.SaveFile(filename, mode='w')

    def _has_file(self, filename):
        """Return whether `filename` exists, touching it if it does."""
        if os.path.exists(filename):
            os.utime(filename, None)
            return True
        return False

    def has_artifact_metadata(self, artifact, name):
        """Return True if the named metadata of `artifact` is cached."""
        filename = self._artifact_metadata_filename(artifact, name)
        return self._has_file(filename)

    def has_source_metadata(self, source, cachekey, name):
        """Return True if the named source metadata is cached."""
        filename = self._source_metadata_filename(source, cachekey, name)
        return self._has_file(filename)
b/without-test-modules index c3e7f5a2..ebbcfb6a 100644 --- a/without-test-modules +++ b/without-test-modules @@ -55,3 +55,4 @@ distbuild/worker_build_scheduler.py # Not unit tested, since it needs a full system branch morphlib/buildbranch.py morphlib/ostree.py +morphlib/ostreeartifactcache.py -- cgit v1.2.1