summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Coldrick <adam.coldrick@codethink.co.uk>2015-02-20 15:48:26 +0000
committerSam Thursfield <sam.thursfield@codethink.co.uk>2015-04-20 14:07:41 +0000
commita97d8b313d931bfc6d8c1341af91fc48a8250f33 (patch)
tree525d4b5600181fb6973611e1492c6c99a485bc39
parentbab0adb513519ad6e76ad54f65a2dd226ba9c167 (diff)
downloadmorph-a97d8b313d931bfc6d8c1341af91fc48a8250f33.tar.gz
Add an artifact cache which uses OSTree
Change-Id: Ie591875d0507b4f43da3e91d4ca63e0a4dc7cbf2
-rw-r--r--morphlib/__init__.py1
-rw-r--r--morphlib/builder.py6
-rw-r--r--morphlib/ostreeartifactcache.py295
-rw-r--r--without-test-modules1
4 files changed, 298 insertions, 5 deletions
diff --git a/morphlib/__init__.py b/morphlib/__init__.py
index e2641402..79e829a4 100644
--- a/morphlib/__init__.py
+++ b/morphlib/__init__.py
@@ -73,6 +73,7 @@ import morphology
import morphloader
import morphset
import ostree
+import ostreeartifactcache
import remoteartifactcache
import remoterepocache
import repoaliasresolver
diff --git a/morphlib/builder.py b/morphlib/builder.py
index 735e70aa..6fc871fc 100644
--- a/morphlib/builder.py
+++ b/morphlib/builder.py
@@ -125,11 +125,7 @@ def ldconfig(runcmd, rootdir): # pragma: no cover
def download_depends(constituents, lac, rac, metadatas=None):
for constituent in constituents:
if not lac.has(constituent):
- source = rac.get(constituent)
- target = lac.put(constituent)
- shutil.copyfileobj(source, target)
- target.close()
- source.close()
+ lac.copy_from_remote(constituent, rac)
if metadatas is not None:
for metadata in metadatas:
if not lac.has_artifact_metadata(constituent, metadata):
diff --git a/morphlib/ostreeartifactcache.py b/morphlib/ostreeartifactcache.py
new file mode 100644
index 00000000..230460f8
--- /dev/null
+++ b/morphlib/ostreeartifactcache.py
@@ -0,0 +1,295 @@
+# Copyright (C) 2015 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import collections
+import contextlib
+import logging
+import os
+import stat
+import shutil
+import tarfile
+import tempfile
+
+import cliapp
+from gi.repository import GLib
+
+import morphlib
+from morphlib.artifactcachereference import ArtifactCacheReference
+
+
+class NotCachedError(morphlib.Error):
+
+ def __init__(self, ref):
+ self.msg = 'Failed to checkout %s from the artifact cache.' % ref
+
+
+class OSTreeArtifactCache(object):
+ """Class to provide the artifact cache API using an OSTree repo."""
+
+ def __init__(self, cachedir, mode='bare', status_cb=None):
+ repo_dir = os.path.join(cachedir, 'repo')
+ self.repo = morphlib.ostree.OSTreeRepo(repo_dir, mode=mode)
+ self.cachedir = cachedir
+ self.status_cb = status_cb
+
+ def status(self, *args, **kwargs):
+ if self.status_cb is not None:
+ self.status_cb(*args, **kwargs)
+
+ @contextlib.contextmanager
+ def _get_file_from_remote(self, artifact, remote, metadata_name=None):
+ if metadata_name:
+ handle = remote.get_artifact_metadata(artifact, metadata_name)
+ self.status(
+ msg='Downloading %(name)s %(metadata_name)s as a file.',
+ chatty=True, name=artifact.basename(),
+ metadata_name=metadata_name)
+ else:
+ handle = remote.get(artifact)
+ self.status(
+ msg='Downloading %(name)s as a tarball.', chatty=True,
+ name=artifact.basename())
+
+ try:
+ temporary_download = tempfile.NamedTemporaryFile(dir=self.cachedir)
+ shutil.copyfileobj(handle, temporary_download)
+ yield temporary_download.name
+ finally:
+ temporary_download.close()
+
+ def _get_artifact_cache_name(self, artifact):
+ cache_key, kind, name = artifact.basename().split('.', 2)
+ suffix = name.split('-')[-1]
+ return '%s-%s' % (cache_key, suffix)
+
+ def put(self, directory, artifact):
+ """Commit the contents of 'directory' to the repo.
+
+ This uses the artifact name and cache key to create the ref, so the
+ contents of directory should be the contents of the artifact.
+
+ """
+ cache_key, kind, name = artifact.basename().split('.', 2)
+ ref = self._get_artifact_cache_name(artifact)
+ subject = name
+ try:
+ self.status(
+ msg='Committing %(subject)s to artifact cache at %(ref)s.',
+ chatty=True, subject=subject, ref=ref)
+ self.repo.commit(subject, directory, ref)
+ except GLib.GError as e:
+ logging.debug('OSTree raised an exception: %s' % e)
+ raise cliapp.AppException('Failed to commit %s to artifact '
+ 'cache.' % ref)
+
+ def put_non_ostree_artifact(self, artifact, location, metadata_name=None):
+ """Store a single file in the artifact cachedir."""
+ if metadata_name:
+ filename = self._artifact_metadata_filename(artifact,
+ metadata_name)
+ else:
+ filename = self.artifact_filename(artifact)
+ shutil.copy(location, filename)
+
+ def _remove_device_nodes(self, path):
+ for dirpath, dirnames, filenames in os.walk(path):
+ for f in filenames:
+ filepath = os.path.join(dirpath, f)
+ mode = os.lstat(filepath).st_mode
+ if stat.S_ISBLK(mode) or stat.S_ISCHR(mode):
+ logging.debug('Removing device node %s from artifact' %
+ filepath)
+ os.remove(filepath)
+
+ def _copy_metadata_from_remote(self, artifact, remote):
+ """Copy a metadata file from a remote cache."""
+ a, name = artifact.basename().split('.', 1)
+ with self._get_file_from_remote(ArtifactCacheReference(a),
+ remote, name) as location:
+ self.put_non_ostree_artifact(ArtifactCacheReference(a),
+ location, name)
+
+ def copy_from_remote(self, artifact, remote):
+ """Get 'artifact' from remote artifact cache and store it locally.
+
+ This takes an Artifact object and a RemoteArtifactCache. Note that
+ `remote` here is not the same as a `remote` for and OSTree repo.
+
+ """
+ if remote.method == 'tarball':
+ with self._get_file_from_remote(artifact, remote) as location:
+ try:
+ cache_key, kind, name = artifact.basename().split('.', 2)
+ except ValueError:
+ # We can't split the name properly, it must be metadata!
+ self._copy_metadata_from_remote(artifact, remote)
+ return
+
+ if kind == 'stratum':
+ self.put_non_ostree_artifact(artifact, location)
+ return
+ try:
+ tempdir = tempfile.mkdtemp(dir=self.cachedir)
+ with tarfile.open(name=location) as tf:
+ tf.extractall(path=tempdir)
+ self._remove_device_nodes(tempdir)
+ self.put(tempdir, artifact)
+ except tarfile.ReadError:
+ # Reading the tarball failed, and we expected a
+ # tarball artifact. Something must have gone
+ # wrong.
+ raise
+ finally:
+ shutil.rmtree(tempdir)
+
+ elif remote.method == 'ostree':
+ self.status(msg='Pulling artifact for %(name)s from remote.',
+ chatty=True, name=artifact.basename())
+ try:
+ ref = self._get_artifact_cache_name(artifact)
+ except ValueError:
+ # if we can't split the name properly, we must want metadata
+ self._copy_metadata_from_remote(artifact, remote)
+ return
+
+ if artifact.basename().split('.', 2)[1] == 'stratum':
+ with self._get_file_from_remote(artifact, remote) as location:
+ self.put_non_ostree_artifact(artifact, location)
+ return
+
+ try:
+ if not self.repo.has_remote(remote.name):
+ self.repo.add_remote(remote.name, remote.ostree_url)
+ self.repo.pull([ref], remote.name)
+ except GLib.GError as e:
+ logging.debug('OSTree raised an exception: %s' % e)
+ raise cliapp.AppException('Failed to pull %s from remote '
+ 'cache.' % ref)
+
+ def get(self, artifact, directory=None):
+ """Checkout an artifact from the repo and return its location."""
+ cache_key, kind, name = artifact.basename().split('.', 2)
+ if kind == 'stratum':
+ return self.artifact_filename(artifact)
+ if directory is None:
+ directory = tempfile.mkdtemp()
+ ref = self._get_artifact_cache_name(artifact)
+ try:
+ self.repo.checkout(ref, directory)
+ # We need to update the mtime and atime of the ref file in the
+ # repository so that we can decide which refs were least recently
+ # accessed when doing `morph gc`.
+ self.repo.touch_ref(ref)
+ except GLib.GError as e:
+ logging.debug('OSTree raised an exception: %s' % e)
+ raise NotCachedError(ref)
+ return directory
+
+ def list_contents(self):
+ """Return the set of sources cached and related information.
+
+ returns a [(cache_key, set(artifacts), last_used)]
+
+ """
+ CacheInfo = collections.namedtuple('CacheInfo', ('artifacts', 'mtime'))
+ contents = collections.defaultdict(lambda: CacheInfo(set(), 0))
+ for ref in self.repo.list_refs():
+ cachekey = ref[:63]
+ artifact = ref[65:]
+ artifacts, max_mtime = contents[cachekey]
+ artifacts.add(artifact)
+ ref_filename = os.path.join(self.repo.refsdir(), ref)
+ mtime = os.path.getmtime(ref_filename)
+ contents[cachekey] = CacheInfo(artifacts, max(max_mtime, mtime))
+ return ((cache_key, info.artifacts, info.mtime)
+ for cache_key, info in contents.iteritems())
+
+ def remove(self, cachekey):
+ """Remove all artifacts associated with the given cachekey."""
+ for ref in (r for r in self.repo.list_refs()
+ if r.startswith(cachekey)):
+ self.repo.delete_ref(ref)
+
+ def prune(self):
+ """Delete orphaned objects in the repo."""
+ self.repo.prune()
+
+ def has(self, artifact):
+ try:
+ cachekey, kind, name = artifact.basename().split('.', 2)
+ except ValueError:
+ # We couldn't split the basename properly, we must want metadata
+ cachekey, name = artifact.basename().split('.', 1)
+ if self.has_artifact_metadata(artifact, name):
+ return True
+ else:
+ return False
+
+ if kind == 'stratum':
+ if self._has_file(self.artifact_filename(artifact)):
+ return True
+ else:
+ return False
+
+ sha = self.repo.resolve_rev(self._get_artifact_cache_name(artifact))
+ if sha:
+ self.repo.touch_ref(self._get_artifact_cache_name(artifact))
+ return True
+ return False
+
+ def get_artifact_metadata(self, artifact, name):
+ filename = self._artifact_metadata_filename(artifact, name)
+ os.utime(filename, None)
+ return open(filename)
+
+ def get_source_metadata_filename(self, source, cachekey, name):
+ return self._source_metadata_filename(source, cachekey, name)
+
+ def get_source_metadata(self, source, cachekey, name):
+ filename = self._source_metadata_filename(source, cachekey, name)
+ os.utime(filename, None)
+ return open(filename)
+
+ def artifact_filename(self, artifact):
+ return os.path.join(self.cachedir, artifact.basename())
+
+ def _artifact_metadata_filename(self, artifact, name):
+ return os.path.join(self.cachedir, artifact.metadata_basename(name))
+
+ def _source_metadata_filename(self, source, cachekey, name):
+ return os.path.join(self.cachedir, '%s.%s' % (cachekey, name))
+
+ def put_artifact_metadata(self, artifact, name):
+ filename = self._artifact_metadata_filename(artifact, name)
+ return morphlib.savefile.SaveFile(filename, mode='w')
+
+ def put_source_metadata(self, source, cachekey, name):
+ filename = self._source_metadata_filename(source, cachekey, name)
+ return morphlib.savefile.SaveFile(filename, mode='w')
+
+ def _has_file(self, filename):
+ if os.path.exists(filename):
+ os.utime(filename, None)
+ return True
+ return False
+
+ def has_artifact_metadata(self, artifact, name):
+ filename = self._artifact_metadata_filename(artifact, name)
+ return self._has_file(filename)
+
+ def has_source_metadata(self, source, cachekey, name):
+ filename = self._source_metadata_filename(source, cachekey, name)
+ return self._has_file(filename)
diff --git a/without-test-modules b/without-test-modules
index c3e7f5a2..ebbcfb6a 100644
--- a/without-test-modules
+++ b/without-test-modules
@@ -55,3 +55,4 @@ distbuild/worker_build_scheduler.py
# Not unit tested, since it needs a full system branch
morphlib/buildbranch.py
morphlib/ostree.py
+morphlib/ostreeartifactcache.py