summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sam Thursfield <sam.thursfield@codethink.co.uk> 2015-02-10 12:23:13 +0000
committer: Sam Thursfield <sam.thursfield@codethink.co.uk> 2015-02-11 16:04:44 +0000
commit: 81905d738a1343ad1d9da7f3977743c04601f0ba (patch)
tree: c488c3a4b116addd35c8f021a793e08e55563618
parent: 7a41668f803641af0edd5948125a66d45906d6c7 (diff)
download: morph-81905d738a1343ad1d9da7f3977743c04601f0ba.tar.gz
Rework artifact fetching
We now fetch sources, not artifacts. This means that built artifacts of a source are either all available, or all missing.
-rw-r--r-- morphlib/artifact.py 3
-rw-r--r-- morphlib/buildcommand.py 30
-rw-r--r-- morphlib/builder.py 23
-rw-r--r-- morphlib/localartifactcache.py 18
-rw-r--r-- morphlib/remoteartifactcache.py 46
-rw-r--r-- morphlib/remoteartifactcache_tests.py 12
-rw-r--r-- morphlib/source.py 28
7 files changed, 72 insertions, 88 deletions
diff --git a/morphlib/artifact.py b/morphlib/artifact.py
index 7a40a81a..ae1d0604 100644
--- a/morphlib/artifact.py
+++ b/morphlib/artifact.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2012, 2013, 2014 Codethink Limited
+# Copyright (C) 2012-2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -43,7 +43,6 @@ class Artifact(object):
def __repr__(self): # pragma: no cover
return 'Artifact(%s)' % str(self)
-
def walk(self): # pragma: no cover
'''Return list of an artifact and its build dependencies.
diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py
index 0a8d463f..04321b42 100644
--- a/morphlib/buildcommand.py
+++ b/morphlib/buildcommand.py
@@ -293,15 +293,18 @@ class BuildCommand(object):
that doesn't work for some reason, by building the source locally.
'''
- artifacts = source.artifacts.values()
if self.rac is not None:
try:
- self.cache_artifacts_locally(artifacts)
+ self.cache_artifacts_locally([source])
except morphlib.remoteartifactcache.GetError:
# Error is logged by the RemoteArtifactCache object.
pass
+ artifacts = source.artifacts.values()
if any(not self.lac.has(artifact) for artifact in artifacts):
+ # If any of the artifacts are missing, build the whole thing again.
+ # We should only ever have all the artifacts or none of them, but
+ # builds needs to be robust.
self.build_source(source, build_env)
for a in artifacts:
@@ -323,11 +326,8 @@ class BuildCommand(object):
kind=source.morphology['kind'])
self.fetch_sources(source)
- # TODO: Make an artifact.walk() that takes multiple root artifacts.
- # as this does a walk for every artifact. This was the status
- # quo before build logic was made to work per-source, but we can
- # now do better.
- deps = self.get_recursive_deps(source.artifacts.values())
+
+ deps = morphlib.artifact.find_all_deps(source.artifacts.values())
self.cache_artifacts_locally(deps)
use_chroot = False
@@ -371,16 +371,6 @@ class BuildCommand(object):
td_string = "%02d:%02d:%02d" % (hours, minutes, seconds)
self.app.status(msg="Elapsed time %(duration)s", duration=td_string)
- def get_recursive_deps(self, artifacts):
- deps = set()
- ordered_deps = []
- for artifact in artifacts:
- for dep in artifact.walk():
- if dep not in deps and dep not in artifacts:
- deps.add(dep)
- ordered_deps.append(dep)
- return ordered_deps
-
def fetch_sources(self, source):
'''Update the local git repository cache with the sources.'''
@@ -388,11 +378,11 @@ class BuildCommand(object):
source.repo = self.lrc.get_updated_repo(repo_name, ref=source.sha1)
self.lrc.ensure_submodules(source.repo, source.sha1)
- def cache_artifacts_locally(self, artifacts):
+ def cache_artifacts_locally(self, sources):
'''Get artifacts missing from local cache from remote cache.'''
- self.rac.get_artifacts(
- artifacts, self.lac, status_cb=self.app.status)
+ self.rac.get_artifacts_for_sources(
+ sources, self.lac, status_cb=self.app.status)
def create_staging_area(self, build_env, use_chroot=True, extra_env={},
extra_path=[]):
diff --git a/morphlib/builder.py b/morphlib/builder.py
index 4786bd8a..4b659984 100644
--- a/morphlib/builder.py
+++ b/morphlib/builder.py
@@ -473,12 +473,13 @@ class StratumBuilder(BuilderBase):
with self.build_watch('overall-build'):
constituents = [d for d in self.source.dependencies
if self.is_constituent(d)]
+ constituent_sources = {d.source for d in constituents}
# the only reason the StratumBuilder has to download chunks is to
# check for overlap now that strata are lists of chunks
with self.build_watch('check-chunks'):
- self.remote_artifact_cache.get_artifacts(
- constituents, self.local_artifact_cache,
+ self.remote_artifact_cache.get_artifacts_for_sources(
+ constituent_sources, self.local_artifact_cache,
status_cb=self.app.status)
with self.build_watch('create-chunk-list'):
@@ -567,19 +568,13 @@ class SystemBuilder(BuilderBase): # pragma: no cover
self.app.status(msg='Unpacking strata to %(path)s',
path=path, chatty=True)
with self.build_watch('unpack-strata'):
- # download the stratum artifacts if necessary
- self.remote_artifact_cache.get_artifacts(
- self.source.dependencies, self.local_artifact_cache,
- status_cb=self.app.status)
-
- # download the chunk artifacts if necessary
- for stratum_artifact in self.source.dependencies:
- with self.local_artifact_cache.get(stratum_artifact) as f:
- chunks = [ArtifactCacheReference(c) for c in json.load(f)]
+ dep_sources = morphlib.artifact.find_all_deps(
+ self.source.dependencies)
- self.remote_artifact_cache.get_artifacts(
- chunks, self.local_artifact_cache,
- status_cb=self.app.status)
+ # download the stratum and chunk artifacts if necessary
+ self.remote_artifact_cache.get_artifacts_for_sources(
+ dep_sources, self.local_artifact_cache,
+ status_cb=self.app.status)
# unpack it from the local artifact cache
for stratum_artifact in self.source.dependencies:
diff --git a/morphlib/localartifactcache.py b/morphlib/localartifactcache.py
index 955ee97f..e1bfe6a3 100644
--- a/morphlib/localartifactcache.py
+++ b/morphlib/localartifactcache.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2012, 2013, 2014 Codethink Limited
+# Copyright (C) 2012-2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -48,8 +48,8 @@ class LocalArtifactCache(object):
def __init__(self, cachefs):
self.cachefs = cachefs
- def put(self, artifact):
- filename = self.artifact_filename(artifact)
+ def put_file(self, basename):
+ filename = self._join(basename)
return morphlib.savefile.SaveFile(filename, mode='w')
def put_artifact_metadata(self, artifact, name):
@@ -60,23 +60,21 @@ class LocalArtifactCache(object):
filename = self._source_metadata_filename(source, cachekey, name)
return morphlib.savefile.SaveFile(filename, mode='w')
- def _has_file(self, filename):
+ def has_file(self, basename):
+ filename = self._join(basename)
if os.path.exists(filename):
os.utime(filename, None)
return True
return False
def has(self, artifact):
- filename = self.artifact_filename(artifact)
- return self._has_file(filename)
+ return self.has_file(artifact.basename())
def has_artifact_metadata(self, artifact, name):
- filename = self._artifact_metadata_filename(artifact, name)
- return self._has_file(filename)
+ return self.has_file(artifact.metadata_basename(name))
def has_source_metadata(self, source, cachekey, name):
- filename = self._source_metadata_filename(source, cachekey, name)
- return self._has_file(filename)
+ return self.has_file('%s.%s' % (cachekey, name))
def get(self, artifact):
filename = self.artifact_filename(artifact)
diff --git a/morphlib/remoteartifactcache.py b/morphlib/remoteartifactcache.py
index 73f9cbed..76867c14 100644
--- a/morphlib/remoteartifactcache.py
+++ b/morphlib/remoteartifactcache.py
@@ -38,13 +38,6 @@ class RemoteArtifactCache(object):
def has(self, artifact):
return self._has_file(artifact.basename())
- def has_artifact_metadata(self, artifact, name):
- return self._has_file(artifact.metadata_basename(name))
-
- def has_source_metadata(self, source, cachekey, name):
- filename = '%s.%s' % (cachekey, name)
- return self._has_file(filename)
-
def _has_file(self, filename):
url = self._request_url(filename)
logging.debug('RemoteArtifactCache._has_file: url=%s' % url)
@@ -106,35 +99,32 @@ class RemoteArtifactCache(object):
for _, local_file in to_fetch:
local_file.close()
- def get_artifact(self, artifact, lac, status_cb=None):
- '''Ensure an artifact is available in the local artifact cache.'''
+ def get_artifacts_for_source(self, source, lac, status_cb=None):
+ '''Ensure all built artifacts for 'source' are in the local cache.
+
+ This includes all build metadata such as the .build-log file.
+
+ '''
to_fetch = []
- if not lac.has(artifact):
- to_fetch.append(
- (artifact.basename(), lac.put(artifact)))
-
- needs_artifact_meta = \
- artifact.source.morphology.needs_artifact_metadata_cached
- if needs_artifact_meta: # pragma: no cover
- if not lac.has_artifact_metadata(artifact, 'meta'):
- to_fetch.append((
- artifact.metadata_basename(artifact, 'meta'),
- lac.put_artifact_metadata(artifact, 'meta')))
+
+ for basename in source.files():
+ if not lac.has_file(basename):
+ to_fetch.append((basename, lac.put_file(basename)))
if len(to_fetch) > 0:
if status_cb:
status_cb(
- msg='Fetching to local cache: artifact %(name)s',
- name=artifact.name)
+ msg='Fetching to local cache: built artifacts of %(name)s',
+ name=source.name)
self._fetch_files(to_fetch)
- def get_artifacts(self, artifacts, lac, status_cb=None):
- '''Ensure multiple artifacts are available in the local cache.'''
+ def get_artifacts_for_sources(self, sources, lac, status_cb=None):
+ '''Ensure artifacts for multiple sources are available locally.'''
- # FIXME: Running the downloads in parallel may give a speed boost, as
- # many of these are small files.
+ # Running the downloads in parallel might give a speed boost, as many
+ # of these are small files.
- for artifact in artifacts:
- self.get_artifact(artifact, lac, status_cb=status_cb)
+ for source in sources:
+ self.get_artifacts_for_source(source, lac, status_cb=status_cb)
diff --git a/morphlib/remoteartifactcache_tests.py b/morphlib/remoteartifactcache_tests.py
index fc9b6b27..79c581ac 100644
--- a/morphlib/remoteartifactcache_tests.py
+++ b/morphlib/remoteartifactcache_tests.py
@@ -109,12 +109,6 @@ class RemoteArtifactCacheTests(unittest.TestCase):
self.assertTrue(
rac.has(an_artifact))
- self.assertTrue(
- rac.has_artifact_metadata(an_artifact, 'meta'))
-
- self.assertTrue(
- rac.has_source_metadata(chunk, chunk.cache_key, 'meta'))
-
lac = morphlib.testutils.FakeLocalArtifactCache()
self.assertFalse(lac.has(an_artifact))
@@ -135,12 +129,6 @@ class RemoteArtifactCacheTests(unittest.TestCase):
self.assertFalse(
rac.has(an_artifact))
- self.assertFalse(
- rac.has_artifact_metadata(an_artifact, 'non-existent-meta'))
-
- self.assertFalse(
- rac.has_source_metadata(chunk, chunk.cache_key, 'meta'))
-
lac = morphlib.testutils.FakeLocalArtifactCache()
with self.assertRaises(morphlib.remoteartifactcache.GetError):
rac.get_artifacts(
diff --git a/morphlib/source.py b/morphlib/source.py
index 4ad54ed9..e37d8cba 100644
--- a/morphlib/source.py
+++ b/morphlib/source.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2012-2014 Codethink Limited
+# Copyright (C) 2012-2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -64,9 +64,33 @@ class Source(object):
def __repr__(self): # pragma: no cover
return 'Source(%s)' % str(self)
- def basename(self): # pragma: no cover
+ def basename(self):
return '%s.%s' % (self.cache_key, str(self.morphology['kind']))
+ def build_log_basename(self):
+ return '%s.build-log' % (self.cache_key)
+
+ def files(self):
+ '''Return the name of all built artifacts of this source.
+
+ This includes every artifact and all associated metadata.
+
+ It's usually a bad idea to have only some of the files for a given
+ source available. Transfer all of them if you transfer any of them.
+
+ '''
+ files = {self.basename()}
+
+ if self.morphology['kind'] == 'chunk':
+ files.add(self.build_log_basename())
+
+ for artifact in self.artifacts:
+ files.add(artifact.basename())
+ if self.morphology.needs_artifact_metadata_cached:
+ files.add(artifact.metadata_basename())
+
+ return files
+
def add_dependency(self, artifact): # pragma: no cover
if artifact not in self.dependencies:
self.dependencies.append(artifact)