diff options
author | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2015-02-10 12:23:13 +0000 |
---|---|---|
committer | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2015-02-11 16:04:44 +0000 |
commit | 81905d738a1343ad1d9da7f3977743c04601f0ba (patch) | |
tree | c488c3a4b116addd35c8f021a793e08e55563618 | |
parent | 7a41668f803641af0edd5948125a66d45906d6c7 (diff) | |
download | morph-81905d738a1343ad1d9da7f3977743c04601f0ba.tar.gz |
Rework artifact fetching
We now fetch built artifacts per source rather than per artifact. This means
that the built artifacts of a source are either all available, or all missing.
-rw-r--r-- | morphlib/artifact.py | 3 | ||||
-rw-r--r-- | morphlib/buildcommand.py | 30 | ||||
-rw-r--r-- | morphlib/builder.py | 23 | ||||
-rw-r--r-- | morphlib/localartifactcache.py | 18 | ||||
-rw-r--r-- | morphlib/remoteartifactcache.py | 46 | ||||
-rw-r--r-- | morphlib/remoteartifactcache_tests.py | 12 | ||||
-rw-r--r-- | morphlib/source.py | 28 |
7 files changed, 72 insertions, 88 deletions
diff --git a/morphlib/artifact.py b/morphlib/artifact.py index 7a40a81a..ae1d0604 100644 --- a/morphlib/artifact.py +++ b/morphlib/artifact.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012, 2013, 2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -43,7 +43,6 @@ class Artifact(object): def __repr__(self): # pragma: no cover return 'Artifact(%s)' % str(self) - def walk(self): # pragma: no cover '''Return list of an artifact and its build dependencies. diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py index 0a8d463f..04321b42 100644 --- a/morphlib/buildcommand.py +++ b/morphlib/buildcommand.py @@ -293,15 +293,18 @@ class BuildCommand(object): that doesn't work for some reason, by building the source locally. ''' - artifacts = source.artifacts.values() if self.rac is not None: try: - self.cache_artifacts_locally(artifacts) + self.cache_artifacts_locally([source]) except morphlib.remoteartifactcache.GetError: # Error is logged by the RemoteArtifactCache object. pass + artifacts = source.artifacts.values() if any(not self.lac.has(artifact) for artifact in artifacts): + # If any of the artifacts are missing, build the whole thing again. + # We should only ever have all the artifacts or none of them, but + # builds needs to be robust. self.build_source(source, build_env) for a in artifacts: @@ -323,11 +326,8 @@ class BuildCommand(object): kind=source.morphology['kind']) self.fetch_sources(source) - # TODO: Make an artifact.walk() that takes multiple root artifacts. - # as this does a walk for every artifact. This was the status - # quo before build logic was made to work per-source, but we can - # now do better. 
- deps = self.get_recursive_deps(source.artifacts.values()) + + deps = morphlib.artifact.find_all_deps(source.artifacts.values()) self.cache_artifacts_locally(deps) use_chroot = False @@ -371,16 +371,6 @@ class BuildCommand(object): td_string = "%02d:%02d:%02d" % (hours, minutes, seconds) self.app.status(msg="Elapsed time %(duration)s", duration=td_string) - def get_recursive_deps(self, artifacts): - deps = set() - ordered_deps = [] - for artifact in artifacts: - for dep in artifact.walk(): - if dep not in deps and dep not in artifacts: - deps.add(dep) - ordered_deps.append(dep) - return ordered_deps - def fetch_sources(self, source): '''Update the local git repository cache with the sources.''' @@ -388,11 +378,11 @@ class BuildCommand(object): source.repo = self.lrc.get_updated_repo(repo_name, ref=source.sha1) self.lrc.ensure_submodules(source.repo, source.sha1) - def cache_artifacts_locally(self, artifacts): + def cache_artifacts_locally(self, sources): '''Get artifacts missing from local cache from remote cache.''' - self.rac.get_artifacts( - artifacts, self.lac, status_cb=self.app.status) + self.rac.get_artifacts_for_sources( + sources, self.lac, status_cb=self.app.status) def create_staging_area(self, build_env, use_chroot=True, extra_env={}, extra_path=[]): diff --git a/morphlib/builder.py b/morphlib/builder.py index 4786bd8a..4b659984 100644 --- a/morphlib/builder.py +++ b/morphlib/builder.py @@ -473,12 +473,13 @@ class StratumBuilder(BuilderBase): with self.build_watch('overall-build'): constituents = [d for d in self.source.dependencies if self.is_constituent(d)] + constituent_sources = {d.source for d in constituents} # the only reason the StratumBuilder has to download chunks is to # check for overlap now that strata are lists of chunks with self.build_watch('check-chunks'): - self.remote_artifact_cache.get_artifacts( - constituents, self.local_artifact_cache, + self.remote_artifact_cache.get_artifacts_for_sources( + constituent_sources, 
self.local_artifact_cache, status_cb=self.app.status) with self.build_watch('create-chunk-list'): @@ -567,19 +568,13 @@ class SystemBuilder(BuilderBase): # pragma: no cover self.app.status(msg='Unpacking strata to %(path)s', path=path, chatty=True) with self.build_watch('unpack-strata'): - # download the stratum artifacts if necessary - self.remote_artifact_cache.get_artifacts( - self.source.dependencies, self.local_artifact_cache, - status_cb=self.app.status) - - # download the chunk artifacts if necessary - for stratum_artifact in self.source.dependencies: - with self.local_artifact_cache.get(stratum_artifact) as f: - chunks = [ArtifactCacheReference(c) for c in json.load(f)] + dep_sources = morphlib.artifact.find_all_deps( + self.source.dependencies) - self.remote_artifact_cache.get_artifacts( - chunks, self.local_artifact_cache, - status_cb=self.app.status) + # download the stratum and chunk artifacts if necessary + self.remote_artifact_cache.get_artifacts_for_sources( + dep_sources, self.local_artifact_cache, + status_cb=self.app.status) # unpack it from the local artifact cache for stratum_artifact in self.source.dependencies: diff --git a/morphlib/localartifactcache.py b/morphlib/localartifactcache.py index 955ee97f..e1bfe6a3 100644 --- a/morphlib/localartifactcache.py +++ b/morphlib/localartifactcache.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012, 2013, 2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -48,8 +48,8 @@ class LocalArtifactCache(object): def __init__(self, cachefs): self.cachefs = cachefs - def put(self, artifact): - filename = self.artifact_filename(artifact) + def put_file(self, basename): + filename = self._join(basename) return morphlib.savefile.SaveFile(filename, mode='w') def put_artifact_metadata(self, artifact, name): @@ -60,23 +60,21 @@ class LocalArtifactCache(object): filename 
= self._source_metadata_filename(source, cachekey, name) return morphlib.savefile.SaveFile(filename, mode='w') - def _has_file(self, filename): + def has_file(self, basename): + filename = self._join(basename) if os.path.exists(filename): os.utime(filename, None) return True return False def has(self, artifact): - filename = self.artifact_filename(artifact) - return self._has_file(filename) + return self.has_file(artifact.basename()) def has_artifact_metadata(self, artifact, name): - filename = self._artifact_metadata_filename(artifact, name) - return self._has_file(filename) + return self.has_file(artifact.metadata_basename(name)) def has_source_metadata(self, source, cachekey, name): - filename = self._source_metadata_filename(source, cachekey, name) - return self._has_file(filename) + return self.has_file('%s.%s' % (cachekey, name)) def get(self, artifact): filename = self.artifact_filename(artifact) diff --git a/morphlib/remoteartifactcache.py b/morphlib/remoteartifactcache.py index 73f9cbed..76867c14 100644 --- a/morphlib/remoteartifactcache.py +++ b/morphlib/remoteartifactcache.py @@ -38,13 +38,6 @@ class RemoteArtifactCache(object): def has(self, artifact): return self._has_file(artifact.basename()) - def has_artifact_metadata(self, artifact, name): - return self._has_file(artifact.metadata_basename(name)) - - def has_source_metadata(self, source, cachekey, name): - filename = '%s.%s' % (cachekey, name) - return self._has_file(filename) - def _has_file(self, filename): url = self._request_url(filename) logging.debug('RemoteArtifactCache._has_file: url=%s' % url) @@ -106,35 +99,32 @@ class RemoteArtifactCache(object): for _, local_file in to_fetch: local_file.close() - def get_artifact(self, artifact, lac, status_cb=None): - '''Ensure an artifact is available in the local artifact cache.''' + def get_artifacts_for_source(self, source, lac, status_cb=None): + '''Ensure all built artifacts for 'source' are in the local cache. 
+ + This includes all build metadata such as the .build-log file. + + ''' to_fetch = [] - if not lac.has(artifact): - to_fetch.append( - (artifact.basename(), lac.put(artifact))) - - needs_artifact_meta = \ - artifact.source.morphology.needs_artifact_metadata_cached - if needs_artifact_meta: # pragma: no cover - if not lac.has_artifact_metadata(artifact, 'meta'): - to_fetch.append(( - artifact.metadata_basename(artifact, 'meta'), - lac.put_artifact_metadata(artifact, 'meta'))) + + for basename in source.files(): + if not lac.has_file(basename): + to_fetch.append((basename, lac.put_file(basename))) if len(to_fetch) > 0: if status_cb: status_cb( - msg='Fetching to local cache: artifact %(name)s', - name=artifact.name) + msg='Fetching to local cache: built artifacts of %(name)s', + name=source.name) self._fetch_files(to_fetch) - def get_artifacts(self, artifacts, lac, status_cb=None): - '''Ensure multiple artifacts are available in the local cache.''' + def get_artifacts_for_sources(self, sources, lac, status_cb=None): + '''Ensure artifacts for multiple sources are available locally.''' - # FIXME: Running the downloads in parallel may give a speed boost, as - # many of these are small files. + # Running the downloads in parallel might give a speed boost, as many + # of these are small files. 
- for artifact in artifacts: - self.get_artifact(artifact, lac, status_cb=status_cb) + for source in sources: + self.get_artifacts_for_source(source, lac, status_cb=status_cb) diff --git a/morphlib/remoteartifactcache_tests.py b/morphlib/remoteartifactcache_tests.py index fc9b6b27..79c581ac 100644 --- a/morphlib/remoteartifactcache_tests.py +++ b/morphlib/remoteartifactcache_tests.py @@ -109,12 +109,6 @@ class RemoteArtifactCacheTests(unittest.TestCase): self.assertTrue( rac.has(an_artifact)) - self.assertTrue( - rac.has_artifact_metadata(an_artifact, 'meta')) - - self.assertTrue( - rac.has_source_metadata(chunk, chunk.cache_key, 'meta')) - lac = morphlib.testutils.FakeLocalArtifactCache() self.assertFalse(lac.has(an_artifact)) @@ -135,12 +129,6 @@ class RemoteArtifactCacheTests(unittest.TestCase): self.assertFalse( rac.has(an_artifact)) - self.assertFalse( - rac.has_artifact_metadata(an_artifact, 'non-existent-meta')) - - self.assertFalse( - rac.has_source_metadata(chunk, chunk.cache_key, 'meta')) - lac = morphlib.testutils.FakeLocalArtifactCache() with self.assertRaises(morphlib.remoteartifactcache.GetError): rac.get_artifacts( diff --git a/morphlib/source.py b/morphlib/source.py index 4ad54ed9..e37d8cba 100644 --- a/morphlib/source.py +++ b/morphlib/source.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -64,9 +64,33 @@ class Source(object): def __repr__(self): # pragma: no cover return 'Source(%s)' % str(self) - def basename(self): # pragma: no cover + def basename(self): return '%s.%s' % (self.cache_key, str(self.morphology['kind'])) + def build_log_basename(self): + return '%s.build-log' % (self.cache_key) + + def files(self): + '''Return the name of all built artifacts of this source. + + This includes every artifact and all associated metadata. 
+ + It's usually a bad idea to have only some of the files for a given + source available. Transfer all of them if you transfer any of them. + + ''' + files = {self.basename()} + + if self.morphology['kind'] == 'chunk': + files.add(self.build_log_basename()) + + for artifact in self.artifacts: + files.add(artifact.basename()) + if self.morphology.needs_artifact_metadata_cached: + files.add(artifact.metadata_basename()) + + return files + def add_dependency(self, artifact): # pragma: no cover if artifact not in self.dependencies: self.dependencies.append(artifact) |