diff options
author | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2015-02-09 17:07:00 +0000 |
---|---|---|
committer | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2015-02-11 16:04:43 +0000 |
commit | 623997cfef08e4e8d7fcf47806d6d81735299105 (patch) | |
tree | 79809446177f00ce45b3e96d6247319fffc14c33 | |
parent | 59f5d084705c90c5c8345c816f90abe14f075cd7 (diff) | |
download | morph-623997cfef08e4e8d7fcf47806d6d81735299105.tar.gz |
Consolidate code for fetching artifacts from remote cache
-rw-r--r-- | morphlib/buildcommand.py | 43 | ||||
-rw-r--r-- | morphlib/builder.py | 61 | ||||
-rw-r--r-- | morphlib/remoteartifactcache.py | 118 |
3 files changed, 90 insertions, 132 deletions
diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py index 6959c455..0a8d463f 100644 --- a/morphlib/buildcommand.py +++ b/morphlib/buildcommand.py @@ -16,7 +16,6 @@ import itertools import os -import shutil import logging import tempfile import datetime @@ -392,46 +391,8 @@ class BuildCommand(object): def cache_artifacts_locally(self, artifacts): '''Get artifacts missing from local cache from remote cache.''' - def fetch_files(to_fetch): - '''Fetch a set of files atomically. - - If an error occurs during the transfer of any files, all downloaded - data is deleted, to ensure integrity of the local cache. - - ''' - try: - for remote, local in to_fetch: - shutil.copyfileobj(remote, local) - except BaseException: - for remote, local in to_fetch: - local.abort() - raise - else: - for remote, local in to_fetch: - remote.close() - local.close() - - for artifact in artifacts: - # This block should fetch all artifact files in one go, using the - # 1.0/artifacts method of morph-cache-server. The code to do that - # needs bringing in from the distbuild.worker_build_connection - # module into morphlib.remoteartififactcache first. 
- to_fetch = [] - if not self.lac.has(artifact): - to_fetch.append((self.rac.get(artifact), - self.lac.put(artifact))) - - if artifact.source.morphology.needs_artifact_metadata_cached: - if not self.lac.has_artifact_metadata(artifact, 'meta'): - to_fetch.append(( - self.rac.get_artifact_metadata(artifact, 'meta'), - self.lac.put_artifact_metadata(artifact, 'meta'))) - - if len(to_fetch) > 0: - self.app.status( - msg='Fetching to local cache: artifact %(name)s', - name=artifact.name) - fetch_files(to_fetch) + self.rac.get_artifacts( + artifacts, self.lac, status_cb=self.app.status) def create_staging_area(self, build_env, use_chroot=True, extra_env={}, extra_path=[]): diff --git a/morphlib/builder.py b/morphlib/builder.py index 0bb21434..4786bd8a 100644 --- a/morphlib/builder.py +++ b/morphlib/builder.py @@ -123,25 +123,6 @@ def ldconfig(runcmd, rootdir): # pragma: no cover logging.debug('No %s, not running ldconfig' % conf) -def download_depends(constituents, lac, rac, metadatas=None): - for constituent in constituents: - if not lac.has(constituent): - source = rac.get(constituent) - target = lac.put(constituent) - shutil.copyfileobj(source, target) - target.close() - source.close() - if metadatas is not None: - for metadata in metadatas: - if not lac.has_artifact_metadata(constituent, metadata): - if rac.has_artifact_metadata(constituent, metadata): - src = rac.get_artifact_metadata(constituent, metadata) - dst = lac.put_artifact_metadata(constituent, metadata) - shutil.copyfileobj(src, dst) - dst.close() - src.close() - - class BuilderBase(object): '''Base class for building artifacts.''' @@ -496,11 +477,9 @@ class StratumBuilder(BuilderBase): # the only reason the StratumBuilder has to download chunks is to # check for overlap now that strata are lists of chunks with self.build_watch('check-chunks'): - for a_name, a in self.source.artifacts.iteritems(): - # download the chunk artifact if necessary - download_depends(constituents, - self.local_artifact_cache, - 
self.remote_artifact_cache) + self.remote_artifact_cache.get_artifacts( + constituents, self.local_artifact_cache, + status_cb=self.app.status) with self.build_watch('create-chunk-list'): lac = self.local_artifact_cache @@ -588,25 +567,23 @@ class SystemBuilder(BuilderBase): # pragma: no cover self.app.status(msg='Unpacking strata to %(path)s', path=path, chatty=True) with self.build_watch('unpack-strata'): - for a_name, a in self.source.artifacts.iteritems(): - # download the stratum artifacts if necessary - download_depends(self.source.dependencies, - self.local_artifact_cache, - self.remote_artifact_cache, - ('meta',)) - - # download the chunk artifacts if necessary - for stratum_artifact in self.source.dependencies: - f = self.local_artifact_cache.get(stratum_artifact) + # download the stratum artifacts if necessary + self.remote_artifact_cache.get_artifacts( + self.source.dependencies, self.local_artifact_cache, + status_cb=self.app.status) + + # download the chunk artifacts if necessary + for stratum_artifact in self.source.dependencies: + with self.local_artifact_cache.get(stratum_artifact) as f: chunks = [ArtifactCacheReference(c) for c in json.load(f)] - download_depends(chunks, - self.local_artifact_cache, - self.remote_artifact_cache) - f.close() - - # unpack it from the local artifact cache - for stratum_artifact in self.source.dependencies: - self.unpack_one_stratum(stratum_artifact, path) + + self.remote_artifact_cache.get_artifacts( + chunks, self.local_artifact_cache, + status_cb=self.app.status) + + # unpack it from the local artifact cache + for stratum_artifact in self.source.dependencies: + self.unpack_one_stratum(stratum_artifact, path) ldconfig(self.app.runcmd, path) diff --git a/morphlib/remoteartifactcache.py b/morphlib/remoteartifactcache.py index 499c2aab..45933d10 100644 --- a/morphlib/remoteartifactcache.py +++ b/morphlib/remoteartifactcache.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2014 Codethink Limited +# Copyright (C) 2012-2015 
Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,6 +16,7 @@ import cliapp import logging +import shutil import urllib import urllib2 import urlparse @@ -29,29 +30,10 @@ class HeadRequest(urllib2.Request): # pragma: no cover class GetError(cliapp.AppException): - def __init__(self, cache, artifact, error): + def __init__(self, cache, filename, error): cliapp.AppException.__init__( - self, 'Failed to get the artifact %s ' - 'from the artifact cache %s: %s' % - (artifact.basename(), cache, error)) - - -class GetArtifactMetadataError(GetError): - - def __init__(self, cache, artifact, name, error): - cliapp.AppException.__init__( - self, 'Failed to get metadata %s for the artifact %s ' - 'from the artifact cache %s: %s' % - (name, artifact.basename(), cache, error)) - - -class GetSourceMetadataError(GetError): - - def __init__(self, cache, source, cache_key, name, error): - cliapp.AppException.__init__( - self, 'Failed to get metadata %s for source %s ' - 'and cache key %s from the artifact cache %s: %s' % - (name, source, cache_key, cache, error)) + self, 'Failed to get the file %s from the artifact cache %s: %s' % + (filename, cache, error)) class RemoteArtifactCache(object): @@ -69,27 +51,6 @@ class RemoteArtifactCache(object): filename = '%s.%s' % (cachekey, name) return self._has_file(filename) - def get(self, artifact, log=logging.error): - try: - return self._get_file(artifact.basename()) - except urllib2.URLError as e: - log(str(e)) - raise GetError(self, artifact, e) - - def get_artifact_metadata(self, artifact, name, log=logging.error): - try: - return self._get_file(artifact.metadata_basename(name)) - except urllib2.URLError as e: - log(str(e)) - raise GetArtifactMetadataError(self, artifact, name, e) - - def get_source_metadata(self, source, cachekey, name): - filename = '%s.%s' % (cachekey, name) - try: - return self._get_file(filename) - 
except urllib2.URLError as e:
-            raise GetSourceMetadataError(self, source, cachekey, name, e)
-
     def _has_file(self, filename):  # pragma: no cover
         url = self._request_url(filename)
         logging.debug('RemoteArtifactCache._has_file: url=%s' % url)
@@ -100,11 +61,6 @@ class RemoteArtifactCache(object):
         except (urllib2.HTTPError, urllib2.URLError):
             return False
 
-    def _get_file(self, filename):  # pragma: no cover
-        url = self._request_url(filename)
-        logging.debug('RemoteArtifactCache._get_file: url=%s' % url)
-        return urllib2.urlopen(url)
-
     def _request_url(self, filename):  # pragma: no cover
         server_url = self.server_url
         if not server_url.endswith('/'):
@@ -115,3 +71,67 @@ class RemoteArtifactCache(object):
 
     def __str__(self):  # pragma: no cover
         return self.server_url
+
+    def _fetch_file(self, remote_filename, local_file):
+        remote_url = self._request_url(remote_filename)
+        logging.debug('RemoteArtifactCache._fetch_file: url=%s' % remote_url)
+
+        try:
+            remote_file = urllib2.urlopen(remote_url)
+            shutil.copyfileobj(remote_file, local_file)
+        except (urllib2.HTTPError, urllib2.URLError) as e:
+            logging.debug(str(e))
+            raise GetError(self, remote_filename, e)
+
+    def _fetch_files(self, to_fetch):
+        '''Fetch a set of files atomically.
+
+        If an error occurs during the transfer of any files, all downloaded
+        data is deleted, to reduce the chances of having artifacts in the local
+        cache that are missing their metadata, and so on.
+
+        This assumes that the morphlib.savefile module is used so the file
+        handles passed in to_fetch have a .abort() method.
+
+        '''
+        try:
+            for remote_filename, local_file in to_fetch:
+                self._fetch_file(remote_filename, local_file)
+        except BaseException:
+            for _, local_file in to_fetch:
+                local_file.abort()
+            raise
+        else:
+            for _, local_file in to_fetch:
+                local_file.close()
+
+    def get_artifact(self, artifact, lac, status_cb=None):
+        '''Ensure an artifact is available in the local artifact cache.'''
+
+        to_fetch = []
+        if not lac.has(artifact):
+            to_fetch.append(
+                (artifact.basename(), lac.put(artifact)))
+
+        if artifact.source.morphology.needs_artifact_metadata_cached:
+            if not lac.has_artifact_metadata(artifact, 'meta'):
+                to_fetch.append((
+                    artifact.metadata_basename('meta'),
+                    lac.put_artifact_metadata(artifact, 'meta')))
+
+        if len(to_fetch) > 0:
+            if status_cb:
+                status_cb(
+                    msg='Fetching to local cache: artifact %(name)s',
+                    name=artifact.name)
+
+            self._fetch_files(to_fetch)
+
+    def get_artifacts(self, artifacts, lac, status_cb=None):
+        '''Ensure multiple artifacts are available in the local cache.'''
+
+        # FIXME: Running the downloads in parallel may give a speed boost, as
+        # many of these are small files.
+
+        for artifact in artifacts:
+            self.get_artifact(artifact, lac, status_cb=status_cb)