author     Sam Thursfield <sam.thursfield@codethink.co.uk>   2015-02-09 17:07:00 +0000
committer  Sam Thursfield <sam.thursfield@codethink.co.uk>   2015-02-11 16:04:43 +0000
commit     623997cfef08e4e8d7fcf47806d6d81735299105 (patch)
tree       79809446177f00ce45b3e96d6247319fffc14c33
parent     59f5d084705c90c5c8345c816f90abe14f075cd7 (diff)
download   morph-623997cfef08e4e8d7fcf47806d6d81735299105.tar.gz
Consolidate code for fetching artifacts from remote cache
-rw-r--r--  morphlib/buildcommand.py        |  43
-rw-r--r--  morphlib/builder.py             |  61
-rw-r--r--  morphlib/remoteartifactcache.py | 118
3 files changed, 90 insertions(+), 132 deletions(-)
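
For orientation, a minimal sketch (not part of the commit) of the calling convention this change introduces. The names rac, lac, artifacts and status are stand-ins for the RemoteArtifactCache instance, local artifact cache, artifact list and application status callback that morph already passes around:

    # Before: each caller opened remote file objects and copied them into the
    # local cache by hand (see the removed fetch_files() and download_depends()
    # helpers in the diff below).
    #
    # After: callers delegate the whole fetch, including metadata and
    # abort-on-failure handling, to the remote cache object.
    rac.get_artifacts(artifacts, lac, status_cb=status)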
diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py
index 6959c455..0a8d463f 100644
--- a/morphlib/buildcommand.py
+++ b/morphlib/buildcommand.py
@@ -16,7 +16,6 @@
import itertools
import os
-import shutil
import logging
import tempfile
import datetime
@@ -392,46 +391,8 @@ class BuildCommand(object):
def cache_artifacts_locally(self, artifacts):
'''Get artifacts missing from local cache from remote cache.'''
- def fetch_files(to_fetch):
- '''Fetch a set of files atomically.
-
- If an error occurs during the transfer of any files, all downloaded
- data is deleted, to ensure integrity of the local cache.
-
- '''
- try:
- for remote, local in to_fetch:
- shutil.copyfileobj(remote, local)
- except BaseException:
- for remote, local in to_fetch:
- local.abort()
- raise
- else:
- for remote, local in to_fetch:
- remote.close()
- local.close()
-
- for artifact in artifacts:
- # This block should fetch all artifact files in one go, using the
- # 1.0/artifacts method of morph-cache-server. The code to do that
- # needs bringing in from the distbuild.worker_build_connection
- # module into morphlib.remoteartififactcache first.
- to_fetch = []
- if not self.lac.has(artifact):
- to_fetch.append((self.rac.get(artifact),
- self.lac.put(artifact)))
-
- if artifact.source.morphology.needs_artifact_metadata_cached:
- if not self.lac.has_artifact_metadata(artifact, 'meta'):
- to_fetch.append((
- self.rac.get_artifact_metadata(artifact, 'meta'),
- self.lac.put_artifact_metadata(artifact, 'meta')))
-
- if len(to_fetch) > 0:
- self.app.status(
- msg='Fetching to local cache: artifact %(name)s',
- name=artifact.name)
- fetch_files(to_fetch)
+ self.rac.get_artifacts(
+ artifacts, self.lac, status_cb=self.app.status)
def create_staging_area(self, build_env, use_chroot=True, extra_env={},
extra_path=[]):
diff --git a/morphlib/builder.py b/morphlib/builder.py
index 0bb21434..4786bd8a 100644
--- a/morphlib/builder.py
+++ b/morphlib/builder.py
@@ -123,25 +123,6 @@ def ldconfig(runcmd, rootdir): # pragma: no cover
logging.debug('No %s, not running ldconfig' % conf)
-def download_depends(constituents, lac, rac, metadatas=None):
- for constituent in constituents:
- if not lac.has(constituent):
- source = rac.get(constituent)
- target = lac.put(constituent)
- shutil.copyfileobj(source, target)
- target.close()
- source.close()
- if metadatas is not None:
- for metadata in metadatas:
- if not lac.has_artifact_metadata(constituent, metadata):
- if rac.has_artifact_metadata(constituent, metadata):
- src = rac.get_artifact_metadata(constituent, metadata)
- dst = lac.put_artifact_metadata(constituent, metadata)
- shutil.copyfileobj(src, dst)
- dst.close()
- src.close()
-
-
class BuilderBase(object):
'''Base class for building artifacts.'''
@@ -496,11 +477,9 @@ class StratumBuilder(BuilderBase):
# the only reason the StratumBuilder has to download chunks is to
# check for overlap now that strata are lists of chunks
with self.build_watch('check-chunks'):
- for a_name, a in self.source.artifacts.iteritems():
- # download the chunk artifact if necessary
- download_depends(constituents,
- self.local_artifact_cache,
- self.remote_artifact_cache)
+ self.remote_artifact_cache.get_artifacts(
+ constituents, self.local_artifact_cache,
+ status_cb=self.app.status)
with self.build_watch('create-chunk-list'):
lac = self.local_artifact_cache
@@ -588,25 +567,23 @@ class SystemBuilder(BuilderBase): # pragma: no cover
self.app.status(msg='Unpacking strata to %(path)s',
path=path, chatty=True)
with self.build_watch('unpack-strata'):
- for a_name, a in self.source.artifacts.iteritems():
- # download the stratum artifacts if necessary
- download_depends(self.source.dependencies,
- self.local_artifact_cache,
- self.remote_artifact_cache,
- ('meta',))
-
- # download the chunk artifacts if necessary
- for stratum_artifact in self.source.dependencies:
- f = self.local_artifact_cache.get(stratum_artifact)
+ # download the stratum artifacts if necessary
+ self.remote_artifact_cache.get_artifacts(
+ self.source.dependencies, self.local_artifact_cache,
+ status_cb=self.app.status)
+
+ # download the chunk artifacts if necessary
+ for stratum_artifact in self.source.dependencies:
+ with self.local_artifact_cache.get(stratum_artifact) as f:
chunks = [ArtifactCacheReference(c) for c in json.load(f)]
- download_depends(chunks,
- self.local_artifact_cache,
- self.remote_artifact_cache)
- f.close()
-
- # unpack it from the local artifact cache
- for stratum_artifact in self.source.dependencies:
- self.unpack_one_stratum(stratum_artifact, path)
+
+ self.remote_artifact_cache.get_artifacts(
+ chunks, self.local_artifact_cache,
+ status_cb=self.app.status)
+
+ # unpack it from the local artifact cache
+ for stratum_artifact in self.source.dependencies:
+ self.unpack_one_stratum(stratum_artifact, path)
ldconfig(self.app.runcmd, path)
diff --git a/morphlib/remoteartifactcache.py b/morphlib/remoteartifactcache.py
index 499c2aab..45933d10 100644
--- a/morphlib/remoteartifactcache.py
+++ b/morphlib/remoteartifactcache.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2012-2014 Codethink Limited
+# Copyright (C) 2012-2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,6 +16,7 @@
import cliapp
import logging
+import shutil
import urllib
import urllib2
import urlparse
@@ -29,29 +30,10 @@ class HeadRequest(urllib2.Request): # pragma: no cover
class GetError(cliapp.AppException):
- def __init__(self, cache, artifact, error):
+ def __init__(self, cache, filename, error):
cliapp.AppException.__init__(
- self, 'Failed to get the artifact %s '
- 'from the artifact cache %s: %s' %
- (artifact.basename(), cache, error))
-
-
-class GetArtifactMetadataError(GetError):
-
- def __init__(self, cache, artifact, name, error):
- cliapp.AppException.__init__(
- self, 'Failed to get metadata %s for the artifact %s '
- 'from the artifact cache %s: %s' %
- (name, artifact.basename(), cache, error))
-
-
-class GetSourceMetadataError(GetError):
-
- def __init__(self, cache, source, cache_key, name, error):
- cliapp.AppException.__init__(
- self, 'Failed to get metadata %s for source %s '
- 'and cache key %s from the artifact cache %s: %s' %
- (name, source, cache_key, cache, error))
+ self, 'Failed to get the file %s from the artifact cache %s: %s' %
+ (filename, cache, error))
class RemoteArtifactCache(object):
@@ -69,27 +51,6 @@ class RemoteArtifactCache(object):
filename = '%s.%s' % (cachekey, name)
return self._has_file(filename)
- def get(self, artifact, log=logging.error):
- try:
- return self._get_file(artifact.basename())
- except urllib2.URLError as e:
- log(str(e))
- raise GetError(self, artifact, e)
-
- def get_artifact_metadata(self, artifact, name, log=logging.error):
- try:
- return self._get_file(artifact.metadata_basename(name))
- except urllib2.URLError as e:
- log(str(e))
- raise GetArtifactMetadataError(self, artifact, name, e)
-
- def get_source_metadata(self, source, cachekey, name):
- filename = '%s.%s' % (cachekey, name)
- try:
- return self._get_file(filename)
- except urllib2.URLError as e:
- raise GetSourceMetadataError(self, source, cachekey, name, e)
-
def _has_file(self, filename): # pragma: no cover
url = self._request_url(filename)
logging.debug('RemoteArtifactCache._has_file: url=%s' % url)
@@ -100,11 +61,6 @@ class RemoteArtifactCache(object):
except (urllib2.HTTPError, urllib2.URLError):
return False
- def _get_file(self, filename): # pragma: no cover
- url = self._request_url(filename)
- logging.debug('RemoteArtifactCache._get_file: url=%s' % url)
- return urllib2.urlopen(url)
-
def _request_url(self, filename): # pragma: no cover
server_url = self.server_url
if not server_url.endswith('/'):
@@ -115,3 +71,67 @@ class RemoteArtifactCache(object):
def __str__(self): # pragma: no cover
return self.server_url
+
+ def _fetch_file(self, remote_filename, local_file):
+ remote_url = self._request_url(remote_filename)
+ logging.debug('RemoteArtifactCache._fetch_file: url=%s' % remote_url)
+
+ try:
+ remote_file = urllib2.urlopen(remote_url)
+ shutil.copyfileobj(remote_file, local_file)
+ except (urllib2.HTTPError, urllib2.URLError) as e:
+ logging.debug(str(e))
+ raise GetError(self, remote_filename, e)
+
+ def _fetch_files(self, to_fetch):
+ '''Fetch a set of files atomically.
+
+ If an error occurs during the transfer of any files, all downloaded
+ data is deleted, to reduce the chances of having artifacts in the local
+ cache that are missing their metadata, and so on.
+
+ This assumes that the morphlib.savefile module is used so the file
+ handles passed in to_fetch have a .abort() method.
+
+ '''
+ try:
+ for remote_filename, local_file in to_fetch:
+ self._fetch_file(remote_filename, local_file)
+ except BaseException:
+ for _, local_file in to_fetch:
+ local_file.abort()
+ raise
+ else:
+ for _, local_file in to_fetch:
+ local_file.close()
+
+ def get_artifact(self, artifact, lac, status_cb=None):
+ '''Ensure an artifact is available in the local artifact cache.'''
+
+ to_fetch = []
+ if not lac.has(artifact):
+ to_fetch.append(
+ (artifact.basename(), lac.put(artifact)))
+
+ if artifact.source.morphology.needs_artifact_metadata_cached:
+ if not lac.has_artifact_metadata(artifact, 'meta'):
+ to_fetch.append((
+ artifact.metadata_basename('meta'),
+ lac.put_artifact_metadata(artifact, 'meta')))
+
+ if len(to_fetch) > 0:
+ if status_cb:
+ status_cb(
+ msg='Fetching to local cache: artifact %(name)s',
+ name=artifact.name)
+
+ self._fetch_files(to_fetch)
+
+ def get_artifacts(self, artifacts, lac, status_cb=None):
+ '''Ensure multiple artifacts are available in the local cache.'''
+
+ # FIXME: Running the downloads in parallel may give a speed boost, as
+ # many of these are small files.
+
+ for artifact in artifacts:
+ self.get_artifact(artifact, lac, status_cb=status_cb)
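
As a usage note, a hedged sketch of the atomicity contract documented in _fetch_files() above; the artifact and cache objects are stand-ins, and the pairing mirrors what get_artifact() builds:

    # Each entry pairs a remote filename with a savefile-style handle from
    # lac.put() / lac.put_artifact_metadata(): close() commits the download
    # into the cache, abort() discards the partial file.
    to_fetch = [(artifact.basename(), lac.put(artifact)),
                (artifact.metadata_basename('meta'),
                 lac.put_artifact_metadata(artifact, 'meta'))]

    # If any download fails, every handle in the batch is aborted, so the
    # local cache is never left holding an artifact without its 'meta' file.
    rac._fetch_files(to_fetch)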