summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Maw <jonathan.maw@codethink.co.uk>2018-08-28 16:12:45 +0100
committerJonathan Maw <jonathan.maw@codethink.co.uk>2018-10-30 13:15:48 +0000
commit2c0d7a614de63eacda257fbfc8898bcdb2a79492 (patch)
treec37337efc98b25f674503136222ee72192345242
parent7f8d0b9cac97b830b718b00ed3db9096c06d525c (diff)
downloadbuildstream-jonathan/expose-downloadablefilesource.tar.gz
Make DownloadableFileSource clearly define public and private fieldsjonathan/expose-downloadablefilesource
TarSource and RemoteSource now parse url from the config themselves, because they need it, but the url is private.
-rw-r--r--buildstream/downloadablefilesource.py146
-rw-r--r--buildstream/plugins/sources/deb.py2
-rw-r--r--buildstream/plugins/sources/remote.py10
-rw-r--r--buildstream/plugins/sources/tar.py8
-rw-r--r--buildstream/plugins/sources/zip.py2
5 files changed, 107 insertions, 61 deletions
diff --git a/buildstream/downloadablefilesource.py b/buildstream/downloadablefilesource.py
index 11981c4c2..593db8997 100644
--- a/buildstream/downloadablefilesource.py
+++ b/buildstream/downloadablefilesource.py
@@ -2,6 +2,13 @@
DownloadableFileSource - Abstract class for downloading files
=============================================================
A base abstract class for source implementations which download a file.
+
+Derived classes must write their own stage() implementation, using the
+public APIs exposed in this class.
+
+Derived classes must also chain up to the parent method in their get_unique_key()
+implementations.
+
"""
import os
@@ -19,50 +26,54 @@ class DownloadableFileSource(Source):
COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ['url', 'ref', 'etag']
+ #####################################
+ # Implementations of abstract methods
+ #####################################
+
def configure(self, node):
- self.original_url = self.node_get_member(node, str, 'url')
- self.ref = self.node_get_member(node, str, 'ref', None)
- self.url = self.translate_url(self.original_url)
- self._warn_deprecated_etag(node)
+ self.__original_url = self.node_get_member(node, str, 'url')
+ self.__ref = self.node_get_member(node, str, 'ref', None)
+ self.__url = self.translate_url(self.__original_url)
+ self.__warn_deprecated_etag(node)
def preflight(self):
return
def get_unique_key(self):
- return [self.original_url, self.ref]
+ return [self.__original_url, self.__ref]
def get_consistency(self):
- if self.ref is None:
+ if self.__ref is None:
return Consistency.INCONSISTENT
- if os.path.isfile(self._get_mirror_file()):
+ if os.path.isfile(self.get_mirror_file()):
return Consistency.CACHED
else:
return Consistency.RESOLVED
def load_ref(self, node):
- self.ref = self.node_get_member(node, str, 'ref', None)
- self._warn_deprecated_etag(node)
+ self.__ref = self.node_get_member(node, str, 'ref', None)
+ self.__warn_deprecated_etag(node)
def get_ref(self):
- return self.ref
+ return self.__ref
def set_ref(self, ref, node):
- node['ref'] = self.ref = ref
+ node['ref'] = self.__ref = ref
def track(self):
# there is no 'track' field in the source to determine what/whether
# or not to update refs, because tracking a ref is always a conscious
# decision by the user.
- with self.timed_activity("Tracking {}".format(self.url),
+ with self.timed_activity("Tracking {}".format(self.__url),
silent_nested=True):
- new_ref = self._ensure_mirror()
+ new_ref = self.ensure_mirror()
- if self.ref and self.ref != new_ref:
+ if self.__ref and self.__ref != new_ref:
detail = "When tracking, new ref differs from current ref:\n" \
- + " Tracked URL: {}\n".format(self.url) \
- + " Current ref: {}\n".format(self.ref) \
+ + " Tracked URL: {}\n".format(self.__url) \
+ + " Current ref: {}\n".format(self.__ref) \
+ " New ref: {}\n".format(new_ref)
self.warn("Potential man-in-the-middle attack!", detail=detail)
@@ -74,49 +85,40 @@ class DownloadableFileSource(Source):
# file to be already cached because Source.fetch() will
# not be called if the source is already Consistency.CACHED.
#
- if os.path.isfile(self._get_mirror_file()):
+ if os.path.isfile(self.get_mirror_file()):
return # pragma: nocover
# Download the file, raise hell if the sha256sums don't match,
# and mirror the file otherwise.
- with self.timed_activity("Fetching {}".format(self.url), silent_nested=True):
- sha256 = self._ensure_mirror()
- if sha256 != self.ref:
+ with self.timed_activity("Fetching {}".format(self.__url), silent_nested=True):
+ sha256 = self.ensure_mirror()
+ if sha256 != self.__ref:
raise SourceError("File downloaded from {} has sha256sum '{}', not '{}'!"
- .format(self.url, sha256, self.ref))
+ .format(self.__url, sha256, self.__ref))
+ ################
+ # Public methods
+ ################
- def _warn_deprecated_etag(self, node):
- etag = self.node_get_member(node, str, 'etag', None)
- if etag:
- provenance = self.node_provenance(node, member_name='etag')
- self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
+ def ensure_mirror(self):
+ """Downloads from the url and caches it according to its sha256sum
- def _get_etag(self, ref):
- etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
- if os.path.exists(etagfilename):
- with open(etagfilename, 'r') as etagfile:
- return etagfile.read()
-
- return None
+ Returns:
+ (str): The sha256sum of the mirrored file
- def _store_etag(self, ref, etag):
- etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
- with utils.save_file_atomic(etagfilename) as etagfile:
- etagfile.write(etag)
-
- def _ensure_mirror(self):
- # Downloads from the url and caches it according to its sha256sum.
+ Raises:
+ :class:`.SourceError`
+ """
try:
with self.tempdir() as td:
- default_name = os.path.basename(self.url)
- request = urllib.request.Request(self.url)
+ default_name = os.path.basename(self.__url)
+ request = urllib.request.Request(self.__url)
request.add_header('Accept', '*/*')
# We do not use etag in case what we have in cache is
# not matching ref in order to be able to recover from
# corrupted download.
- if self.ref:
- etag = self._get_etag(self.ref)
+ if self.__ref:
+ etag = self.__get_etag(self.__ref)
# Do not re-download the file if the ETag matches.
if etag and self.get_consistency() == Consistency.CACHED:
@@ -134,17 +136,17 @@ class DownloadableFileSource(Source):
shutil.copyfileobj(response, dest)
# Make sure url-specific mirror dir exists.
- if not os.path.isdir(self._get_mirror_dir()):
- os.makedirs(self._get_mirror_dir())
+ if not os.path.isdir(self.__get_mirror_dir()):
+ os.makedirs(self.__get_mirror_dir())
# Store by sha256sum
sha256 = utils.sha256sum(local_file)
# Even if the file already exists, move the new file over.
# In case the old file was corrupted somehow.
- os.rename(local_file, self._get_mirror_file(sha256))
+ os.rename(local_file, self.get_mirror_file(sha=sha256))
if etag:
- self._store_etag(sha256, etag)
+ self.__store_etag(sha256, etag)
return sha256
except urllib.error.HTTPError as e:
@@ -152,19 +154,53 @@ class DownloadableFileSource(Source):
# 304 Not Modified.
# Because we use etag only for matching ref, currently specified ref is what
# we would have downloaded.
- return self.ref
+ return self.__ref
raise SourceError("{}: Error mirroring {}: {}"
- .format(self, self.url, e), temporary=True) from e
+ .format(self, self.__url, e), temporary=True) from e
except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError, ValueError) as e:
# Note that urllib.request.Request in the try block may throw a
# ValueError for unknown url types, so we handle it here.
raise SourceError("{}: Error mirroring {}: {}"
- .format(self, self.url, e), temporary=True) from e
+ .format(self, self.__url, e), temporary=True) from e
+
+ def get_mirror_file(self, *, sha=None):
+ """Calculates the path to where this source stores the downloaded file
- def _get_mirror_dir(self):
+ Users are expected to read the file this points to when staging their source.
+
+ Returns:
+ (str): A path to the file the source should be cached at
+ """
+ return os.path.join(self.__get_mirror_dir(), sha or self.__ref)
+
+ #######################
+ # Local Private methods
+ #######################
+
+ def __get_mirror_dir(self):
+ # Get the directory this source should store things in, for a given URL
return os.path.join(self.get_mirror_directory(),
- utils.url_directory_name(self.original_url))
+ utils.url_directory_name(self.__original_url))
+
+ def __warn_deprecated_etag(self, node):
+ # Warn the user if the 'etag' field is being used
+ etag = self.node_get_member(node, str, 'etag', None)
+ if etag:
+ provenance = self.node_provenance(node, member_name='etag')
+ self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
- def _get_mirror_file(self, sha=None):
- return os.path.join(self._get_mirror_dir(), sha or self.ref)
+ def __get_etag(self, ref):
+ # Retrieve the etag's data from disk
+ etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref))
+ if os.path.exists(etagfilename):
+ with open(etagfilename, 'r') as etagfile:
+ return etagfile.read()
+
+ return None
+
+ def __store_etag(self, ref, etag):
+ # Write the etag's data to disk
+ etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref))
+ with utils.save_file_atomic(etagfilename) as etagfile:
+ etagfile.write(etag)
diff --git a/buildstream/plugins/sources/deb.py b/buildstream/plugins/sources/deb.py
index 1b7dafd31..b62a976da 100644
--- a/buildstream/plugins/sources/deb.py
+++ b/buildstream/plugins/sources/deb.py
@@ -68,7 +68,7 @@ class DebSource(TarSource):
@contextmanager
def _get_tar(self):
- with open(self._get_mirror_file(), 'rb') as deb_file:
+ with open(self.get_mirror_file(), 'rb') as deb_file:
arpy_archive = arpy.Archive(fileobj=deb_file)
arpy_archive.read_all_headers()
data_tar_arpy = [v for k, v in arpy_archive.archived_files.items() if b"data.tar" in k][0]
diff --git a/buildstream/plugins/sources/remote.py b/buildstream/plugins/sources/remote.py
index c296d3158..fbfff9056 100644
--- a/buildstream/plugins/sources/remote.py
+++ b/buildstream/plugins/sources/remote.py
@@ -61,7 +61,13 @@ class RemoteSource(DownloadableFileSource):
def configure(self, node):
super().configure(node)
- self.filename = self.node_get_member(node, str, 'filename', os.path.basename(self.url))
+ self.filename = self.node_get_member(node, str, 'filename', None)
+ if not self.filename:
+ # url in DownloadableFileSource is private, so we read it again
+ original_url = self.node_get_member(node, str, 'url')
+ url = self.translate_url(original_url)
+ self.filename = os.path.basename(url)
+
self.executable = self.node_get_member(node, bool, 'executable', False)
if os.sep in self.filename:
@@ -78,7 +84,7 @@ class RemoteSource(DownloadableFileSource):
dest = os.path.join(directory, self.filename)
with self.timed_activity("Staging remote file to {}".format(dest)):
- utils.safe_copy(self._get_mirror_file(), dest)
+ utils.safe_copy(self.get_mirror_file(), dest)
# To prevent user's umask introducing variability here, explicitly set
# file modes.
diff --git a/buildstream/plugins/sources/tar.py b/buildstream/plugins/sources/tar.py
index 75219dc89..667b28379 100644
--- a/buildstream/plugins/sources/tar.py
+++ b/buildstream/plugins/sources/tar.py
@@ -71,6 +71,10 @@ class TarSource(DownloadableFileSource):
def configure(self, node):
super().configure(node)
+ # url in DownloadableFileSource is private, so we read it again
+ original_url = self.node_get_member(node, str, 'url')
+ self.url = self.translate_url(original_url)
+
self.base_dir = self.node_get_member(node, str, 'base-dir', '*') or None
self.node_validate(node, DownloadableFileSource.COMMON_CONFIG_KEYS + ['base-dir'])
@@ -87,7 +91,7 @@ class TarSource(DownloadableFileSource):
def _run_lzip(self):
assert self.host_lzip
with TemporaryFile() as lzip_stdout:
- with open(self._get_mirror_file(), 'r') as lzip_file:
+ with open(self.get_mirror_file(), 'r') as lzip_file:
self.call([self.host_lzip, '-d'],
stdin=lzip_file,
stdout=lzip_stdout)
@@ -102,7 +106,7 @@ class TarSource(DownloadableFileSource):
with tarfile.open(fileobj=lzip_dec, mode='r:') as tar:
yield tar
else:
- with tarfile.open(self._get_mirror_file()) as tar:
+ with tarfile.open(self.get_mirror_file()) as tar:
yield tar
def stage(self, directory):
diff --git a/buildstream/plugins/sources/zip.py b/buildstream/plugins/sources/zip.py
index dafd6f500..5773e0dba 100644
--- a/buildstream/plugins/sources/zip.py
+++ b/buildstream/plugins/sources/zip.py
@@ -83,7 +83,7 @@ class ZipSource(DownloadableFileSource):
noexec_rights = exec_rights & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
try:
- with zipfile.ZipFile(self._get_mirror_file()) as archive:
+ with zipfile.ZipFile(self.get_mirror_file()) as archive:
base_dir = None
if self.base_dir:
base_dir = self._find_base_dir(archive, self.base_dir)