summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorValentin David <valentin.david@codethink.co.uk>2018-04-27 12:57:13 +0200
committerTristan Van Berkom <tristan.vanberkom@codethink.co.uk>2018-05-10 20:33:12 +0900
commit8985da07604674749eecbafba0d8c716b133d27d (patch)
tree78416703b91307843c140601eda8fc6238d4b6a7
parent32c307664bfd576723c5d977a09d57ec3b9cd369 (diff)
downloadbuildstream-8985da07604674749eecbafba0d8c716b133d27d.tar.gz
buildstream/plugins/sources/_downloadablefilesource.py: Store etag along with cache.
Fixes #377.
-rw-r--r--buildstream/plugins/sources/_downloadablefilesource.py65
1 files changed, 42 insertions, 23 deletions
diff --git a/buildstream/plugins/sources/_downloadablefilesource.py b/buildstream/plugins/sources/_downloadablefilesource.py
index 1b12efb34..ec9c0fbb7 100644
--- a/buildstream/plugins/sources/_downloadablefilesource.py
+++ b/buildstream/plugins/sources/_downloadablefilesource.py
@@ -18,8 +18,8 @@ class DownloadableFileSource(Source):
def configure(self, node):
self.original_url = self.node_get_member(node, str, 'url')
self.ref = self.node_get_member(node, str, 'ref', None)
- self.etag = self.node_get_member(node, str, 'etag', None)
self.url = self.translate_url(self.original_url)
+ self._warn_deprecated_etag(node)
def preflight(self):
return
@@ -39,24 +39,13 @@ class DownloadableFileSource(Source):
def load_ref(self, node):
self.ref = self.node_get_member(node, str, 'ref', None)
- self.etag = self.node_get_member(node, str, 'etag', None)
+ self._warn_deprecated_etag(node)
def get_ref(self):
- # Report `None` value if we dont have a ref
- if self.ref is None:
- return None
- return (self.ref, self.etag)
+ return self.ref
def set_ref(self, ref, node):
- # Always support `None` value for ref
- if ref is None:
- ref = (None, None)
-
- self.ref, self.etag = ref
-
- node['ref'] = self.ref
- if self.etag:
- node['etag'] = self.etag
+ node['ref'] = self.ref = ref
def track(self):
# there is no 'track' field in the source to determine what/whether
@@ -64,7 +53,7 @@ class DownloadableFileSource(Source):
# decision by the user.
with self.timed_activity("Tracking {}".format(self.url),
silent_nested=True):
- new_ref, new_etag = self._ensure_mirror()
+ new_ref = self._ensure_mirror()
if self.ref and self.ref != new_ref:
detail = "When tracking, new ref differs from current ref:\n" \
@@ -73,7 +62,7 @@ class DownloadableFileSource(Source):
+ " New ref: {}\n".format(new_ref)
self.warn("Potential man-in-the-middle attack!", detail=detail)
- return (new_ref, new_etag)
+ return new_ref
def fetch(self):
@@ -87,11 +76,30 @@ class DownloadableFileSource(Source):
# Download the file, raise hell if the sha256sums don't match,
# and mirror the file otherwise.
with self.timed_activity("Fetching {}".format(self.url), silent_nested=True):
- sha256, _ = self._ensure_mirror()
+ sha256 = self._ensure_mirror()
if sha256 != self.ref:
raise SourceError("File downloaded from {} has sha256sum '{}', not '{}'!"
.format(self.url, sha256, self.ref))
+ def _warn_deprecated_etag(self, node):
+ etag = self.node_get_member(node, str, 'etag', None)
+ if etag:
+ provenance = self.node_provenance(node, member_name='etag')
+ self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
+
+ def _get_etag(self, ref):
+ etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
+ if os.path.exists(etagfilename):
+ with open(etagfilename, 'r') as etagfile:
+ return etagfile.read()
+
+ return None
+
+ def _store_etag(self, ref, etag):
+ etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
+ with utils.save_file_atomic(etagfilename) as etagfile:
+ etagfile.write(etag)
+
def _ensure_mirror(self):
# Downloads from the url and caches it according to its sha256sum.
try:
@@ -100,9 +108,15 @@ class DownloadableFileSource(Source):
request = urllib.request.Request(self.url)
request.add_header('Accept', '*/*')
- # Do not re-download the file if the ETag matches
- if self.etag and self.get_consistency() == Consistency.CACHED:
- request.add_header('If-None-Match', self.etag)
+ # We do not use etag in case what we have in cache is
+ # not matching ref in order to be able to recover from
+ # corrupted download.
+ if self.ref:
+ etag = self._get_etag(self.ref)
+
+ # Do not re-download the file if the ETag matches.
+ if etag and self.get_consistency() == Consistency.CACHED:
+ request.add_header('If-None-Match', etag)
with contextlib.closing(urllib.request.urlopen(request)) as response:
info = response.info()
@@ -125,11 +139,16 @@ class DownloadableFileSource(Source):
# In case the old file was corrupted somehow.
os.rename(local_file, self._get_mirror_file(sha256))
- return (sha256, etag)
+ if etag:
+ self._store_etag(sha256, etag)
+ return sha256
except urllib.error.HTTPError as e:
if e.code == 304:
- return (self.ref, self.etag)
+ # 304 Not Modified.
+ # Because we use etag only for matching ref, currently specified ref is what
+ # we would have downloaded.
+ return self.ref
raise SourceError("{}: Error mirroring {}: {}"
.format(self, self.url, e)) from e