diff options
Diffstat (limited to 'buildstream/plugins/sources/_downloadablefilesource.py')
-rw-r--r-- | buildstream/plugins/sources/_downloadablefilesource.py | 107 |
1 files changed, 107 insertions, 0 deletions
# Extracted from: diff --git a/buildstream/plugins/sources/_downloadablefilesource.py
#                            b/buildstream/plugins/sources/_downloadablefilesource.py
# (new file mode 100644, index 000000000..90d3709c2)
"""A base abstract class for source implementations which download a file"""

import os
import urllib.request
import urllib.error

from buildstream import Source, SourceError, Consistency
from buildstream import utils


class DownloadableFileSource(Source):
    """Base class for sources which download a single file from a URL.

    A downloaded file is stored in a per-URL mirror directory under a file
    name equal to its sha256 checksum; that checksum doubles as the
    source's ``ref``.
    """

    COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ['url', 'ref']

    def configure(self, node):
        """Read the 'url' and 'ref' members from *node*.

        Sets ``original_url`` (as written in the node), ``url`` (the result
        of the project's ``translate_url()``) and ``ref``.
        """
        project = self.get_project()

        self.original_url = self.node_get_member(node, str, 'url')
        # An empty 'ref' ('' is what is passed to node_get_member as the
        # fallback) is normalised to None, meaning "no ref yet".
        self.ref = self.node_get_member(node, str, 'ref', '') or None
        self.url = project.translate_url(self.original_url)

    def preflight(self):
        """Nothing to check before running this source."""
        return

    def get_unique_key(self):
        """Return the untranslated URL and the ref as the unique key."""
        return [self.original_url, self.ref]

    def get_consistency(self):
        """Report the state of this source.

        Returns:
            Consistency.INCONSISTENT when there is no ref,
            Consistency.CACHED when the mirrored file for the ref exists,
            Consistency.RESOLVED otherwise.
        """
        if self.ref is None:
            return Consistency.INCONSISTENT

        if os.path.isfile(self._get_mirror_file()):
            return Consistency.CACHED

        return Consistency.RESOLVED

    def get_ref(self):
        """Return the current ref (a sha256 checksum string, or None)."""
        return self.ref

    def set_ref(self, ref, node):
        """Store *ref* both on this source and in ``node['ref']``."""
        node['ref'] = self.ref = ref

    def track(self):
        """Download the file and return its sha256 checksum as the new ref.

        A warning is emitted when a ref is already set and the freshly
        downloaded file has a different checksum.
        """
        # there is no 'track' field in the source to determine what/whether
        # or not to update refs, because tracking a ref is always a conscious
        # decision by the user.
        with self.timed_activity("Tracking {}".format(self.url),
                                 silent_nested=True):
            new_ref = self._ensure_mirror()

            if self.ref and self.ref != new_ref:
                detail = "When tracking, new ref differs from current ref:\n" \
                    + " Tracked URL: {}\n".format(self.url) \
                    + " Current ref: {}\n".format(self.ref) \
                    + " New ref: {}\n".format(new_ref)
                self.warn("Potential man-in-the-middle attack!", detail=detail)

            return new_ref

    def fetch(self):
        """Make sure the file for the current ref is present in the mirror.

        Raises:
            SourceError: if the download fails, or if the downloaded file's
                sha256 checksum does not equal the ref.
        """
        # Already mirrored under this ref: nothing to do.
        if os.path.isfile(self._get_mirror_file()):
            return

        # Download the file, raise hell if the sha256sums don't match,
        # and mirror the file otherwise.
        with self.timed_activity("Fetching {}".format(self.url), silent_nested=True):
            sha256 = self._ensure_mirror()
            if sha256 != self.ref:
                raise SourceError("File downloaded from {} has sha256sum '{}', not '{}'!"
                                  .format(self.url, sha256, self.ref))

    def _ensure_mirror(self):
        """Download ``self.url`` and store it in the mirror by its sha256sum.

        Returns:
            The sha256 checksum of the downloaded file.

        Raises:
            SourceError: if the download or any file system operation fails.
        """
        try:
            with self.tempdir() as td:
                # The temporary file needs *some* name; the URL's basename is
                # used when there is one. A URL ending in '/' has an empty
                # basename, which would make the destination the temporary
                # directory itself, so fall back to a fixed placeholder. The
                # name is irrelevant afterwards: the file is renamed to its
                # checksum below.
                filename = os.path.basename(self.url) or 'download'
                temp_dest = os.path.join(td, filename)

                local_file, _ = urllib.request.urlretrieve(self.url, temp_dest)
                if local_file != temp_dest:
                    raise SourceError("Expected to download file to '{}', downloaded to '{}' instead!"
                                      .format(temp_dest, local_file))

                # Make sure url-specific mirror dir exists. exist_ok avoids a
                # check-then-create race with anything else creating it.
                os.makedirs(self._get_mirror_dir(), exist_ok=True)

                # Store by sha256sum
                sha256 = utils.sha256sum(local_file)
                # Even if the file already exists, move the new file over.
                # In case the old file was corrupted somehow.
                os.rename(local_file, self._get_mirror_file(sha256))

                return sha256

        except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError) as e:
            raise SourceError("{}: Error mirroring {}: {}"
                              .format(self, self.url, e)) from e

    def _get_mirror_dir(self):
        """Return the mirror directory used for this source's URL.

        The directory name is derived from the untranslated URL via
        ``utils.url_directory_name()``.
        """
        return os.path.join(self.get_mirror_directory(),
                            utils.url_directory_name(self.original_url))

    def _get_mirror_file(self, sha=None):
        """Return the mirror path for checksum *sha* (default: current ref)."""
        return os.path.join(self._get_mirror_dir(), sha or self.ref)