diff options
author | Mathieu Bridon <bochecha@daitauha.fr> | 2017-10-30 17:25:03 +0100 |
---|---|---|
committer | Mathieu Bridon <bochecha@daitauha.fr> | 2017-11-03 14:35:21 +0800 |
commit | 3f25bb99437b17e627c26719c83c14294fe25e80 (patch) | |
tree | e53e5dc36f2e3269fa5a1d893ba5b49abffd2ce3 | |
parent | f330024d640f340e92ea221ff26a343bf26a0115 (diff) | |
download | buildstream-3f25bb99437b17e627c26719c83c14294fe25e80.tar.gz |
tar: Move most of the code to a new base class
The new DownloadableFileSource will be used as a base for all sources
which just download a file to use as source.
The existing TarSource now keeps only the code responsible for managing
a Tar archive.
This will help implement other types of single-file downloaded
sources, for example Zip archives.
-rw-r--r-- | buildstream/plugins/sources/_downloadablefilesource.py | 107 | ||||
-rw-r--r-- | buildstream/plugins/sources/tar.py | 98 |
2 files changed, 114 insertions, 91 deletions
diff --git a/buildstream/plugins/sources/_downloadablefilesource.py b/buildstream/plugins/sources/_downloadablefilesource.py new file mode 100644 index 000000000..90d3709c2 --- /dev/null +++ b/buildstream/plugins/sources/_downloadablefilesource.py @@ -0,0 +1,107 @@ +"""A base abstract class for source implementations which download a file""" + +import os +import urllib.request +import urllib.error + +from buildstream import Source, SourceError, Consistency +from buildstream import utils + + +class DownloadableFileSource(Source): + + COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ['url', 'ref'] + + def configure(self, node): + project = self.get_project() + + self.original_url = self.node_get_member(node, str, 'url') + self.ref = self.node_get_member(node, str, 'ref', '') or None + self.url = project.translate_url(self.original_url) + + def preflight(self): + return + + def get_unique_key(self): + return [self.original_url, self.ref] + + def get_consistency(self): + if self.ref is None: + return Consistency.INCONSISTENT + + if os.path.isfile(self._get_mirror_file()): + return Consistency.CACHED + + else: + return Consistency.RESOLVED + + def get_ref(self): + return self.ref + + def set_ref(self, ref, node): + node['ref'] = self.ref = ref + + def track(self): + # there is no 'track' field in the source to determine what/whether + # or not to update refs, because tracking a ref is always a conscious + # decision by the user. 
+ with self.timed_activity("Tracking {}".format(self.url), + silent_nested=True): + new_ref = self._ensure_mirror() + + if self.ref and self.ref != new_ref: + detail = "When tracking, new ref differs from current ref:\n" \ + + " Tracked URL: {}\n".format(self.url) \ + + " Current ref: {}\n".format(self.ref) \ + + " New ref: {}\n".format(new_ref) + self.warn("Potential man-in-the-middle attack!", detail=detail) + + return new_ref + + def fetch(self): + if os.path.isfile(self._get_mirror_file()): + return + + # Download the file, raise hell if the sha256sums don't match, + # and mirror the file otherwise. + with self.timed_activity("Fetching {}".format(self.url), silent_nested=True): + sha256 = self._ensure_mirror() + if sha256 != self.ref: + raise SourceError("File downloaded from {} has sha256sum '{}', not '{}'!" + .format(self.url, sha256, self.ref)) + + def _ensure_mirror(self): + # Downloads from the url and caches it according to its sha256sum. + try: + with self.tempdir() as td: + # Using basename because there needs to be a filename, and 'foo' + # would be too silly. + temp_dest = os.path.join(td, os.path.basename(self.url)) + + local_file, _ = urllib.request.urlretrieve(self.url, temp_dest) + if local_file != temp_dest: + raise SourceError("Expected to download file to '{}', downloaded to '{}' instead!" + .format(temp_dest, local_file)) + + # Make sure url-specific mirror dir exists. + if not os.path.isdir(self._get_mirror_dir()): + os.makedirs(self._get_mirror_dir()) + + # Store by sha256sum + sha256 = utils.sha256sum(local_file) + # Even if the file already exists, move the new file over. + # In case the old file was corrupted somehow. 
+ os.rename(local_file, self._get_mirror_file(sha256)) + + return sha256 + + except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError) as e: + raise SourceError("{}: Error mirroring {}: {}" + .format(self, self.url, e)) from e + + def _get_mirror_dir(self): + return os.path.join(self.get_mirror_directory(), + utils.url_directory_name(self.original_url)) + + def _get_mirror_file(self, sha=None): + return os.path.join(self._get_mirror_dir(), sha or self.ref) diff --git a/buildstream/plugins/sources/tar.py b/buildstream/plugins/sources/tar.py index bf4a31da4..a05605c4b 100644 --- a/buildstream/plugins/sources/tar.py +++ b/buildstream/plugins/sources/tar.py @@ -47,74 +47,25 @@ """ import os -import urllib.request -import urllib.error import tarfile -import tempfile -from buildstream import Source, SourceError, Consistency +from buildstream import Source, SourceError from buildstream import utils +from ._downloadablefilesource import DownloadableFileSource -class TarSource(Source): - def configure(self, node): - project = self.get_project() +class TarSource(DownloadableFileSource): - self.node_validate(node, ['url', 'ref', 'base-dir'] + Source.COMMON_CONFIG_KEYS) + def configure(self, node): + super().configure(node) - self.original_url = self.node_get_member(node, str, 'url') - self.ref = self.node_get_member(node, str, 'ref', '') or None self.base_dir = self.node_get_member(node, str, 'base-dir', '*') or None - self.url = project.translate_url(self.original_url) - def preflight(self): - return + self.node_validate(node, DownloadableFileSource.COMMON_CONFIG_KEYS + ['base-dir']) def get_unique_key(self): - return [self.original_url, self.ref, self.base_dir] - - def get_consistency(self): - if self.ref is None: - return Consistency.INCONSISTENT - - if os.path.isfile(self._get_mirror_file()): - return Consistency.CACHED - else: - return Consistency.RESOLVED - - def get_ref(self): - return self.ref - - def set_ref(self, ref, node): - node['ref'] = self.ref 
= ref - - def track(self): - # there is no 'track' field in the source to determine what/whether - # or not to update refs, because tracking a ref is always a conscious - # decision by the user. - with self.timed_activity("Tracking {}".format(self.url), - silent_nested=True): - new_ref = self._ensure_mirror() - if self.ref and self.ref != new_ref: - detail = "When tracking, new ref differs from current ref:\n" \ - + " Tracked URL: {}\n".format(self.url) \ - + " Current ref: {}\n".format(self.ref) \ - + " New ref: {}\n".format(new_ref) - self.warn("Potential man-in-the-middle attack!", detail=detail) - return new_ref - - def fetch(self): - if os.path.isfile(self._get_mirror_file()): - return - - # Download the file, raise hell if the sha256sums don't match, - # and mirror the file otherwise. - with self.timed_activity("Fetching {}".format(self.url), silent_nested=True): - sha256 = self._ensure_mirror() - if sha256 != self.ref: - raise SourceError("Tar downloaded from {} has sha256sum '{}', not '{}'!" - .format(self.url, sha256, self.ref)) + return super().get_unique_key() + [self.base_dir] def stage(self, directory): try: @@ -131,41 +82,6 @@ class TarSource(Source): except (tarfile.TarError, OSError) as e: raise SourceError("{}: Error staging source: {}".format(self, e)) from e - def _ensure_mirror(self): - # Downloads from the url and caches it according to its sha256sum. - try: - with self.tempdir() as td: - # Using basename because there needs to be a filename, and 'foo' - # would be too silly. - temp_dest = os.path.join(td, os.path.basename(self.url)) - - local_file, _ = urllib.request.urlretrieve(self.url, temp_dest) - if local_file != temp_dest: - raise SourceError("Expected to download file to '{}', downloaded to '{}' instead!" - .format(temp_dest, local_file)) - - # Make sure url-specific mirror dir exists. 
- if not os.path.isdir(self._get_mirror_dir()): - os.makedirs(self._get_mirror_dir()) - - # Store by sha256sum - sha256 = utils.sha256sum(local_file) - # Even if the file already exists, move the new file over. - # In case the old file was corrupted somehow. - os.rename(local_file, self._get_mirror_file(sha256)) - - return sha256 - except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError) as e: - raise SourceError("{}: Error mirroring {}: {}" - .format(self, self.url, e)) from e - - def _get_mirror_dir(self): - return os.path.join(self.get_mirror_directory(), - utils.url_directory_name(self.original_url)) - - def _get_mirror_file(self, sha=None): - return os.path.join(self._get_mirror_dir(), sha or self.ref) - # Override and translate which filenames to extract def _extract_members(self, tar, base_dir): if not base_dir.endswith(os.sep): |