diff options
Diffstat (limited to 'src/buildstream/plugins/sources')
-rw-r--r-- | src/buildstream/plugins/sources/_downloadablefilesource.py | 250 | ||||
-rw-r--r-- | src/buildstream/plugins/sources/remote.py | 3 | ||||
-rw-r--r-- | src/buildstream/plugins/sources/tar.py | 4 | ||||
-rw-r--r-- | src/buildstream/plugins/sources/zip.py | 4 |
4 files changed, 3 insertions, 258 deletions
diff --git a/src/buildstream/plugins/sources/_downloadablefilesource.py b/src/buildstream/plugins/sources/_downloadablefilesource.py
deleted file mode 100644
index fcdd005ac..000000000
--- a/src/buildstream/plugins/sources/_downloadablefilesource.py
+++ /dev/null
@@ -1,250 +0,0 @@
-"""A base abstract class for source implementations which download a file"""
-
-import os
-import urllib.request
-import urllib.error
-import contextlib
-import shutil
-import netrc
-
-from buildstream import Source, SourceError
-from buildstream import utils
-
-
-class _NetrcFTPOpener(urllib.request.FTPHandler):
-    def __init__(self, netrc_config):
-        self.netrc = netrc_config
-
-    def _split(self, netloc):
-        userpass, hostport = urllib.parse.splituser(netloc)
-        host, port = urllib.parse.splitport(hostport)
-        if userpass:
-            user, passwd = urllib.parse.splitpasswd(userpass)
-        else:
-            user = None
-            passwd = None
-        return host, port, user, passwd
-
-    def _unsplit(self, host, port, user, passwd):
-        if port:
-            host = "{}:{}".format(host, port)
-        if user:
-            if passwd:
-                user = "{}:{}".format(user, passwd)
-            host = "{}@{}".format(user, host)
-
-        return host
-
-    def ftp_open(self, req):
-        host, port, user, passwd = self._split(req.host)
-
-        if user is None and self.netrc:
-            entry = self.netrc.authenticators(host)
-            if entry:
-                user, _, passwd = entry
-
-        req.host = self._unsplit(host, port, user, passwd)
-
-        return super().ftp_open(req)
-
-
-class _NetrcPasswordManager:
-    def __init__(self, netrc_config):
-        self.netrc = netrc_config
-
-    def add_password(self, realm, uri, user, passwd):
-        pass
-
-    def find_user_password(self, realm, authuri):
-        if not self.netrc:
-            return None, None
-        parts = urllib.parse.urlsplit(authuri)
-        entry = self.netrc.authenticators(parts.hostname)
-        if not entry:
-            return None, None
-        else:
-            login, _, password = entry
-            return login, password
-
-
-class DownloadableFileSource(Source):
-    # pylint: disable=attribute-defined-outside-init
-
-    COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ["url", "ref", "etag"]
-
-    __urlopener = None
-    __default_mirror_file = None
-
-    def configure(self, node):
-        self.original_url = node.get_str("url")
-        self.ref = node.get_str("ref", None)
-        self.url = self.translate_url(self.original_url)
-        self._mirror_dir = os.path.join(self.get_mirror_directory(), utils.url_directory_name(self.original_url))
-        self._warn_deprecated_etag(node)
-
-    def preflight(self):
-        return
-
-    def get_unique_key(self):
-        return [self.original_url, self.ref]
-
-    def is_cached(self) -> bool:
-        return os.path.isfile(self._get_mirror_file())
-
-    def load_ref(self, node):
-        self.ref = node.get_str("ref", None)
-        self._warn_deprecated_etag(node)
-
-    def get_ref(self):
-        return self.ref
-
-    def set_ref(self, ref, node):
-        node["ref"] = self.ref = ref
-
-    def track(self): # pylint: disable=arguments-differ
-        # there is no 'track' field in the source to determine what/whether
-        # or not to update refs, because tracking a ref is always a conscious
-        # decision by the user.
-        with self.timed_activity("Tracking {}".format(self.url), silent_nested=True):
-            new_ref = self._ensure_mirror()
-
-            if self.ref and self.ref != new_ref:
-                detail = (
-                    "When tracking, new ref differs from current ref:\n"
-                    + " Tracked URL: {}\n".format(self.url)
-                    + " Current ref: {}\n".format(self.ref)
-                    + " New ref: {}\n".format(new_ref)
-                )
-                self.warn("Potential man-in-the-middle attack!", detail=detail)
-
-            return new_ref
-
-    def fetch(self): # pylint: disable=arguments-differ
-
-        # Just a defensive check, it is impossible for the
-        # file to be already cached because Source.fetch() will
-        # not be called if the source is already cached.
-        #
-        if os.path.isfile(self._get_mirror_file()):
-            return # pragma: nocover
-
-        # Download the file, raise hell if the sha256sums don't match,
-        # and mirror the file otherwise.
-        with self.timed_activity("Fetching {}".format(self.url), silent_nested=True):
-            sha256 = self._ensure_mirror()
-            if sha256 != self.ref:
-                raise SourceError(
-                    "File downloaded from {} has sha256sum '{}', not '{}'!".format(self.url, sha256, self.ref)
-                )
-
-    def _warn_deprecated_etag(self, node):
-        etag = node.get_str("etag", None)
-        if etag:
-            provenance = node.get_scalar(etag).get_provenance()
-            self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
-
-    def _get_etag(self, ref):
-        etagfilename = os.path.join(self._mirror_dir, "{}.etag".format(ref))
-        if os.path.exists(etagfilename):
-            with open(etagfilename, "r") as etagfile:
-                return etagfile.read()
-
-        return None
-
-    def _store_etag(self, ref, etag):
-        etagfilename = os.path.join(self._mirror_dir, "{}.etag".format(ref))
-        with utils.save_file_atomic(etagfilename) as etagfile:
-            etagfile.write(etag)
-
-    def _ensure_mirror(self):
-        # Downloads from the url and caches it according to its sha256sum.
-        try:
-            with self.tempdir() as td:
-                default_name = os.path.basename(self.url)
-                request = urllib.request.Request(self.url)
-                request.add_header("Accept", "*/*")
-                request.add_header("User-Agent", "BuildStream/2")
-
-                # We do not use etag in case what we have in cache is
-                # not matching ref in order to be able to recover from
-                # corrupted download.
-                if self.ref:
-                    etag = self._get_etag(self.ref)
-
-                    # Do not re-download the file if the ETag matches.
-                    if etag and self.is_cached():
-                        request.add_header("If-None-Match", etag)
-
-                opener = self.__get_urlopener()
-                with contextlib.closing(opener.open(request)) as response:
-                    info = response.info()
-
-                    # some servers don't honor the 'If-None-Match' header
-                    if self.ref and etag and info["ETag"] == etag:
-                        return self.ref
-
-                    etag = info["ETag"]
-
-                    filename = info.get_filename(default_name)
-                    filename = os.path.basename(filename)
-                    local_file = os.path.join(td, filename)
-                    with open(local_file, "wb") as dest:
-                        shutil.copyfileobj(response, dest)
-
-                # Make sure url-specific mirror dir exists.
-                if not os.path.isdir(self._mirror_dir):
-                    os.makedirs(self._mirror_dir)
-
-                # Store by sha256sum
-                sha256 = utils.sha256sum(local_file)
-                # Even if the file already exists, move the new file over.
-                # In case the old file was corrupted somehow.
-                os.rename(local_file, self._get_mirror_file(sha256))
-
-                if etag:
-                    self._store_etag(sha256, etag)
-                return sha256
-
-        except urllib.error.HTTPError as e:
-            if e.code == 304:
-                # 304 Not Modified.
-                # Because we use etag only for matching ref, currently specified ref is what
-                # we would have downloaded.
-                return self.ref
-            raise SourceError("{}: Error mirroring {}: {}".format(self, self.url, e), temporary=True) from e
-
-        except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError, ValueError) as e:
-            # Note that urllib.request.Request in the try block may throw a
-            # ValueError for unknown url types, so we handle it here.
-            raise SourceError("{}: Error mirroring {}: {}".format(self, self.url, e), temporary=True) from e
-
-    def _get_mirror_file(self, sha=None):
-        if sha is not None:
-            return os.path.join(self._mirror_dir, sha)
-
-        if self.__default_mirror_file is None:
-            self.__default_mirror_file = os.path.join(self._mirror_dir, self.ref)
-
-        return self.__default_mirror_file
-
-    def __get_urlopener(self):
-        if not DownloadableFileSource.__urlopener:
-            try:
-                netrc_config = netrc.netrc()
-            except OSError:
-                # If the .netrc file was not found, FileNotFoundError will be
-                # raised, but OSError will be raised directly by the netrc package
-                # in the case that $HOME is not set.
-                #
-                # This will catch both cases.
-                #
-                DownloadableFileSource.__urlopener = urllib.request.build_opener()
-            except netrc.NetrcParseError as e:
-                self.warn("{}: While reading .netrc: {}".format(self, e))
-                return urllib.request.build_opener()
-            else:
-                netrc_pw_mgr = _NetrcPasswordManager(netrc_config)
-                http_auth = urllib.request.HTTPBasicAuthHandler(netrc_pw_mgr)
-                ftp_handler = _NetrcFTPOpener(netrc_config)
-                DownloadableFileSource.__urlopener = urllib.request.build_opener(http_auth, ftp_handler)
-        return DownloadableFileSource.__urlopener
diff --git a/src/buildstream/plugins/sources/remote.py b/src/buildstream/plugins/sources/remote.py
index 57d8743a7..9ed3099d9 100644
--- a/src/buildstream/plugins/sources/remote.py
+++ b/src/buildstream/plugins/sources/remote.py
@@ -48,8 +48,7 @@ See :ref:`built-in functionality doumentation <core_source_builtins>` for
 details on common configuration options for sources.
 """
 import os
-from buildstream import SourceError, utils
-from ._downloadablefilesource import DownloadableFileSource
+from buildstream import DownloadableFileSource, SourceError, utils
 
 
 class RemoteSource(DownloadableFileSource):
diff --git a/src/buildstream/plugins/sources/tar.py b/src/buildstream/plugins/sources/tar.py
index 793fd11c9..aba927b99 100644
--- a/src/buildstream/plugins/sources/tar.py
+++ b/src/buildstream/plugins/sources/tar.py
@@ -60,11 +60,9 @@ import tarfile
 from contextlib import contextmanager
 from tempfile import TemporaryFile
 
-from buildstream import SourceError
+from buildstream import DownloadableFileSource, SourceError
 from buildstream import utils
 
-from ._downloadablefilesource import DownloadableFileSource
-
 
 class ReadableTarInfo(tarfile.TarInfo):
     """
diff --git a/src/buildstream/plugins/sources/zip.py b/src/buildstream/plugins/sources/zip.py
index c112cf10e..116425934 100644
--- a/src/buildstream/plugins/sources/zip.py
+++ b/src/buildstream/plugins/sources/zip.py
@@ -60,11 +60,9 @@ import os
 import zipfile
 import stat
 
-from buildstream import SourceError
+from buildstream import DownloadableFileSource, SourceError
 from buildstream import utils
 
-from ._downloadablefilesource import DownloadableFileSource
-
 
 class ZipSource(DownloadableFileSource):
     # pylint: disable=attribute-defined-outside-init