author | Nate Prewitt <Nate.Prewitt@gmail.com> | 2018-04-07 15:11:00 -0700 |
---|---|---|
committer | Nate Prewitt <Nate.Prewitt@gmail.com> | 2018-04-07 15:11:00 -0700 |
commit | c7cea32304c05ef5890bcb8941edc8b6fbe04a03 | |
tree | 7a5d670da7f5b7a8403537f2099435ce2407194e /requests | |
parent | 9c6bd54b44c0b05c6907522e8d9998a87b69c1cd | |
parent | b66908e7b647689793e299edc111bf9910e93ad3 | |
download | python-requests-updating_3.0.0.tar.gz | |
Merge remote-tracking branch 'upstream/master' into updating_3.0.0
Diffstat (limited to 'requests')
-rw-r--r-- | requests/adapters.py | 15 |
-rw-r--r-- | requests/exceptions.py | 4 |
-rw-r--r-- | requests/help.py | 2 |
-rw-r--r-- | requests/models.py | 4 |
-rw-r--r-- | requests/sessions.py | 13 |
-rw-r--r-- | requests/status_codes.py | 38 |
-rw-r--r-- | requests/utils.py | 103 |
7 files changed, 144 insertions, 35 deletions
diff --git a/requests/adapters.py b/requests/adapters.py
index 5bf80eb5..fe0f9049 100644
--- a/requests/adapters.py
+++ b/requests/adapters.py
@@ -13,6 +13,7 @@ import socket

 from urllib3.poolmanager import PoolManager, proxy_from_url
 from urllib3.response import HTTPResponse
+from urllib3.util import parse_url
 from urllib3.util import Timeout as TimeoutSauce
 from urllib3.util.retry import Retry
 from urllib3.exceptions import ClosedPoolError
@@ -28,13 +29,13 @@ from urllib3.exceptions import ResponseError

 from .models import Response
 from .compat import urlparse, basestring
-from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers,
-                    prepend_scheme_if_needed, get_auth_from_url, urldefragauth,
-                    select_proxy)
+from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths,
+                    get_encoding_from_headers, prepend_scheme_if_needed,
+                    get_auth_from_url, urldefragauth, select_proxy)
 from .structures import CaseInsensitiveDict
 from .cookies import extract_cookies_to_jar
 from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError,
-                         ProxyError, RetryError, InvalidScheme)
+                         ProxyError, RetryError, InvalidScheme, InvalidProxyURL)
 from .auth import _basic_auth_str

 try:
@@ -309,6 +310,10 @@ class HTTPAdapter(BaseAdapter):

         if proxy:
             proxy = prepend_scheme_if_needed(proxy, 'http')
+            proxy_url = parse_url(proxy)
+            if not proxy_url.host:
+                raise InvalidProxyURL("Please check proxy URL. It is malformed"
+                                      " and could be missing the host.")
             proxy_manager = self.proxy_manager_for(proxy)
             conn = proxy_manager.connection_from_url(url, pool_kwargs=pool_kwargs)
         else:
@@ -413,7 +418,7 @@ class HTTPAdapter(BaseAdapter):
         conn = self.get_connection(request.url, proxies, verify, cert)

         url = self.request_url(request, proxies)
-        self.add_headers(request)
+        self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies)

         chunked = not (request.body is None or 'Content-Length' in request.headers)

diff --git a/requests/exceptions.py b/requests/exceptions.py
index ebf4cc34..1c61bf87 100644
--- a/requests/exceptions.py
+++ b/requests/exceptions.py
@@ -85,6 +85,10 @@ class InvalidHeader(RequestException, ValueError):
     """The header value provided was somehow invalid."""


+class InvalidProxyURL(InvalidURL):
+    """The proxy URL provided is invalid."""
+
+
 class ChunkedEncodingError(RequestException):
     """The server declared chunked encoding but sent an invalid chunk."""

diff --git a/requests/help.py b/requests/help.py
index 5440ee61..06e06b2a 100644
--- a/requests/help.py
+++ b/requests/help.py
@@ -13,7 +13,7 @@ import chardet
 from . import __version__ as requests_version

 try:
-    from .packages.urllib3.contrib import pyopenssl
+    from urllib3.contrib import pyopenssl
 except ImportError:
     pyopenssl = None
     OpenSSL = None

diff --git a/requests/models.py b/requests/models.py
index e776bc34..c3391ad2 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -676,11 +676,11 @@ class Response(object):

     @property
     def ok(self):
-        """Returns True if :attr:`status_code` is less than 400.
+        """Returns True if :attr:`status_code` is less than 400, False if not.

         This attribute checks if the status code of the response is between
         400 and 600 to see if there was a client error or a server error. If
-        the status code, is between 200 and 400, this will return True. This
+        the status code is between 200 and 400, this will return True. This
         is **not** a check to see if the response code is ``200 OK``.
         """
         try:
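A quick illustration of the proxy validation added in adapters.py above — a minimal sketch, assuming an installed requests build that already includes this change (the target host is a placeholder; nothing is actually sent, since the malformed proxy URL is rejected before any connection is made):

```python
import requests
from requests.exceptions import InvalidProxyURL

# 'http://' survives prepend_scheme_if_needed() but parse_url() finds no
# host, so get_connection() now fails fast with InvalidProxyURL instead
# of surfacing a confusing error from deeper in urllib3.
try:
    requests.get('https://example.com/', proxies={'https': 'http://'})
except InvalidProxyURL as exc:
    print('rejected malformed proxy:', exc)
```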
""" try: diff --git a/requests/sessions.py b/requests/sessions.py index a3f59133..66ed53ea 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -8,7 +8,7 @@ This module provides a Session object to manage and persist settings across requests (cookies, auth, proxies). """ import os -import platform +import sys import time from collections import Mapping, OrderedDict from datetime import timedelta @@ -40,7 +40,7 @@ from .status_codes import codes from .models import REDIRECT_STATI # Preferred clock, based on which one is more accurate on a given system. -if platform.system() == 'Windows': +if sys.platform == 'win32': try: # Python 3.4+ preferred_clock = time.perf_counter except AttributeError: # Earlier than Python 3. @@ -134,6 +134,7 @@ class SessionRedirectMixin(object): history = [response] # keep track of history; seed it with the original response location_url = self.get_redirect_target(response) + previous_fragment = urlparse(request.url).fragment while location_url: prepared_request = request.copy() @@ -154,8 +155,12 @@ class SessionRedirectMixin(object): parsed_rurl = urlparse(response.url) location_url = '%s:%s' % (to_native_string(parsed_rurl.scheme), location_url) - # The scheme should be lower case... + # Normalize url case and attach previous fragment if needed (RFC 7231 7.1.2) parsed = urlparse(location_url) + if parsed.fragment == '' and previous_fragment: + parsed = parsed._replace(fragment=previous_fragment) + elif parsed.fragment: + previous_fragment = parsed.fragment location_url = parsed.geturl() # Facilitate relative 'location' headers, as allowed by RFC 7231. @@ -720,7 +725,7 @@ class Session(SessionRedirectMixin): """ for (prefix, adapter) in self.adapters.items(): - if url.lower().startswith(prefix): + if url.lower().startswith(prefix.lower()): return adapter # Nothing matches :-/ diff --git a/requests/status_codes.py b/requests/status_codes.py index dee89190..96b86ddb 100644 --- a/requests/status_codes.py +++ b/requests/status_codes.py @@ -1,5 +1,22 @@ # -*- coding: utf-8 -*- +""" +The ``codes`` object defines a mapping from common names for HTTP statuses +to their numerical codes, accessible either as attributes or as dictionary +items. + +>>> requests.codes['temporary_redirect'] +307 +>>> requests.codes.teapot +418 +>>> requests.codes['\o/'] +200 + +Some codes have multiple names, and both upper- and lower-case versions of +the names are allowed. For example, ``codes.ok``, ``codes.OK``, and +``codes.okay`` all correspond to the HTTP status code 200. +""" + from .structures import LookupDict _codes = { @@ -84,8 +101,19 @@ _codes = { codes = LookupDict(name='status_codes') -for code, titles in _codes.items(): - for title in titles: - setattr(codes, title, code) - if not title.startswith(('\\', '/')): - setattr(codes, title.upper(), code) +def _init(): + for code, titles in _codes.items(): + for title in titles: + setattr(codes, title, code) + if not title.startswith(('\\', '/')): + setattr(codes, title.upper(), code) + + def doc(code): + names = ', '.join('``%s``' % n for n in _codes[code]) + return '* %d: %s' % (code, names) + + global __doc__ + __doc__ = (__doc__ + '\n' + + '\n'.join(doc(code) for code in sorted(_codes))) + +_init() diff --git a/requests/utils.py b/requests/utils.py index 745858d1..c718a783 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -8,17 +8,18 @@ This module provides utility functions that are used within Requests that are also useful for external consumption. 
""" -import cgi import codecs import collections import contextlib import io import os -import platform import re import socket import struct +import sys +import tempfile import warnings +import zipfile from .__version__ import __version__ from . import certs @@ -39,19 +40,25 @@ NETRC_FILES = ('.netrc', '_netrc') DEFAULT_CA_BUNDLE_PATH = certs.where() -if platform.system() == 'Windows': +if sys.platform == 'win32': # provide a proxy_bypass version on Windows without DNS lookups def proxy_bypass_registry(host): - if is_py3: - import winreg - else: - import _winreg as winreg + try: + if is_py3: + import winreg + else: + import _winreg as winreg + except ImportError: + return False + try: internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') - proxyEnable = winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0] + # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it + proxyEnable = int(winreg.QueryValueEx(internetSettings, + 'ProxyEnable')[0]) + # ProxyOverride is almost always a string proxyOverride = winreg.QueryValueEx(internetSettings, 'ProxyOverride')[0] except OSError: @@ -216,6 +223,38 @@ def guess_filename(obj): return os.path.basename(name) +def extract_zipped_paths(path): + """Replace nonexistent paths that look like they refer to a member of a zip + archive with the location of an extracted copy of the target, or else + just return the provided path unchanged. + """ + if os.path.exists(path): + # this is already a valid path, no need to do anything further + return path + + # find the first valid part of the provided path and treat that as a zip archive + # assume the rest of the path is the name of a member in the archive + archive, member = os.path.split(path) + while archive and not os.path.exists(archive): + archive, prefix = os.path.split(archive) + member = '/'.join([prefix, member]) + + if not zipfile.is_zipfile(archive): + return path + + zip_file = zipfile.ZipFile(archive) + if member not in zip_file.namelist(): + return path + + # we have a valid zip archive and a valid member of that archive + tmp = tempfile.gettempdir() + extracted_path = os.path.join(tmp, *member.split('/')) + if not os.path.exists(extracted_path): + extracted_path = zip_file.extract(member, path=tmp) + + return extracted_path + + def from_key_val_list(value): """Take an object and test to see if it can be represented as a dictionary. Unless it can not be represented as such, return an @@ -407,6 +446,31 @@ def get_encodings_from_content(content): xml_re.findall(content)) +def _parse_content_type_header(header): + """Returns content type and parameters from given header + + :param header: string + :return: tuple containing content type and dictionary of + parameters + """ + + tokens = header.split(';') + content_type, params = tokens[0].strip(), tokens[1:] + params_dict = {} + items_to_strip = "\"' " + + for param in params: + param = param.strip() + if param: + key, value = param, True + index_of_equals = param.find("=") + if index_of_equals != -1: + key = param[:index_of_equals].strip(items_to_strip) + value = param[index_of_equals + 1:].strip(items_to_strip) + params_dict[key] = value + return content_type, params_dict + + def get_encoding_from_headers(headers): """Returns encodings from given HTTP Header Dict. 
@@ -407,6 +446,31 @@ def get_encodings_from_content(content):
                 xml_re.findall(content))


+def _parse_content_type_header(header):
+    """Returns content type and parameters from given header
+
+    :param header: string
+    :return: tuple containing content type and dictionary of
+         parameters
+    """
+
+    tokens = header.split(';')
+    content_type, params = tokens[0].strip(), tokens[1:]
+    params_dict = {}
+    items_to_strip = "\"' "
+
+    for param in params:
+        param = param.strip()
+        if param:
+            key, value = param, True
+            index_of_equals = param.find("=")
+            if index_of_equals != -1:
+                key = param[:index_of_equals].strip(items_to_strip)
+                value = param[index_of_equals + 1:].strip(items_to_strip)
+            params_dict[key] = value
+    return content_type, params_dict
+
+
 def get_encoding_from_headers(headers):
     """Returns encodings from given HTTP Header Dict.

@@ -419,7 +483,7 @@ def get_encoding_from_headers(headers):
     if not content_type:
         return None

-    content_type, params = cgi.parse_header(content_type)
+    content_type, params = _parse_content_type_header(content_type)

     if 'charset' in params:
         return params['charset'].strip("'\"")
@@ -653,34 +717,37 @@ def should_bypass_proxies(url, no_proxy):
     no_proxy_arg = no_proxy
     if no_proxy is None:
         no_proxy = get_proxy('no_proxy')
-    netloc = urlparse(url).netloc
+    parsed = urlparse(url)

     if no_proxy:
         # We need to check whether we match here. We need to see if we match
-        # the end of the netloc, both with and without the port.
+        # the end of the hostname, both with and without the port.
         no_proxy = (
             host for host in no_proxy.replace(' ', '').split(',') if host
         )

-        ip = netloc.split(':')[0]
-        if is_ipv4_address(ip):
+        if is_ipv4_address(parsed.hostname):
             for proxy_ip in no_proxy:
                 if is_valid_cidr(proxy_ip):
-                    if address_in_network(ip, proxy_ip):
+                    if address_in_network(parsed.hostname, proxy_ip):
                         return True
-                elif ip == proxy_ip:
+                elif parsed.hostname == proxy_ip:
                     # If no_proxy ip was defined in plain IP notation instead of cidr notation &
                     # matches the IP of the index
                     return True
         else:
+            host_with_port = parsed.hostname
+            if parsed.port:
+                host_with_port += ':{0}'.format(parsed.port)
+
             for host in no_proxy:
-                if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
+                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                     # The URL does match something in no_proxy, so we don't want
                     # to apply the proxies on this URL.
                     return True

     with set_environ('no_proxy', no_proxy_arg):
-        return bool(proxy_bypass(netloc))
+        return bool(proxy_bypass(parsed.hostname))


 def get_environ_proxies(url, no_proxy=None):
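Finally, a short sketch of why should_bypass_proxies now matches on parsed.hostname rather than the raw netloc (the address and credentials below are placeholders): credentials and ports live in the netloc, so the old plain-IP and suffix comparisons against no_proxy entries could never match such URLs.

```python
from urllib.parse import urlparse  # Python 3; `from urlparse import urlparse` on Python 2

url = 'http://user:secret@10.0.0.5:8080/status'
parsed = urlparse(url)

print(parsed.netloc)    # user:secret@10.0.0.5:8080 -- never equals '10.0.0.5'
print(parsed.hostname)  # 10.0.0.5 -- what a no_proxy entry actually names

# With the hunk above applied, no_proxy='10.0.0.5' now bypasses the proxy
# for this URL; previously the embedded credentials defeated the match.
```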