diff options
author | Guido van Rossum <guido@python.org> | 2011-03-29 12:53:55 -0700 |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2011-03-29 12:53:55 -0700 |
commit | cccaf34afbc2236d6e20e8d7f559c6876f6d4dba (patch) | |
tree | 5699fefdb39c02ed97ee16d1cb0a308eaf10db76 /Lib/urllib.py | |
parent | 22883802db8055b1bea27f1023a88ae717772750 (diff) | |
parent | a6ab6cbf8c95eb4c44e72a6bdf15d9091afb26a5 (diff) | |
download | cpython-cccaf34afbc2236d6e20e8d7f559c6876f6d4dba.tar.gz |
Merge issue 11662 from 2.6.
Diffstat (limited to 'Lib/urllib.py')
-rw-r--r-- | Lib/urllib.py | 124 |
1 file changed, 63 insertions, 61 deletions
diff --git a/Lib/urllib.py b/Lib/urllib.py index 3f5b592e40..62c08c99d5 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -42,9 +42,7 @@ __version__ = '1.17' # XXX This version is not always updated :-( MAXFTPCACHE = 10 # Trim the ftp cache beyond this size # Helper for non-unix systems -if os.name == 'mac': - from macurl2path import url2pathname, pathname2url -elif os.name == 'nt': +if os.name == 'nt': from nturl2path import url2pathname, pathname2url elif os.name == 'riscos': from rourl2path import url2pathname, pathname2url @@ -94,7 +92,7 @@ def urlretrieve(url, filename=None, reporthook=None, data=None): def urlcleanup(): if _urlopener: _urlopener.cleanup() - _safemaps.clear() + _safe_quoters.clear() ftpcache.clear() # check for SSL @@ -177,8 +175,8 @@ class URLopener: def open(self, fullurl, data=None): """Use URLopener().open(file) instead of open(file, 'r').""" fullurl = unwrap(toBytes(fullurl)) - # percent encode url. fixing lame server errors like space within url - # parts + # percent encode url, fixing lame server errors for e.g, like space + # within url paths. 
fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") if self.tempcache and fullurl in self.tempcache: filename, headers = self.tempcache[fullurl] @@ -232,9 +230,9 @@ class URLopener: try: fp = self.open_local_file(url1) hdrs = fp.info() - del fp + fp.close() return url2pathname(splithost(url1)[1]), hdrs - except IOError, msg: + except IOError: pass fp = self.open(url, data) try: @@ -276,8 +274,6 @@ class URLopener: tfp.close() finally: fp.close() - del fp - del tfp # raise exception if actual size does not match content-length header if size >= 0 and read < size: @@ -343,9 +339,7 @@ class URLopener: if auth: h.putheader('Authorization', 'Basic %s' % auth) if realhost: h.putheader('Host', realhost) for args in self.addheaders: h.putheader(*args) - h.endheaders() - if data is not None: - h.send(data) + h.endheaders(data) errcode, errmsg, headers = h.getreply() fp = h.getfile() if errcode == -1: @@ -438,9 +432,7 @@ class URLopener: if auth: h.putheader('Authorization', 'Basic %s' % auth) if realhost: h.putheader('Host', realhost) for args in self.addheaders: h.putheader(*args) - h.endheaders() - if data is not None: - h.send(data) + h.endheaders(data) errcode, errmsg, headers = h.getreply() fp = h.getfile() if errcode == -1: @@ -519,8 +511,8 @@ class URLopener: if user: user, passwd = splitpasswd(user) else: passwd = None host = unquote(host) - user = unquote(user or '') - passwd = unquote(passwd or '') + user = user or '' + passwd = passwd or '' host = socket.gethostbyname(host) if not port: import ftplib @@ -785,7 +777,7 @@ class FancyURLopener(URLopener): else: return self.open(newurl, data) - def get_user_passwd(self, host, realm, clear_cache = 0): + def get_user_passwd(self, host, realm, clear_cache=0): key = realm + '@' + host.lower() if key in self.auth_cache: if clear_cache: @@ -1072,7 +1064,12 @@ def splithost(url): _hostprog = re.compile('^//([^/?]*)(.*)$') match = _hostprog.match(url) - if match: return match.group(1, 2) + if match: + host_port = 
match.group(1) + path = match.group(2) + if path and not path.startswith('/'): + path = '/' + path + return host_port, path return None, url _userprog = None @@ -1084,7 +1081,7 @@ def splituser(host): _userprog = re.compile('^(.*)@(.*)$') match = _userprog.match(host) - if match: return map(unquote, match.group(1, 2)) + if match: return match.group(1, 2) return None, host _passwdprog = None @@ -1093,7 +1090,7 @@ def splitpasswd(user): global _passwdprog if _passwdprog is None: import re - _passwdprog = re.compile('^([^:]*):(.*)$') + _passwdprog = re.compile('^([^:]*):(.*)$',re.S) match = _passwdprog.match(user) if match: return match.group(1, 2) @@ -1176,21 +1173,29 @@ def splitvalue(attr): if match: return match.group(1, 2) return attr, None +# urlparse contains a duplicate of this method to avoid a circular import. If +# you update this method, also update the copy in urlparse. This code +# duplication does not exist in Python3. + _hexdig = '0123456789ABCDEFabcdef' -_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig) +_hextochr = dict((a + b, chr(int(a + b, 16))) + for a in _hexdig for b in _hexdig) def unquote(s): """unquote('abc%20def') -> 'abc def'.""" res = s.split('%') - for i in xrange(1, len(res)): - item = res[i] + # fastpath + if len(res) == 1: + return s + s = res[0] + for item in res[1:]: try: - res[i] = _hextochr[item[:2]] + item[2:] + s += _hextochr[item[:2]] + item[2:] except KeyError: - res[i] = '%' + item + s += '%' + item except UnicodeDecodeError: - res[i] = unichr(int(item[:2], 16)) + item[2:] - return "".join(res) + s += unichr(int(item[:2], 16)) + item[2:] + return s def unquote_plus(s): """unquote('%7e/abc+def') -> '~/abc def'""" @@ -1200,9 +1205,12 @@ def unquote_plus(s): always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' '0123456789' '_.-') -_safemaps = {} +_safe_map = {} +for i, c in zip(xrange(256), str(bytearray(xrange(256)))): + _safe_map[c] = c if (i < 128 and c in always_safe) else 
'%{:02X}'.format(i) +_safe_quoters = {} -def quote(s, safe = '/'): +def quote(s, safe='/'): """quote('abc def') -> 'abc%20def' Each part of a URL, e.g. the path info, the query, etc., has a @@ -1223,27 +1231,32 @@ def quote(s, safe = '/'): called on a path where the existing slash characters are used as reserved characters. """ + # fastpath + if not s: + if s is None: + raise TypeError('None object cannot be quoted') + return s cachekey = (safe, always_safe) try: - safe_map = _safemaps[cachekey] + (quoter, safe) = _safe_quoters[cachekey] except KeyError: - safe += always_safe - safe_map = {} - for i in range(256): - c = chr(i) - safe_map[c] = (c in safe) and c or ('%%%02X' % i) - _safemaps[cachekey] = safe_map - res = map(safe_map.__getitem__, s) - return ''.join(res) - -def quote_plus(s, safe = ''): + safe_map = _safe_map.copy() + safe_map.update([(c, c) for c in safe]) + quoter = safe_map.__getitem__ + safe = always_safe + safe + _safe_quoters[cachekey] = (quoter, safe) + if not s.rstrip(safe): + return s + return ''.join(map(quoter, s)) + +def quote_plus(s, safe=''): """Quote the query fragment of a URL; replacing ' ' with '+'""" if ' ' in s: s = quote(s, safe + ' ') return s.replace(' ', '+') return quote(s, safe) -def urlencode(query,doseq=0): +def urlencode(query, doseq=0): """Encode a sequence of two-element tuples or dictionary into a URL query string. If any values in the query arg are sequences and doseq is true, each @@ -1295,7 +1308,7 @@ def urlencode(query,doseq=0): else: try: # is this a sufficient test for sequence-ness? - x = len(v) + len(v) except TypeError: # not a sequence v = quote_plus(str(v)) @@ -1395,7 +1408,7 @@ if sys.platform == 'darwin': else: mask = int(mask[1:]) - mask = 32 - mask + mask = 32 - mask if (hostIP >> mask) == (base >> mask): return True @@ -1405,7 +1418,6 @@ if sys.platform == 'darwin': return False - def getproxies_macosx_sysconf(): """Return a dictionary of scheme -> proxy server URL mappings. 
@@ -1414,8 +1426,6 @@ if sys.platform == 'darwin': """ return _get_proxies() - - def proxy_bypass(host): if getproxies_environment(): return proxy_bypass_environment(host) @@ -1519,18 +1529,11 @@ elif os.name == 'nt': # '<local>' string by the localhost entry and the corresponding # canonical entry. proxyOverride = proxyOverride.split(';') - i = 0 - while i < len(proxyOverride): - if proxyOverride[i] == '<local>': - proxyOverride[i:i+1] = ['localhost', - '127.0.0.1', - socket.gethostname(), - socket.gethostbyname( - socket.gethostname())] - i += 1 - # print proxyOverride # now check if we match one of the registry values. for test in proxyOverride: + if test == '<local>': + if '.' not in rawHost: + return 1 test = test.replace(".", r"\.") # mask dots test = test.replace("*", r".*") # change glob sequence test = test.replace("?", r".") # change glob char @@ -1600,9 +1603,8 @@ def test(args=[]): print '======' for k in h.keys(): print k + ':', h[k] print '======' - fp = open(fn, 'rb') - data = fp.read() - del fp + with open(fn, 'rb') as fp: + data = fp.read() if '\r' in data: table = string.maketrans("", "") data = data.translate(table, "\r") |