summary | refs | log | tree | commit | diff
path: root/Lib/urllib.py
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 2011-03-29 12:53:55 -0700
committer Guido van Rossum <guido@python.org> 2011-03-29 12:53:55 -0700
commit cccaf34afbc2236d6e20e8d7f559c6876f6d4dba (patch)
tree 5699fefdb39c02ed97ee16d1cb0a308eaf10db76 /Lib/urllib.py
parent 22883802db8055b1bea27f1023a88ae717772750 (diff)
parent a6ab6cbf8c95eb4c44e72a6bdf15d9091afb26a5 (diff)
download cpython-cccaf34afbc2236d6e20e8d7f559c6876f6d4dba.tar.gz
Merge issue 11662 from 2.6.
Diffstat (limited to 'Lib/urllib.py')
-rw-r--r-- Lib/urllib.py 124
1 files changed, 63 insertions, 61 deletions
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 3f5b592e40..62c08c99d5 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -42,9 +42,7 @@ __version__ = '1.17' # XXX This version is not always updated :-(
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
# Helper for non-unix systems
-if os.name == 'mac':
- from macurl2path import url2pathname, pathname2url
-elif os.name == 'nt':
+if os.name == 'nt':
from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
from rourl2path import url2pathname, pathname2url
@@ -94,7 +92,7 @@ def urlretrieve(url, filename=None, reporthook=None, data=None):
def urlcleanup():
if _urlopener:
_urlopener.cleanup()
- _safemaps.clear()
+ _safe_quoters.clear()
ftpcache.clear()
# check for SSL
@@ -177,8 +175,8 @@ class URLopener:
def open(self, fullurl, data=None):
"""Use URLopener().open(file) instead of open(file, 'r')."""
fullurl = unwrap(toBytes(fullurl))
- # percent encode url. fixing lame server errors like space within url
- # parts
+ # percent encode url, fixing lame server errors such as a space
+ # within url paths.
fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
if self.tempcache and fullurl in self.tempcache:
filename, headers = self.tempcache[fullurl]
@@ -232,9 +230,9 @@ class URLopener:
try:
fp = self.open_local_file(url1)
hdrs = fp.info()
- del fp
+ fp.close()
return url2pathname(splithost(url1)[1]), hdrs
- except IOError, msg:
+ except IOError:
pass
fp = self.open(url, data)
try:
@@ -276,8 +274,6 @@ class URLopener:
tfp.close()
finally:
fp.close()
- del fp
- del tfp
# raise exception if actual size does not match content-length header
if size >= 0 and read < size:
@@ -343,9 +339,7 @@ class URLopener:
if auth: h.putheader('Authorization', 'Basic %s' % auth)
if realhost: h.putheader('Host', realhost)
for args in self.addheaders: h.putheader(*args)
- h.endheaders()
- if data is not None:
- h.send(data)
+ h.endheaders(data)
errcode, errmsg, headers = h.getreply()
fp = h.getfile()
if errcode == -1:
@@ -438,9 +432,7 @@ class URLopener:
if auth: h.putheader('Authorization', 'Basic %s' % auth)
if realhost: h.putheader('Host', realhost)
for args in self.addheaders: h.putheader(*args)
- h.endheaders()
- if data is not None:
- h.send(data)
+ h.endheaders(data)
errcode, errmsg, headers = h.getreply()
fp = h.getfile()
if errcode == -1:
@@ -519,8 +511,8 @@ class URLopener:
if user: user, passwd = splitpasswd(user)
else: passwd = None
host = unquote(host)
- user = unquote(user or '')
- passwd = unquote(passwd or '')
+ user = user or ''
+ passwd = passwd or ''
host = socket.gethostbyname(host)
if not port:
import ftplib
@@ -785,7 +777,7 @@ class FancyURLopener(URLopener):
else:
return self.open(newurl, data)
- def get_user_passwd(self, host, realm, clear_cache = 0):
+ def get_user_passwd(self, host, realm, clear_cache=0):
key = realm + '@' + host.lower()
if key in self.auth_cache:
if clear_cache:
@@ -1072,7 +1064,12 @@ def splithost(url):
_hostprog = re.compile('^//([^/?]*)(.*)$')
match = _hostprog.match(url)
- if match: return match.group(1, 2)
+ if match:
+ host_port = match.group(1)
+ path = match.group(2)
+ if path and not path.startswith('/'):
+ path = '/' + path
+ return host_port, path
return None, url
_userprog = None
@@ -1084,7 +1081,7 @@ def splituser(host):
_userprog = re.compile('^(.*)@(.*)$')
match = _userprog.match(host)
- if match: return map(unquote, match.group(1, 2))
+ if match: return match.group(1, 2)
return None, host
_passwdprog = None
@@ -1093,7 +1090,7 @@ def splitpasswd(user):
global _passwdprog
if _passwdprog is None:
import re
- _passwdprog = re.compile('^([^:]*):(.*)$')
+ _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
match = _passwdprog.match(user)
if match: return match.group(1, 2)
@@ -1176,21 +1173,29 @@ def splitvalue(attr):
if match: return match.group(1, 2)
return attr, None
+# urlparse contains a duplicate of this function to avoid a circular import. If
+# you update this function, also update the copy in urlparse. This code
+# duplication does not exist in Python 3.
+
_hexdig = '0123456789ABCDEFabcdef'
-_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
+_hextochr = dict((a + b, chr(int(a + b, 16)))
+ for a in _hexdig for b in _hexdig)
def unquote(s):
"""unquote('abc%20def') -> 'abc def'."""
res = s.split('%')
- for i in xrange(1, len(res)):
- item = res[i]
+ # fastpath
+ if len(res) == 1:
+ return s
+ s = res[0]
+ for item in res[1:]:
try:
- res[i] = _hextochr[item[:2]] + item[2:]
+ s += _hextochr[item[:2]] + item[2:]
except KeyError:
- res[i] = '%' + item
+ s += '%' + item
except UnicodeDecodeError:
- res[i] = unichr(int(item[:2], 16)) + item[2:]
- return "".join(res)
+ s += unichr(int(item[:2], 16)) + item[2:]
+ return s
def unquote_plus(s):
"""unquote('%7e/abc+def') -> '~/abc def'"""
@@ -1200,9 +1205,12 @@ def unquote_plus(s):
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz'
'0123456789' '_.-')
-_safemaps = {}
+_safe_map = {}
+for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
+ _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
+_safe_quoters = {}
-def quote(s, safe = '/'):
+def quote(s, safe='/'):
"""quote('abc def') -> 'abc%20def'
Each part of a URL, e.g. the path info, the query, etc., has a
@@ -1223,27 +1231,32 @@ def quote(s, safe = '/'):
called on a path where the existing slash characters are used as
reserved characters.
"""
+ # fastpath
+ if not s:
+ if s is None:
+ raise TypeError('None object cannot be quoted')
+ return s
cachekey = (safe, always_safe)
try:
- safe_map = _safemaps[cachekey]
+ (quoter, safe) = _safe_quoters[cachekey]
except KeyError:
- safe += always_safe
- safe_map = {}
- for i in range(256):
- c = chr(i)
- safe_map[c] = (c in safe) and c or ('%%%02X' % i)
- _safemaps[cachekey] = safe_map
- res = map(safe_map.__getitem__, s)
- return ''.join(res)
-
-def quote_plus(s, safe = ''):
+ safe_map = _safe_map.copy()
+ safe_map.update([(c, c) for c in safe])
+ quoter = safe_map.__getitem__
+ safe = always_safe + safe
+ _safe_quoters[cachekey] = (quoter, safe)
+ if not s.rstrip(safe):
+ return s
+ return ''.join(map(quoter, s))
+
+def quote_plus(s, safe=''):
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
if ' ' in s:
s = quote(s, safe + ' ')
return s.replace(' ', '+')
return quote(s, safe)
-def urlencode(query,doseq=0):
+def urlencode(query, doseq=0):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
If any values in the query arg are sequences and doseq is true, each
@@ -1295,7 +1308,7 @@ def urlencode(query,doseq=0):
else:
try:
# is this a sufficient test for sequence-ness?
- x = len(v)
+ len(v)
except TypeError:
# not a sequence
v = quote_plus(str(v))
@@ -1395,7 +1408,7 @@ if sys.platform == 'darwin':
else:
mask = int(mask[1:])
- mask = 32 - mask
+ mask = 32 - mask
if (hostIP >> mask) == (base >> mask):
return True
@@ -1405,7 +1418,6 @@ if sys.platform == 'darwin':
return False
-
def getproxies_macosx_sysconf():
"""Return a dictionary of scheme -> proxy server URL mappings.
@@ -1414,8 +1426,6 @@ if sys.platform == 'darwin':
"""
return _get_proxies()
-
-
def proxy_bypass(host):
if getproxies_environment():
return proxy_bypass_environment(host)
@@ -1519,18 +1529,11 @@ elif os.name == 'nt':
# '<local>' string by the localhost entry and the corresponding
# canonical entry.
proxyOverride = proxyOverride.split(';')
- i = 0
- while i < len(proxyOverride):
- if proxyOverride[i] == '<local>':
- proxyOverride[i:i+1] = ['localhost',
- '127.0.0.1',
- socket.gethostname(),
- socket.gethostbyname(
- socket.gethostname())]
- i += 1
- # print proxyOverride
# now check if we match one of the registry values.
for test in proxyOverride:
+ if test == '<local>':
+ if '.' not in rawHost:
+ return 1
test = test.replace(".", r"\.") # mask dots
test = test.replace("*", r".*") # change glob sequence
test = test.replace("?", r".") # change glob char
@@ -1600,9 +1603,8 @@ def test(args=[]):
print '======'
for k in h.keys(): print k + ':', h[k]
print '======'
- fp = open(fn, 'rb')
- data = fp.read()
- del fp
+ with open(fn, 'rb') as fp:
+ data = fp.read()
if '\r' in data:
table = string.maketrans("", "")
data = data.translate(table, "\r")