From ce6e06874b235f7825888c20fd2c6f4670a4aeba Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Mon, 16 May 2016 01:07:13 +0000 Subject: Issue #14132: Fix redirect handling when target is just a query string --- Lib/urllib/request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 5f40729fca..bbd2bdf685 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -652,7 +652,7 @@ class HTTPRedirectHandler(BaseHandler): "%s - Redirection to url '%s' is not allowed" % (msg, newurl), headers, fp) - if not urlparts.path: + if not urlparts.path and urlparts.netloc: urlparts = list(urlparts) urlparts[2] = "/" newurl = urlunparse(urlparts) -- cgit v1.2.1 From e6f060903cf2080b6570a87fde5021aa14d05530 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Mon, 16 May 2016 01:14:20 +0000 Subject: Issue #17214: Percent-encode non-ASCII bytes in redirect targets Some servers send Location header fields with non-ASCII bytes, but "http.client" requires the request target to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on patch by Christian Heimes. Python 2 does not suffer any problem because it allows non-ASCII bytes in the HTTP request target. --- Lib/urllib/request.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index bbd2bdf685..1731fe3df1 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -91,6 +91,7 @@ import os import posixpath import re import socket +import string import sys import time import collections @@ -616,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler): # from the user (of urllib.request, in this case). In practice, # essentially all clients do redirect in this case, so we do # the same. - # be conciliant with URIs containing a space + + # Be conciliant with URIs containing a space.
This is mainly + # redundant with the more complete encoding done in http_error_302(), + # but it is kept for compatibility with other callers. newurl = newurl.replace(' ', '%20') + CONTENT_HEADERS = ("content-length", "content-type") newheaders = dict((k, v) for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS) @@ -657,6 +662,11 @@ class HTTPRedirectHandler(BaseHandler): urlparts[2] = "/" newurl = urlunparse(urlparts) + # http.client.parse_headers() decodes as ISO-8859-1. Recover the + # original bytes and percent-encode non-ASCII bytes, and any special + # characters such as the space. + newurl = quote( + newurl, encoding="iso-8859-1", safe=string.punctuation) newurl = urljoin(req.full_url, newurl) # XXX Probably want to forget about the state of the current -- cgit v1.2.1