From c70a6ae49bd162af06130e48a45579d445e058a8 Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Wed, 29 May 2013 05:54:31 -0700 Subject: #17403: urllib.parse.robotparser normalizes the urls before adding to ruleline. This helps in handling certain types of invalid URLs in a conservative manner. --- Lib/urllib/robotparser.py | 1 + 1 file changed, 1 insertion(+) (limited to 'Lib/urllib/robotparser.py') diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py index 75be4af409..978ba58d84 100644 --- a/Lib/urllib/robotparser.py +++ b/Lib/urllib/robotparser.py @@ -157,6 +157,7 @@ class RuleLine: if path == '' and not allowance: # an empty value means allow all allowance = True + path = urllib.parse.urlunparse(urllib.parse.urlparse(path)) self.path = urllib.parse.quote(path) self.allowance = allowance -- cgit v1.2.1