From 49fd7fa4431da299196d74087df4a04f99f9c46f Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Fri, 21 Apr 2006 10:40:58 +0000 Subject: Merge p3yk branch with the trunk up to revision 45595. This breaks a fair number of tests, all because of the codecs/_multibytecodecs issue described here (it's not a Py3K issue, just something Py3K discovers): http://mail.python.org/pipermail/python-dev/2006-April/064051.html Hye-Shik Chang promised to look for a fix, so no need to fix it here. The tests that are expected to break are: test_codecencodings_cn test_codecencodings_hk test_codecencodings_jp test_codecencodings_kr test_codecencodings_tw test_codecs test_multibytecodec This merge fixes an actual test failure (test_weakref) in this branch, though, so I believe merging is the right thing to do anyway. --- Lib/urlparse.py | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 118 insertions(+), 13 deletions(-) (limited to 'Lib/urlparse.py') diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 8b750519de..eade040ff6 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -16,12 +16,12 @@ uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', 'svn', 'svn+ssh', 'sftp'] non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip'] + 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', 'mms', '', 'sftp'] uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', ''] + 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'https', 'shttp', 'snews', 'file', 'prospero', ''] @@ -41,7 +41,111 @@ def clear_cache(): _parse_cache = {} -def urlparse(url, scheme='', allow_fragments=1): +class BaseResult(tuple): + """Base class for the parsed result objects. + + This provides the attributes shared by the two derived result + objects as read-only properties. The derived classes are + responsible for checking the right number of arguments were + supplied to the constructor. + + """ + + __slots__ = () + + # Attributes that access the basic components of the URL: + + @property + def scheme(self): + return self[0] + + @property + def netloc(self): + return self[1] + + @property + def path(self): + return self[2] + + @property + def query(self): + return self[-2] + + @property + def fragment(self): + return self[-1] + + # Additional attributes that provide access to parsed-out portions + # of the netloc: + + @property + def username(self): + netloc = self.netloc + if "@" in netloc: + userinfo = netloc.split("@", 1)[0] + if ":" in userinfo: + userinfo = userinfo.split(":", 1)[0] + return userinfo + return None + + @property + def password(self): + netloc = self.netloc + if "@" in netloc: + userinfo = netloc.split("@", 1)[0] + if ":" in userinfo: + return userinfo.split(":", 1)[1] + return None + + @property + def hostname(self): + netloc = self.netloc + if "@" in netloc: + netloc = netloc.split("@", 1)[1] + if ":" in netloc: + netloc = netloc.split(":", 1)[0] + return netloc.lower() or None + + @property + def port(self): + netloc = self.netloc + if "@" in netloc: + netloc = netloc.split("@", 1)[1] + if ":" in netloc: + port = netloc.split(":", 1)[1] + return int(port, 10) + return None + + +class SplitResult(BaseResult): + + __slots__ = () + + def __new__(cls, scheme, netloc, path, query, fragment): + return BaseResult.__new__( + cls, (scheme, netloc, path, query, fragment)) + + def geturl(self): + return urlunsplit(self) + + +class ParseResult(BaseResult): + + __slots__ = () + + def __new__(cls, scheme, netloc, path, params, query, fragment): + return BaseResult.__new__( + cls, (scheme, netloc, path, params, query, fragment)) + + @property + def params(self): + return self[3] + + def geturl(self): + return urlunparse(self) + + +def urlparse(url, scheme='', allow_fragments=True): """Parse a URL into 6 components: :///;?# Return a 6-tuple: (scheme, netloc, path, params, query, fragment). @@ -53,7 +157,7 @@ def urlparse(url, scheme='', allow_fragments=1): url, params = _splitparams(url) else: params = '' - return scheme, netloc, url, params, query, fragment + return ParseResult(scheme, netloc, url, params, query, fragment) def _splitparams(url): if '/' in url: @@ -73,12 +177,13 @@ def _splitnetloc(url, start=0): delim = len(url) return url[start:delim], url[delim:] -def urlsplit(url, scheme='', allow_fragments=1): +def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: :///?# Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" + allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments cached = _parse_cache.get(key, None) if cached: @@ -97,9 +202,9 @@ def urlsplit(url, scheme='', allow_fragments=1): url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) - tuple = scheme, netloc, url, query, fragment - _parse_cache[key] = tuple - return tuple + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return v for c in url[:i]: if c not in scheme_chars: break @@ -111,9 +216,9 @@ def urlsplit(url, scheme='', allow_fragments=1): url, fragment = url.split('#', 1) if scheme in uses_query and '?' in url: url, query = url.split('?', 1) - tuple = scheme, netloc, url, query, fragment - _parse_cache[key] = tuple - return tuple + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return v def urlunparse((scheme, netloc, url, params, query, fragment)): """Put a parsed URL back together again. This may result in a @@ -136,7 +241,7 @@ def urlunsplit((scheme, netloc, url, query, fragment)): url = url + '#' + fragment return url -def urljoin(base, url, allow_fragments = 1): +def urljoin(base, url, allow_fragments=True): """Join a base URL and a possibly relative URL to form an absolute interpretation of the latter.""" if not base: -- cgit v1.2.1