summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Donald Stufft <donald@stufft.io>, 2016-10-30 01:35:28 -0400
committer: GitHub <noreply@github.com>, 2016-10-30 01:35:28 -0400
commit: c8e8a99b7a6f9404536bc9d895a1a42a060f7f91 (patch)
tree: 6821e51d5b5835702e958e29fc3df74ac41c8bc2
parent: 22f2e01be5cd64d7db524e1ab5931fdc98521cfb (diff)
download: pip-c8e8a99b7a6f9404536bc9d895a1a42a060f7f91.tar.gz
Upgrade Dependencies (#4038)
* Upgrade distlib to 0.2.4
* Upgrade distro to 1.0.0
* Upgrade html5lib to 1.0b10
* Upgrade requests to 2.11.1
* Upgrade CacheControl to 0.11.7
* Upgrade ipaddress to 1.0.17
* Upgrade pyparsing to 2.1.10
* Upgrade packaging to 16.8
* Add webencodings 0.5
* Add ordereddict 1.1
-rw-r--r-- pip/_vendor/README.rst | 1
-rw-r--r-- pip/_vendor/cachecontrol/__init__.py | 2
-rw-r--r-- pip/_vendor/cachecontrol/adapter.py | 18
-rw-r--r-- pip/_vendor/cachecontrol/controller.py | 2
-rw-r--r-- pip/_vendor/cachecontrol/filewrapper.py | 33
-rw-r--r-- pip/_vendor/cachecontrol/serialize.py | 6
-rw-r--r-- pip/_vendor/distlib/__init__.py | 2
-rw-r--r-- pip/_vendor/distlib/_backport/shutil.py | 6
-rw-r--r-- pip/_vendor/distlib/_backport/tarfile.py | 2
-rw-r--r-- pip/_vendor/distlib/compat.py | 17
-rw-r--r-- pip/_vendor/distlib/database.py | 2
-rw-r--r-- pip/_vendor/distlib/index.py | 8
-rw-r--r-- pip/_vendor/distlib/locators.py | 41
-rw-r--r-- pip/_vendor/distlib/manifest.py | 38
-rw-r--r-- pip/_vendor/distlib/metadata.py | 14
-rw-r--r-- pip/_vendor/distlib/resources.py | 11
-rw-r--r-- pip/_vendor/distlib/scripts.py | 2
-rw-r--r-- pip/_vendor/distlib/t32.exe | bin 89088 -> 89088 bytes
-rw-r--r-- pip/_vendor/distlib/t64.exe | bin 97792 -> 97792 bytes
-rw-r--r-- pip/_vendor/distlib/util.py | 266
-rw-r--r-- pip/_vendor/distlib/version.py | 6
-rw-r--r-- pip/_vendor/distlib/w32.exe | bin 85504 -> 85504 bytes
-rw-r--r-- pip/_vendor/distlib/w64.exe | bin 94208 -> 94208 bytes
-rw-r--r-- pip/_vendor/distro.py | 168
-rw-r--r-- pip/_vendor/html5lib/__init__.py | 2
-rw-r--r-- pip/_vendor/html5lib/_ihatexml.py (renamed from pip/_vendor/html5lib/ihatexml.py) | 13
-rw-r--r-- pip/_vendor/html5lib/_inputstream.py (renamed from pip/_vendor/html5lib/inputstream.py) | 220
-rw-r--r-- pip/_vendor/html5lib/_tokenizer.py (renamed from pip/_vendor/html5lib/tokenizer.py) | 36
-rw-r--r-- pip/_vendor/html5lib/_trie/__init__.py (renamed from pip/_vendor/html5lib/trie/__init__.py) | 2
-rw-r--r-- pip/_vendor/html5lib/_trie/_base.py (renamed from pip/_vendor/html5lib/trie/_base.py) | 3
-rw-r--r-- pip/_vendor/html5lib/_trie/datrie.py (renamed from pip/_vendor/html5lib/trie/datrie.py) | 0
-rw-r--r-- pip/_vendor/html5lib/_trie/py.py (renamed from pip/_vendor/html5lib/trie/py.py) | 0
-rw-r--r-- pip/_vendor/html5lib/_utils.py (renamed from pip/_vendor/html5lib/utils.py) | 44
-rw-r--r-- pip/_vendor/html5lib/constants.py | 303
-rw-r--r-- pip/_vendor/html5lib/filters/alphabeticalattributes.py | 6
-rw-r--r-- pip/_vendor/html5lib/filters/base.py (renamed from pip/_vendor/html5lib/filters/_base.py) | 0
-rw-r--r-- pip/_vendor/html5lib/filters/inject_meta_charset.py | 8
-rw-r--r-- pip/_vendor/html5lib/filters/lint.py | 111
-rw-r--r-- pip/_vendor/html5lib/filters/optionaltags.py | 11
-rw-r--r-- pip/_vendor/html5lib/filters/sanitizer.py | 861
-rw-r--r-- pip/_vendor/html5lib/filters/whitespace.py | 6
-rw-r--r-- pip/_vendor/html5lib/html5parser.py | 555
-rw-r--r-- pip/_vendor/html5lib/sanitizer.py | 300
-rw-r--r-- pip/_vendor/html5lib/serializer.py (renamed from pip/_vendor/html5lib/serializer/htmlserializer.py) | 179
-rw-r--r-- pip/_vendor/html5lib/serializer/__init__.py | 16
-rw-r--r-- pip/_vendor/html5lib/treeadapters/__init__.py | 12
-rw-r--r-- pip/_vendor/html5lib/treeadapters/genshi.py | 47
-rw-r--r-- pip/_vendor/html5lib/treebuilders/__init__.py | 2
-rw-r--r-- pip/_vendor/html5lib/treebuilders/base.py (renamed from pip/_vendor/html5lib/treebuilders/_base.py) | 14
-rw-r--r-- pip/_vendor/html5lib/treebuilders/dom.py | 49
-rw-r--r-- pip/_vendor/html5lib/treebuilders/etree.py | 21
-rw-r--r-- pip/_vendor/html5lib/treebuilders/etree_lxml.py | 42
-rw-r--r-- pip/_vendor/html5lib/treewalkers/__init__.py | 50
-rw-r--r-- pip/_vendor/html5lib/treewalkers/base.py (renamed from pip/_vendor/html5lib/treewalkers/_base.py) | 88
-rw-r--r-- pip/_vendor/html5lib/treewalkers/dom.py | 16
-rw-r--r-- pip/_vendor/html5lib/treewalkers/etree.py | 21
-rw-r--r-- pip/_vendor/html5lib/treewalkers/etree_lxml.py (renamed from pip/_vendor/html5lib/treewalkers/lxmletree.py) | 58
-rw-r--r-- pip/_vendor/html5lib/treewalkers/genshi.py (renamed from pip/_vendor/html5lib/treewalkers/genshistream.py) | 12
-rw-r--r-- pip/_vendor/html5lib/treewalkers/pulldom.py | 63
-rw-r--r-- pip/_vendor/ipaddress.py | 32
-rw-r--r-- pip/_vendor/ordereddict.py | 127
-rw-r--r-- pip/_vendor/packaging/__about__.py | 2
-rw-r--r-- pip/_vendor/packaging/markers.py | 24
-rw-r--r-- pip/_vendor/pyparsing.py | 3209
-rw-r--r-- pip/_vendor/requests/__init__.py | 7
-rw-r--r-- pip/_vendor/requests/adapters.py | 22
-rw-r--r-- pip/_vendor/requests/api.py | 1
-rw-r--r-- pip/_vendor/requests/auth.py | 12
-rw-r--r-- pip/_vendor/requests/certs.py | 4
-rw-r--r-- pip/_vendor/requests/compat.py | 6
-rw-r--r-- pip/_vendor/requests/cookies.py | 103
-rw-r--r-- pip/_vendor/requests/exceptions.py | 18
-rw-r--r-- pip/_vendor/requests/hooks.py | 2
-rw-r--r-- pip/_vendor/requests/models.py | 54
-rw-r--r-- pip/_vendor/requests/packages/urllib3/__init__.py | 2
-rw-r--r-- pip/_vendor/requests/packages/urllib3/connectionpool.py | 37
-rw-r--r-- pip/_vendor/requests/packages/urllib3/contrib/appengine.py | 2
-rw-r--r-- pip/_vendor/requests/packages/urllib3/contrib/socks.py | 2
-rw-r--r-- pip/_vendor/requests/packages/urllib3/packages/six.py | 635
-rw-r--r-- pip/_vendor/requests/packages/urllib3/poolmanager.py | 93
-rw-r--r-- pip/_vendor/requests/packages/urllib3/response.py | 4
-rw-r--r-- pip/_vendor/requests/packages/urllib3/util/connection.py | 47
-rw-r--r-- pip/_vendor/requests/packages/urllib3/util/retry.py | 14
-rw-r--r-- pip/_vendor/requests/packages/urllib3/util/ssl_.py | 4
-rw-r--r-- pip/_vendor/requests/sessions.py | 71
-rw-r--r-- pip/_vendor/requests/status_codes.py | 2
-rw-r--r-- pip/_vendor/requests/structures.py | 7
-rw-r--r-- pip/_vendor/requests/utils.py | 131
-rw-r--r-- pip/_vendor/vendor.txt | 18
-rw-r--r-- pip/_vendor/webencodings/__init__.py | 342
-rw-r--r-- pip/_vendor/webencodings/labels.py | 231
-rw-r--r-- pip/_vendor/webencodings/mklabels.py | 59
-rw-r--r-- pip/_vendor/webencodings/tests.py | 153
-rw-r--r-- pip/_vendor/webencodings/x_user_defined.py | 325
-rw-r--r-- pip/index.py | 2
-rw-r--r-- tests/unit/test_index.py | 14
96 files changed, 7077 insertions, 2557 deletions
diff --git a/pip/_vendor/README.rst b/pip/_vendor/README.rst
index 4a27bf09c..855b174f7 100644
--- a/pip/_vendor/README.rst
+++ b/pip/_vendor/README.rst
@@ -106,6 +106,7 @@ Modifications
* CacheControl has been modified to import its dependencies from pip._vendor
* packaging has been modified to import its dependencies from pip._vendor
* requests has been modified *not* to optionally load any C dependencies.
+* Modified distro to delay importing argparse to avoid errors on 2.6
Debundling
diff --git a/pip/_vendor/cachecontrol/__init__.py b/pip/_vendor/cachecontrol/__init__.py
index 724e220dc..ec9da2e37 100644
--- a/pip/_vendor/cachecontrol/__init__.py
+++ b/pip/_vendor/cachecontrol/__init__.py
@@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = 'Eric Larson'
__email__ = 'eric@ionrock.org'
-__version__ = '0.11.6'
+__version__ = '0.11.7'
from .wrapper import CacheControl
from .adapter import CacheControlAdapter
diff --git a/pip/_vendor/cachecontrol/adapter.py b/pip/_vendor/cachecontrol/adapter.py
index 74589e00c..23488566b 100644
--- a/pip/_vendor/cachecontrol/adapter.py
+++ b/pip/_vendor/cachecontrol/adapter.py
@@ -1,3 +1,4 @@
+import types
import functools
from pip._vendor.requests.adapters import HTTPAdapter
@@ -55,6 +56,10 @@ class CacheControlAdapter(HTTPAdapter):
cached response
"""
if not from_cache and request.method == 'GET':
+ # Check for any heuristics that might update headers
+ # before trying to cache.
+ if self.heuristic:
+ response = self.heuristic.apply(response)
# apply any expiration heuristics
if response.status == 304:
@@ -82,11 +87,6 @@ class CacheControlAdapter(HTTPAdapter):
elif response.status == 301:
self.controller.cache_response(request, response)
else:
- # Check for any heuristics that might update headers
- # before trying to cache.
- if self.heuristic:
- response = self.heuristic.apply(response)
-
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper(
@@ -97,6 +97,14 @@ class CacheControlAdapter(HTTPAdapter):
response,
)
)
+ if response.chunked:
+ super_update_chunk_length = response._update_chunk_length
+
+ def _update_chunk_length(self):
+ super_update_chunk_length()
+ if self.chunk_left == 0:
+ self._fp._close()
+ response._update_chunk_length = types.MethodType(_update_chunk_length, response)
resp = super(CacheControlAdapter, self).build_response(
request, response
diff --git a/pip/_vendor/cachecontrol/controller.py b/pip/_vendor/cachecontrol/controller.py
index 6e591f8b0..5eb961f85 100644
--- a/pip/_vendor/cachecontrol/controller.py
+++ b/pip/_vendor/cachecontrol/controller.py
@@ -290,7 +290,7 @@ class CacheController(object):
elif 'date' in response_headers:
# cache when there is a max-age > 0
if cc and cc.get('max-age'):
- if int(cc['max-age']) > 0:
+ if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
logger.debug('Caching b/c date exists and max-age > 0')
self.cache.set(
cache_url,
diff --git a/pip/_vendor/cachecontrol/filewrapper.py b/pip/_vendor/cachecontrol/filewrapper.py
index 4b91bce04..f1e1ce055 100644
--- a/pip/_vendor/cachecontrol/filewrapper.py
+++ b/pip/_vendor/cachecontrol/filewrapper.py
@@ -45,19 +45,34 @@ class CallbackFileWrapper(object):
# TODO: Add some logging here...
return False
+ def _close(self):
+ if self.__callback:
+ self.__callback(self.__buf.getvalue())
+
+ # We assign this to None here, because otherwise we can get into
+ # really tricky problems where the CPython interpreter dead locks
+ # because the callback is holding a reference to something which
+ # has a __del__ method. Setting this to None breaks the cycle
+ # and allows the garbage collector to do it's thing normally.
+ self.__callback = None
+
def read(self, amt=None):
data = self.__fp.read(amt)
self.__buf.write(data)
+ if self.__is_fp_closed():
+ self._close()
+ return data
+
+ def _safe_read(self, amt):
+ data = self.__fp._safe_read(amt)
+ if amt == 2 and data == b'\r\n':
+ # urllib executes this read to toss the CRLF at the end
+ # of the chunk.
+ return data
+
+ self.__buf.write(data)
if self.__is_fp_closed():
- if self.__callback:
- self.__callback(self.__buf.getvalue())
-
- # We assign this to None here, because otherwise we can get into
- # really tricky problems where the CPython interpreter dead locks
- # because the callback is holding a reference to something which
- # has a __del__ method. Setting this to None breaks the cycle
- # and allows the garbage collector to do it's thing normally.
- self.__callback = None
+ self._close()
return data
diff --git a/pip/_vendor/cachecontrol/serialize.py b/pip/_vendor/cachecontrol/serialize.py
index ffbfbf6f8..8f9c589f2 100644
--- a/pip/_vendor/cachecontrol/serialize.py
+++ b/pip/_vendor/cachecontrol/serialize.py
@@ -134,6 +134,12 @@ class Serializer(object):
body_raw = cached["response"].pop("body")
+ headers = CaseInsensitiveDict(data=cached['response']['headers'])
+ if headers.get('transfer-encoding', '') == 'chunked':
+ headers.pop('transfer-encoding')
+
+ cached['response']['headers'] = headers
+
try:
body = io.BytesIO(body_raw)
except TypeError:
diff --git a/pip/_vendor/distlib/__init__.py b/pip/_vendor/distlib/__init__.py
index 7026860f1..d186b0a36 100644
--- a/pip/_vendor/distlib/__init__.py
+++ b/pip/_vendor/distlib/__init__.py
@@ -6,7 +6,7 @@
#
import logging
-__version__ = '0.2.3'
+__version__ = '0.2.4'
class DistlibException(Exception):
pass
diff --git a/pip/_vendor/distlib/_backport/shutil.py b/pip/_vendor/distlib/_backport/shutil.py
index 9e2e234d4..159e49ee8 100644
--- a/pip/_vendor/distlib/_backport/shutil.py
+++ b/pip/_vendor/distlib/_backport/shutil.py
@@ -55,8 +55,8 @@ class ReadError(EnvironmentError):
"""Raised when an archive cannot be read"""
class RegistryError(Exception):
- """Raised when a registery operation with the archiving
- and unpacking registeries fails"""
+ """Raised when a registry operation with the archiving
+ and unpacking registries fails"""
try:
@@ -648,7 +648,7 @@ def register_unpack_format(name, extensions, function, extra_args=None,
_UNPACK_FORMATS[name] = extensions, function, extra_args, description
def unregister_unpack_format(name):
- """Removes the pack format from the registery."""
+ """Removes the pack format from the registry."""
del _UNPACK_FORMATS[name]
def _ensure_directory(path):
diff --git a/pip/_vendor/distlib/_backport/tarfile.py b/pip/_vendor/distlib/_backport/tarfile.py
index 0580fb795..d66d85663 100644
--- a/pip/_vendor/distlib/_backport/tarfile.py
+++ b/pip/_vendor/distlib/_backport/tarfile.py
@@ -331,7 +331,7 @@ class ExtractError(TarError):
"""General exception for extract errors."""
pass
class ReadError(TarError):
- """Exception for unreadble tar archives."""
+ """Exception for unreadable tar archives."""
pass
class CompressionError(TarError):
"""Exception for unavailable compression methods."""
diff --git a/pip/_vendor/distlib/compat.py b/pip/_vendor/distlib/compat.py
index 069ec7796..2b198dd57 100644
--- a/pip/_vendor/distlib/compat.py
+++ b/pip/_vendor/distlib/compat.py
@@ -10,6 +10,11 @@ import os
import re
import sys
+try:
+ import ssl
+except ImportError:
+ ssl = None
+
if sys.version_info[0] < 3: # pragma: no cover
from StringIO import StringIO
string_types = basestring,
@@ -30,8 +35,10 @@ if sys.version_info[0] < 3: # pragma: no cover
import urllib2
from urllib2 import (Request, urlopen, URLError, HTTPError,
HTTPBasicAuthHandler, HTTPPasswordMgr,
- HTTPSHandler, HTTPHandler, HTTPRedirectHandler,
+ HTTPHandler, HTTPRedirectHandler,
build_opener)
+ if ssl:
+ from urllib2 import HTTPSHandler
import httplib
import xmlrpclib
import Queue as queue
@@ -66,8 +73,10 @@ else: # pragma: no cover
from urllib.request import (urlopen, urlretrieve, Request, url2pathname,
pathname2url,
HTTPBasicAuthHandler, HTTPPasswordMgr,
- HTTPSHandler, HTTPHandler, HTTPRedirectHandler,
+ HTTPHandler, HTTPRedirectHandler,
build_opener)
+ if ssl:
+ from urllib.request import HTTPSHandler
from urllib.error import HTTPError, URLError, ContentTooShortError
import http.client as httplib
import urllib.request as urllib2
@@ -101,7 +110,7 @@ except ImportError: # pragma: no cover
wildcards = leftmost.count('*')
if wildcards > max_wildcards:
# Issue #17980: avoid denials of service by refusing more
- # than one wildcard per fragment. A survery of established
+ # than one wildcard per fragment. A survey of established
# policy among SSL implementations showed it to be a
# reasonable choice.
raise CertificateError(
@@ -366,7 +375,7 @@ except ImportError: # pragma: no cover
def detect_encoding(readline):
"""
The detect_encoding() function is used to detect the encoding that should
- be used to decode a Python source file. It requires one argment, readline,
+ be used to decode a Python source file. It requires one argument, readline,
in the same way as the tokenize() generator.
It will call readline a maximum of twice, and return the encoding used
diff --git a/pip/_vendor/distlib/database.py b/pip/_vendor/distlib/database.py
index 7bc191447..c31442640 100644
--- a/pip/_vendor/distlib/database.py
+++ b/pip/_vendor/distlib/database.py
@@ -1308,5 +1308,5 @@ def make_dist(name, version, **kwargs):
md = Metadata(**kwargs)
md.name = name
md.version = version
- md.summary = summary or 'Plaeholder for summary'
+ md.summary = summary or 'Placeholder for summary'
return Distribution(md)
diff --git a/pip/_vendor/distlib/index.py b/pip/_vendor/distlib/index.py
index 73037c97b..6803dd283 100644
--- a/pip/_vendor/distlib/index.py
+++ b/pip/_vendor/distlib/index.py
@@ -51,7 +51,9 @@ class PackageIndex(object):
self.gpg_home = None
self.rpc_proxy = None
with open(os.devnull, 'w') as sink:
- for s in ('gpg2', 'gpg'):
+ # Use gpg by default rather than gpg2, as gpg2 insists on
+ # prompting for passwords
+ for s in ('gpg', 'gpg2'):
try:
rc = subprocess.check_call([s, '--version'], stdout=sink,
stderr=sink)
@@ -74,7 +76,7 @@ class PackageIndex(object):
def read_configuration(self):
"""
Read the PyPI access configuration as supported by distutils, getting
- PyPI to do the acutal work. This populates ``username``, ``password``,
+ PyPI to do the actual work. This populates ``username``, ``password``,
``realm`` and ``url`` attributes from the configuration.
"""
# get distutils to do the work
@@ -276,7 +278,7 @@ class PackageIndex(object):
sha256_digest = hashlib.sha256(file_data).hexdigest()
d.update({
':action': 'file_upload',
- 'protcol_version': '1',
+ 'protocol_version': '1',
'filetype': filetype,
'pyversion': pyversion,
'md5_digest': md5_digest,
diff --git a/pip/_vendor/distlib/locators.py b/pip/_vendor/distlib/locators.py
index 1e1463820..14789ef5d 100644
--- a/pip/_vendor/distlib/locators.py
+++ b/pip/_vendor/distlib/locators.py
@@ -21,13 +21,13 @@ import zlib
from . import DistlibException
from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,
queue, quote, unescape, string_types, build_opener,
- HTTPRedirectHandler as BaseRedirectHandler,
+ HTTPRedirectHandler as BaseRedirectHandler, text_type,
Request, HTTPError, URLError)
from .database import Distribution, DistributionPath, make_dist
from .metadata import Metadata
from .util import (cached_property, parse_credentials, ensure_slash,
split_filename, get_project_data, parse_requirement,
- parse_name_and_version, ServerProxy)
+ parse_name_and_version, ServerProxy, normalize_name)
from .version import get_scheme, UnsupportedVersionError
from .wheel import Wheel, is_compatible
@@ -113,6 +113,28 @@ class Locator(object):
# is set from the requirement passed to locate(). See issue #18 for
# why this can be useful to know.
self.matcher = None
+ self.errors = queue.Queue()
+
+ def get_errors(self):
+ """
+ Return any errors which have occurred.
+ """
+ result = []
+ while not self.errors.empty(): # pragma: no cover
+ try:
+ e = self.errors.get(False)
+ result.append(e)
+ except self.errors.Empty:
+ continue
+ self.errors.task_done()
+ return result
+
+ def clear_errors(self):
+ """
+ Clear any errors which may have been logged.
+ """
+ # Just get the errors and throw them away
+ self.get_errors()
def clear_cache(self):
self._cache.clear()
@@ -155,6 +177,7 @@ class Locator(object):
elif name in self._cache:
result = self._cache[name]
else:
+ self.clear_errors()
result = self._get_project(name)
self._cache[name] = result
return result
@@ -210,14 +233,7 @@ class Locator(object):
"filename" and "url"; otherwise, None is returned.
"""
def same_project(name1, name2):
- name1, name2 = name1.lower(), name2.lower()
- if name1 == name2:
- result = True
- else:
- # distribute replaces '-' by '_' in project names, so it
- # can tell where the version starts in a filename.
- result = name1.replace('_', '-') == name2.replace('_', '-')
- return result
+ return normalize_name(name1) == normalize_name(name2)
result = None
scheme, netloc, path, params, query, frag = urlparse(url)
@@ -250,7 +266,7 @@ class Locator(object):
'python-version': ', '.join(
['.'.join(list(v[2:])) for v in wheel.pyver]),
}
- except Exception as e:
+ except Exception as e: # pragma: no cover
logger.warning('invalid path for wheel: %s', path)
elif path.endswith(self.downloadable_extensions):
path = filename = posixpath.basename(path)
@@ -489,6 +505,7 @@ class PyPIJSONLocator(Locator):
# result['urls'].setdefault(md.version, set()).add(url)
# result['digests'][url] = self._get_digest(info)
except Exception as e:
+ self.errors.put(text_type(e))
logger.exception('JSON fetch failed: %s', e)
return result
@@ -714,6 +731,8 @@ class SimpleScrapingLocator(Locator):
self._should_queue(link, url, rel)):
logger.debug('Queueing %s from %s', link, url)
self._to_fetch.put(link)
+ except Exception as e: # pragma: no cover
+ self.errors.put(text_type(e))
finally:
# always do this, to avoid hangs :-)
self._to_fetch.task_done()
diff --git a/pip/_vendor/distlib/manifest.py b/pip/_vendor/distlib/manifest.py
index 21cff45e3..9f0336453 100644
--- a/pip/_vendor/distlib/manifest.py
+++ b/pip/_vendor/distlib/manifest.py
@@ -12,6 +12,7 @@ import fnmatch
import logging
import os
import re
+import sys
from . import DistlibException
from .compat import fsdecode
@@ -26,6 +27,12 @@ logger = logging.getLogger(__name__)
_COLLAPSE_PATTERN = re.compile('\\\w*\n', re.M)
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
+#
+# Due to the different results returned by fnmatch.translate, we need
+# to do slightly different processing for Python 2.7 and 3.2 ... this needed
+# to be brought in for Python 3.6 onwards.
+#
+_PYTHON_VERSION = sys.version_info[:2]
class Manifest(object):
"""A list of files built by on exploring the filesystem and filtered by
@@ -322,24 +329,43 @@ class Manifest(object):
else:
return pattern
+ if _PYTHON_VERSION > (3, 2):
+ # ditch start and end characters
+ start, _, end = self._glob_to_re('_').partition('_')
+
if pattern:
pattern_re = self._glob_to_re(pattern)
+ if _PYTHON_VERSION > (3, 2):
+ assert pattern_re.startswith(start) and pattern_re.endswith(end)
else:
pattern_re = ''
base = re.escape(os.path.join(self.base, ''))
if prefix is not None:
# ditch end of pattern character
- empty_pattern = self._glob_to_re('')
- prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
+ if _PYTHON_VERSION <= (3, 2):
+ empty_pattern = self._glob_to_re('')
+ prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
+ else:
+ prefix_re = self._glob_to_re(prefix)
+ assert prefix_re.startswith(start) and prefix_re.endswith(end)
+ prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
sep = os.sep
if os.sep == '\\':
sep = r'\\'
- pattern_re = '^' + base + sep.join((prefix_re,
- '.*' + pattern_re))
- else: # no prefix -- respect anchor flag
+ if _PYTHON_VERSION <= (3, 2):
+ pattern_re = '^' + base + sep.join((prefix_re,
+ '.*' + pattern_re))
+ else:
+ pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
+ pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep,
+ pattern_re, end)
+ else: # no prefix -- respect anchor flag
if anchor:
- pattern_re = '^' + base + pattern_re
+ if _PYTHON_VERSION <= (3, 2):
+ pattern_re = '^' + base + pattern_re
+ else:
+ pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):])
return re.compile(pattern_re)
diff --git a/pip/_vendor/distlib/metadata.py b/pip/_vendor/distlib/metadata.py
index 71525dd9e..75bfd68ec 100644
--- a/pip/_vendor/distlib/metadata.py
+++ b/pip/_vendor/distlib/metadata.py
@@ -444,16 +444,16 @@ class LegacyMetadata(object):
# check that the values are valid
if not scheme.is_valid_matcher(v.split(';')[0]):
logger.warning(
- '%r: %r is not valid (field %r)',
+ "'%s': '%s' is not valid (field '%s')",
project_name, v, name)
# FIXME this rejects UNKNOWN, is that right?
elif name in _VERSIONS_FIELDS and value is not None:
if not scheme.is_valid_constraint_list(value):
- logger.warning('%r: %r is not a valid version (field %r)',
+ logger.warning("'%s': '%s' is not a valid version (field '%s')",
project_name, value, name)
elif name in _VERSION_FIELDS and value is not None:
if not scheme.is_valid_version(value):
- logger.warning('%r: %r is not a valid version (field %r)',
+ logger.warning("'%s': '%s' is not a valid version (field '%s')",
project_name, value, name)
if name in _UNICODEFIELDS:
@@ -531,7 +531,7 @@ class LegacyMetadata(object):
for field in fields:
value = self.get(field, None)
if value is not None and not controller(value):
- warnings.append('Wrong value for %r: %s' % (field, value))
+ warnings.append("Wrong value for '%s': %s" % (field, value))
return missing, warnings
@@ -766,6 +766,8 @@ class Metadata(object):
result = d.get(key, value)
else:
d = d.get('python.exports')
+ if not d:
+ d = self._data.get('python.exports')
if d:
result = d.get(key, value)
if result is sentinel:
@@ -784,8 +786,8 @@ class Metadata(object):
if (scheme or self.scheme) not in exclusions:
m = pattern.match(value)
if not m:
- raise MetadataInvalidError('%r is an invalid value for '
- 'the %r property' % (value,
+ raise MetadataInvalidError("'%s' is an invalid value for "
+ "the '%s' property" % (value,
key))
def __setattr__(self, key, value):
diff --git a/pip/_vendor/distlib/resources.py b/pip/_vendor/distlib/resources.py
index 9dd8ca016..f07cde259 100644
--- a/pip/_vendor/distlib/resources.py
+++ b/pip/_vendor/distlib/resources.py
@@ -289,9 +289,14 @@ _finder_registry = {
}
try:
- import _frozen_importlib
- _finder_registry[_frozen_importlib.SourceFileLoader] = ResourceFinder
- _finder_registry[_frozen_importlib.FileFinder] = ResourceFinder
+ # In Python 3.6, _frozen_importlib -> _frozen_importlib_external
+ try:
+ import _frozen_importlib_external as _fi
+ except ImportError:
+ import _frozen_importlib as _fi
+ _finder_registry[_fi.SourceFileLoader] = ResourceFinder
+ _finder_registry[_fi.FileFinder] = ResourceFinder
+ del _fi
except (ImportError, AttributeError):
pass
diff --git a/pip/_vendor/distlib/scripts.py b/pip/_vendor/distlib/scripts.py
index c9996d598..792fc2e1f 100644
--- a/pip/_vendor/distlib/scripts.py
+++ b/pip/_vendor/distlib/scripts.py
@@ -52,7 +52,7 @@ if __name__ == '__main__':
return result
try:
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+ sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
func = _resolve('%(module)s', '%(func)s')
rc = func() # None interpreted as 0
diff --git a/pip/_vendor/distlib/t32.exe b/pip/_vendor/distlib/t32.exe
index e0168c22c..836211d84 100644
--- a/pip/_vendor/distlib/t32.exe
+++ b/pip/_vendor/distlib/t32.exe
Binary files differ
diff --git a/pip/_vendor/distlib/t64.exe b/pip/_vendor/distlib/t64.exe
index 29a1fd6cc..a401b59d6 100644
--- a/pip/_vendor/distlib/t64.exe
+++ b/pip/_vendor/distlib/t64.exe
Binary files differ
diff --git a/pip/_vendor/distlib/util.py b/pip/_vendor/distlib/util.py
index 7e209ec2d..604f20cd3 100644
--- a/pip/_vendor/distlib/util.py
+++ b/pip/_vendor/distlib/util.py
@@ -15,7 +15,10 @@ import py_compile
import re
import shutil
import socket
-import ssl
+try:
+ import ssl
+except ImportError: # pragma: no cover
+ ssl = None
import subprocess
import sys
import tarfile
@@ -24,17 +27,16 @@ import textwrap
try:
import threading
-except ImportError:
+except ImportError: # pragma: no cover
import dummy_threading as threading
import time
from . import DistlibException
from .compat import (string_types, text_type, shutil, raw_input, StringIO,
cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
- splittype, HTTPHandler, HTTPSHandler as BaseHTTPSHandler,
- BaseConfigurator, valid_ident, Container, configparser,
- URLError, match_hostname, CertificateError, ZipFile,
- fsdecode)
+ splittype, HTTPHandler, BaseConfigurator, valid_ident,
+ Container, configparser, URLError, ZipFile, fsdecode,
+ unquote)
logger = logging.getLogger(__name__)
@@ -540,7 +542,7 @@ class ExportEntry(object):
def value(self):
return resolve(self.prefix, self.suffix)
- def __repr__(self):
+ def __repr__(self): # pragma: no cover
return '<ExportEntry %s = %s:%s %s>' % (self.name, self.prefix,
self.suffix, self.flags)
@@ -567,8 +569,8 @@ def get_export_entry(specification):
if not m:
result = None
if '[' in specification or ']' in specification:
- raise DistlibException('Invalid specification '
- '%r' % specification)
+ raise DistlibException("Invalid specification "
+ "'%s'" % specification)
else:
d = m.groupdict()
name = d['name']
@@ -578,14 +580,14 @@ def get_export_entry(specification):
prefix, suffix = path, None
else:
if colons != 1:
- raise DistlibException('Invalid specification '
- '%r' % specification)
+ raise DistlibException("Invalid specification "
+ "'%s'" % specification)
prefix, suffix = path.split(':')
flags = d['flags']
if flags is None:
if '[' in specification or ']' in specification:
- raise DistlibException('Invalid specification '
- '%r' % specification)
+ raise DistlibException("Invalid specification "
+ "'%s'" % specification)
flags = []
else:
flags = [f.strip() for f in flags.split(',')]
@@ -696,6 +698,7 @@ def split_filename(filename, project_name=None):
"""
result = None
pyver = None
+ filename = unquote(filename).replace(' ', '-')
m = PYTHON_VERSION.search(filename)
if m:
pyver = m.group(1)
@@ -804,7 +807,7 @@ class Cache(object):
"""
# we use 'isdir' instead of 'exists', because we want to
# fail if there's a file with that name
- if not os.path.isdir(base):
+ if not os.path.isdir(base): # pragma: no cover
os.makedirs(base)
if (os.stat(base).st_mode & 0o77) != 0:
logger.warning('Directory \'%s\' is not private', base)
@@ -940,12 +943,12 @@ class Sequencer(object):
try:
preds = self._preds[succ]
succs = self._succs[pred]
- except KeyError:
+ except KeyError: # pragma: no cover
raise ValueError('%r not a successor of anything' % succ)
try:
preds.remove(pred)
succs.remove(succ)
- except KeyError:
+ except KeyError: # pragma: no cover
raise ValueError('%r not a successor of %r' % (succ, pred))
def is_step(self, step):
@@ -1071,7 +1074,7 @@ def unarchive(archive_filename, dest_dir, format=None, check=True):
elif archive_filename.endswith('.tar'):
format = 'tar'
mode = 'r'
- else:
+ else: # pragma: no cover
raise ValueError('Unknown format for %r' % archive_filename)
try:
if format == 'zip':
@@ -1257,99 +1260,102 @@ def _iglob(path_glob):
for fn in _iglob(os.path.join(path, radical)):
yield fn
+if ssl:
+ from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname,
+ CertificateError)
#
# HTTPSConnection which verifies certificates/matches domains
#
-class HTTPSConnection(httplib.HTTPSConnection):
- ca_certs = None # set this to the path to the certs file (.pem)
- check_domain = True # only used if ca_certs is not None
-
- # noinspection PyPropertyAccess
- def connect(self):
- sock = socket.create_connection((self.host, self.port), self.timeout)
- if getattr(self, '_tunnel_host', False):
- self.sock = sock
- self._tunnel()
-
- if not hasattr(ssl, 'SSLContext'):
- # For 2.x
- if self.ca_certs:
- cert_reqs = ssl.CERT_REQUIRED
- else:
- cert_reqs = ssl.CERT_NONE
- self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
- cert_reqs=cert_reqs,
- ssl_version=ssl.PROTOCOL_SSLv23,
- ca_certs=self.ca_certs)
- else:
- context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
- context.options |= ssl.OP_NO_SSLv2
- if self.cert_file:
- context.load_cert_chain(self.cert_file, self.key_file)
- kwargs = {}
+ class HTTPSConnection(httplib.HTTPSConnection):
+ ca_certs = None # set this to the path to the certs file (.pem)
+ check_domain = True # only used if ca_certs is not None
+
+ # noinspection PyPropertyAccess
+ def connect(self):
+ sock = socket.create_connection((self.host, self.port), self.timeout)
+ if getattr(self, '_tunnel_host', False):
+ self.sock = sock
+ self._tunnel()
+
+ if not hasattr(ssl, 'SSLContext'):
+ # For 2.x
+ if self.ca_certs:
+ cert_reqs = ssl.CERT_REQUIRED
+ else:
+ cert_reqs = ssl.CERT_NONE
+ self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
+ cert_reqs=cert_reqs,
+ ssl_version=ssl.PROTOCOL_SSLv23,
+ ca_certs=self.ca_certs)
+ else: # pragma: no cover
+ context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ context.options |= ssl.OP_NO_SSLv2
+ if self.cert_file:
+ context.load_cert_chain(self.cert_file, self.key_file)
+ kwargs = {}
+ if self.ca_certs:
+ context.verify_mode = ssl.CERT_REQUIRED
+ context.load_verify_locations(cafile=self.ca_certs)
+ if getattr(ssl, 'HAS_SNI', False):
+ kwargs['server_hostname'] = self.host
+ self.sock = context.wrap_socket(sock, **kwargs)
+ if self.ca_certs and self.check_domain:
+ try:
+ match_hostname(self.sock.getpeercert(), self.host)
+ logger.debug('Host verified: %s', self.host)
+ except CertificateError: # pragma: no cover
+ self.sock.shutdown(socket.SHUT_RDWR)
+ self.sock.close()
+ raise
+
+ class HTTPSHandler(BaseHTTPSHandler):
+ def __init__(self, ca_certs, check_domain=True):
+ BaseHTTPSHandler.__init__(self)
+ self.ca_certs = ca_certs
+ self.check_domain = check_domain
+
+ def _conn_maker(self, *args, **kwargs):
+ """
+ This is called to create a connection instance. Normally you'd
+ pass a connection class to do_open, but it doesn't actually check for
+ a class, and just expects a callable. As long as we behave just as a
+ constructor would have, we should be OK. If it ever changes so that
+ we *must* pass a class, we'll create an UnsafeHTTPSConnection class
+ which just sets check_domain to False in the class definition, and
+ choose which one to pass to do_open.
+ """
+ result = HTTPSConnection(*args, **kwargs)
if self.ca_certs:
- context.verify_mode = ssl.CERT_REQUIRED
- context.load_verify_locations(cafile=self.ca_certs)
- if getattr(ssl, 'HAS_SNI', False):
- kwargs['server_hostname'] = self.host
- self.sock = context.wrap_socket(sock, **kwargs)
- if self.ca_certs and self.check_domain:
- try:
- match_hostname(self.sock.getpeercert(), self.host)
- logger.debug('Host verified: %s', self.host)
- except CertificateError:
- self.sock.shutdown(socket.SHUT_RDWR)
- self.sock.close()
- raise
-
-class HTTPSHandler(BaseHTTPSHandler):
- def __init__(self, ca_certs, check_domain=True):
- BaseHTTPSHandler.__init__(self)
- self.ca_certs = ca_certs
- self.check_domain = check_domain
-
- def _conn_maker(self, *args, **kwargs):
- """
- This is called to create a connection instance. Normally you'd
- pass a connection class to do_open, but it doesn't actually check for
- a class, and just expects a callable. As long as we behave just as a
- constructor would have, we should be OK. If it ever changes so that
- we *must* pass a class, we'll create an UnsafeHTTPSConnection class
- which just sets check_domain to False in the class definition, and
- choose which one to pass to do_open.
- """
- result = HTTPSConnection(*args, **kwargs)
- if self.ca_certs:
- result.ca_certs = self.ca_certs
- result.check_domain = self.check_domain
- return result
-
- def https_open(self, req):
- try:
- return self.do_open(self._conn_maker, req)
- except URLError as e:
- if 'certificate verify failed' in str(e.reason):
- raise CertificateError('Unable to verify server certificate '
- 'for %s' % req.host)
- else:
- raise
+ result.ca_certs = self.ca_certs
+ result.check_domain = self.check_domain
+ return result
-#
-# To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
-# Middle proxy using HTTP listens on port 443, or an index mistakenly serves
-# HTML containing a http://xyz link when it should be https://xyz),
-# you can use the following handler class, which does not allow HTTP traffic.
-#
-# It works by inheriting from HTTPHandler - so build_opener won't add a
-# handler for HTTP itself.
-#
-class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
- def http_open(self, req):
- raise URLError('Unexpected HTTP request on what should be a secure '
- 'connection: %s' % req)
+ def https_open(self, req):
+ try:
+ return self.do_open(self._conn_maker, req)
+ except URLError as e:
+ if 'certificate verify failed' in str(e.reason):
+ raise CertificateError('Unable to verify server certificate '
+ 'for %s' % req.host)
+ else:
+ raise
+
+ #
+ # To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
+ # Middle proxy using HTTP listens on port 443, or an index mistakenly serves
+ # HTML containing a http://xyz link when it should be https://xyz),
+ # you can use the following handler class, which does not allow HTTP traffic.
+ #
+ # It works by inheriting from HTTPHandler - so build_opener won't add a
+ # handler for HTTP itself.
+ #
+ class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
+ def http_open(self, req):
+ raise URLError('Unexpected HTTP request on what should be a secure '
+ 'connection: %s' % req)
#
# XML-RPC with timeouts
@@ -1365,11 +1371,12 @@ if _ver_info == (2, 6):
self._setup(self._connection_class(host, port, **kwargs))
- class HTTPS(httplib.HTTPS):
- def __init__(self, host='', port=None, **kwargs):
- if port == 0: # 0 means use port 0, not the default port
- port = None
- self._setup(self._connection_class(host, port, **kwargs))
+ if ssl:
+ class HTTPS(httplib.HTTPS):
+ def __init__(self, host='', port=None, **kwargs):
+ if port == 0: # 0 means use port 0, not the default port
+ port = None
+ self._setup(self._connection_class(host, port, **kwargs))
class Transport(xmlrpclib.Transport):
@@ -1388,25 +1395,26 @@ class Transport(xmlrpclib.Transport):
result = self._connection[1]
return result
-class SafeTransport(xmlrpclib.SafeTransport):
- def __init__(self, timeout, use_datetime=0):
- self.timeout = timeout
- xmlrpclib.SafeTransport.__init__(self, use_datetime)
-
- def make_connection(self, host):
- h, eh, kwargs = self.get_host_info(host)
- if not kwargs:
- kwargs = {}
- kwargs['timeout'] = self.timeout
- if _ver_info == (2, 6):
- result = HTTPS(host, None, **kwargs)
- else:
- if not self._connection or host != self._connection[0]:
- self._extra_headers = eh
- self._connection = host, httplib.HTTPSConnection(h, None,
- **kwargs)
- result = self._connection[1]
- return result
+if ssl:
+ class SafeTransport(xmlrpclib.SafeTransport):
+ def __init__(self, timeout, use_datetime=0):
+ self.timeout = timeout
+ xmlrpclib.SafeTransport.__init__(self, use_datetime)
+
+ def make_connection(self, host):
+ h, eh, kwargs = self.get_host_info(host)
+ if not kwargs:
+ kwargs = {}
+ kwargs['timeout'] = self.timeout
+ if _ver_info == (2, 6):
+ result = HTTPS(host, None, **kwargs)
+ else:
+ if not self._connection or host != self._connection[0]:
+ self._extra_headers = eh
+ self._connection = host, httplib.HTTPSConnection(h, None,
+ **kwargs)
+ result = self._connection[1]
+ return result
class ServerProxy(xmlrpclib.ServerProxy):
@@ -1595,3 +1603,9 @@ class SubprocessMixin(object):
elif self.verbose:
sys.stderr.write('done.\n')
return p
+
+
+def normalize_name(name):
+ """Normalize a python package name a la PEP 503"""
+ # https://www.python.org/dev/peps/pep-0503/#normalized-names
+ return re.sub('[-_.]+', '-', name).lower()
diff --git a/pip/_vendor/distlib/version.py b/pip/_vendor/distlib/version.py
index d3dcfa006..48c17c0a6 100644
--- a/pip/_vendor/distlib/version.py
+++ b/pip/_vendor/distlib/version.py
@@ -137,7 +137,7 @@ class Matcher(object):
Check if the provided version matches the constraints.
:param version: The version to match against this instance.
- :type version: Strring or :class:`Version` instance.
+ :type version: String or :class:`Version` instance.
"""
if isinstance(version, string_types):
version = self.version_class(version)
@@ -265,7 +265,7 @@ class NormalizedVersion(Version):
TODO: fill this out
Bad:
- 1 # mininum two numbers
+ 1 # minimum two numbers
1.2a # release level must have a release serial
1.2.3b
"""
@@ -494,7 +494,7 @@ def _suggest_normalized_version(s):
rs = re.sub(r"dev$", r"dev0", rs)
# if we have something like "b-2" or "a.2" at the end of the
- # version, that is pobably beta, alpha, etc
+ # version, that is probably beta, alpha, etc
# let's remove the dash or dot
rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs)
diff --git a/pip/_vendor/distlib/w32.exe b/pip/_vendor/distlib/w32.exe
index f27573a1e..85a90a5f5 100644
--- a/pip/_vendor/distlib/w32.exe
+++ b/pip/_vendor/distlib/w32.exe
Binary files differ
diff --git a/pip/_vendor/distlib/w64.exe b/pip/_vendor/distlib/w64.exe
index fd37d9ee4..b3aea316f 100644
--- a/pip/_vendor/distlib/w64.exe
+++ b/pip/_vendor/distlib/w64.exe
Binary files differ
diff --git a/pip/_vendor/distro.py b/pip/_vendor/distro.py
index 4fd4be89f..68fef0ac2 100644
--- a/pip/_vendor/distro.py
+++ b/pip/_vendor/distro.py
@@ -31,11 +31,14 @@ more information.
import os
import re
import sys
+import json
import shlex
+import logging
import subprocess
-from pip._vendor import six
+if not sys.platform.startswith('linux'):
+ raise ImportError('Unsupported platform: {0}'.format(sys.platform))
_UNIXCONFDIR = '/etc'
_OS_RELEASE_BASENAME = 'os-release'
@@ -47,8 +50,7 @@ _OS_RELEASE_BASENAME = 'os-release'
#: with blanks translated to underscores.
#:
#: * Value: Normalized value.
-NORMALIZED_OS_ID = {
-}
+NORMALIZED_OS_ID = {}
#: Translation table for normalizing the "Distributor ID" attribute returned by
#: the lsb_release command, for use by the :func:`distro.id` method.
@@ -73,21 +75,22 @@ NORMALIZED_DISTRO_ID = {
'redhat': 'rhel', # RHEL 6.x, 7.x
}
-
# Pattern for content of distro release file (reversed)
_DISTRO_RELEASE_CONTENT_REVERSED_PATTERN = re.compile(
r'(?:[^)]*\)(.*)\()? *(?:STL )?([\d.+\-a-z]*\d) *(?:esaeler *)?(.+)')
# Pattern for base file name of distro release file
_DISTRO_RELEASE_BASENAME_PATTERN = re.compile(
- r'(\w+)[-_](release|version)')
+ r'(\w+)[-_](release|version)$')
# Base file names to be ignored when searching for distro release file
-_DISTRO_RELEASE_IGNORE_BASENAMES = [
+_DISTRO_RELEASE_IGNORE_BASENAMES = (
'debian_version',
- 'system-release',
- _OS_RELEASE_BASENAME
-]
+ 'lsb-release',
+ 'oem-release',
+ _OS_RELEASE_BASENAME,
+ 'system-release'
+)
def linux_distribution(full_distribution_name=True):
@@ -115,7 +118,7 @@ def linux_distribution(full_distribution_name=True):
method normalizes the distro ID string to a reliable machine-readable value
for a number of popular Linux distributions.
"""
- return _distroi.linux_distribution(full_distribution_name)
+ return _distro.linux_distribution(full_distribution_name)
def id():
@@ -190,7 +193,7 @@ def id():
command, with ID values that differ from what was previously determined
from the distro release file name.
"""
- return _distroi.id()
+ return _distro.id()
def name(pretty=False):
@@ -229,7 +232,7 @@ def name(pretty=False):
with the value of the pretty version ("<version_id>" and "<codename>"
fields) of the distro release file, if available.
"""
- return _distroi.name(pretty)
+ return _distro.name(pretty)
def version(pretty=False, best=False):
@@ -273,7 +276,7 @@ def version(pretty=False, best=False):
the lsb_release command, if it follows the format of the distro release
files.
"""
- return _distroi.version(pretty, best)
+ return _distro.version(pretty, best)
def version_parts(best=False):
@@ -290,7 +293,7 @@ def version_parts(best=False):
For a description of the *best* parameter, see the :func:`distro.version`
method.
"""
- return _distroi.version_parts(best)
+ return _distro.version_parts(best)
def major_version(best=False):
@@ -303,7 +306,7 @@ def major_version(best=False):
For a description of the *best* parameter, see the :func:`distro.version`
method.
"""
- return _distroi.major_version(best)
+ return _distro.major_version(best)
def minor_version(best=False):
@@ -316,7 +319,7 @@ def minor_version(best=False):
For a description of the *best* parameter, see the :func:`distro.version`
method.
"""
- return _distroi.minor_version(best)
+ return _distro.minor_version(best)
def build_number(best=False):
@@ -329,7 +332,7 @@ def build_number(best=False):
For a description of the *best* parameter, see the :func:`distro.version`
method.
"""
- return _distroi.build_number(best)
+ return _distro.build_number(best)
def like():
@@ -346,7 +349,7 @@ def like():
`os-release man page
<http://www.freedesktop.org/software/systemd/man/os-release.html>`_.
"""
- return _distroi.like()
+ return _distro.like()
def codename():
@@ -370,7 +373,7 @@ def codename():
* the value of the "<codename>" field of the distro release file.
"""
- return _distroi.codename()
+ return _distro.codename()
def info(pretty=False, best=False):
@@ -414,7 +417,7 @@ def info(pretty=False, best=False):
For a description of the *pretty* and *best* parameters, see the
:func:`distro.version` method.
"""
- return _distroi.info(pretty, best)
+ return _distro.info(pretty, best)
def os_release_info():
@@ -424,7 +427,7 @@ def os_release_info():
See `os-release file`_ for details about these information items.
"""
- return _distroi.os_release_info()
+ return _distro.os_release_info()
def lsb_release_info():
@@ -435,7 +438,7 @@ def lsb_release_info():
See `lsb_release command output`_ for details about these information
items.
"""
- return _distroi.lsb_release_info()
+ return _distro.lsb_release_info()
def distro_release_info():
@@ -445,7 +448,7 @@ def distro_release_info():
See `distro release file`_ for details about these information items.
"""
- return _distroi.distro_release_info()
+ return _distro.distro_release_info()
def os_release_attr(attribute):
@@ -464,7 +467,7 @@ def os_release_attr(attribute):
See `os-release file`_ for details about these information items.
"""
- return _distroi.os_release_attr(attribute)
+ return _distro.os_release_attr(attribute)
def lsb_release_attr(attribute):
@@ -484,7 +487,7 @@ def lsb_release_attr(attribute):
See `lsb_release command output`_ for details about these information
items.
"""
- return _distroi.lsb_release_attr(attribute)
+ return _distro.lsb_release_attr(attribute)
def distro_release_attr(attribute):
@@ -503,7 +506,7 @@ def distro_release_attr(attribute):
See `distro release file`_ for details about these information items.
"""
- return _distroi.distro_release_attr(attribute)
+ return _distro.distro_release_attr(attribute)
class LinuxDistribution(object):
@@ -625,20 +628,22 @@ class LinuxDistribution(object):
For details, see :func:`distro.id`.
"""
+
+ def normalize(distro_id, table):
+ distro_id = distro_id.lower().replace(' ', '_')
+ return table.get(distro_id, distro_id)
+
distro_id = self.os_release_attr('id')
if distro_id:
- distro_id = distro_id.lower().replace(' ', '_')
- return NORMALIZED_OS_ID.get(distro_id, distro_id)
+ return normalize(distro_id, NORMALIZED_OS_ID)
distro_id = self.lsb_release_attr('distributor_id')
if distro_id:
- distro_id = distro_id.lower().replace(' ', '_')
- return NORMALIZED_LSB_ID.get(distro_id, distro_id)
+ return normalize(distro_id, NORMALIZED_LSB_ID)
distro_id = self.distro_release_attr('id')
if distro_id:
- distro_id = distro_id.lower().replace(' ', '_')
- return NORMALIZED_DISTRO_ID.get(distro_id, distro_id)
+ return normalize(distro_id, NORMALIZED_DISTRO_ID)
return ''
@@ -703,12 +708,12 @@ class LinuxDistribution(object):
"""
version_str = self.version(best=best)
if version_str:
- g = re.compile(r'(\d+)\.?(\d+)?\.?(\d+)?')
- m = g.match(version_str)
- if m:
- major, minor, build_number = m.groups()
- return (major, minor or '', build_number or '')
- return ('', '', '')
+ version_regex = re.compile(r'(\d+)\.?(\d+)?\.?(\d+)?')
+ matches = version_regex.match(version_str)
+ if matches:
+ major, minor, build_number = matches.groups()
+ return major, minor or '', build_number or ''
+ return '', '', ''
def major_version(self, best=False):
"""
@@ -836,8 +841,8 @@ class LinuxDistribution(object):
A dictionary containing all information items.
"""
if os.path.isfile(self.os_release_file):
- with open(self.os_release_file, 'r') as f:
- return self._parse_os_release_content(f)
+ with open(self.os_release_file) as release_file:
+ return self._parse_os_release_content(release_file)
return {}
@staticmethod
@@ -865,7 +870,7 @@ class LinuxDistribution(object):
# string. This causes a UnicodeDecodeError to be raised when the
# parsed content is a unicode object. The following fix resolves that
# (... but it should be fixed in shlex...):
- if sys.version_info[0] == 2 and isinstance(lexer.wordchars, str):
+ if sys.version_info[0] == 2 and isinstance(lexer.wordchars, bytes):
lexer.wordchars = lexer.wordchars.decode('iso-8859-1')
tokens = list(lexer)
@@ -878,7 +883,7 @@ class LinuxDistribution(object):
# * commands or their arguments (not allowed in os-release)
if '=' in token:
k, v = token.split('=', 1)
- if isinstance(v, six.binary_type):
+ if isinstance(v, bytes):
v = v.decode('utf-8')
props[k.lower()] = v
if k == 'VERSION':
@@ -908,23 +913,26 @@ class LinuxDistribution(object):
A dictionary containing all information items.
"""
cmd = 'lsb_release -a'
- p = subprocess.Popen(
+ process = subprocess.Popen(
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
- out, err = p.communicate()
- rc = p.returncode
- if rc == 0:
- content = out.decode('ascii').splitlines()
+ stdout, stderr = process.communicate()
+ stdout, stderr = stdout.decode('ascii'), stderr.decode('ascii')
+ code = process.returncode
+ if code == 0:
+ content = stdout.splitlines()
return self._parse_lsb_release_content(content)
- elif rc == 127: # Command not found
+ elif code == 127: # Command not found
return {}
else:
- if sys.version_info[0:2] >= (2, 7):
- raise subprocess.CalledProcessError(rc, cmd, err)
- else:
- raise subprocess.CalledProcessError(rc, cmd)
+ if sys.version_info[:2] >= (3, 5):
+ raise subprocess.CalledProcessError(code, cmd, stdout, stderr)
+ elif sys.version_info[:2] >= (2, 7):
+ raise subprocess.CalledProcessError(code, cmd, stdout)
+ elif sys.version_info[:2] == (2, 6):
+ raise subprocess.CalledProcessError(code, cmd)
@staticmethod
def _parse_lsb_release_content(lines):
@@ -942,7 +950,7 @@ class LinuxDistribution(object):
"""
props = {}
for line in lines:
- if isinstance(line, six.binary_type):
+ if isinstance(line, bytes):
line = line.decode('utf-8')
kv = line.strip('\n').split(':', 1)
if len(kv) != 2:
@@ -1005,7 +1013,7 @@ class LinuxDistribution(object):
A dictionary containing all information items.
"""
if os.path.isfile(filepath):
- with open(filepath, 'r') as fp:
+ with open(filepath) as fp:
# Only parse the first line. For instance, on SLES there
# are multiple lines. We don't want them...
return self._parse_distro_release_content(fp.readline())
@@ -1023,18 +1031,52 @@ class LinuxDistribution(object):
Returns:
A dictionary containing all information items.
"""
- if isinstance(line, six.binary_type):
+ if isinstance(line, bytes):
line = line.decode('utf-8')
- m = _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN.match(
+ matches = _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN.match(
line.strip()[::-1])
distro_info = {}
- if m:
- distro_info['name'] = m.group(3)[::-1] # regexp ensures non-None
- if m.group(2):
- distro_info['version_id'] = m.group(2)[::-1]
- if m.group(1):
- distro_info['codename'] = m.group(1)[::-1]
+ if matches:
+ # regexp ensures non-None
+ distro_info['name'] = matches.group(3)[::-1]
+ if matches.group(2):
+ distro_info['version_id'] = matches.group(2)[::-1]
+ if matches.group(1):
+ distro_info['codename'] = matches.group(1)[::-1]
+ elif line:
+ distro_info['name'] = line.strip()
return distro_info
-_distroi = LinuxDistribution()
+_distro = LinuxDistribution()
+
+
+def main():
+ import argparse
+
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.DEBUG)
+ logger.addHandler(logging.StreamHandler(sys.stdout))
+
+ parser = argparse.ArgumentParser(description="Linux distro info tool")
+ parser.add_argument(
+ '--json',
+ '-j',
+ help="Output in machine readable format",
+ action="store_true")
+ args = parser.parse_args()
+
+ if args.json:
+ logger.info(json.dumps(info()))
+ else:
+ logger.info('Name: {0}'.format(name(pretty=True)))
+ distribution_version = version(pretty=True)
+ if distribution_version:
+ logger.info('Version: {0}'.format(distribution_version))
+ distribution_codename = codename()
+ if distribution_codename:
+ logger.info('Codename: {0}'.format(distribution_codename))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/pip/_vendor/html5lib/__init__.py b/pip/_vendor/html5lib/__init__.py
index 9484fdc9b..7427eb126 100644
--- a/pip/_vendor/html5lib/__init__.py
+++ b/pip/_vendor/html5lib/__init__.py
@@ -22,4 +22,4 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
# this has to be at the top level, see how setup.py parses this
-__version__ = "1.0b8"
+__version__ = "1.0b10"
diff --git a/pip/_vendor/html5lib/ihatexml.py b/pip/_vendor/html5lib/_ihatexml.py
index 0fc79308e..d6d1d6fb7 100644
--- a/pip/_vendor/html5lib/ihatexml.py
+++ b/pip/_vendor/html5lib/_ihatexml.py
@@ -175,9 +175,9 @@ def escapeRegexp(string):
return string
# output from the above
-nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u
0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
+nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u
0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
-nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u
3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
+nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u
3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
# Simpler things
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
@@ -186,7 +186,7 @@ nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
class InfosetFilter(object):
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
- def __init__(self, replaceChars=None,
+ def __init__(self,
dropXmlnsLocalName=False,
dropXmlnsAttrNs=False,
preventDoubleDashComments=False,
@@ -217,7 +217,7 @@ class InfosetFilter(object):
else:
return self.toXmlName(name)
- def coerceElement(self, name, namespace=None):
+ def coerceElement(self, name):
return self.toXmlName(name)
def coerceComment(self, data):
@@ -225,11 +225,14 @@ class InfosetFilter(object):
while "--" in data:
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
data = data.replace("--", "- -")
+ if data.endswith("-"):
+ warnings.warn("Comments cannot end in a dash", DataLossWarning)
+ data += " "
return data
def coerceCharacters(self, data):
if self.replaceFormFeedCharacters:
- for i in range(data.count("\x0C")):
+ for _ in range(data.count("\x0C")):
warnings.warn("Text cannot contain U+000C", DataLossWarning)
data = data.replace("\x0C", " ")
# Other non-xml characters
diff --git a/pip/_vendor/html5lib/inputstream.py b/pip/_vendor/html5lib/_inputstream.py
index ac0c5e53a..7c5639ff8 100644
--- a/pip/_vendor/html5lib/inputstream.py
+++ b/pip/_vendor/html5lib/_inputstream.py
@@ -1,13 +1,16 @@
from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-from pip._vendor.six.moves import http_client
+
+from pip._vendor.six import text_type, binary_type
+from pip._vendor.six.moves import http_client, urllib
import codecs
import re
+from pip._vendor import webencodings
+
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
-from .constants import encodings, ReparseException
-from . import utils
+from .constants import ReparseException
+from . import _utils
from io import StringIO
@@ -16,12 +19,6 @@ try:
except ImportError:
BytesIO = StringIO
-try:
- from io import BufferedIOBase
-except ImportError:
- class BufferedIOBase(object):
- pass
-
# Non-unicode versions of constants for use in the pre-parser
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
@@ -29,15 +26,17 @@ asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
-invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"
+invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
-if utils.supports_lone_surrogates:
+if _utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
- # unichr. Not using this indirection would introduce an illegal
+ # eval. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
# surrogates.
- invalid_unicode_re = re.compile(invalid_unicode_no_surrogate +
- eval('"\\uD800-\\uDFFF"'))
+ assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
+ invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
+ eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
+ "]")
else:
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
@@ -129,10 +128,13 @@ class BufferedStream(object):
return b"".join(rv)
-def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
- if isinstance(source, http_client.HTTPResponse):
- # Work around Python bug #20007: read(0) closes the connection.
- # http://bugs.python.org/issue20007
+def HTMLInputStream(source, **kwargs):
+ # Work around Python bug #20007: read(0) closes the connection.
+ # http://bugs.python.org/issue20007
+ if (isinstance(source, http_client.HTTPResponse) or
+ # Also check for addinfourl wrapping HTTPResponse
+ (isinstance(source, urllib.response.addbase) and
+ isinstance(source.fp, http_client.HTTPResponse))):
isUnicode = False
elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type)
@@ -140,12 +142,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
isUnicode = isinstance(source, text_type)
if isUnicode:
- if encoding is not None:
- raise TypeError("Cannot explicitly set an encoding with a unicode string")
+ encodings = [x for x in kwargs if x.endswith("_encoding")]
+ if encodings:
+ raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
- return HTMLUnicodeInputStream(source)
+ return HTMLUnicodeInputStream(source, **kwargs)
else:
- return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
+ return HTMLBinaryInputStream(source, **kwargs)
class HTMLUnicodeInputStream(object):
@@ -171,27 +174,21 @@ class HTMLUnicodeInputStream(object):
regardless of any BOM or later declaration (such as in a meta
element)
- parseMeta - Look for a <meta> element containing encoding information
-
"""
- if not utils.supports_lone_surrogates:
+ if not _utils.supports_lone_surrogates:
# Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
- self.replaceCharactersRegexp = None
elif len("\U0010FFFF") == 1:
self.reportCharacterErrors = self.characterErrorsUCS4
- self.replaceCharactersRegexp = re.compile(eval('"[\\uD800-\\uDFFF]"'))
else:
self.reportCharacterErrors = self.characterErrorsUCS2
- self.replaceCharactersRegexp = re.compile(
- eval('"([\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?<![\\uD800-\\uDBFF])[\\uDC00-\\uDFFF])"'))
# List of where new lines occur
self.newLines = [0]
- self.charEncoding = ("utf-8", "certain")
+ self.charEncoding = (lookupEncoding("utf-8"), "certain")
self.dataStream = self.openStream(source)
self.reset()
@@ -284,10 +281,7 @@ class HTMLUnicodeInputStream(object):
if self.reportCharacterErrors:
self.reportCharacterErrors(data)
- # Replace invalid characters
- # Note U+0000 is dealt with in the tokenizer
- data = self.replaceCharactersRegexp.sub("\ufffd", data)
-
+ # Normalise line endings: CRLF and lone CR both become LF
data = data.replace("\r\n", "\n")
data = data.replace("\r", "\n")
@@ -297,7 +291,7 @@ class HTMLUnicodeInputStream(object):
return True
def characterErrorsUCS4(self, data):
- for i in range(len(invalid_unicode_re.findall(data))):
+ for _ in range(len(invalid_unicode_re.findall(data))):
self.errors.append("invalid-codepoint")
def characterErrorsUCS2(self, data):
@@ -310,9 +304,9 @@ class HTMLUnicodeInputStream(object):
codepoint = ord(match.group())
pos = match.start()
# Pretty sure there should be endianness issues here
- if utils.isSurrogatePair(data[pos:pos + 2]):
+ if _utils.isSurrogatePair(data[pos:pos + 2]):
# We have a surrogate pair!
- char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
+ char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
if char_val in non_bmp_invalid_codepoints:
self.errors.append("invalid-codepoint")
skip = True
@@ -395,7 +389,9 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
"""
- def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
+ def __init__(self, source, override_encoding=None, transport_encoding=None,
+ same_origin_parent_encoding=None, likely_encoding=None,
+ default_encoding="windows-1252", useChardet=True):
"""Initialises the HTMLInputStream.
HTMLInputStream(source, [encoding]) -> Normalized stream from source
@@ -408,8 +404,6 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
regardless of any BOM or later declaration (such as in a meta
element)
- parseMeta - Look for a <meta> element containing encoding information
-
"""
# Raw Stream - for unicode objects this will encode to utf-8 and set
# self.charEncoding as appropriate
@@ -417,27 +411,28 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
HTMLUnicodeInputStream.__init__(self, self.rawStream)
- self.charEncoding = (codecName(encoding), "certain")
-
# Encoding Information
# Number of bytes to use when looking for a meta element with
# encoding information
- self.numBytesMeta = 512
+ self.numBytesMeta = 1024
# Number of bytes to use when detecting the encoding with chardet
self.numBytesChardet = 100
- # Encoding to use if no other information can be found
- self.defaultEncoding = "windows-1252"
+ # Things from args
+ self.override_encoding = override_encoding
+ self.transport_encoding = transport_encoding
+ self.same_origin_parent_encoding = same_origin_parent_encoding
+ self.likely_encoding = likely_encoding
+ self.default_encoding = default_encoding
- # Detect encoding iff no explicit "transport level" encoding is supplied
- if (self.charEncoding[0] is None):
- self.charEncoding = self.detectEncoding(parseMeta, chardet)
+ # Determine encoding
+ self.charEncoding = self.determineEncoding(useChardet)
+ assert self.charEncoding[0] is not None
# Call superclass
self.reset()
def reset(self):
- self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
- 'replace')
+ self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
HTMLUnicodeInputStream.reset(self)
def openStream(self, source):
@@ -454,29 +449,50 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
try:
stream.seek(stream.tell())
- except:
+ except: # pylint:disable=bare-except
stream = BufferedStream(stream)
return stream
- def detectEncoding(self, parseMeta=True, chardet=True):
- # First look for a BOM
+ def determineEncoding(self, chardet=True):
+ # BOMs take precedence over everything
# This will also read past the BOM if present
- encoding = self.detectBOM()
- confidence = "certain"
- # If there is no BOM need to look for meta elements with encoding
- # information
- if encoding is None and parseMeta:
- encoding = self.detectEncodingMeta()
- confidence = "tentative"
- # Guess with chardet, if avaliable
- if encoding is None and chardet:
- confidence = "tentative"
+ charEncoding = self.detectBOM(), "certain"
+ if charEncoding[0] is not None:
+ return charEncoding
+
+ # If we've been overridden, we've been overridden
+ charEncoding = lookupEncoding(self.override_encoding), "certain"
+ if charEncoding[0] is not None:
+ return charEncoding
+
+ # Now check the transport layer
+ charEncoding = lookupEncoding(self.transport_encoding), "certain"
+ if charEncoding[0] is not None:
+ return charEncoding
+
+ # Look for meta elements with encoding information
+ charEncoding = self.detectEncodingMeta(), "tentative"
+ if charEncoding[0] is not None:
+ return charEncoding
+
+ # Parent document encoding
+ charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
+ if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
+ return charEncoding
+
+ # "likely" encoding
+ charEncoding = lookupEncoding(self.likely_encoding), "tentative"
+ if charEncoding[0] is not None:
+ return charEncoding
+
+ # Guess with chardet, if available
+ if chardet:
try:
- try:
- from charade.universaldetector import UniversalDetector
- except ImportError:
- from chardet.universaldetector import UniversalDetector
+ from chardet.universaldetector import UniversalDetector
+ except ImportError:
+ pass
+ else:
buffers = []
detector = UniversalDetector()
while not detector.done:
@@ -487,36 +503,33 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
buffers.append(buffer)
detector.feed(buffer)
detector.close()
- encoding = detector.result['encoding']
+ encoding = lookupEncoding(detector.result['encoding'])
self.rawStream.seek(0)
- except ImportError:
- pass
- # If all else fails use the default encoding
- if encoding is None:
- confidence = "tentative"
- encoding = self.defaultEncoding
+ if encoding is not None:
+ return encoding, "tentative"
- # Substitute for equivalent encodings:
- encodingSub = {"iso-8859-1": "windows-1252"}
+ # Try the default encoding
+ charEncoding = lookupEncoding(self.default_encoding), "tentative"
+ if charEncoding[0] is not None:
+ return charEncoding
- if encoding.lower() in encodingSub:
- encoding = encodingSub[encoding.lower()]
-
- return encoding, confidence
+ # Fallback to html5lib's default if even that hasn't worked
+ return lookupEncoding("windows-1252"), "tentative"
def changeEncoding(self, newEncoding):
assert self.charEncoding[1] != "certain"
- newEncoding = codecName(newEncoding)
- if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
- newEncoding = "utf-8"
+ newEncoding = lookupEncoding(newEncoding)
if newEncoding is None:
return
+ if newEncoding.name in ("utf-16be", "utf-16le"):
+ newEncoding = lookupEncoding("utf-8")
+ assert newEncoding is not None
elif newEncoding == self.charEncoding[0]:
self.charEncoding = (self.charEncoding[0], "certain")
else:
self.rawStream.seek(0)
- self.reset()
self.charEncoding = (newEncoding, "certain")
+ self.reset()
raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
def detectBOM(self):
@@ -525,8 +538,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
encoding otherwise return None"""
bomDict = {
codecs.BOM_UTF8: 'utf-8',
- codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
- codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
+ codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
+ codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
}
# Go to beginning of file and read in 4 bytes
@@ -546,9 +559,12 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
# Set the read position past the BOM if one was found, otherwise
# set it to the start of the stream
- self.rawStream.seek(encoding and seek or 0)
-
- return encoding
+ if encoding:
+ self.rawStream.seek(seek)
+ return lookupEncoding(encoding)
+ else:
+ self.rawStream.seek(0)
+ return None
def detectEncodingMeta(self):
"""Report the encoding declared by the meta element
@@ -559,8 +575,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
self.rawStream.seek(0)
encoding = parser.getEncoding()
- if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
- encoding = "utf-8"
+ if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
+ encoding = lookupEncoding("utf-8")
return encoding
@@ -574,6 +590,7 @@ class EncodingBytes(bytes):
return bytes.__new__(self, value.lower())
def __init__(self, value):
+ # pylint:disable=unused-argument
self._position = -1
def __iter__(self):
@@ -684,7 +701,7 @@ class EncodingParser(object):
(b"<!", self.handleOther),
(b"<?", self.handleOther),
(b"<", self.handlePossibleStartTag))
- for byte in self.data:
+ for _ in self.data:
keepParsing = True
for key, method in methodDispatch:
if self.data.matchBytes(key):
@@ -723,7 +740,7 @@ class EncodingParser(object):
return False
elif attr[0] == b"charset":
tentativeEncoding = attr[1]
- codec = codecName(tentativeEncoding)
+ codec = lookupEncoding(tentativeEncoding)
if codec is not None:
self.encoding = codec
return False
@@ -731,7 +748,7 @@ class EncodingParser(object):
contentParser = ContentAttrParser(EncodingBytes(attr[1]))
tentativeEncoding = contentParser.parse()
if tentativeEncoding is not None:
- codec = codecName(tentativeEncoding)
+ codec = lookupEncoding(tentativeEncoding)
if codec is not None:
if hasPragma:
self.encoding = codec
@@ -888,16 +905,19 @@ class ContentAttrParser(object):
return None
-def codecName(encoding):
+def lookupEncoding(encoding):
"""Return the webencodings encoding object corresponding to an encoding
label, or None if the string doesn't correspond to a valid encoding."""
- if isinstance(encoding, bytes):
+ if isinstance(encoding, binary_type):
try:
encoding = encoding.decode("ascii")
except UnicodeDecodeError:
return None
- if encoding:
- canonicalName = ascii_punctuation_re.sub("", encoding).lower()
- return encodings.get(canonicalName, None)
+
+ if encoding is not None:
+ try:
+ return webencodings.lookup(encoding)
+ except AttributeError:
+ return None
else:
return None
diff --git a/pip/_vendor/html5lib/tokenizer.py b/pip/_vendor/html5lib/_tokenizer.py
index 797745787..178f6e7fa 100644
--- a/pip/_vendor/html5lib/tokenizer.py
+++ b/pip/_vendor/html5lib/_tokenizer.py
@@ -1,9 +1,6 @@
from __future__ import absolute_import, division, unicode_literals
-try:
- chr = unichr # flake8: noqa
-except NameError:
- pass
+from pip._vendor.six import unichr as chr
from collections import deque
@@ -14,9 +11,9 @@ from .constants import digits, hexDigits, EOF
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters
-from .inputstream import HTMLInputStream
+from ._inputstream import HTMLInputStream
-from .trie import Trie
+from ._trie import Trie
entitiesTrie = Trie(entities)
@@ -34,16 +31,11 @@ class HTMLTokenizer(object):
Points to HTMLInputStream object.
"""
- def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
- lowercaseElementName=True, lowercaseAttrName=True, parser=None):
+ def __init__(self, stream, parser=None, **kwargs):
- self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
+ self.stream = HTMLInputStream(stream, **kwargs)
self.parser = parser
- # Perform case conversions?
- self.lowercaseElementName = lowercaseElementName
- self.lowercaseAttrName = lowercaseAttrName
-
# Setup the initial tokenizer state
self.escapeFlag = False
self.lastFourChars = []
@@ -147,8 +139,8 @@ class HTMLTokenizer(object):
output = "&"
charStack = [self.stream.char()]
- if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
- or (allowedChar is not None and allowedChar == charStack[0])):
+ if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
+ (allowedChar is not None and allowedChar == charStack[0])):
self.stream.unget(charStack[0])
elif charStack[0] == "#":
@@ -235,8 +227,7 @@ class HTMLTokenizer(object):
token = self.currentToken
# Add token to the queue to be yielded
if (token["type"] in tagTokenTypes):
- if self.lowercaseElementName:
- token["name"] = token["name"].translate(asciiUpper2Lower)
+ token["name"] = token["name"].translate(asciiUpper2Lower)
if token["type"] == tokenTypes["EndTag"]:
if token["data"]:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -921,10 +912,9 @@ class HTMLTokenizer(object):
# Attributes are not dropped at this stage. That happens when the
# start tag token is emitted so values can still be safely appended
# to attributes, but we do want to report the parse error in time.
- if self.lowercaseAttrName:
- self.currentToken["data"][-1][0] = (
- self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
- for name, value in self.currentToken["data"][:-1]:
+ self.currentToken["data"][-1][0] = (
+ self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
+ for name, _ in self.currentToken["data"][:-1]:
if self.currentToken["data"][-1][0] == name:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"duplicate-attribute"})
@@ -1716,11 +1706,11 @@ class HTMLTokenizer(object):
else:
data.append(char)
- data = "".join(data)
+ data = "".join(data) # pylint:disable=redefined-variable-type
# Deal with null here rather than in the parser
nullCount = data.count("\u0000")
if nullCount > 0:
- for i in range(nullCount):
+ for _ in range(nullCount):
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
data = data.replace("\u0000", "\uFFFD")
diff --git a/pip/_vendor/html5lib/trie/__init__.py b/pip/_vendor/html5lib/_trie/__init__.py
index a8cca8a9a..a5ba4bf12 100644
--- a/pip/_vendor/html5lib/trie/__init__.py
+++ b/pip/_vendor/html5lib/_trie/__init__.py
@@ -4,9 +4,11 @@ from .py import Trie as PyTrie
Trie = PyTrie
+# pylint:disable=wrong-import-position
try:
from .datrie import Trie as DATrie
except ImportError:
pass
else:
Trie = DATrie
+# pylint:enable=wrong-import-position
diff --git a/pip/_vendor/html5lib/trie/_base.py b/pip/_vendor/html5lib/_trie/_base.py
index 724486b16..25eece46e 100644
--- a/pip/_vendor/html5lib/trie/_base.py
+++ b/pip/_vendor/html5lib/_trie/_base.py
@@ -7,7 +7,8 @@ class Trie(Mapping):
"""Abstract base class for tries"""
def keys(self, prefix=None):
- keys = super().keys()
+ # pylint:disable=arguments-differ
+ keys = super(Trie, self).keys()
if prefix is None:
return set(keys)
diff --git a/pip/_vendor/html5lib/trie/datrie.py b/pip/_vendor/html5lib/_trie/datrie.py
index e2e5f8662..e2e5f8662 100644
--- a/pip/_vendor/html5lib/trie/datrie.py
+++ b/pip/_vendor/html5lib/_trie/datrie.py
diff --git a/pip/_vendor/html5lib/trie/py.py b/pip/_vendor/html5lib/_trie/py.py
index c178b219d..c178b219d 100644
--- a/pip/_vendor/html5lib/trie/py.py
+++ b/pip/_vendor/html5lib/_trie/py.py
diff --git a/pip/_vendor/html5lib/utils.py b/pip/_vendor/html5lib/_utils.py
index 85f1459d0..55d674753 100644
--- a/pip/_vendor/html5lib/utils.py
+++ b/pip/_vendor/html5lib/_utils.py
@@ -1,5 +1,6 @@
from __future__ import absolute_import, division, unicode_literals
+import sys
from types import ModuleType
from pip._vendor.six import text_type
@@ -12,9 +13,11 @@ except ImportError:
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
"surrogatePairToCodepoint", "moduleFactoryFactory",
- "supports_lone_surrogates"]
+ "supports_lone_surrogates", "PY27"]
+PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7
+
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
@@ -22,12 +25,12 @@ __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
- _x = eval('"\\uD800"')
+ _x = eval('"\\uD800"') # pylint:disable=eval-used
if not isinstance(_x, text_type):
# We need this with u"" because of http://bugs.jython.org/issue2039
- _x = eval('u"\\uD800"')
+ _x = eval('u"\\uD800"') # pylint:disable=eval-used
assert isinstance(_x, text_type)
-except:
+except: # pylint:disable=bare-except
supports_lone_surrogates = False
else:
supports_lone_surrogates = True
@@ -52,19 +55,20 @@ class MethodDispatcher(dict):
# anything here.
_dictEntries = []
for name, value in items:
- if type(name) in (list, tuple, frozenset, set):
+ if isinstance(name, (list, tuple, frozenset, set)):
for item in name:
_dictEntries.append((item, value))
else:
_dictEntries.append((name, value))
dict.__init__(self, _dictEntries)
+ assert len(self) == len(_dictEntries)
self.default = None
def __getitem__(self, key):
return dict.get(self, key, self.default)
-# Some utility functions to dal with weirdness around UCS2 vs UCS4
+# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds
def isSurrogatePair(data):
@@ -91,13 +95,33 @@ def moduleFactoryFactory(factory):
else:
name = b"_%s_factory" % baseModule.__name__
- if name in moduleCache:
- return moduleCache[name]
- else:
+ kwargs_tuple = tuple(kwargs.items())
+
+ try:
+ return moduleCache[name][args][kwargs_tuple]
+ except KeyError:
mod = ModuleType(name)
objs = factory(baseModule, *args, **kwargs)
mod.__dict__.update(objs)
- moduleCache[name] = mod
+ if "name" not in moduleCache:
+ moduleCache[name] = {}
+ if "args" not in moduleCache[name]:
+ moduleCache[name][args] = {}
+ if "kwargs" not in moduleCache[name][args]:
+ moduleCache[name][args][kwargs_tuple] = {}
+ moduleCache[name][args][kwargs_tuple] = mod
return mod
return moduleFactory
+
+
+def memoize(func):
+ cache = {}
+
+ def wrapped(*args, **kwargs):
+ key = (tuple(args), tuple(kwargs.items()))
+ if key not in cache:
+ cache[key] = func(*args, **kwargs)
+ return cache[key]
+
+ return wrapped
diff --git a/pip/_vendor/html5lib/constants.py b/pip/_vendor/html5lib/constants.py
index d938e0ae6..9e7541d38 100644
--- a/pip/_vendor/html5lib/constants.py
+++ b/pip/_vendor/html5lib/constants.py
@@ -283,6 +283,12 @@ E = {
"Element %(name)s not allowed in a non-html context",
"unexpected-end-tag-before-html":
"Unexpected end tag (%(name)s) before html.",
+ "unexpected-inhead-noscript-tag":
+ "Element %(name)s not allowed in a inhead-noscript context",
+ "eof-in-head-noscript":
+ "Unexpected end of file. Expected inhead-noscript content",
+ "char-in-head-noscript":
+ "Unexpected non-space character. Expected inhead-noscript content",
"XXX-undefined-error":
"Undefined error (this sucks and should be fixed)",
}
@@ -431,6 +437,73 @@ mathmlTextIntegrationPointElements = frozenset([
(namespaces["mathml"], "mtext")
])
+adjustSVGAttributes = {
+ "attributename": "attributeName",
+ "attributetype": "attributeType",
+ "basefrequency": "baseFrequency",
+ "baseprofile": "baseProfile",
+ "calcmode": "calcMode",
+ "clippathunits": "clipPathUnits",
+ "contentscripttype": "contentScriptType",
+ "contentstyletype": "contentStyleType",
+ "diffuseconstant": "diffuseConstant",
+ "edgemode": "edgeMode",
+ "externalresourcesrequired": "externalResourcesRequired",
+ "filterres": "filterRes",
+ "filterunits": "filterUnits",
+ "glyphref": "glyphRef",
+ "gradienttransform": "gradientTransform",
+ "gradientunits": "gradientUnits",
+ "kernelmatrix": "kernelMatrix",
+ "kernelunitlength": "kernelUnitLength",
+ "keypoints": "keyPoints",
+ "keysplines": "keySplines",
+ "keytimes": "keyTimes",
+ "lengthadjust": "lengthAdjust",
+ "limitingconeangle": "limitingConeAngle",
+ "markerheight": "markerHeight",
+ "markerunits": "markerUnits",
+ "markerwidth": "markerWidth",
+ "maskcontentunits": "maskContentUnits",
+ "maskunits": "maskUnits",
+ "numoctaves": "numOctaves",
+ "pathlength": "pathLength",
+ "patterncontentunits": "patternContentUnits",
+ "patterntransform": "patternTransform",
+ "patternunits": "patternUnits",
+ "pointsatx": "pointsAtX",
+ "pointsaty": "pointsAtY",
+ "pointsatz": "pointsAtZ",
+ "preservealpha": "preserveAlpha",
+ "preserveaspectratio": "preserveAspectRatio",
+ "primitiveunits": "primitiveUnits",
+ "refx": "refX",
+ "refy": "refY",
+ "repeatcount": "repeatCount",
+ "repeatdur": "repeatDur",
+ "requiredextensions": "requiredExtensions",
+ "requiredfeatures": "requiredFeatures",
+ "specularconstant": "specularConstant",
+ "specularexponent": "specularExponent",
+ "spreadmethod": "spreadMethod",
+ "startoffset": "startOffset",
+ "stddeviation": "stdDeviation",
+ "stitchtiles": "stitchTiles",
+ "surfacescale": "surfaceScale",
+ "systemlanguage": "systemLanguage",
+ "tablevalues": "tableValues",
+ "targetx": "targetX",
+ "targety": "targetY",
+ "textlength": "textLength",
+ "viewbox": "viewBox",
+ "viewtarget": "viewTarget",
+ "xchannelselector": "xChannelSelector",
+ "ychannelselector": "yChannelSelector",
+ "zoomandpan": "zoomAndPan"
+}
+
+adjustMathMLAttributes = {"definitionurl": "definitionURL"}
+
adjustForeignAttributes = {
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
"xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
@@ -2813,7 +2886,6 @@ replacementCharacters = {
0x0d: "\u000D",
0x80: "\u20AC",
0x81: "\u0081",
- 0x81: "\u0081",
0x82: "\u201A",
0x83: "\u0192",
0x84: "\u201E",
@@ -2846,235 +2918,6 @@ replacementCharacters = {
0x9F: "\u0178",
}
-encodings = {
- '437': 'cp437',
- '850': 'cp850',
- '852': 'cp852',
- '855': 'cp855',
- '857': 'cp857',
- '860': 'cp860',
- '861': 'cp861',
- '862': 'cp862',
- '863': 'cp863',
- '865': 'cp865',
- '866': 'cp866',
- '869': 'cp869',
- 'ansix341968': 'ascii',
- 'ansix341986': 'ascii',
- 'arabic': 'iso8859-6',
- 'ascii': 'ascii',
- 'asmo708': 'iso8859-6',
- 'big5': 'big5',
- 'big5hkscs': 'big5hkscs',
- 'chinese': 'gbk',
- 'cp037': 'cp037',
- 'cp1026': 'cp1026',
- 'cp154': 'ptcp154',
- 'cp367': 'ascii',
- 'cp424': 'cp424',
- 'cp437': 'cp437',
- 'cp500': 'cp500',
- 'cp775': 'cp775',
- 'cp819': 'windows-1252',
- 'cp850': 'cp850',
- 'cp852': 'cp852',
- 'cp855': 'cp855',
- 'cp857': 'cp857',
- 'cp860': 'cp860',
- 'cp861': 'cp861',
- 'cp862': 'cp862',
- 'cp863': 'cp863',
- 'cp864': 'cp864',
- 'cp865': 'cp865',
- 'cp866': 'cp866',
- 'cp869': 'cp869',
- 'cp936': 'gbk',
- 'cpgr': 'cp869',
- 'cpis': 'cp861',
- 'csascii': 'ascii',
- 'csbig5': 'big5',
- 'cseuckr': 'cp949',
- 'cseucpkdfmtjapanese': 'euc_jp',
- 'csgb2312': 'gbk',
- 'cshproman8': 'hp-roman8',
- 'csibm037': 'cp037',
- 'csibm1026': 'cp1026',
- 'csibm424': 'cp424',
- 'csibm500': 'cp500',
- 'csibm855': 'cp855',
- 'csibm857': 'cp857',
- 'csibm860': 'cp860',
- 'csibm861': 'cp861',
- 'csibm863': 'cp863',
- 'csibm864': 'cp864',
- 'csibm865': 'cp865',
- 'csibm866': 'cp866',
- 'csibm869': 'cp869',
- 'csiso2022jp': 'iso2022_jp',
- 'csiso2022jp2': 'iso2022_jp_2',
- 'csiso2022kr': 'iso2022_kr',
- 'csiso58gb231280': 'gbk',
- 'csisolatin1': 'windows-1252',
- 'csisolatin2': 'iso8859-2',
- 'csisolatin3': 'iso8859-3',
- 'csisolatin4': 'iso8859-4',
- 'csisolatin5': 'windows-1254',
- 'csisolatin6': 'iso8859-10',
- 'csisolatinarabic': 'iso8859-6',
- 'csisolatincyrillic': 'iso8859-5',
- 'csisolatingreek': 'iso8859-7',
- 'csisolatinhebrew': 'iso8859-8',
- 'cskoi8r': 'koi8-r',
- 'csksc56011987': 'cp949',
- 'cspc775baltic': 'cp775',
- 'cspc850multilingual': 'cp850',
- 'cspc862latinhebrew': 'cp862',
- 'cspc8codepage437': 'cp437',
- 'cspcp852': 'cp852',
- 'csptcp154': 'ptcp154',
- 'csshiftjis': 'shift_jis',
- 'csunicode11utf7': 'utf-7',
- 'cyrillic': 'iso8859-5',
- 'cyrillicasian': 'ptcp154',
- 'ebcdiccpbe': 'cp500',
- 'ebcdiccpca': 'cp037',
- 'ebcdiccpch': 'cp500',
- 'ebcdiccphe': 'cp424',
- 'ebcdiccpnl': 'cp037',
- 'ebcdiccpus': 'cp037',
- 'ebcdiccpwt': 'cp037',
- 'ecma114': 'iso8859-6',
- 'ecma118': 'iso8859-7',
- 'elot928': 'iso8859-7',
- 'eucjp': 'euc_jp',
- 'euckr': 'cp949',
- 'extendedunixcodepackedformatforjapanese': 'euc_jp',
- 'gb18030': 'gb18030',
- 'gb2312': 'gbk',
- 'gb231280': 'gbk',
- 'gbk': 'gbk',
- 'greek': 'iso8859-7',
- 'greek8': 'iso8859-7',
- 'hebrew': 'iso8859-8',
- 'hproman8': 'hp-roman8',
- 'hzgb2312': 'hz',
- 'ibm037': 'cp037',
- 'ibm1026': 'cp1026',
- 'ibm367': 'ascii',
- 'ibm424': 'cp424',
- 'ibm437': 'cp437',
- 'ibm500': 'cp500',
- 'ibm775': 'cp775',
- 'ibm819': 'windows-1252',
- 'ibm850': 'cp850',
- 'ibm852': 'cp852',
- 'ibm855': 'cp855',
- 'ibm857': 'cp857',
- 'ibm860': 'cp860',
- 'ibm861': 'cp861',
- 'ibm862': 'cp862',
- 'ibm863': 'cp863',
- 'ibm864': 'cp864',
- 'ibm865': 'cp865',
- 'ibm866': 'cp866',
- 'ibm869': 'cp869',
- 'iso2022jp': 'iso2022_jp',
- 'iso2022jp2': 'iso2022_jp_2',
- 'iso2022kr': 'iso2022_kr',
- 'iso646irv1991': 'ascii',
- 'iso646us': 'ascii',
- 'iso88591': 'windows-1252',
- 'iso885910': 'iso8859-10',
- 'iso8859101992': 'iso8859-10',
- 'iso885911987': 'windows-1252',
- 'iso885913': 'iso8859-13',
- 'iso885914': 'iso8859-14',
- 'iso8859141998': 'iso8859-14',
- 'iso885915': 'iso8859-15',
- 'iso885916': 'iso8859-16',
- 'iso8859162001': 'iso8859-16',
- 'iso88592': 'iso8859-2',
- 'iso885921987': 'iso8859-2',
- 'iso88593': 'iso8859-3',
- 'iso885931988': 'iso8859-3',
- 'iso88594': 'iso8859-4',
- 'iso885941988': 'iso8859-4',
- 'iso88595': 'iso8859-5',
- 'iso885951988': 'iso8859-5',
- 'iso88596': 'iso8859-6',
- 'iso885961987': 'iso8859-6',
- 'iso88597': 'iso8859-7',
- 'iso885971987': 'iso8859-7',
- 'iso88598': 'iso8859-8',
- 'iso885981988': 'iso8859-8',
- 'iso88599': 'windows-1254',
- 'iso885991989': 'windows-1254',
- 'isoceltic': 'iso8859-14',
- 'isoir100': 'windows-1252',
- 'isoir101': 'iso8859-2',
- 'isoir109': 'iso8859-3',
- 'isoir110': 'iso8859-4',
- 'isoir126': 'iso8859-7',
- 'isoir127': 'iso8859-6',
- 'isoir138': 'iso8859-8',
- 'isoir144': 'iso8859-5',
- 'isoir148': 'windows-1254',
- 'isoir149': 'cp949',
- 'isoir157': 'iso8859-10',
- 'isoir199': 'iso8859-14',
- 'isoir226': 'iso8859-16',
- 'isoir58': 'gbk',
- 'isoir6': 'ascii',
- 'koi8r': 'koi8-r',
- 'koi8u': 'koi8-u',
- 'korean': 'cp949',
- 'ksc5601': 'cp949',
- 'ksc56011987': 'cp949',
- 'ksc56011989': 'cp949',
- 'l1': 'windows-1252',
- 'l10': 'iso8859-16',
- 'l2': 'iso8859-2',
- 'l3': 'iso8859-3',
- 'l4': 'iso8859-4',
- 'l5': 'windows-1254',
- 'l6': 'iso8859-10',
- 'l8': 'iso8859-14',
- 'latin1': 'windows-1252',
- 'latin10': 'iso8859-16',
- 'latin2': 'iso8859-2',
- 'latin3': 'iso8859-3',
- 'latin4': 'iso8859-4',
- 'latin5': 'windows-1254',
- 'latin6': 'iso8859-10',
- 'latin8': 'iso8859-14',
- 'latin9': 'iso8859-15',
- 'ms936': 'gbk',
- 'mskanji': 'shift_jis',
- 'pt154': 'ptcp154',
- 'ptcp154': 'ptcp154',
- 'r8': 'hp-roman8',
- 'roman8': 'hp-roman8',
- 'shiftjis': 'shift_jis',
- 'tis620': 'cp874',
- 'unicode11utf7': 'utf-7',
- 'us': 'ascii',
- 'usascii': 'ascii',
- 'utf16': 'utf-16',
- 'utf16be': 'utf-16-be',
- 'utf16le': 'utf-16-le',
- 'utf8': 'utf-8',
- 'windows1250': 'cp1250',
- 'windows1251': 'cp1251',
- 'windows1252': 'cp1252',
- 'windows1253': 'cp1253',
- 'windows1254': 'cp1254',
- 'windows1255': 'cp1255',
- 'windows1256': 'cp1256',
- 'windows1257': 'cp1257',
- 'windows1258': 'cp1258',
- 'windows936': 'gbk',
- 'x-x-big5': 'big5'}
-
tokenTypes = {
"Doctype": 0,
"Characters": 1,
diff --git a/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/pip/_vendor/html5lib/filters/alphabeticalattributes.py
index fed6996c1..4795baecc 100644
--- a/pip/_vendor/html5lib/filters/alphabeticalattributes.py
+++ b/pip/_vendor/html5lib/filters/alphabeticalattributes.py
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals
-from . import _base
+from . import base
try:
from collections import OrderedDict
@@ -8,9 +8,9 @@ except ImportError:
from ordereddict import OrderedDict
-class Filter(_base.Filter):
+class Filter(base.Filter):
def __iter__(self):
- for token in _base.Filter.__iter__(self):
+ for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
for name, value in sorted(token["data"].items(),
diff --git a/pip/_vendor/html5lib/filters/_base.py b/pip/_vendor/html5lib/filters/base.py
index c7dbaed0f..c7dbaed0f 100644
--- a/pip/_vendor/html5lib/filters/_base.py
+++ b/pip/_vendor/html5lib/filters/base.py
diff --git a/pip/_vendor/html5lib/filters/inject_meta_charset.py b/pip/_vendor/html5lib/filters/inject_meta_charset.py
index ca33b70b5..2059ec861 100644
--- a/pip/_vendor/html5lib/filters/inject_meta_charset.py
+++ b/pip/_vendor/html5lib/filters/inject_meta_charset.py
@@ -1,11 +1,11 @@
from __future__ import absolute_import, division, unicode_literals
-from . import _base
+from . import base
-class Filter(_base.Filter):
+class Filter(base.Filter):
def __init__(self, source, encoding):
- _base.Filter.__init__(self, source)
+ base.Filter.__init__(self, source)
self.encoding = encoding
def __iter__(self):
@@ -13,7 +13,7 @@ class Filter(_base.Filter):
meta_found = (self.encoding is None)
pending = []
- for token in _base.Filter.__iter__(self):
+ for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == "head":
diff --git a/pip/_vendor/html5lib/filters/lint.py b/pip/_vendor/html5lib/filters/lint.py
index 8884696dc..3b892c860 100644
--- a/pip/_vendor/html5lib/filters/lint.py
+++ b/pip/_vendor/html5lib/filters/lint.py
@@ -1,90 +1,81 @@
from __future__ import absolute_import, division, unicode_literals
-from . import _base
-from ..constants import cdataElements, rcdataElements, voidElements
+from pip._vendor.six import text_type
+
+from . import base
+from ..constants import namespaces, voidElements
from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
-class LintError(Exception):
- pass
-
+class Filter(base.Filter):
+ def __init__(self, source, require_matching_tags=True):
+ super(Filter, self).__init__(source)
+ self.require_matching_tags = require_matching_tags
-class Filter(_base.Filter):
def __iter__(self):
open_elements = []
- contentModelFlag = "PCDATA"
- for token in _base.Filter.__iter__(self):
+ for token in base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
+ namespace = token["namespace"]
name = token["name"]
- if contentModelFlag != "PCDATA":
- raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name})
- if not isinstance(name, str):
- raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
- if not name:
- raise LintError("Empty tag name")
- if type == "StartTag" and name in voidElements:
- raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name})
- elif type == "EmptyTag" and name not in voidElements:
- raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]})
- if type == "StartTag":
- open_elements.append(name)
- for name, value in token["data"]:
- if not isinstance(name, str):
- raise LintError("Attribute name is not a string: %(name)r" % {"name": name})
- if not name:
- raise LintError("Empty attribute name")
- if not isinstance(value, str):
- raise LintError("Attribute value is not a string: %(value)r" % {"value": value})
- if name in cdataElements:
- contentModelFlag = "CDATA"
- elif name in rcdataElements:
- contentModelFlag = "RCDATA"
- elif name == "plaintext":
- contentModelFlag = "PLAINTEXT"
+ assert namespace is None or isinstance(namespace, text_type)
+ assert namespace != ""
+ assert isinstance(name, text_type)
+ assert name != ""
+ assert isinstance(token["data"], dict)
+ if (not namespace or namespace == namespaces["html"]) and name in voidElements:
+ assert type == "EmptyTag"
+ else:
+ assert type == "StartTag"
+ if type == "StartTag" and self.require_matching_tags:
+ open_elements.append((namespace, name))
+ for (namespace, name), value in token["data"].items():
+ assert namespace is None or isinstance(namespace, text_type)
+ assert namespace != ""
+ assert isinstance(name, text_type)
+ assert name != ""
+ assert isinstance(value, text_type)
elif type == "EndTag":
+ namespace = token["namespace"]
name = token["name"]
- if not isinstance(name, str):
- raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
- if not name:
- raise LintError("Empty tag name")
- if name in voidElements:
- raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name})
- start_name = open_elements.pop()
- if start_name != name:
- raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name})
- contentModelFlag = "PCDATA"
+ assert namespace is None or isinstance(namespace, text_type)
+ assert namespace != ""
+ assert isinstance(name, text_type)
+ assert name != ""
+ if (not namespace or namespace == namespaces["html"]) and name in voidElements:
+ assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
+ elif self.require_matching_tags:
+ start = open_elements.pop()
+ assert start == (namespace, name)
elif type == "Comment":
- if contentModelFlag != "PCDATA":
- raise LintError("Comment not in PCDATA content model flag")
+ data = token["data"]
+ assert isinstance(data, text_type)
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
- if not isinstance(data, str):
- raise LintError("Attribute name is not a string: %(name)r" % {"name": data})
- if not data:
- raise LintError("%(type)s token with empty data" % {"type": type})
+ assert isinstance(data, text_type)
+ assert data != ""
if type == "SpaceCharacters":
- data = data.strip(spaceCharacters)
- if data:
- raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data})
+ assert data.strip(spaceCharacters) == ""
elif type == "Doctype":
name = token["name"]
- if contentModelFlag != "PCDATA":
- raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name})
- if not isinstance(name, str):
- raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
- # XXX: what to do with token["data"] ?
+ assert name is None or isinstance(name, text_type)
+ assert token["publicId"] is None or isinstance(name, text_type)
+ assert token["systemId"] is None or isinstance(name, text_type)
+
+ elif type == "Entity":
+ assert isinstance(token["name"], text_type)
- elif type in ("ParseError", "SerializeError"):
- pass
+ elif type == "SerializerError":
+ assert isinstance(token["data"], text_type)
else:
- raise LintError("Unknown token type: %(type)s" % {"type": type})
+ assert False, "Unknown token type: %(type)s" % {"type": type}
yield token
diff --git a/pip/_vendor/html5lib/filters/optionaltags.py b/pip/_vendor/html5lib/filters/optionaltags.py
index fefe0b309..f6edb7341 100644
--- a/pip/_vendor/html5lib/filters/optionaltags.py
+++ b/pip/_vendor/html5lib/filters/optionaltags.py
@@ -1,9 +1,9 @@
from __future__ import absolute_import, division, unicode_literals
-from . import _base
+from . import base
-class Filter(_base.Filter):
+class Filter(base.Filter):
def slider(self):
previous1 = previous2 = None
for token in self.source:
@@ -11,7 +11,8 @@ class Filter(_base.Filter):
yield previous2, previous1, token
previous2 = previous1
previous1 = token
- yield previous2, previous1, None
+ if previous1 is not None:
+ yield previous2, previous1, None
def __iter__(self):
for previous, token, next in self.slider():
@@ -58,7 +59,7 @@ class Filter(_base.Filter):
elif tagname == 'colgroup':
# A colgroup element's start tag may be omitted if the first thing
# inside the colgroup element is a col element, and if the element
- # is not immediately preceeded by another colgroup element whose
+ # is not immediately preceded by another colgroup element whose
# end tag has been omitted.
if type in ("StartTag", "EmptyTag"):
# XXX: we do not look at the preceding event, so instead we never
@@ -70,7 +71,7 @@ class Filter(_base.Filter):
elif tagname == 'tbody':
# A tbody element's start tag may be omitted if the first thing
# inside the tbody element is a tr element, and if the element is
- # not immediately preceeded by a tbody, thead, or tfoot element
+ # not immediately preceded by a tbody, thead, or tfoot element
# whose end tag has been omitted.
if type == "StartTag":
# omit the thead and tfoot elements' end tag when they are
diff --git a/pip/_vendor/html5lib/filters/sanitizer.py b/pip/_vendor/html5lib/filters/sanitizer.py
index b206b54e7..026748d2e 100644
--- a/pip/_vendor/html5lib/filters/sanitizer.py
+++ b/pip/_vendor/html5lib/filters/sanitizer.py
@@ -1,12 +1,865 @@
from __future__ import absolute_import, division, unicode_literals
-from . import _base
-from ..sanitizer import HTMLSanitizerMixin
+import re
+from xml.sax.saxutils import escape, unescape
+from pip._vendor.six.moves import urllib_parse as urlparse
+
+from . import base
+from ..constants import namespaces, prefixes
+
+__all__ = ["Filter"]
+
+
+allowed_elements = frozenset((
+ (namespaces['html'], 'a'),
+ (namespaces['html'], 'abbr'),
+ (namespaces['html'], 'acronym'),
+ (namespaces['html'], 'address'),
+ (namespaces['html'], 'area'),
+ (namespaces['html'], 'article'),
+ (namespaces['html'], 'aside'),
+ (namespaces['html'], 'audio'),
+ (namespaces['html'], 'b'),
+ (namespaces['html'], 'big'),
+ (namespaces['html'], 'blockquote'),
+ (namespaces['html'], 'br'),
+ (namespaces['html'], 'button'),
+ (namespaces['html'], 'canvas'),
+ (namespaces['html'], 'caption'),
+ (namespaces['html'], 'center'),
+ (namespaces['html'], 'cite'),
+ (namespaces['html'], 'code'),
+ (namespaces['html'], 'col'),
+ (namespaces['html'], 'colgroup'),
+ (namespaces['html'], 'command'),
+ (namespaces['html'], 'datagrid'),
+ (namespaces['html'], 'datalist'),
+ (namespaces['html'], 'dd'),
+ (namespaces['html'], 'del'),
+ (namespaces['html'], 'details'),
+ (namespaces['html'], 'dfn'),
+ (namespaces['html'], 'dialog'),
+ (namespaces['html'], 'dir'),
+ (namespaces['html'], 'div'),
+ (namespaces['html'], 'dl'),
+ (namespaces['html'], 'dt'),
+ (namespaces['html'], 'em'),
+ (namespaces['html'], 'event-source'),
+ (namespaces['html'], 'fieldset'),
+ (namespaces['html'], 'figcaption'),
+ (namespaces['html'], 'figure'),
+ (namespaces['html'], 'footer'),
+ (namespaces['html'], 'font'),
+ (namespaces['html'], 'form'),
+ (namespaces['html'], 'header'),
+ (namespaces['html'], 'h1'),
+ (namespaces['html'], 'h2'),
+ (namespaces['html'], 'h3'),
+ (namespaces['html'], 'h4'),
+ (namespaces['html'], 'h5'),
+ (namespaces['html'], 'h6'),
+ (namespaces['html'], 'hr'),
+ (namespaces['html'], 'i'),
+ (namespaces['html'], 'img'),
+ (namespaces['html'], 'input'),
+ (namespaces['html'], 'ins'),
+ (namespaces['html'], 'keygen'),
+ (namespaces['html'], 'kbd'),
+ (namespaces['html'], 'label'),
+ (namespaces['html'], 'legend'),
+ (namespaces['html'], 'li'),
+ (namespaces['html'], 'm'),
+ (namespaces['html'], 'map'),
+ (namespaces['html'], 'menu'),
+ (namespaces['html'], 'meter'),
+ (namespaces['html'], 'multicol'),
+ (namespaces['html'], 'nav'),
+ (namespaces['html'], 'nextid'),
+ (namespaces['html'], 'ol'),
+ (namespaces['html'], 'output'),
+ (namespaces['html'], 'optgroup'),
+ (namespaces['html'], 'option'),
+ (namespaces['html'], 'p'),
+ (namespaces['html'], 'pre'),
+ (namespaces['html'], 'progress'),
+ (namespaces['html'], 'q'),
+ (namespaces['html'], 's'),
+ (namespaces['html'], 'samp'),
+ (namespaces['html'], 'section'),
+ (namespaces['html'], 'select'),
+ (namespaces['html'], 'small'),
+ (namespaces['html'], 'sound'),
+ (namespaces['html'], 'source'),
+ (namespaces['html'], 'spacer'),
+ (namespaces['html'], 'span'),
+ (namespaces['html'], 'strike'),
+ (namespaces['html'], 'strong'),
+ (namespaces['html'], 'sub'),
+ (namespaces['html'], 'sup'),
+ (namespaces['html'], 'table'),
+ (namespaces['html'], 'tbody'),
+ (namespaces['html'], 'td'),
+ (namespaces['html'], 'textarea'),
+ (namespaces['html'], 'time'),
+ (namespaces['html'], 'tfoot'),
+ (namespaces['html'], 'th'),
+ (namespaces['html'], 'thead'),
+ (namespaces['html'], 'tr'),
+ (namespaces['html'], 'tt'),
+ (namespaces['html'], 'u'),
+ (namespaces['html'], 'ul'),
+ (namespaces['html'], 'var'),
+ (namespaces['html'], 'video'),
+ (namespaces['mathml'], 'maction'),
+ (namespaces['mathml'], 'math'),
+ (namespaces['mathml'], 'merror'),
+ (namespaces['mathml'], 'mfrac'),
+ (namespaces['mathml'], 'mi'),
+ (namespaces['mathml'], 'mmultiscripts'),
+ (namespaces['mathml'], 'mn'),
+ (namespaces['mathml'], 'mo'),
+ (namespaces['mathml'], 'mover'),
+ (namespaces['mathml'], 'mpadded'),
+ (namespaces['mathml'], 'mphantom'),
+ (namespaces['mathml'], 'mprescripts'),
+ (namespaces['mathml'], 'mroot'),
+ (namespaces['mathml'], 'mrow'),
+ (namespaces['mathml'], 'mspace'),
+ (namespaces['mathml'], 'msqrt'),
+ (namespaces['mathml'], 'mstyle'),
+ (namespaces['mathml'], 'msub'),
+ (namespaces['mathml'], 'msubsup'),
+ (namespaces['mathml'], 'msup'),
+ (namespaces['mathml'], 'mtable'),
+ (namespaces['mathml'], 'mtd'),
+ (namespaces['mathml'], 'mtext'),
+ (namespaces['mathml'], 'mtr'),
+ (namespaces['mathml'], 'munder'),
+ (namespaces['mathml'], 'munderover'),
+ (namespaces['mathml'], 'none'),
+ (namespaces['svg'], 'a'),
+ (namespaces['svg'], 'animate'),
+ (namespaces['svg'], 'animateColor'),
+ (namespaces['svg'], 'animateMotion'),
+ (namespaces['svg'], 'animateTransform'),
+ (namespaces['svg'], 'clipPath'),
+ (namespaces['svg'], 'circle'),
+ (namespaces['svg'], 'defs'),
+ (namespaces['svg'], 'desc'),
+ (namespaces['svg'], 'ellipse'),
+ (namespaces['svg'], 'font-face'),
+ (namespaces['svg'], 'font-face-name'),
+ (namespaces['svg'], 'font-face-src'),
+ (namespaces['svg'], 'g'),
+ (namespaces['svg'], 'glyph'),
+ (namespaces['svg'], 'hkern'),
+ (namespaces['svg'], 'linearGradient'),
+ (namespaces['svg'], 'line'),
+ (namespaces['svg'], 'marker'),
+ (namespaces['svg'], 'metadata'),
+ (namespaces['svg'], 'missing-glyph'),
+ (namespaces['svg'], 'mpath'),
+ (namespaces['svg'], 'path'),
+ (namespaces['svg'], 'polygon'),
+ (namespaces['svg'], 'polyline'),
+ (namespaces['svg'], 'radialGradient'),
+ (namespaces['svg'], 'rect'),
+ (namespaces['svg'], 'set'),
+ (namespaces['svg'], 'stop'),
+ (namespaces['svg'], 'svg'),
+ (namespaces['svg'], 'switch'),
+ (namespaces['svg'], 'text'),
+ (namespaces['svg'], 'title'),
+ (namespaces['svg'], 'tspan'),
+ (namespaces['svg'], 'use'),
+))
+
+allowed_attributes = frozenset((
+ # HTML attributes
+ (None, 'abbr'),
+ (None, 'accept'),
+ (None, 'accept-charset'),
+ (None, 'accesskey'),
+ (None, 'action'),
+ (None, 'align'),
+ (None, 'alt'),
+ (None, 'autocomplete'),
+ (None, 'autofocus'),
+ (None, 'axis'),
+ (None, 'background'),
+ (None, 'balance'),
+ (None, 'bgcolor'),
+ (None, 'bgproperties'),
+ (None, 'border'),
+ (None, 'bordercolor'),
+ (None, 'bordercolordark'),
+ (None, 'bordercolorlight'),
+ (None, 'bottompadding'),
+ (None, 'cellpadding'),
+ (None, 'cellspacing'),
+ (None, 'ch'),
+ (None, 'challenge'),
+ (None, 'char'),
+ (None, 'charoff'),
+ (None, 'choff'),
+ (None, 'charset'),
+ (None, 'checked'),
+ (None, 'cite'),
+ (None, 'class'),
+ (None, 'clear'),
+ (None, 'color'),
+ (None, 'cols'),
+ (None, 'colspan'),
+ (None, 'compact'),
+ (None, 'contenteditable'),
+ (None, 'controls'),
+ (None, 'coords'),
+ (None, 'data'),
+ (None, 'datafld'),
+ (None, 'datapagesize'),
+ (None, 'datasrc'),
+ (None, 'datetime'),
+ (None, 'default'),
+ (None, 'delay'),
+ (None, 'dir'),
+ (None, 'disabled'),
+ (None, 'draggable'),
+ (None, 'dynsrc'),
+ (None, 'enctype'),
+ (None, 'end'),
+ (None, 'face'),
+ (None, 'for'),
+ (None, 'form'),
+ (None, 'frame'),
+ (None, 'galleryimg'),
+ (None, 'gutter'),
+ (None, 'headers'),
+ (None, 'height'),
+ (None, 'hidefocus'),
+ (None, 'hidden'),
+ (None, 'high'),
+ (None, 'href'),
+ (None, 'hreflang'),
+ (None, 'hspace'),
+ (None, 'icon'),
+ (None, 'id'),
+ (None, 'inputmode'),
+ (None, 'ismap'),
+ (None, 'keytype'),
+ (None, 'label'),
+ (None, 'leftspacing'),
+ (None, 'lang'),
+ (None, 'list'),
+ (None, 'longdesc'),
+ (None, 'loop'),
+ (None, 'loopcount'),
+ (None, 'loopend'),
+ (None, 'loopstart'),
+ (None, 'low'),
+ (None, 'lowsrc'),
+ (None, 'max'),
+ (None, 'maxlength'),
+ (None, 'media'),
+ (None, 'method'),
+ (None, 'min'),
+ (None, 'multiple'),
+ (None, 'name'),
+ (None, 'nohref'),
+ (None, 'noshade'),
+ (None, 'nowrap'),
+ (None, 'open'),
+ (None, 'optimum'),
+ (None, 'pattern'),
+ (None, 'ping'),
+ (None, 'point-size'),
+ (None, 'poster'),
+ (None, 'pqg'),
+ (None, 'preload'),
+ (None, 'prompt'),
+ (None, 'radiogroup'),
+ (None, 'readonly'),
+ (None, 'rel'),
+ (None, 'repeat-max'),
+ (None, 'repeat-min'),
+ (None, 'replace'),
+ (None, 'required'),
+ (None, 'rev'),
+ (None, 'rightspacing'),
+ (None, 'rows'),
+ (None, 'rowspan'),
+ (None, 'rules'),
+ (None, 'scope'),
+ (None, 'selected'),
+ (None, 'shape'),
+ (None, 'size'),
+ (None, 'span'),
+ (None, 'src'),
+ (None, 'start'),
+ (None, 'step'),
+ (None, 'style'),
+ (None, 'summary'),
+ (None, 'suppress'),
+ (None, 'tabindex'),
+ (None, 'target'),
+ (None, 'template'),
+ (None, 'title'),
+ (None, 'toppadding'),
+ (None, 'type'),
+ (None, 'unselectable'),
+ (None, 'usemap'),
+ (None, 'urn'),
+ (None, 'valign'),
+ (None, 'value'),
+ (None, 'variable'),
+ (None, 'volume'),
+ (None, 'vspace'),
+ (None, 'vrml'),
+ (None, 'width'),
+ (None, 'wrap'),
+ (namespaces['xml'], 'lang'),
+ # MathML attributes
+ (None, 'actiontype'),
+ (None, 'align'),
+ (None, 'columnalign'),
+ (None, 'columnalign'),
+ (None, 'columnalign'),
+ (None, 'columnlines'),
+ (None, 'columnspacing'),
+ (None, 'columnspan'),
+ (None, 'depth'),
+ (None, 'display'),
+ (None, 'displaystyle'),
+ (None, 'equalcolumns'),
+ (None, 'equalrows'),
+ (None, 'fence'),
+ (None, 'fontstyle'),
+ (None, 'fontweight'),
+ (None, 'frame'),
+ (None, 'height'),
+ (None, 'linethickness'),
+ (None, 'lspace'),
+ (None, 'mathbackground'),
+ (None, 'mathcolor'),
+ (None, 'mathvariant'),
+ (None, 'mathvariant'),
+ (None, 'maxsize'),
+ (None, 'minsize'),
+ (None, 'other'),
+ (None, 'rowalign'),
+ (None, 'rowalign'),
+ (None, 'rowalign'),
+ (None, 'rowlines'),
+ (None, 'rowspacing'),
+ (None, 'rowspan'),
+ (None, 'rspace'),
+ (None, 'scriptlevel'),
+ (None, 'selection'),
+ (None, 'separator'),
+ (None, 'stretchy'),
+ (None, 'width'),
+ (None, 'width'),
+ (namespaces['xlink'], 'href'),
+ (namespaces['xlink'], 'show'),
+ (namespaces['xlink'], 'type'),
+ # SVG attributes
+ (None, 'accent-height'),
+ (None, 'accumulate'),
+ (None, 'additive'),
+ (None, 'alphabetic'),
+ (None, 'arabic-form'),
+ (None, 'ascent'),
+ (None, 'attributeName'),
+ (None, 'attributeType'),
+ (None, 'baseProfile'),
+ (None, 'bbox'),
+ (None, 'begin'),
+ (None, 'by'),
+ (None, 'calcMode'),
+ (None, 'cap-height'),
+ (None, 'class'),
+ (None, 'clip-path'),
+ (None, 'color'),
+ (None, 'color-rendering'),
+ (None, 'content'),
+ (None, 'cx'),
+ (None, 'cy'),
+ (None, 'd'),
+ (None, 'dx'),
+ (None, 'dy'),
+ (None, 'descent'),
+ (None, 'display'),
+ (None, 'dur'),
+ (None, 'end'),
+ (None, 'fill'),
+ (None, 'fill-opacity'),
+ (None, 'fill-rule'),
+ (None, 'font-family'),
+ (None, 'font-size'),
+ (None, 'font-stretch'),
+ (None, 'font-style'),
+ (None, 'font-variant'),
+ (None, 'font-weight'),
+ (None, 'from'),
+ (None, 'fx'),
+ (None, 'fy'),
+ (None, 'g1'),
+ (None, 'g2'),
+ (None, 'glyph-name'),
+ (None, 'gradientUnits'),
+ (None, 'hanging'),
+ (None, 'height'),
+ (None, 'horiz-adv-x'),
+ (None, 'horiz-origin-x'),
+ (None, 'id'),
+ (None, 'ideographic'),
+ (None, 'k'),
+ (None, 'keyPoints'),
+ (None, 'keySplines'),
+ (None, 'keyTimes'),
+ (None, 'lang'),
+ (None, 'marker-end'),
+ (None, 'marker-mid'),
+ (None, 'marker-start'),
+ (None, 'markerHeight'),
+ (None, 'markerUnits'),
+ (None, 'markerWidth'),
+ (None, 'mathematical'),
+ (None, 'max'),
+ (None, 'min'),
+ (None, 'name'),
+ (None, 'offset'),
+ (None, 'opacity'),
+ (None, 'orient'),
+ (None, 'origin'),
+ (None, 'overline-position'),
+ (None, 'overline-thickness'),
+ (None, 'panose-1'),
+ (None, 'path'),
+ (None, 'pathLength'),
+ (None, 'points'),
+ (None, 'preserveAspectRatio'),
+ (None, 'r'),
+ (None, 'refX'),
+ (None, 'refY'),
+ (None, 'repeatCount'),
+ (None, 'repeatDur'),
+ (None, 'requiredExtensions'),
+ (None, 'requiredFeatures'),
+ (None, 'restart'),
+ (None, 'rotate'),
+ (None, 'rx'),
+ (None, 'ry'),
+ (None, 'slope'),
+ (None, 'stemh'),
+ (None, 'stemv'),
+ (None, 'stop-color'),
+ (None, 'stop-opacity'),
+ (None, 'strikethrough-position'),
+ (None, 'strikethrough-thickness'),
+ (None, 'stroke'),
+ (None, 'stroke-dasharray'),
+ (None, 'stroke-dashoffset'),
+ (None, 'stroke-linecap'),
+ (None, 'stroke-linejoin'),
+ (None, 'stroke-miterlimit'),
+ (None, 'stroke-opacity'),
+ (None, 'stroke-width'),
+ (None, 'systemLanguage'),
+ (None, 'target'),
+ (None, 'text-anchor'),
+ (None, 'to'),
+ (None, 'transform'),
+ (None, 'type'),
+ (None, 'u1'),
+ (None, 'u2'),
+ (None, 'underline-position'),
+ (None, 'underline-thickness'),
+ (None, 'unicode'),
+ (None, 'unicode-range'),
+ (None, 'units-per-em'),
+ (None, 'values'),
+ (None, 'version'),
+ (None, 'viewBox'),
+ (None, 'visibility'),
+ (None, 'width'),
+ (None, 'widths'),
+ (None, 'x'),
+ (None, 'x-height'),
+ (None, 'x1'),
+ (None, 'x2'),
+ (namespaces['xlink'], 'actuate'),
+ (namespaces['xlink'], 'arcrole'),
+ (namespaces['xlink'], 'href'),
+ (namespaces['xlink'], 'role'),
+ (namespaces['xlink'], 'show'),
+ (namespaces['xlink'], 'title'),
+ (namespaces['xlink'], 'type'),
+ (namespaces['xml'], 'base'),
+ (namespaces['xml'], 'lang'),
+ (namespaces['xml'], 'space'),
+ (None, 'y'),
+ (None, 'y1'),
+ (None, 'y2'),
+ (None, 'zoomAndPan'),
+))
+
+attr_val_is_uri = frozenset((
+ (None, 'href'),
+ (None, 'src'),
+ (None, 'cite'),
+ (None, 'action'),
+ (None, 'longdesc'),
+ (None, 'poster'),
+ (None, 'background'),
+ (None, 'datasrc'),
+ (None, 'dynsrc'),
+ (None, 'lowsrc'),
+ (None, 'ping'),
+ (namespaces['xlink'], 'href'),
+ (namespaces['xml'], 'base'),
+))
+
+svg_attr_val_allows_ref = frozenset((
+ (None, 'clip-path'),
+ (None, 'color-profile'),
+ (None, 'cursor'),
+ (None, 'fill'),
+ (None, 'filter'),
+ (None, 'marker'),
+ (None, 'marker-start'),
+ (None, 'marker-mid'),
+ (None, 'marker-end'),
+ (None, 'mask'),
+ (None, 'stroke'),
+))
+
+svg_allow_local_href = frozenset((
+ (None, 'altGlyph'),
+ (None, 'animate'),
+ (None, 'animateColor'),
+ (None, 'animateMotion'),
+ (None, 'animateTransform'),
+ (None, 'cursor'),
+ (None, 'feImage'),
+ (None, 'filter'),
+ (None, 'linearGradient'),
+ (None, 'pattern'),
+ (None, 'radialGradient'),
+ (None, 'textpath'),
+ (None, 'tref'),
+ (None, 'set'),
+ (None, 'use')
+))
+
+allowed_css_properties = frozenset((
+ 'azimuth',
+ 'background-color',
+ 'border-bottom-color',
+ 'border-collapse',
+ 'border-color',
+ 'border-left-color',
+ 'border-right-color',
+ 'border-top-color',
+ 'clear',
+ 'color',
+ 'cursor',
+ 'direction',
+ 'display',
+ 'elevation',
+ 'float',
+ 'font',
+ 'font-family',
+ 'font-size',
+ 'font-style',
+ 'font-variant',
+ 'font-weight',
+ 'height',
+ 'letter-spacing',
+ 'line-height',
+ 'overflow',
+ 'pause',
+ 'pause-after',
+ 'pause-before',
+ 'pitch',
+ 'pitch-range',
+ 'richness',
+ 'speak',
+ 'speak-header',
+ 'speak-numeral',
+ 'speak-punctuation',
+ 'speech-rate',
+ 'stress',
+ 'text-align',
+ 'text-decoration',
+ 'text-indent',
+ 'unicode-bidi',
+ 'vertical-align',
+ 'voice-family',
+ 'volume',
+ 'white-space',
+ 'width',
+))
+
+allowed_css_keywords = frozenset((
+ 'auto',
+ 'aqua',
+ 'black',
+ 'block',
+ 'blue',
+ 'bold',
+ 'both',
+ 'bottom',
+ 'brown',
+ 'center',
+ 'collapse',
+ 'dashed',
+ 'dotted',
+ 'fuchsia',
+ 'gray',
+ 'green',
+ '!important',
+ 'italic',
+ 'left',
+ 'lime',
+ 'maroon',
+ 'medium',
+ 'none',
+ 'navy',
+ 'normal',
+ 'nowrap',
+ 'olive',
+ 'pointer',
+ 'purple',
+ 'red',
+ 'right',
+ 'solid',
+ 'silver',
+ 'teal',
+ 'top',
+ 'transparent',
+ 'underline',
+ 'white',
+ 'yellow',
+))
+
+allowed_svg_properties = frozenset((
+ 'fill',
+ 'fill-opacity',
+ 'fill-rule',
+ 'stroke',
+ 'stroke-width',
+ 'stroke-linecap',
+ 'stroke-linejoin',
+ 'stroke-opacity',
+))
+
+allowed_protocols = frozenset((
+ 'ed2k',
+ 'ftp',
+ 'http',
+ 'https',
+ 'irc',
+ 'mailto',
+ 'news',
+ 'gopher',
+ 'nntp',
+ 'telnet',
+ 'webcal',
+ 'xmpp',
+ 'callto',
+ 'feed',
+ 'urn',
+ 'aim',
+ 'rsync',
+ 'tag',
+ 'ssh',
+ 'sftp',
+ 'rtsp',
+ 'afs',
+ 'data',
+))
+
+allowed_content_types = frozenset((
+ 'image/png',
+ 'image/jpeg',
+ 'image/gif',
+ 'image/webp',
+ 'image/bmp',
+ 'text/plain',
+))
+
+
+data_content_type = re.compile(r'''
+ ^
+ # Match a content type <application>/<type>
+ (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
+ # Match any character set and encoding
+ (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
+ |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
+ # Assume the rest is data
+ ,.*
+ $
+ ''',
+ re.VERBOSE)
+
+
+class Filter(base.Filter):
+ """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
+ def __init__(self,
+ source,
+ allowed_elements=allowed_elements,
+ allowed_attributes=allowed_attributes,
+ allowed_css_properties=allowed_css_properties,
+ allowed_css_keywords=allowed_css_keywords,
+ allowed_svg_properties=allowed_svg_properties,
+ allowed_protocols=allowed_protocols,
+ allowed_content_types=allowed_content_types,
+ attr_val_is_uri=attr_val_is_uri,
+ svg_attr_val_allows_ref=svg_attr_val_allows_ref,
+ svg_allow_local_href=svg_allow_local_href):
+ super(Filter, self).__init__(source)
+ self.allowed_elements = allowed_elements
+ self.allowed_attributes = allowed_attributes
+ self.allowed_css_properties = allowed_css_properties
+ self.allowed_css_keywords = allowed_css_keywords
+ self.allowed_svg_properties = allowed_svg_properties
+ self.allowed_protocols = allowed_protocols
+ self.allowed_content_types = allowed_content_types
+ self.attr_val_is_uri = attr_val_is_uri
+ self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
+ self.svg_allow_local_href = svg_allow_local_href
-class Filter(_base.Filter, HTMLSanitizerMixin):
def __iter__(self):
- for token in _base.Filter.__iter__(self):
+ for token in base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token
+
+ # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
+ # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
+ # attributes are parsed, and a restricted set, # specified by
+ # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
+ # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
+ # in ALLOWED_PROTOCOLS are allowed.
+ #
+ # sanitize_html('<script> do_nasty_stuff() </script>')
+ # => &lt;script> do_nasty_stuff() &lt;/script>
+ # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
+ # => <a>Click here for $100</a>
+ def sanitize_token(self, token):
+
+ # accommodate filters which use token_type differently
+ token_type = token["type"]
+ if token_type in ("StartTag", "EndTag", "EmptyTag"):
+ name = token["name"]
+ namespace = token["namespace"]
+ if ((namespace, name) in self.allowed_elements or
+ (namespace is None and
+ (namespaces["html"], name) in self.allowed_elements)):
+ return self.allowed_token(token)
+ else:
+ return self.disallowed_token(token)
+ elif token_type == "Comment":
+ pass
+ else:
+ return token
+
+ def allowed_token(self, token):
+ if "data" in token:
+ attrs = token["data"]
+ attr_names = set(attrs.keys())
+
+ # Remove forbidden attributes
+ for to_remove in (attr_names - self.allowed_attributes):
+ del token["data"][to_remove]
+ attr_names.remove(to_remove)
+
+ # Remove attributes with disallowed URL values
+ for attr in (attr_names & self.attr_val_is_uri):
+ assert attr in attrs
+ # I don't have a clue where this regexp comes from or why it matches those
+ # characters, nor why we call unescape. I just know it's always been here.
+ # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
+ # this will do is remove *more* than it otherwise would.
+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\s]+", '',
+ unescape(attrs[attr])).lower()
+ # remove replacement characters from unescaped characters
+ val_unescaped = val_unescaped.replace("\ufffd", "")
+ try:
+ uri = urlparse.urlparse(val_unescaped)
+ except ValueError:
+ uri = None
+ del attrs[attr]
+ if uri and uri.scheme:
+ if uri.scheme not in self.allowed_protocols:
+ del attrs[attr]
+ if uri.scheme == 'data':
+ m = data_content_type.match(uri.path)
+ if not m:
+ del attrs[attr]
+ elif m.group('content_type') not in self.allowed_content_types:
+ del attrs[attr]
+
+ for attr in self.svg_attr_val_allows_ref:
+ if attr in attrs:
+ attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
+ ' ',
+ unescape(attrs[attr]))
+ if (token["name"] in self.svg_allow_local_href and
+ (namespaces['xlink'], 'href') in attrs and re.search('^\s*[^#\s].*',
+ attrs[(namespaces['xlink'], 'href')])):
+ del attrs[(namespaces['xlink'], 'href')]
+ if (None, 'style') in attrs:
+ attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
+ token["data"] = attrs
+ return token
+
+ def disallowed_token(self, token):
+ token_type = token["type"]
+ if token_type == "EndTag":
+ token["data"] = "</%s>" % token["name"]
+ elif token["data"]:
+ assert token_type in ("StartTag", "EmptyTag")
+ attrs = []
+ for (ns, name), v in token["data"].items():
+ attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
+ token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
+ else:
+ token["data"] = "<%s>" % token["name"]
+ if token.get("selfClosing"):
+ token["data"] = token["data"][:-1] + "/>"
+
+ token["type"] = "Characters"
+
+ del token["name"]
+ return token
+
+ def sanitize_css(self, style):
+ # disallow urls
+ style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
+
+ # gauntlet
+ if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+ return ''
+ if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+ return ''
+
+ clean = []
+ for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
+ if not value:
+ continue
+ if prop.lower() in self.allowed_css_properties:
+ clean.append(prop + ': ' + value + ';')
+ elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
+ 'padding']:
+ for keyword in value.split():
+ if keyword not in self.allowed_css_keywords and \
+ not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
+ break
+ else:
+ clean.append(prop + ': ' + value + ';')
+ elif prop.lower() in self.allowed_svg_properties:
+ clean.append(prop + ': ' + value + ';')
+
+ return ' '.join(clean)
diff --git a/pip/_vendor/html5lib/filters/whitespace.py b/pip/_vendor/html5lib/filters/whitespace.py
index dfc60eebd..892105287 100644
--- a/pip/_vendor/html5lib/filters/whitespace.py
+++ b/pip/_vendor/html5lib/filters/whitespace.py
@@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
import re
-from . import _base
+from . import base
from ..constants import rcdataElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
-class Filter(_base.Filter):
+class Filter(base.Filter):
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
def __iter__(self):
preserve = 0
- for token in _base.Filter.__iter__(self):
+ for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag" \
and (preserve or token["name"] in self.spacePreserveElements):
diff --git a/pip/_vendor/html5lib/html5parser.py b/pip/_vendor/html5lib/html5parser.py
index 40f3d093e..f7043cb19 100644
--- a/pip/_vendor/html5lib/html5parser.py
+++ b/pip/_vendor/html5lib/html5parser.py
@@ -1,39 +1,44 @@
from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import with_metaclass
+from pip._vendor.six import with_metaclass, viewkeys, PY3
import types
-from . import inputstream
-from . import tokenizer
+try:
+ from collections import OrderedDict
+except ImportError:
+ from pip._vendor.ordereddict import OrderedDict
+
+from . import _inputstream
+from . import _tokenizer
from . import treebuilders
-from .treebuilders._base import Marker
-
-from . import utils
-from . import constants
-from .constants import spaceCharacters, asciiUpper2Lower
-from .constants import specialElements
-from .constants import headingElements
-from .constants import cdataElements, rcdataElements
-from .constants import tokenTypes, ReparseException, namespaces
-from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
-from .constants import adjustForeignAttributes as adjustForeignAttributesMap
-from .constants import E
-
-
-def parse(doc, treebuilder="etree", encoding=None,
- namespaceHTMLElements=True):
+from .treebuilders.base import Marker
+
+from . import _utils
+from .constants import (
+ spaceCharacters, asciiUpper2Lower,
+ specialElements, headingElements, cdataElements, rcdataElements,
+ tokenTypes, tagTokenTypes,
+ namespaces,
+ htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
+ adjustForeignAttributes as adjustForeignAttributesMap,
+ adjustMathMLAttributes, adjustSVGAttributes,
+ E,
+ ReparseException
+)
+
+
+def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
"""Parse a string or file-like object into a tree"""
tb = treebuilders.getTreeBuilder(treebuilder)
p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
- return p.parse(doc, encoding=encoding)
+ return p.parse(doc, **kwargs)
-def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
- namespaceHTMLElements=True):
+def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
tb = treebuilders.getTreeBuilder(treebuilder)
p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
- return p.parseFragment(doc, container=container, encoding=encoding)
+ return p.parseFragment(doc, container=container, **kwargs)
def method_decorator_metaclass(function):
@@ -52,18 +57,13 @@ class HTMLParser(object):
"""HTML parser. Generates a tree structure from a stream of (possibly
malformed) HTML"""
- def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
- strict=False, namespaceHTMLElements=True, debug=False):
+ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
"""
strict - raise an exception when a parse error is encountered
tree - a treebuilder class controlling the type of tree that will be
returned. Built in treebuilders can be accessed through
html5lib.treebuilders.getTreeBuilder(treeType)
-
- tokenizer - a class that provides a stream of tokens to the treebuilder.
- This may be replaced for e.g. a sanitizer which converts some tags to
- text
"""
# Raise an exception on the first error encountered
@@ -72,29 +72,24 @@ class HTMLParser(object):
if tree is None:
tree = treebuilders.getTreeBuilder("etree")
self.tree = tree(namespaceHTMLElements)
- self.tokenizer_class = tokenizer
self.errors = []
self.phases = dict([(name, cls(self, self.tree)) for name, cls in
getPhases(debug).items()])
- def _parse(self, stream, innerHTML=False, container="div",
- encoding=None, parseMeta=True, useChardet=True, **kwargs):
+ def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
self.innerHTMLMode = innerHTML
self.container = container
- self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
- parseMeta=parseMeta,
- useChardet=useChardet,
- parser=self, **kwargs)
+ self.scripting = scripting
+ self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
self.reset()
- while True:
- try:
- self.mainLoop()
- break
- except ReparseException:
- self.reset()
+ try:
+ self.mainLoop()
+ except ReparseException:
+ self.reset()
+ self.mainLoop()
def reset(self):
self.tree.reset()
@@ -121,7 +116,7 @@ class HTMLParser(object):
self.phase.insertHtmlElement()
self.resetInsertionMode()
else:
- self.innerHTML = False
+ self.innerHTML = False # pylint:disable=redefined-variable-type
self.phase = self.phases["initial"]
self.lastPhase = None
@@ -139,7 +134,7 @@ class HTMLParser(object):
"""
if not hasattr(self, 'tokenizer'):
return None
- return self.tokenizer.stream.charEncoding[0]
+ return self.tokenizer.stream.charEncoding[0].name
def isHTMLIntegrationPoint(self, element):
if (element.name == "annotation-xml" and
@@ -164,8 +159,10 @@ class HTMLParser(object):
ParseErrorToken = tokenTypes["ParseError"]
for token in self.normalizedTokens():
+ prev_token = None
new_token = token
while new_token is not None:
+ prev_token = new_token
currentNode = self.tree.openElements[-1] if self.tree.openElements else None
currentNodeNamespace = currentNode.namespace if currentNode else None
currentNodeName = currentNode.name if currentNode else None
@@ -184,6 +181,7 @@ class HTMLParser(object):
type in (CharactersToken, SpaceCharactersToken))) or
(currentNodeNamespace == namespaces["mathml"] and
currentNodeName == "annotation-xml" and
+ type == StartTagToken and
token["name"] == "svg") or
(self.isHTMLIntegrationPoint(currentNode) and
type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
@@ -204,10 +202,10 @@ class HTMLParser(object):
elif type == DoctypeToken:
new_token = phase.processDoctype(new_token)
- if (type == StartTagToken and token["selfClosing"]
- and not token["selfClosingAcknowledged"]):
+ if (type == StartTagToken and prev_token["selfClosing"] and
+ not prev_token["selfClosingAcknowledged"]):
self.parseError("non-void-element-with-trailing-solidus",
- {"name": token["name"]})
+ {"name": prev_token["name"]})
# When the loop finishes it's EOF
reprocess = True
@@ -222,7 +220,7 @@ class HTMLParser(object):
for token in self.tokenizer:
yield self.normalizeToken(token)
- def parse(self, stream, encoding=None, parseMeta=True, useChardet=True):
+ def parse(self, stream, *args, **kwargs):
"""Parse a HTML document into a well-formed tree
stream - a filelike object or string containing the HTML to be parsed
@@ -231,13 +229,13 @@ class HTMLParser(object):
the encoding. If specified, that encoding will be used,
regardless of any BOM or later declaration (such as in a meta
element)
+
+ scripting - treat noscript elements as if javascript was turned on
"""
- self._parse(stream, innerHTML=False, encoding=encoding,
- parseMeta=parseMeta, useChardet=useChardet)
+ self._parse(stream, False, None, *args, **kwargs)
return self.tree.getDocument()
- def parseFragment(self, stream, container="div", encoding=None,
- parseMeta=False, useChardet=True):
+ def parseFragment(self, stream, *args, **kwargs):
"""Parse a HTML fragment into a well-formed tree fragment
container - name of the element we're setting the innerHTML property
@@ -249,12 +247,16 @@ class HTMLParser(object):
the encoding. If specified, that encoding will be used,
regardless of any BOM or later declaration (such as in a meta
element)
+
+ scripting - treat noscript elements as if javascript was turned on
"""
- self._parse(stream, True, container=container, encoding=encoding)
+ self._parse(stream, True, *args, **kwargs)
return self.tree.getFragment()
- def parseError(self, errorcode="XXX-undefined-error", datavars={}):
+ def parseError(self, errorcode="XXX-undefined-error", datavars=None):
# XXX The idea is to make errorcode mandatory.
+ if datavars is None:
+ datavars = {}
self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
if self.strict:
raise ParseError(E[errorcode] % datavars)
@@ -263,98 +265,25 @@ class HTMLParser(object):
""" HTML5 specific normalizations to the token stream """
if token["type"] == tokenTypes["StartTag"]:
- token["data"] = dict(token["data"][::-1])
+ raw = token["data"]
+ token["data"] = OrderedDict(raw)
+ if len(raw) > len(token["data"]):
+ # we had some duplicated attribute, fix so first wins
+ token["data"].update(raw[::-1])
return token
def adjustMathMLAttributes(self, token):
- replacements = {"definitionurl": "definitionURL"}
- for k, v in replacements.items():
- if k in token["data"]:
- token["data"][v] = token["data"][k]
- del token["data"][k]
+ adjust_attributes(token, adjustMathMLAttributes)
def adjustSVGAttributes(self, token):
- replacements = {
- "attributename": "attributeName",
- "attributetype": "attributeType",
- "basefrequency": "baseFrequency",
- "baseprofile": "baseProfile",
- "calcmode": "calcMode",
- "clippathunits": "clipPathUnits",
- "contentscripttype": "contentScriptType",
- "contentstyletype": "contentStyleType",
- "diffuseconstant": "diffuseConstant",
- "edgemode": "edgeMode",
- "externalresourcesrequired": "externalResourcesRequired",
- "filterres": "filterRes",
- "filterunits": "filterUnits",
- "glyphref": "glyphRef",
- "gradienttransform": "gradientTransform",
- "gradientunits": "gradientUnits",
- "kernelmatrix": "kernelMatrix",
- "kernelunitlength": "kernelUnitLength",
- "keypoints": "keyPoints",
- "keysplines": "keySplines",
- "keytimes": "keyTimes",
- "lengthadjust": "lengthAdjust",
- "limitingconeangle": "limitingConeAngle",
- "markerheight": "markerHeight",
- "markerunits": "markerUnits",
- "markerwidth": "markerWidth",
- "maskcontentunits": "maskContentUnits",
- "maskunits": "maskUnits",
- "numoctaves": "numOctaves",
- "pathlength": "pathLength",
- "patterncontentunits": "patternContentUnits",
- "patterntransform": "patternTransform",
- "patternunits": "patternUnits",
- "pointsatx": "pointsAtX",
- "pointsaty": "pointsAtY",
- "pointsatz": "pointsAtZ",
- "preservealpha": "preserveAlpha",
- "preserveaspectratio": "preserveAspectRatio",
- "primitiveunits": "primitiveUnits",
- "refx": "refX",
- "refy": "refY",
- "repeatcount": "repeatCount",
- "repeatdur": "repeatDur",
- "requiredextensions": "requiredExtensions",
- "requiredfeatures": "requiredFeatures",
- "specularconstant": "specularConstant",
- "specularexponent": "specularExponent",
- "spreadmethod": "spreadMethod",
- "startoffset": "startOffset",
- "stddeviation": "stdDeviation",
- "stitchtiles": "stitchTiles",
- "surfacescale": "surfaceScale",
- "systemlanguage": "systemLanguage",
- "tablevalues": "tableValues",
- "targetx": "targetX",
- "targety": "targetY",
- "textlength": "textLength",
- "viewbox": "viewBox",
- "viewtarget": "viewTarget",
- "xchannelselector": "xChannelSelector",
- "ychannelselector": "yChannelSelector",
- "zoomandpan": "zoomAndPan"
- }
- for originalName in list(token["data"].keys()):
- if originalName in replacements:
- svgName = replacements[originalName]
- token["data"][svgName] = token["data"][originalName]
- del token["data"][originalName]
+ adjust_attributes(token, adjustSVGAttributes)
def adjustForeignAttributes(self, token):
- replacements = adjustForeignAttributesMap
-
- for originalName in token["data"].keys():
- if originalName in replacements:
- foreignName = replacements[originalName]
- token["data"][foreignName] = token["data"][originalName]
- del token["data"][originalName]
+ adjust_attributes(token, adjustForeignAttributesMap)
def reparseTokenNormal(self, token):
+ # pylint:disable=unused-argument
self.parser.phase()
def resetInsertionMode(self):
@@ -419,11 +348,12 @@ class HTMLParser(object):
self.phase = self.phases["text"]
+@_utils.memoize
def getPhases(debug):
def log(function):
"""Logger that records which phase processes each token"""
type_names = dict((value, key) for key, value in
- constants.tokenTypes.items())
+ tokenTypes.items())
def wrapped(self, *args, **kwargs):
if function.__name__.startswith("process") and len(args) > 0:
@@ -432,7 +362,7 @@ def getPhases(debug):
info = {"type": type_names[token['type']]}
except:
raise
- if token['type'] in constants.tagTokenTypes:
+ if token['type'] in tagTokenTypes:
info["name"] = token['name']
self.parser.log.append((self.parser.tokenizer.state.__name__,
@@ -451,6 +381,7 @@ def getPhases(debug):
else:
return type
+ # pylint:disable=unused-argument
class Phase(with_metaclass(getMetaclass(debug, log))):
"""Base class for helper object that implements each phase of processing
"""
@@ -517,77 +448,76 @@ def getPhases(debug):
if publicId != "":
publicId = publicId.translate(asciiUpper2Lower)
- if (not correct or token["name"] != "html"
- or publicId.startswith(
- ("+//silmaril//dtd html pro v0r11 19970101//",
- "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
- "-//as//dtd html 3.0 aswedit + extensions//",
- "-//ietf//dtd html 2.0 level 1//",
- "-//ietf//dtd html 2.0 level 2//",
- "-//ietf//dtd html 2.0 strict level 1//",
- "-//ietf//dtd html 2.0 strict level 2//",
- "-//ietf//dtd html 2.0 strict//",
- "-//ietf//dtd html 2.0//",
- "-//ietf//dtd html 2.1e//",
- "-//ietf//dtd html 3.0//",
- "-//ietf//dtd html 3.2 final//",
- "-//ietf//dtd html 3.2//",
- "-//ietf//dtd html 3//",
- "-//ietf//dtd html level 0//",
- "-//ietf//dtd html level 1//",
- "-//ietf//dtd html level 2//",
- "-//ietf//dtd html level 3//",
- "-//ietf//dtd html strict level 0//",
- "-//ietf//dtd html strict level 1//",
- "-//ietf//dtd html strict level 2//",
- "-//ietf//dtd html strict level 3//",
- "-//ietf//dtd html strict//",
- "-//ietf//dtd html//",
- "-//metrius//dtd metrius presentational//",
- "-//microsoft//dtd internet explorer 2.0 html strict//",
- "-//microsoft//dtd internet explorer 2.0 html//",
- "-//microsoft//dtd internet explorer 2.0 tables//",
- "-//microsoft//dtd internet explorer 3.0 html strict//",
- "-//microsoft//dtd internet explorer 3.0 html//",
- "-//microsoft//dtd internet explorer 3.0 tables//",
- "-//netscape comm. corp.//dtd html//",
- "-//netscape comm. corp.//dtd strict html//",
- "-//o'reilly and associates//dtd html 2.0//",
- "-//o'reilly and associates//dtd html extended 1.0//",
- "-//o'reilly and associates//dtd html extended relaxed 1.0//",
- "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
- "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
- "-//spyglass//dtd html 2.0 extended//",
- "-//sq//dtd html 2.0 hotmetal + extensions//",
- "-//sun microsystems corp.//dtd hotjava html//",
- "-//sun microsystems corp.//dtd hotjava strict html//",
- "-//w3c//dtd html 3 1995-03-24//",
- "-//w3c//dtd html 3.2 draft//",
- "-//w3c//dtd html 3.2 final//",
- "-//w3c//dtd html 3.2//",
- "-//w3c//dtd html 3.2s draft//",
- "-//w3c//dtd html 4.0 frameset//",
- "-//w3c//dtd html 4.0 transitional//",
- "-//w3c//dtd html experimental 19960712//",
- "-//w3c//dtd html experimental 970421//",
- "-//w3c//dtd w3 html//",
- "-//w3o//dtd w3 html 3.0//",
- "-//webtechs//dtd mozilla html 2.0//",
- "-//webtechs//dtd mozilla html//"))
- or publicId in
- ("-//w3o//dtd w3 html strict 3.0//en//",
- "-/w3c/dtd html 4.0 transitional/en",
- "html")
- or publicId.startswith(
- ("-//w3c//dtd html 4.01 frameset//",
- "-//w3c//dtd html 4.01 transitional//")) and
- systemId is None
- or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
+ if (not correct or token["name"] != "html" or
+ publicId.startswith(
+ ("+//silmaril//dtd html pro v0r11 19970101//",
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//",
+ "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//",
+ "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//",
+ "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//",
+ "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//",
+ "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//")) or
+ publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
+ "-/w3c/dtd html 4.0 transitional/en",
+ "html") or
+ publicId.startswith(
+ ("-//w3c//dtd html 4.01 frameset//",
+ "-//w3c//dtd html 4.01 transitional//")) and
+ systemId is None or
+ systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
self.parser.compatMode = "quirks"
elif (publicId.startswith(
("-//w3c//dtd xhtml 1.0 frameset//",
- "-//w3c//dtd xhtml 1.0 transitional//"))
- or publicId.startswith(
+ "-//w3c//dtd xhtml 1.0 transitional//")) or
+ publicId.startswith(
("-//w3c//dtd html 4.01 frameset//",
"-//w3c//dtd html 4.01 transitional//")) and
systemId is not None):
@@ -660,13 +590,13 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("head", self.startTagHead)
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
(("head", "body", "html", "br"), self.endTagImplyHead)
])
self.endTagHandler.default = self.endTagOther
@@ -706,10 +636,11 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("title", self.startTagTitle),
- (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
+ (("noframes", "style"), self.startTagNoFramesStyle),
+ ("noscript", self.startTagNoscript),
("script", self.startTagScript),
(("base", "basefont", "bgsound", "command", "link"),
self.startTagBaseLinkCommand),
@@ -718,7 +649,7 @@ def getPhases(debug):
])
self.startTagHandler.default = self.startTagOther
- self. endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("head", self.endTagHead),
(("br", "html", "body"), self.endTagHtmlBodyBr)
])
@@ -760,18 +691,25 @@ def getPhases(debug):
# the abstract Unicode string, and just use the
# ContentAttrParser on that, but using UTF-8 allows all chars
# to be encoded and as a ASCII-superset works.
- data = inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
- parser = inputstream.ContentAttrParser(data)
+ data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
+ parser = _inputstream.ContentAttrParser(data)
codec = parser.parse()
self.parser.tokenizer.stream.changeEncoding(codec)
def startTagTitle(self, token):
self.parser.parseRCDataRawtext(token, "RCDATA")
- def startTagNoScriptNoFramesStyle(self, token):
+ def startTagNoFramesStyle(self, token):
# Need to decide whether to implement the scripting-disabled case
self.parser.parseRCDataRawtext(token, "RAWTEXT")
+ def startTagNoscript(self, token):
+ if self.parser.scripting:
+ self.parser.parseRCDataRawtext(token, "RAWTEXT")
+ else:
+ self.tree.insertElement(token)
+ self.parser.phase = self.parser.phases["inHeadNoscript"]
+
def startTagScript(self, token):
self.tree.insertElement(token)
self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
@@ -797,15 +735,75 @@ def getPhases(debug):
def anythingElse(self):
self.endTagHead(impliedTagToken("head"))
- # XXX If we implement a parser for which scripting is disabled we need to
- # implement this phase.
- #
- # class InHeadNoScriptPhase(Phase):
+ class InHeadNoscriptPhase(Phase):
+ def __init__(self, parser, tree):
+ Phase.__init__(self, parser, tree)
+
+ self.startTagHandler = _utils.MethodDispatcher([
+ ("html", self.startTagHtml),
+ (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
+ (("head", "noscript"), self.startTagHeadNoscript),
+ ])
+ self.startTagHandler.default = self.startTagOther
+
+ self.endTagHandler = _utils.MethodDispatcher([
+ ("noscript", self.endTagNoscript),
+ ("br", self.endTagBr),
+ ])
+ self.endTagHandler.default = self.endTagOther
+
+ def processEOF(self):
+ self.parser.parseError("eof-in-head-noscript")
+ self.anythingElse()
+ return True
+
+ def processComment(self, token):
+ return self.parser.phases["inHead"].processComment(token)
+
+ def processCharacters(self, token):
+ self.parser.parseError("char-in-head-noscript")
+ self.anythingElse()
+ return token
+
+ def processSpaceCharacters(self, token):
+ return self.parser.phases["inHead"].processSpaceCharacters(token)
+
+ def startTagHtml(self, token):
+ return self.parser.phases["inBody"].processStartTag(token)
+
+ def startTagBaseLinkCommand(self, token):
+ return self.parser.phases["inHead"].processStartTag(token)
+
+ def startTagHeadNoscript(self, token):
+ self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+
+ def startTagOther(self, token):
+ self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
+ self.anythingElse()
+ return token
+
+ def endTagNoscript(self, token):
+ node = self.parser.tree.openElements.pop()
+ assert node.name == "noscript", "Expected noscript got %s" % node.name
+ self.parser.phase = self.parser.phases["inHead"]
+
+ def endTagBr(self, token):
+ self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
+ self.anythingElse()
+ return token
+
+ def endTagOther(self, token):
+ self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+ def anythingElse(self):
+ # Caller must raise parse error first!
+ self.endTagNoscript(impliedTagToken("noscript"))
+
class AfterHeadPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("body", self.startTagBody),
("frameset", self.startTagFrameset),
@@ -815,8 +813,8 @@ def getPhases(debug):
("head", self.startTagHead)
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"),
- self.endTagHtmlBodyBr)])
+ self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
+ self.endTagHtmlBodyBr)])
self.endTagHandler.default = self.endTagOther
def processEOF(self):
@@ -874,10 +872,10 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- # Keep a ref to this for special handling of whitespace in <pre>
- self.processSpaceCharactersNonPre = self.processSpaceCharacters
+ # Set this to the default handler
+ self.processSpaceCharacters = self.processSpaceCharactersNonPre
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("base", "basefont", "bgsound", "command", "link", "meta",
"script", "style", "title"),
@@ -885,7 +883,7 @@ def getPhases(debug):
("body", self.startTagBody),
("frameset", self.startTagFrameset),
(("address", "article", "aside", "blockquote", "center", "details",
- "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+ "dir", "div", "dl", "fieldset", "figcaption", "figure",
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
"section", "summary", "ul"),
self.startTagCloseP),
@@ -911,7 +909,8 @@ def getPhases(debug):
("isindex", self.startTagIsIndex),
("textarea", self.startTagTextarea),
("iframe", self.startTagIFrame),
- (("noembed", "noframes", "noscript"), self.startTagRawtext),
+ ("noscript", self.startTagNoscript),
+ (("noembed", "noframes"), self.startTagRawtext),
("select", self.startTagSelect),
(("rp", "rt"), self.startTagRpRt),
(("option", "optgroup"), self.startTagOpt),
@@ -923,7 +922,7 @@ def getPhases(debug):
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("body", self.endTagBody),
("html", self.endTagHtml),
(("address", "article", "aside", "blockquote", "button", "center",
@@ -942,17 +941,9 @@ def getPhases(debug):
self.endTagHandler.default = self.endTagOther
def isMatchingFormattingElement(self, node1, node2):
- if node1.name != node2.name or node1.namespace != node2.namespace:
- return False
- elif len(node1.attributes) != len(node2.attributes):
- return False
- else:
- attributes1 = sorted(node1.attributes.items())
- attributes2 = sorted(node2.attributes.items())
- for attr1, attr2 in zip(attributes1, attributes2):
- if attr1 != attr2:
- return False
- return True
+ return (node1.name == node2.name and
+ node1.namespace == node2.namespace and
+ node1.attributes == node2.attributes)
# helper
def addFormattingElement(self, token):
@@ -988,8 +979,8 @@ def getPhases(debug):
data = token["data"]
self.processSpaceCharacters = self.processSpaceCharactersNonPre
if (data.startswith("\n") and
- self.tree.openElements[-1].name in ("pre", "listing", "textarea")
- and not self.tree.openElements[-1].hasContent()):
+ self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
+ not self.tree.openElements[-1].hasContent()):
data = data[1:]
if data:
self.tree.reconstructActiveFormattingElements()
@@ -1007,7 +998,7 @@ def getPhases(debug):
for char in token["data"]])):
self.parser.framesetOK = False
- def processSpaceCharacters(self, token):
+ def processSpaceCharactersNonPre(self, token):
self.tree.reconstructActiveFormattingElements()
self.tree.insertText(token["data"])
@@ -1016,8 +1007,8 @@ def getPhases(debug):
def startTagBody(self, token):
self.parser.parseError("unexpected-start-tag", {"name": "body"})
- if (len(self.tree.openElements) == 1
- or self.tree.openElements[1].name != "body"):
+ if (len(self.tree.openElements) == 1 or
+ self.tree.openElements[1].name != "body"):
assert self.parser.innerHTML
else:
self.parser.framesetOK = False
@@ -1232,6 +1223,12 @@ def getPhases(debug):
self.parser.framesetOK = False
self.startTagRawtext(token)
+ def startTagNoscript(self, token):
+ if self.parser.scripting:
+ self.startTagRawtext(token)
+ else:
+ self.startTagOther(token)
+
def startTagRawtext(self, token):
"""iframe, noembed noframes, noscript(if scripting enabled)"""
self.parser.parseRCDataRawtext(token, "RAWTEXT")
@@ -1327,7 +1324,7 @@ def getPhases(debug):
# Not sure this is the correct name for the parse error
self.parser.parseError(
"expected-one-end-tag-but-got-another",
- {"expectedName": "body", "gotName": node.name})
+ {"gotName": "body", "expectedName": node.name})
break
self.parser.phase = self.parser.phases["afterBody"]
@@ -1595,9 +1592,9 @@ def getPhases(debug):
class TextPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([])
+ self.startTagHandler = _utils.MethodDispatcher([])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("script", self.endTagScript)])
self.endTagHandler.default = self.endTagOther
@@ -1629,7 +1626,7 @@ def getPhases(debug):
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("caption", self.startTagCaption),
("colgroup", self.startTagColgroup),
@@ -1643,7 +1640,7 @@ def getPhases(debug):
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("table", self.endTagTable),
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
"tfoot", "th", "thead", "tr"), self.endTagIgnore)
@@ -1820,14 +1817,14 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
"thead", "tr"), self.startTagTableElement)
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("caption", self.endTagCaption),
("table", self.endTagTable),
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
@@ -1892,13 +1889,13 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("col", self.startTagCol)
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("colgroup", self.endTagColgroup),
("col", self.endTagCol)
])
@@ -1926,6 +1923,7 @@ def getPhases(debug):
def startTagCol(self, token):
self.tree.insertElement(token)
self.tree.openElements.pop()
+ token["selfClosingAcknowledged"] = True
def startTagOther(self, token):
ignoreEndTag = self.ignoreEndTagColgroup()
@@ -1955,7 +1953,7 @@ def getPhases(debug):
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("tr", self.startTagTr),
(("td", "th"), self.startTagTableCell),
@@ -1964,7 +1962,7 @@ def getPhases(debug):
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
("table", self.endTagTable),
(("body", "caption", "col", "colgroup", "html", "td", "th",
@@ -2053,7 +2051,7 @@ def getPhases(debug):
# http://www.whatwg.org/specs/web-apps/current-work/#in-row
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("td", "th"), self.startTagTableCell),
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
@@ -2061,7 +2059,7 @@ def getPhases(debug):
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("tr", self.endTagTr),
("table", self.endTagTable),
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
@@ -2142,14 +2140,14 @@ def getPhases(debug):
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
"thead", "tr"), self.startTagTableOther)
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
(("td", "th"), self.endTagTableCell),
(("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
(("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
@@ -2218,7 +2216,7 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("option", self.startTagOption),
("optgroup", self.startTagOptgroup),
@@ -2228,7 +2226,7 @@ def getPhases(debug):
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("option", self.endTagOption),
("optgroup", self.endTagOptgroup),
("select", self.endTagSelect)
@@ -2318,13 +2316,13 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
self.startTagTable)
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
self.endTagTable)
])
@@ -2445,7 +2443,7 @@ def getPhases(debug):
def processEndTag(self, token):
nodeIndex = len(self.tree.openElements) - 1
node = self.tree.openElements[-1]
- if node.name != token["name"]:
+ if node.name.translate(asciiUpper2Lower) != token["name"]:
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
while True:
@@ -2472,12 +2470,12 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml)
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
+ self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)])
self.endTagHandler.default = self.endTagOther
def processEOF(self):
@@ -2520,7 +2518,7 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("frameset", self.startTagFrameset),
("frame", self.startTagFrame),
@@ -2528,7 +2526,7 @@ def getPhases(debug):
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("frameset", self.endTagFrameset)
])
self.endTagHandler.default = self.endTagOther
@@ -2564,7 +2562,7 @@ def getPhases(debug):
self.tree.openElements.pop()
if (not self.parser.innerHTML and
self.tree.openElements[-1].name != "frameset"):
- # If we're not in innerHTML mode and the the current node is not a
+ # If we're not in innerHTML mode and the current node is not a
# "frameset" element (anymore) then switch.
self.parser.phase = self.parser.phases["afterFrameset"]
@@ -2577,13 +2575,13 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("noframes", self.startTagNoframes)
])
self.startTagHandler.default = self.startTagOther
- self.endTagHandler = utils.MethodDispatcher([
+ self.endTagHandler = _utils.MethodDispatcher([
("html", self.endTagHtml)
])
self.endTagHandler.default = self.endTagOther
@@ -2613,7 +2611,7 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml)
])
self.startTagHandler.default = self.startTagOther
@@ -2651,7 +2649,7 @@ def getPhases(debug):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
- self.startTagHandler = utils.MethodDispatcher([
+ self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("noframes", self.startTagNoFrames)
])
@@ -2682,13 +2680,14 @@ def getPhases(debug):
def processEndTag(self, token):
self.parser.parseError("expected-eof-but-got-end-tag",
{"name": token["name"]})
+ # pylint:enable=unused-argument
return {
"initial": InitialPhase,
"beforeHtml": BeforeHtmlPhase,
"beforeHead": BeforeHeadPhase,
"inHead": InHeadPhase,
- # XXX "inHeadNoscript": InHeadNoScriptPhase,
+ "inHeadNoscript": InHeadNoscriptPhase,
"afterHead": AfterHeadPhase,
"inBody": InBodyPhase,
"text": TextPhase,
@@ -2711,6 +2710,16 @@ def getPhases(debug):
}
+def adjust_attributes(token, replacements):
+ if PY3 or _utils.PY27:
+ needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
+ else:
+ needs_adjustment = frozenset(token['data']) & frozenset(replacements)
+ if needs_adjustment:
+ token['data'] = OrderedDict((replacements.get(k, k), v)
+ for k, v in token['data'].items())
+
+
def impliedTagToken(name, type="EndTag", attributes=None,
selfClosing=False):
if attributes is None:
diff --git a/pip/_vendor/html5lib/sanitizer.py b/pip/_vendor/html5lib/sanitizer.py
deleted file mode 100644
index b714e8c9e..000000000
--- a/pip/_vendor/html5lib/sanitizer.py
+++ /dev/null
@@ -1,300 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-from xml.sax.saxutils import escape, unescape
-from six.moves import urllib_parse as urlparse
-
-from .tokenizer import HTMLTokenizer
-from .constants import tokenTypes
-
-
-content_type_rgx = re.compile(r'''
- ^
- # Match a content type <application>/<type>
- (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
- # Match any character set and encoding
- (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
- |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
- # Assume the rest is data
- ,.*
- $
- ''',
- re.VERBOSE)
-
-
-class HTMLSanitizerMixin(object):
- """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
-
- acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
- 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
- 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
- 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
- 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
- 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
- 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
- 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
- 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
- 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
- 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
- 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
- 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
-
- mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
- 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
- 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
- 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
- 'munderover', 'none']
-
- svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
- 'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
- 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
- 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
- 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
- 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
-
- acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
- 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
- 'background', 'balance', 'bgcolor', 'bgproperties', 'border',
- 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
- 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
- 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
- 'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
- 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
- 'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
- 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
- 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
- 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
- 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
- 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
- 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
- 'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
- 'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
- 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
- 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
- 'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
- 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
- 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
- 'width', 'wrap', 'xml:lang']
-
- mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
- 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
- 'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
- 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
- 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
- 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
- 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
- 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
- 'xlink:type', 'xmlns', 'xmlns:xlink']
-
- svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
- 'arabic-form', 'ascent', 'attributeName', 'attributeType',
- 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
- 'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
- 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
- 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
- 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
- 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
- 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
- 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
- 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
- 'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
- 'opacity', 'orient', 'origin', 'overline-position',
- 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
- 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
- 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
- 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
- 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
- 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
- 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
- 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
- 'transform', 'type', 'u1', 'u2', 'underline-position',
- 'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
- 'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
- 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
- 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
- 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
- 'y1', 'y2', 'zoomAndPan']
-
- attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background', 'datasrc',
- 'dynsrc', 'lowsrc', 'ping', 'poster', 'xlink:href', 'xml:base']
-
- svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
- 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
- 'mask', 'stroke']
-
- svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
- 'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
- 'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
- 'set', 'use']
-
- acceptable_css_properties = ['azimuth', 'background-color',
- 'border-bottom-color', 'border-collapse', 'border-color',
- 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
- 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
- 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
- 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
- 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
- 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
- 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
- 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
- 'white-space', 'width']
-
- acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
- 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
- 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
- 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
- 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
- 'transparent', 'underline', 'white', 'yellow']
-
- acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
- 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
- 'stroke-opacity']
-
- acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
- 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
- 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
- 'ssh', 'sftp', 'rtsp', 'afs', 'data']
-
- acceptable_content_types = ['image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain']
-
- # subclasses may define their own versions of these constants
- allowed_elements = acceptable_elements + mathml_elements + svg_elements
- allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
- allowed_css_properties = acceptable_css_properties
- allowed_css_keywords = acceptable_css_keywords
- allowed_svg_properties = acceptable_svg_properties
- allowed_protocols = acceptable_protocols
- allowed_content_types = acceptable_content_types
-
- # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
- # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
- # attributes are parsed, and a restricted set, # specified by
- # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
- # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
- # in ALLOWED_PROTOCOLS are allowed.
- #
- # sanitize_html('<script> do_nasty_stuff() </script>')
- # => &lt;script> do_nasty_stuff() &lt;/script>
- # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
- # => <a>Click here for $100</a>
- def sanitize_token(self, token):
-
- # accommodate filters which use token_type differently
- token_type = token["type"]
- if token_type in list(tokenTypes.keys()):
- token_type = tokenTypes[token_type]
-
- if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
- tokenTypes["EmptyTag"]):
- if token["name"] in self.allowed_elements:
- return self.allowed_token(token, token_type)
- else:
- return self.disallowed_token(token, token_type)
- elif token_type == tokenTypes["Comment"]:
- pass
- else:
- return token
-
- def allowed_token(self, token, token_type):
- if "data" in token:
- attrs = dict([(name, val) for name, val in
- token["data"][::-1]
- if name in self.allowed_attributes])
- for attr in self.attr_val_is_uri:
- if attr not in attrs:
- continue
- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
- unescape(attrs[attr])).lower()
- # remove replacement characters from unescaped characters
- val_unescaped = val_unescaped.replace("\ufffd", "")
- try:
- uri = urlparse.urlparse(val_unescaped)
- except ValueError:
- uri = None
- del attrs[attr]
- if uri and uri.scheme:
- if uri.scheme not in self.allowed_protocols:
- del attrs[attr]
- if uri.scheme == 'data':
- m = content_type_rgx.match(uri.path)
- if not m:
- del attrs[attr]
- elif m.group('content_type') not in self.allowed_content_types:
- del attrs[attr]
-
- for attr in self.svg_attr_val_allows_ref:
- if attr in attrs:
- attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
- ' ',
- unescape(attrs[attr]))
- if (token["name"] in self.svg_allow_local_href and
- 'xlink:href' in attrs and re.search('^\s*[^#\s].*',
- attrs['xlink:href'])):
- del attrs['xlink:href']
- if 'style' in attrs:
- attrs['style'] = self.sanitize_css(attrs['style'])
- token["data"] = [[name, val] for name, val in list(attrs.items())]
- return token
-
- def disallowed_token(self, token, token_type):
- if token_type == tokenTypes["EndTag"]:
- token["data"] = "</%s>" % token["name"]
- elif token["data"]:
- attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
- token["data"] = "<%s%s>" % (token["name"], attrs)
- else:
- token["data"] = "<%s>" % token["name"]
- if token.get("selfClosing"):
- token["data"] = token["data"][:-1] + "/>"
-
- if token["type"] in list(tokenTypes.keys()):
- token["type"] = "Characters"
- else:
- token["type"] = tokenTypes["Characters"]
-
- del token["name"]
- return token
-
- def sanitize_css(self, style):
- # disallow urls
- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
-
- # gauntlet
- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
- return ''
- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
- return ''
-
- clean = []
- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
- if not value:
- continue
- if prop.lower() in self.allowed_css_properties:
- clean.append(prop + ': ' + value + ';')
- elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
- 'padding']:
- for keyword in value.split():
- if keyword not in self.acceptable_css_keywords and \
- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
- break
- else:
- clean.append(prop + ': ' + value + ';')
- elif prop.lower() in self.allowed_svg_properties:
- clean.append(prop + ': ' + value + ';')
-
- return ' '.join(clean)
-
-
-class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
- def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
- lowercaseElementName=False, lowercaseAttrName=False, parser=None):
- # Change case matching defaults as we only output lowercase html anyway
- # This solution doesn't seem ideal...
- HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
- lowercaseElementName, lowercaseAttrName, parser=parser)
-
- def __iter__(self):
- for token in HTMLTokenizer.__iter__(self):
- token = self.sanitize_token(token)
- if token:
- yield token
diff --git a/pip/_vendor/html5lib/serializer/htmlserializer.py b/pip/_vendor/html5lib/serializer.py
index 01d9c5ff6..2fb348151 100644
--- a/pip/_vendor/html5lib/serializer/htmlserializer.py
+++ b/pip/_vendor/html5lib/serializer.py
@@ -1,79 +1,87 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type
-try:
- from functools import reduce
-except ImportError:
- pass
+import re
+
+from codecs import register_error, xmlcharrefreplace_errors
-from ..constants import voidElements, booleanAttributes, spaceCharacters
-from ..constants import rcdataElements, entities, xmlEntities
-from .. import utils
+from .constants import voidElements, booleanAttributes, spaceCharacters
+from .constants import rcdataElements, entities, xmlEntities
+from . import treewalkers, _utils
from xml.sax.saxutils import escape
-spaceCharacters = "".join(spaceCharacters)
-
-try:
- from codecs import register_error, xmlcharrefreplace_errors
-except ImportError:
- unicode_encode_errors = "strict"
-else:
- unicode_encode_errors = "htmlentityreplace"
-
- encode_entity_map = {}
- is_ucs4 = len("\U0010FFFF") == 1
- for k, v in list(entities.items()):
- # skip multi-character entities
- if ((is_ucs4 and len(v) > 1) or
- (not is_ucs4 and len(v) > 2)):
- continue
- if v != "&":
- if len(v) == 2:
- v = utils.surrogatePairToCodepoint(v)
- else:
- v = ord(v)
- if v not in encode_entity_map or k.islower():
- # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
- encode_entity_map[v] = k
-
- def htmlentityreplace_errors(exc):
- if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
- res = []
- codepoints = []
- skip = False
- for i, c in enumerate(exc.object[exc.start:exc.end]):
- if skip:
- skip = False
- continue
- index = i + exc.start
- if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
- codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
- skip = True
- else:
- codepoint = ord(c)
- codepoints.append(codepoint)
- for cp in codepoints:
- e = encode_entity_map.get(cp)
- if e:
- res.append("&")
- res.append(e)
- if not e.endswith(";"):
- res.append(";")
- else:
- res.append("&#x%s;" % (hex(cp)[2:]))
- return ("".join(res), exc.end)
+_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
+_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
+_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
+ "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
+ "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
+ "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+ "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
+ "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+ "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
+ "\u3000]")
+
+
+_encode_entity_map = {}
+_is_ucs4 = len("\U0010FFFF") == 1
+for k, v in list(entities.items()):
+ # skip multi-character entities
+ if ((_is_ucs4 and len(v) > 1) or
+ (not _is_ucs4 and len(v) > 2)):
+ continue
+ if v != "&":
+ if len(v) == 2:
+ v = _utils.surrogatePairToCodepoint(v)
else:
- return xmlcharrefreplace_errors(exc)
+ v = ord(v)
+ if v not in _encode_entity_map or k.islower():
+ # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
+ _encode_entity_map[v] = k
+
+
+def htmlentityreplace_errors(exc):
+ if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
+ res = []
+ codepoints = []
+ skip = False
+ for i, c in enumerate(exc.object[exc.start:exc.end]):
+ if skip:
+ skip = False
+ continue
+ index = i + exc.start
+ if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
+ codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
+ skip = True
+ else:
+ codepoint = ord(c)
+ codepoints.append(codepoint)
+ for cp in codepoints:
+ e = _encode_entity_map.get(cp)
+ if e:
+ res.append("&")
+ res.append(e)
+ if not e.endswith(";"):
+ res.append(";")
+ else:
+ res.append("&#x%s;" % (hex(cp)[2:]))
+ return ("".join(res), exc.end)
+ else:
+ return xmlcharrefreplace_errors(exc)
+
+register_error("htmlentityreplace", htmlentityreplace_errors)
- register_error(unicode_encode_errors, htmlentityreplace_errors)
- del register_error
+def serialize(input, tree="etree", encoding=None, **serializer_opts):
+ # XXX: Should we cache this?
+ walker = treewalkers.getTreeWalker(tree)
+ s = HTMLSerializer(**serializer_opts)
+ return s.render(walker(input), encoding)
class HTMLSerializer(object):
# attribute quoting options
- quote_attr_values = False
+ quote_attr_values = "legacy" # be secure by default
quote_char = '"'
use_best_quote_char = True
@@ -109,9 +117,9 @@ class HTMLSerializer(object):
inject_meta_charset=True|False
Whether it insert a meta element to define the character set of the
document.
- quote_attr_values=True|False
+ quote_attr_values="legacy"|"spec"|"always"
Whether to quote attribute values that don't require quoting
- per HTML5 parsing rules.
+ per legacy browser behaviour, when required by the standard, or always.
quote_char=u'"'|u"'"
Use given quote character for attribute quoting. Default is to
use double quote unless attribute value contains a double quote,
@@ -147,6 +155,9 @@ class HTMLSerializer(object):
.. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
"""
+ unexpected_args = frozenset(kwargs) - frozenset(self.options)
+ if len(unexpected_args) > 0:
+ raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
if 'quote_char' in kwargs:
self.use_best_quote_char = False
for attr in self.options:
@@ -157,7 +168,7 @@ class HTMLSerializer(object):
def encode(self, string):
assert(isinstance(string, text_type))
if self.encoding:
- return string.encode(self.encoding, unicode_encode_errors)
+ return string.encode(self.encoding, "htmlentityreplace")
else:
return string
@@ -169,28 +180,30 @@ class HTMLSerializer(object):
return string
def serialize(self, treewalker, encoding=None):
+ # pylint:disable=too-many-nested-blocks
self.encoding = encoding
in_cdata = False
self.errors = []
if encoding and self.inject_meta_charset:
- from ..filters.inject_meta_charset import Filter
+ from .filters.inject_meta_charset import Filter
treewalker = Filter(treewalker, encoding)
+ # Alphabetical attributes is here under the assumption that none of
+ # the later filters add or change order of attributes; it needs to be
+ # before the sanitizer so escaped elements come out correctly
+ if self.alphabetical_attributes:
+ from .filters.alphabeticalattributes import Filter
+ treewalker = Filter(treewalker)
# WhitespaceFilter should be used before OptionalTagFilter
# for maximum efficiently of this latter filter
if self.strip_whitespace:
- from ..filters.whitespace import Filter
+ from .filters.whitespace import Filter
treewalker = Filter(treewalker)
if self.sanitize:
- from ..filters.sanitizer import Filter
+ from .filters.sanitizer import Filter
treewalker = Filter(treewalker)
if self.omit_optional_tags:
- from ..filters.optionaltags import Filter
- treewalker = Filter(treewalker)
- # Alphabetical attributes must be last, as other filters
- # could add attributes and alter the order
- if self.alphabetical_attributes:
- from ..filters.alphabeticalattributes import Filter
+ from .filters.optionaltags import Filter
treewalker = Filter(treewalker)
for token in treewalker:
@@ -229,7 +242,7 @@ class HTMLSerializer(object):
in_cdata = True
elif in_cdata:
self.serializeError("Unexpected child element of a CDATA element")
- for (attr_namespace, attr_name), attr_value in token["data"].items():
+ for (_, attr_name), attr_value in token["data"].items():
# TODO: Add namespace support here
k = attr_name
v = attr_value
@@ -237,14 +250,18 @@ class HTMLSerializer(object):
yield self.encodeStrict(k)
if not self.minimize_boolean_attributes or \
- (k not in booleanAttributes.get(name, tuple())
- and k not in booleanAttributes.get("", tuple())):
+ (k not in booleanAttributes.get(name, tuple()) and
+ k not in booleanAttributes.get("", tuple())):
yield self.encodeStrict("=")
- if self.quote_attr_values or not v:
+ if self.quote_attr_values == "always" or len(v) == 0:
quote_attr = True
+ elif self.quote_attr_values == "spec":
+ quote_attr = _quoteAttributeSpec.search(v) is not None
+ elif self.quote_attr_values == "legacy":
+ quote_attr = _quoteAttributeLegacy.search(v) is not None
else:
- quote_attr = reduce(lambda x, y: x or (y in v),
- spaceCharacters + ">\"'=", False)
+ raise ValueError("quote_attr_values must be one of: "
+ "'always', 'spec', or 'legacy'")
v = v.replace("&", "&amp;")
if self.escape_lt_in_attrs:
v = v.replace("<", "&lt;")
@@ -312,6 +329,6 @@ class HTMLSerializer(object):
raise SerializeError
-def SerializeError(Exception):
+class SerializeError(Exception):
"""Error in serialized tree"""
pass
diff --git a/pip/_vendor/html5lib/serializer/__init__.py b/pip/_vendor/html5lib/serializer/__init__.py
deleted file mode 100644
index 8380839a6..000000000
--- a/pip/_vendor/html5lib/serializer/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from .. import treewalkers
-
-from .htmlserializer import HTMLSerializer
-
-
-def serialize(input, tree="etree", format="html", encoding=None,
- **serializer_opts):
- # XXX: Should we cache this?
- walker = treewalkers.getTreeWalker(tree)
- if format == "html":
- s = HTMLSerializer(**serializer_opts)
- else:
- raise ValueError("type must be html")
- return s.render(walker(input), encoding)
diff --git a/pip/_vendor/html5lib/treeadapters/__init__.py b/pip/_vendor/html5lib/treeadapters/__init__.py
index e69de29bb..4f9784660 100644
--- a/pip/_vendor/html5lib/treeadapters/__init__.py
+++ b/pip/_vendor/html5lib/treeadapters/__init__.py
@@ -0,0 +1,12 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import sax
+
+__all__ = ["sax"]
+
+try:
+ from . import genshi # noqa
+except ImportError:
+ pass
+else:
+ __all__.append("genshi")
diff --git a/pip/_vendor/html5lib/treeadapters/genshi.py b/pip/_vendor/html5lib/treeadapters/genshi.py
new file mode 100644
index 000000000..04e316df5
--- /dev/null
+++ b/pip/_vendor/html5lib/treeadapters/genshi.py
@@ -0,0 +1,47 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from genshi.core import QName, Attrs
+from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
+
+
+def to_genshi(walker):
+ text = []
+ for token in walker:
+ type = token["type"]
+ if type in ("Characters", "SpaceCharacters"):
+ text.append(token["data"])
+ elif text:
+ yield TEXT, "".join(text), (None, -1, -1)
+ text = []
+
+ if type in ("StartTag", "EmptyTag"):
+ if token["namespace"]:
+ name = "{%s}%s" % (token["namespace"], token["name"])
+ else:
+ name = token["name"]
+ attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
+ for attr, value in token["data"].items()])
+ yield (START, (QName(name), attrs), (None, -1, -1))
+ if type == "EmptyTag":
+ type = "EndTag"
+
+ if type == "EndTag":
+ if token["namespace"]:
+ name = "{%s}%s" % (token["namespace"], token["name"])
+ else:
+ name = token["name"]
+
+ yield END, QName(name), (None, -1, -1)
+
+ elif type == "Comment":
+ yield COMMENT, token["data"], (None, -1, -1)
+
+ elif type == "Doctype":
+ yield DOCTYPE, (token["name"], token["publicId"],
+ token["systemId"]), (None, -1, -1)
+
+ else:
+ pass # FIXME: What to do?
+
+ if text:
+ yield TEXT, "".join(text), (None, -1, -1)
diff --git a/pip/_vendor/html5lib/treebuilders/__init__.py b/pip/_vendor/html5lib/treebuilders/__init__.py
index 6a6b2a4c4..e23288474 100644
--- a/pip/_vendor/html5lib/treebuilders/__init__.py
+++ b/pip/_vendor/html5lib/treebuilders/__init__.py
@@ -28,7 +28,7 @@ to the format used in the unittests
from __future__ import absolute_import, division, unicode_literals
-from ..utils import default_etree
+from .._utils import default_etree
treeBuilderCache = {}
diff --git a/pip/_vendor/html5lib/treebuilders/_base.py b/pip/_vendor/html5lib/treebuilders/base.py
index 970c9adb6..9798f7cfb 100644
--- a/pip/_vendor/html5lib/treebuilders/_base.py
+++ b/pip/_vendor/html5lib/treebuilders/base.py
@@ -126,6 +126,7 @@ class TreeBuilder(object):
commentClass - the class to use for comments
doctypeClass - the class to use for doctypes
"""
+ # pylint:disable=not-callable
# Document class
documentClass = None
@@ -166,12 +167,17 @@ class TreeBuilder(object):
# If we pass a node in we match that. if we pass a string
# match any node with that name
exactNode = hasattr(target, "nameTuple")
+ if not exactNode:
+ if isinstance(target, text_type):
+ target = (namespaces["html"], target)
+ assert isinstance(target, tuple)
listElements, invert = listElementsMap[variant]
for node in reversed(self.openElements):
- if (node.name == target and not exactNode or
- node == target and exactNode):
+ if exactNode and node == target:
+ return True
+ elif not exactNode and node.nameTuple == target:
return True
elif (invert ^ (node.nameTuple in listElements)):
return False
@@ -353,8 +359,8 @@ class TreeBuilder(object):
def generateImpliedEndTags(self, exclude=None):
name = self.openElements[-1].name
# XXX td, th and tr are not actually needed
- if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
- and name != exclude):
+ if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
+ name != exclude):
self.openElements.pop()
# XXX This is not entirely what the specification says. We should
# investigate it more closely.
diff --git a/pip/_vendor/html5lib/treebuilders/dom.py b/pip/_vendor/html5lib/treebuilders/dom.py
index 234233b79..dcfac220b 100644
--- a/pip/_vendor/html5lib/treebuilders/dom.py
+++ b/pip/_vendor/html5lib/treebuilders/dom.py
@@ -1,54 +1,62 @@
from __future__ import absolute_import, division, unicode_literals
+from collections import MutableMapping
from xml.dom import minidom, Node
import weakref
-from . import _base
+from . import base
from .. import constants
from ..constants import namespaces
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
def getDomBuilder(DomImplementation):
Dom = DomImplementation
- class AttrList(object):
+ class AttrList(MutableMapping):
def __init__(self, element):
self.element = element
def __iter__(self):
- return list(self.element.attributes.items()).__iter__()
+ return iter(self.element.attributes.keys())
def __setitem__(self, name, value):
- self.element.setAttribute(name, value)
+ if isinstance(name, tuple):
+ raise NotImplementedError
+ else:
+ attr = self.element.ownerDocument.createAttribute(name)
+ attr.value = value
+ self.element.attributes[name] = attr
def __len__(self):
- return len(list(self.element.attributes.items()))
+ return len(self.element.attributes)
def items(self):
- return [(item[0], item[1]) for item in
- list(self.element.attributes.items())]
+ return list(self.element.attributes.items())
- def keys(self):
- return list(self.element.attributes.keys())
+ def values(self):
+ return list(self.element.attributes.values())
def __getitem__(self, name):
- return self.element.getAttribute(name)
+ if isinstance(name, tuple):
+ raise NotImplementedError
+ else:
+ return self.element.attributes[name].value
- def __contains__(self, name):
+ def __delitem__(self, name):
if isinstance(name, tuple):
raise NotImplementedError
else:
- return self.element.hasAttribute(name)
+ del self.element.attributes[name]
- class NodeBuilder(_base.Node):
+ class NodeBuilder(base.Node):
def __init__(self, element):
- _base.Node.__init__(self, element.nodeName)
+ base.Node.__init__(self, element.nodeName)
self.element = element
- namespace = property(lambda self: hasattr(self.element, "namespaceURI")
- and self.element.namespaceURI or None)
+ namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
+ self.element.namespaceURI or None)
def appendChild(self, node):
node.parent = self
@@ -109,7 +117,7 @@ def getDomBuilder(DomImplementation):
nameTuple = property(getNameTuple)
- class TreeBuilder(_base.TreeBuilder):
+ class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable
def documentClass(self):
self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
return weakref.proxy(self)
@@ -149,15 +157,16 @@ def getDomBuilder(DomImplementation):
return self.dom
def getFragment(self):
- return _base.TreeBuilder.getFragment(self).element
+ return base.TreeBuilder.getFragment(self).element
def insertText(self, data, parent=None):
data = data
if parent != self:
- _base.TreeBuilder.insertText(self, data, parent)
+ base.TreeBuilder.insertText(self, data, parent)
else:
# HACK: allow text nodes as children of the document node
if hasattr(self.dom, '_child_node_types'):
+ # pylint:disable=protected-access
if Node.TEXT_NODE not in self.dom._child_node_types:
self.dom._child_node_types = list(self.dom._child_node_types)
self.dom._child_node_types.append(Node.TEXT_NODE)
diff --git a/pip/_vendor/html5lib/treebuilders/etree.py b/pip/_vendor/html5lib/treebuilders/etree.py
index 48fead7e2..0dedf4416 100644
--- a/pip/_vendor/html5lib/treebuilders/etree.py
+++ b/pip/_vendor/html5lib/treebuilders/etree.py
@@ -1,13 +1,15 @@
from __future__ import absolute_import, division, unicode_literals
+# pylint:disable=protected-access
+
from pip._vendor.six import text_type
import re
-from . import _base
-from .. import ihatexml
+from . import base
+from .. import _ihatexml
from .. import constants
from ..constants import namespaces
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
tag_regexp = re.compile("{([^}]*)}(.*)")
@@ -16,7 +18,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
ElementTree = ElementTreeImplementation
ElementTreeCommentType = ElementTree.Comment("asd").tag
- class Element(_base.Node):
+ class Element(base.Node):
def __init__(self, name, namespace=None):
self._name = name
self._namespace = namespace
@@ -98,6 +100,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
node.parent = self
def removeChild(self, node):
+ self._childNodes.remove(node)
self._element.remove(node._element)
node.parent = None
@@ -139,7 +142,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
if self._element.text is not None:
newParent._element.text += self._element.text
self._element.text = ""
- _base.Node.reparentChildren(self, newParent)
+ base.Node.reparentChildren(self, newParent)
class Comment(Element):
def __init__(self, data):
@@ -253,10 +256,10 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
return "\n".join(rv)
- def tostring(element):
+ def tostring(element): # pylint:disable=unused-variable
"""Serialize an element and its child nodes to a string"""
rv = []
- filter = ihatexml.InfosetFilter()
+ filter = _ihatexml.InfosetFilter()
def serializeElement(element):
if isinstance(element, ElementTree.ElementTree):
@@ -307,7 +310,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
return "".join(rv)
- class TreeBuilder(_base.TreeBuilder):
+ class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable
documentClass = Document
doctypeClass = DocumentType
elementClass = Element
@@ -329,7 +332,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
return self.document._element.find("html")
def getFragment(self):
- return _base.TreeBuilder.getFragment(self)._element
+ return base.TreeBuilder.getFragment(self)._element
return locals()
diff --git a/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/pip/_vendor/html5lib/treebuilders/etree_lxml.py
index 35d08efaa..908820c08 100644
--- a/pip/_vendor/html5lib/treebuilders/etree_lxml.py
+++ b/pip/_vendor/html5lib/treebuilders/etree_lxml.py
@@ -10,16 +10,17 @@ When any of these things occur, we emit a DataLossWarning
"""
from __future__ import absolute_import, division, unicode_literals
+# pylint:disable=protected-access
import warnings
import re
import sys
-from . import _base
+from . import base
from ..constants import DataLossWarning
from .. import constants
from . import etree as etree_builders
-from .. import ihatexml
+from .. import _ihatexml
import lxml.etree as etree
@@ -53,8 +54,7 @@ class Document(object):
def testSerializer(element):
rv = []
- finalText = None
- infosetFilter = ihatexml.InfosetFilter()
+ infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
def serializeElement(element, indent=0):
if not hasattr(element, "tag"):
@@ -79,7 +79,7 @@ def testSerializer(element):
next_element = next_element.getnext()
elif isinstance(element, str) or isinstance(element, bytes):
# Text in a fragment
- assert isinstance(element, str) or sys.version_info.major == 2
+ assert isinstance(element, str) or sys.version_info[0] == 2
rv.append("|%s\"%s\"" % (' ' * indent, element))
else:
# Fragment case
@@ -128,16 +128,12 @@ def testSerializer(element):
rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
serializeElement(element, 0)
- if finalText is not None:
- rv.append("|%s\"%s\"" % (' ' * 2, finalText))
-
return "\n".join(rv)
def tostring(element):
"""Serialize an element and its child nodes to a string"""
rv = []
- finalText = None
def serializeElement(element):
if not hasattr(element, "tag"):
@@ -173,13 +169,10 @@ def tostring(element):
serializeElement(element)
- if finalText is not None:
- rv.append("%s\"" % (' ' * 2, finalText))
-
return "".join(rv)
-class TreeBuilder(_base.TreeBuilder):
+class TreeBuilder(base.TreeBuilder):
documentClass = Document
doctypeClass = DocumentType
elementClass = None
@@ -189,13 +182,15 @@ class TreeBuilder(_base.TreeBuilder):
def __init__(self, namespaceHTMLElements, fullTree=False):
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
+ infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
self.namespaceHTMLElements = namespaceHTMLElements
class Attributes(dict):
- def __init__(self, element, value={}):
+ def __init__(self, element, value=None):
+ if value is None:
+ value = {}
self._element = element
- dict.__init__(self, value)
+ dict.__init__(self, value) # pylint:disable=non-parent-init-called
for key, value in self.items():
if isinstance(key, tuple):
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
@@ -257,12 +252,12 @@ class TreeBuilder(_base.TreeBuilder):
data = property(_getData, _setData)
self.elementClass = Element
- self.commentClass = builder.Comment
+ self.commentClass = Comment
# self.fragmentClass = builder.DocumentFragment
- _base.TreeBuilder.__init__(self, namespaceHTMLElements)
+ base.TreeBuilder.__init__(self, namespaceHTMLElements)
def reset(self):
- _base.TreeBuilder.reset(self)
+ base.TreeBuilder.reset(self)
self.insertComment = self.insertCommentInitial
self.initial_comments = []
self.doctype = None
@@ -303,19 +298,21 @@ class TreeBuilder(_base.TreeBuilder):
self.doctype = doctype
def insertCommentInitial(self, data, parent=None):
+ assert parent is None or parent is self.document
+ assert self.document._elementTree is None
self.initial_comments.append(data)
def insertCommentMain(self, data, parent=None):
if (parent == self.document and
self.document._elementTree.getroot()[-1].tag == comment_type):
- warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
+ warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
super(TreeBuilder, self).insertComment(data, parent)
def insertRoot(self, token):
"""Create the document root"""
# Because of the way libxml2 works, it doesn't seem to be possible to
# alter information like the doctype after the tree has been parsed.
- # Therefore we need to use the built-in parser to create our iniial
+ # Therefore we need to use the built-in parser to create our initial
# tree, after which we can add elements like normal
docStr = ""
if self.doctype:
@@ -344,7 +341,8 @@ class TreeBuilder(_base.TreeBuilder):
# Append the initial comments:
for comment_token in self.initial_comments:
- root.addprevious(etree.Comment(comment_token["data"]))
+ comment = self.commentClass(comment_token["data"])
+ root.addprevious(comment._element)
# Create the root document and add the ElementTree to it
self.document = self.documentClass()
diff --git a/pip/_vendor/html5lib/treewalkers/__init__.py b/pip/_vendor/html5lib/treewalkers/__init__.py
index 20b91b114..9e19a5595 100644
--- a/pip/_vendor/html5lib/treewalkers/__init__.py
+++ b/pip/_vendor/html5lib/treewalkers/__init__.py
@@ -10,13 +10,10 @@ returning an iterator generating tokens.
from __future__ import absolute_import, division, unicode_literals
-__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
- "pulldom"]
-
-import sys
-
from .. import constants
-from ..utils import default_etree
+from .._utils import default_etree
+
+__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]
treeWalkerCache = {}
@@ -24,34 +21,33 @@ treeWalkerCache = {}
def getTreeWalker(treeType, implementation=None, **kwargs):
"""Get a TreeWalker class for various types of tree with built-in support
- treeType - the name of the tree type required (case-insensitive). Supported
- values are:
+ Args:
+ treeType (str): the name of the tree type required (case-insensitive).
+ Supported values are:
- "dom" - The xml.dom.minidom DOM implementation
- "pulldom" - The xml.dom.pulldom event stream
- "etree" - A generic walker for tree implementations exposing an
- elementtree-like interface (known to work with
- ElementTree, cElementTree and lxml.etree).
- "lxml" - Optimized walker for lxml.etree
- "genshi" - a Genshi stream
+ - "dom": The xml.dom.minidom DOM implementation
+ - "etree": A generic walker for tree implementations exposing an
+ elementtree-like interface (known to work with
+ ElementTree, cElementTree and lxml.etree).
+ - "lxml": Optimized walker for lxml.etree
+ - "genshi": a Genshi stream
- implementation - (Currently applies to the "etree" tree type only). A module
- implementing the tree type e.g. xml.etree.ElementTree or
- cElementTree."""
+ Implementation: A module implementing the tree type e.g.
+ xml.etree.ElementTree or cElementTree (Currently applies to the
+ "etree" tree type only).
+ """
treeType = treeType.lower()
if treeType not in treeWalkerCache:
- if treeType in ("dom", "pulldom"):
- name = "%s.%s" % (__name__, treeType)
- __import__(name)
- mod = sys.modules[name]
- treeWalkerCache[treeType] = mod.TreeWalker
+ if treeType == "dom":
+ from . import dom
+ treeWalkerCache[treeType] = dom.TreeWalker
elif treeType == "genshi":
- from . import genshistream
- treeWalkerCache[treeType] = genshistream.TreeWalker
+ from . import genshi
+ treeWalkerCache[treeType] = genshi.TreeWalker
elif treeType == "lxml":
- from . import lxmletree
- treeWalkerCache[treeType] = lxmletree.TreeWalker
+ from . import etree_lxml
+ treeWalkerCache[treeType] = etree_lxml.TreeWalker
elif treeType == "etree":
from . import etree
if implementation is None:
diff --git a/pip/_vendor/html5lib/treewalkers/_base.py b/pip/_vendor/html5lib/treewalkers/base.py
index 42a59a4bf..36e1ba242 100644
--- a/pip/_vendor/html5lib/treewalkers/_base.py
+++ b/pip/_vendor/html5lib/treewalkers/base.py
@@ -1,11 +1,11 @@
from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type, string_types
+
+from xml.dom import Node
+from ..constants import namespaces, voidElements, spaceCharacters
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
"TreeWalker", "NonRecursiveTreeWalker"]
-from xml.dom import Node
-
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
@@ -14,28 +14,9 @@ COMMENT = Node.COMMENT_NODE
ENTITY = Node.ENTITY_NODE
UNKNOWN = "<#UNKNOWN#>"
-from ..constants import voidElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)
-def to_text(s, blank_if_none=True):
- """Wrapper around six.text_type to convert None to empty string"""
- if s is None:
- if blank_if_none:
- return ""
- else:
- return None
- elif isinstance(s, text_type):
- return s
- else:
- return text_type(s)
-
-
-def is_text_or_none(string):
- """Wrapper around isinstance(string_types) or is None"""
- return string is None or isinstance(string, string_types)
-
-
class TreeWalker(object):
def __init__(self, tree):
self.tree = tree
@@ -47,47 +28,25 @@ class TreeWalker(object):
return {"type": "SerializeError", "data": msg}
def emptyTag(self, namespace, name, attrs, hasChildren=False):
- assert namespace is None or isinstance(namespace, string_types), type(namespace)
- assert isinstance(name, string_types), type(name)
- assert all((namespace is None or isinstance(namespace, string_types)) and
- isinstance(name, string_types) and
- isinstance(value, string_types)
- for (namespace, name), value in attrs.items())
-
- yield {"type": "EmptyTag", "name": to_text(name, False),
- "namespace": to_text(namespace),
+ yield {"type": "EmptyTag", "name": name,
+ "namespace": namespace,
"data": attrs}
if hasChildren:
yield self.error("Void element has children")
def startTag(self, namespace, name, attrs):
- assert namespace is None or isinstance(namespace, string_types), type(namespace)
- assert isinstance(name, string_types), type(name)
- assert all((namespace is None or isinstance(namespace, string_types)) and
- isinstance(name, string_types) and
- isinstance(value, string_types)
- for (namespace, name), value in attrs.items())
-
return {"type": "StartTag",
- "name": text_type(name),
- "namespace": to_text(namespace),
- "data": dict(((to_text(namespace, False), to_text(name)),
- to_text(value, False))
- for (namespace, name), value in attrs.items())}
+ "name": name,
+ "namespace": namespace,
+ "data": attrs}
def endTag(self, namespace, name):
- assert namespace is None or isinstance(namespace, string_types), type(namespace)
- assert isinstance(name, string_types), type(namespace)
-
return {"type": "EndTag",
- "name": to_text(name, False),
- "namespace": to_text(namespace),
- "data": {}}
+ "name": name,
+ "namespace": namespace}
def text(self, data):
- assert isinstance(data, string_types), type(data)
-
- data = to_text(data)
+ data = data
middle = data.lstrip(spaceCharacters)
left = data[:len(data) - len(middle)]
if left:
@@ -101,25 +60,16 @@ class TreeWalker(object):
yield {"type": "SpaceCharacters", "data": right}
def comment(self, data):
- assert isinstance(data, string_types), type(data)
-
- return {"type": "Comment", "data": text_type(data)}
-
- def doctype(self, name, publicId=None, systemId=None, correct=True):
- assert is_text_or_none(name), type(name)
- assert is_text_or_none(publicId), type(publicId)
- assert is_text_or_none(systemId), type(systemId)
+ return {"type": "Comment", "data": data}
+ def doctype(self, name, publicId=None, systemId=None):
return {"type": "Doctype",
- "name": to_text(name),
- "publicId": to_text(publicId),
- "systemId": to_text(systemId),
- "correct": to_text(correct)}
+ "name": name,
+ "publicId": publicId,
+ "systemId": systemId}
def entity(self, name):
- assert isinstance(name, string_types), type(name)
-
- return {"type": "Entity", "name": text_type(name)}
+ return {"type": "Entity", "name": name}
def unknown(self, nodeType):
return self.error("Unknown node type: " + nodeType)
@@ -154,7 +104,7 @@ class NonRecursiveTreeWalker(TreeWalker):
elif type == ELEMENT:
namespace, name, attributes, hasChildren = details
- if name in voidElements:
+ if (not namespace or namespace == namespaces["html"]) and name in voidElements:
for token in self.emptyTag(namespace, name, attributes,
hasChildren):
yield token
@@ -187,7 +137,7 @@ class NonRecursiveTreeWalker(TreeWalker):
type, details = details[0], details[1:]
if type == ELEMENT:
namespace, name, attributes, hasChildren = details
- if name not in voidElements:
+ if (namespace and namespace != namespaces["html"]) or name not in voidElements:
yield self.endTag(namespace, name)
if self.tree is currentNode:
currentNode = None
diff --git a/pip/_vendor/html5lib/treewalkers/dom.py b/pip/_vendor/html5lib/treewalkers/dom.py
index ac4dcf31b..b0c89b001 100644
--- a/pip/_vendor/html5lib/treewalkers/dom.py
+++ b/pip/_vendor/html5lib/treewalkers/dom.py
@@ -2,16 +2,16 @@ from __future__ import absolute_import, division, unicode_literals
from xml.dom import Node
-from . import _base
+from . import base
-class TreeWalker(_base.NonRecursiveTreeWalker):
+class TreeWalker(base.NonRecursiveTreeWalker):
def getNodeDetails(self, node):
if node.nodeType == Node.DOCUMENT_TYPE_NODE:
- return _base.DOCTYPE, node.name, node.publicId, node.systemId
+ return base.DOCTYPE, node.name, node.publicId, node.systemId
elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
- return _base.TEXT, node.nodeValue
+ return base.TEXT, node.nodeValue
elif node.nodeType == Node.ELEMENT_NODE:
attrs = {}
@@ -21,17 +21,17 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
attrs[(attr.namespaceURI, attr.localName)] = attr.value
else:
attrs[(None, attr.name)] = attr.value
- return (_base.ELEMENT, node.namespaceURI, node.nodeName,
+ return (base.ELEMENT, node.namespaceURI, node.nodeName,
attrs, node.hasChildNodes())
elif node.nodeType == Node.COMMENT_NODE:
- return _base.COMMENT, node.nodeValue
+ return base.COMMENT, node.nodeValue
elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
- return (_base.DOCUMENT,)
+ return (base.DOCUMENT,)
else:
- return _base.UNKNOWN, node.nodeType
+ return base.UNKNOWN, node.nodeType
def getFirstChild(self, node):
return node.firstChild
diff --git a/pip/_vendor/html5lib/treewalkers/etree.py b/pip/_vendor/html5lib/treewalkers/etree.py
index 716342551..d9ae3cccd 100644
--- a/pip/_vendor/html5lib/treewalkers/etree.py
+++ b/pip/_vendor/html5lib/treewalkers/etree.py
@@ -10,10 +10,10 @@ except ImportError:
import re
-from pip._vendor.six import text_type
+from pip._vendor.six import string_types
-from . import _base
-from ..utils import moduleFactoryFactory
+from . import base
+from .._utils import moduleFactoryFactory
tag_regexp = re.compile("{([^}]*)}(.*)")
@@ -22,7 +22,7 @@ def getETreeBuilder(ElementTreeImplementation):
ElementTree = ElementTreeImplementation
ElementTreeCommentType = ElementTree.Comment("asd").tag
- class TreeWalker(_base.NonRecursiveTreeWalker):
+ class TreeWalker(base.NonRecursiveTreeWalker): # pylint:disable=unused-variable
"""Given the particular ElementTree representation, this implementation,
to avoid using recursion, returns "nodes" as tuples with the following
content:
@@ -38,9 +38,9 @@ def getETreeBuilder(ElementTreeImplementation):
"""
def getNodeDetails(self, node):
if isinstance(node, tuple): # It might be the root Element
- elt, key, parents, flag = node
+ elt, _, _, flag = node
if flag in ("text", "tail"):
- return _base.TEXT, getattr(elt, flag)
+ return base.TEXT, getattr(elt, flag)
else:
node = elt
@@ -48,14 +48,14 @@ def getETreeBuilder(ElementTreeImplementation):
node = node.getroot()
if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
- return (_base.DOCUMENT,)
+ return (base.DOCUMENT,)
elif node.tag == "<!DOCTYPE>":
- return (_base.DOCTYPE, node.text,
+ return (base.DOCTYPE, node.text,
node.get("publicId"), node.get("systemId"))
elif node.tag == ElementTreeCommentType:
- return _base.COMMENT, node.text
+ return base.COMMENT, node.text
else:
assert isinstance(node.tag, string_types), type(node.tag)
@@ -73,7 +73,7 @@ def getETreeBuilder(ElementTreeImplementation):
attrs[(match.group(1), match.group(2))] = value
else:
attrs[(None, name)] = value
- return (_base.ELEMENT, namespace, tag,
+ return (base.ELEMENT, namespace, tag,
attrs, len(node) or node.text)
def getFirstChild(self, node):
@@ -129,6 +129,7 @@ def getETreeBuilder(ElementTreeImplementation):
if not parents:
return parent
else:
+ assert list(parents[-1]).count(parent) == 1
return parent, list(parents[-1]).index(parent), parents, None
return locals()
diff --git a/pip/_vendor/html5lib/treewalkers/lxmletree.py b/pip/_vendor/html5lib/treewalkers/etree_lxml.py
index a4854869d..e81ddf33b 100644
--- a/pip/_vendor/html5lib/treewalkers/lxmletree.py
+++ b/pip/_vendor/html5lib/treewalkers/etree_lxml.py
@@ -4,9 +4,9 @@ from pip._vendor.six import text_type
from lxml import etree
from ..treebuilders.etree import tag_regexp
-from . import _base
+from . import base
-from .. import ihatexml
+from .. import _ihatexml
def ensure_str(s):
@@ -15,20 +15,27 @@ def ensure_str(s):
elif isinstance(s, text_type):
return s
else:
- return s.decode("utf-8", "strict")
+ return s.decode("ascii", "strict")
class Root(object):
def __init__(self, et):
self.elementtree = et
self.children = []
- if et.docinfo.internalDTD:
- self.children.append(Doctype(self,
- ensure_str(et.docinfo.root_name),
- ensure_str(et.docinfo.public_id),
- ensure_str(et.docinfo.system_url)))
- root = et.getroot()
- node = root
+
+ try:
+ if et.docinfo.internalDTD:
+ self.children.append(Doctype(self,
+ ensure_str(et.docinfo.root_name),
+ ensure_str(et.docinfo.public_id),
+ ensure_str(et.docinfo.system_url)))
+ except AttributeError:
+ pass
+
+ try:
+ node = et.getroot()
+ except AttributeError:
+ node = et
while node.getprevious() is not None:
node = node.getprevious()
@@ -115,35 +122,38 @@ class FragmentWrapper(object):
return len(self.obj)
-class TreeWalker(_base.NonRecursiveTreeWalker):
+class TreeWalker(base.NonRecursiveTreeWalker):
def __init__(self, tree):
- if hasattr(tree, "getroot"):
- tree = Root(tree)
- elif isinstance(tree, list):
+ # pylint:disable=redefined-variable-type
+ if isinstance(tree, list):
+ self.fragmentChildren = set(tree)
tree = FragmentRoot(tree)
- _base.NonRecursiveTreeWalker.__init__(self, tree)
- self.filter = ihatexml.InfosetFilter()
+ else:
+ self.fragmentChildren = set()
+ tree = Root(tree)
+ base.NonRecursiveTreeWalker.__init__(self, tree)
+ self.filter = _ihatexml.InfosetFilter()
def getNodeDetails(self, node):
if isinstance(node, tuple): # Text node
node, key = node
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
- return _base.TEXT, ensure_str(getattr(node, key))
+ return base.TEXT, ensure_str(getattr(node, key))
elif isinstance(node, Root):
- return (_base.DOCUMENT,)
+ return (base.DOCUMENT,)
elif isinstance(node, Doctype):
- return _base.DOCTYPE, node.name, node.public_id, node.system_id
+ return base.DOCTYPE, node.name, node.public_id, node.system_id
elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
- return _base.TEXT, node.obj
+ return base.TEXT, ensure_str(node.obj)
elif node.tag == etree.Comment:
- return _base.COMMENT, ensure_str(node.text)
+ return base.COMMENT, ensure_str(node.text)
elif node.tag == etree.Entity:
- return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
+ return base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
else:
# This is assumed to be an ordinary element
@@ -162,7 +172,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
attrs[(match.group(1), match.group(2))] = value
else:
attrs[(None, name)] = value
- return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
+ return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
attrs, len(node) > 0 or node.text)
def getFirstChild(self, node):
@@ -197,5 +207,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
if key == "text":
return node
# else: fallback to "normal" processing
+ elif node in self.fragmentChildren:
+ return None
return node.getparent()
diff --git a/pip/_vendor/html5lib/treewalkers/genshistream.py b/pip/_vendor/html5lib/treewalkers/genshi.py
index f559c45d0..7483be27d 100644
--- a/pip/_vendor/html5lib/treewalkers/genshistream.py
+++ b/pip/_vendor/html5lib/treewalkers/genshi.py
@@ -4,12 +4,12 @@ from genshi.core import QName
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
-from . import _base
+from . import base
from ..constants import voidElements, namespaces
-class TreeWalker(_base.TreeWalker):
+class TreeWalker(base.TreeWalker):
def __iter__(self):
# Buffer the events so we can pass in the following one
previous = None
@@ -25,7 +25,7 @@ class TreeWalker(_base.TreeWalker):
yield token
def tokens(self, event, next):
- kind, data, pos = event
+ kind, data, _ = event
if kind == START:
tag, attribs = data
name = tag.localname
@@ -39,8 +39,8 @@ class TreeWalker(_base.TreeWalker):
if namespace == namespaces["html"] and name in voidElements:
for token in self.emptyTag(namespace, name, converted_attribs,
- not next or next[0] != END
- or next[1] != tag):
+ not next or next[0] != END or
+ next[1] != tag):
yield token
else:
yield self.startTag(namespace, name, converted_attribs)
@@ -48,7 +48,7 @@ class TreeWalker(_base.TreeWalker):
elif kind == END:
name = data.localname
namespace = data.namespace
- if name not in voidElements:
+ if namespace != namespaces["html"] or name not in voidElements:
yield self.endTag(namespace, name)
elif kind == COMMENT:
diff --git a/pip/_vendor/html5lib/treewalkers/pulldom.py b/pip/_vendor/html5lib/treewalkers/pulldom.py
deleted file mode 100644
index 0b0f515fe..000000000
--- a/pip/_vendor/html5lib/treewalkers/pulldom.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
- COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
-
-from . import _base
-
-from ..constants import voidElements
-
-
-class TreeWalker(_base.TreeWalker):
- def __iter__(self):
- ignore_until = None
- previous = None
- for event in self.tree:
- if previous is not None and \
- (ignore_until is None or previous[1] is ignore_until):
- if previous[1] is ignore_until:
- ignore_until = None
- for token in self.tokens(previous, event):
- yield token
- if token["type"] == "EmptyTag":
- ignore_until = previous[1]
- previous = event
- if ignore_until is None or previous[1] is ignore_until:
- for token in self.tokens(previous, None):
- yield token
- elif ignore_until is not None:
- raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
-
- def tokens(self, event, next):
- type, node = event
- if type == START_ELEMENT:
- name = node.nodeName
- namespace = node.namespaceURI
- attrs = {}
- for attr in list(node.attributes.keys()):
- attr = node.getAttributeNode(attr)
- attrs[(attr.namespaceURI, attr.localName)] = attr.value
- if name in voidElements:
- for token in self.emptyTag(namespace,
- name,
- attrs,
- not next or next[1] is not node):
- yield token
- else:
- yield self.startTag(namespace, name, attrs)
-
- elif type == END_ELEMENT:
- name = node.nodeName
- namespace = node.namespaceURI
- if name not in voidElements:
- yield self.endTag(namespace, name)
-
- elif type == COMMENT:
- yield self.comment(node.nodeValue)
-
- elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
- for token in self.text(node.nodeValue):
- yield token
-
- else:
- yield self.unknown(type)
diff --git a/pip/_vendor/ipaddress.py b/pip/_vendor/ipaddress.py
index 7657fc8fb..9cf71a744 100644
--- a/pip/_vendor/ipaddress.py
+++ b/pip/_vendor/ipaddress.py
@@ -14,7 +14,7 @@ from __future__ import unicode_literals
import itertools
import struct
-__version__ = '1.0.16'
+__version__ = '1.0.17'
# Compatibility functions
_compat_int_types = (int,)
@@ -759,12 +759,12 @@ class _BaseNetwork(_IPAddressBase):
broadcast = int(self.broadcast_address)
if n >= 0:
if network + n > broadcast:
- raise IndexError
+ raise IndexError('address out of range')
return self._address_class(network + n)
else:
n += 1
if broadcast + n < network:
- raise IndexError
+ raise IndexError('address out of range')
return self._address_class(broadcast + n)
def __lt__(self, other):
@@ -866,21 +866,21 @@ class _BaseNetwork(_IPAddressBase):
addr1 = ip_network('192.0.2.0/28')
addr2 = ip_network('192.0.2.1/32')
- addr1.address_exclude(addr2) =
+ list(addr1.address_exclude(addr2)) =
[IPv4Network('192.0.2.0/32'), IPv4Network('192.0.2.2/31'),
- IPv4Network('192.0.2.4/30'), IPv4Network('192.0.2.8/29')]
+ IPv4Network('192.0.2.4/30'), IPv4Network('192.0.2.8/29')]
or IPv6:
addr1 = ip_network('2001:db8::1/32')
addr2 = ip_network('2001:db8::1/128')
- addr1.address_exclude(addr2) =
+ list(addr1.address_exclude(addr2)) =
[ip_network('2001:db8::1/128'),
- ip_network('2001:db8::2/127'),
- ip_network('2001:db8::4/126'),
- ip_network('2001:db8::8/125'),
- ...
- ip_network('2001:db8:8000::/33')]
+ ip_network('2001:db8::2/127'),
+ ip_network('2001:db8::4/126'),
+ ip_network('2001:db8::8/125'),
+ ...
+ ip_network('2001:db8:8000::/33')]
Args:
other: An IPv4Network or IPv6Network object of the same type.
@@ -1039,7 +1039,7 @@ class _BaseNetwork(_IPAddressBase):
new_prefixlen, self))
start = int(self.network_address)
- end = int(self.broadcast_address)
+ end = int(self.broadcast_address) + 1
step = (int(self.hostmask) + 1) >> prefixlen_diff
for new_addr in _compat_range(start, end, step):
current = self.__class__((new_addr, new_prefixlen))
@@ -1436,6 +1436,12 @@ class IPv4Address(_BaseV4, _BaseAddress):
return any(self in net for net in self._constants._private_networks)
@property
+ def is_global(self):
+ return (
+ self not in self._constants._public_network and
+ not self.is_private)
+
+ @property
def is_multicast(self):
"""Test if the address is reserved for multicast use.
@@ -1682,6 +1688,8 @@ class _IPv4Constants(object):
_multicast_network = IPv4Network('224.0.0.0/4')
+ _public_network = IPv4Network('100.64.0.0/10')
+
_private_networks = [
IPv4Network('0.0.0.0/8'),
IPv4Network('10.0.0.0/8'),
diff --git a/pip/_vendor/ordereddict.py b/pip/_vendor/ordereddict.py
new file mode 100644
index 000000000..7242b5060
--- /dev/null
+++ b/pip/_vendor/ordereddict.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2009 Raymond Hettinger
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+from UserDict import DictMixin
+
+class OrderedDict(dict, DictMixin):
+
+ def __init__(self, *args, **kwds):
+ if len(args) > 1:
+ raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ try:
+ self.__end
+ except AttributeError:
+ self.clear()
+ self.update(*args, **kwds)
+
+ def clear(self):
+ self.__end = end = []
+ end += [None, end, end] # sentinel node for doubly linked list
+ self.__map = {} # key --> [key, prev, next]
+ dict.clear(self)
+
+ def __setitem__(self, key, value):
+ if key not in self:
+ end = self.__end
+ curr = end[1]
+ curr[2] = end[1] = self.__map[key] = [key, curr, end]
+ dict.__setitem__(self, key, value)
+
+ def __delitem__(self, key):
+ dict.__delitem__(self, key)
+ key, prev, next = self.__map.pop(key)
+ prev[2] = next
+ next[1] = prev
+
+ def __iter__(self):
+ end = self.__end
+ curr = end[2]
+ while curr is not end:
+ yield curr[0]
+ curr = curr[2]
+
+ def __reversed__(self):
+ end = self.__end
+ curr = end[1]
+ while curr is not end:
+ yield curr[0]
+ curr = curr[1]
+
+ def popitem(self, last=True):
+ if not self:
+ raise KeyError('dictionary is empty')
+ if last:
+ key = reversed(self).next()
+ else:
+ key = iter(self).next()
+ value = self.pop(key)
+ return key, value
+
+ def __reduce__(self):
+ items = [[k, self[k]] for k in self]
+ tmp = self.__map, self.__end
+ del self.__map, self.__end
+ inst_dict = vars(self).copy()
+ self.__map, self.__end = tmp
+ if inst_dict:
+ return (self.__class__, (items,), inst_dict)
+ return self.__class__, (items,)
+
+ def keys(self):
+ return list(self)
+
+ setdefault = DictMixin.setdefault
+ update = DictMixin.update
+ pop = DictMixin.pop
+ values = DictMixin.values
+ items = DictMixin.items
+ iterkeys = DictMixin.iterkeys
+ itervalues = DictMixin.itervalues
+ iteritems = DictMixin.iteritems
+
+ def __repr__(self):
+ if not self:
+ return '%s()' % (self.__class__.__name__,)
+ return '%s(%r)' % (self.__class__.__name__, self.items())
+
+ def copy(self):
+ return self.__class__(self)
+
+ @classmethod
+ def fromkeys(cls, iterable, value=None):
+ d = cls()
+ for key in iterable:
+ d[key] = value
+ return d
+
+ def __eq__(self, other):
+ if isinstance(other, OrderedDict):
+ if len(self) != len(other):
+ return False
+ for p, q in zip(self.items(), other.items()):
+ if p != q:
+ return False
+ return True
+ return dict.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not self == other
diff --git a/pip/_vendor/packaging/__about__.py b/pip/_vendor/packaging/__about__.py
index c21a758b8..95d330ef8 100644
--- a/pip/_vendor/packaging/__about__.py
+++ b/pip/_vendor/packaging/__about__.py
@@ -12,7 +12,7 @@ __title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
-__version__ = "16.7"
+__version__ = "16.8"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"
diff --git a/pip/_vendor/packaging/markers.py b/pip/_vendor/packaging/markers.py
index 919524353..f9ca1ffa3 100644
--- a/pip/_vendor/packaging/markers.py
+++ b/pip/_vendor/packaging/markers.py
@@ -54,13 +54,26 @@ class Node(object):
def __repr__(self):
return "<{0}({1!r})>".format(self.__class__.__name__, str(self))
+ def serialize(self):
+ raise NotImplementedError
+
class Variable(Node):
- pass
+
+ def serialize(self):
+ return str(self)
class Value(Node):
- pass
+
+ def serialize(self):
+ return '"{0}"'.format(self)
+
+
+class Op(Node):
+
+ def serialize(self):
+ return str(self)
VARIABLE = (
@@ -105,6 +118,7 @@ VERSION_CMP = (
)
MARKER_OP = VERSION_CMP | L("not in") | L("in")
+MARKER_OP.setParseAction(lambda s, l, t: Op(t[0]))
MARKER_VALUE = QuotedString("'") | QuotedString('"')
MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0]))
@@ -151,7 +165,7 @@ def _format_marker(marker, first=True):
else:
return "(" + " ".join(inner) + ")"
elif isinstance(marker, tuple):
- return '{0} {1} "{2}"'.format(*marker)
+ return " ".join([m.serialize() for m in marker])
else:
return marker
@@ -170,13 +184,13 @@ _operators = {
def _eval_op(lhs, op, rhs):
try:
- spec = Specifier("".join([op, rhs]))
+ spec = Specifier("".join([op.serialize(), rhs]))
except InvalidSpecifier:
pass
else:
return spec.contains(lhs)
- oper = _operators.get(op)
+ oper = _operators.get(op.serialize())
if oper is None:
raise UndefinedComparison(
"Undefined {0!r} on {1!r} and {2!r}.".format(op, lhs, rhs)
diff --git a/pip/_vendor/pyparsing.py b/pip/_vendor/pyparsing.py
index 56f196637..cb46d411a 100644
--- a/pip/_vendor/pyparsing.py
+++ b/pip/_vendor/pyparsing.py
@@ -1,6 +1,6 @@
# module pyparsing.py
#
-# Copyright (c) 2003-2015 Paul T. McGuire
+# Copyright (c) 2003-2016 Paul T. McGuire
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
@@ -31,15 +31,18 @@ vs. the traditional lex/yacc approach, or the use of regular expressions. With
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.
-Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
+Here is a program to parse "Hello, World!" (or any greeting of the form
+C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
+(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
+L{Literal} expressions)::
from pyparsing import Word, alphas
# define grammar of a greeting
- greet = Word( alphas ) + "," + Word( alphas ) + "!"
+ greet = Word(alphas) + "," + Word(alphas) + "!"
hello = "Hello, World!"
- print (hello, "->", greet.parseString( hello ))
+ print (hello, "->", greet.parseString(hello))
The program outputs the following::
@@ -48,7 +51,7 @@ The program outputs the following::
The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.
-The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
+The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
object with named attributes.
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
@@ -57,8 +60,8 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments
"""
-__version__ = "2.1.1"
-__versionTime__ = "21 Mar 2016 05:04 UTC"
+__version__ = "2.1.10"
+__versionTime__ = "07 Oct 2016 01:31 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -70,9 +73,22 @@ import re
import sre_constants
import collections
import pprint
-import functools
-import itertools
import traceback
+import types
+from datetime import datetime
+
+try:
+ from _thread import RLock
+except ImportError:
+ from threading import RLock
+
+try:
+ from collections import OrderedDict as _OrderedDict
+except ImportError:
+ try:
+ from ordereddict import OrderedDict as _OrderedDict
+ except ImportError:
+ _OrderedDict = None
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
@@ -94,9 +110,11 @@ __all__ = [
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
+'CloseMatch', 'tokenMap', 'pyparsing_common',
]
-PY_3 = sys.version.startswith('3')
+system_version = tuple(sys.version_info)[:3]
+PY_3 = system_version[0] == 3
if PY_3:
_MAX_INT = sys.maxsize
basestring = str
@@ -174,6 +192,15 @@ class ParseBaseException(Exception):
self.msg = msg
self.pstr = pstr
self.parserElement = elem
+ self.args = (pstr, loc, msg)
+
+ @classmethod
+ def _from_exception(cls, pe):
+ """
+ internal factory method to simplify creating one type of ParseException
+ from another - avoids having __init__ signature conflicts among subclasses
+ """
+ return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
def __getattr__( self, aname ):
"""supported attributes by name are:
@@ -209,11 +236,23 @@ class ParseBaseException(Exception):
return "lineno col line".split() + dir(type(self))
class ParseException(ParseBaseException):
- """exception thrown when parse expressions don't match class;
- supported attributes by name are:
- - lineno - returns the line number of the exception text
- - col - returns the column number of the exception text
- - line - returns the line containing the exception text
+ """
+ Exception thrown when parse expressions don't match class;
+ supported attributes by name are:
+ - lineno - returns the line number of the exception text
+ - col - returns the column number of the exception text
+ - line - returns the line containing the exception text
+
+ Example::
+ try:
+ Word(nums).setName("integer").parseString("ABC")
+ except ParseException as pe:
+ print(pe)
+ print("column: {}".format(pe.col))
+
+ prints::
+ Expected integer (at char 0), (line:1, col:1)
+ column: 1
"""
pass
@@ -223,12 +262,10 @@ class ParseFatalException(ParseBaseException):
pass
class ParseSyntaxException(ParseFatalException):
- """just like C{L{ParseFatalException}}, but thrown internally when an
- C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
- an unbacktrackable syntax error has been found"""
- def __init__(self, pe):
- super(ParseSyntaxException, self).__init__(
- pe.pstr, pe.loc, pe.msg, pe.parserElement)
+ """just like L{ParseFatalException}, but thrown internally when an
+ L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
+ immediately because an unbacktrackable syntax error has been found"""
+ pass
#~ class ReparseException(ParseBaseException):
#~ """Experimental class - parse actions can raise this exception to cause
@@ -244,7 +281,7 @@ class ParseSyntaxException(ParseFatalException):
#~ self.reparseLoc = restartLoc
class RecursiveGrammarException(Exception):
- """exception thrown by C{validate()} if the grammar could be improperly recursive"""
+ """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
self.parseElementTrace = parseElementList
@@ -257,16 +294,49 @@ class _ParseResultsWithOffset(object):
def __getitem__(self,i):
return self.tup[i]
def __repr__(self):
- return repr(self.tup)
+ return repr(self.tup[0])
def setOffset(self,i):
self.tup = (self.tup[0],i)
class ParseResults(object):
- """Structured parse results, to provide multiple means of access to the parsed data:
+ """
+ Structured parse results, to provide multiple means of access to the parsed data:
- as a list (C{len(results)})
- by list index (C{results[0], results[1]}, etc.)
- - by attribute (C{results.<resultsName>})
- """
+ - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
+
+ Example::
+ integer = Word(nums)
+ date_str = (integer.setResultsName("year") + '/'
+ + integer.setResultsName("month") + '/'
+ + integer.setResultsName("day"))
+ # equivalent form:
+ # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ # parseString returns a ParseResults object
+ result = date_str.parseString("1999/12/31")
+
+ def test(s, fn=repr):
+ print("%s -> %s" % (s, fn(eval(s))))
+ test("list(result)")
+ test("result[0]")
+ test("result['month']")
+ test("result.day")
+ test("'month' in result")
+ test("'minutes' in result")
+ test("result.dump()", str)
+ prints::
+ list(result) -> ['1999', '/', '12', '/', '31']
+ result[0] -> '1999'
+ result['month'] -> '12'
+ result.day -> '31'
+ 'month' in result -> True
+ 'minutes' in result -> False
+ result.dump() -> ['1999', '/', '12', '/', '31']
+ - day: 31
+ - month: 12
+ - year: 1999
+ """
def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
if isinstance(toklist, cls):
return toklist
@@ -351,11 +421,6 @@ class ParseResults(object):
removed = list(range(*i.indices(mylen)))
removed.reverse()
# fixup indices in token dictionary
- #~ for name in self.__tokdict:
- #~ occurrences = self.__tokdict[name]
- #~ for j in removed:
- #~ for k, (value, position) in enumerate(occurrences):
- #~ occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
for name,occurrences in self.__tokdict.items():
for j in removed:
for k, (value, position) in enumerate(occurrences):
@@ -371,35 +436,48 @@ class ParseResults(object):
__nonzero__ = __bool__
def __iter__( self ): return iter( self.__toklist )
def __reversed__( self ): return iter( self.__toklist[::-1] )
- def iterkeys( self ):
- """Returns all named result keys."""
+ def _iterkeys( self ):
if hasattr(self.__tokdict, "iterkeys"):
return self.__tokdict.iterkeys()
else:
return iter(self.__tokdict)
- def itervalues( self ):
- """Returns all named result values."""
- return (self[k] for k in self.iterkeys())
+ def _itervalues( self ):
+ return (self[k] for k in self._iterkeys())
- def iteritems( self ):
- return ((k, self[k]) for k in self.iterkeys())
+ def _iteritems( self ):
+ return ((k, self[k]) for k in self._iterkeys())
if PY_3:
- keys = iterkeys
- values = itervalues
- items = iteritems
+ keys = _iterkeys
+ """Returns an iterator of all named result keys (Python 3.x only)."""
+
+ values = _itervalues
+ """Returns an iterator of all named result values (Python 3.x only)."""
+
+ items = _iteritems
+ """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
+
else:
+ iterkeys = _iterkeys
+ """Returns an iterator of all named result keys (Python 2.x only)."""
+
+ itervalues = _itervalues
+ """Returns an iterator of all named result values (Python 2.x only)."""
+
+ iteritems = _iteritems
+ """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
+
def keys( self ):
- """Returns all named result keys."""
+ """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
return list(self.iterkeys())
def values( self ):
- """Returns all named result values."""
+ """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
return list(self.itervalues())
def items( self ):
- """Returns all named result keys and values as a list of tuples."""
+ """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
return list(self.iteritems())
def haskeys( self ):
@@ -408,14 +486,39 @@ class ParseResults(object):
return bool(self.__tokdict)
def pop( self, *args, **kwargs):
- """Removes and returns item at specified index (default=last).
- Supports both list and dict semantics for pop(). If passed no
- argument or an integer argument, it will use list semantics
- and pop tokens from the list of parsed tokens. If passed a
- non-integer argument (most likely a string), it will use dict
- semantics and pop the corresponding value from any defined
- results names. A second default return value argument is
- supported, just as in dict.pop()."""
+ """
+ Removes and returns item at specified index (default=C{last}).
+ Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
+ argument or an integer argument, it will use C{list} semantics
+ and pop tokens from the list of parsed tokens. If passed a
+ non-integer argument (most likely a string), it will use C{dict}
+ semantics and pop the corresponding value from any defined
+ results names. A second default return value argument is
+ supported, just as in C{dict.pop()}.
+
+ Example::
+ def remove_first(tokens):
+ tokens.pop(0)
+ print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+ print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
+
+ label = Word(alphas)
+ patt = label("LABEL") + OneOrMore(Word(nums))
+ print(patt.parseString("AAB 123 321").dump())
+
+ # Use pop() in a parse action to remove named result (note that corresponding value is not
+ # removed from list form of results)
+ def remove_LABEL(tokens):
+ tokens.pop("LABEL")
+ return tokens
+ patt.addParseAction(remove_LABEL)
+ print(patt.parseString("AAB 123 321").dump())
+ prints::
+ ['AAB', '123', '321']
+ - LABEL: AAB
+
+ ['AAB', '123', '321']
+ """
if not args:
args = [-1]
for k,v in kwargs.items():
@@ -435,39 +538,83 @@ class ParseResults(object):
return defaultvalue
def get(self, key, defaultValue=None):
- """Returns named result matching the given key, or if there is no
- such name, then returns the given C{defaultValue} or C{None} if no
- C{defaultValue} is specified."""
+ """
+ Returns named result matching the given key, or if there is no
+ such name, then returns the given C{defaultValue} or C{None} if no
+ C{defaultValue} is specified.
+
+ Similar to C{dict.get()}.
+
+ Example::
+ integer = Word(nums)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ result = date_str.parseString("1999/12/31")
+ print(result.get("year")) # -> '1999'
+ print(result.get("hour", "not specified")) # -> 'not specified'
+ print(result.get("hour")) # -> None
+ """
if key in self:
return self[key]
else:
return defaultValue
def insert( self, index, insStr ):
- """Inserts new element at location index in the list of parsed tokens."""
+ """
+ Inserts new element at location index in the list of parsed tokens.
+
+ Similar to C{list.insert()}.
+
+ Example::
+ print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+ # use a parse action to insert the parse location in the front of the parsed results
+ def insert_locn(locn, tokens):
+ tokens.insert(0, locn)
+ print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
+ """
self.__toklist.insert(index, insStr)
# fixup indices in token dictionary
- #~ for name in self.__tokdict:
- #~ occurrences = self.__tokdict[name]
- #~ for k, (value, position) in enumerate(occurrences):
- #~ occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
for name,occurrences in self.__tokdict.items():
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def append( self, item ):
- """Add single element to end of ParseResults list of elements."""
+ """
+ Add single element to end of ParseResults list of elements.
+
+ Example::
+ print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+ # use a parse action to compute the sum of the parsed integers, and add it to the end
+ def append_sum(tokens):
+ tokens.append(sum(map(int, tokens)))
+ print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
+ """
self.__toklist.append(item)
def extend( self, itemseq ):
- """Add sequence of elements to end of ParseResults list of elements."""
+ """
+ Add sequence of elements to end of ParseResults list of elements.
+
+ Example::
+ patt = OneOrMore(Word(alphas))
+
+ # use a parse action to append the reverse of the matched strings, to make a palindrome
+ def make_palindrome(tokens):
+ tokens.extend(reversed([t[::-1] for t in tokens]))
+ return ''.join(tokens)
+ print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
+ """
if isinstance(itemseq, ParseResults):
self += itemseq
else:
self.__toklist.extend(itemseq)
def clear( self ):
- """Clear all elements and results names."""
+ """
+ Clear all elements and results names.
+ """
del self.__toklist[:]
self.__tokdict.clear()
@@ -532,11 +679,40 @@ class ParseResults(object):
return out
def asList( self ):
- """Returns the parse results as a nested list of matching tokens, all converted to strings."""
+ """
+ Returns the parse results as a nested list of matching tokens, all converted to strings.
+
+ Example::
+ patt = OneOrMore(Word(alphas))
+ result = patt.parseString("sldkj lsdkj sldkj")
+ # even though the result prints in string-like form, it is actually a pyparsing ParseResults
+ print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
+
+ # Use asList() to create an actual list
+ result_list = result.asList()
+ print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
+ """
return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
def asDict( self ):
- """Returns the named parse results as a nested dictionary."""
+ """
+ Returns the named parse results as a nested dictionary.
+
+ Example::
+ integer = Word(nums)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ result = date_str.parseString('12/31/1999')
+ print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
+
+ result_dict = result.asDict()
+ print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
+
+ # even though a ParseResults supports dict-like access, sometime you just need to have a dict
+ import json
+ print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
+ print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
+ """
if PY_3:
item_fn = self.items
else:
@@ -554,7 +730,9 @@ class ParseResults(object):
return dict((k,toItem(v)) for k,v in item_fn())
def copy( self ):
- """Returns a new copy of a C{ParseResults} object."""
+ """
+ Returns a new copy of a C{ParseResults} object.
+ """
ret = ParseResults( self.__toklist )
ret.__tokdict = self.__tokdict.copy()
ret.__parent = self.__parent
@@ -563,7 +741,9 @@ class ParseResults(object):
return ret
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
- """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
+ """
+ (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
+ """
nl = "\n"
out = []
namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
@@ -629,7 +809,27 @@ class ParseResults(object):
return None
def getName(self):
- """Returns the results name for this token expression."""
+ """
+ Returns the results name for this token expression. Useful when several
+ different expressions might match at a particular location.
+
+ Example::
+ integer = Word(nums)
+ ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
+ house_number_expr = Suppress('#') + Word(nums, alphanums)
+ user_data = (Group(house_number_expr)("house_number")
+ | Group(ssn_expr)("ssn")
+ | Group(integer)("age"))
+ user_info = OneOrMore(user_data)
+
+ result = user_info.parseString("22 111-22-3333 #221B")
+ for item in result:
+ print(item.getName(), ':', item[0])
+ prints::
+ age : 22
+ ssn : 111-22-3333
+ house_number : 221B
+ """
if self.__name:
return self.__name
elif self.__parent:
@@ -640,45 +840,77 @@ class ParseResults(object):
return None
elif (len(self) == 1 and
len(self.__tokdict) == 1 and
- self.__tokdict.values()[0][0][1] in (0,-1)):
- return self.__tokdict.keys()[0]
+ next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
+ return next(iter(self.__tokdict.keys()))
else:
return None
- def dump(self,indent='',depth=0):
- """Diagnostic method for listing out the contents of a C{ParseResults}.
- Accepts an optional C{indent} argument so that this string can be embedded
- in a nested display of other data."""
+ def dump(self, indent='', depth=0, full=True):
+ """
+ Diagnostic method for listing out the contents of a C{ParseResults}.
+ Accepts an optional C{indent} argument so that this string can be embedded
+ in a nested display of other data.
+
+ Example::
+ integer = Word(nums)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ result = date_str.parseString('12/31/1999')
+ print(result.dump())
+ prints::
+ ['12', '/', '31', '/', '1999']
+ - day: 1999
+ - month: 31
+ - year: 12
+ """
out = []
NL = '\n'
out.append( indent+_ustr(self.asList()) )
- if self.haskeys():
- items = sorted(self.items())
- for k,v in items:
- if out:
- out.append(NL)
- out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
- if isinstance(v,ParseResults):
- if v:
- out.append( v.dump(indent,depth+1) )
+ if full:
+ if self.haskeys():
+ items = sorted((str(k), v) for k,v in self.items())
+ for k,v in items:
+ if out:
+ out.append(NL)
+ out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
+ if isinstance(v,ParseResults):
+ if v:
+ out.append( v.dump(indent,depth+1) )
+ else:
+ out.append(_ustr(v))
else:
- out.append(_ustr(v))
- else:
- out.append(_ustr(v))
- elif any(isinstance(vv,ParseResults) for vv in self):
- v = self
- for i,vv in enumerate(v):
- if isinstance(vv,ParseResults):
- out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
- else:
- out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))
+ out.append(repr(v))
+ elif any(isinstance(vv,ParseResults) for vv in self):
+ v = self
+ for i,vv in enumerate(v):
+ if isinstance(vv,ParseResults):
+ out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
+ else:
+ out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))
return "".join(out)
def pprint(self, *args, **kwargs):
- """Pretty-printer for parsed results as a list, using the C{pprint} module.
- Accepts additional positional or keyword args as defined for the
- C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
+ """
+ Pretty-printer for parsed results as a list, using the C{pprint} module.
+ Accepts additional positional or keyword args as defined for the
+ C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
+
+ Example::
+ ident = Word(alphas, alphanums)
+ num = Word(nums)
+ func = Forward()
+ term = ident | num | Group('(' + func + ')')
+ func <<= ident + Group(Optional(delimitedList(term)))
+ result = func.parseString("fna a,b,(fnb c,d,200),100")
+ result.pprint(width=40)
+ prints::
+ ['fna',
+ ['a',
+ 'b',
+ ['(', 'fnb', ['c', 'd', '200'], ')'],
+ '100']]
+ """
pprint.pprint(self.asList(), *args, **kwargs)
# add support for pickle protocol
@@ -721,7 +953,7 @@ def col (loc,strg):
positions within the parsed string.
"""
s = strg
- return 1 if loc<len(s) and s[loc] == '\n' else loc - s.rfind("\n", 0, loc)
+ return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
def lineno(loc,strg):
"""Returns current line number within a string, counting newlines as line separators.
@@ -786,10 +1018,35 @@ def _trim_arity(func, maxargs=2):
return lambda s,l,t: func(t)
limit = [0]
foundArity = [False]
+
+ # traceback return data structure changed in Py3.5 - normalize back to plain tuples
+ if system_version[:2] >= (3,5):
+ def extract_stack(limit=0):
+ # special handling for Python 3.5.0 - extra deep call stack by 1
+ offset = -3 if system_version == (3,5,0) else -2
+ frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
+ return [(frame_summary.filename, frame_summary.lineno)]
+ def extract_tb(tb, limit=0):
+ frames = traceback.extract_tb(tb, limit=limit)
+ frame_summary = frames[-1]
+ return [(frame_summary.filename, frame_summary.lineno)]
+ else:
+ extract_stack = traceback.extract_stack
+ extract_tb = traceback.extract_tb
+
+ # synthesize what would be returned by traceback.extract_stack at the call to
+ # user's parse action 'func', so that we don't incur call penalty at parse time
+
+ LINE_DIFF = 6
+ # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
+ # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
+ this_line = extract_stack(limit=2)[-1]
+ pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
+
def wrapper(*args):
while 1:
try:
- ret = func(*args[limit[0]:]) #~@$^*)+_(&%#!=-`~;:"[]{}
+ ret = func(*args[limit[0]:])
foundArity[0] = True
return ret
except TypeError:
@@ -799,8 +1056,7 @@ def _trim_arity(func, maxargs=2):
else:
try:
tb = sys.exc_info()[-1]
- exc_source_line = traceback.extract_tb(tb)[-1][-1]
- if not exc_source_line.endswith('#~@$^*)+_(&%#!=-`~;:"[]{}'):
+ if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
raise
finally:
del tb
@@ -809,6 +1065,16 @@ def _trim_arity(func, maxargs=2):
limit[0] += 1
continue
raise
+
+ # copy func name to wrapper for sensible debug output
+ func_name = "<parse action>"
+ try:
+ func_name = getattr(func, '__name__',
+ getattr(func, '__class__').__name__)
+ except Exception:
+ func_name = str(func)
+ wrapper.__name__ = func_name
+
return wrapper
class ParserElement(object):
@@ -818,7 +1084,16 @@ class ParserElement(object):
@staticmethod
def setDefaultWhitespaceChars( chars ):
- """Overrides the default whitespace chars
+ r"""
+ Overrides the default whitespace chars
+
+ Example::
+ # default whitespace chars are space, <TAB> and newline
+ OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
+
+ # change to just treat newline as significant
+ ParserElement.setDefaultWhitespaceChars(" \t")
+ OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
"""
ParserElement.DEFAULT_WHITE_CHARS = chars
@@ -826,8 +1101,22 @@ class ParserElement(object):
def inlineLiteralsUsing(cls):
"""
Set class to be used for inclusion of string literals into a parser.
+
+ Example::
+ # default literal class used is Literal
+ integer = Word(nums)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
+
+
+ # change to Suppress
+ ParserElement.inlineLiteralsUsing(Suppress)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
"""
- ParserElement.literalStringClass = cls
+ ParserElement._literalStringClass = cls
def __init__( self, savelist=False ):
self.parseAction = list()
@@ -853,8 +1142,21 @@ class ParserElement(object):
self.callDuringTry = False
def copy( self ):
- """Make a copy of this C{ParserElement}. Useful for defining different parse actions
- for the same parsing pattern, using copies of the original parse element."""
+ """
+ Make a copy of this C{ParserElement}. Useful for defining different parse actions
+ for the same parsing pattern, using copies of the original parse element.
+
+ Example::
+ integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+ integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
+ integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+
+ print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
+ prints::
+ [5120, 100, 655360, 268435456]
+ Equivalent form of C{expr.copy()} is just C{expr()}::
+ integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+ """
cpy = copy.copy( self )
cpy.parseAction = self.parseAction[:]
cpy.ignoreExprs = self.ignoreExprs[:]
@@ -863,7 +1165,13 @@ class ParserElement(object):
return cpy
def setName( self, name ):
- """Define name for this expression, for use in debugging."""
+ """
+ Define name for this expression, makes debugging and exception messages clearer.
+
+ Example::
+ Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
+ Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
+ """
self.name = name
self.errmsg = "Expected " + self.name
if hasattr(self,"exception"):
@@ -871,15 +1179,24 @@ class ParserElement(object):
return self
def setResultsName( self, name, listAllMatches=False ):
- """Define name for referencing matching tokens as a nested attribute
- of the returned parse results.
- NOTE: this returns a *copy* of the original C{ParserElement} object;
- this is so that the client can define a basic element, such as an
- integer, and reference it in multiple places with different names.
-
- You can also set results names using the abbreviated syntax,
- C{expr("name")} in place of C{expr.setResultsName("name")} -
- see L{I{__call__}<__call__>}.
+ """
+ Define name for referencing matching tokens as a nested attribute
+ of the returned parse results.
+ NOTE: this returns a *copy* of the original C{ParserElement} object;
+ this is so that the client can define a basic element, such as an
+ integer, and reference it in multiple places with different names.
+
+ You can also set results names using the abbreviated syntax,
+ C{expr("name")} in place of C{expr.setResultsName("name")} -
+ see L{I{__call__}<__call__>}.
+
+ Example::
+ date_str = (integer.setResultsName("year") + '/'
+ + integer.setResultsName("month") + '/'
+ + integer.setResultsName("day"))
+
+ # equivalent form:
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
"""
newself = self.copy()
if name.endswith("*"):
@@ -908,42 +1225,76 @@ class ParserElement(object):
return self
def setParseAction( self, *fns, **kwargs ):
- """Define action to perform when successfully matching parse element definition.
- Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
- C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
- - s = the original string being parsed (see note below)
- - loc = the location of the matching substring
- - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
- If the functions in fns modify the tokens, they can return them as the return
- value from fn, and the modified list of tokens will replace the original.
- Otherwise, fn does not need to return any value.
-
- Note: the default parsing behavior is to expand tabs in the input string
- before starting the parsing process. See L{I{parseString}<parseString>} for more information
- on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
- consistent view of the parsed string, the parse location, and line and column
- positions within the parsed string.
- """
+ """
+ Define action to perform when successfully matching parse element definition.
+ Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
+ C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
+ - s = the original string being parsed (see note below)
+ - loc = the location of the matching substring
+ - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
+ If the functions in fns modify the tokens, they can return them as the return
+ value from fn, and the modified list of tokens will replace the original.
+ Otherwise, fn does not need to return any value.
+
+ Optional keyword arguments:
+ - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
+
+ Note: the default parsing behavior is to expand tabs in the input string
+ before starting the parsing process. See L{I{parseString}<parseString>} for more information
+ on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+ consistent view of the parsed string, the parse location, and line and column
+ positions within the parsed string.
+
+ Example::
+ integer = Word(nums)
+ date_str = integer + '/' + integer + '/' + integer
+
+ date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
+
+ # use parse action to convert to ints at parse time
+ integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+ date_str = integer + '/' + integer + '/' + integer
+
+ # note that integer fields are now ints, not strings
+ date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
+ """
self.parseAction = list(map(_trim_arity, list(fns)))
self.callDuringTry = kwargs.get("callDuringTry", False)
return self
def addParseAction( self, *fns, **kwargs ):
- """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
+ """
+ Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
+
+ See examples in L{I{copy}<copy>}.
+ """
self.parseAction += list(map(_trim_arity, list(fns)))
self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
return self
def addCondition(self, *fns, **kwargs):
"""Add a boolean predicate function to expression's list of parse actions. See
- L{I{setParseAction}<setParseAction>}. Optional keyword argument C{message} can
- be used to define a custom message to be used in the raised exception."""
- msg = kwargs.get("message") or "failed user-defined condition"
+ L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
+ functions passed to C{addCondition} need to return boolean success/fail of the condition.
+
+ Optional keyword arguments:
+ - message = define a custom message to be used in the raised exception
+ - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
+
+ Example::
+ integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+ year_int = integer.copy()
+ year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
+ date_str = year_int + '/' + integer + '/' + integer
+
+ result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
+ """
+ msg = kwargs.get("message", "failed user-defined condition")
+ exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
for fn in fns:
def pa(s,l,t):
if not bool(_trim_arity(fn)(s,l,t)):
- raise ParseException(s,l,msg)
- return t
+ raise exc_type(s,l,msg)
self.parseAction.append(pa)
self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
return self
@@ -1079,42 +1430,123 @@ class ParserElement(object):
else:
return True
+ class _UnboundedCache(object):
+ def __init__(self):
+ cache = {}
+ self.not_in_cache = not_in_cache = object()
+
+ def get(self, key):
+ return cache.get(key, not_in_cache)
+
+ def set(self, key, value):
+ cache[key] = value
+
+ def clear(self):
+ cache.clear()
+
+ self.get = types.MethodType(get, self)
+ self.set = types.MethodType(set, self)
+ self.clear = types.MethodType(clear, self)
+
+ if _OrderedDict is not None:
+ class _FifoCache(object):
+ def __init__(self, size):
+ self.not_in_cache = not_in_cache = object()
+
+ cache = _OrderedDict()
+
+ def get(self, key):
+ return cache.get(key, not_in_cache)
+
+ def set(self, key, value):
+ cache[key] = value
+ if len(cache) > size:
+ cache.popitem(False)
+
+ def clear(self):
+ cache.clear()
+
+ self.get = types.MethodType(get, self)
+ self.set = types.MethodType(set, self)
+ self.clear = types.MethodType(clear, self)
+
+ else:
+ class _FifoCache(object):
+ def __init__(self, size):
+ self.not_in_cache = not_in_cache = object()
+
+ cache = {}
+ key_fifo = collections.deque([], size)
+
+ def get(self, key):
+ return cache.get(key, not_in_cache)
+
+ def set(self, key, value):
+ cache[key] = value
+ if len(cache) > size:
+ cache.pop(key_fifo.popleft(), None)
+ key_fifo.append(key)
+
+ def clear(self):
+ cache.clear()
+ key_fifo.clear()
+
+ self.get = types.MethodType(get, self)
+ self.set = types.MethodType(set, self)
+ self.clear = types.MethodType(clear, self)
+
+ # argument cache for optimizing repeated calls when backtracking through recursive expressions
+ packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
+ packrat_cache_lock = RLock()
+ packrat_cache_stats = [0, 0]
+
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
- lookup = (self,instring,loc,callPreParse,doActions)
- if lookup in ParserElement._exprArgCache:
- value = ParserElement._exprArgCache[ lookup ]
- if isinstance(value, Exception):
- raise value
- return (value[0],value[1].copy())
- else:
- try:
- value = self._parseNoCache( instring, loc, doActions, callPreParse )
- ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
- return value
- except ParseBaseException as pe:
- pe.__traceback__ = None
- ParserElement._exprArgCache[ lookup ] = pe
- raise
+ HIT, MISS = 0, 1
+ lookup = (self, instring, loc, callPreParse, doActions)
+ with ParserElement.packrat_cache_lock:
+ cache = ParserElement.packrat_cache
+ value = cache.get(lookup)
+ if value is cache.not_in_cache:
+ ParserElement.packrat_cache_stats[MISS] += 1
+ try:
+ value = self._parseNoCache(instring, loc, doActions, callPreParse)
+ except ParseBaseException as pe:
+ # cache a copy of the exception, without the traceback
+ cache.set(lookup, pe.__class__(*pe.args))
+ raise
+ else:
+ cache.set(lookup, (value[0], value[1].copy()))
+ return value
+ else:
+ ParserElement.packrat_cache_stats[HIT] += 1
+ if isinstance(value, Exception):
+ raise value
+ return (value[0], value[1].copy())
_parse = _parseNoCache
- # argument cache for optimizing repeated calls when backtracking through recursive expressions
- _exprArgCache = {}
@staticmethod
def resetCache():
- ParserElement._exprArgCache.clear()
+ ParserElement.packrat_cache.clear()
+ ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
_packratEnabled = False
@staticmethod
- def enablePackrat():
+ def enablePackrat(cache_size_limit=128):
"""Enables "packrat" parsing, which adds memoizing to the parsing logic.
Repeated parse attempts at the same string location (which happens
often in many complex grammars) can immediately return a cached value,
instead of re-executing parsing/validating code. Memoizing is done of
both valid results and parsing exceptions.
-
+
+ Parameters:
+ - cache_size_limit - (default=C{128}) - if an integer value is provided,
+ it will limit the size of the packrat cache; if None is passed, then
+ the cache size will be unbounded; if 0 is passed, the cache will
+ be effectively disabled.
+
This speedup may break existing programs that use parse actions that
have side-effects. For this reason, packrat parsing is disabled when
you first import pyparsing. To activate the packrat feature, your
@@ -1123,32 +1555,45 @@ class ParserElement(object):
C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
Python will crash. For best results, call C{enablePackrat()} immediately
after importing pyparsing.
+
+ Example::
+ import pyparsing
+ pyparsing.ParserElement.enablePackrat()
"""
if not ParserElement._packratEnabled:
ParserElement._packratEnabled = True
+ if cache_size_limit is None:
+ ParserElement.packrat_cache = ParserElement._UnboundedCache()
+ else:
+ ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
ParserElement._parse = ParserElement._parseCache
def parseString( self, instring, parseAll=False ):
- """Execute the parse expression with the given string.
- This is the main interface to the client code, once the complete
- expression has been built.
-
- If you want the grammar to require that the entire input string be
- successfully parsed, then set C{parseAll} to True (equivalent to ending
- the grammar with C{L{StringEnd()}}).
-
- Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
- in order to report proper column numbers in parse actions.
- If the input string contains tabs and
- the grammar uses parse actions that use the C{loc} argument to index into the
- string being parsed, you can ensure you have a consistent view of the input
- string by:
- - calling C{parseWithTabs} on your grammar before calling C{parseString}
- (see L{I{parseWithTabs}<parseWithTabs>})
- - define your parse action using the full C{(s,loc,toks)} signature, and
- reference the input string using the parse action's C{s} argument
- - explictly expand the tabs in your input string before calling
- C{parseString}
+ """
+ Execute the parse expression with the given string.
+ This is the main interface to the client code, once the complete
+ expression has been built.
+
+ If you want the grammar to require that the entire input string be
+ successfully parsed, then set C{parseAll} to True (equivalent to ending
+ the grammar with C{L{StringEnd()}}).
+
+ Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
+ in order to report proper column numbers in parse actions.
+ If the input string contains tabs and
+ the grammar uses parse actions that use the C{loc} argument to index into the
+ string being parsed, you can ensure you have a consistent view of the input
+ string by:
+ - calling C{parseWithTabs} on your grammar before calling C{parseString}
+ (see L{I{parseWithTabs}<parseWithTabs>})
+ - define your parse action using the full C{(s,loc,toks)} signature, and
+ reference the input string using the parse action's C{s} argument
+ - explicitly expand the tabs in your input string before calling
+ C{parseString}
+
+ Example::
+ Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
+ Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
"""
ParserElement.resetCache()
if not self.streamlined:
@@ -1174,14 +1619,35 @@ class ParserElement(object):
return tokens
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
- """Scan the input string for expression matches. Each match will return the
- matching tokens, start location, and end location. May be called with optional
- C{maxMatches} argument, to clip scanning after 'n' matches are found. If
- C{overlap} is specified, then overlapping matches will be reported.
-
- Note that the start and end locations are reported relative to the string
- being parsed. See L{I{parseString}<parseString>} for more information on parsing
- strings with embedded tabs."""
+ """
+ Scan the input string for expression matches. Each match will return the
+ matching tokens, start location, and end location. May be called with optional
+ C{maxMatches} argument, to clip scanning after 'n' matches are found. If
+ C{overlap} is specified, then overlapping matches will be reported.
+
+ Note that the start and end locations are reported relative to the string
+ being parsed. See L{I{parseString}<parseString>} for more information on parsing
+ strings with embedded tabs.
+
+ Example::
+ source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
+ print(source)
+ for tokens,start,end in Word(alphas).scanString(source):
+ print(' '*start + '^'*(end-start))
+ print(' '*start + tokens[0])
+
+ prints::
+
+ sldjf123lsdjjkf345sldkjf879lkjsfd987
+ ^^^^^
+ sldjf
+ ^^^^^^^
+ lsdjjkf
+ ^^^^^^
+ sldkjf
+ ^^^^^^
+ lkjsfd
+ """
if not self.streamlined:
self.streamline()
for e in self.ignoreExprs:
@@ -1224,12 +1690,22 @@ class ParserElement(object):
raise exc
def transformString( self, instring ):
- """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
- be returned from a parse action. To use C{transformString}, define a grammar and
- attach a parse action to it that modifies the returned token list.
- Invoking C{transformString()} on a target string will then scan for matches,
- and replace the matched text patterns according to the logic in the parse
- action. C{transformString()} returns the resulting transformed string."""
+ """
+ Extension to C{L{scanString}}, to modify matching text with modified tokens that may
+ be returned from a parse action. To use C{transformString}, define a grammar and
+ attach a parse action to it that modifies the returned token list.
+ Invoking C{transformString()} on a target string will then scan for matches,
+ and replace the matched text patterns according to the logic in the parse
+ action. C{transformString()} returns the resulting transformed string.
+
+ Example::
+ wd = Word(alphas)
+ wd.setParseAction(lambda toks: toks[0].title())
+
+ print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
+ Prints::
+ Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
+ """
out = []
lastE = 0
# force preservation of <TAB>s, to minimize unwanted transformation of string, and to
@@ -1257,9 +1733,18 @@ class ParserElement(object):
raise exc
def searchString( self, instring, maxMatches=_MAX_INT ):
- """Another extension to C{L{scanString}}, simplifying the access to the tokens found
- to match the given parse expression. May be called with optional
- C{maxMatches} argument, to clip searching after 'n' matches are found.
+ """
+ Another extension to C{L{scanString}}, simplifying the access to the tokens found
+ to match the given parse expression. May be called with optional
+ C{maxMatches} argument, to clip searching after 'n' matches are found.
+
+ Example::
+ # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
+ cap_word = Word(alphas.upper(), alphas.lower())
+
+ print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
+ prints::
+ ['More', 'Iron', 'Lead', 'Gold', 'I']
"""
try:
return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
@@ -1270,10 +1755,42 @@ class ParserElement(object):
# catch and re-raise exception from here, clears out pyparsing internal stack trace
raise exc
+ def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
+ """
+ Generator method to split a string using the given expression as a separator.
+ May be called with optional C{maxsplit} argument, to limit the number of splits;
+ and the optional C{includeSeparators} argument (default=C{False}), to indicate whether the
+ matched separator text should be included in the split results.
+
+ Example::
+ punc = oneOf(list(".,;:/-!?"))
+ print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
+ prints::
+ ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
+ """
+ splits = 0
+ last = 0
+ for t,s,e in self.scanString(instring, maxMatches=maxsplit):
+ yield instring[last:s]
+ if includeSeparators:
+ yield t[0]
+ last = e
+ yield instring[last:]
+
def __add__(self, other ):
- """Implementation of + operator - returns C{L{And}}"""
+ """
+ Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
+ converts them to L{Literal}s by default.
+
+ Example::
+ greet = Word(alphas) + "," + Word(alphas) + "!"
+ hello = "Hello, World!"
+ print (hello, "->", greet.parseString(hello))
+ Prints::
+ Hello, World! -> ['Hello', ',', 'World', '!']
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1281,9 +1798,11 @@ class ParserElement(object):
return And( [ self, other ] )
def __radd__(self, other ):
- """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of + operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1291,9 +1810,11 @@ class ParserElement(object):
return other + self
def __sub__(self, other):
- """Implementation of - operator, returns C{L{And}} with error stop"""
+ """
+ Implementation of - operator, returns C{L{And}} with error stop
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1301,9 +1822,11 @@ class ParserElement(object):
return And( [ self, And._ErrorStop(), other ] )
def __rsub__(self, other ):
- """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of - operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1311,24 +1834,24 @@ class ParserElement(object):
return other - self
def __mul__(self,other):
- """Implementation of * operator, allows use of C{expr * 3} in place of
- C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
- tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
- may also include C{None} as in:
- - C{expr*(n,None)} or C{expr*(n,)} is equivalent
+ """
+ Implementation of * operator, allows use of C{expr * 3} in place of
+ C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
+ tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
+ may also include C{None} as in:
+ - C{expr*(n,None)} or C{expr*(n,)} is equivalent
to C{expr*n + L{ZeroOrMore}(expr)}
(read as "at least n instances of C{expr}")
- - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
+ - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
(read as "0 to n instances of C{expr}")
- - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
- - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
-
- Note that C{expr*(None,n)} does not raise an exception if
- more than n exprs exist in the input stream; that is,
- C{expr*(None,n)} does not enforce a maximum number of expr
- occurrences. If this behavior is desired, then write
- C{expr*(None,n) + ~expr}
-
+ - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
+ - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
+
+ Note that C{expr*(None,n)} does not raise an exception if
+ more than n exprs exist in the input stream; that is,
+ C{expr*(None,n)} does not enforce a maximum number of expr
+ occurrences. If this behavior is desired, then write
+ C{expr*(None,n) + ~expr}
"""
if isinstance(other,int):
minElements, optElements = other,0
@@ -1382,9 +1905,11 @@ class ParserElement(object):
return self.__mul__(other)
def __or__(self, other ):
- """Implementation of | operator - returns C{L{MatchFirst}}"""
+ """
+ Implementation of | operator - returns C{L{MatchFirst}}
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1392,9 +1917,11 @@ class ParserElement(object):
return MatchFirst( [ self, other ] )
def __ror__(self, other ):
- """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of | operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1402,9 +1929,11 @@ class ParserElement(object):
return other | self
def __xor__(self, other ):
- """Implementation of ^ operator - returns C{L{Or}}"""
+ """
+ Implementation of ^ operator - returns C{L{Or}}
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1412,9 +1941,11 @@ class ParserElement(object):
return Or( [ self, other ] )
def __rxor__(self, other ):
- """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of ^ operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1422,9 +1953,11 @@ class ParserElement(object):
return other ^ self
def __and__(self, other ):
- """Implementation of & operator - returns C{L{Each}}"""
+ """
+ Implementation of & operator - returns C{L{Each}}
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1432,9 +1965,11 @@ class ParserElement(object):
return Each( [ self, other ] )
def __rand__(self, other ):
- """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of & operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@@ -1442,41 +1977,49 @@ class ParserElement(object):
return other & self
def __invert__( self ):
- """Implementation of ~ operator - returns C{L{NotAny}}"""
+ """
+ Implementation of ~ operator - returns C{L{NotAny}}
+ """
return NotAny( self )
def __call__(self, name=None):
- """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
- userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
- could be written as::
- userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
-
- If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
- passed as C{True}.
+ """
+ Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
+
+ If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+ passed as C{True}.
- If C{name} is omitted, same as calling C{L{copy}}.
- """
+ If C{name} is omitted, same as calling C{L{copy}}.
+
+ Example::
+ # these are equivalent
+ userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+ userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
+ """
if name is not None:
return self.setResultsName(name)
else:
return self.copy()
def suppress( self ):
- """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
- cluttering up returned output.
+ """
+ Suppresses the output of this C{ParserElement}; useful to keep punctuation from
+ cluttering up returned output.
"""
return Suppress( self )
def leaveWhitespace( self ):
- """Disables the skipping of whitespace before matching the characters in the
- C{ParserElement}'s defined pattern. This is normally only used internally by
- the pyparsing module, but may be needed in some whitespace-sensitive grammars.
+ """
+ Disables the skipping of whitespace before matching the characters in the
+ C{ParserElement}'s defined pattern. This is normally only used internally by
+ the pyparsing module, but may be needed in some whitespace-sensitive grammars.
"""
self.skipWhitespace = False
return self
def setWhitespaceChars( self, chars ):
- """Overrides the default whitespace chars
+ """
+ Overrides the default whitespace chars
"""
self.skipWhitespace = True
self.whiteChars = chars
@@ -1484,16 +2027,26 @@ class ParserElement(object):
return self
def parseWithTabs( self ):
- """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
- Must be called before C{parseString} when the input grammar contains elements that
- match C{<TAB>} characters."""
+ """
+ Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
+ Must be called before C{parseString} when the input grammar contains elements that
+ match C{<TAB>} characters.
+ """
self.keepTabs = True
return self
def ignore( self, other ):
- """Define expression to be ignored (e.g., comments) while doing pattern
- matching; may be called repeatedly, to define multiple comment or other
- ignorable patterns.
+ """
+ Define expression to be ignored (e.g., comments) while doing pattern
+ matching; may be called repeatedly, to define multiple comment or other
+ ignorable patterns.
+
+ Example::
+ patt = OneOrMore(Word(alphas))
+ patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
+
+ patt.ignore(cStyleComment)
+ patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
"""
if isinstance(other, basestring):
other = Suppress(other)
@@ -1506,7 +2059,9 @@ class ParserElement(object):
return self
def setDebugActions( self, startAction, successAction, exceptionAction ):
- """Enable display of debugging messages while doing pattern matching."""
+ """
+ Enable display of debugging messages while doing pattern matching.
+ """
self.debugActions = (startAction or _defaultStartDebugAction,
successAction or _defaultSuccessDebugAction,
exceptionAction or _defaultExceptionDebugAction)
@@ -1514,8 +2069,40 @@ class ParserElement(object):
return self
def setDebug( self, flag=True ):
- """Enable display of debugging messages while doing pattern matching.
- Set C{flag} to True to enable, False to disable."""
+ """
+ Enable display of debugging messages while doing pattern matching.
+ Set C{flag} to True to enable, False to disable.
+
+ Example::
+ wd = Word(alphas).setName("alphaword")
+ integer = Word(nums).setName("numword")
+ term = wd | integer
+
+ # turn on debugging for wd
+ wd.setDebug()
+
+ OneOrMore(term).parseString("abc 123 xyz 890")
+
+ prints::
+ Match alphaword at loc 0(1,1)
+ Matched alphaword -> ['abc']
+ Match alphaword at loc 3(1,4)
+ Exception raised:Expected alphaword (at char 4), (line:1, col:5)
+ Match alphaword at loc 7(1,8)
+ Matched alphaword -> ['xyz']
+ Match alphaword at loc 11(1,12)
+ Exception raised:Expected alphaword (at char 12), (line:1, col:13)
+ Match alphaword at loc 15(1,16)
+ Exception raised:Expected alphaword (at char 15), (line:1, col:16)
+
+ The output shown is that produced by the default debug actions - custom debug actions can be
+ specified using L{setDebugActions}. Prior to attempting
+ to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
+ is shown. Then if the parse succeeds, a C{"Matched"} message is shown; otherwise an C{"Exception raised"}
+ message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
+ which makes debugging and exception messages easier to understand - for instance, the default
+ name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
+ """
if flag:
self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
else:
@@ -1537,20 +2124,22 @@ class ParserElement(object):
pass
def validate( self, validateTrace=[] ):
- """Check defined expressions for valid structure, check for infinite recursive definitions."""
+ """
+ Check defined expressions for valid structure, check for infinite recursive definitions.
+ """
self.checkRecursion( [] )
def parseFile( self, file_or_filename, parseAll=False ):
- """Execute the parse expression on the given file or filename.
- If a filename is specified (instead of a file object),
- the entire file is opened, read, and closed before parsing.
+ """
+ Execute the parse expression on the given file or filename.
+ If a filename is specified (instead of a file object),
+ the entire file is opened, read, and closed before parsing.
"""
try:
file_contents = file_or_filename.read()
except AttributeError:
- f = open(file_or_filename, "r")
- file_contents = f.read()
- f.close()
+ with open(file_or_filename, "r") as f:
+ file_contents = f.read()
try:
return self.parseString(file_contents, parseAll)
except ParseBaseException as exc:
@@ -1564,11 +2153,7 @@ class ParserElement(object):
if isinstance(other, ParserElement):
return self is other or vars(self) == vars(other)
elif isinstance(other, basestring):
- try:
- self.parseString(_ustr(other), parseAll=True)
- return True
- except ParseBaseException:
- return False
+ return self.matches(other)
else:
return super(ParserElement,self)==other
@@ -1584,40 +2169,169 @@ class ParserElement(object):
def __rne__(self,other):
return not (self == other)
- def runTests(self, tests, parseAll=False):
- """Execute the parse expression on a series of test strings, showing each
- test, the parsed results or where the parse failed. Quick and easy way to
- run a parse expression against a list of sample strings.
+ def matches(self, testString, parseAll=True):
+ """
+ Method for quick testing of a parser against a test string. Good for simple
+ inline microtests of sub-expressions while building up a larger parser.
- Parameters:
- - tests - a list of separate test strings, or a multiline string of test strings
- - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests
+ Parameters:
+ - testString - string to test against this expression for a match
+ - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
+
+ Example::
+ expr = Word(nums)
+ assert expr.matches("100")
+ """
+ try:
+ self.parseString(_ustr(testString), parseAll=parseAll)
+ return True
+ except ParseBaseException:
+ return False
+
+ def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
+ """
+ Execute the parse expression on a series of test strings, showing each
+ test, the parsed results or where the parse failed. Quick and easy way to
+ run a parse expression against a list of sample strings.
+
+ Parameters:
+ - tests - a list of separate test strings, or a multiline string of test strings
+ - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
+ - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
+ string; pass None to disable comment filtering
+ - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
+ if False, only dump nested list
+ - printResults - (default=C{True}) prints test output to stdout
+ - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
+
+ Returns: a (success, results) tuple, where success indicates that all tests succeeded
+ (or failed if C{failureTests} is True), and results is a list of (test string, result)
+ pairs, each result being the parsed C{ParseResults} or the exception that was raised
+
+ Example::
+ number_expr = pyparsing_common.number.copy()
+
+ result = number_expr.runTests('''
+ # unsigned integer
+ 100
+ # negative integer
+ -100
+ # float with scientific notation
+ 6.02e23
+ # integer with scientific notation
+ 1e-12
+ ''')
+ print("Success" if result[0] else "Failed!")
+
+ result = number_expr.runTests('''
+ # stray character
+ 100Z
+ # missing leading digit before '.'
+ -.100
+ # too many '.'
+ 3.14.159
+ ''', failureTests=True)
+ print("Success" if result[0] else "Failed!")
+ prints::
+ # unsigned integer
+ 100
+ [100]
+
+ # negative integer
+ -100
+ [-100]
+
+ # float with scientific notation
+ 6.02e23
+ [6.02e+23]
+
+ # integer with scientific notation
+ 1e-12
+ [1e-12]
+
+ Success
+
+ # stray character
+ 100Z
+ ^
+ FAIL: Expected end of text (at char 3), (line:1, col:4)
+
+ # missing leading digit before '.'
+ -.100
+ ^
+ FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
+
+ # too many '.'
+ 3.14.159
+ ^
+ FAIL: Expected end of text (at char 4), (line:1, col:5)
+
+ Success
+
+ Each test string must be on a single line. If you want to test a string that spans multiple
+ lines, create a test like this::
+
+ expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")
+
+ (Note that this is a raw string literal, you must include the leading 'r'.)
"""
if isinstance(tests, basestring):
- tests = map(str.strip, tests.splitlines())
+ tests = list(map(str.strip, tests.rstrip().splitlines()))
+ if isinstance(comment, basestring):
+ comment = Literal(comment)
+ allResults = []
+ comments = []
+ success = True
for t in tests:
- out = [t]
+ if comment is not None and comment.matches(t, False) or comments and not t:
+ comments.append(t)
+ continue
+ if not t:
+ continue
+ out = ['\n'.join(comments), t]
+ comments = []
try:
- out.append(self.parseString(t, parseAll=parseAll).dump())
- except ParseException as pe:
+ t = t.replace(r'\n','\n')
+ result = self.parseString(t, parseAll=parseAll)
+ out.append(result.dump(full=fullDump))
+ success = success and not failureTests
+ except ParseBaseException as pe:
+ fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
if '\n' in t:
out.append(line(pe.loc, t))
- out.append(' '*(col(pe.loc,t)-1) + '^')
+ out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
else:
- out.append(' '*pe.loc + '^')
- out.append(str(pe))
- out.append('')
- print('\n'.join(out))
+ out.append(' '*pe.loc + '^' + fatal)
+ out.append("FAIL: " + str(pe))
+ success = success and failureTests
+ result = pe
+ except Exception as exc:
+ out.append("FAIL-EXCEPTION: " + str(exc))
+ success = success and failureTests
+ result = exc
+
+ if printResults:
+ if fullDump:
+ out.append('')
+ print('\n'.join(out))
+
+ allResults.append((t, result))
+
+ return success, allResults
class Token(ParserElement):
- """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
+ """
+ Abstract C{ParserElement} subclass, for defining atomic matching patterns.
+ """
def __init__( self ):
super(Token,self).__init__( savelist=False )
class Empty(Token):
- """An empty token, will always match."""
+ """
+ An empty token, will always match.
+ """
def __init__( self ):
super(Empty,self).__init__()
self.name = "Empty"
@@ -1626,7 +2340,9 @@ class Empty(Token):
class NoMatch(Token):
- """A token that will never match."""
+ """
+ A token that will never match.
+ """
def __init__( self ):
super(NoMatch,self).__init__()
self.name = "NoMatch"
@@ -1639,7 +2355,19 @@ class NoMatch(Token):
class Literal(Token):
- """Token to exactly match a specified string."""
+ """
+ Token to exactly match a specified string.
+
+ Example::
+ Literal('blah').parseString('blah') # -> ['blah']
+ Literal('blah').parseString('blahfooblah') # -> ['blah']
+ Literal('blah').parseString('bla') # -> Exception: Expected "blah"
+
+ For case-insensitive matching, use L{CaselessLiteral}.
+
+ For keyword matching (force word break before and after the matched string),
+ use L{Keyword} or L{CaselessKeyword}.
+ """
def __init__( self, matchString ):
super(Literal,self).__init__()
self.match = matchString
@@ -1665,22 +2393,31 @@ class Literal(Token):
return loc+self.matchLen, self.match
raise ParseException(instring, loc, self.errmsg, self)
_L = Literal
-ParserElement.literalStringClass = Literal
+ParserElement._literalStringClass = Literal
class Keyword(Token):
- """Token to exactly match a specified string as a keyword, that is, it must be
- immediately followed by a non-keyword character. Compare with C{L{Literal}}::
- Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
- Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
- Accepts two optional constructor arguments in addition to the keyword string:
- C{identChars} is a string of characters that would be valid identifier characters,
- defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
- matching, default is C{False}.
+ """
+ Token to exactly match a specified string as a keyword, that is, it must be
+ immediately followed by a non-keyword character. Compare with C{L{Literal}}:
+ - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+ - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
+ Accepts two optional constructor arguments in addition to the keyword string:
+ - C{identChars} is a string of characters that would be valid identifier characters,
+ defaulting to all alphanumerics + "_" and "$"
+ - C{caseless} allows case-insensitive matching, default is C{False}.
+
+ Example::
+ Keyword("start").parseString("start") # -> ['start']
+ Keyword("start").parseString("starting") # -> Exception
+
+ For case-insensitive matching, use L{CaselessKeyword}.
"""
DEFAULT_KEYWORD_CHARS = alphanums+"_$"
- def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
+ def __init__( self, matchString, identChars=None, caseless=False ):
super(Keyword,self).__init__()
+ if identChars is None:
+ identChars = Keyword.DEFAULT_KEYWORD_CHARS
self.match = matchString
self.matchLen = len(matchString)
try:
@@ -1724,9 +2461,15 @@ class Keyword(Token):
Keyword.DEFAULT_KEYWORD_CHARS = chars
class CaselessLiteral(Literal):
- """Token to match a specified string, ignoring case of letters.
- Note: the matched results will always be in the case of the given
- match string, NOT the case of the input text.
+ """
+ Token to match a specified string, ignoring case of letters.
+ Note: the matched results will always be in the case of the given
+ match string, NOT the case of the input text.
+
+ Example::
+ OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
+
+ (Contrast with example for L{CaselessKeyword}.)
"""
def __init__( self, matchString ):
super(CaselessLiteral,self).__init__( matchString.upper() )
@@ -1741,7 +2484,15 @@ class CaselessLiteral(Literal):
raise ParseException(instring, loc, self.errmsg, self)
class CaselessKeyword(Keyword):
- def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
+ """
+ Caseless version of L{Keyword}.
+
+ Example::
+ OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
+
+ (Contrast with example for L{CaselessLiteral}.)
+ """
+ def __init__( self, matchString, identChars=None ):
super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
def parseImpl( self, instring, loc, doActions=True ):
@@ -1750,17 +2501,113 @@ class CaselessKeyword(Keyword):
return loc+self.matchLen, self.match
raise ParseException(instring, loc, self.errmsg, self)
+class CloseMatch(Token):
+ """
+ A variation on L{Literal} which matches "close" matches, that is,
+ strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
+ - C{match_string} - string to be matched
+ - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
+
+ The results from a successful parse will contain the matched text from the input string and the following named results:
+ - C{mismatches} - a list of the positions within the match_string where mismatches were found
+ - C{original} - the original match_string used to compare against the input string
+
+ If C{mismatches} is an empty list, then the match was an exact match.
+
+ Example::
+ patt = CloseMatch("ATCATCGAATGGA")
+ patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
+ patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
+
+ # exact match
+ patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
+
+ # close match allowing up to 2 mismatches
+ patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
+ patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
+ """
+ def __init__(self, match_string, maxMismatches=1):
+ super(CloseMatch,self).__init__()
+ self.name = match_string
+ self.match_string = match_string
+ self.maxMismatches = maxMismatches
+ self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
+ self.mayIndexError = False
+ self.mayReturnEmpty = False
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ start = loc
+ instrlen = len(instring)
+ maxloc = start + len(self.match_string)
+
+ if maxloc <= instrlen:
+ match_string = self.match_string
+ match_stringloc = 0
+ mismatches = []
+ maxMismatches = self.maxMismatches
+
+ for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
+ src,mat = s_m
+ if src != mat:
+ mismatches.append(match_stringloc)
+ if len(mismatches) > maxMismatches:
+ break
+ else:
+ loc = match_stringloc + 1
+ results = ParseResults([instring[start:loc]])
+ results['original'] = self.match_string
+ results['mismatches'] = mismatches
+ return loc, results
+
+ raise ParseException(instring, loc, self.errmsg, self)
+
+
class Word(Token):
- """Token for matching words composed of allowed character sets.
- Defined with string containing all allowed initial characters,
- an optional string containing allowed body characters (if omitted,
- defaults to the initial character set), and an optional minimum,
- maximum, and/or exact length. The default value for C{min} is 1 (a
- minimum value < 1 is not valid); the default values for C{max} and C{exact}
- are 0, meaning no maximum or exact length restriction. An optional
- C{excludeChars} parameter can list characters that might be found in
- the input C{bodyChars} string; useful to define a word of all printables
- except for one or two characters, for instance.
+ """
+ Token for matching words composed of allowed character sets.
+ Defined with string containing all allowed initial characters,
+ an optional string containing allowed body characters (if omitted,
+ defaults to the initial character set), and an optional minimum,
+ maximum, and/or exact length. The default value for C{min} is 1 (a
+ minimum value < 1 is not valid); the default values for C{max} and C{exact}
+ are 0, meaning no maximum or exact length restriction. An optional
+ C{excludeChars} parameter can list characters that might be found in
+ the input C{bodyChars} string; useful to define a word of all printables
+ except for one or two characters, for instance.
+
+ L{srange} is useful for defining custom character set strings for defining
+ C{Word} expressions, using range notation from regular expression character sets.
+
+ A common mistake is to use C{Word} to match a specific literal string, as in
+ C{Word("Address")}. Remember that C{Word} uses the string argument to define
+ I{sets} of matchable characters. This expression would match "Add", "AAA",
+ "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
+ To match an exact literal string, use L{Literal} or L{Keyword}.
+
+ pyparsing includes helper strings for building Words:
+ - L{alphas}
+ - L{nums}
+ - L{alphanums}
+ - L{hexnums}
+ - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
+ - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
+ - L{printables} (any non-whitespace character)
+
+ Example::
+ # a word composed of digits
+ integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
+
+ # a word with a leading capital, and zero or more lowercase
+ capital_word = Word(alphas.upper(), alphas.lower())
+
+ # hostnames are alphanumeric, with leading alpha, and '-'
+ hostname = Word(alphas, alphanums+'-')
+
+ # roman numeral (not a strict parser, accepts invalid mix of characters)
+ roman = Word("IVXLCDM")
+
+ # any string of non-whitespace characters, except for ','
+ csv_value = Word(printables, excludeChars=",")
"""
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
super(Word,self).__init__()
@@ -1813,7 +2660,7 @@ class Word(Token):
self.reString = r"\b"+self.reString+r"\b"
try:
self.re = re.compile( self.reString )
- except:
+ except Exception:
self.re = None
def parseImpl( self, instring, loc, doActions=True ):
@@ -1854,7 +2701,7 @@ class Word(Token):
def __str__( self ):
try:
return super(Word,self).__str__()
- except:
+ except Exception:
pass
@@ -1875,8 +2722,17 @@ class Word(Token):
class Regex(Token):
- """Token for matching strings that match a given regular expression.
- Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
+ """
+ Token for matching strings that match a given regular expression.
+ Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
+ If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
+ named parse results.
+
+ Example::
+ realnum = Regex(r"[+-]?\d+\.\d*")
+ date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
+ # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
+ roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
"""
compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
@@ -1929,7 +2785,7 @@ class Regex(Token):
def __str__( self ):
try:
return super(Regex,self).__str__()
- except:
+ except Exception:
pass
if self.strRepr is None:
@@ -1939,18 +2795,31 @@ class Regex(Token):
class QuotedString(Token):
- """Token for matching strings that are delimited by quoting characters.
+ r"""
+ Token for matching strings that are delimited by quoting characters.
+
+ Defined with the following parameters:
+ - quoteChar - string of one or more characters defining the quote delimiting string
+ - escChar - character to escape quotes, typically backslash (default=C{None})
+ - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
+ - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
+ - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
+ - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+ - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
+
+ Example::
+ qs = QuotedString('"')
+ print(qs.searchString('lsjdf "This is the quote" sldjf'))
+ complex_qs = QuotedString('{{', endQuoteChar='}}')
+ print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
+ sql_qs = QuotedString('"', escQuote='""')
+ print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
+ prints::
+ [['This is the quote']]
+ [['This is the "quote"']]
+ [['This is the quote with "embedded" quotes']]
"""
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
- r"""Defined with the following parameters:
- - quoteChar - string of one or more characters defining the quote delimiting string
- - escChar - character to escape quotes, typically backslash (default=None)
- - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
- - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
- - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
- - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
- - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
- """
super(QuotedString,self).__init__()
# remove white space from quote chars - wont work anyway
@@ -2053,7 +2922,7 @@ class QuotedString(Token):
def __str__( self ):
try:
return super(QuotedString,self).__str__()
- except:
+ except Exception:
pass
if self.strRepr is None:
@@ -2063,11 +2932,20 @@ class QuotedString(Token):
class CharsNotIn(Token):
- """Token for matching words composed of characters *not* in a given set.
- Defined with string containing all disallowed characters, and an optional
- minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
- minimum value < 1 is not valid); the default values for C{max} and C{exact}
- are 0, meaning no maximum or exact length restriction.
+ """
+ Token for matching words composed of characters I{not} in a given set (will
+ include whitespace in matched characters if not listed in the provided exclusion set - see example).
+ Defined with string containing all disallowed characters, and an optional
+ minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
+ minimum value < 1 is not valid); the default values for C{max} and C{exact}
+ are 0, meaning no maximum or exact length restriction.
+
+ Example::
+ # define a comma-separated-value as anything that is not a ','
+ csv_value = CharsNotIn(',')
+ print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
+ prints::
+ ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
"""
def __init__( self, notChars, min=1, max=0, exact=0 ):
super(CharsNotIn,self).__init__()
@@ -2113,7 +2991,7 @@ class CharsNotIn(Token):
def __str__( self ):
try:
return super(CharsNotIn, self).__str__()
- except:
+ except Exception:
pass
if self.strRepr is None:
@@ -2125,11 +3003,13 @@ class CharsNotIn(Token):
return self.strRepr
class White(Token):
- """Special matching class for matching whitespace. Normally, whitespace is ignored
- by pyparsing grammars. This class is included when some whitespace structures
- are significant. Define with a string containing the whitespace characters to be
- matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
- as defined for the C{L{Word}} class."""
+ """
+ Special matching class for matching whitespace. Normally, whitespace is ignored
+ by pyparsing grammars. This class is included when some whitespace structures
+ are significant. Define with a string containing the whitespace characters to be
+ matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
+ as defined for the C{L{Word}} class.
+ """
whiteStrs = {
" " : "<SPC>",
"\t": "<TAB>",
@@ -2181,7 +3061,9 @@ class _PositionToken(Token):
self.mayIndexError = False
class GoToColumn(_PositionToken):
- """Token to advance to a specific column of input text; useful for tabular report scraping."""
+ """
+ Token to advance to a specific column of input text; useful for tabular report scraping.
+ """
def __init__( self, colno ):
super(GoToColumn,self).__init__()
self.col = colno
@@ -2203,28 +3085,41 @@ class GoToColumn(_PositionToken):
ret = instring[ loc: newloc ]
return newloc, ret
+
class LineStart(_PositionToken):
- """Matches if current position is at the beginning of a line within the parse string"""
+ """
+ Matches if current position is at the beginning of a line within the parse string
+
+ Example::
+
+ test = '''\
+ AAA this line
+ AAA and this line
+ AAA but not this one
+ B AAA and definitely not this one
+ '''
+
+ for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
+ print(t)
+
+ Prints::
+ ['AAA', ' this line']
+ ['AAA', ' and this line']
+
+ """
def __init__( self ):
super(LineStart,self).__init__()
- self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected start of line"
- def preParse( self, instring, loc ):
- preloc = super(LineStart,self).preParse(instring,loc)
- if instring[preloc] == "\n":
- loc += 1
- return loc
-
def parseImpl( self, instring, loc, doActions=True ):
- if not( loc==0 or
- (loc == self.preParse( instring, 0 )) or
- (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
- raise ParseException(instring, loc, self.errmsg, self)
- return loc, []
+ if col(loc, instring) == 1:
+ return loc, []
+ raise ParseException(instring, loc, self.errmsg, self)
class LineEnd(_PositionToken):
- """Matches if current position is at the end of a line within the parse string"""
+ """
+ Matches if current position is at the end of a line within the parse string
+ """
def __init__( self ):
super(LineEnd,self).__init__()
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
@@ -2242,7 +3137,9 @@ class LineEnd(_PositionToken):
raise ParseException(instring, loc, self.errmsg, self)
class StringStart(_PositionToken):
- """Matches if current position is at the beginning of the parse string"""
+ """
+ Matches if current position is at the beginning of the parse string
+ """
def __init__( self ):
super(StringStart,self).__init__()
self.errmsg = "Expected start of text"
@@ -2255,7 +3152,9 @@ class StringStart(_PositionToken):
return loc, []
class StringEnd(_PositionToken):
- """Matches if current position is at the end of the parse string"""
+ """
+ Matches if current position is at the end of the parse string
+ """
def __init__( self ):
super(StringEnd,self).__init__()
self.errmsg = "Expected end of text"
@@ -2271,11 +3170,12 @@ class StringEnd(_PositionToken):
raise ParseException(instring, loc, self.errmsg, self)
class WordStart(_PositionToken):
- """Matches if the current position is at the beginning of a Word, and
- is not preceded by any character in a given set of C{wordChars}
- (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
- use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
- the string being parsed, or at the beginning of a line.
+ """
+ Matches if the current position is at the beginning of a Word, and
+ is not preceded by any character in a given set of C{wordChars}
+ (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+ use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
+ the string being parsed, or at the beginning of a line.
"""
def __init__(self, wordChars = printables):
super(WordStart,self).__init__()
@@ -2290,11 +3190,12 @@ class WordStart(_PositionToken):
return loc, []
class WordEnd(_PositionToken):
- """Matches if the current position is at the end of a Word, and
- is not followed by any character in a given set of C{wordChars}
- (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
- use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
- the string being parsed, or at the end of a line.
+ """
+ Matches if the current position is at the end of a Word, and
+ is not followed by any character in a given set of C{wordChars}
+ (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+ use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
+ the string being parsed, or at the end of a line.
"""
def __init__(self, wordChars = printables):
super(WordEnd,self).__init__()
@@ -2312,18 +3213,21 @@ class WordEnd(_PositionToken):
class ParseExpression(ParserElement):
- """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
+ """
+ Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
+ """
def __init__( self, exprs, savelist = False ):
super(ParseExpression,self).__init__(savelist)
if isinstance( exprs, _generatorType ):
exprs = list(exprs)
if isinstance( exprs, basestring ):
- self.exprs = [ Literal( exprs ) ]
- elif isinstance( exprs, collections.Sequence ):
+ self.exprs = [ ParserElement._literalStringClass( exprs ) ]
+ elif isinstance( exprs, collections.Iterable ):
+ exprs = list(exprs)
# if sequence of strings provided, wrap with Literal
if all(isinstance(expr, basestring) for expr in exprs):
- exprs = map(Literal, exprs)
+ exprs = map(ParserElement._literalStringClass, exprs)
self.exprs = list(exprs)
else:
try:
@@ -2364,7 +3268,7 @@ class ParseExpression(ParserElement):
def __str__( self ):
try:
return super(ParseExpression,self).__str__()
- except:
+ except Exception:
pass
if self.strRepr is None:
@@ -2421,9 +3325,19 @@ class ParseExpression(ParserElement):
return ret
class And(ParseExpression):
- """Requires all given C{ParseExpression}s to be found in the given order.
- Expressions may be separated by whitespace.
- May be constructed using the C{'+'} operator.
+ """
+ Requires all given C{ParseExpression}s to be found in the given order.
+ Expressions may be separated by whitespace.
+ May be constructed using the C{'+'} operator.
+ May also be constructed using the C{'-'} operator, which will suppress backtracking.
+
+ Example::
+ integer = Word(nums)
+ name_expr = OneOrMore(Word(alphas))
+
+ expr = And([integer("id"),name_expr("name"),integer("age")])
+ # more easily written as:
+ expr = integer("id") + name_expr("name") + integer("age")
"""
class _ErrorStop(Empty):
@@ -2455,9 +3369,9 @@ class And(ParseExpression):
raise
except ParseBaseException as pe:
pe.__traceback__ = None
- raise ParseSyntaxException(pe)
+ raise ParseSyntaxException._from_exception(pe)
except IndexError:
- raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
+ raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
else:
loc, exprtokens = e._parse( instring, loc, doActions )
if exprtokens or exprtokens.haskeys():
@@ -2466,7 +3380,7 @@ class And(ParseExpression):
def __iadd__(self, other ):
if isinstance( other, basestring ):
- other = Literal( other )
+ other = ParserElement._literalStringClass( other )
return self.append( other ) #And( [ self, other ] )
def checkRecursion( self, parseElementList ):
@@ -2487,9 +3401,18 @@ class And(ParseExpression):
class Or(ParseExpression):
- """Requires that at least one C{ParseExpression} is found.
- If two expressions match, the expression that matches the longest string will be used.
- May be constructed using the C{'^'} operator.
+ """
+ Requires that at least one C{ParseExpression} is found.
+ If two expressions match, the expression that matches the longest string will be used.
+ May be constructed using the C{'^'} operator.
+
+ Example::
+ # construct Or using '^' operator
+
+ number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
+ print(number.searchString("123 3.1416 789"))
+ prints::
+ [['123'], ['3.1416'], ['789']]
"""
def __init__( self, exprs, savelist = False ):
super(Or,self).__init__(exprs, savelist)
@@ -2538,7 +3461,7 @@ class Or(ParseExpression):
def __ixor__(self, other ):
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
return self.append( other ) #Or( [ self, other ] )
def __str__( self ):
@@ -2557,9 +3480,21 @@ class Or(ParseExpression):
class MatchFirst(ParseExpression):
- """Requires that at least one C{ParseExpression} is found.
- If two expressions match, the first one listed is the one that will match.
- May be constructed using the C{'|'} operator.
+ """
+ Requires that at least one C{ParseExpression} is found.
+ If two expressions match, the first one listed is the one that will match.
+ May be constructed using the C{'|'} operator.
+
+ Example::
+ # construct MatchFirst using '|' operator
+
+ # watch the order of expressions to match
+ number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
+ print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
+
+ # put more selective expression first
+ number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
+ print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
"""
def __init__( self, exprs, savelist = False ):
super(MatchFirst,self).__init__(exprs, savelist)
@@ -2594,7 +3529,7 @@ class MatchFirst(ParseExpression):
def __ior__(self, other ):
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass( other )
+ other = ParserElement._literalStringClass( other )
return self.append( other ) #MatchFirst( [ self, other ] )
def __str__( self ):
@@ -2613,9 +3548,58 @@ class MatchFirst(ParseExpression):
class Each(ParseExpression):
- """Requires all given C{ParseExpression}s to be found, but in any order.
- Expressions may be separated by whitespace.
- May be constructed using the C{'&'} operator.
+ """
+ Requires all given C{ParseExpression}s to be found, but in any order.
+ Expressions may be separated by whitespace.
+ May be constructed using the C{'&'} operator.
+
+ Example::
+ color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
+ shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
+ integer = Word(nums)
+ shape_attr = "shape:" + shape_type("shape")
+ posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
+ color_attr = "color:" + color("color")
+ size_attr = "size:" + integer("size")
+
+ # use Each (using operator '&') to accept attributes in any order
+ # (shape and posn are required, color and size are optional)
+ shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
+
+ shape_spec.runTests('''
+ shape: SQUARE color: BLACK posn: 100, 120
+ shape: CIRCLE size: 50 color: BLUE posn: 50,80
+ color:GREEN size:20 shape:TRIANGLE posn:20,40
+ '''
+ )
+ prints::
+ shape: SQUARE color: BLACK posn: 100, 120
+ ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
+ - color: BLACK
+ - posn: ['100', ',', '120']
+ - x: 100
+ - y: 120
+ - shape: SQUARE
+
+
+ shape: CIRCLE size: 50 color: BLUE posn: 50,80
+ ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
+ - color: BLUE
+ - posn: ['50', ',', '80']
+ - x: 50
+ - y: 80
+ - shape: CIRCLE
+ - size: 50
+
+
+ color:GREEN size:20 shape:TRIANGLE posn:20,40
+ ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
+ - color: GREEN
+ - posn: ['20', ',', '40']
+ - x: 20
+ - y: 40
+ - shape: TRIANGLE
+ - size: 20
"""
def __init__( self, exprs, savelist = True ):
super(Each,self).__init__(exprs, savelist)
@@ -2669,17 +3653,7 @@ class Each(ParseExpression):
loc,results = e._parse(instring,loc,doActions)
resultlist.append(results)
- finalResults = ParseResults()
- for r in resultlist:
- dups = {}
- for k in r.keys():
- if k in finalResults:
- tmp = ParseResults(finalResults[k])
- tmp += ParseResults(r[k])
- dups[k] = tmp
- finalResults += ParseResults(r)
- for k,v in dups.items():
- finalResults[k] = v
+ finalResults = sum(resultlist, ParseResults([]))
return loc, finalResults
def __str__( self ):
@@ -2698,11 +3672,16 @@ class Each(ParseExpression):
class ParseElementEnhance(ParserElement):
- """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
+ """
+ Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
+ """
def __init__( self, expr, savelist=False ):
super(ParseElementEnhance,self).__init__(savelist)
if isinstance( expr, basestring ):
- expr = Literal(expr)
+ if issubclass(ParserElement._literalStringClass, Token):
+ expr = ParserElement._literalStringClass(expr)
+ else:
+ expr = ParserElement._literalStringClass(Literal(expr))
self.expr = expr
self.strRepr = None
if expr is not None:
@@ -2761,7 +3740,7 @@ class ParseElementEnhance(ParserElement):
def __str__( self ):
try:
return super(ParseElementEnhance,self).__str__()
- except:
+ except Exception:
pass
if self.strRepr is None and self.expr is not None:
@@ -2770,10 +3749,22 @@ class ParseElementEnhance(ParserElement):
class FollowedBy(ParseElementEnhance):
- """Lookahead matching of the given parse expression. C{FollowedBy}
- does *not* advance the parsing position within the input string, it only
+ """
+ Lookahead matching of the given parse expression. C{FollowedBy}
+ does I{not} advance the parsing position within the input string, it only
verifies that the specified parse expression matches at the current
- position. C{FollowedBy} always returns a null token list."""
+ position. C{FollowedBy} always returns a null token list.
+
+ Example::
+ # use FollowedBy to match a label only if it is followed by a ':'
+ data_word = Word(alphas)
+ label = data_word + FollowedBy(':')
+ attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+
+ OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
+ prints::
+ [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
+ """
def __init__( self, expr ):
super(FollowedBy,self).__init__(expr)
self.mayReturnEmpty = True
@@ -2784,11 +3775,16 @@ class FollowedBy(ParseElementEnhance):
class NotAny(ParseElementEnhance):
- """Lookahead to disallow matching with the given parse expression. C{NotAny}
- does *not* advance the parsing position within the input string, it only
- verifies that the specified parse expression does *not* match at the current
- position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
- always returns a null token list. May be constructed using the '~' operator."""
+ """
+ Lookahead to disallow matching with the given parse expression. C{NotAny}
+ does I{not} advance the parsing position within the input string, it only
+ verifies that the specified parse expression does I{not} match at the current
+ position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
+ always returns a null token list. May be constructed using the '~' operator.
+
+ Example::
+
+ """
def __init__( self, expr ):
super(NotAny,self).__init__(expr)
#~ self.leaveWhitespace()
@@ -2810,21 +3806,13 @@ class NotAny(ParseElementEnhance):
return self.strRepr
-
-class OneOrMore(ParseElementEnhance):
- """Repetition of one or more of the given expression.
-
- Parameters:
- - expr - expression that must match one or more times
- - stopOn - (default=None) - expression for a terminating sentinel
- (only required if the sentinel would ordinarily match the repetition
- expression)
- """
+class _MultipleMatch(ParseElementEnhance):
def __init__( self, expr, stopOn=None):
- super(OneOrMore, self).__init__(expr)
+ super(_MultipleMatch, self).__init__(expr)
+ self.saveAsList = True
ender = stopOn
if isinstance(ender, basestring):
- ender = Literal(ender)
+ ender = ParserElement._literalStringClass(ender)
self.not_ender = ~ender if ender is not None else None
def parseImpl( self, instring, loc, doActions=True ):
@@ -2855,6 +3843,32 @@ class OneOrMore(ParseElementEnhance):
pass
return loc, tokens
+
+class OneOrMore(_MultipleMatch):
+ """
+ Repetition of one or more of the given expression.
+
+ Parameters:
+ - expr - expression that must match one or more times
+ - stopOn - (default=C{None}) - expression for a terminating sentinel
+ (only required if the sentinel would ordinarily match the repetition
+ expression)
+
+ Example::
+ data_word = Word(alphas)
+ label = data_word + FollowedBy(':')
+ attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
+
+ text = "shape: SQUARE posn: upper left color: BLACK"
+ OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'posn' as data instead of next label -> [['shape', 'SQUARE posn']]
+
+ # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
+ attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+ OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
+
+ # could also be written as
+ (attr_expr * (1,)).parseString(text).pprint()
+ """
def __str__( self ):
if hasattr(self,"name"):
@@ -2865,19 +3879,17 @@ class OneOrMore(ParseElementEnhance):
return self.strRepr
- def setResultsName( self, name, listAllMatches=False ):
- ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
- ret.saveAsList = True
- return ret
-
-class ZeroOrMore(OneOrMore):
- """Optional repetition of zero or more of the given expression.
+class ZeroOrMore(_MultipleMatch):
+ """
+ Optional repetition of zero or more of the given expression.
- Parameters:
- - expr - expression that must match zero or more times
- - stopOn - (default=None) - expression for a terminating sentinel
+ Parameters:
+ - expr - expression that must match zero or more times
+ - stopOn - (default=C{None}) - expression for a terminating sentinel
(only required if the sentinel would ordinarily match the repetition
expression)
+
+ Example: similar to L{OneOrMore}
"""
def __init__( self, expr, stopOn=None):
super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
@@ -2907,15 +3919,43 @@ class _NullToken(object):
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
- """Optional matching of the given expression.
-
- Parameters:
- - expr - expression that must match zero or more times
- - default (optional) - value to be returned if the optional expression
- is not found.
+ """
+ Optional matching of the given expression.
+
+ Parameters:
+ - expr - expression that must match zero or one time
+ - default (optional) - value to be returned if the optional expression is not found.
+
+ Example::
+ # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
+ zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
+ zip.runTests('''
+ # traditional ZIP code
+ 12345
+
+ # ZIP+4 form
+ 12101-0001
+
+ # invalid ZIP
+ 98765-
+ ''')
+ prints::
+ # traditional ZIP code
+ 12345
+ ['12345']
+
+ # ZIP+4 form
+ 12101-0001
+ ['12101-0001']
+
+ # invalid ZIP
+ 98765-
+ ^
+ FAIL: Expected end of text (at char 5), (line:1, col:6)
"""
def __init__( self, expr, default=_optionalNotMatched ):
super(Optional,self).__init__( expr, savelist=False )
+ self.saveAsList = self.expr.saveAsList
self.defaultValue = default
self.mayReturnEmpty = True
@@ -2943,17 +3983,59 @@ class Optional(ParseElementEnhance):
return self.strRepr
class SkipTo(ParseElementEnhance):
- """Token for skipping over all undefined text until the matched expression is found.
+ """
+ Token for skipping over all undefined text until the matched expression is found.
- Parameters:
- - expr - target expression marking the end of the data to be skipped
- - include - (default=False) if True, the target expression is also parsed
+ Parameters:
+ - expr - target expression marking the end of the data to be skipped
+ - include - (default=C{False}) if True, the target expression is also parsed
(the skipped text and target expression are returned as a 2-element list).
- - ignore - (default=None) used to define grammars (typically quoted strings and
+ - ignore - (default=C{None}) used to define grammars (typically quoted strings and
comments) that might contain false matches to the target expression
- - failOn - (default=None) define expressions that are not allowed to be
+ - failOn - (default=C{None}) define expressions that are not allowed to be
included in the skipped test; if found before the target expression is found,
the SkipTo is not a match
+
+ Example::
+ report = '''
+ Outstanding Issues Report - 1 Jan 2000
+
+ # | Severity | Description | Days Open
+ -----+----------+-------------------------------------------+-----------
+ 101 | Critical | Intermittent system crash | 6
+ 94 | Cosmetic | Spelling error on Login ('log|n') | 14
+ 79 | Minor | System slow when running too many reports | 47
+ '''
+ integer = Word(nums)
+ SEP = Suppress('|')
+ # use SkipTo to simply match everything up until the next SEP
+ # - ignore quoted strings, so that a '|' character inside a quoted string does not match
+ # - parse action will call token.strip() for each matched token, i.e., the description body
+ string_data = SkipTo(SEP, ignore=quotedString)
+ string_data.setParseAction(tokenMap(str.strip))
+ ticket_expr = (integer("issue_num") + SEP
+ + string_data("sev") + SEP
+ + string_data("desc") + SEP
+ + integer("days_open"))
+
+ for tkt in ticket_expr.searchString(report):
+ print(tkt.dump())
+ prints::
+ ['101', 'Critical', 'Intermittent system crash', '6']
+ - days_open: 6
+ - desc: Intermittent system crash
+ - issue_num: 101
+ - sev: Critical
+ ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
+ - days_open: 14
+ - desc: Spelling error on Login ('log|n')
+ - issue_num: 94
+ - sev: Cosmetic
+ ['79', 'Minor', 'System slow when running too many reports', '47']
+ - days_open: 47
+ - desc: System slow when running too many reports
+ - issue_num: 79
+ - sev: Minor
"""
def __init__( self, other, include=False, ignore=None, failOn=None ):
super( SkipTo, self ).__init__( other )
@@ -2963,7 +4045,7 @@ class SkipTo(ParseElementEnhance):
self.includeMatch = include
self.asList = False
if isinstance(failOn, basestring):
- self.failOn = Literal(failOn)
+ self.failOn = ParserElement._literalStringClass(failOn)
else:
self.failOn = failOn
self.errmsg = "No match found for "+_ustr(self.expr)
@@ -3016,26 +4098,30 @@ class SkipTo(ParseElementEnhance):
return loc, skipresult
class Forward(ParseElementEnhance):
- """Forward declaration of an expression to be defined later -
- used for recursive grammars, such as algebraic infix notation.
- When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
-
- Note: take care when assigning to C{Forward} not to overlook precedence of operators.
- Specifically, '|' has a lower precedence than '<<', so that::
- fwdExpr << a | b | c
- will actually be evaluated as::
- (fwdExpr << a) | b | c
- thereby leaving b and c out as parseable alternatives. It is recommended that you
- explicitly group the values inserted into the C{Forward}::
- fwdExpr << (a | b | c)
- Converting to use the '<<=' operator instead will avoid this problem.
+ """
+ Forward declaration of an expression to be defined later -
+ used for recursive grammars, such as algebraic infix notation.
+ When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
+
+ Note: take care when assigning to C{Forward} not to overlook precedence of operators.
+ Specifically, '|' has a lower precedence than '<<', so that::
+ fwdExpr << a | b | c
+ will actually be evaluated as::
+ (fwdExpr << a) | b | c
+ thereby leaving b and c out as parseable alternatives. It is recommended that you
+ explicitly group the values inserted into the C{Forward}::
+ fwdExpr << (a | b | c)
+ Converting to use the '<<=' operator instead will avoid this problem.
+
+ See L{ParseResults.pprint} for an example of a recursive parser created using
+ C{Forward}.
"""
def __init__( self, other=None ):
super(Forward,self).__init__( other, savelist=False )
def __lshift__( self, other ):
if isinstance( other, basestring ):
- other = ParserElement.literalStringClass(other)
+ other = ParserElement._literalStringClass(other)
self.expr = other
self.strRepr = None
self.mayIndexError = self.expr.mayIndexError
@@ -3097,15 +4183,29 @@ class _ForwardNoRecurse(Forward):
return "..."
class TokenConverter(ParseElementEnhance):
- """Abstract subclass of C{ParseExpression}, for converting parsed results."""
+ """
+ Abstract subclass of C{ParseExpression}, for converting parsed results.
+ """
def __init__( self, expr, savelist=False ):
super(TokenConverter,self).__init__( expr )#, savelist )
self.saveAsList = False
class Combine(TokenConverter):
- """Converter to concatenate all matching tokens to a single string.
- By default, the matching patterns must also be contiguous in the input string;
- this can be disabled by specifying C{'adjacent=False'} in the constructor.
+ """
+ Converter to concatenate all matching tokens to a single string.
+ By default, the matching patterns must also be contiguous in the input string;
+ this can be disabled by specifying C{'adjacent=False'} in the constructor.
+
+ Example::
+ real = Word(nums) + '.' + Word(nums)
+ print(real.parseString('3.1416')) # -> ['3', '.', '1416']
+ # will also erroneously match the following
+ print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
+
+ real = Combine(Word(nums) + '.' + Word(nums))
+ print(real.parseString('3.1416')) # -> ['3.1416']
+ # no match when there are internal spaces
+ print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
"""
def __init__( self, expr, joinString="", adjacent=True ):
super(Combine,self).__init__( expr )
@@ -3135,7 +4235,19 @@ class Combine(TokenConverter):
return retToks
class Group(TokenConverter):
- """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
+ """
+ Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
+
+ Example::
+ ident = Word(alphas)
+ num = Word(nums)
+ term = ident | num
+ func = ident + Optional(delimitedList(term))
+ print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
+
+ func = ident + Group(Optional(delimitedList(term)))
+ print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
+ """
def __init__( self, expr ):
super(Group,self).__init__( expr )
self.saveAsList = True
@@ -3144,9 +4256,40 @@ class Group(TokenConverter):
return [ tokenlist ]
class Dict(TokenConverter):
- """Converter to return a repetitive expression as a list, but also as a dictionary.
- Each element can also be referenced using the first token in the expression as its key.
- Useful for tabular report scraping when the first column can be used as a item key.
+ """
+ Converter to return a repetitive expression as a list, but also as a dictionary.
+ Each element can also be referenced using the first token in the expression as its key.
+ Useful for tabular report scraping when the first column can be used as an item key.
+
+ Example::
+ data_word = Word(alphas)
+ label = data_word + FollowedBy(':')
+ attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
+
+ text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+ attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+
+ # print attributes as plain groups
+ print(OneOrMore(attr_expr).parseString(text).dump())
+
+ # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
+ result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
+ print(result.dump())
+
+ # access named fields as dict entries, or output as dict
+ print(result['shape'])
+ print(result.asDict())
+ prints::
+ ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
+
+ [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+ - color: light blue
+ - posn: upper left
+ - shape: SQUARE
+ - texture: burlap
+ SQUARE
+ {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
+ See more examples at L{ParseResults} of accessing fields by results name.
"""
def __init__( self, expr ):
super(Dict,self).__init__( expr )
@@ -3178,7 +4321,24 @@ class Dict(TokenConverter):
class Suppress(TokenConverter):
- """Converter for ignoring the results of a parsed expression."""
+ """
+ Converter for ignoring the results of a parsed expression.
+
+ Example::
+ source = "a, b, c,d"
+ wd = Word(alphas)
+ wd_list1 = wd + ZeroOrMore(',' + wd)
+ print(wd_list1.parseString(source))
+
+ # often, delimiters that are useful during parsing are just in the
+ # way afterward - use Suppress to keep them out of the parsed output
+ wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
+ print(wd_list2.parseString(source))
+ prints::
+ ['a', ',', 'b', ',', 'c', ',', 'd']
+ ['a', 'b', 'c', 'd']
+ (See also L{delimitedList}.)
+ """
def postParse( self, instring, loc, tokenlist ):
return []
@@ -3187,7 +4347,9 @@ class Suppress(TokenConverter):
class OnlyOnce(object):
- """Wrapper for parse actions, to ensure they are only called once."""
+ """
+ Wrapper for parse actions, to ensure they are only called once.
+ """
def __init__(self, methodCall):
self.callable = _trim_arity(methodCall)
self.called = False
@@ -3201,20 +4363,39 @@ class OnlyOnce(object):
self.called = False
def traceParseAction(f):
- """Decorator for debugging parse actions."""
+ """
+ Decorator for debugging parse actions.
+
+ When the parse action is called, this decorator will print C{">>entering I{method-name}(line: 'I{current_source_line}', I{parse_location}, I{matched_tokens})"}.
+ When the parse action completes, the decorator will print C{"<<leaving I{method-name}"} followed by the returned value, or any exception that the parse action raised.
+
+ Example::
+ wd = Word(alphas)
+
+ @traceParseAction
+ def remove_duplicate_chars(tokens):
+ return ''.join(sorted(set(''.join(tokens))))
+
+ wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
+ print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
+ prints::
+ >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
+ <<leaving remove_duplicate_chars (ret: 'dfjkls')
+ ['dfjkls']
+ """
f = _trim_arity(f)
def z(*paArgs):
- thisFunc = f.func_name
+ thisFunc = f.__name__
s,l,t = paArgs[-3:]
if len(paArgs)>3:
thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
- sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
+ sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
try:
ret = f(*paArgs)
except Exception as exc:
sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
raise
- sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
+ sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
return ret
try:
z.__name__ = f.__name__
@@ -3226,12 +4407,17 @@ def traceParseAction(f):
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
- """Helper to define a delimited list of expressions - the delimiter defaults to ','.
- By default, the list elements and delimiters can have intervening whitespace, and
- comments, but this can be overridden by passing C{combine=True} in the constructor.
- If C{combine} is set to C{True}, the matching tokens are returned as a single token
- string, with the delimiters included; otherwise, the matching tokens are returned
- as a list of tokens, with the delimiters suppressed.
+ """
+ Helper to define a delimited list of expressions - the delimiter defaults to ','.
+ By default, the list elements and delimiters can have intervening whitespace, and
+ comments, but this can be overridden by passing C{combine=True} in the constructor.
+ If C{combine} is set to C{True}, the matching tokens are returned as a single token
+ string, with the delimiters included; otherwise, the matching tokens are returned
+ as a list of tokens, with the delimiters suppressed.
+
+ Example::
+ delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
+ delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
"""
dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
if combine:
@@ -3240,11 +4426,22 @@ def delimitedList( expr, delim=",", combine=False ):
return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
def countedArray( expr, intExpr=None ):
- """Helper to define a counted list of expressions.
- This helper defines a pattern of the form::
- integer expr expr expr...
- where the leading integer tells how many expr expressions follow.
- The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
+ """
+ Helper to define a counted list of expressions.
+ This helper defines a pattern of the form::
+ integer expr expr expr...
+ where the leading integer tells how many expr expressions follow.
+ The matched tokens are returned as a list of expr tokens - the leading count token is suppressed.
+
+ If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
+
+ Example::
+ countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
+
+ # in this parser, the leading integer value is given in binary,
+ # '10' indicating that 2 values are in the array
+ binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
+ countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
"""
arrayExpr = Forward()
def countFieldParseAction(s,l,t):
@@ -3269,16 +4466,17 @@ def _flatten(L):
return ret
def matchPreviousLiteral(expr):
- """Helper to define an expression that is indirectly defined from
- the tokens matched in a previous expression, that is, it looks
- for a 'repeat' of a previous expression. For example::
- first = Word(nums)
- second = matchPreviousLiteral(first)
- matchExpr = first + ":" + second
- will match C{"1:1"}, but not C{"1:2"}. Because this matches a
- previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
- If this is not desired, use C{matchPreviousExpr}.
- Do *not* use with packrat parsing enabled.
+ """
+ Helper to define an expression that is indirectly defined from
+ the tokens matched in a previous expression, that is, it looks
+ for a 'repeat' of a previous expression. For example::
+ first = Word(nums)
+ second = matchPreviousLiteral(first)
+ matchExpr = first + ":" + second
+ will match C{"1:1"}, but not C{"1:2"}. Because this matches a
+ previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
+ If this is not desired, use C{matchPreviousExpr}.
+ Do I{not} use with packrat parsing enabled.
"""
rep = Forward()
def copyTokenToRepeater(s,l,t):
@@ -3296,17 +4494,18 @@ def matchPreviousLiteral(expr):
return rep
def matchPreviousExpr(expr):
- """Helper to define an expression that is indirectly defined from
- the tokens matched in a previous expression, that is, it looks
- for a 'repeat' of a previous expression. For example::
- first = Word(nums)
- second = matchPreviousExpr(first)
- matchExpr = first + ":" + second
- will match C{"1:1"}, but not C{"1:2"}. Because this matches by
- expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
- the expressions are evaluated first, and then compared, so
- C{"1"} is compared with C{"10"}.
- Do *not* use with packrat parsing enabled.
+ """
+ Helper to define an expression that is indirectly defined from
+ the tokens matched in a previous expression, that is, it looks
+ for a 'repeat' of a previous expression. For example::
+ first = Word(nums)
+ second = matchPreviousExpr(first)
+ matchExpr = first + ":" + second
+ will match C{"1:1"}, but not C{"1:2"}. Because this matches by
+ expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
+ the expressions are evaluated first, and then compared, so
+ C{"1"} is compared with C{"10"}.
+ Do I{not} use with packrat parsing enabled.
"""
rep = Forward()
e2 = expr.copy()
@@ -3331,16 +4530,27 @@ def _escapeRegexRangeChars(s):
return _ustr(s)
def oneOf( strs, caseless=False, useRegex=True ):
- """Helper to quickly define a set of alternative Literals, and makes sure to do
- longest-first testing when there is a conflict, regardless of the input order,
- but returns a C{L{MatchFirst}} for best performance.
-
- Parameters:
- - strs - a string of space-delimited literals, or a list of string literals
- - caseless - (default=False) - treat all literals as caseless
- - useRegex - (default=True) - as an optimization, will generate a Regex
+ """
+ Helper to quickly define a set of alternative Literals, and makes sure to do
+ longest-first testing when there is a conflict, regardless of the input order,
+ but returns a C{L{MatchFirst}} for best performance.
+
+ Parameters:
+ - strs - a string of space-delimited literals, or a collection of string literals
+ - caseless - (default=C{False}) - treat all literals as caseless
+ - useRegex - (default=C{True}) - as an optimization, will generate a Regex
object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
if creating a C{Regex} raises an exception)
+
+ Example::
+ comp_oper = oneOf("< = > <= >= !=")
+ var = Word(alphas)
+ number = Word(nums)
+ term = var | number
+ comparison_expr = term + comp_oper + term
+ print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))
+ prints::
+ [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
"""
if caseless:
isequal = ( lambda a,b: a.upper() == b.upper() )
@@ -3354,12 +4564,10 @@ def oneOf( strs, caseless=False, useRegex=True ):
symbols = []
if isinstance(strs,basestring):
symbols = strs.split()
- elif isinstance(strs, collections.Sequence):
- symbols = list(strs[:])
- elif isinstance(strs, _generatorType):
+ elif isinstance(strs, collections.Iterable):
symbols = list(strs)
else:
- warnings.warn("Invalid argument to oneOf, expected string or list",
+ warnings.warn("Invalid argument to oneOf, expected string or iterable",
SyntaxWarning, stacklevel=2)
if not symbols:
return NoMatch()
@@ -3386,7 +4594,7 @@ def oneOf( strs, caseless=False, useRegex=True ):
return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
else:
return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
- except:
+ except Exception:
warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
SyntaxWarning, stacklevel=2)
@@ -3395,27 +4603,64 @@ def oneOf( strs, caseless=False, useRegex=True ):
return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
def dictOf( key, value ):
- """Helper to easily and clearly define a dictionary by specifying the respective patterns
- for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
- in the proper order. The key pattern can include delimiting markers or punctuation,
- as long as they are suppressed, thereby leaving the significant key text. The value
- pattern can include named results, so that the C{Dict} results can include named token
- fields.
+ """
+ Helper to easily and clearly define a dictionary by specifying the respective patterns
+ for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
+ in the proper order. The key pattern can include delimiting markers or punctuation,
+ as long as they are suppressed, thereby leaving the significant key text. The value
+ pattern can include named results, so that the C{Dict} results can include named token
+ fields.
+
+ Example::
+ text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+ attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+ print(OneOrMore(attr_expr).parseString(text).dump())
+
+ attr_label = label
+ attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
+
+ # similar to Dict, but simpler call format
+ result = dictOf(attr_label, attr_value).parseString(text)
+ print(result.dump())
+ print(result['shape'])
+ print(result.shape) # object attribute access works too
+ print(result.asDict())
+ prints::
+ [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+ - color: light blue
+ - posn: upper left
+ - shape: SQUARE
+ - texture: burlap
+ SQUARE
+ SQUARE
+ {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
"""
return Dict( ZeroOrMore( Group ( key + value ) ) )
def originalTextFor(expr, asString=True):
- """Helper to return the original, untokenized text for a given expression. Useful to
- restore the parsed fields of an HTML start tag into the raw tag text itself, or to
- revert separate tokens with intervening whitespace back to the original matching
- input text. By default, returns astring containing the original parsed text.
+ """
+ Helper to return the original, untokenized text for a given expression. Useful to
+ restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+ revert separate tokens with intervening whitespace back to the original matching
+ input text. By default, returns a string containing the original parsed text.
- If the optional C{asString} argument is passed as C{False}, then the return value is a
- C{L{ParseResults}} containing any results names that were originally matched, and a
- single token containing the original matched text from the input string. So if
- the expression passed to C{L{originalTextFor}} contains expressions with defined
- results names, you must set C{asString} to C{False} if you want to preserve those
- results name values."""
+ If the optional C{asString} argument is passed as C{False}, then the return value is a
+ C{L{ParseResults}} containing any results names that were originally matched, and a
+ single token containing the original matched text from the input string. So if
+ the expression passed to C{L{originalTextFor}} contains expressions with defined
+ results names, you must set C{asString} to C{False} if you want to preserve those
+ results name values.
+
+ Example::
+ src = "this is test <b> bold <i>text</i> </b> normal text "
+ for tag in ("b","i"):
+ opener,closer = makeHTMLTags(tag)
+ patt = originalTextFor(opener + SkipTo(closer) + closer)
+ print(patt.searchString(src)[0])
+ prints::
+ ['<b> bold <i>text</i> </b>']
+ ['<i>text</i>']
+ """
locMarker = Empty().setParseAction(lambda s,loc,t: loc)
endlocMarker = locMarker.copy()
endlocMarker.callPreparse = False
@@ -3426,22 +4671,35 @@ def originalTextFor(expr, asString=True):
def extractText(s,l,t):
t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
matchExpr.setParseAction(extractText)
+ matchExpr.ignoreExprs = expr.ignoreExprs
return matchExpr
def ungroup(expr):
- """Helper to undo pyparsing's default grouping of And expressions, even
- if all but one are non-empty."""
+ """
+ Helper to undo pyparsing's default grouping of And expressions, even
+ if all but one are non-empty.
+ """
return TokenConverter(expr).setParseAction(lambda t:t[0])
def locatedExpr(expr):
- """Helper to decorate a returned token with its starting and ending locations in the input string.
- This helper adds the following results names:
- - locn_start = location where matched expression begins
- - locn_end = location where matched expression ends
- - value = the actual parsed results
-
- Be careful if the input text contains C{<TAB>} characters, you may want to call
- C{L{ParserElement.parseWithTabs}}
+ """
+ Helper to decorate a returned token with its starting and ending locations in the input string.
+ This helper adds the following results names:
+ - locn_start = location where matched expression begins
+ - locn_end = location where matched expression ends
+ - value = the actual parsed results
+
+ Be careful if the input text contains C{<TAB>} characters, you may want to call
+ C{L{ParserElement.parseWithTabs}}
+
+ Example::
+ wd = Word(alphas)
+ for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+ print(match)
+ prints::
+ [[0, 'ljsdf', 5]]
+ [[8, 'lksdjjf', 15]]
+ [[18, 'lkkjj', 23]]
"""
locator = Empty().setParseAction(lambda s,l,t: l)
return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
@@ -3462,31 +4720,33 @@ _charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
- r"""Helper to easily define string ranges for use in Word construction. Borrows
- syntax from regexp '[]' string range definitions::
- srange("[0-9]") -> "0123456789"
- srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
- srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
- The input string must be enclosed in []'s, and the returned string is the expanded
- character set joined into a single string.
- The values enclosed in the []'s may be::
- a single character
- an escaped character with a leading backslash (such as \- or \])
- an escaped hex character with a leading '\x' (\x21, which is a '!' character)
- (\0x## is also supported for backwards compatibility)
- an escaped octal character with a leading '\0' (\041, which is a '!' character)
- a range of any of the above, separated by a dash ('a-z', etc.)
- any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
+ r"""
+ Helper to easily define string ranges for use in Word construction. Borrows
+ syntax from regexp '[]' string range definitions::
+ srange("[0-9]") -> "0123456789"
+ srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
+ srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
+ The input string must be enclosed in []'s, and the returned string is the expanded
+ character set joined into a single string.
+ The values enclosed in the []'s may be:
+ - a single character
+ - an escaped character with a leading backslash (such as C{\-} or C{\]})
+ - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
+ (C{\0x##} is also supported for backwards compatibility)
+ - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
+ - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
+ - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
"""
_expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
try:
return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
- except:
+ except Exception:
return ""
def matchOnlyAtCol(n):
- """Helper method for defining parse actions that require matching at a specific
- column in the input text.
+ """
+ Helper method for defining parse actions that require matching at a specific
+ column in the input text.
"""
def verifyCol(strg,locn,toks):
if col(locn,strg) != n:
@@ -3494,26 +4754,83 @@ def matchOnlyAtCol(n):
return verifyCol
def replaceWith(replStr):
- """Helper method for common parse actions that simply return a literal value. Especially
- useful when used with C{L{transformString<ParserElement.transformString>}()}.
+ """
+ Helper method for common parse actions that simply return a literal value. Especially
+ useful when used with C{L{transformString<ParserElement.transformString>}()}.
+
+ Example::
+ num = Word(nums).setParseAction(lambda toks: int(toks[0]))
+ na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
+ term = na | num
+
+ OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
"""
return lambda s,l,t: [replStr]
def removeQuotes(s,l,t):
- """Helper parse action for removing quotation marks from parsed quoted strings.
- To use, add this parse action to quoted string using::
- quotedString.setParseAction( removeQuotes )
+ """
+ Helper parse action for removing quotation marks from parsed quoted strings.
+
+ Example::
+ # by default, quotation marks are included in parsed results
+ quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
+
+ # use removeQuotes to strip quotation marks from parsed results
+ quotedString.setParseAction(removeQuotes)
+ quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
"""
return t[0][1:-1]
-def upcaseTokens(s,l,t):
- """Helper parse action to convert tokens to upper case."""
- return [ tt.upper() for tt in map(_ustr,t) ]
+def tokenMap(func, *args):
+ """
+ Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
+ args are passed, they are forwarded to the given function as additional arguments after
+ the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
+ parsed data to an integer using base 16.
+
+ Example (compare the last example to the one in L{ParserElement.transformString})::
+ hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
+ hex_ints.runTests('''
+ 00 11 22 aa FF 0a 0d 1a
+ ''')
+
+ upperword = Word(alphas).setParseAction(tokenMap(str.upper))
+ OneOrMore(upperword).runTests('''
+ my kingdom for a horse
+ ''')
+
+ wd = Word(alphas).setParseAction(tokenMap(str.title))
+ OneOrMore(wd).setParseAction(' '.join).runTests('''
+ now is the winter of our discontent made glorious summer by this sun of york
+ ''')
+ prints::
+ 00 11 22 aa FF 0a 0d 1a
+ [0, 17, 34, 170, 255, 10, 13, 26]
+
+ my kingdom for a horse
+ ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
+
+ now is the winter of our discontent made glorious summer by this sun of york
+ ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
+ """
+ def pa(s,l,t):
+ return [func(tokn, *args) for tokn in t]
+
+ try:
+ func_name = getattr(func, '__name__',
+ getattr(func, '__class__').__name__)
+ except Exception:
+ func_name = str(func)
+ pa.__name__ = func_name
+
+ return pa
-def downcaseTokens(s,l,t):
- """Helper parse action to convert tokens to lower case."""
- return [ tt.lower() for tt in map(_ustr,t) ]
+upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
+"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""
+downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
+"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
+
def _makeTags(tagStr, xml):
"""Internal helper to construct opening and closing tag expressions, given a tag name"""
if isinstance(tagStr,basestring):
@@ -3544,33 +4861,83 @@ def _makeTags(tagStr, xml):
return openTag, closeTag
def makeHTMLTags(tagStr):
- """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
+ """
+ Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
+ tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
+
+ Example::
+ text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
+ # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
+ a,a_end = makeHTMLTags("A")
+ link_expr = a + SkipTo(a_end)("link_text") + a_end
+
+ for link in link_expr.searchString(text):
+ # attributes in the <A> tag (like "href" shown here) are also accessible as named results
+ print(link.link_text, '->', link.href)
+ prints::
+ pyparsing -> http://pyparsing.wikispaces.com
+ """
return _makeTags( tagStr, False )
def makeXMLTags(tagStr):
- """Helper to construct opening and closing tag expressions for XML, given a tag name"""
+ """
+ Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
+ tags only in the given upper/lower case.
+
+ Example: similar to L{makeHTMLTags}
+ """
return _makeTags( tagStr, True )
def withAttribute(*args,**attrDict):
- """Helper to create a validating parse action to be used with start tags created
- with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
- with a required attribute value, to avoid false matches on common tags such as
- C{<TD>} or C{<DIV>}.
-
- Call C{withAttribute} with a series of attribute names and values. Specify the list
- of filter attributes names and values as:
- - keyword arguments, as in C{(align="right")}, or
- - as an explicit dict with C{**} operator, when an attribute name is also a Python
+ """
+ Helper to create a validating parse action to be used with start tags created
+ with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
+ with a required attribute value, to avoid false matches on common tags such as
+ C{<TD>} or C{<DIV>}.
+
+ Call C{withAttribute} with a series of attribute names and values. Specify the list
+ of filter attributes names and values as:
+ - keyword arguments, as in C{(align="right")}, or
+ - as an explicit dict with C{**} operator, when an attribute name is also a Python
reserved word, as in C{**{"class":"Customer", "align":"right"}}
- - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
- For attribute names with a namespace prefix, you must use the second form. Attribute
- names are matched insensitive to upper/lower case.
+ - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
+ For attribute names with a namespace prefix, you must use the second or third form. Attribute
+ names are matched insensitive to upper/lower case.
- If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
-
- To verify that the attribute exists, but without specifying a value, pass
- C{withAttribute.ANY_VALUE} as the value.
- """
+ If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
+
+ To verify that the attribute exists, but without specifying a value, pass
+ C{withAttribute.ANY_VALUE} as the value.
+
+ Example::
+ html = '''
+ <div>
+ Some text
+ <div type="grid">1 4 0 1 0</div>
+ <div type="graph">1,3 2,3 1,1</div>
+ <div>this has no type</div>
+ </div>
+
+ '''
+ div,div_end = makeHTMLTags("div")
+
+ # only match div tag having a type attribute with value "grid"
+ div_grid = div().setParseAction(withAttribute(type="grid"))
+ grid_expr = div_grid + SkipTo(div | div_end)("body")
+ for grid_header in grid_expr.searchString(html):
+ print(grid_header.body)
+
+ # construct a match with any div tag having a type attribute, regardless of the value
+ div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
+ div_expr = div_any_type + SkipTo(div | div_end)("body")
+ for div_header in div_expr.searchString(html):
+ print(div_header.body)
+ prints::
+ 1 4 0 1 0
+
+ 1 4 0 1 0
+ 1,3 2,3 1,1
+ """
if args:
attrs = args[:]
else:
@@ -3587,9 +4954,37 @@ def withAttribute(*args,**attrDict):
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
- """Simplified version of C{L{withAttribute}} when matching on a div class - made
- difficult because C{class} is a reserved word in Python.
- """
+ """
+ Simplified version of C{L{withAttribute}} when matching on a div class - made
+ difficult because C{class} is a reserved word in Python.
+
+ Example::
+ html = '''
+ <div>
+ Some text
+ <div class="grid">1 4 0 1 0</div>
+ <div class="graph">1,3 2,3 1,1</div>
+ <div>this &lt;div&gt; has no class</div>
+ </div>
+
+ '''
+ div,div_end = makeHTMLTags("div")
+ div_grid = div().setParseAction(withClass("grid"))
+
+ grid_expr = div_grid + SkipTo(div | div_end)("body")
+ for grid_header in grid_expr.searchString(html):
+ print(grid_header.body)
+
+ div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
+ div_expr = div_any_type + SkipTo(div | div_end)("body")
+ for div_header in div_expr.searchString(html):
+ print(div_header.body)
+ prints::
+ 1 4 0 1 0
+
+ 1 4 0 1 0
+ 1,3 2,3 1,1
+ """
classattr = "%s:class" % namespace if namespace else "class"
return withAttribute(**{classattr : classname})
@@ -3598,30 +4993,63 @@ opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
- """Helper method for constructing grammars of expressions made up of
- operators working in a precedence hierarchy. Operators may be unary or
- binary, left- or right-associative. Parse actions can also be attached
- to operator expressions.
-
- Parameters:
- - baseExpr - expression representing the most basic element for the nested
- - opList - list of tuples, one for each operator precedence level in the
- expression grammar; each tuple is of the form
- (opExpr, numTerms, rightLeftAssoc, parseAction), where:
- - opExpr is the pyparsing expression for the operator;
- may also be a string, which will be converted to a Literal;
- if numTerms is 3, opExpr is a tuple of two expressions, for the
- two operators separating the 3 terms
- - numTerms is the number of terms for this operator (must
- be 1, 2, or 3)
- - rightLeftAssoc is the indicator whether the operator is
- right or left associative, using the pyparsing-defined
- constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
- - parseAction is the parse action to be associated with
- expressions matching this operator expression (the
- parse action tuple member may be omitted)
- - lpar - expression for matching left-parentheses (default=Suppress('('))
- - rpar - expression for matching right-parentheses (default=Suppress(')'))
+ """
+ Helper method for constructing grammars of expressions made up of
+ operators working in a precedence hierarchy. Operators may be unary or
+ binary, left- or right-associative. Parse actions can also be attached
+ to operator expressions. The generated parser will also recognize the use
+ of parentheses to override operator precedences (see example below).
+
+ Note: if you define a deep operator list, you may see performance issues
+ when using infixNotation. See L{ParserElement.enablePackrat} for a
+ mechanism to potentially improve your parser performance.
+
+ Parameters:
+ - baseExpr - expression representing the most basic element for the nested expression
+ - opList - list of tuples, one for each operator precedence level in the
+ expression grammar; each tuple is of the form
+ (opExpr, numTerms, rightLeftAssoc, parseAction), where:
+ - opExpr is the pyparsing expression for the operator;
+ may also be a string, which will be converted to a Literal;
+ if numTerms is 3, opExpr is a tuple of two expressions, for the
+ two operators separating the 3 terms
+ - numTerms is the number of terms for this operator (must
+ be 1, 2, or 3)
+ - rightLeftAssoc is the indicator whether the operator is
+ right or left associative, using the pyparsing-defined
+ constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
+ - parseAction is the parse action to be associated with
+ expressions matching this operator expression (the
+ parse action tuple member may be omitted)
+ - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
+ - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
+
+ Example::
+ # simple example of four-function arithmetic with ints and variable names
+ integer = pyparsing_common.signed_integer
+ varname = pyparsing_common.identifier
+
+ arith_expr = infixNotation(integer | varname,
+ [
+ ('-', 1, opAssoc.RIGHT),
+ (oneOf('* /'), 2, opAssoc.LEFT),
+ (oneOf('+ -'), 2, opAssoc.LEFT),
+ ])
+
+ arith_expr.runTests('''
+ 5+3*6
+ (5+3)*6
+ -2--11
+ ''', fullDump=False)
+ prints::
+ 5+3*6
+ [[5, '+', [3, '*', 6]]]
+
+ (5+3)*6
+ [[[5, '+', 3], '*', 6]]
+
+ -2--11
+ [[['-', 2], '-', ['-', 11]]]
"""
ret = Forward()
lastExpr = baseExpr | ( lpar + ret + rpar )
@@ -3670,33 +5098,73 @@ def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
lastExpr = thisExpr
ret <<= lastExpr
return ret
+
operatorPrecedence = infixNotation
+"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
-dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
-sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
-quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
+dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
+sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
+quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
+ Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
- """Helper method for defining nested lists enclosed in opening and closing
- delimiters ("(" and ")" are the default).
-
- Parameters:
- - opener - opening character for a nested list (default="("); can also be a pyparsing expression
- - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
- - content - expression for items within the nested lists (default=None)
- - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
-
- If an expression is not provided for the content argument, the nested
- expression will capture all whitespace-delimited content between delimiters
- as a list of separate values.
-
- Use the C{ignoreExpr} argument to define expressions that may contain
- opening or closing characters that should not be treated as opening
- or closing characters for nesting, such as quotedString or a comment
- expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
- The default is L{quotedString}, but if no expressions are to be ignored,
- then pass C{None} for this argument.
+ """
+ Helper method for defining nested lists enclosed in opening and closing
+ delimiters ("(" and ")" are the default).
+
+ Parameters:
+ - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
+ - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
+ - content - expression for items within the nested lists (default=C{None})
+ - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
+
+ If an expression is not provided for the content argument, the nested
+ expression will capture all whitespace-delimited content between delimiters
+ as a list of separate values.
+
+ Use the C{ignoreExpr} argument to define expressions that may contain
+ opening or closing characters that should not be treated as opening
+ or closing characters for nesting, such as quotedString or a comment
+ expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
+ The default is L{quotedString}, but if no expressions are to be ignored,
+ then pass C{None} for this argument.
+
+ Example::
+ data_type = oneOf("void int short long char float double")
+ decl_data_type = Combine(data_type + Optional(Word('*')))
+ ident = Word(alphas+'_', alphanums+'_')
+ number = pyparsing_common.number
+ arg = Group(decl_data_type + ident)
+ LPAR,RPAR = map(Suppress, "()")
+
+ code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
+
+ c_function = (decl_data_type("type")
+ + ident("name")
+ + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
+ + code_body("body"))
+ c_function.ignore(cStyleComment)
+
+ source_code = '''
+ int is_odd(int x) {
+ return (x%2);
+ }
+
+ int dec_to_hex(char hchar) {
+ if (hchar >= '0' && hchar <= '9') {
+ return (ord(hchar)-ord('0'));
+ } else {
+ return (10+ord(hchar)-ord('A'));
+ }
+ }
+ '''
+ for func in c_function.searchString(source_code):
+ print("%(name)s (%(type)s) args: %(args)s" % func)
+
+ prints::
+ is_odd (int) args: [['int', 'x']]
+ dec_to_hex (int) args: [['char', 'hchar']]
"""
if opener == closer:
raise ValueError("opening and closing strings cannot be the same")
@@ -3731,20 +5199,82 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop
return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
- """Helper method for defining space-delimited indentation blocks, such as
- those used to define block statements in Python source code.
+ """
+ Helper method for defining space-delimited indentation blocks, such as
+ those used to define block statements in Python source code.
- Parameters:
- - blockStatementExpr - expression defining syntax of statement that
+ Parameters:
+ - blockStatementExpr - expression defining syntax of statement that
is repeated within the indented block
- - indentStack - list created by caller to manage indentation stack
+ - indentStack - list created by caller to manage indentation stack
(multiple statementWithIndentedBlock expressions within a single grammar
should share a common indentStack)
- - indent - boolean indicating whether block must be indented beyond the
+ - indent - boolean indicating whether block must be indented beyond
the current level; set to False for block of left-most statements
- (default=True)
-
- A valid block must contain at least one C{blockStatement}.
+ (default=C{True})
+
+ A valid block must contain at least one C{blockStatement}.
+
+ Example::
+ data = '''
+ def A(z):
+ A1
+ B = 100
+ G = A2
+ A2
+ A3
+ B
+ def BB(a,b,c):
+ BB1
+ def BBA():
+ bba1
+ bba2
+ bba3
+ C
+ D
+ def spam(x,y):
+ def eggs(z):
+ pass
+ '''
+
+
+ indentStack = [1]
+ stmt = Forward()
+
+ identifier = Word(alphas, alphanums)
+ funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
+ func_body = indentedBlock(stmt, indentStack)
+ funcDef = Group( funcDecl + func_body )
+
+ rvalue = Forward()
+ funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
+ rvalue << (funcCall | identifier | Word(nums))
+ assignment = Group(identifier + "=" + rvalue)
+ stmt << ( funcDef | assignment | identifier )
+
+ module_body = OneOrMore(stmt)
+
+ parseTree = module_body.parseString(data)
+ parseTree.pprint()
+ prints::
+ [['def',
+ 'A',
+ ['(', 'z', ')'],
+ ':',
+ [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
+ 'B',
+ ['def',
+ 'BB',
+ ['(', 'a', 'b', 'c', ')'],
+ ':',
+ [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
+ 'C',
+ 'D',
+ ['def',
+ 'spam',
+ ['(', 'x', 'y', ')'],
+ ':',
+ [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
"""
def checkPeerIndent(s,l,t):
if l >= len(s): return
@@ -3793,45 +5323,374 @@ def replaceHTMLEntity(t):
return _htmlEntityMap.get(t.entity)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
-cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
+cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
+"Comment of the form C{/* ... */}"
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
+"Comment of the form C{<!-- ... -->}"
+
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
-dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
-cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")
+dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
+"Comment of the form C{// ... (to end of line)}"
+
+cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
+"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"
javaStyleComment = cppStyleComment
+"Same as C{L{cppStyleComment}}"
+
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
+"Comment of the form C{# ... (to end of line)}"
+
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
Optional( Word(" \t") +
~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
+"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
+ This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
+
+# some other useful expressions - using lower-case class name since we are really using this as a namespace
+class pyparsing_common:
+ """
+ Here are some common low-level expressions that may be useful in jump-starting parser development:
+ - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
+ - common L{programming identifiers<identifier>}
+ - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
+ - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
+ - L{UUID<uuid>}
+ - L{comma-separated list<comma_separated_list>}
+ Parse actions:
+ - C{L{convertToInteger}}
+ - C{L{convertToFloat}}
+ - C{L{convertToDate}}
+ - C{L{convertToDatetime}}
+ - C{L{stripHTMLTags}}
+ - C{L{upcaseTokens}}
+ - C{L{downcaseTokens}}
+
+ Example::
+ pyparsing_common.number.runTests('''
+ # any int or real number, returned as the appropriate type
+ 100
+ -100
+ +100
+ 3.14159
+ 6.02e23
+ 1e-12
+ ''')
+
+ pyparsing_common.fnumber.runTests('''
+ # any int or real number, returned as float
+ 100
+ -100
+ +100
+ 3.14159
+ 6.02e23
+ 1e-12
+ ''')
+
+ pyparsing_common.hex_integer.runTests('''
+ # hex numbers
+ 100
+ FF
+ ''')
+
+ pyparsing_common.fraction.runTests('''
+ # fractions
+ 1/2
+ -3/4
+ ''')
+
+ pyparsing_common.mixed_integer.runTests('''
+ # mixed fractions
+ 1
+ 1/2
+ -3/4
+ 1-3/4
+ ''')
+
+ import uuid
+ pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
+ pyparsing_common.uuid.runTests('''
+ # uuid
+ 12345678-1234-5678-1234-567812345678
+ ''')
+ prints::
+ # any int or real number, returned as the appropriate type
+ 100
+ [100]
+
+ -100
+ [-100]
+
+ +100
+ [100]
+
+ 3.14159
+ [3.14159]
+
+ 6.02e23
+ [6.02e+23]
+
+ 1e-12
+ [1e-12]
+
+ # any int or real number, returned as float
+ 100
+ [100.0]
+
+ -100
+ [-100.0]
+
+ +100
+ [100.0]
+
+ 3.14159
+ [3.14159]
+
+ 6.02e23
+ [6.02e+23]
+
+ 1e-12
+ [1e-12]
+
+ # hex numbers
+ 100
+ [256]
+
+ FF
+ [255]
+
+ # fractions
+ 1/2
+ [0.5]
+
+ -3/4
+ [-0.75]
+
+ # mixed fractions
+ 1
+ [1]
+
+ 1/2
+ [0.5]
+
+ -3/4
+ [-0.75]
+
+ 1-3/4
+ [1.75]
+
+ # uuid
+ 12345678-1234-5678-1234-567812345678
+ [UUID('12345678-1234-5678-1234-567812345678')]
+ """
+
+ convertToInteger = tokenMap(int)
+ """
+ Parse action for converting parsed integers to Python int
+ """
+
+ convertToFloat = tokenMap(float)
+ """
+ Parse action for converting parsed numbers to Python float
+ """
+
+ integer = Word(nums).setName("integer").setParseAction(convertToInteger)
+ """expression that parses an unsigned integer, returns an int"""
+
+ hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
+ """expression that parses a hexadecimal integer, returns an int"""
+
+ signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
+ """expression that parses an integer with optional leading sign, returns an int"""
+
+ fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
+ """fractional expression of an integer divided by an integer, returns a float"""
+ fraction.addParseAction(lambda t: t[0]/t[-1])
+
+ mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
+ """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
+ mixed_integer.addParseAction(sum)
+
+ real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
+ """expression that parses a floating point number and returns a float"""
+
+ sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
+ """expression that parses a floating point number with optional scientific notation and returns a float"""
+
+ # streamlining this expression makes the docs nicer-looking
+ number = (sci_real | real | signed_integer).streamline()
+ """any numeric expression, returns the corresponding Python type"""
+
+ fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
+ """any int or real number, returned as float"""
+
+ identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
+ """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
+
+ ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
+ "IPv4 address (C{0.0.0.0 - 255.255.255.255})"
+
+ _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
+ _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
+ _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
+ _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
+ _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
+ ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
+ "IPv6 address (long, short, or mixed form)"
+
+ mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
+ "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
+
+ @staticmethod
+ def convertToDate(fmt="%Y-%m-%d"):
+ """
+ Helper to create a parse action for converting parsed date string to Python datetime.date
+
+ Params -
+ - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
+
+ Example::
+ date_expr = pyparsing_common.iso8601_date.copy()
+ date_expr.setParseAction(pyparsing_common.convertToDate())
+ print(date_expr.parseString("1999-12-31"))
+ prints::
+ [datetime.date(1999, 12, 31)]
+ """
+ def cvt_fn(s,l,t):
+ try:
+ return datetime.strptime(t[0], fmt).date()
+ except ValueError as ve:
+ raise ParseException(s, l, str(ve))
+ return cvt_fn
+
+ @staticmethod
+ def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
+ """
+ Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
+
+ Params -
+ - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
+
+ Example::
+ dt_expr = pyparsing_common.iso8601_datetime.copy()
+ dt_expr.setParseAction(pyparsing_common.convertToDatetime())
+ print(dt_expr.parseString("1999-12-31T23:59:59.999"))
+ prints::
+ [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
+ """
+ def cvt_fn(s,l,t):
+ try:
+ return datetime.strptime(t[0], fmt)
+ except ValueError as ve:
+ raise ParseException(s, l, str(ve))
+ return cvt_fn
+
+ iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
+ "ISO8601 date (C{yyyy-mm-dd})"
+
+ iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
+ "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"
+
+ uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
+ "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
+
+ _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
+ @staticmethod
+ def stripHTMLTags(s, l, tokens):
+ """
+ Parse action to remove HTML tags from web page HTML source
+
+ Example::
+ # strip HTML links from normal text
+ text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
+ td,td_end = makeHTMLTags("TD")
+ table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
+
+ print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
+ """
+ return pyparsing_common._html_stripper.transformString(tokens[0])
+
+ _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
+ + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
+ comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
+ """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
+
+ upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
+ """Parse action to convert tokens to upper case."""
+
+ downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
+ """Parse action to convert tokens to lower case."""
if __name__ == "__main__":
- selectToken = CaselessLiteral( "select" )
- fromToken = CaselessLiteral( "from" )
-
- ident = Word( alphas, alphanums + "_$" )
- columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
- columnNameList = Group( delimitedList( columnName ) ).setName("columns")
- tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
- tableNameList = Group( delimitedList( tableName ) ).setName("tables")
- simpleSQL = ( selectToken + \
- ( '*' | columnNameList ).setResultsName( "columns" ) + \
- fromToken + \
- tableNameList.setResultsName( "tables" ) )
-
- simpleSQL.runTests("""\
- SELECT * from XYZZY, ABC
- select * from SYS.XYZZY
- Select A from Sys.dual
- Select AA,BB,CC from Sys.dual
- Select A, B, C from Sys.dual
- Select A, B, C from Sys.dual
- Xelect A, B, C from Sys.dual
- Select A, B, C frox Sys.dual
- Select
- Select ^^^ frox Sys.dual
- Select A, B, C from Sys.dual, Table2""")
+ selectToken = CaselessLiteral("select")
+ fromToken = CaselessLiteral("from")
+
+ ident = Word(alphas, alphanums + "_$")
+
+ columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
+ columnNameList = Group(delimitedList(columnName)).setName("columns")
+ columnSpec = ('*' | columnNameList)
+
+ tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
+ tableNameList = Group(delimitedList(tableName)).setName("tables")
+
+ simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")
+
+ # demo runTests method, including embedded comments in test string
+ simpleSQL.runTests("""
+ # '*' as column list and dotted table name
+ select * from SYS.XYZZY
+
+ # caseless match on "SELECT", and casts back to "select"
+ SELECT * from XYZZY, ABC
+
+ # list of column names, and mixed case SELECT keyword
+ Select AA,BB,CC from Sys.dual
+
+ # multiple tables
+ Select A, B, C from Sys.dual, Table2
+
+ # invalid SELECT keyword - should fail
+ Xelect A, B, C from Sys.dual
+
+ # incomplete command - should fail
+ Select
+
+ # invalid column name - should fail
+ Select ^^^ frox Sys.dual
+
+ """)
+
+ pyparsing_common.number.runTests("""
+ 100
+ -100
+ +100
+ 3.14159
+ 6.02e23
+ 1e-12
+ """)
+
+ # any int or real number, returned as float
+ pyparsing_common.fnumber.runTests("""
+ 100
+ -100
+ +100
+ 3.14159
+ 6.02e23
+ 1e-12
+ """)
+
+ pyparsing_common.hex_integer.runTests("""
+ 100
+ FF
+ """)
+
+ import uuid
+ pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
+ pyparsing_common.uuid.runTests("""
+ 12345678-1234-5678-1234-567812345678
+ """)
diff --git a/pip/_vendor/requests/__init__.py b/pip/_vendor/requests/__init__.py
index 103d9fd8d..44f68361d 100644
--- a/pip/_vendor/requests/__init__.py
+++ b/pip/_vendor/requests/__init__.py
@@ -38,12 +38,11 @@ is at <http://python-requests.org>.
:copyright: (c) 2016 by Kenneth Reitz.
:license: Apache 2.0, see LICENSE for more details.
-
"""
__title__ = 'requests'
-__version__ = '2.10.0'
-__build__ = 0x021000
+__version__ = '2.11.1'
+__build__ = 0x021101
__author__ = 'Kenneth Reitz'
__license__ = 'Apache 2.0'
__copyright__ = 'Copyright 2016 Kenneth Reitz'
@@ -85,7 +84,5 @@ except ImportError:
logging.getLogger(__name__).addHandler(NullHandler())
-import warnings
-
# FileModeWarnings go off per the default.
warnings.simplefilter('default', FileModeWarning, append=True)
diff --git a/pip/_vendor/requests/adapters.py b/pip/_vendor/requests/adapters.py
index 23e448f42..4a4c4e0e0 100644
--- a/pip/_vendor/requests/adapters.py
+++ b/pip/_vendor/requests/adapters.py
@@ -54,10 +54,24 @@ class BaseAdapter(object):
def __init__(self):
super(BaseAdapter, self).__init__()
- def send(self):
+ def send(self, request, stream=False, timeout=None, verify=True,
+ cert=None, proxies=None):
+ """Sends PreparedRequest object. Returns Response object.
+
+ :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
+ :param stream: (optional) Whether to stream the request content.
+ :param timeout: (optional) How long to wait for the server to send
+ data before giving up, as a float, or a :ref:`(connect timeout,
+ read timeout) <timeouts>` tuple.
+ :type timeout: float or tuple
+ :param verify: (optional) Whether to verify SSL certificates.
+ :param cert: (optional) Any user-provided SSL certificate to be trusted.
+ :param proxies: (optional) The proxies dictionary to apply to the request.
+ """
raise NotImplementedError
def close(self):
+ """Cleans up adapter specific items."""
raise NotImplementedError
@@ -154,6 +168,7 @@ class HTTPAdapter(BaseAdapter):
:param proxy: The proxy to return a urllib3 ProxyManager for.
:param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager.
:returns: ProxyManager
+ :rtype: requests.packages.urllib3.ProxyManager
"""
if proxy in self.proxy_manager:
manager = self.proxy_manager[proxy]
@@ -230,6 +245,7 @@ class HTTPAdapter(BaseAdapter):
:param req: The :class:`PreparedRequest <PreparedRequest>` used to generate the response.
:param resp: The urllib3 response object.
+ :rtype: requests.Response
"""
response = Response()
@@ -265,6 +281,7 @@ class HTTPAdapter(BaseAdapter):
:param url: The URL to connect to.
:param proxies: (optional) A Requests-style dictionary of proxies used on this request.
+ :rtype: requests.packages.urllib3.ConnectionPool
"""
proxy = select_proxy(url, proxies)
@@ -302,6 +319,7 @@ class HTTPAdapter(BaseAdapter):
:param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
:param proxies: A dictionary of schemes or schemes and hosts to proxy URLs.
+ :rtype: str
"""
proxy = select_proxy(request.url, proxies)
scheme = urlparse(request.url).scheme
@@ -343,6 +361,7 @@ class HTTPAdapter(BaseAdapter):
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param proxy: The url of the proxy being used for this request.
+ :rtype: dict
"""
headers = {}
username, password = get_auth_from_url(proxy)
@@ -365,6 +384,7 @@ class HTTPAdapter(BaseAdapter):
:param verify: (optional) Whether to verify SSL certificates.
:param cert: (optional) Any user-provided SSL certificate to be trusted.
:param proxies: (optional) The proxies dictionary to apply to the request.
+ :rtype: requests.Response
"""
conn = self.get_connection(request.url, proxies)
diff --git a/pip/_vendor/requests/api.py b/pip/_vendor/requests/api.py
index c2068d0ed..580b3f353 100644
--- a/pip/_vendor/requests/api.py
+++ b/pip/_vendor/requests/api.py
@@ -8,7 +8,6 @@ This module implements the Requests API.
:copyright: (c) 2012 by Kenneth Reitz.
:license: Apache2, see LICENSE for more details.
-
"""
from . import sessions
diff --git a/pip/_vendor/requests/auth.py b/pip/_vendor/requests/auth.py
index 73f8e9da8..49bcb24a4 100644
--- a/pip/_vendor/requests/auth.py
+++ b/pip/_vendor/requests/auth.py
@@ -43,6 +43,7 @@ class AuthBase(object):
class HTTPBasicAuth(AuthBase):
"""Attaches HTTP Basic Authentication to the given Request object."""
+
def __init__(self, username, password):
self.username = username
self.password = password
@@ -63,6 +64,7 @@ class HTTPBasicAuth(AuthBase):
class HTTPProxyAuth(HTTPBasicAuth):
"""Attaches HTTP Proxy Authentication to a given Request object."""
+
def __call__(self, r):
r.headers['Proxy-Authorization'] = _basic_auth_str(self.username, self.password)
return r
@@ -70,6 +72,7 @@ class HTTPProxyAuth(HTTPBasicAuth):
class HTTPDigestAuth(AuthBase):
"""Attaches HTTP Digest Authentication to the given Request object."""
+
def __init__(self, username, password):
self.username = username
self.password = password
@@ -87,6 +90,9 @@ class HTTPDigestAuth(AuthBase):
self._thread_local.num_401_calls = None
def build_digest_header(self, method, url):
+ """
+ :rtype: str
+ """
realm = self._thread_local.chal['realm']
nonce = self._thread_local.chal['nonce']
@@ -179,7 +185,11 @@ class HTTPDigestAuth(AuthBase):
self._thread_local.num_401_calls = 1
def handle_401(self, r, **kwargs):
- """Takes the given response and tries digest-auth, if needed."""
+ """
+ Takes the given response and tries digest-auth, if needed.
+
+ :rtype: requests.Response
+ """
if self._thread_local.pos is not None:
# Rewind the file position indicator of the body to where
diff --git a/pip/_vendor/requests/certs.py b/pip/_vendor/requests/certs.py
index 07e647507..f922b99d7 100644
--- a/pip/_vendor/requests/certs.py
+++ b/pip/_vendor/requests/certs.py
@@ -2,8 +2,8 @@
# -*- coding: utf-8 -*-
"""
-certs.py
-~~~~~~~~
+requests.certs
+~~~~~~~~~~~~~~
This module returns the preferred default CA certificate bundle.
diff --git a/pip/_vendor/requests/compat.py b/pip/_vendor/requests/compat.py
index eb7a4bfda..353ec29e4 100644
--- a/pip/_vendor/requests/compat.py
+++ b/pip/_vendor/requests/compat.py
@@ -1,7 +1,11 @@
# -*- coding: utf-8 -*-
"""
-pythoncompat
+requests.compat
+~~~~~~~~~~~~~~~
+
+This module handles import compatibility issues between Python 2 and
+Python 3.
"""
from .packages import chardet
diff --git a/pip/_vendor/requests/cookies.py b/pip/_vendor/requests/cookies.py
index eee5168f2..41a2fde14 100644
--- a/pip/_vendor/requests/cookies.py
+++ b/pip/_vendor/requests/cookies.py
@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
"""
+requests.cookies
+~~~~~~~~~~~~~~~~
+
Compatibility code to be able to use `cookielib.CookieJar` with requests.
requests.utils imports from here, so be careful with imports.
@@ -131,7 +134,11 @@ def extract_cookies_to_jar(jar, request, response):
def get_cookie_header(jar, request):
- """Produce an appropriate Cookie header string to be sent with `request`, or None."""
+ """
+ Produce an appropriate Cookie header string to be sent with `request`, or None.
+
+ :rtype: str
+ """
r = MockRequest(request)
jar.add_cookie_header(r)
return r.get_new_headers().get('Cookie')
@@ -158,7 +165,8 @@ def remove_cookie_by_name(cookiejar, name, domain=None, path=None):
class CookieConflictError(RuntimeError):
"""There are two cookies that meet the criteria specified in the cookie jar.
- Use .get and .set and include domain and path args in order to be more specific."""
+ Use .get and .set and include domain and path args in order to be more specific.
+ """
class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
@@ -178,12 +186,14 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
.. warning:: dictionary operations that are normally O(1) may be O(n).
"""
+
def get(self, name, default=None, domain=None, path=None):
"""Dict-like get() that also supports optional domain and path args in
order to resolve naming collisions from using one cookie jar over
multiple domains.
- .. warning:: operation is O(n), not O(1)."""
+ .. warning:: operation is O(n), not O(1).
+ """
try:
return self._find_no_duplicates(name, domain, path)
except KeyError:
@@ -192,7 +202,8 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
def set(self, name, value, **kwargs):
"""Dict-like set() that also supports optional domain and path args in
order to resolve naming collisions from using one cookie jar over
- multiple domains."""
+ multiple domains.
+ """
# support client code that unsets cookies by assignment of a None value:
if value is None:
remove_cookie_by_name(self, name, domain=kwargs.get('domain'), path=kwargs.get('path'))
@@ -207,37 +218,54 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
def iterkeys(self):
"""Dict-like iterkeys() that returns an iterator of names of cookies
- from the jar. See itervalues() and iteritems()."""
+ from the jar.
+
+ .. seealso:: itervalues() and iteritems().
+ """
for cookie in iter(self):
yield cookie.name
def keys(self):
"""Dict-like keys() that returns a list of names of cookies from the
- jar. See values() and items()."""
+ jar.
+
+ .. seealso:: values() and items().
+ """
return list(self.iterkeys())
def itervalues(self):
"""Dict-like itervalues() that returns an iterator of values of cookies
- from the jar. See iterkeys() and iteritems()."""
+ from the jar.
+
+ .. seealso:: iterkeys() and iteritems().
+ """
for cookie in iter(self):
yield cookie.value
def values(self):
"""Dict-like values() that returns a list of values of cookies from the
- jar. See keys() and items()."""
+ jar.
+
+ .. seealso:: keys() and items().
+ """
return list(self.itervalues())
def iteritems(self):
"""Dict-like iteritems() that returns an iterator of name-value tuples
- from the jar. See iterkeys() and itervalues()."""
+ from the jar.
+
+ .. seealso:: iterkeys() and itervalues().
+ """
for cookie in iter(self):
yield cookie.name, cookie.value
def items(self):
"""Dict-like items() that returns a list of name-value tuples from the
- jar. See keys() and values(). Allows client-code to call
- ``dict(RequestsCookieJar)`` and get a vanilla python dict of key value
- pairs."""
+ jar. Allows client-code to call ``dict(RequestsCookieJar)`` and get a
+ vanilla python dict of key value pairs.
+
+ .. seealso:: keys() and values().
+ """
return list(self.iteritems())
def list_domains(self):
@@ -258,7 +286,10 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
def multiple_domains(self):
"""Returns True if there are multiple domains in the jar.
- Returns False otherwise."""
+ Returns False otherwise.
+
+ :rtype: bool
+ """
domains = []
for cookie in iter(self):
if cookie.domain is not None and cookie.domain in domains:
@@ -269,7 +300,10 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
def get_dict(self, domain=None, path=None):
"""Takes as an argument an optional domain and path and returns a plain
old Python dict of name-value pairs of cookies that meet the
- requirements."""
+ requirements.
+
+ :rtype: dict
+ """
dictionary = {}
for cookie in iter(self):
if (domain is None or cookie.domain == domain) and (path is None
@@ -288,20 +322,21 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
exception if there are more than one cookie with name. In that case,
use the more explicit get() method instead.
- .. warning:: operation is O(n), not O(1)."""
-
+ .. warning:: operation is O(n), not O(1).
+ """
return self._find_no_duplicates(name)
def __setitem__(self, name, value):
"""Dict-like __setitem__ for compatibility with client code. Throws
exception if there is already a cookie of that name in the jar. In that
- case, use the more explicit set() method instead."""
-
+ case, use the more explicit set() method instead.
+ """
self.set(name, value)
def __delitem__(self, name):
"""Deletes a cookie given a name. Wraps ``cookielib.CookieJar``'s
- ``remove_cookie_by_name()``."""
+ ``remove_cookie_by_name()``.
+ """
remove_cookie_by_name(self, name)
def set_cookie(self, cookie, *args, **kwargs):
@@ -318,11 +353,17 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
super(RequestsCookieJar, self).update(other)
def _find(self, name, domain=None, path=None):
- """Requests uses this method internally to get cookie values. Takes as
- args name and optional domain and path. Returns a cookie.value. If
- there are conflicting cookies, _find arbitrarily chooses one. See
- _find_no_duplicates if you want an exception thrown if there are
- conflicting cookies."""
+ """Requests uses this method internally to get cookie values.
+
+ If there are conflicting cookies, _find arbitrarily chooses one.
+ See _find_no_duplicates if you want an exception thrown if there are
+ conflicting cookies.
+
+ :param name: a string containing name of cookie
+ :param domain: (optional) string containing domain of cookie
+ :param path: (optional) string containing path of cookie
+ :return: cookie.value
+ """
for cookie in iter(self):
if cookie.name == name:
if domain is None or cookie.domain == domain:
@@ -333,10 +374,16 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
def _find_no_duplicates(self, name, domain=None, path=None):
"""Both ``__getitem__`` and ``get`` call this function: it's never
- used elsewhere in Requests. Takes as args name and optional domain and
- path. Returns a cookie.value. Throws KeyError if cookie is not found
- and CookieConflictError if there are multiple cookies that match name
- and optionally domain and path."""
+ used elsewhere in Requests.
+
+ :param name: a string containing name of cookie
+ :param domain: (optional) string containing domain of cookie
+ :param path: (optional) string containing path of cookie
+ :raises KeyError: if cookie is not found
+ :raises CookieConflictError: if there are multiple cookies
+ that match name and optionally domain and path
+ :return: cookie.value
+ """
toReturn = None
for cookie in iter(self):
if cookie.name == name:
diff --git a/pip/_vendor/requests/exceptions.py b/pip/_vendor/requests/exceptions.py
index ba0b910e3..b89e0cc62 100644
--- a/pip/_vendor/requests/exceptions.py
+++ b/pip/_vendor/requests/exceptions.py
@@ -5,19 +5,17 @@ requests.exceptions
~~~~~~~~~~~~~~~~~~~
This module contains the set of Requests' exceptions.
-
"""
from .packages.urllib3.exceptions import HTTPError as BaseHTTPError
class RequestException(IOError):
"""There was an ambiguous exception that occurred while handling your
- request."""
+ request.
+ """
def __init__(self, *args, **kwargs):
- """
- Initialize RequestException with `request` and `response` objects.
- """
+ """Initialize RequestException with `request` and `response` objects."""
response = kwargs.pop('response', None)
self.response = response
self.request = kwargs.pop('request', None)
@@ -80,7 +78,11 @@ class InvalidSchema(RequestException, ValueError):
class InvalidURL(RequestException, ValueError):
- """ The URL provided was somehow invalid. """
+ """The URL provided was somehow invalid."""
+
+
+class InvalidHeader(RequestException, ValueError):
+ """The header value provided was somehow invalid."""
class ChunkedEncodingError(RequestException):
@@ -108,7 +110,5 @@ class RequestsWarning(Warning):
class FileModeWarning(RequestsWarning, DeprecationWarning):
- """
- A file was opened in text mode, but Requests determined its binary length.
- """
+ """A file was opened in text mode, but Requests determined its binary length."""
pass
diff --git a/pip/_vendor/requests/hooks.py b/pip/_vendor/requests/hooks.py
index 9da94366d..32b32de75 100644
--- a/pip/_vendor/requests/hooks.py
+++ b/pip/_vendor/requests/hooks.py
@@ -10,10 +10,10 @@ Available hooks:
``response``:
The response generated from a Request.
-
"""
HOOKS = ['response']
+
def default_hooks():
return dict((event, []) for event in HOOKS)
diff --git a/pip/_vendor/requests/models.py b/pip/_vendor/requests/models.py
index fe4bec1bd..11434ef46 100644
--- a/pip/_vendor/requests/models.py
+++ b/pip/_vendor/requests/models.py
@@ -27,7 +27,8 @@ from .exceptions import (
from .utils import (
guess_filename, get_auth_from_url, requote_uri,
stream_decode_response_unicode, to_key_val_list, parse_header_links,
- iter_slices, guess_json_utf, super_len, to_native_string)
+ iter_slices, guess_json_utf, super_len, to_native_string,
+ check_header_validity)
from .compat import (
cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO,
is_py2, chardet, builtin_str, basestring)
@@ -37,11 +38,11 @@ from .status_codes import codes
#: The set of HTTP status codes that indicate an automatically
#: processable redirect.
REDIRECT_STATI = (
- codes.moved, # 301
- codes.found, # 302
- codes.other, # 303
- codes.temporary_redirect, # 307
- codes.permanent_redirect, # 308
+ codes.moved, # 301
+ codes.found, # 302
+ codes.other, # 303
+ codes.temporary_redirect, # 307
+ codes.permanent_redirect, # 308
)
DEFAULT_REDIRECT_LIMIT = 30
@@ -107,7 +108,6 @@ class RequestEncodingMixin(object):
if parameters are supplied as a dict.
The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, content_type)
or 4-tuples (filename, fileobj, content_type, custom_headers).
-
"""
if (not files):
raise ValueError("Files must be provided.")
@@ -206,8 +206,8 @@ class Request(RequestHooksMixin):
>>> req = requests.Request('GET', 'http://httpbin.org/get')
>>> req.prepare()
<PreparedRequest [GET]>
-
"""
+
def __init__(self, method=None, url=None, headers=None, files=None,
data=None, params=None, auth=None, cookies=None, hooks=None, json=None):
@@ -269,7 +269,6 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
>>> s = requests.Session()
>>> s.send(r)
<Response [200]>
-
"""
def __init__(self):
@@ -403,10 +402,13 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
def prepare_headers(self, headers):
"""Prepares the given HTTP headers."""
+ self.headers = CaseInsensitiveDict()
if headers:
- self.headers = CaseInsensitiveDict((to_native_string(name), value) for name, value in headers.items())
- else:
- self.headers = CaseInsensitiveDict()
+ for header in headers.items():
+ # Raise exception on invalid header value.
+ check_header_validity(header)
+ name, value = header
+ self.headers[to_native_string(name)] = value
def prepare_body(self, data, files, json=None):
"""Prepares the given HTTP body data."""
@@ -420,8 +422,12 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
length = None
if not data and json is not None:
+ # urllib3 requires a bytes-like body. Python 2's json.dumps
+ # provides this natively, but Python 3 gives a Unicode string.
content_type = 'application/json'
body = complexjson.dumps(json)
+ if not isinstance(body, bytes):
+ body = body.encode('utf-8')
is_stream = all([
hasattr(data, '__iter__'),
@@ -508,8 +514,8 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
can only be called once for the life of the
:class:`PreparedRequest <PreparedRequest>` object. Any subsequent calls
to ``prepare_cookies`` will have no actual effect, unless the "Cookie"
- header is removed beforehand."""
-
+ header is removed beforehand.
+ """
if isinstance(cookies, cookielib.CookieJar):
self._cookies = cookies
else:
@@ -653,6 +659,12 @@ class Response(object):
read into memory. This is not necessarily the length of each item
returned as decoding can take place.
+ chunk_size must be of type int or None. A value of None will
+ function differently depending on the value of `stream`.
+ stream=True will read data as it arrives in whatever size the
+ chunks are received. If stream=False, data is returned as
+ a single chunk.
+
If decode_unicode is True, content will be decoded using the best
available encoding based on the response.
"""
@@ -681,6 +693,8 @@ class Response(object):
if self._content_consumed and isinstance(self._content, bool):
raise StreamConsumedError()
+ elif chunk_size is not None and not isinstance(chunk_size, int):
+ raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))
# simulate reading small chunks of the content
reused_chunks = iter_slices(self._content, chunk_size)
@@ -792,7 +806,7 @@ class Response(object):
:param \*\*kwargs: Optional arguments that ``json.loads`` takes.
"""
- if not self.encoding and len(self.content) > 3:
+ if not self.encoding and self.content and len(self.content) > 3:
# No encoding set. JSON RFC 4627 section 3 states we should expect
# UTF-8, -16 or -32. Detect which one to use; If the detection or
# decoding fails, fall back to `self.text` (using chardet to make
@@ -833,12 +847,16 @@ class Response(object):
"""Raises stored :class:`HTTPError`, if one occurred."""
http_error_msg = ''
+ if isinstance(self.reason, bytes):
+ reason = self.reason.decode('utf-8', 'ignore')
+ else:
+ reason = self.reason
if 400 <= self.status_code < 500:
- http_error_msg = '%s Client Error: %s for url: %s' % (self.status_code, self.reason, self.url)
+ http_error_msg = u'%s Client Error: %s for url: %s' % (self.status_code, reason, self.url)
elif 500 <= self.status_code < 600:
- http_error_msg = '%s Server Error: %s for url: %s' % (self.status_code, self.reason, self.url)
+ http_error_msg = u'%s Server Error: %s for url: %s' % (self.status_code, reason, self.url)
if http_error_msg:
raise HTTPError(http_error_msg, response=self)
@@ -850,6 +868,6 @@ class Response(object):
*Note: Should not normally need to be called explicitly.*
"""
if not self._content_consumed:
- return self.raw.close()
+ self.raw.close()
return self.raw.release_conn()
diff --git a/pip/_vendor/requests/packages/urllib3/__init__.py b/pip/_vendor/requests/packages/urllib3/__init__.py
index 73668991f..c35367422 100644
--- a/pip/_vendor/requests/packages/urllib3/__init__.py
+++ b/pip/_vendor/requests/packages/urllib3/__init__.py
@@ -32,7 +32,7 @@ except ImportError:
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
-__version__ = '1.15.1'
+__version__ = '1.16'
__all__ = (
'HTTPConnectionPool',
diff --git a/pip/_vendor/requests/packages/urllib3/connectionpool.py b/pip/_vendor/requests/packages/urllib3/connectionpool.py
index 3fcfb1201..ab634cb4b 100644
--- a/pip/_vendor/requests/packages/urllib3/connectionpool.py
+++ b/pip/_vendor/requests/packages/urllib3/connectionpool.py
@@ -90,7 +90,7 @@ class ConnectionPool(object):
# Return False to re-raise any potential exceptions
return False
- def close():
+ def close(self):
"""
Close all pooled connections and disable the pool.
"""
@@ -163,6 +163,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
scheme = 'http'
ConnectionCls = HTTPConnection
+ ResponseCls = HTTPResponse
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
@@ -383,8 +384,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
try:
try: # Python 2.7, use buffering of HTTP responses
httplib_response = conn.getresponse(buffering=True)
- except TypeError: # Python 2.6 and older
- httplib_response = conn.getresponse()
+ except TypeError: # Python 2.6 and older, Python 3
+ try:
+ httplib_response = conn.getresponse()
+ except Exception as e:
+ # Remove the TypeError from the exception chain in Python 3;
+ # otherwise it looks like a programming error was the cause.
+ six.raise_from(e, None)
except (SocketTimeout, BaseSSLError, SocketError) as e:
self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
raise
@@ -545,6 +551,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn = None
+ # Track whether `conn` needs to be released before
+ # returning/raising/recursing. Update this variable if necessary, and
+ # leave `release_conn` constant throughout the function. That way, if
+ # the function recurses, the original value of `release_conn` will be
+ # passed down into the recursive call, and its value will be respected.
+ #
+ # See issue #651 [1] for details.
+ #
+ # [1] <https://github.com/shazow/urllib3/issues/651>
+ release_this_conn = release_conn
+
# Merge the proxy headers. Only do this in HTTP. We have to copy the
# headers dict so we can safely change it without those changes being
# reflected in anyone else's copy.
@@ -584,10 +601,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
response_conn = conn if not release_conn else None
# Import httplib's response into our own wrapper object
- response = HTTPResponse.from_httplib(httplib_response,
- pool=self,
- connection=response_conn,
- **response_kw)
+ response = self.ResponseCls.from_httplib(httplib_response,
+ pool=self,
+ connection=response_conn,
+ **response_kw)
# Everything went great!
clean_exit = True
@@ -633,9 +650,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Close the connection, set the variable to None, and make sure
# we put the None back in the pool to avoid leaking it.
conn = conn and conn.close()
- release_conn = True
+ release_this_conn = True
- if release_conn:
+ if release_this_conn:
# Put the connection back to be reused. If the connection is
# expired then it will be None, which will get replaced with a
# fresh connection during _get_conn.
@@ -817,7 +834,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
warnings.warn((
'Unverified HTTPS request is being made. '
'Adding certificate verification is strongly advised. See: '
- 'https://urllib3.readthedocs.org/en/latest/security.html'),
+ 'https://urllib3.readthedocs.io/en/latest/security.html'),
InsecureRequestWarning)
diff --git a/pip/_vendor/requests/packages/urllib3/contrib/appengine.py b/pip/_vendor/requests/packages/urllib3/contrib/appengine.py
index f4289c0ff..1579476c3 100644
--- a/pip/_vendor/requests/packages/urllib3/contrib/appengine.py
+++ b/pip/_vendor/requests/packages/urllib3/contrib/appengine.py
@@ -70,7 +70,7 @@ class AppEngineManager(RequestMethods):
warnings.warn(
"urllib3 is using URLFetch on Google App Engine sandbox instead "
"of sockets. To use sockets directly instead of URLFetch see "
- "https://urllib3.readthedocs.org/en/latest/contrib.html.",
+ "https://urllib3.readthedocs.io/en/latest/contrib.html.",
AppEnginePlatformWarning)
RequestMethods.__init__(self, headers)
diff --git a/pip/_vendor/requests/packages/urllib3/contrib/socks.py b/pip/_vendor/requests/packages/urllib3/contrib/socks.py
index 3748fee53..81970fa60 100644
--- a/pip/_vendor/requests/packages/urllib3/contrib/socks.py
+++ b/pip/_vendor/requests/packages/urllib3/contrib/socks.py
@@ -26,7 +26,7 @@ except ImportError:
warnings.warn((
'SOCKS support in urllib3 requires the installation of optional '
'dependencies: specifically, PySocks. For more information, see '
- 'https://urllib3.readthedocs.org/en/latest/contrib.html#socks-proxies'
+ 'https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies'
),
DependencyWarning
)
diff --git a/pip/_vendor/requests/packages/urllib3/packages/six.py b/pip/_vendor/requests/packages/urllib3/packages/six.py
index 27d80112b..190c0239c 100644
--- a/pip/_vendor/requests/packages/urllib3/packages/six.py
+++ b/pip/_vendor/requests/packages/urllib3/packages/six.py
@@ -1,34 +1,41 @@
"""Utilities for writing code that runs on Python 2 and 3"""
-#Copyright (c) 2010-2011 Benjamin Peterson
-
-#Permission is hereby granted, free of charge, to any person obtaining a copy of
-#this software and associated documentation files (the "Software"), to deal in
-#the Software without restriction, including without limitation the rights to
-#use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-#the Software, and to permit persons to whom the Software is furnished to do so,
-#subject to the following conditions:
-
-#The above copyright notice and this permission notice shall be included in all
-#copies or substantial portions of the Software.
-
-#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-#FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-#COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-#IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-#CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
+# Copyright (c) 2010-2015 Benjamin Peterson
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+
+import functools
+import itertools
import operator
import sys
import types
__author__ = "Benjamin Peterson <benjamin@python.org>"
-__version__ = "1.2.0" # Revision 41c74fef2ded
+__version__ = "1.10.0"
-# True if we are running on Python 3.
+# Useful for very coarse version differentiation.
+PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
+PY34 = sys.version_info[0:2] >= (3, 4)
if PY3:
string_types = str,
@@ -51,6 +58,7 @@ else:
else:
# It's possible to have sizeof(long) != sizeof(Py_ssize_t).
class X(object):
+
def __len__(self):
return 1 << 31
try:
@@ -61,7 +69,7 @@ else:
else:
# 64-bit
MAXSIZE = int((1 << 63) - 1)
- del X
+ del X
def _add_doc(func, doc):
@@ -82,9 +90,13 @@ class _LazyDescr(object):
def __get__(self, obj, tp):
result = self._resolve()
- setattr(obj, self.name, result)
- # This is a bit ugly, but it avoids running this again.
- delattr(tp, self.name)
+ setattr(obj, self.name, result) # Invokes __set__.
+ try:
+ # This is a bit ugly, but it avoids running this again by
+ # removing this descriptor.
+ delattr(obj.__class__, self.name)
+ except AttributeError:
+ pass
return result
@@ -102,6 +114,27 @@ class MovedModule(_LazyDescr):
def _resolve(self):
return _import_module(self.mod)
+ def __getattr__(self, attr):
+ _module = self._resolve()
+ value = getattr(_module, attr)
+ setattr(self, attr, value)
+ return value
+
+
+class _LazyModule(types.ModuleType):
+
+ def __init__(self, name):
+ super(_LazyModule, self).__init__(name)
+ self.__doc__ = self.__class__.__doc__
+
+ def __dir__(self):
+ attrs = ["__doc__", "__name__"]
+ attrs += [attr.name for attr in self._moved_attributes]
+ return attrs
+
+ # Subclasses should override this
+ _moved_attributes = []
+
class MovedAttribute(_LazyDescr):
@@ -128,30 +161,111 @@ class MovedAttribute(_LazyDescr):
return getattr(module, self.attr)
+class _SixMetaPathImporter(object):
+
+ """
+ A meta path importer to import six.moves and its submodules.
+
+ This class implements a PEP302 finder and loader. It should be compatible
+ with Python 2.5 and all existing versions of Python3
+ """
+
+ def __init__(self, six_module_name):
+ self.name = six_module_name
+ self.known_modules = {}
+
+ def _add_module(self, mod, *fullnames):
+ for fullname in fullnames:
+ self.known_modules[self.name + "." + fullname] = mod
+
+ def _get_module(self, fullname):
+ return self.known_modules[self.name + "." + fullname]
+
+ def find_module(self, fullname, path=None):
+ if fullname in self.known_modules:
+ return self
+ return None
+
+ def __get_module(self, fullname):
+ try:
+ return self.known_modules[fullname]
+ except KeyError:
+ raise ImportError("This loader does not know module " + fullname)
+
+ def load_module(self, fullname):
+ try:
+ # in case of a reload
+ return sys.modules[fullname]
+ except KeyError:
+ pass
+ mod = self.__get_module(fullname)
+ if isinstance(mod, MovedModule):
+ mod = mod._resolve()
+ else:
+ mod.__loader__ = self
+ sys.modules[fullname] = mod
+ return mod
+
+ def is_package(self, fullname):
+ """
+ Return true, if the named module is a package.
+
+ We need this method to get correct spec objects with
+ Python 3.4 (see PEP451)
+ """
+ return hasattr(self.__get_module(fullname), "__path__")
+
+ def get_code(self, fullname):
+ """Return None
+
+ Required, if is_package is implemented"""
+ self.__get_module(fullname) # eventually raises ImportError
+ return None
+ get_source = get_code # same as get_code
+
+_importer = _SixMetaPathImporter(__name__)
+
+
+class _MovedItems(_LazyModule):
-class _MovedItems(types.ModuleType):
"""Lazy loading of moved objects"""
+ __path__ = [] # mark as package
_moved_attributes = [
MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
+ MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"),
MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
+ MovedAttribute("intern", "__builtin__", "sys"),
MovedAttribute("map", "itertools", "builtins", "imap", "map"),
- MovedAttribute("reload_module", "__builtin__", "imp", "reload"),
+ MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"),
+ MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"),
+ MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"),
+ MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"),
MovedAttribute("reduce", "__builtin__", "functools"),
+ MovedAttribute("shlex_quote", "pipes", "shlex", "quote"),
MovedAttribute("StringIO", "StringIO", "io"),
+ MovedAttribute("UserDict", "UserDict", "collections"),
+ MovedAttribute("UserList", "UserList", "collections"),
+ MovedAttribute("UserString", "UserString", "collections"),
MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
-
+ MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
MovedModule("builtins", "__builtin__"),
MovedModule("configparser", "ConfigParser"),
MovedModule("copyreg", "copy_reg"),
+ MovedModule("dbm_gnu", "gdbm", "dbm.gnu"),
+ MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"),
MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
MovedModule("http_cookies", "Cookie", "http.cookies"),
MovedModule("html_entities", "htmlentitydefs", "html.entities"),
MovedModule("html_parser", "HTMLParser", "html.parser"),
MovedModule("http_client", "httplib", "http.client"),
+ MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
+ MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"),
+ MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
+ MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
@@ -159,12 +273,14 @@ _moved_attributes = [
MovedModule("queue", "Queue"),
MovedModule("reprlib", "repr"),
MovedModule("socketserver", "SocketServer"),
+ MovedModule("_thread", "thread", "_thread"),
MovedModule("tkinter", "Tkinter"),
MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
+ MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"),
MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
MovedModule("tkinter_colorchooser", "tkColorChooser",
@@ -176,14 +292,195 @@ _moved_attributes = [
MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
"tkinter.simpledialog"),
+ MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"),
+ MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"),
+ MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"),
MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
- MovedModule("winreg", "_winreg"),
+ MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"),
+ MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"),
]
+# Add windows specific modules.
+if sys.platform == "win32":
+ _moved_attributes += [
+ MovedModule("winreg", "_winreg"),
+ ]
+
for attr in _moved_attributes:
setattr(_MovedItems, attr.name, attr)
+ if isinstance(attr, MovedModule):
+ _importer._add_module(attr, "moves." + attr.name)
del attr
-moves = sys.modules[__name__ + ".moves"] = _MovedItems("moves")
+_MovedItems._moved_attributes = _moved_attributes
+
+moves = _MovedItems(__name__ + ".moves")
+_importer._add_module(moves, "moves")
+
+
+class Module_six_moves_urllib_parse(_LazyModule):
+
+ """Lazy loading of moved objects in six.moves.urllib_parse"""
+
+
+_urllib_parse_moved_attributes = [
+ MovedAttribute("ParseResult", "urlparse", "urllib.parse"),
+ MovedAttribute("SplitResult", "urlparse", "urllib.parse"),
+ MovedAttribute("parse_qs", "urlparse", "urllib.parse"),
+ MovedAttribute("parse_qsl", "urlparse", "urllib.parse"),
+ MovedAttribute("urldefrag", "urlparse", "urllib.parse"),
+ MovedAttribute("urljoin", "urlparse", "urllib.parse"),
+ MovedAttribute("urlparse", "urlparse", "urllib.parse"),
+ MovedAttribute("urlsplit", "urlparse", "urllib.parse"),
+ MovedAttribute("urlunparse", "urlparse", "urllib.parse"),
+ MovedAttribute("urlunsplit", "urlparse", "urllib.parse"),
+ MovedAttribute("quote", "urllib", "urllib.parse"),
+ MovedAttribute("quote_plus", "urllib", "urllib.parse"),
+ MovedAttribute("unquote", "urllib", "urllib.parse"),
+ MovedAttribute("unquote_plus", "urllib", "urllib.parse"),
+ MovedAttribute("urlencode", "urllib", "urllib.parse"),
+ MovedAttribute("splitquery", "urllib", "urllib.parse"),
+ MovedAttribute("splittag", "urllib", "urllib.parse"),
+ MovedAttribute("splituser", "urllib", "urllib.parse"),
+ MovedAttribute("uses_fragment", "urlparse", "urllib.parse"),
+ MovedAttribute("uses_netloc", "urlparse", "urllib.parse"),
+ MovedAttribute("uses_params", "urlparse", "urllib.parse"),
+ MovedAttribute("uses_query", "urlparse", "urllib.parse"),
+ MovedAttribute("uses_relative", "urlparse", "urllib.parse"),
+]
+for attr in _urllib_parse_moved_attributes:
+ setattr(Module_six_moves_urllib_parse, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"),
+ "moves.urllib_parse", "moves.urllib.parse")
+
+
+class Module_six_moves_urllib_error(_LazyModule):
+
+ """Lazy loading of moved objects in six.moves.urllib_error"""
+
+
+_urllib_error_moved_attributes = [
+ MovedAttribute("URLError", "urllib2", "urllib.error"),
+ MovedAttribute("HTTPError", "urllib2", "urllib.error"),
+ MovedAttribute("ContentTooShortError", "urllib", "urllib.error"),
+]
+for attr in _urllib_error_moved_attributes:
+ setattr(Module_six_moves_urllib_error, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"),
+ "moves.urllib_error", "moves.urllib.error")
+
+
+class Module_six_moves_urllib_request(_LazyModule):
+
+ """Lazy loading of moved objects in six.moves.urllib_request"""
+
+
+_urllib_request_moved_attributes = [
+ MovedAttribute("urlopen", "urllib2", "urllib.request"),
+ MovedAttribute("install_opener", "urllib2", "urllib.request"),
+ MovedAttribute("build_opener", "urllib2", "urllib.request"),
+ MovedAttribute("pathname2url", "urllib", "urllib.request"),
+ MovedAttribute("url2pathname", "urllib", "urllib.request"),
+ MovedAttribute("getproxies", "urllib", "urllib.request"),
+ MovedAttribute("Request", "urllib2", "urllib.request"),
+ MovedAttribute("OpenerDirector", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"),
+ MovedAttribute("ProxyHandler", "urllib2", "urllib.request"),
+ MovedAttribute("BaseHandler", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"),
+ MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"),
+ MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"),
+ MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"),
+ MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPHandler", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"),
+ MovedAttribute("FileHandler", "urllib2", "urllib.request"),
+ MovedAttribute("FTPHandler", "urllib2", "urllib.request"),
+ MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"),
+ MovedAttribute("UnknownHandler", "urllib2", "urllib.request"),
+ MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"),
+ MovedAttribute("urlretrieve", "urllib", "urllib.request"),
+ MovedAttribute("urlcleanup", "urllib", "urllib.request"),
+ MovedAttribute("URLopener", "urllib", "urllib.request"),
+ MovedAttribute("FancyURLopener", "urllib", "urllib.request"),
+ MovedAttribute("proxy_bypass", "urllib", "urllib.request"),
+]
+for attr in _urllib_request_moved_attributes:
+ setattr(Module_six_moves_urllib_request, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"),
+ "moves.urllib_request", "moves.urllib.request")
+
+
+class Module_six_moves_urllib_response(_LazyModule):
+
+ """Lazy loading of moved objects in six.moves.urllib_response"""
+
+
+_urllib_response_moved_attributes = [
+ MovedAttribute("addbase", "urllib", "urllib.response"),
+ MovedAttribute("addclosehook", "urllib", "urllib.response"),
+ MovedAttribute("addinfo", "urllib", "urllib.response"),
+ MovedAttribute("addinfourl", "urllib", "urllib.response"),
+]
+for attr in _urllib_response_moved_attributes:
+ setattr(Module_six_moves_urllib_response, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"),
+ "moves.urllib_response", "moves.urllib.response")
+
+
+class Module_six_moves_urllib_robotparser(_LazyModule):
+
+ """Lazy loading of moved objects in six.moves.urllib_robotparser"""
+
+
+_urllib_robotparser_moved_attributes = [
+ MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"),
+]
+for attr in _urllib_robotparser_moved_attributes:
+ setattr(Module_six_moves_urllib_robotparser, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"),
+ "moves.urllib_robotparser", "moves.urllib.robotparser")
+
+
+class Module_six_moves_urllib(types.ModuleType):
+
+ """Create a six.moves.urllib namespace that resembles the Python 3 namespace"""
+ __path__ = [] # mark as package
+ parse = _importer._get_module("moves.urllib_parse")
+ error = _importer._get_module("moves.urllib_error")
+ request = _importer._get_module("moves.urllib_request")
+ response = _importer._get_module("moves.urllib_response")
+ robotparser = _importer._get_module("moves.urllib_robotparser")
+
+ def __dir__(self):
+ return ['parse', 'error', 'request', 'response', 'robotparser']
+
+_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"),
+ "moves.urllib")
def add_move(move):
@@ -206,22 +503,18 @@ if PY3:
_meth_func = "__func__"
_meth_self = "__self__"
+ _func_closure = "__closure__"
_func_code = "__code__"
_func_defaults = "__defaults__"
-
- _iterkeys = "keys"
- _itervalues = "values"
- _iteritems = "items"
+ _func_globals = "__globals__"
else:
_meth_func = "im_func"
_meth_self = "im_self"
+ _func_closure = "func_closure"
_func_code = "func_code"
_func_defaults = "func_defaults"
-
- _iterkeys = "iterkeys"
- _itervalues = "itervalues"
- _iteritems = "iteritems"
+ _func_globals = "func_globals"
try:
@@ -232,18 +525,33 @@ except NameError:
next = advance_iterator
+try:
+ callable = callable
+except NameError:
+ def callable(obj):
+ return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
+
+
if PY3:
def get_unbound_function(unbound):
return unbound
- Iterator = object
+ create_bound_method = types.MethodType
- def callable(obj):
- return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
+ def create_unbound_method(func, cls):
+ return func
+
+ Iterator = object
else:
def get_unbound_function(unbound):
return unbound.im_func
+ def create_bound_method(func, obj):
+ return types.MethodType(func, obj, obj.__class__)
+
+ def create_unbound_method(func, cls):
+ return types.MethodType(func, None, cls)
+
class Iterator(object):
def next(self):
@@ -256,90 +564,179 @@ _add_doc(get_unbound_function,
get_method_function = operator.attrgetter(_meth_func)
get_method_self = operator.attrgetter(_meth_self)
+get_function_closure = operator.attrgetter(_func_closure)
get_function_code = operator.attrgetter(_func_code)
get_function_defaults = operator.attrgetter(_func_defaults)
+get_function_globals = operator.attrgetter(_func_globals)
+
+
+if PY3:
+ def iterkeys(d, **kw):
+ return iter(d.keys(**kw))
+
+ def itervalues(d, **kw):
+ return iter(d.values(**kw))
+
+ def iteritems(d, **kw):
+ return iter(d.items(**kw))
+
+ def iterlists(d, **kw):
+ return iter(d.lists(**kw))
+
+ viewkeys = operator.methodcaller("keys")
+
+ viewvalues = operator.methodcaller("values")
+
+ viewitems = operator.methodcaller("items")
+else:
+ def iterkeys(d, **kw):
+ return d.iterkeys(**kw)
+ def itervalues(d, **kw):
+ return d.itervalues(**kw)
-def iterkeys(d):
- """Return an iterator over the keys of a dictionary."""
- return iter(getattr(d, _iterkeys)())
+ def iteritems(d, **kw):
+ return d.iteritems(**kw)
-def itervalues(d):
- """Return an iterator over the values of a dictionary."""
- return iter(getattr(d, _itervalues)())
+ def iterlists(d, **kw):
+ return d.iterlists(**kw)
-def iteritems(d):
- """Return an iterator over the (key, value) pairs of a dictionary."""
- return iter(getattr(d, _iteritems)())
+ viewkeys = operator.methodcaller("viewkeys")
+
+ viewvalues = operator.methodcaller("viewvalues")
+
+ viewitems = operator.methodcaller("viewitems")
+
+_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.")
+_add_doc(itervalues, "Return an iterator over the values of a dictionary.")
+_add_doc(iteritems,
+ "Return an iterator over the (key, value) pairs of a dictionary.")
+_add_doc(iterlists,
+ "Return an iterator over the (key, [values]) pairs of a dictionary.")
if PY3:
def b(s):
return s.encode("latin-1")
+
def u(s):
return s
- if sys.version_info[1] <= 1:
- def int2byte(i):
- return bytes((i,))
- else:
- # This is about 2x faster than the implementation above on 3.2+
- int2byte = operator.methodcaller("to_bytes", 1, "big")
+ unichr = chr
+ import struct
+ int2byte = struct.Struct(">B").pack
+ del struct
+ byte2int = operator.itemgetter(0)
+ indexbytes = operator.getitem
+ iterbytes = iter
import io
StringIO = io.StringIO
BytesIO = io.BytesIO
+ _assertCountEqual = "assertCountEqual"
+ if sys.version_info[1] <= 1:
+ _assertRaisesRegex = "assertRaisesRegexp"
+ _assertRegex = "assertRegexpMatches"
+ else:
+ _assertRaisesRegex = "assertRaisesRegex"
+ _assertRegex = "assertRegex"
else:
def b(s):
return s
+ # Workaround for standalone backslash
+
def u(s):
- return unicode(s, "unicode_escape")
+ return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape")
+ unichr = unichr
int2byte = chr
+
+ def byte2int(bs):
+ return ord(bs[0])
+
+ def indexbytes(buf, i):
+ return ord(buf[i])
+ iterbytes = functools.partial(itertools.imap, ord)
import StringIO
StringIO = BytesIO = StringIO.StringIO
+ _assertCountEqual = "assertItemsEqual"
+ _assertRaisesRegex = "assertRaisesRegexp"
+ _assertRegex = "assertRegexpMatches"
_add_doc(b, """Byte literal""")
_add_doc(u, """Text literal""")
-if PY3:
- import builtins
- exec_ = getattr(builtins, "exec")
+def assertCountEqual(self, *args, **kwargs):
+ return getattr(self, _assertCountEqual)(*args, **kwargs)
+
+def assertRaisesRegex(self, *args, **kwargs):
+ return getattr(self, _assertRaisesRegex)(*args, **kwargs)
+
+
+def assertRegex(self, *args, **kwargs):
+ return getattr(self, _assertRegex)(*args, **kwargs)
+
+
+if PY3:
+ exec_ = getattr(moves.builtins, "exec")
def reraise(tp, value, tb=None):
+ if value is None:
+ value = tp()
if value.__traceback__ is not tb:
raise value.with_traceback(tb)
raise value
-
- print_ = getattr(builtins, "print")
- del builtins
-
else:
- def exec_(code, globs=None, locs=None):
+ def exec_(_code_, _globs_=None, _locs_=None):
"""Execute code in a namespace."""
- if globs is None:
+ if _globs_ is None:
frame = sys._getframe(1)
- globs = frame.f_globals
- if locs is None:
- locs = frame.f_locals
+ _globs_ = frame.f_globals
+ if _locs_ is None:
+ _locs_ = frame.f_locals
del frame
- elif locs is None:
- locs = globs
- exec("""exec code in globs, locs""")
-
+ elif _locs_ is None:
+ _locs_ = _globs_
+ exec("""exec _code_ in _globs_, _locs_""")
exec_("""def reraise(tp, value, tb=None):
raise tp, value, tb
""")
+if sys.version_info[:2] == (3, 2):
+ exec_("""def raise_from(value, from_value):
+ if from_value is None:
+ raise value
+ raise value from from_value
+""")
+elif sys.version_info[:2] > (3, 2):
+ exec_("""def raise_from(value, from_value):
+ raise value from from_value
+""")
+else:
+ def raise_from(value, from_value):
+ raise value
+
+
+print_ = getattr(moves.builtins, "print", None)
+if print_ is None:
def print_(*args, **kwargs):
- """The new-style print function."""
+ """The new-style print function for Python 2.4 and 2.5."""
fp = kwargs.pop("file", sys.stdout)
if fp is None:
return
+
def write(data):
if not isinstance(data, basestring):
data = str(data)
+ # If the file has an encoding, encode unicode with it.
+ if (isinstance(fp, file) and
+ isinstance(data, unicode) and
+ fp.encoding is not None):
+ errors = getattr(fp, "errors", None)
+ if errors is None:
+ errors = "strict"
+ data = data.encode(fp.encoding, errors)
fp.write(data)
want_unicode = False
sep = kwargs.pop("sep", None)
@@ -376,10 +773,96 @@ else:
write(sep)
write(arg)
write(end)
+if sys.version_info[:2] < (3, 3):
+ _print = print_
+
+ def print_(*args, **kwargs):
+ fp = kwargs.get("file", sys.stdout)
+ flush = kwargs.pop("flush", False)
+ _print(*args, **kwargs)
+ if flush and fp is not None:
+ fp.flush()
_add_doc(reraise, """Reraise an exception.""")
+if sys.version_info[0:2] < (3, 4):
+ def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS,
+ updated=functools.WRAPPER_UPDATES):
+ def wrapper(f):
+ f = functools.wraps(wrapped, assigned, updated)(f)
+ f.__wrapped__ = wrapped
+ return f
+ return wrapper
+else:
+ wraps = functools.wraps
+
-def with_metaclass(meta, base=object):
+def with_metaclass(meta, *bases):
"""Create a base class with a metaclass."""
- return meta("NewBase", (base,), {})
+ # This requires a bit of explanation: the basic idea is to make a dummy
+ # metaclass for one level of class instantiation that replaces itself with
+ # the actual metaclass.
+ class metaclass(meta):
+
+ def __new__(cls, name, this_bases, d):
+ return meta(name, bases, d)
+ return type.__new__(metaclass, 'temporary_class', (), {})
+
+
+def add_metaclass(metaclass):
+ """Class decorator for creating a class with a metaclass."""
+ def wrapper(cls):
+ orig_vars = cls.__dict__.copy()
+ slots = orig_vars.get('__slots__')
+ if slots is not None:
+ if isinstance(slots, str):
+ slots = [slots]
+ for slots_var in slots:
+ orig_vars.pop(slots_var)
+ orig_vars.pop('__dict__', None)
+ orig_vars.pop('__weakref__', None)
+ return metaclass(cls.__name__, cls.__bases__, orig_vars)
+ return wrapper
+
+
+def python_2_unicode_compatible(klass):
+ """
+ A decorator that defines __unicode__ and __str__ methods under Python 2.
+ Under Python 3 it does nothing.
+
+ To support Python 2 and 3 with a single code base, define a __str__ method
+ returning text and apply this decorator to the class.
+ """
+ if PY2:
+ if '__str__' not in klass.__dict__:
+ raise ValueError("@python_2_unicode_compatible cannot be applied "
+ "to %s because it doesn't define __str__()." %
+ klass.__name__)
+ klass.__unicode__ = klass.__str__
+ klass.__str__ = lambda self: self.__unicode__().encode('utf-8')
+ return klass
+
+
+# Complete the moves implementation.
+# This code is at the end of this module to speed up module loading.
+# Turn this module into a package.
+__path__ = [] # required for PEP 302 and PEP 451
+__package__ = __name__ # see PEP 366 @ReservedAssignment
+if globals().get("__spec__") is not None:
+ __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable
+# Remove other six meta path importers, since they cause problems. This can
+# happen if six is removed from sys.modules and then reloaded. (Setuptools does
+# this for some reason.)
+if sys.meta_path:
+ for i, importer in enumerate(sys.meta_path):
+ # Here's some real nastiness: Another "instance" of the six module might
+ # be floating around. Therefore, we can't use isinstance() to check for
+ # the six meta path importer, since the other six instance will have
+ # inserted an importer with different class.
+ if (type(importer).__name__ == "_SixMetaPathImporter" and
+ importer.name == __name__):
+ del sys.meta_path[i]
+ break
+ del i, importer
+# Finally, add the importer to the meta path import hook.
+sys.meta_path.append(_importer)
diff --git a/pip/_vendor/requests/packages/urllib3/poolmanager.py b/pip/_vendor/requests/packages/urllib3/poolmanager.py
index 1023dcba3..7ed00b1ce 100644
--- a/pip/_vendor/requests/packages/urllib3/poolmanager.py
+++ b/pip/_vendor/requests/packages/urllib3/poolmanager.py
@@ -1,4 +1,6 @@
from __future__ import absolute_import
+import collections
+import functools
import logging
try: # Python 3
@@ -23,6 +25,59 @@ log = logging.getLogger(__name__)
SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
'ssl_version', 'ca_cert_dir')
+# The base fields to use when determining what pool to get a connection from;
+# these do not rely on the ``connection_pool_kw`` and can be determined by the
+# URL and potentially the ``urllib3.connection.port_by_scheme`` dictionary.
+#
+# All custom key schemes should include the fields in this key at a minimum.
+BasePoolKey = collections.namedtuple('BasePoolKey', ('scheme', 'host', 'port'))
+
+# The fields to use when determining what pool to get a HTTP and HTTPS
+# connection from. All additional fields must be present in the PoolManager's
+# ``connection_pool_kw`` instance variable.
+HTTPPoolKey = collections.namedtuple(
+ 'HTTPPoolKey', BasePoolKey._fields + ('timeout', 'retries', 'strict',
+ 'block', 'source_address')
+)
+HTTPSPoolKey = collections.namedtuple(
+ 'HTTPSPoolKey', HTTPPoolKey._fields + SSL_KEYWORDS
+)
+
+
+def _default_key_normalizer(key_class, request_context):
+ """
+ Create a pool key of type ``key_class`` for a request.
+
+ According to RFC 3986, both the scheme and host are case-insensitive.
+ Therefore, this function normalizes both before constructing the pool
+ key for an HTTPS request. If you wish to change this behaviour, provide
+ alternate callables to ``key_fn_by_scheme``.
+
+ :param key_class:
+ The class to use when constructing the key. This should be a namedtuple
+ with the ``scheme`` and ``host`` keys at a minimum.
+
+ :param request_context:
+ A dictionary-like object that contain the context for a request.
+ It should contain a key for each field in the :class:`HTTPPoolKey`
+ """
+ context = {}
+ for key in key_class._fields:
+ context[key] = request_context.get(key)
+ context['scheme'] = context['scheme'].lower()
+ context['host'] = context['host'].lower()
+ return key_class(**context)
+
+
+# A dictionary that maps a scheme to a callable that creates a pool key.
+# This can be used to alter the way pool keys are constructed, if desired.
+# Each PoolManager makes a copy of this dictionary so they can be configured
+# globally here, or individually on the instance.
+key_fn_by_scheme = {
+ 'http': functools.partial(_default_key_normalizer, HTTPPoolKey),
+ 'https': functools.partial(_default_key_normalizer, HTTPSPoolKey),
+}
+
pool_classes_by_scheme = {
'http': HTTPConnectionPool,
'https': HTTPSConnectionPool,
@@ -65,8 +120,10 @@ class PoolManager(RequestMethods):
self.pools = RecentlyUsedContainer(num_pools,
dispose_func=lambda p: p.close())
- # Locally set the pool classes so other PoolManagers can override them.
+ # Locally set the pool classes and keys so other PoolManagers can
+ # override them.
self.pool_classes_by_scheme = pool_classes_by_scheme
+ self.key_fn_by_scheme = key_fn_by_scheme.copy()
def __enter__(self):
return self
@@ -113,10 +170,36 @@ class PoolManager(RequestMethods):
if not host:
raise LocationValueError("No host specified.")
- scheme = scheme or 'http'
- port = port or port_by_scheme.get(scheme, 80)
- pool_key = (scheme, host, port)
+ request_context = self.connection_pool_kw.copy()
+ request_context['scheme'] = scheme or 'http'
+ if not port:
+ port = port_by_scheme.get(request_context['scheme'].lower(), 80)
+ request_context['port'] = port
+ request_context['host'] = host
+
+ return self.connection_from_context(request_context)
+ def connection_from_context(self, request_context):
+ """
+ Get a :class:`ConnectionPool` based on the request context.
+
+ ``request_context`` must at least contain the ``scheme`` key and its
+ value must be a key in ``key_fn_by_scheme`` instance variable.
+ """
+ scheme = request_context['scheme'].lower()
+ pool_key_constructor = self.key_fn_by_scheme[scheme]
+ pool_key = pool_key_constructor(request_context)
+
+ return self.connection_from_pool_key(pool_key)
+
+ def connection_from_pool_key(self, pool_key):
+ """
+ Get a :class:`ConnectionPool` based on the provided pool key.
+
+ ``pool_key`` should be a namedtuple that only contains immutable
+ objects. At a minimum it must have the ``scheme``, ``host``, and
+ ``port`` fields.
+ """
with self.pools.lock:
# If the scheme, host, or port doesn't match existing open
# connections, open a new ConnectionPool.
@@ -125,7 +208,7 @@ class PoolManager(RequestMethods):
return pool
# Make a fresh ConnectionPool of the desired type
- pool = self._new_pool(scheme, host, port)
+ pool = self._new_pool(pool_key.scheme, pool_key.host, pool_key.port)
self.pools[pool_key] = pool
return pool
diff --git a/pip/_vendor/requests/packages/urllib3/response.py b/pip/_vendor/requests/packages/urllib3/response.py
index ac1b2f19e..556790327 100644
--- a/pip/_vendor/requests/packages/urllib3/response.py
+++ b/pip/_vendor/requests/packages/urllib3/response.py
@@ -165,6 +165,10 @@ class HTTPResponse(io.IOBase):
if self._fp:
return self.read(cache_content=True)
+ @property
+ def connection(self):
+ return self._connection
+
def tell(self):
"""
Obtain the number of bytes pulled over the wire so far. May differ from
diff --git a/pip/_vendor/requests/packages/urllib3/util/connection.py b/pip/_vendor/requests/packages/urllib3/util/connection.py
index 01a4812f2..5e761352f 100644
--- a/pip/_vendor/requests/packages/urllib3/util/connection.py
+++ b/pip/_vendor/requests/packages/urllib3/util/connection.py
@@ -46,6 +46,8 @@ def is_connection_dropped(conn): # Platform-specific
# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
+# One additional modification is that we avoid binding to IPv6 servers
+# discovered in DNS if the system doesn't have IPv6 functionality.
def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None, socket_options=None):
"""Connect to *address* and return the socket object.
@@ -64,14 +66,19 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
if host.startswith('['):
host = host.strip('[]')
err = None
- for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
+
+ # Using the value from allowed_gai_family() in the context of getaddrinfo lets
+ # us select whether to work with IPv4 DNS records, IPv6 records, or both.
+ # The original create_connection function always returns all records.
+ family = allowed_gai_family()
+
+ for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
- # This is the only addition urllib3 makes to this function.
_set_socket_options(sock, socket_options)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
@@ -99,3 +106,39 @@ def _set_socket_options(sock, options):
for opt in options:
sock.setsockopt(*opt)
+
+
+def allowed_gai_family():
+ """This function is designed to work in the context of
+ getaddrinfo, where family=socket.AF_UNSPEC is the default and
+ will perform a DNS search for both IPv6 and IPv4 records."""
+
+ family = socket.AF_INET
+ if HAS_IPV6:
+ family = socket.AF_UNSPEC
+ return family
+
+
+def _has_ipv6(host):
+ """ Returns True if the system can bind an IPv6 address. """
+ sock = None
+ has_ipv6 = False
+
+ if socket.has_ipv6:
+ # has_ipv6 returns true if cPython was compiled with IPv6 support.
+ # It does not tell us if the system has IPv6 support enabled. To
+ # determine that we must bind to an IPv6 address.
+ # https://github.com/shazow/urllib3/pull/611
+ # https://bugs.python.org/issue658327
+ try:
+ sock = socket.socket(socket.AF_INET6)
+ sock.bind((host, 0))
+ has_ipv6 = True
+ except Exception:
+ pass
+
+ if sock:
+ sock.close()
+ return has_ipv6
+
+HAS_IPV6 = _has_ipv6('::1')
diff --git a/pip/_vendor/requests/packages/urllib3/util/retry.py b/pip/_vendor/requests/packages/urllib3/util/retry.py
index 2d3aa20d0..d379833c5 100644
--- a/pip/_vendor/requests/packages/urllib3/util/retry.py
+++ b/pip/_vendor/requests/packages/urllib3/util/retry.py
@@ -80,21 +80,27 @@ class Retry(object):
Set of uppercased HTTP method verbs that we should retry on.
By default, we only retry on methods which are considered to be
- indempotent (multiple requests with the same parameters end with the
+ idempotent (multiple requests with the same parameters end with the
same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
+ Set to a ``False`` value to retry on any verb.
+
:param iterable status_forcelist:
- A set of HTTP status codes that we should force a retry on.
+ A set of integer HTTP status codes that we should force a retry on.
+ A retry is initiated if the request method is in ``method_whitelist``
+ and the response status code is in ``status_forcelist``.
By default, this is disabled with ``None``.
:param float backoff_factor:
- A backoff factor to apply between attempts. urllib3 will sleep for::
+ A backoff factor to apply between attempts after the second try
+ (most errors are resolved immediately by a second try without a
+ delay). urllib3 will sleep for::
{backoff factor} * (2 ^ ({number of total retries} - 1))
seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
- for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer
+ for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer
than :attr:`Retry.BACKOFF_MAX`.
By default, backoff is disabled (set to 0).
diff --git a/pip/_vendor/requests/packages/urllib3/util/ssl_.py b/pip/_vendor/requests/packages/urllib3/util/ssl_.py
index e8d9e7d29..4a64d7ef9 100644
--- a/pip/_vendor/requests/packages/urllib3/util/ssl_.py
+++ b/pip/_vendor/requests/packages/urllib3/util/ssl_.py
@@ -117,7 +117,7 @@ except ImportError:
'urllib3 from configuring SSL appropriately and may cause '
'certain SSL connections to fail. You can upgrade to a newer '
'version of Python to solve this. For more information, see '
- 'https://urllib3.readthedocs.org/en/latest/security.html'
+ 'https://urllib3.readthedocs.io/en/latest/security.html'
'#insecureplatformwarning.',
InsecurePlatformWarning
)
@@ -313,7 +313,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
'This may cause the server to present an incorrect TLS '
'certificate, which can cause validation failures. You can upgrade to '
'a newer version of Python to solve this. For more information, see '
- 'https://urllib3.readthedocs.org/en/latest/security.html'
+ 'https://urllib3.readthedocs.io/en/latest/security.html'
'#snimissingwarning.',
SNIMissingWarning
)
diff --git a/pip/_vendor/requests/sessions.py b/pip/_vendor/requests/sessions.py
index 45be9733e..bcbcc880f 100644
--- a/pip/_vendor/requests/sessions.py
+++ b/pip/_vendor/requests/sessions.py
@@ -6,7 +6,6 @@ requests.session
This module provides a Session object to manage and persist settings across
requests (cookies, auth, proxies).
-
"""
import os
from collections import Mapping
@@ -40,9 +39,8 @@ REDIRECT_CACHE_SIZE = 1000
def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
- """
- Determines appropriate setting for a given request, taking into account the
- explicit setting on that request, and the setting in the session. If a
+ """Determines appropriate setting for a given request, taking into account
+ the explicit setting on that request, and the setting in the session. If a
setting is a dictionary, they will be merged together using `dict_class`
"""
@@ -72,8 +70,7 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict):
- """
- Properly merges both requests and session hooks.
+ """Properly merges both requests and session hooks.
This is necessary because when request_hooks == {'response': []}, the
merge breaks Session hooks entirely.
@@ -143,9 +140,10 @@ class SessionRedirectMixin(object):
# https://github.com/kennethreitz/requests/issues/1084
if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect):
- if 'Content-Length' in prepared_request.headers:
- del prepared_request.headers['Content-Length']
-
+ # https://github.com/kennethreitz/requests/issues/3490
+ purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding')
+ for header in purged_headers:
+ prepared_request.headers.pop(header, None)
prepared_request.body = None
headers = prepared_request.headers
@@ -185,8 +183,7 @@ class SessionRedirectMixin(object):
yield resp
def rebuild_auth(self, prepared_request, response):
- """
- When being redirected we may want to strip authentication from the
+ """When being redirected we may want to strip authentication from the
request to avoid leaking credentials. This method intelligently removes
and reapplies authentication where possible to avoid credential loss.
"""
@@ -195,7 +192,7 @@ class SessionRedirectMixin(object):
if 'Authorization' in headers:
# If we get redirected to a new host, we should strip out any
- # authentication headers.
+ # authentication headers.
original_parsed = urlparse(response.request.url)
redirect_parsed = urlparse(url)
@@ -210,8 +207,7 @@ class SessionRedirectMixin(object):
return
def rebuild_proxies(self, prepared_request, proxies):
- """
- This method re-evaluates the proxy configuration by considering the
+ """This method re-evaluates the proxy configuration by considering the
environment variables. If we are redirected to a URL covered by
NO_PROXY, we strip the proxy configuration. Otherwise, we set missing
proxy keys for this URL (in case they were stripped by a previous
@@ -219,6 +215,8 @@ class SessionRedirectMixin(object):
This method also replaces the Proxy-Authorization header where
necessary.
+
+ :rtype: dict
"""
headers = prepared_request.headers
url = prepared_request.url
@@ -228,10 +226,10 @@ class SessionRedirectMixin(object):
if self.trust_env and not should_bypass_proxies(url):
environ_proxies = get_environ_proxies(url)
- proxy = environ_proxies.get(scheme)
+ proxy = environ_proxies.get('all', environ_proxies.get(scheme))
if proxy:
- new_proxies.setdefault(scheme, environ_proxies[scheme])
+ new_proxies.setdefault(scheme, proxy)
if 'Proxy-Authorization' in headers:
del headers['Proxy-Authorization']
@@ -329,6 +327,8 @@ class Session(SessionRedirectMixin):
#: Maximum number of redirects allowed. If the request exceeds this
#: limit, a :class:`TooManyRedirects` exception is raised.
+ #: This defaults to requests.models.DEFAULT_REDIRECT_LIMIT, which is
+ #: 30.
self.max_redirects = DEFAULT_REDIRECT_LIMIT
#: Trust environment settings for proxy configuration, default
@@ -363,6 +363,7 @@ class Session(SessionRedirectMixin):
:param request: :class:`Request` instance to prepare with this
session's settings.
+ :rtype: requests.PreparedRequest
"""
cookies = request.cookies or {}
@@ -374,7 +375,6 @@ class Session(SessionRedirectMixin):
merged_cookies = merge_cookies(
merge_cookies(RequestsCookieJar(), self.cookies), cookies)
-
# Set environment's basic authentication if not explicitly set.
auth = request.auth
if self.trust_env and not auth and not self.auth:
@@ -444,7 +444,7 @@ class Session(SessionRedirectMixin):
:param cert: (optional) if String, path to ssl client cert file (.pem).
If Tuple, ('cert', 'key') pair.
:rtype: requests.Response
- """
+ """
# Create the Request.
req = Request(
method = method.upper(),
@@ -481,6 +481,7 @@ class Session(SessionRedirectMixin):
:param url: URL for the new :class:`Request` object.
:param \*\*kwargs: Optional arguments that ``request`` takes.
+ :rtype: requests.Response
"""
kwargs.setdefault('allow_redirects', True)
@@ -491,6 +492,7 @@ class Session(SessionRedirectMixin):
:param url: URL for the new :class:`Request` object.
:param \*\*kwargs: Optional arguments that ``request`` takes.
+ :rtype: requests.Response
"""
kwargs.setdefault('allow_redirects', True)
@@ -501,6 +503,7 @@ class Session(SessionRedirectMixin):
:param url: URL for the new :class:`Request` object.
:param \*\*kwargs: Optional arguments that ``request`` takes.
+ :rtype: requests.Response
"""
kwargs.setdefault('allow_redirects', False)
@@ -513,6 +516,7 @@ class Session(SessionRedirectMixin):
:param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
:param json: (optional) json to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
+ :rtype: requests.Response
"""
return self.request('POST', url, data=data, json=json, **kwargs)
@@ -523,6 +527,7 @@ class Session(SessionRedirectMixin):
:param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
+ :rtype: requests.Response
"""
return self.request('PUT', url, data=data, **kwargs)
@@ -533,6 +538,7 @@ class Session(SessionRedirectMixin):
:param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
+ :rtype: requests.Response
"""
return self.request('PATCH', url, data=data, **kwargs)
@@ -542,12 +548,17 @@ class Session(SessionRedirectMixin):
:param url: URL for the new :class:`Request` object.
:param \*\*kwargs: Optional arguments that ``request`` takes.
+ :rtype: requests.Response
"""
return self.request('DELETE', url, **kwargs)
def send(self, request, **kwargs):
- """Send a given PreparedRequest."""
+ """
+ Send a given PreparedRequest.
+
+ :rtype: requests.Response
+ """
# Set defaults that the hooks can utilize to ensure they always have
# the correct parameters to reproduce the previous request.
kwargs.setdefault('stream', self.stream)
@@ -619,7 +630,11 @@ class Session(SessionRedirectMixin):
return r
def merge_environment_settings(self, url, proxies, stream, verify, cert):
- """Check the environment and merge it with some settings."""
+ """
+ Check the environment and merge it with some settings.
+
+ :rtype: dict
+ """
# Gather clues from the surrounding environment.
if self.trust_env:
# Set environment's proxies.
@@ -643,7 +658,11 @@ class Session(SessionRedirectMixin):
'cert': cert}
def get_adapter(self, url):
- """Returns the appropriate connection adapter for the given URL."""
+ """
+ Returns the appropriate connection adapter for the given URL.
+
+ :rtype: requests.adapters.BaseAdapter
+ """
for (prefix, adapter) in self.adapters.items():
if url.lower().startswith(prefix):
@@ -660,8 +679,8 @@ class Session(SessionRedirectMixin):
def mount(self, prefix, adapter):
"""Registers a connection adapter to a prefix.
- Adapters are sorted in descending order by key length."""
-
+ Adapters are sorted in descending order by key length.
+ """
self.adapters[prefix] = adapter
keys_to_move = [k for k in self.adapters if len(k) < len(prefix)]
@@ -684,6 +703,10 @@ class Session(SessionRedirectMixin):
def session():
- """Returns a :class:`Session` for context-management."""
+ """
+ Returns a :class:`Session` for context-management.
+
+ :rtype: Session
+ """
return Session()
diff --git a/pip/_vendor/requests/status_codes.py b/pip/_vendor/requests/status_codes.py
index 0137c91d9..db2986bb1 100644
--- a/pip/_vendor/requests/status_codes.py
+++ b/pip/_vendor/requests/status_codes.py
@@ -31,7 +31,7 @@ _codes = {
306: ('switch_proxy',),
307: ('temporary_redirect', 'temporary_moved', 'temporary'),
308: ('permanent_redirect',
- 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0
+ 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0
# Client Error.
400: ('bad_request', 'bad'),
diff --git a/pip/_vendor/requests/structures.py b/pip/_vendor/requests/structures.py
index 991056e47..05d2b3f57 100644
--- a/pip/_vendor/requests/structures.py
+++ b/pip/_vendor/requests/structures.py
@@ -5,7 +5,6 @@ requests.structures
~~~~~~~~~~~~~~~~~~~
Data structures that power Requests.
-
"""
import collections
@@ -14,8 +13,7 @@ from .compat import OrderedDict
class CaseInsensitiveDict(collections.MutableMapping):
- """
- A case-insensitive ``dict``-like object.
+ """A case-insensitive ``dict``-like object.
Implements all methods and operations of
``collections.MutableMapping`` as well as dict's ``copy``. Also
@@ -39,8 +37,8 @@ class CaseInsensitiveDict(collections.MutableMapping):
If the constructor, ``.update``, or equality comparison
operations are given keys that have equal ``.lower()``s, the
behavior is undefined.
-
"""
+
def __init__(self, data=None, **kwargs):
self._store = OrderedDict()
if data is None:
@@ -87,6 +85,7 @@ class CaseInsensitiveDict(collections.MutableMapping):
def __repr__(self):
return str(dict(self.items()))
+
class LookupDict(dict):
"""Dictionary lookup object."""
diff --git a/pip/_vendor/requests/utils.py b/pip/_vendor/requests/utils.py
index c08448ccb..dfeb77d9d 100644
--- a/pip/_vendor/requests/utils.py
+++ b/pip/_vendor/requests/utils.py
@@ -6,7 +6,6 @@ requests.utils
This module provides utility functions that are used within Requests
that are also useful for external consumption.
-
"""
import cgi
@@ -27,7 +26,7 @@ from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2,
basestring)
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict
-from .exceptions import InvalidURL, FileModeWarning
+from .exceptions import InvalidURL, InvalidHeader, FileModeWarning
_hush_pyflakes = (RequestsCookieJar,)
@@ -165,6 +164,8 @@ def from_key_val_list(value):
ValueError: need more than 1 value to unpack
>>> from_key_val_list({'key': 'val'})
OrderedDict([('key', 'val')])
+
+ :rtype: OrderedDict
"""
if value is None:
return None
@@ -187,6 +188,8 @@ def to_key_val_list(value):
[('key', 'val')]
>>> to_key_val_list('string')
ValueError: cannot encode objects that are not 2-tuples.
+
+ :rtype: list
"""
if value is None:
return None
@@ -222,6 +225,7 @@ def parse_list_header(value):
:param value: a string with a list header.
:return: :class:`list`
+ :rtype: list
"""
result = []
for item in _parse_list_header(value):
@@ -252,6 +256,7 @@ def parse_dict_header(value):
:param value: a string with a dict header.
:return: :class:`dict`
+ :rtype: dict
"""
result = {}
for item in _parse_list_header(value):
@@ -272,6 +277,7 @@ def unquote_header_value(value, is_filename=False):
using for quoting.
:param value: the header value to unquote.
+ :rtype: str
"""
if value and value[0] == value[-1] == '"':
# this is not the real unquoting, but fixing this so that the
@@ -294,6 +300,7 @@ def dict_from_cookiejar(cj):
"""Returns a key/value dictionary from a CookieJar.
:param cj: CookieJar object to extract cookies from.
+ :rtype: dict
"""
cookie_dict = {}
@@ -309,6 +316,7 @@ def add_dict_to_cookiejar(cj, cookie_dict):
:param cj: CookieJar to insert cookies into.
:param cookie_dict: Dict of key/values to insert into CookieJar.
+ :rtype: CookieJar
"""
cj2 = cookiejar_from_dict(cookie_dict)
@@ -340,6 +348,7 @@ def get_encoding_from_headers(headers):
"""Returns encodings from given HTTP Header Dict.
:param headers: dictionary to extract encoding from.
+ :rtype: str
"""
content_type = headers.get('content-type')
@@ -377,6 +386,8 @@ def stream_decode_response_unicode(iterator, r):
def iter_slices(string, slice_length):
"""Iterate over slices of a string."""
pos = 0
+ if slice_length is None or slice_length <= 0:
+ slice_length = len(string)
while pos < len(string):
yield string[pos:pos + slice_length]
pos += slice_length
@@ -392,6 +403,7 @@ def get_unicode_from_response(r):
1. charset from content-type
2. fall back and replace all unicode characters
+ :rtype: str
"""
warnings.warn((
'In requests 3.0, get_unicode_from_response will be removed. For '
@@ -426,6 +438,8 @@ UNRESERVED_SET = frozenset(
def unquote_unreserved(uri):
"""Un-escape any percent-escape sequences in a URI that are unreserved
characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
+
+ :rtype: str
"""
parts = uri.split('%')
for i in range(1, len(parts)):
@@ -450,6 +464,8 @@ def requote_uri(uri):
This function passes the given URI through an unquote/quote cycle to
ensure that it is fully and consistently quoted.
+
+ :rtype: str
"""
safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
safe_without_percent = "!#$&'()*+,/:;=?@[]~"
@@ -466,10 +482,12 @@ def requote_uri(uri):
def address_in_network(ip, net):
- """
- This function allows you to check if on IP belongs to a network subnet
+ """This function allows you to check if on IP belongs to a network subnet
+
Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
returns False if ip = 192.168.1.1 and net = 192.168.100.0/24
+
+ :rtype: bool
"""
ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
netaddr, bits = net.split('/')
@@ -479,15 +497,20 @@ def address_in_network(ip, net):
def dotted_netmask(mask):
- """
- Converts mask from /xx format to xxx.xxx.xxx.xxx
+ """Converts mask from /xx format to xxx.xxx.xxx.xxx
+
Example: if mask is 24 function returns 255.255.255.0
+
+ :rtype: str
"""
bits = 0xffffffff ^ (1 << 32 - mask) - 1
return socket.inet_ntoa(struct.pack('>I', bits))
def is_ipv4_address(string_ip):
+ """
+ :rtype: bool
+ """
try:
socket.inet_aton(string_ip)
except socket.error:
@@ -496,7 +519,11 @@ def is_ipv4_address(string_ip):
def is_valid_cidr(string_network):
- """Very simple check of the cidr format in no_proxy variable"""
+ """
+ Very simple check of the cidr format in no_proxy variable.
+
+ :rtype: bool
+ """
if string_network.count('/') == 1:
try:
mask = int(string_network.split('/')[1])
@@ -518,6 +545,8 @@ def is_valid_cidr(string_network):
def should_bypass_proxies(url):
"""
Returns whether we should bypass proxies or not.
+
+ :rtype: bool
"""
get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())
@@ -539,6 +568,10 @@ def should_bypass_proxies(url):
if is_valid_cidr(proxy_ip):
if address_in_network(ip, proxy_ip):
return True
+ elif ip == proxy_ip:
+ # If no_proxy ip was defined in plain IP notation instead of cidr notation &
+ # matches the IP of the index
+ return True
else:
for host in no_proxy:
if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
@@ -564,7 +597,11 @@ def should_bypass_proxies(url):
def get_environ_proxies(url):
- """Return a dict of environment proxies."""
+ """
+ Return a dict of environment proxies.
+
+ :rtype: dict
+ """
if should_bypass_proxies(url):
return {}
else:
@@ -580,20 +617,36 @@ def select_proxy(url, proxies):
proxies = proxies or {}
urlparts = urlparse(url)
if urlparts.hostname is None:
- proxy = None
- else:
- proxy = proxies.get(urlparts.scheme+'://'+urlparts.hostname)
- if proxy is None:
- proxy = proxies.get(urlparts.scheme)
+ return proxies.get('all', proxies.get(urlparts.scheme))
+
+ proxy_keys = [
+ 'all://' + urlparts.hostname,
+ 'all',
+ urlparts.scheme + '://' + urlparts.hostname,
+ urlparts.scheme,
+ ]
+ proxy = None
+ for proxy_key in proxy_keys:
+ if proxy_key in proxies:
+ proxy = proxies[proxy_key]
+ break
+
return proxy
def default_user_agent(name="python-requests"):
- """Return a string representing the default user agent."""
+ """
+ Return a string representing the default user agent.
+
+ :rtype: str
+ """
return '%s/%s' % (name, __version__)
def default_headers():
+ """
+ :rtype: requests.structures.CaseInsensitiveDict
+ """
return CaseInsensitiveDict({
'User-Agent': default_user_agent(),
'Accept-Encoding': ', '.join(('gzip', 'deflate')),
@@ -607,6 +660,7 @@ def parse_header_links(value):
i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"
+ :rtype: list
"""
links = []
@@ -641,6 +695,9 @@ _null3 = _null * 3
def guess_json_utf(data):
+ """
+ :rtype: str
+ """
# JSON always starts with two ASCII characters, so detection is as
# easy as counting the nulls and from their location and count
# determine the encoding. Also detect a BOM, if present.
@@ -671,7 +728,10 @@ def guess_json_utf(data):
def prepend_scheme_if_needed(url, new_scheme):
"""Given a URL that may or may not have a scheme, prepend the given scheme.
- Does not replace a present scheme with the one provided as an argument."""
+ Does not replace a present scheme with the one provided as an argument.
+
+ :rtype: str
+ """
scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)
# urlparse is a finicky beast, and sometimes decides that there isn't a
@@ -685,7 +745,10 @@ def prepend_scheme_if_needed(url, new_scheme):
def get_auth_from_url(url):
"""Given a url with authentication components, extract them into a tuple of
- username,password."""
+ username,password.
+
+ :rtype: (str,str)
+ """
parsed = urlparse(url)
try:
@@ -697,10 +760,9 @@ def get_auth_from_url(url):
def to_native_string(string, encoding='ascii'):
- """
- Given a string object, regardless of type, returns a representation of that
- string in the native string type, encoding and decoding where necessary.
- This assumes ASCII unless told otherwise.
+ """Given a string object, regardless of type, returns a representation of
+ that string in the native string type, encoding and decoding where
+ necessary. This assumes ASCII unless told otherwise.
"""
if isinstance(string, builtin_str):
out = string
@@ -713,9 +775,36 @@ def to_native_string(string, encoding='ascii'):
return out
+# Moved outside of function to avoid recompile every call
+_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
+_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')
+
+def check_header_validity(header):
+ """Verifies that header value is a string which doesn't contain
+ leading whitespace or return characters. This prevents unintended
+ header injection.
+
+ :param header: tuple, in the format (name, value).
+ """
+ name, value = header
+
+ if isinstance(value, bytes):
+ pat = _CLEAN_HEADER_REGEX_BYTE
+ else:
+ pat = _CLEAN_HEADER_REGEX_STR
+ try:
+ if not pat.match(value):
+ raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
+ except TypeError:
+ raise InvalidHeader("Header value %s must be of type str or bytes, "
+ "not %s" % (value, type(value)))
+
+
def urldefragauth(url):
"""
- Given a url remove the fragment and the authentication part
+ Given a url remove the fragment and the authentication part.
+
+ :rtype: str
"""
scheme, netloc, path, params, query, fragment = urlparse(url)
diff --git a/pip/_vendor/vendor.txt b/pip/_vendor/vendor.txt
index f82b97d13..b744fc7bf 100644
--- a/pip/_vendor/vendor.txt
+++ b/pip/_vendor/vendor.txt
@@ -1,13 +1,15 @@
-distlib==0.2.3
-distro==0.6.0
-html5lib==1.0b8
+distlib==0.2.4
+distro==1.0.0
+html5lib==1.0b10
six==1.10.0
colorama==0.3.7
-requests==2.10.0
-CacheControl==0.11.6
+requests==2.11.1
+CacheControl==0.11.7
lockfile==0.12.2
+ordereddict==1.1 # Only needed on 2.6
progress==1.2
-ipaddress==1.0.16 # Only needed on 2.6 and 2.7
-packaging==16.7
-pyparsing==2.1.1
+ipaddress==1.0.17 # Only needed on 2.6 and 2.7
+packaging==16.8
+pyparsing==2.1.10
retrying==1.3.3
+webencodings==0.5
diff --git a/pip/_vendor/webencodings/__init__.py b/pip/_vendor/webencodings/__init__.py
new file mode 100644
index 000000000..03d5d3576
--- /dev/null
+++ b/pip/_vendor/webencodings/__init__.py
@@ -0,0 +1,342 @@
+# coding: utf8
+"""
+
+ webencodings
+ ~~~~~~~~~~~~
+
+ This is a Python implementation of the `WHATWG Encoding standard
+ <http://encoding.spec.whatwg.org/>`_. See README for details.
+
+ :copyright: Copyright 2012 by Simon Sapin
+ :license: BSD, see LICENSE for details.
+
+"""
+
+from __future__ import unicode_literals
+
+import codecs
+
+from .labels import LABELS
+
+
+VERSION = '0.5'
+
+
+# Some names in Encoding are not valid Python aliases. Remap these.
+PYTHON_NAMES = {
+ 'iso-8859-8-i': 'iso-8859-8',
+ 'x-mac-cyrillic': 'mac-cyrillic',
+ 'macintosh': 'mac-roman',
+ 'windows-874': 'cp874'}
+
+CACHE = {}
+
+
+def ascii_lower(string):
+ r"""Transform (only) ASCII letters to lower case: A-Z is mapped to a-z.
+
+ :param string: A Unicode string.
+ :returns: A new Unicode string.
+
+ This is used for `ASCII case-insensitive
+ <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_
+ matching of encoding labels.
+ The same matching is also used, among other things,
+ for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_.
+
+ This is different from the :meth:`~py:str.lower` method of Unicode strings
+ which also affects non-ASCII characters,
+ sometimes mapping them into the ASCII range:
+
+ >>> keyword = u'Bac\N{KELVIN SIGN}ground'
+ >>> assert keyword.lower() == u'background'
+ >>> assert ascii_lower(keyword) != keyword.lower()
+ >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'
+
+ """
+ # This turns out to be faster than unicode.translate()
+ return string.encode('utf8').lower().decode('utf8')
+
+
+def lookup(label):
+ """
+ Look for an encoding by its label.
+ This is the spec’s `get an encoding
+ <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
+ Supported labels are listed there.
+
+ :param label: A string.
+ :returns:
+ An :class:`Encoding` object, or :obj:`None` for an unknown label.
+
+ """
+ # Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020.
+ label = ascii_lower(label.strip('\t\n\f\r '))
+ name = LABELS.get(label)
+ if name is None:
+ return None
+ encoding = CACHE.get(name)
+ if encoding is None:
+ if name == 'x-user-defined':
+ from .x_user_defined import codec_info
+ else:
+ python_name = PYTHON_NAMES.get(name, name)
+ # Any python_name value that gets to here should be valid.
+ codec_info = codecs.lookup(python_name)
+ encoding = Encoding(name, codec_info)
+ CACHE[name] = encoding
+ return encoding
+
+
+def _get_encoding(encoding_or_label):
+ """
+ Accept either an encoding object or label.
+
+ :param encoding_or_label: An :class:`Encoding` object or a label string.
+ :returns: An :class:`Encoding` object.
+ :raises: :exc:`~exceptions.LookupError` for an unknown label.
+
+ """
+ if hasattr(encoding_or_label, 'codec_info'):
+ return encoding_or_label
+
+ encoding = lookup(encoding_or_label)
+ if encoding is None:
+ raise LookupError('Unknown encoding label: %r' % encoding_or_label)
+ return encoding
+
+
+class Encoding(object):
+ """Represents a character encoding such as UTF-8,
+ that can be used for decoding or encoding.
+
+ .. attribute:: name
+
+ Canonical name of the encoding
+
+ .. attribute:: codec_info
+
+ The actual implementation of the encoding,
+ a stdlib :class:`~codecs.CodecInfo` object.
+ See :func:`codecs.register`.
+
+ """
+ def __init__(self, name, codec_info):
+ self.name = name
+ self.codec_info = codec_info
+
+ def __repr__(self):
+ return '<Encoding %s>' % self.name
+
+
+#: The UTF-8 encoding. Should be used for new content and formats.
+UTF8 = lookup('utf-8')
+
+_UTF16LE = lookup('utf-16le')
+_UTF16BE = lookup('utf-16be')
+
+
+def decode(input, fallback_encoding, errors='replace'):
+ """
+ Decode a single string.
+
+ :param input: A byte string
+ :param fallback_encoding:
+ An :class:`Encoding` object or a label string.
+ The encoding to use if :obj:`input` does not have a BOM.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+ :return:
+ A ``(output, encoding)`` tuple of a Unicode string
+ and an :obj:`Encoding`.
+
+ """
+ # Fail early if `fallback_encoding` is an invalid label.
+ fallback_encoding = _get_encoding(fallback_encoding)
+ bom_encoding, input = _detect_bom(input)
+ encoding = bom_encoding or fallback_encoding
+ return encoding.codec_info.decode(input, errors)[0], encoding
+
+
+def _detect_bom(input):
+ """Return (bom_encoding, input), with any BOM removed from the input."""
+ if input.startswith(b'\xFF\xFE'):
+ return _UTF16LE, input[2:]
+ if input.startswith(b'\xFE\xFF'):
+ return _UTF16BE, input[2:]
+ if input.startswith(b'\xEF\xBB\xBF'):
+ return UTF8, input[3:]
+ return None, input
+
+
+def encode(input, encoding=UTF8, errors='strict'):
+ """
+ Encode a single string.
+
+ :param input: A Unicode string.
+ :param encoding: An :class:`Encoding` object or a label string.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+ :return: A byte string.
+
+ """
+ return _get_encoding(encoding).codec_info.encode(input, errors)[0]
+
+
+def iter_decode(input, fallback_encoding, errors='replace'):
+ """
+ "Pull"-based decoder.
+
+ :param input:
+ An iterable of byte strings.
+
+ The input is first consumed just enough to determine the encoding
+ based on the presence of a BOM,
+ then consumed on demand when the return value is.
+ :param fallback_encoding:
+ An :class:`Encoding` object or a label string.
+ The encoding to use if :obj:`input` does not have a BOM.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+ :returns:
+ An ``(output, encoding)`` tuple.
+ :obj:`output` is an iterable of Unicode strings,
+ :obj:`encoding` is the :obj:`Encoding` that is being used.
+
+ """
+
+ decoder = IncrementalDecoder(fallback_encoding, errors)
+ generator = _iter_decode_generator(input, decoder)
+ encoding = next(generator)
+ return generator, encoding
+
+
+def _iter_decode_generator(input, decoder):
+ """Return a generator that first yields the :obj:`Encoding`,
+ then yields output chunks as Unicode strings.
+
+ """
+ decode = decoder.decode
+ input = iter(input)
+ for chunck in input:
+ output = decode(chunck)
+ if output:
+ assert decoder.encoding is not None
+ yield decoder.encoding
+ yield output
+ break
+ else:
+ # Input exhausted without determining the encoding
+ output = decode(b'', final=True)
+ assert decoder.encoding is not None
+ yield decoder.encoding
+ if output:
+ yield output
+ return
+
+ for chunck in input:
+ output = decode(chunck)
+ if output:
+ yield output
+ output = decode(b'', final=True)
+ if output:
+ yield output
+
+
+def iter_encode(input, encoding=UTF8, errors='strict'):
+ """
+ “Pull”-based encoder.
+
+ :param input: An iterable of Unicode strings.
+ :param encoding: An :class:`Encoding` object or a label string.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+ :returns: An iterable of byte strings.
+
+ """
+ # Fail early if `encoding` is an invalid label.
+ encode = IncrementalEncoder(encoding, errors).encode
+ return _iter_encode_generator(input, encode)
+
+
+def _iter_encode_generator(input, encode):
+ for chunck in input:
+ output = encode(chunck)
+ if output:
+ yield output
+ output = encode('', final=True)
+ if output:
+ yield output
+
+
+class IncrementalDecoder(object):
+ """
+ “Push”-based decoder.
+
+ :param fallback_encoding:
+ An :class:`Encoding` object or a label string.
+ The encoding to use if :obj:`input` does not have a BOM.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+
+ """
+ def __init__(self, fallback_encoding, errors='replace'):
+ # Fail early if `fallback_encoding` is an invalid label.
+ self._fallback_encoding = _get_encoding(fallback_encoding)
+ self._errors = errors
+ self._buffer = b''
+ self._decoder = None
+ #: The actual :class:`Encoding` that is being used,
+ #: or :obj:`None` if that is not determined yet.
+ #: (I.e. if there is not enough input yet to determine
+ #: if there is a BOM.)
+ self.encoding = None # Not known yet.
+
+ def decode(self, input, final=False):
+ """Decode one chunk of the input.
+
+ :param input: A byte string.
+ :param final:
+ Indicate that no more input is available.
+ Must be :obj:`True` if this is the last call.
+ :returns: A Unicode string.
+
+ """
+ decoder = self._decoder
+ if decoder is not None:
+ return decoder(input, final)
+
+ input = self._buffer + input
+ encoding, input = _detect_bom(input)
+ if encoding is None:
+ if len(input) < 3 and not final: # Not enough data yet.
+ self._buffer = input
+ return ''
+ else: # No BOM
+ encoding = self._fallback_encoding
+ decoder = encoding.codec_info.incrementaldecoder(self._errors).decode
+ self._decoder = decoder
+ self.encoding = encoding
+ return decoder(input, final)
+
+
+class IncrementalEncoder(object):
+ """
+ “Push”-based encoder.
+
+ :param encoding: An :class:`Encoding` object or a label string.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+
+ .. method:: encode(input, final=False)
+
+ :param input: A Unicode string.
+ :param final:
+ Indicate that no more input is available.
+ Must be :obj:`True` if this is the last call.
+ :returns: A byte string.
+
+ """
+ def __init__(self, encoding=UTF8, errors='strict'):
+ encoding = _get_encoding(encoding)
+ self.encode = encoding.codec_info.incrementalencoder(errors).encode
diff --git a/pip/_vendor/webencodings/labels.py b/pip/_vendor/webencodings/labels.py
new file mode 100644
index 000000000..29cbf91ef
--- /dev/null
+++ b/pip/_vendor/webencodings/labels.py
@@ -0,0 +1,231 @@
+"""
+
+ webencodings.labels
+ ~~~~~~~~~~~~~~~~~~~
+
+ Map encoding labels to their name.
+
+ :copyright: Copyright 2012 by Simon Sapin
+ :license: BSD, see LICENSE for details.
+
+"""
+
+# XXX Do not edit!
+# This file is automatically generated by mklabels.py
+
+LABELS = {
+ 'unicode-1-1-utf-8': 'utf-8',
+ 'utf-8': 'utf-8',
+ 'utf8': 'utf-8',
+ '866': 'ibm866',
+ 'cp866': 'ibm866',
+ 'csibm866': 'ibm866',
+ 'ibm866': 'ibm866',
+ 'csisolatin2': 'iso-8859-2',
+ 'iso-8859-2': 'iso-8859-2',
+ 'iso-ir-101': 'iso-8859-2',
+ 'iso8859-2': 'iso-8859-2',
+ 'iso88592': 'iso-8859-2',
+ 'iso_8859-2': 'iso-8859-2',
+ 'iso_8859-2:1987': 'iso-8859-2',
+ 'l2': 'iso-8859-2',
+ 'latin2': 'iso-8859-2',
+ 'csisolatin3': 'iso-8859-3',
+ 'iso-8859-3': 'iso-8859-3',
+ 'iso-ir-109': 'iso-8859-3',
+ 'iso8859-3': 'iso-8859-3',
+ 'iso88593': 'iso-8859-3',
+ 'iso_8859-3': 'iso-8859-3',
+ 'iso_8859-3:1988': 'iso-8859-3',
+ 'l3': 'iso-8859-3',
+ 'latin3': 'iso-8859-3',
+ 'csisolatin4': 'iso-8859-4',
+ 'iso-8859-4': 'iso-8859-4',
+ 'iso-ir-110': 'iso-8859-4',
+ 'iso8859-4': 'iso-8859-4',
+ 'iso88594': 'iso-8859-4',
+ 'iso_8859-4': 'iso-8859-4',
+ 'iso_8859-4:1988': 'iso-8859-4',
+ 'l4': 'iso-8859-4',
+ 'latin4': 'iso-8859-4',
+ 'csisolatincyrillic': 'iso-8859-5',
+ 'cyrillic': 'iso-8859-5',
+ 'iso-8859-5': 'iso-8859-5',
+ 'iso-ir-144': 'iso-8859-5',
+ 'iso8859-5': 'iso-8859-5',
+ 'iso88595': 'iso-8859-5',
+ 'iso_8859-5': 'iso-8859-5',
+ 'iso_8859-5:1988': 'iso-8859-5',
+ 'arabic': 'iso-8859-6',
+ 'asmo-708': 'iso-8859-6',
+ 'csiso88596e': 'iso-8859-6',
+ 'csiso88596i': 'iso-8859-6',
+ 'csisolatinarabic': 'iso-8859-6',
+ 'ecma-114': 'iso-8859-6',
+ 'iso-8859-6': 'iso-8859-6',
+ 'iso-8859-6-e': 'iso-8859-6',
+ 'iso-8859-6-i': 'iso-8859-6',
+ 'iso-ir-127': 'iso-8859-6',
+ 'iso8859-6': 'iso-8859-6',
+ 'iso88596': 'iso-8859-6',
+ 'iso_8859-6': 'iso-8859-6',
+ 'iso_8859-6:1987': 'iso-8859-6',
+ 'csisolatingreek': 'iso-8859-7',
+ 'ecma-118': 'iso-8859-7',
+ 'elot_928': 'iso-8859-7',
+ 'greek': 'iso-8859-7',
+ 'greek8': 'iso-8859-7',
+ 'iso-8859-7': 'iso-8859-7',
+ 'iso-ir-126': 'iso-8859-7',
+ 'iso8859-7': 'iso-8859-7',
+ 'iso88597': 'iso-8859-7',
+ 'iso_8859-7': 'iso-8859-7',
+ 'iso_8859-7:1987': 'iso-8859-7',
+ 'sun_eu_greek': 'iso-8859-7',
+ 'csiso88598e': 'iso-8859-8',
+ 'csisolatinhebrew': 'iso-8859-8',
+ 'hebrew': 'iso-8859-8',
+ 'iso-8859-8': 'iso-8859-8',
+ 'iso-8859-8-e': 'iso-8859-8',
+ 'iso-ir-138': 'iso-8859-8',
+ 'iso8859-8': 'iso-8859-8',
+ 'iso88598': 'iso-8859-8',
+ 'iso_8859-8': 'iso-8859-8',
+ 'iso_8859-8:1988': 'iso-8859-8',
+ 'visual': 'iso-8859-8',
+ 'csiso88598i': 'iso-8859-8-i',
+ 'iso-8859-8-i': 'iso-8859-8-i',
+ 'logical': 'iso-8859-8-i',
+ 'csisolatin6': 'iso-8859-10',
+ 'iso-8859-10': 'iso-8859-10',
+ 'iso-ir-157': 'iso-8859-10',
+ 'iso8859-10': 'iso-8859-10',
+ 'iso885910': 'iso-8859-10',
+ 'l6': 'iso-8859-10',
+ 'latin6': 'iso-8859-10',
+ 'iso-8859-13': 'iso-8859-13',
+ 'iso8859-13': 'iso-8859-13',
+ 'iso885913': 'iso-8859-13',
+ 'iso-8859-14': 'iso-8859-14',
+ 'iso8859-14': 'iso-8859-14',
+ 'iso885914': 'iso-8859-14',
+ 'csisolatin9': 'iso-8859-15',
+ 'iso-8859-15': 'iso-8859-15',
+ 'iso8859-15': 'iso-8859-15',
+ 'iso885915': 'iso-8859-15',
+ 'iso_8859-15': 'iso-8859-15',
+ 'l9': 'iso-8859-15',
+ 'iso-8859-16': 'iso-8859-16',
+ 'cskoi8r': 'koi8-r',
+ 'koi': 'koi8-r',
+ 'koi8': 'koi8-r',
+ 'koi8-r': 'koi8-r',
+ 'koi8_r': 'koi8-r',
+ 'koi8-u': 'koi8-u',
+ 'csmacintosh': 'macintosh',
+ 'mac': 'macintosh',
+ 'macintosh': 'macintosh',
+ 'x-mac-roman': 'macintosh',
+ 'dos-874': 'windows-874',
+ 'iso-8859-11': 'windows-874',
+ 'iso8859-11': 'windows-874',
+ 'iso885911': 'windows-874',
+ 'tis-620': 'windows-874',
+ 'windows-874': 'windows-874',
+ 'cp1250': 'windows-1250',
+ 'windows-1250': 'windows-1250',
+ 'x-cp1250': 'windows-1250',
+ 'cp1251': 'windows-1251',
+ 'windows-1251': 'windows-1251',
+ 'x-cp1251': 'windows-1251',
+ 'ansi_x3.4-1968': 'windows-1252',
+ 'ascii': 'windows-1252',
+ 'cp1252': 'windows-1252',
+ 'cp819': 'windows-1252',
+ 'csisolatin1': 'windows-1252',
+ 'ibm819': 'windows-1252',
+ 'iso-8859-1': 'windows-1252',
+ 'iso-ir-100': 'windows-1252',
+ 'iso8859-1': 'windows-1252',
+ 'iso88591': 'windows-1252',
+ 'iso_8859-1': 'windows-1252',
+ 'iso_8859-1:1987': 'windows-1252',
+ 'l1': 'windows-1252',
+ 'latin1': 'windows-1252',
+ 'us-ascii': 'windows-1252',
+ 'windows-1252': 'windows-1252',
+ 'x-cp1252': 'windows-1252',
+ 'cp1253': 'windows-1253',
+ 'windows-1253': 'windows-1253',
+ 'x-cp1253': 'windows-1253',
+ 'cp1254': 'windows-1254',
+ 'csisolatin5': 'windows-1254',
+ 'iso-8859-9': 'windows-1254',
+ 'iso-ir-148': 'windows-1254',
+ 'iso8859-9': 'windows-1254',
+ 'iso88599': 'windows-1254',
+ 'iso_8859-9': 'windows-1254',
+ 'iso_8859-9:1989': 'windows-1254',
+ 'l5': 'windows-1254',
+ 'latin5': 'windows-1254',
+ 'windows-1254': 'windows-1254',
+ 'x-cp1254': 'windows-1254',
+ 'cp1255': 'windows-1255',
+ 'windows-1255': 'windows-1255',
+ 'x-cp1255': 'windows-1255',
+ 'cp1256': 'windows-1256',
+ 'windows-1256': 'windows-1256',
+ 'x-cp1256': 'windows-1256',
+ 'cp1257': 'windows-1257',
+ 'windows-1257': 'windows-1257',
+ 'x-cp1257': 'windows-1257',
+ 'cp1258': 'windows-1258',
+ 'windows-1258': 'windows-1258',
+ 'x-cp1258': 'windows-1258',
+ 'x-mac-cyrillic': 'x-mac-cyrillic',
+ 'x-mac-ukrainian': 'x-mac-cyrillic',
+ 'chinese': 'gbk',
+ 'csgb2312': 'gbk',
+ 'csiso58gb231280': 'gbk',
+ 'gb2312': 'gbk',
+ 'gb_2312': 'gbk',
+ 'gb_2312-80': 'gbk',
+ 'gbk': 'gbk',
+ 'iso-ir-58': 'gbk',
+ 'x-gbk': 'gbk',
+ 'gb18030': 'gb18030',
+ 'hz-gb-2312': 'hz-gb-2312',
+ 'big5': 'big5',
+ 'big5-hkscs': 'big5',
+ 'cn-big5': 'big5',
+ 'csbig5': 'big5',
+ 'x-x-big5': 'big5',
+ 'cseucpkdfmtjapanese': 'euc-jp',
+ 'euc-jp': 'euc-jp',
+ 'x-euc-jp': 'euc-jp',
+ 'csiso2022jp': 'iso-2022-jp',
+ 'iso-2022-jp': 'iso-2022-jp',
+ 'csshiftjis': 'shift_jis',
+ 'ms_kanji': 'shift_jis',
+ 'shift-jis': 'shift_jis',
+ 'shift_jis': 'shift_jis',
+ 'sjis': 'shift_jis',
+ 'windows-31j': 'shift_jis',
+ 'x-sjis': 'shift_jis',
+ 'cseuckr': 'euc-kr',
+ 'csksc56011987': 'euc-kr',
+ 'euc-kr': 'euc-kr',
+ 'iso-ir-149': 'euc-kr',
+ 'korean': 'euc-kr',
+ 'ks_c_5601-1987': 'euc-kr',
+ 'ks_c_5601-1989': 'euc-kr',
+ 'ksc5601': 'euc-kr',
+ 'ksc_5601': 'euc-kr',
+ 'windows-949': 'euc-kr',
+ 'csiso2022kr': 'iso-2022-kr',
+ 'iso-2022-kr': 'iso-2022-kr',
+ 'utf-16be': 'utf-16be',
+ 'utf-16': 'utf-16le',
+ 'utf-16le': 'utf-16le',
+ 'x-user-defined': 'x-user-defined',
+}
diff --git a/pip/_vendor/webencodings/mklabels.py b/pip/_vendor/webencodings/mklabels.py
new file mode 100644
index 000000000..295dc928b
--- /dev/null
+++ b/pip/_vendor/webencodings/mklabels.py
@@ -0,0 +1,59 @@
+"""
+
+ webencodings.mklabels
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ Regenerate the webencodings.labels module.
+
+ :copyright: Copyright 2012 by Simon Sapin
+ :license: BSD, see LICENSE for details.
+
+"""
+
+import json
+try:
+ from urllib import urlopen
+except ImportError:
+ from urllib.request import urlopen
+
+
+def assert_lower(string):
+ assert string == string.lower()
+ return string
+
+
+def generate(url):
+ parts = ['''\
+"""
+
+ webencodings.labels
+ ~~~~~~~~~~~~~~~~~~~
+
+ Map encoding labels to their name.
+
+ :copyright: Copyright 2012 by Simon Sapin
+ :license: BSD, see LICENSE for details.
+
+"""
+
+# XXX Do not edit!
+# This file is automatically generated by mklabels.py
+
+LABELS = {
+''']
+ labels = [
+ (repr(assert_lower(label)).lstrip('u'),
+ repr(encoding['name']).lstrip('u'))
+ for category in json.loads(urlopen(url).read().decode('ascii'))
+ for encoding in category['encodings']
+ for label in encoding['labels']]
+ max_len = max(len(label) for label, name in labels)
+ parts.extend(
+ ' %s:%s %s,\n' % (label, ' ' * (max_len - len(label)), name)
+ for label, name in labels)
+ parts.append('}')
+ return ''.join(parts)
+
+
+if __name__ == '__main__':
+ print(generate('http://encoding.spec.whatwg.org/encodings.json'))
diff --git a/pip/_vendor/webencodings/tests.py b/pip/_vendor/webencodings/tests.py
new file mode 100644
index 000000000..b8c5653ec
--- /dev/null
+++ b/pip/_vendor/webencodings/tests.py
@@ -0,0 +1,153 @@
+# coding: utf8
+"""
+
+ webencodings.tests
+ ~~~~~~~~~~~~~~~~~~
+
+ A basic test suite for Encoding.
+
+ :copyright: Copyright 2012 by Simon Sapin
+ :license: BSD, see LICENSE for details.
+
+"""
+
+from __future__ import unicode_literals
+
+from . import (lookup, LABELS, decode, encode, iter_decode, iter_encode,
+ IncrementalDecoder, IncrementalEncoder, UTF8)
+
+
+def assert_raises(exception, function, *args, **kwargs):
+ try:
+ function(*args, **kwargs)
+ except exception:
+ return
+ else: # pragma: no cover
+ raise AssertionError('Did not raise %s.' % exception)
+
+
+def test_labels():
+ assert lookup('utf-8').name == 'utf-8'
+ assert lookup('Utf-8').name == 'utf-8'
+ assert lookup('UTF-8').name == 'utf-8'
+ assert lookup('utf8').name == 'utf-8'
+ assert lookup('utf8').name == 'utf-8'
+ assert lookup('utf8 ').name == 'utf-8'
+ assert lookup(' \r\nutf8\t').name == 'utf-8'
+ assert lookup('u8') is None # Python label.
+ assert lookup('utf-8 ') is None # Non-ASCII white space.
+
+ assert lookup('US-ASCII').name == 'windows-1252'
+ assert lookup('iso-8859-1').name == 'windows-1252'
+ assert lookup('latin1').name == 'windows-1252'
+ assert lookup('LATIN1').name == 'windows-1252'
+ assert lookup('latin-1') is None
+ assert lookup('LATİN1') is None # ASCII-only case insensitivity.
+
+
+def test_all_labels():
+ for label in LABELS:
+ assert decode(b'', label) == ('', lookup(label))
+ assert encode('', label) == b''
+ for repeat in [0, 1, 12]:
+ output, _ = iter_decode([b''] * repeat, label)
+ assert list(output) == []
+ assert list(iter_encode([''] * repeat, label)) == []
+ decoder = IncrementalDecoder(label)
+ assert decoder.decode(b'') == ''
+ assert decoder.decode(b'', final=True) == ''
+ encoder = IncrementalEncoder(label)
+ assert encoder.encode('') == b''
+ assert encoder.encode('', final=True) == b''
+ # All encoding names are valid labels too:
+ for name in set(LABELS.values()):
+ assert lookup(name).name == name
+
+
+def test_invalid_label():
+ assert_raises(LookupError, decode, b'\xEF\xBB\xBF\xc3\xa9', 'invalid')
+ assert_raises(LookupError, encode, 'é', 'invalid')
+ assert_raises(LookupError, iter_decode, [], 'invalid')
+ assert_raises(LookupError, iter_encode, [], 'invalid')
+ assert_raises(LookupError, IncrementalDecoder, 'invalid')
+ assert_raises(LookupError, IncrementalEncoder, 'invalid')
+
+
+def test_decode():
+ assert decode(b'\x80', 'latin1') == ('€', lookup('latin1'))
+ assert decode(b'\x80', lookup('latin1')) == ('€', lookup('latin1'))
+ assert decode(b'\xc3\xa9', 'utf8') == ('é', lookup('utf8'))
+ assert decode(b'\xc3\xa9', UTF8) == ('é', lookup('utf8'))
+ assert decode(b'\xc3\xa9', 'ascii') == ('é', lookup('ascii'))
+ assert decode(b'\xEF\xBB\xBF\xc3\xa9', 'ascii') == ('é', lookup('utf8')) # UTF-8 with BOM
+
+ assert decode(b'\xFE\xFF\x00\xe9', 'ascii') == ('é', lookup('utf-16be')) # UTF-16-BE with BOM
+ assert decode(b'\xFF\xFE\xe9\x00', 'ascii') == ('é', lookup('utf-16le')) # UTF-16-LE with BOM
+ assert decode(b'\xFE\xFF\xe9\x00', 'ascii') == ('\ue900', lookup('utf-16be'))
+ assert decode(b'\xFF\xFE\x00\xe9', 'ascii') == ('\ue900', lookup('utf-16le'))
+
+ assert decode(b'\x00\xe9', 'UTF-16BE') == ('é', lookup('utf-16be'))
+ assert decode(b'\xe9\x00', 'UTF-16LE') == ('é', lookup('utf-16le'))
+ assert decode(b'\xe9\x00', 'UTF-16') == ('é', lookup('utf-16le'))
+
+ assert decode(b'\xe9\x00', 'UTF-16BE') == ('\ue900', lookup('utf-16be'))
+ assert decode(b'\x00\xe9', 'UTF-16LE') == ('\ue900', lookup('utf-16le'))
+ assert decode(b'\x00\xe9', 'UTF-16') == ('\ue900', lookup('utf-16le'))
+
+
+def test_encode():
+ assert encode('é', 'latin1') == b'\xe9'
+ assert encode('é', 'utf8') == b'\xc3\xa9'
+ assert encode('é', 'utf8') == b'\xc3\xa9'
+ assert encode('é', 'utf-16') == b'\xe9\x00'
+ assert encode('é', 'utf-16le') == b'\xe9\x00'
+ assert encode('é', 'utf-16be') == b'\x00\xe9'
+
+
+def test_iter_decode():
+ def iter_decode_to_string(input, fallback_encoding):
+ output, _encoding = iter_decode(input, fallback_encoding)
+ return ''.join(output)
+ assert iter_decode_to_string([], 'latin1') == ''
+ assert iter_decode_to_string([b''], 'latin1') == ''
+ assert iter_decode_to_string([b'\xe9'], 'latin1') == 'é'
+ assert iter_decode_to_string([b'hello'], 'latin1') == 'hello'
+ assert iter_decode_to_string([b'he', b'llo'], 'latin1') == 'hello'
+ assert iter_decode_to_string([b'hell', b'o'], 'latin1') == 'hello'
+ assert iter_decode_to_string([b'\xc3\xa9'], 'latin1') == 'é'
+ assert iter_decode_to_string([b'\xEF\xBB\xBF\xc3\xa9'], 'latin1') == 'é'
+ assert iter_decode_to_string([
+ b'\xEF\xBB\xBF', b'\xc3', b'\xa9'], 'latin1') == 'é'
+ assert iter_decode_to_string([
+ b'\xEF\xBB\xBF', b'a', b'\xc3'], 'latin1') == 'a\uFFFD'
+ assert iter_decode_to_string([
+ b'', b'\xEF', b'', b'', b'\xBB\xBF\xc3', b'\xa9'], 'latin1') == 'é'
+ assert iter_decode_to_string([b'\xEF\xBB\xBF'], 'latin1') == ''
+ assert iter_decode_to_string([b'\xEF\xBB'], 'latin1') == 'ï»'
+ assert iter_decode_to_string([b'\xFE\xFF\x00\xe9'], 'latin1') == 'é'
+ assert iter_decode_to_string([b'\xFF\xFE\xe9\x00'], 'latin1') == 'é'
+ assert iter_decode_to_string([
+ b'', b'\xFF', b'', b'', b'\xFE\xe9', b'\x00'], 'latin1') == 'é'
+ assert iter_decode_to_string([
+ b'', b'h\xe9', b'llo'], 'x-user-defined') == 'h\uF7E9llo'
+
+
+def test_iter_encode():
+ assert b''.join(iter_encode([], 'latin1')) == b''
+ assert b''.join(iter_encode([''], 'latin1')) == b''
+ assert b''.join(iter_encode(['é'], 'latin1')) == b'\xe9'
+ assert b''.join(iter_encode(['', 'é', '', ''], 'latin1')) == b'\xe9'
+ assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16')) == b'\xe9\x00'
+ assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16le')) == b'\xe9\x00'
+ assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16be')) == b'\x00\xe9'
+ assert b''.join(iter_encode([
+ '', 'h\uF7E9', '', 'llo'], 'x-user-defined')) == b'h\xe9llo'
+
+
+def test_x_user_defined():
+ encoded = b'2,\x0c\x0b\x1aO\xd9#\xcb\x0f\xc9\xbbt\xcf\xa8\xca'
+ decoded = '2,\x0c\x0b\x1aO\uf7d9#\uf7cb\x0f\uf7c9\uf7bbt\uf7cf\uf7a8\uf7ca'
+ encoded = b'aa'
+ decoded = 'aa'
+ assert decode(encoded, 'x-user-defined') == (decoded, lookup('x-user-defined'))
+ assert encode(decoded, 'x-user-defined') == encoded
diff --git a/pip/_vendor/webencodings/x_user_defined.py b/pip/_vendor/webencodings/x_user_defined.py
new file mode 100644
index 000000000..f0daa11a7
--- /dev/null
+++ b/pip/_vendor/webencodings/x_user_defined.py
@@ -0,0 +1,325 @@
+# coding: utf8
+"""
+
+ webencodings.x_user_defined
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ An implementation of the x-user-defined encoding.
+
+ :copyright: Copyright 2012 by Simon Sapin
+ :license: BSD, see LICENSE for details.
+
+"""
+
+from __future__ import unicode_literals
+
+import codecs
+
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self, input, errors='strict'):
+ return codecs.charmap_encode(input, errors, encoding_table)
+
+ def decode(self, input, errors='strict'):
+ return codecs.charmap_decode(input, errors, decoding_table)
+
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input, self.errors, encoding_table)[0]
+
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input, self.errors, decoding_table)[0]
+
+
+class StreamWriter(Codec, codecs.StreamWriter):
+ pass
+
+
+class StreamReader(Codec, codecs.StreamReader):
+ pass
+
+
+### encodings module API
+
+codec_info = codecs.CodecInfo(
+ name='x-user-defined',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+)
+
+
+### Decoding Table
+
+# Python 3:
+# for c in range(256): print(' %r' % chr(c if c < 128 else c + 0xF700))
+decoding_table = (
+ '\x00'
+ '\x01'
+ '\x02'
+ '\x03'
+ '\x04'
+ '\x05'
+ '\x06'
+ '\x07'
+ '\x08'
+ '\t'
+ '\n'
+ '\x0b'
+ '\x0c'
+ '\r'
+ '\x0e'
+ '\x0f'
+ '\x10'
+ '\x11'
+ '\x12'
+ '\x13'
+ '\x14'
+ '\x15'
+ '\x16'
+ '\x17'
+ '\x18'
+ '\x19'
+ '\x1a'
+ '\x1b'
+ '\x1c'
+ '\x1d'
+ '\x1e'
+ '\x1f'
+ ' '
+ '!'
+ '"'
+ '#'
+ '$'
+ '%'
+ '&'
+ "'"
+ '('
+ ')'
+ '*'
+ '+'
+ ','
+ '-'
+ '.'
+ '/'
+ '0'
+ '1'
+ '2'
+ '3'
+ '4'
+ '5'
+ '6'
+ '7'
+ '8'
+ '9'
+ ':'
+ ';'
+ '<'
+ '='
+ '>'
+ '?'
+ '@'
+ 'A'
+ 'B'
+ 'C'
+ 'D'
+ 'E'
+ 'F'
+ 'G'
+ 'H'
+ 'I'
+ 'J'
+ 'K'
+ 'L'
+ 'M'
+ 'N'
+ 'O'
+ 'P'
+ 'Q'
+ 'R'
+ 'S'
+ 'T'
+ 'U'
+ 'V'
+ 'W'
+ 'X'
+ 'Y'
+ 'Z'
+ '['
+ '\\'
+ ']'
+ '^'
+ '_'
+ '`'
+ 'a'
+ 'b'
+ 'c'
+ 'd'
+ 'e'
+ 'f'
+ 'g'
+ 'h'
+ 'i'
+ 'j'
+ 'k'
+ 'l'
+ 'm'
+ 'n'
+ 'o'
+ 'p'
+ 'q'
+ 'r'
+ 's'
+ 't'
+ 'u'
+ 'v'
+ 'w'
+ 'x'
+ 'y'
+ 'z'
+ '{'
+ '|'
+ '}'
+ '~'
+ '\x7f'
+ '\uf780'
+ '\uf781'
+ '\uf782'
+ '\uf783'
+ '\uf784'
+ '\uf785'
+ '\uf786'
+ '\uf787'
+ '\uf788'
+ '\uf789'
+ '\uf78a'
+ '\uf78b'
+ '\uf78c'
+ '\uf78d'
+ '\uf78e'
+ '\uf78f'
+ '\uf790'
+ '\uf791'
+ '\uf792'
+ '\uf793'
+ '\uf794'
+ '\uf795'
+ '\uf796'
+ '\uf797'
+ '\uf798'
+ '\uf799'
+ '\uf79a'
+ '\uf79b'
+ '\uf79c'
+ '\uf79d'
+ '\uf79e'
+ '\uf79f'
+ '\uf7a0'
+ '\uf7a1'
+ '\uf7a2'
+ '\uf7a3'
+ '\uf7a4'
+ '\uf7a5'
+ '\uf7a6'
+ '\uf7a7'
+ '\uf7a8'
+ '\uf7a9'
+ '\uf7aa'
+ '\uf7ab'
+ '\uf7ac'
+ '\uf7ad'
+ '\uf7ae'
+ '\uf7af'
+ '\uf7b0'
+ '\uf7b1'
+ '\uf7b2'
+ '\uf7b3'
+ '\uf7b4'
+ '\uf7b5'
+ '\uf7b6'
+ '\uf7b7'
+ '\uf7b8'
+ '\uf7b9'
+ '\uf7ba'
+ '\uf7bb'
+ '\uf7bc'
+ '\uf7bd'
+ '\uf7be'
+ '\uf7bf'
+ '\uf7c0'
+ '\uf7c1'
+ '\uf7c2'
+ '\uf7c3'
+ '\uf7c4'
+ '\uf7c5'
+ '\uf7c6'
+ '\uf7c7'
+ '\uf7c8'
+ '\uf7c9'
+ '\uf7ca'
+ '\uf7cb'
+ '\uf7cc'
+ '\uf7cd'
+ '\uf7ce'
+ '\uf7cf'
+ '\uf7d0'
+ '\uf7d1'
+ '\uf7d2'
+ '\uf7d3'
+ '\uf7d4'
+ '\uf7d5'
+ '\uf7d6'
+ '\uf7d7'
+ '\uf7d8'
+ '\uf7d9'
+ '\uf7da'
+ '\uf7db'
+ '\uf7dc'
+ '\uf7dd'
+ '\uf7de'
+ '\uf7df'
+ '\uf7e0'
+ '\uf7e1'
+ '\uf7e2'
+ '\uf7e3'
+ '\uf7e4'
+ '\uf7e5'
+ '\uf7e6'
+ '\uf7e7'
+ '\uf7e8'
+ '\uf7e9'
+ '\uf7ea'
+ '\uf7eb'
+ '\uf7ec'
+ '\uf7ed'
+ '\uf7ee'
+ '\uf7ef'
+ '\uf7f0'
+ '\uf7f1'
+ '\uf7f2'
+ '\uf7f3'
+ '\uf7f4'
+ '\uf7f5'
+ '\uf7f6'
+ '\uf7f7'
+ '\uf7f8'
+ '\uf7f9'
+ '\uf7fa'
+ '\uf7fb'
+ '\uf7fc'
+ '\uf7fd'
+ '\uf7fe'
+ '\uf7ff'
+)
+
+### Encoding table
+encoding_table = codecs.charmap_build(decoding_table)
diff --git a/pip/index.py b/pip/index.py
index feb9f582b..b1d08f59d 100644
--- a/pip/index.py
+++ b/pip/index.py
@@ -727,7 +727,7 @@ class HTMLPage(object):
self.content = content
self.parsed = html5lib.parse(
self.content,
- encoding=encoding,
+ transport_encoding=encoding,
namespaceHTMLElements=False,
)
self.url = url
diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py
index 625b38d6a..f25a0c4f8 100644
--- a/tests/unit/test_index.py
+++ b/tests/unit/test_index.py
@@ -90,18 +90,18 @@ class TestLink(object):
@pytest.mark.parametrize(
("html", "url", "expected"),
[
- ("<html></html>", "https://example.com/", "https://example.com/"),
+ (b"<html></html>", "https://example.com/", "https://example.com/"),
(
- "<html><head>"
- "<base href=\"https://foo.example.com/\">"
- "</head></html>",
+ b"<html><head>"
+ b"<base href=\"https://foo.example.com/\">"
+ b"</head></html>",
"https://example.com/",
"https://foo.example.com/",
),
(
- "<html><head>"
- "<base><base href=\"https://foo.example.com/\">"
- "</head></html>",
+ b"<html><head>"
+ b"<base><base href=\"https://foo.example.com/\">"
+ b"</head></html>",
"https://example.com/",
"https://foo.example.com/",
),