summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDonald Stufft <donald@stufft.io>2014-05-02 19:56:32 -0400
committerDonald Stufft <donald@stufft.io>2014-05-02 21:01:00 -0400
commitc3aa72ec176af16e12d21636c8ca92ab9ab3e722 (patch)
tree7acf69339d7ea19b1e4ff21c6d3a07ee4ae5ae57
parentb3ac7b22401e8f83cd09deaffe2fd10a9f3cf0c2 (diff)
downloadpip-c3aa72ec176af16e12d21636c8ca92ab9ab3e722.tar.gz
Upgrade html5lib to 1.0b3
-rw-r--r--pip/_vendor/html5lib/LICENSE20
-rw-r--r--pip/_vendor/html5lib/__init__.py2
-rw-r--r--pip/_vendor/html5lib/constants.py18
-rw-r--r--pip/_vendor/html5lib/filters/lint.py30
-rw-r--r--pip/_vendor/html5lib/html5parser.py128
-rw-r--r--pip/_vendor/html5lib/inputstream.py10
-rw-r--r--pip/_vendor/html5lib/serializer/htmlserializer.py21
-rw-r--r--pip/_vendor/html5lib/treeadapters/__init__.py0
-rw-r--r--pip/_vendor/html5lib/treeadapters/sax.py44
-rw-r--r--pip/_vendor/html5lib/treebuilders/dom.py65
-rw-r--r--pip/_vendor/html5lib/treewalkers/_base.py122
-rw-r--r--pip/_vendor/html5lib/treewalkers/etree.py9
-rw-r--r--pip/_vendor/html5lib/utils.py4
-rw-r--r--pip/_vendor/vendor.txt2
14 files changed, 234 insertions, 241 deletions
diff --git a/pip/_vendor/html5lib/LICENSE b/pip/_vendor/html5lib/LICENSE
deleted file mode 100644
index c87fa7a00..000000000
--- a/pip/_vendor/html5lib/LICENSE
+++ /dev/null
@@ -1,20 +0,0 @@
-Copyright (c) 2006-2013 James Graham and other contributors
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/pip/_vendor/html5lib/__init__.py b/pip/_vendor/html5lib/__init__.py
index 10e2b74c2..ff5c77551 100644
--- a/pip/_vendor/html5lib/__init__.py
+++ b/pip/_vendor/html5lib/__init__.py
@@ -20,4 +20,4 @@ from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
-__version__ = "1.0b1"
+__version__ = "1.0b3"
diff --git a/pip/_vendor/html5lib/constants.py b/pip/_vendor/html5lib/constants.py
index 1866dd78e..e7089846d 100644
--- a/pip/_vendor/html5lib/constants.py
+++ b/pip/_vendor/html5lib/constants.py
@@ -433,6 +433,24 @@ mathmlTextIntegrationPointElements = frozenset((
(namespaces["mathml"], "mtext")
))
+adjustForeignAttributes = {
+ "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
+ "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
+ "xlink:href": ("xlink", "href", namespaces["xlink"]),
+ "xlink:role": ("xlink", "role", namespaces["xlink"]),
+ "xlink:show": ("xlink", "show", namespaces["xlink"]),
+ "xlink:title": ("xlink", "title", namespaces["xlink"]),
+ "xlink:type": ("xlink", "type", namespaces["xlink"]),
+ "xml:base": ("xml", "base", namespaces["xml"]),
+ "xml:lang": ("xml", "lang", namespaces["xml"]),
+ "xml:space": ("xml", "space", namespaces["xml"]),
+ "xmlns": (None, "xmlns", namespaces["xmlns"]),
+ "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
+}
+
+unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
+ adjustForeignAttributes.items()])
+
spaceCharacters = frozenset((
"\t",
"\n",
diff --git a/pip/_vendor/html5lib/filters/lint.py b/pip/_vendor/html5lib/filters/lint.py
index 83ad63971..7cc99a4ba 100644
--- a/pip/_vendor/html5lib/filters/lint.py
+++ b/pip/_vendor/html5lib/filters/lint.py
@@ -23,24 +23,24 @@ class Filter(_base.Filter):
if type in ("StartTag", "EmptyTag"):
name = token["name"]
if contentModelFlag != "PCDATA":
- raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
+ raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
+ raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
if not name:
raise LintError(_("Empty tag name"))
if type == "StartTag" and name in voidElements:
- raise LintError(_("Void element reported as StartTag token: %s") % name)
+ raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
elif type == "EmptyTag" and name not in voidElements:
- raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"])
+ raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
if type == "StartTag":
open_elements.append(name)
for name, value in token["data"]:
if not isinstance(name, str):
- raise LintError(_("Attribute name is not a string: %r") % name)
+ raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
if not name:
raise LintError(_("Empty attribute name"))
if not isinstance(value, str):
- raise LintError(_("Attribute value is not a string: %r") % value)
+ raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
if name in cdataElements:
contentModelFlag = "CDATA"
elif name in rcdataElements:
@@ -51,14 +51,14 @@ class Filter(_base.Filter):
elif type == "EndTag":
name = token["name"]
if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
+ raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
if not name:
raise LintError(_("Empty tag name"))
if name in voidElements:
- raise LintError(_("Void element reported as EndTag token: %s") % name)
+ raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
start_name = open_elements.pop()
if start_name != name:
- raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name))
+ raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
contentModelFlag = "PCDATA"
elif type == "Comment":
@@ -68,26 +68,26 @@ class Filter(_base.Filter):
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
if not isinstance(data, str):
- raise LintError(_("Attribute name is not a string: %r") % data)
+ raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
if not data:
- raise LintError(_("%s token with empty data") % type)
+ raise LintError(_("%(type)s token with empty data") % {"type": type})
if type == "SpaceCharacters":
data = data.strip(spaceCharacters)
if data:
- raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data)
+ raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
elif type == "Doctype":
name = token["name"]
if contentModelFlag != "PCDATA":
- raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
+ raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
+ raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
# XXX: what to do with token["data"] ?
elif type in ("ParseError", "SerializeError"):
pass
else:
- raise LintError(_("Unknown token type: %s") % type)
+ raise LintError(_("Unknown token type: %(type)s") % {"type": type})
yield token
diff --git a/pip/_vendor/html5lib/html5parser.py b/pip/_vendor/html5lib/html5parser.py
index 8a5acfeb0..b28f46f2a 100644
--- a/pip/_vendor/html5lib/html5parser.py
+++ b/pip/_vendor/html5lib/html5parser.py
@@ -17,6 +17,7 @@ from .constants import headingElements
from .constants import cdataElements, rcdataElements
from .constants import tokenTypes, ReparseException, namespaces
from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
+from .constants import adjustForeignAttributes as adjustForeignAttributesMap
def parse(doc, treebuilder="etree", encoding=None,
@@ -168,7 +169,7 @@ class HTMLParser(object):
(self.isMathMLTextIntegrationPoint(currentNode) and
((type == StartTagToken and
token["name"] not in frozenset(["mglyph", "malignmark"])) or
- type in (CharactersToken, SpaceCharactersToken))) or
+ type in (CharactersToken, SpaceCharactersToken))) or
(currentNodeNamespace == namespaces["mathml"] and
currentNodeName == "annotation-xml" and
token["name"] == "svg") or
@@ -333,20 +334,7 @@ class HTMLParser(object):
del token["data"][originalName]
def adjustForeignAttributes(self, token):
- replacements = {
- "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
- "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
- "xlink:href": ("xlink", "href", namespaces["xlink"]),
- "xlink:role": ("xlink", "role", namespaces["xlink"]),
- "xlink:show": ("xlink", "show", namespaces["xlink"]),
- "xlink:title": ("xlink", "title", namespaces["xlink"]),
- "xlink:type": ("xlink", "type", namespaces["xlink"]),
- "xml:base": ("xml", "base", namespaces["xml"]),
- "xml:lang": ("xml", "lang", namespaces["xml"]),
- "xml:space": ("xml", "space", namespaces["xml"]),
- "xmlns": (None, "xmlns", namespaces["xmlns"]),
- "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
- }
+ replacements = adjustForeignAttributesMap
for originalName in token["data"].keys():
if originalName in replacements:
@@ -519,61 +507,61 @@ def getPhases(debug):
if (not correct or token["name"] != "html"
or publicId.startswith(
- ("+//silmaril//dtd html pro v0r11 19970101//",
- "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
- "-//as//dtd html 3.0 aswedit + extensions//",
- "-//ietf//dtd html 2.0 level 1//",
- "-//ietf//dtd html 2.0 level 2//",
- "-//ietf//dtd html 2.0 strict level 1//",
- "-//ietf//dtd html 2.0 strict level 2//",
- "-//ietf//dtd html 2.0 strict//",
- "-//ietf//dtd html 2.0//",
- "-//ietf//dtd html 2.1e//",
- "-//ietf//dtd html 3.0//",
- "-//ietf//dtd html 3.2 final//",
- "-//ietf//dtd html 3.2//",
- "-//ietf//dtd html 3//",
- "-//ietf//dtd html level 0//",
- "-//ietf//dtd html level 1//",
- "-//ietf//dtd html level 2//",
- "-//ietf//dtd html level 3//",
- "-//ietf//dtd html strict level 0//",
- "-//ietf//dtd html strict level 1//",
- "-//ietf//dtd html strict level 2//",
- "-//ietf//dtd html strict level 3//",
- "-//ietf//dtd html strict//",
- "-//ietf//dtd html//",
- "-//metrius//dtd metrius presentational//",
- "-//microsoft//dtd internet explorer 2.0 html strict//",
- "-//microsoft//dtd internet explorer 2.0 html//",
- "-//microsoft//dtd internet explorer 2.0 tables//",
- "-//microsoft//dtd internet explorer 3.0 html strict//",
- "-//microsoft//dtd internet explorer 3.0 html//",
- "-//microsoft//dtd internet explorer 3.0 tables//",
- "-//netscape comm. corp.//dtd html//",
- "-//netscape comm. corp.//dtd strict html//",
- "-//o'reilly and associates//dtd html 2.0//",
- "-//o'reilly and associates//dtd html extended 1.0//",
- "-//o'reilly and associates//dtd html extended relaxed 1.0//",
- "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
- "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
- "-//spyglass//dtd html 2.0 extended//",
- "-//sq//dtd html 2.0 hotmetal + extensions//",
- "-//sun microsystems corp.//dtd hotjava html//",
- "-//sun microsystems corp.//dtd hotjava strict html//",
- "-//w3c//dtd html 3 1995-03-24//",
- "-//w3c//dtd html 3.2 draft//",
- "-//w3c//dtd html 3.2 final//",
- "-//w3c//dtd html 3.2//",
- "-//w3c//dtd html 3.2s draft//",
- "-//w3c//dtd html 4.0 frameset//",
- "-//w3c//dtd html 4.0 transitional//",
- "-//w3c//dtd html experimental 19960712//",
- "-//w3c//dtd html experimental 970421//",
- "-//w3c//dtd w3 html//",
- "-//w3o//dtd w3 html 3.0//",
- "-//webtechs//dtd mozilla html 2.0//",
- "-//webtechs//dtd mozilla html//"))
+ ("+//silmaril//dtd html pro v0r11 19970101//",
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//",
+ "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//",
+ "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//",
+ "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//",
+ "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//",
+ "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//"))
or publicId in
("-//w3o//dtd w3 html strict 3.0//en//",
"-/w3c/dtd html 4.0 transitional/en",
diff --git a/pip/_vendor/html5lib/inputstream.py b/pip/_vendor/html5lib/inputstream.py
index 0ac70bb3a..f3dfd7f3d 100644
--- a/pip/_vendor/html5lib/inputstream.py
+++ b/pip/_vendor/html5lib/inputstream.py
@@ -43,7 +43,7 @@ ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005
charsUntilRegEx = {}
-class BufferedStream:
+class BufferedStream(object):
"""Buffering for streams that do not have buffering of their own
The buffer is implemented as a list of chunks on the assumption that
@@ -63,11 +63,11 @@ class BufferedStream:
return pos
def seek(self, pos):
- assert pos < self._bufferedBytes()
+ assert pos <= self._bufferedBytes()
offset = pos
i = 0
while len(self.buffer[i]) < offset:
- offset -= pos
+ offset -= len(self.buffer[i])
i += 1
self.position = [i, offset]
@@ -114,7 +114,7 @@ class BufferedStream:
if remainingBytes:
rv.append(self._readStream(remainingBytes))
- return "".join(rv)
+ return b"".join(rv)
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
@@ -132,7 +132,7 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
-class HTMLUnicodeInputStream:
+class HTMLUnicodeInputStream(object):
"""Provides a unicode stream of characters to the HTMLTokenizer.
This class takes care of character encoding and removing or replacing
diff --git a/pip/_vendor/html5lib/serializer/htmlserializer.py b/pip/_vendor/html5lib/serializer/htmlserializer.py
index 08b60dfcc..157840a05 100644
--- a/pip/_vendor/html5lib/serializer/htmlserializer.py
+++ b/pip/_vendor/html5lib/serializer/htmlserializer.py
@@ -92,15 +92,17 @@ class HTMLSerializer(object):
resolve_entities = True
# miscellaneous options
+ alphabetical_attributes = False
inject_meta_charset = True
strip_whitespace = False
sanitize = False
options = ("quote_attr_values", "quote_char", "use_best_quote_char",
- "minimize_boolean_attributes", "use_trailing_solidus",
- "space_before_trailing_solidus", "omit_optional_tags",
- "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
- "escape_rcdata", "resolve_entities", "sanitize")
+ "omit_optional_tags", "minimize_boolean_attributes",
+ "use_trailing_solidus", "space_before_trailing_solidus",
+ "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
+ "alphabetical_attributes", "inject_meta_charset",
+ "strip_whitespace", "sanitize")
def __init__(self, **kwargs):
"""Initialize HTMLSerializer.
@@ -143,6 +145,8 @@ class HTMLSerializer(object):
See `html5lib user documentation`_
omit_optional_tags=True|False
Omit start/end tags that are optional.
+ alphabetical_attributes=False|True
+ Reorder attributes to be in alphabetical order.
.. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
"""
@@ -171,10 +175,11 @@ class HTMLSerializer(object):
self.encoding = encoding
in_cdata = False
self.errors = []
+
if encoding and self.inject_meta_charset:
from ..filters.inject_meta_charset import Filter
treewalker = Filter(treewalker, encoding)
- # XXX: WhitespaceFilter should be used before OptionalTagFilter
+ # WhitespaceFilter should be used before OptionalTagFilter
# for maximum efficiently of this latter filter
if self.strip_whitespace:
from ..filters.whitespace import Filter
@@ -185,6 +190,12 @@ class HTMLSerializer(object):
if self.omit_optional_tags:
from ..filters.optionaltags import Filter
treewalker = Filter(treewalker)
+ # Alphabetical attributes must be last, as other filters
+ # could add attributes and alter the order
+ if self.alphabetical_attributes:
+ from ..filters.alphabeticalattributes import Filter
+ treewalker = Filter(treewalker)
+
for token in treewalker:
type = token["type"]
if type == "Doctype":
diff --git a/pip/_vendor/html5lib/treeadapters/__init__.py b/pip/_vendor/html5lib/treeadapters/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/pip/_vendor/html5lib/treeadapters/__init__.py
diff --git a/pip/_vendor/html5lib/treeadapters/sax.py b/pip/_vendor/html5lib/treeadapters/sax.py
new file mode 100644
index 000000000..ad47df956
--- /dev/null
+++ b/pip/_vendor/html5lib/treeadapters/sax.py
@@ -0,0 +1,44 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from xml.sax.xmlreader import AttributesNSImpl
+
+from ..constants import adjustForeignAttributes, unadjustForeignAttributes
+
+prefix_mapping = {}
+for prefix, localName, namespace in adjustForeignAttributes.values():
+ if prefix is not None:
+ prefix_mapping[prefix] = namespace
+
+
+def to_sax(walker, handler):
+ """Call SAX-like content handler based on treewalker walker"""
+ handler.startDocument()
+ for prefix, namespace in prefix_mapping.items():
+ handler.startPrefixMapping(prefix, namespace)
+
+ for token in walker:
+ type = token["type"]
+ if type == "Doctype":
+ continue
+ elif type in ("StartTag", "EmptyTag"):
+ attrs = AttributesNSImpl(token["data"],
+ unadjustForeignAttributes)
+ handler.startElementNS((token["namespace"], token["name"]),
+ token["name"],
+ attrs)
+ if type == "EmptyTag":
+ handler.endElementNS((token["namespace"], token["name"]),
+ token["name"])
+ elif type == "EndTag":
+ handler.endElementNS((token["namespace"], token["name"]),
+ token["name"])
+ elif type in ("Characters", "SpaceCharacters"):
+ handler.characters(token["data"])
+ elif type == "Comment":
+ pass
+ else:
+ assert False, "Unknown token type"
+
+ for prefix, namespace in prefix_mapping.items():
+ handler.endPrefixMapping(prefix)
+ handler.endDocument()
diff --git a/pip/_vendor/html5lib/treebuilders/dom.py b/pip/_vendor/html5lib/treebuilders/dom.py
index f9e0d76e7..61e5ed79e 100644
--- a/pip/_vendor/html5lib/treebuilders/dom.py
+++ b/pip/_vendor/html5lib/treebuilders/dom.py
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals
-from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
+from xml.dom import minidom, Node
import weakref
from . import _base
@@ -220,69 +220,6 @@ def getDomBuilder(DomImplementation):
return "\n".join(rv)
- def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}):
- if node.nodeType == Node.ELEMENT_NODE:
- if not nsmap:
- handler.startElement(node.nodeName, node.attributes)
- for child in node.childNodes:
- dom2sax(child, handler, nsmap)
- handler.endElement(node.nodeName)
- else:
- attributes = dict(node.attributes.itemsNS())
-
- # gather namespace declarations
- prefixes = []
- for attrname in list(node.attributes.keys()):
- attr = node.getAttributeNode(attrname)
- if (attr.namespaceURI == XMLNS_NAMESPACE or
- (attr.namespaceURI is None and attr.nodeName.startswith('xmlns'))):
- prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None)
- handler.startPrefixMapping(prefix, attr.nodeValue)
- prefixes.append(prefix)
- nsmap = nsmap.copy()
- nsmap[prefix] = attr.nodeValue
- del attributes[(attr.namespaceURI, attr.nodeName)]
-
- # apply namespace declarations
- for attrname in list(node.attributes.keys()):
- attr = node.getAttributeNode(attrname)
- if attr.namespaceURI is None and ':' in attr.nodeName:
- prefix = attr.nodeName.split(':')[0]
- if prefix in nsmap:
- del attributes[(attr.namespaceURI, attr.nodeName)]
- attributes[(nsmap[prefix], attr.nodeName)] = attr.nodeValue
-
- # SAX events
- ns = node.namespaceURI or nsmap.get(None, None)
- handler.startElementNS((ns, node.nodeName), node.nodeName, attributes)
- for child in node.childNodes:
- dom2sax(child, handler, nsmap)
- handler.endElementNS((ns, node.nodeName), node.nodeName)
- for prefix in prefixes:
- handler.endPrefixMapping(prefix)
-
- elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
- handler.characters(node.nodeValue)
-
- elif node.nodeType == Node.DOCUMENT_NODE:
- handler.startDocument()
- for child in node.childNodes:
- dom2sax(child, handler, nsmap)
- handler.endDocument()
-
- elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
- for child in node.childNodes:
- dom2sax(child, handler, nsmap)
-
- else:
- # ATTRIBUTE_NODE
- # ENTITY_NODE
- # PROCESSING_INSTRUCTION_NODE
- # COMMENT_NODE
- # DOCUMENT_TYPE_NODE
- # NOTATION_NODE
- pass
-
return locals()
diff --git a/pip/_vendor/html5lib/treewalkers/_base.py b/pip/_vendor/html5lib/treewalkers/_base.py
index a20235961..dda3cd74e 100644
--- a/pip/_vendor/html5lib/treewalkers/_base.py
+++ b/pip/_vendor/html5lib/treewalkers/_base.py
@@ -1,13 +1,41 @@
from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
+from pip._vendor.six import text_type, string_types
import gettext
_ = gettext.gettext
+from xml.dom import Node
+
+DOCUMENT = Node.DOCUMENT_NODE
+DOCTYPE = Node.DOCUMENT_TYPE_NODE
+TEXT = Node.TEXT_NODE
+ELEMENT = Node.ELEMENT_NODE
+COMMENT = Node.COMMENT_NODE
+ENTITY = Node.ENTITY_NODE
+UNKNOWN = "<#UNKNOWN#>"
+
from ..constants import voidElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)
+def to_text(s, blank_if_none=True):
+ """Wrapper around six.text_type to convert None to empty string"""
+ if s is None:
+ if blank_if_none:
+ return ""
+ else:
+ return None
+ elif isinstance(s, text_type):
+ return s
+ else:
+ return text_type(s)
+
+
+def is_text_or_none(string):
+ """Wrapper around isinstance(string_types) or is None"""
+ return string is None or isinstance(string, string_types)
+
+
class TreeWalker(object):
def __init__(self, tree):
self.tree = tree
@@ -19,45 +47,47 @@ class TreeWalker(object):
return {"type": "SerializeError", "data": msg}
def emptyTag(self, namespace, name, attrs, hasChildren=False):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(name)
- assert all((namespace is None or isinstance(namespace, text_type)) and
- isinstance(name, text_type) and
- isinstance(value, text_type)
+ assert namespace is None or isinstance(namespace, string_types), type(namespace)
+ assert isinstance(name, string_types), type(name)
+ assert all((namespace is None or isinstance(namespace, string_types)) and
+ isinstance(name, string_types) and
+ isinstance(value, string_types)
for (namespace, name), value in attrs.items())
- yield {"type": "EmptyTag", "name": name,
- "namespace": namespace,
+ yield {"type": "EmptyTag", "name": to_text(name, False),
+ "namespace": to_text(namespace),
"data": attrs}
if hasChildren:
yield self.error(_("Void element has children"))
def startTag(self, namespace, name, attrs):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(name)
- assert all((namespace is None or isinstance(namespace, text_type)) and
- isinstance(name, text_type) and
- isinstance(value, text_type)
+ assert namespace is None or isinstance(namespace, string_types), type(namespace)
+ assert isinstance(name, string_types), type(name)
+ assert all((namespace is None or isinstance(namespace, string_types)) and
+ isinstance(name, string_types) and
+ isinstance(value, string_types)
for (namespace, name), value in attrs.items())
return {"type": "StartTag",
- "name": name,
- "namespace": namespace,
- "data": attrs}
+ "name": text_type(name),
+ "namespace": to_text(namespace),
+ "data": dict(((to_text(namespace, False), to_text(name)),
+ to_text(value, False))
+ for (namespace, name), value in attrs.items())}
def endTag(self, namespace, name):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(namespace)
+ assert namespace is None or isinstance(namespace, string_types), type(namespace)
+ assert isinstance(name, string_types), type(namespace)
return {"type": "EndTag",
- "name": name,
- "namespace": namespace,
+ "name": to_text(name, False),
+ "namespace": to_text(namespace),
"data": {}}
def text(self, data):
- assert isinstance(data, text_type), type(data)
+ assert isinstance(data, string_types), type(data)
- data = data
+ data = to_text(data)
middle = data.lstrip(spaceCharacters)
left = data[:len(data) - len(middle)]
if left:
@@ -71,56 +101,30 @@ class TreeWalker(object):
yield {"type": "SpaceCharacters", "data": right}
def comment(self, data):
- assert isinstance(data, text_type), type(data)
+ assert isinstance(data, string_types), type(data)
- return {"type": "Comment", "data": data}
+ return {"type": "Comment", "data": text_type(data)}
def doctype(self, name, publicId=None, systemId=None, correct=True):
- assert name is None or isinstance(name, text_type), type(name)
- assert publicId is None or isinstance(publicId, text_type), type(publicId)
- assert systemId is None or isinstance(systemId, text_type), type(systemId)
+ assert is_text_or_none(name), type(name)
+ assert is_text_or_none(publicId), type(publicId)
+ assert is_text_or_none(systemId), type(systemId)
return {"type": "Doctype",
- "name": name if name is not None else "",
- "publicId": publicId,
- "systemId": systemId,
- "correct": correct}
+ "name": to_text(name),
+ "publicId": to_text(publicId),
+ "systemId": to_text(systemId),
+ "correct": to_text(correct)}
def entity(self, name):
- assert isinstance(name, text_type), type(name)
+ assert isinstance(name, string_types), type(name)
- return {"type": "Entity", "name": name}
+ return {"type": "Entity", "name": text_type(name)}
def unknown(self, nodeType):
return self.error(_("Unknown node type: ") + nodeType)
-class RecursiveTreeWalker(TreeWalker):
- def walkChildren(self, node):
- raise NotImplementedError
-
- def element(self, node, namespace, name, attrs, hasChildren):
- if name in voidElements:
- for token in self.emptyTag(namespace, name, attrs, hasChildren):
- yield token
- else:
- yield self.startTag(name, attrs)
- if hasChildren:
- for token in self.walkChildren(node):
- yield token
- yield self.endTag(name)
-
-from xml.dom import Node
-
-DOCUMENT = Node.DOCUMENT_NODE
-DOCTYPE = Node.DOCUMENT_TYPE_NODE
-TEXT = Node.TEXT_NODE
-ELEMENT = Node.ELEMENT_NODE
-COMMENT = Node.COMMENT_NODE
-ENTITY = Node.ENTITY_NODE
-UNKNOWN = "<#UNKNOWN#>"
-
-
class NonRecursiveTreeWalker(TreeWalker):
def getNodeDetails(self, node):
raise NotImplementedError
diff --git a/pip/_vendor/html5lib/treewalkers/etree.py b/pip/_vendor/html5lib/treewalkers/etree.py
index 88fb9811a..f5615f50a 100644
--- a/pip/_vendor/html5lib/treewalkers/etree.py
+++ b/pip/_vendor/html5lib/treewalkers/etree.py
@@ -1,5 +1,12 @@
from __future__ import absolute_import, division, unicode_literals
+try:
+ from collections import OrderedDict
+except ImportError:
+ try:
+ from ordereddict import OrderedDict
+ except ImportError:
+ OrderedDict = dict
import gettext
_ = gettext.gettext
@@ -61,7 +68,7 @@ def getETreeBuilder(ElementTreeImplementation):
else:
namespace = None
tag = node.tag
- attrs = {}
+ attrs = OrderedDict()
for name, value in list(node.attrib.items()):
match = tag_regexp.match(name)
if match:
diff --git a/pip/_vendor/html5lib/utils.py b/pip/_vendor/html5lib/utils.py
index 4e8559db6..2f41f4dfa 100644
--- a/pip/_vendor/html5lib/utils.py
+++ b/pip/_vendor/html5lib/utils.py
@@ -8,6 +8,10 @@ except ImportError:
import xml.etree.ElementTree as default_etree
+__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
+ "surrogatePairToCodepoint", "moduleFactoryFactory"]
+
+
class MethodDispatcher(dict):
"""Dict with 2 special properties:
diff --git a/pip/_vendor/vendor.txt b/pip/_vendor/vendor.txt
index 8385545e5..ba4f7885d 100644
--- a/pip/_vendor/vendor.txt
+++ b/pip/_vendor/vendor.txt
@@ -1,5 +1,5 @@
distlib==0.1.8
-html5lib==1.0b1
+html5lib==1.0b3
six==1.5.2
colorama==0.2.7
requests==2.2.1