Upgrade html5lib to 1.0b3

author: Donald Stufft <donald@stufft.io> 2014-05-02 19:56:32 -0400
committer: Donald Stufft <donald@stufft.io> 2014-05-02 21:01:00 -0400
commit: c3aa72ec176af16e12d21636c8ca92ab9ab3e722 (patch)
tree: 7acf69339d7ea19b1e4ff21c6d3a07ee4ae5ae57
parent: b3ac7b22401e8f83cd09deaffe2fd10a9f3cf0c2 (diff)
download: pip-c3aa72ec176af16e12d21636c8ca92ab9ab3e722.tar.gz
14 files changed, 234 insertions, 241 deletions
diff --git a/pip/_vendor/html5lib/LICENSE b/pip/_vendor/html5lib/LICENSE
deleted file mode 100644
index c87fa7a00..000000000
--- a/pip/_vendor/html5lib/LICENSE
+++ /dev/null
@@ -1,20 +0,0 @@
-Copyright (c) 2006-2013 James Graham and other contributors
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/pip/_vendor/html5lib/__init__.py b/pip/_vendor/html5lib/__init__.py
index 10e2b74c2..ff5c77551 100644
--- a/pip/_vendor/html5lib/__init__.py
+++ b/pip/_vendor/html5lib/__init__.py
@@ -20,4 +20,4 @@ from .serializer import serialize
 
 __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
            "getTreeWalker", "serialize"]
-__version__ = "1.0b1"
+__version__ = "1.0b3"
diff --git a/pip/_vendor/html5lib/constants.py b/pip/_vendor/html5lib/constants.py
index 1866dd78e..e7089846d 100644
--- a/pip/_vendor/html5lib/constants.py
+++ b/pip/_vendor/html5lib/constants.py
@@ -433,6 +433,24 @@ mathmlTextIntegrationPointElements = frozenset((
     (namespaces["mathml"], "mtext")
 ))
 
+adjustForeignAttributes = {
+    "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
+    "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
+    "xlink:href": ("xlink", "href", namespaces["xlink"]),
+    "xlink:role": ("xlink", "role", namespaces["xlink"]),
+    "xlink:show": ("xlink", "show", namespaces["xlink"]),
+    "xlink:title": ("xlink", "title", namespaces["xlink"]),
+    "xlink:type": ("xlink", "type", namespaces["xlink"]),
+    "xml:base": ("xml", "base", namespaces["xml"]),
+    "xml:lang": ("xml", "lang", namespaces["xml"]),
+    "xml:space": ("xml", "space", namespaces["xml"]),
+    "xmlns": (None, "xmlns", namespaces["xmlns"]),
+    "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
+}
+
+unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
+                                  adjustForeignAttributes.items()])
+
 spaceCharacters = frozenset((
     "\t",
     "\n",
diff --git a/pip/_vendor/html5lib/filters/lint.py b/pip/_vendor/html5lib/filters/lint.py
index 83ad63971..7cc99a4ba 100644
--- a/pip/_vendor/html5lib/filters/lint.py
+++ b/pip/_vendor/html5lib/filters/lint.py
@@ -23,24 +23,24 @@ class Filter(_base.Filter):
             if type in ("StartTag", "EmptyTag"):
                 name = token["name"]
                 if contentModelFlag != "PCDATA":
-                    raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
+                    raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
                 if not isinstance(name, str):
-                    raise LintError(_("Tag name is not a string: %r") % name)
+                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                 if not name:
                     raise LintError(_("Empty tag name"))
                 if type == "StartTag" and name in voidElements:
-                    raise LintError(_("Void element reported as StartTag token: %s") % name)
+                    raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
                 elif type == "EmptyTag" and name not in voidElements:
-                    raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"])
+                    raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
                 if type == "StartTag":
                     open_elements.append(name)
                 for name, value in token["data"]:
                     if not isinstance(name, str):
-                        raise LintError(_("Attribute name is not a string: %r") % name)
+                        raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
                     if not name:
                         raise LintError(_("Empty attribute name"))
                     if not isinstance(value, str):
-                        raise LintError(_("Attribute value is not a string: %r") % value)
+                        raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
                 if name in cdataElements:
                     contentModelFlag = "CDATA"
                 elif name in rcdataElements:
@@ -51,14 +51,14 @@ class Filter(_base.Filter):
             elif type == "EndTag":
                 name = token["name"]
                 if not isinstance(name, str):
-                    raise LintError(_("Tag name is not a string: %r") % name)
+                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                 if not name:
                     raise LintError(_("Empty tag name"))
                 if name in voidElements:
-                    raise LintError(_("Void element reported as EndTag token: %s") % name)
+                    raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
                 start_name = open_elements.pop()
                 if start_name != name:
-                    raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name))
+                    raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
                 contentModelFlag = "PCDATA"
 
             elif type == "Comment":
@@ -68,26 +68,26 @@ class Filter(_base.Filter):
             elif type in ("Characters", "SpaceCharacters"):
                 data = token["data"]
                 if not isinstance(data, str):
-                    raise LintError(_("Attribute name is not a string: %r") % data)
+                    raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
                 if not data:
-                    raise LintError(_("%s token with empty data") % type)
+                    raise LintError(_("%(type)s token with empty data") % {"type": type})
                 if type == "SpaceCharacters":
                     data = data.strip(spaceCharacters)
                     if data:
-                        raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data)
+                        raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
 
             elif type == "Doctype":
                 name = token["name"]
                 if contentModelFlag != "PCDATA":
-                    raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
+                    raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
                 if not isinstance(name, str):
-                    raise LintError(_("Tag name is not a string: %r") % name)
+                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                 # XXX: what to do with token["data"] ?
 
             elif type in ("ParseError", "SerializeError"):
                 pass
 
             else:
-                raise LintError(_("Unknown token type: %s") % type)
+                raise LintError(_("Unknown token type: %(type)s") % {"type": type})
 
             yield token
diff --git a/pip/_vendor/html5lib/html5parser.py b/pip/_vendor/html5lib/html5parser.py
index 8a5acfeb0..b28f46f2a 100644
--- a/pip/_vendor/html5lib/html5parser.py
+++ b/pip/_vendor/html5lib/html5parser.py
@@ -17,6 +17,7 @@ from .constants import headingElements
 from .constants import cdataElements, rcdataElements
 from .constants import tokenTypes, ReparseException, namespaces
 from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
+from .constants import adjustForeignAttributes as adjustForeignAttributesMap
 
 
 def parse(doc, treebuilder="etree", encoding=None,
@@ -168,7 +169,7 @@ class HTMLParser(object):
                         (self.isMathMLTextIntegrationPoint(currentNode) and
                          ((type == StartTagToken and
                            token["name"] not in frozenset(["mglyph", "malignmark"])) or
-                         type in (CharactersToken, SpaceCharactersToken))) or
+                          type in (CharactersToken, SpaceCharactersToken))) or
                         (currentNodeNamespace == namespaces["mathml"] and
                          currentNodeName == "annotation-xml" and
                          token["name"] == "svg") or
@@ -333,20 +334,7 @@ class HTMLParser(object):
                 del token["data"][originalName]
 
     def adjustForeignAttributes(self, token):
-        replacements = {
-            "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
-            "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
-            "xlink:href": ("xlink", "href", namespaces["xlink"]),
-            "xlink:role": ("xlink", "role", namespaces["xlink"]),
-            "xlink:show": ("xlink", "show", namespaces["xlink"]),
-            "xlink:title": ("xlink", "title", namespaces["xlink"]),
-            "xlink:type": ("xlink", "type", namespaces["xlink"]),
-            "xml:base": ("xml", "base", namespaces["xml"]),
-            "xml:lang": ("xml", "lang", namespaces["xml"]),
-            "xml:space": ("xml", "space", namespaces["xml"]),
-            "xmlns": (None, "xmlns", namespaces["xmlns"]),
-            "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
-        }
+        replacements = adjustForeignAttributesMap
 
         for originalName in token["data"].keys():
             if originalName in replacements:
@@ -519,61 +507,61 @@ def getPhases(debug):
 
             if (not correct or token["name"] != "html"
                 or publicId.startswith(
-                ("+//silmaril//dtd html pro v0r11 19970101//",
-                 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
-                 "-//as//dtd html 3.0 aswedit + extensions//",
-                 "-//ietf//dtd html 2.0 level 1//",
-                 "-//ietf//dtd html 2.0 level 2//",
-                 "-//ietf//dtd html 2.0 strict level 1//",
-                 "-//ietf//dtd html 2.0 strict level 2//",
-                 "-//ietf//dtd html 2.0 strict//",
-                 "-//ietf//dtd html 2.0//",
-                 "-//ietf//dtd html 2.1e//",
-                 "-//ietf//dtd html 3.0//",
-                 "-//ietf//dtd html 3.2 final//",
-                 "-//ietf//dtd html 3.2//",
-                 "-//ietf//dtd html 3//",
-                 "-//ietf//dtd html level 0//",
-                 "-//ietf//dtd html level 1//",
-                 "-//ietf//dtd html level 2//",
-                 "-//ietf//dtd html level 3//",
-                 "-//ietf//dtd html strict level 0//",
-                 "-//ietf//dtd html strict level 1//",
-                 "-//ietf//dtd html strict level 2//",
-                 "-//ietf//dtd html strict level 3//",
-                 "-//ietf//dtd html strict//",
-                 "-//ietf//dtd html//",
-                 "-//metrius//dtd metrius presentational//",
-                 "-//microsoft//dtd internet explorer 2.0 html strict//",
-                 "-//microsoft//dtd internet explorer 2.0 html//",
-                 "-//microsoft//dtd internet explorer 2.0 tables//",
-                 "-//microsoft//dtd internet explorer 3.0 html strict//",
-                 "-//microsoft//dtd internet explorer 3.0 html//",
-                 "-//microsoft//dtd internet explorer 3.0 tables//",
-                 "-//netscape comm. corp.//dtd html//",
-                 "-//netscape comm. corp.//dtd strict html//",
-                 "-//o'reilly and associates//dtd html 2.0//",
-                 "-//o'reilly and associates//dtd html extended 1.0//",
-                 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
-                 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
-                 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
-                 "-//spyglass//dtd html 2.0 extended//",
-                 "-//sq//dtd html 2.0 hotmetal + extensions//",
-                 "-//sun microsystems corp.//dtd hotjava html//",
-                 "-//sun microsystems corp.//dtd hotjava strict html//",
-                 "-//w3c//dtd html 3 1995-03-24//",
-                 "-//w3c//dtd html 3.2 draft//",
-                 "-//w3c//dtd html 3.2 final//",
-                 "-//w3c//dtd html 3.2//",
-                 "-//w3c//dtd html 3.2s draft//",
-                 "-//w3c//dtd html 4.0 frameset//",
-                 "-//w3c//dtd html 4.0 transitional//",
-                 "-//w3c//dtd html experimental 19960712//",
-                 "-//w3c//dtd html experimental 970421//",
-                 "-//w3c//dtd w3 html//",
-                 "-//w3o//dtd w3 html 3.0//",
-                 "-//webtechs//dtd mozilla html 2.0//",
-                 "-//webtechs//dtd mozilla html//"))
+                    ("+//silmaril//dtd html pro v0r11 19970101//",
+                     "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+                     "-//as//dtd html 3.0 aswedit + extensions//",
+                     "-//ietf//dtd html 2.0 level 1//",
+                     "-//ietf//dtd html 2.0 level 2//",
+                     "-//ietf//dtd html 2.0 strict level 1//",
+                     "-//ietf//dtd html 2.0 strict level 2//",
+                     "-//ietf//dtd html 2.0 strict//",
+                     "-//ietf//dtd html 2.0//",
+                     "-//ietf//dtd html 2.1e//",
+                     "-//ietf//dtd html 3.0//",
+                     "-//ietf//dtd html 3.2 final//",
+                     "-//ietf//dtd html 3.2//",
+                     "-//ietf//dtd html 3//",
+                     "-//ietf//dtd html level 0//",
+                     "-//ietf//dtd html level 1//",
+                     "-//ietf//dtd html level 2//",
+                     "-//ietf//dtd html level 3//",
+                     "-//ietf//dtd html strict level 0//",
+                     "-//ietf//dtd html strict level 1//",
+                     "-//ietf//dtd html strict level 2//",
+                     "-//ietf//dtd html strict level 3//",
+                     "-//ietf//dtd html strict//",
+                     "-//ietf//dtd html//",
+                     "-//metrius//dtd metrius presentational//",
+                     "-//microsoft//dtd internet explorer 2.0 html strict//",
+                     "-//microsoft//dtd internet explorer 2.0 html//",
+                     "-//microsoft//dtd internet explorer 2.0 tables//",
+                     "-//microsoft//dtd internet explorer 3.0 html strict//",
+                     "-//microsoft//dtd internet explorer 3.0 html//",
+                     "-//microsoft//dtd internet explorer 3.0 tables//",
+                     "-//netscape comm. corp.//dtd html//",
+                     "-//netscape comm. corp.//dtd strict html//",
+                     "-//o'reilly and associates//dtd html 2.0//",
+                     "-//o'reilly and associates//dtd html extended 1.0//",
+                     "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+                     "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+                     "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+                     "-//spyglass//dtd html 2.0 extended//",
+                     "-//sq//dtd html 2.0 hotmetal + extensions//",
+                     "-//sun microsystems corp.//dtd hotjava html//",
+                     "-//sun microsystems corp.//dtd hotjava strict html//",
+                     "-//w3c//dtd html 3 1995-03-24//",
+                     "-//w3c//dtd html 3.2 draft//",
+                     "-//w3c//dtd html 3.2 final//",
+                     "-//w3c//dtd html 3.2//",
+                     "-//w3c//dtd html 3.2s draft//",
+                     "-//w3c//dtd html 4.0 frameset//",
+                     "-//w3c//dtd html 4.0 transitional//",
+                     "-//w3c//dtd html experimental 19960712//",
+                     "-//w3c//dtd html experimental 970421//",
+                     "-//w3c//dtd w3 html//",
+                     "-//w3o//dtd w3 html 3.0//",
+                     "-//webtechs//dtd mozilla html 2.0//",
+                     "-//webtechs//dtd mozilla html//"))
                 or publicId in
                     ("-//w3o//dtd w3 html strict 3.0//en//",
                      "-/w3c/dtd html 4.0 transitional/en",
diff --git a/pip/_vendor/html5lib/inputstream.py b/pip/_vendor/html5lib/inputstream.py
index 0ac70bb3a..f3dfd7f3d 100644
--- a/pip/_vendor/html5lib/inputstream.py
+++ b/pip/_vendor/html5lib/inputstream.py
@@ -43,7 +43,7 @@ ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005
 charsUntilRegEx = {}
 
 
-class BufferedStream:
+class BufferedStream(object):
     """Buffering for streams that do not have buffering of their own
 
     The buffer is implemented as a list of chunks on the assumption that
@@ -63,11 +63,11 @@ class BufferedStream:
         return pos
 
     def seek(self, pos):
-        assert pos < self._bufferedBytes()
+        assert pos <= self._bufferedBytes()
         offset = pos
         i = 0
         while len(self.buffer[i]) < offset:
-            offset -= pos
+            offset -= len(self.buffer[i])
             i += 1
         self.position = [i, offset]
 
@@ -114,7 +114,7 @@ class BufferedStream:
         if remainingBytes:
             rv.append(self._readStream(remainingBytes))
 
-        return "".join(rv)
+        return b"".join(rv)
 
 
 def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
@@ -132,7 +132,7 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
         return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
 
 
-class HTMLUnicodeInputStream:
+class HTMLUnicodeInputStream(object):
     """Provides a unicode stream of characters to the HTMLTokenizer.
 
     This class takes care of character encoding and removing or replacing
diff --git a/pip/_vendor/html5lib/serializer/htmlserializer.py b/pip/_vendor/html5lib/serializer/htmlserializer.py
index 08b60dfcc..157840a05 100644
--- a/pip/_vendor/html5lib/serializer/htmlserializer.py
+++ b/pip/_vendor/html5lib/serializer/htmlserializer.py
@@ -92,15 +92,17 @@ class HTMLSerializer(object):
     resolve_entities = True
 
     # miscellaneous options
+    alphabetical_attributes = False
     inject_meta_charset = True
     strip_whitespace = False
     sanitize = False
 
     options = ("quote_attr_values", "quote_char", "use_best_quote_char",
-               "minimize_boolean_attributes", "use_trailing_solidus",
-               "space_before_trailing_solidus", "omit_optional_tags",
-               "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
-               "escape_rcdata", "resolve_entities", "sanitize")
+               "omit_optional_tags", "minimize_boolean_attributes",
+               "use_trailing_solidus", "space_before_trailing_solidus",
+               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
+               "alphabetical_attributes", "inject_meta_charset",
+               "strip_whitespace", "sanitize")
 
     def __init__(self, **kwargs):
         """Initialize HTMLSerializer.
@@ -143,6 +145,8 @@ class HTMLSerializer(object):
           See `html5lib user documentation`_
         omit_optional_tags=True|False
           Omit start/end tags that are optional.
+        alphabetical_attributes=False|True
+          Reorder attributes to be in alphabetical order.
 
         .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
         """
@@ -171,10 +175,11 @@ class HTMLSerializer(object):
         self.encoding = encoding
         in_cdata = False
         self.errors = []
+
         if encoding and self.inject_meta_charset:
             from ..filters.inject_meta_charset import Filter
             treewalker = Filter(treewalker, encoding)
-        # XXX: WhitespaceFilter should be used before OptionalTagFilter
+        # WhitespaceFilter should be used before OptionalTagFilter
         # for maximum efficiently of this latter filter
         if self.strip_whitespace:
             from ..filters.whitespace import Filter
@@ -185,6 +190,12 @@ class HTMLSerializer(object):
         if self.omit_optional_tags:
             from ..filters.optionaltags import Filter
             treewalker = Filter(treewalker)
+        # Alphabetical attributes must be last, as other filters
+        # could add attributes and alter the order
+        if self.alphabetical_attributes:
+            from ..filters.alphabeticalattributes import Filter
+            treewalker = Filter(treewalker)
+
         for token in treewalker:
             type = token["type"]
             if type == "Doctype":
diff --git a/pip/_vendor/html5lib/treeadapters/__init__.py b/pip/_vendor/html5lib/treeadapters/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/pip/_vendor/html5lib/treeadapters/__init__.py
diff --git a/pip/_vendor/html5lib/treeadapters/sax.py b/pip/_vendor/html5lib/treeadapters/sax.py
new file mode 100644
index 000000000..ad47df956
--- /dev/null
+++ b/pip/_vendor/html5lib/treeadapters/sax.py
@@ -0,0 +1,44 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from xml.sax.xmlreader import AttributesNSImpl
+
+from ..constants import adjustForeignAttributes, unadjustForeignAttributes
+
+prefix_mapping = {}
+for prefix, localName, namespace in adjustForeignAttributes.values():
+    if prefix is not None:
+        prefix_mapping[prefix] = namespace
+
+
+def to_sax(walker, handler):
+    """Call SAX-like content handler based on treewalker walker"""
+    handler.startDocument()
+    for prefix, namespace in prefix_mapping.items():
+        handler.startPrefixMapping(prefix, namespace)
+
+    for token in walker:
+        type = token["type"]
+        if type == "Doctype":
+            continue
+        elif type in ("StartTag", "EmptyTag"):
+            attrs = AttributesNSImpl(token["data"],
+                                     unadjustForeignAttributes)
+            handler.startElementNS((token["namespace"], token["name"]),
+                                   token["name"],
+                                   attrs)
+            if type == "EmptyTag":
+                handler.endElementNS((token["namespace"], token["name"]),
+                                     token["name"])
+        elif type == "EndTag":
+            handler.endElementNS((token["namespace"], token["name"]),
+                                 token["name"])
+        elif type in ("Characters", "SpaceCharacters"):
+            handler.characters(token["data"])
+        elif type == "Comment":
+            pass
+        else:
+            assert False, "Unknown token type"
+
+    for prefix, namespace in prefix_mapping.items():
+        handler.endPrefixMapping(prefix)
+    handler.endDocument()
diff --git a/pip/_vendor/html5lib/treebuilders/dom.py b/pip/_vendor/html5lib/treebuilders/dom.py
index f9e0d76e7..61e5ed79e 100644
--- a/pip/_vendor/html5lib/treebuilders/dom.py
+++ b/pip/_vendor/html5lib/treebuilders/dom.py
@@ -1,7 +1,7 @@
 from __future__ import absolute_import, division, unicode_literals
 
 
-from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
+from xml.dom import minidom, Node
 import weakref
 
 from . import _base
@@ -220,69 +220,6 @@ def getDomBuilder(DomImplementation):
 
         return "\n".join(rv)
 
-    def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}):
-        if node.nodeType == Node.ELEMENT_NODE:
-            if not nsmap:
-                handler.startElement(node.nodeName, node.attributes)
-                for child in node.childNodes:
-                    dom2sax(child, handler, nsmap)
-                handler.endElement(node.nodeName)
-            else:
-                attributes = dict(node.attributes.itemsNS())
-
-                # gather namespace declarations
-                prefixes = []
-                for attrname in list(node.attributes.keys()):
-                    attr = node.getAttributeNode(attrname)
-                    if (attr.namespaceURI == XMLNS_NAMESPACE or
-                       (attr.namespaceURI is None and attr.nodeName.startswith('xmlns'))):
-                        prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None)
-                        handler.startPrefixMapping(prefix, attr.nodeValue)
-                        prefixes.append(prefix)
-                        nsmap = nsmap.copy()
-                        nsmap[prefix] = attr.nodeValue
-                        del attributes[(attr.namespaceURI, attr.nodeName)]
-
-                # apply namespace declarations
-                for attrname in list(node.attributes.keys()):
-                    attr = node.getAttributeNode(attrname)
-                    if attr.namespaceURI is None and ':' in attr.nodeName:
-                        prefix = attr.nodeName.split(':')[0]
-                        if prefix in nsmap:
-                            del attributes[(attr.namespaceURI, attr.nodeName)]
-                            attributes[(nsmap[prefix], attr.nodeName)] = attr.nodeValue
-
-                # SAX events
-                ns = node.namespaceURI or nsmap.get(None, None)
-                handler.startElementNS((ns, node.nodeName), node.nodeName, attributes)
-                for child in node.childNodes:
-                    dom2sax(child, handler, nsmap)
-                handler.endElementNS((ns, node.nodeName), node.nodeName)
-                for prefix in prefixes:
-                    handler.endPrefixMapping(prefix)
-
-        elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
-            handler.characters(node.nodeValue)
-
-        elif node.nodeType == Node.DOCUMENT_NODE:
-            handler.startDocument()
-            for child in node.childNodes:
-                dom2sax(child, handler, nsmap)
-            handler.endDocument()
-
-        elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
-            for child in node.childNodes:
-                dom2sax(child, handler, nsmap)
-
-        else:
-            # ATTRIBUTE_NODE
-            # ENTITY_NODE
-            # PROCESSING_INSTRUCTION_NODE
-            # COMMENT_NODE
-            # DOCUMENT_TYPE_NODE
-            # NOTATION_NODE
-            pass
-
     return locals()
 
 
diff --git a/pip/_vendor/html5lib/treewalkers/_base.py b/pip/_vendor/html5lib/treewalkers/_base.py
index a20235961..dda3cd74e 100644
--- a/pip/_vendor/html5lib/treewalkers/_base.py
+++ b/pip/_vendor/html5lib/treewalkers/_base.py
@@ -1,13 +1,41 @@
 from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
+from pip._vendor.six import text_type, string_types
 
 import gettext
 _ = gettext.gettext
 
+from xml.dom import Node
+
+DOCUMENT = Node.DOCUMENT_NODE
+DOCTYPE = Node.DOCUMENT_TYPE_NODE
+TEXT = Node.TEXT_NODE
+ELEMENT = Node.ELEMENT_NODE
+COMMENT = Node.COMMENT_NODE
+ENTITY = Node.ENTITY_NODE
+UNKNOWN = "<#UNKNOWN#>"
+
 from ..constants import voidElements, spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
 
+def to_text(s, blank_if_none=True):
+    """Wrapper around six.text_type to convert None to empty string"""
+    if s is None:
+        if blank_if_none:
+            return ""
+        else:
+            return None
+    elif isinstance(s, text_type):
+        return s
+    else:
+        return text_type(s)
+
+
+def is_text_or_none(string):
+    """Wrapper around isinstance(string_types) or is None"""
+    return string is None or isinstance(string, string_types)
+
+
 class TreeWalker(object):
     def __init__(self, tree):
         self.tree = tree
@@ -19,45 +47,47 @@ class TreeWalker(object):
         return {"type": "SerializeError", "data": msg}
 
     def emptyTag(self, namespace, name, attrs, hasChildren=False):
-        assert namespace is None or isinstance(namespace, text_type), type(namespace)
-        assert isinstance(name, text_type), type(name)
-        assert all((namespace is None or isinstance(namespace, text_type)) and
-                   isinstance(name, text_type) and
-                   isinstance(value, text_type)
+        assert namespace is None or isinstance(namespace, string_types), type(namespace)
+        assert isinstance(name, string_types), type(name)
+        assert all((namespace is None or isinstance(namespace, string_types)) and
+                   isinstance(name, string_types) and
+                   isinstance(value, string_types)
                    for (namespace, name), value in attrs.items())
 
-        yield {"type": "EmptyTag", "name": name,
-               "namespace": namespace,
+        yield {"type": "EmptyTag", "name": to_text(name, False),
+               "namespace": to_text(namespace),
                "data": attrs}
         if hasChildren:
             yield self.error(_("Void element has children"))
 
     def startTag(self, namespace, name, attrs):
-        assert namespace is None or isinstance(namespace, text_type), type(namespace)
-        assert isinstance(name, text_type), type(name)
-        assert all((namespace is None or isinstance(namespace, text_type)) and
-                   isinstance(name, text_type) and
-                   isinstance(value, text_type)
+        assert namespace is None or isinstance(namespace, string_types), type(namespace)
+        assert isinstance(name, string_types), type(name)
+        assert all((namespace is None or isinstance(namespace, string_types)) and
+                   isinstance(name, string_types) and
+                   isinstance(value, string_types)
                    for (namespace, name), value in attrs.items())
 
         return {"type": "StartTag",
-                "name": name,
-                "namespace": namespace,
-                "data": attrs}
+                "name": text_type(name),
+                "namespace": to_text(namespace),
+                "data": dict(((to_text(namespace, False), to_text(name)),
+                              to_text(value, False))
+                             for (namespace, name), value in attrs.items())}
 
     def endTag(self, namespace, name):
-        assert namespace is None or isinstance(namespace, text_type), type(namespace)
-        assert isinstance(name, text_type), type(namespace)
+        assert namespace is None or isinstance(namespace, string_types), type(namespace)
+        assert isinstance(name, string_types), type(namespace)
 
         return {"type": "EndTag",
-                "name": name,
-                "namespace": namespace,
+                "name": to_text(name, False),
+                "namespace": to_text(namespace),
                 "data": {}}
 
     def text(self, data):
-        assert isinstance(data, text_type), type(data)
+        assert isinstance(data, string_types), type(data)
 
-        data = data
+        data = to_text(data)
         middle = data.lstrip(spaceCharacters)
         left = data[:len(data) - len(middle)]
         if left:
@@ -71,56 +101,30 @@ class TreeWalker(object):
             yield {"type": "SpaceCharacters", "data": right}
 
     def comment(self, data):
-        assert isinstance(data, text_type), type(data)
+        assert isinstance(data, string_types), type(data)
 
-        return {"type": "Comment", "data": data}
+        return {"type": "Comment", "data": text_type(data)}
 
     def doctype(self, name, publicId=None, systemId=None, correct=True):
-        assert name is None or isinstance(name, text_type), type(name)
-        assert publicId is None or isinstance(publicId, text_type), type(publicId)
-        assert systemId is None or isinstance(systemId, text_type), type(systemId)
+        assert is_text_or_none(name), type(name)
+        assert is_text_or_none(publicId), type(publicId)
+        assert is_text_or_none(systemId), type(systemId)
 
         return {"type": "Doctype",
-                "name": name if name is not None else "",
-                "publicId": publicId,
-                "systemId": systemId,
-                "correct": correct}
+                "name": to_text(name),
+                "publicId": to_text(publicId),
+                "systemId": to_text(systemId),
+                "correct": to_text(correct)}
 
     def entity(self, name):
-        assert isinstance(name, text_type), type(name)
+        assert isinstance(name, string_types), type(name)
 
-        return {"type": "Entity", "name": name}
+        return {"type": "Entity", "name": text_type(name)}
 
     def unknown(self, nodeType):
         return self.error(_("Unknown node type: ") + nodeType)
 
 
-class RecursiveTreeWalker(TreeWalker):
-    def walkChildren(self, node):
-        raise NotImplementedError
-
-    def element(self, node, namespace, name, attrs, hasChildren):
-        if name in voidElements:
-            for token in self.emptyTag(namespace, name, attrs, hasChildren):
-                yield token
-        else:
-            yield self.startTag(name, attrs)
-            if hasChildren:
-                for token in self.walkChildren(node):
-                    yield token
-            yield self.endTag(name)
-
-from xml.dom import Node
-
-DOCUMENT = Node.DOCUMENT_NODE
-DOCTYPE = Node.DOCUMENT_TYPE_NODE
-TEXT = Node.TEXT_NODE
-ELEMENT = Node.ELEMENT_NODE
-COMMENT = Node.COMMENT_NODE
-ENTITY = Node.ENTITY_NODE
-UNKNOWN = "<#UNKNOWN#>"
-
-
 class NonRecursiveTreeWalker(TreeWalker):
     def getNodeDetails(self, node):
         raise NotImplementedError
diff --git a/pip/_vendor/html5lib/treewalkers/etree.py b/pip/_vendor/html5lib/treewalkers/etree.py
index 88fb9811a..f5615f50a 100644
--- a/pip/_vendor/html5lib/treewalkers/etree.py
+++ b/pip/_vendor/html5lib/treewalkers/etree.py
@@ -1,5 +1,12 @@
 from __future__ import absolute_import, division, unicode_literals
 
+try:
+    from collections import OrderedDict
+except ImportError:
+    try:
+        from ordereddict import OrderedDict
+    except ImportError:
+        OrderedDict = dict
 import gettext
 _ = gettext.gettext
 
@@ -61,7 +68,7 @@ def getETreeBuilder(ElementTreeImplementation):
                 else:
                     namespace = None
                     tag = node.tag
-                attrs = {}
+                attrs = OrderedDict()
                 for name, value in list(node.attrib.items()):
                     match = tag_regexp.match(name)
                     if match:
diff --git a/pip/_vendor/html5lib/utils.py b/pip/_vendor/html5lib/utils.py
index 4e8559db6..2f41f4dfa 100644
--- a/pip/_vendor/html5lib/utils.py
+++ b/pip/_vendor/html5lib/utils.py
@@ -8,6 +8,10 @@ except ImportError:
     import xml.etree.ElementTree as default_etree
 
 
+__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
+           "surrogatePairToCodepoint", "moduleFactoryFactory"]
+
+
 class MethodDispatcher(dict):
     """Dict with 2 special properties:
 
diff --git a/pip/_vendor/vendor.txt b/pip/_vendor/vendor.txt
index 8385545e5..ba4f7885d 100644
--- a/pip/_vendor/vendor.txt
+++ b/pip/_vendor/vendor.txt
@@ -1,5 +1,5 @@
 distlib==0.1.8
-html5lib==1.0b1
+html5lib==1.0b3
 six==1.5.2
 colorama==0.2.7
 requests==2.2.1
author	Donald Stufft <donald@stufft.io>	2014-05-02 19:56:32 -0400
committer	Donald Stufft <donald@stufft.io>	2014-05-02 21:01:00 -0400
commit	c3aa72ec176af16e12d21636c8ca92ab9ab3e722 (patch)
tree	7acf69339d7ea19b1e4ff21c6d3a07ee4ae5ae57
parent	b3ac7b22401e8f83cd09deaffe2fd10a9f3cf0c2 (diff)
download	pip-c3aa72ec176af16e12d21636c8ca92ab9ab3e722.tar.gz