summary | refs | log | tree | commit | diff
path: root/pip/_vendor/html5lib/treewalkers/_base.py
diff options
context:
space:
mode:
Diffstat (limited to 'pip/_vendor/html5lib/treewalkers/_base.py')
-rw-r--r-- pip/_vendor/html5lib/treewalkers/_base.py | 122
1 files changed, 63 insertions, 59 deletions
diff --git a/pip/_vendor/html5lib/treewalkers/_base.py b/pip/_vendor/html5lib/treewalkers/_base.py
index a20235961..dda3cd74e 100644
--- a/pip/_vendor/html5lib/treewalkers/_base.py
+++ b/pip/_vendor/html5lib/treewalkers/_base.py
@@ -1,13 +1,41 @@
from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
+from pip._vendor.six import text_type, string_types
import gettext
_ = gettext.gettext
+from xml.dom import Node
+
+DOCUMENT = Node.DOCUMENT_NODE
+DOCTYPE = Node.DOCUMENT_TYPE_NODE
+TEXT = Node.TEXT_NODE
+ELEMENT = Node.ELEMENT_NODE
+COMMENT = Node.COMMENT_NODE
+ENTITY = Node.ENTITY_NODE
+UNKNOWN = "<#UNKNOWN#>"
+
from ..constants import voidElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)
+def to_text(s, blank_if_none=True):
+ """Wrapper around six.text_type to convert None to empty string"""
+ if s is None:
+ if blank_if_none:
+ return ""
+ else:
+ return None
+ elif isinstance(s, text_type):
+ return s
+ else:
+ return text_type(s)
+
+
+def is_text_or_none(string):
+ """Wrapper around isinstance(string_types) or is None"""
+ return string is None or isinstance(string, string_types)
+
+
class TreeWalker(object):
def __init__(self, tree):
self.tree = tree
@@ -19,45 +47,47 @@ class TreeWalker(object):
return {"type": "SerializeError", "data": msg}
def emptyTag(self, namespace, name, attrs, hasChildren=False):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(name)
- assert all((namespace is None or isinstance(namespace, text_type)) and
- isinstance(name, text_type) and
- isinstance(value, text_type)
+ assert namespace is None or isinstance(namespace, string_types), type(namespace)
+ assert isinstance(name, string_types), type(name)
+ assert all((namespace is None or isinstance(namespace, string_types)) and
+ isinstance(name, string_types) and
+ isinstance(value, string_types)
for (namespace, name), value in attrs.items())
- yield {"type": "EmptyTag", "name": name,
- "namespace": namespace,
+ yield {"type": "EmptyTag", "name": to_text(name, False),
+ "namespace": to_text(namespace),
"data": attrs}
if hasChildren:
yield self.error(_("Void element has children"))
def startTag(self, namespace, name, attrs):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(name)
- assert all((namespace is None or isinstance(namespace, text_type)) and
- isinstance(name, text_type) and
- isinstance(value, text_type)
+ assert namespace is None or isinstance(namespace, string_types), type(namespace)
+ assert isinstance(name, string_types), type(name)
+ assert all((namespace is None or isinstance(namespace, string_types)) and
+ isinstance(name, string_types) and
+ isinstance(value, string_types)
for (namespace, name), value in attrs.items())
return {"type": "StartTag",
- "name": name,
- "namespace": namespace,
- "data": attrs}
+ "name": text_type(name),
+ "namespace": to_text(namespace),
+ "data": dict(((to_text(namespace, False), to_text(name)),
+ to_text(value, False))
+ for (namespace, name), value in attrs.items())}
def endTag(self, namespace, name):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(namespace)
+ assert namespace is None or isinstance(namespace, string_types), type(namespace)
+ assert isinstance(name, string_types), type(name)  # report the type of the value that failed
return {"type": "EndTag",
- "name": name,
- "namespace": namespace,
+ "name": to_text(name, False),
+ "namespace": to_text(namespace),
"data": {}}
def text(self, data):
- assert isinstance(data, text_type), type(data)
+ assert isinstance(data, string_types), type(data)
- data = data
+ data = to_text(data)
middle = data.lstrip(spaceCharacters)
left = data[:len(data) - len(middle)]
if left:
@@ -71,56 +101,30 @@ class TreeWalker(object):
yield {"type": "SpaceCharacters", "data": right}
def comment(self, data):
- assert isinstance(data, text_type), type(data)
+ assert isinstance(data, string_types), type(data)
- return {"type": "Comment", "data": data}
+ return {"type": "Comment", "data": text_type(data)}
def doctype(self, name, publicId=None, systemId=None, correct=True):
- assert name is None or isinstance(name, text_type), type(name)
- assert publicId is None or isinstance(publicId, text_type), type(publicId)
- assert systemId is None or isinstance(systemId, text_type), type(systemId)
+ assert is_text_or_none(name), type(name)
+ assert is_text_or_none(publicId), type(publicId)
+ assert is_text_or_none(systemId), type(systemId)
return {"type": "Doctype",
- "name": name if name is not None else "",
- "publicId": publicId,
- "systemId": systemId,
- "correct": correct}
+ "name": to_text(name),
+ "publicId": to_text(publicId),
+ "systemId": to_text(systemId),
+ "correct": correct}  # keep the flag as-is; to_text(False) gives the truthy text "False"
def entity(self, name):
- assert isinstance(name, text_type), type(name)
+ assert isinstance(name, string_types), type(name)
- return {"type": "Entity", "name": name}
+ return {"type": "Entity", "name": text_type(name)}
def unknown(self, nodeType):
return self.error(_("Unknown node type: ") + nodeType)
-class RecursiveTreeWalker(TreeWalker):
- def walkChildren(self, node):
- raise NotImplementedError
-
- def element(self, node, namespace, name, attrs, hasChildren):
- if name in voidElements:
- for token in self.emptyTag(namespace, name, attrs, hasChildren):
- yield token
- else:
- yield self.startTag(name, attrs)
- if hasChildren:
- for token in self.walkChildren(node):
- yield token
- yield self.endTag(name)
-
-from xml.dom import Node
-
-DOCUMENT = Node.DOCUMENT_NODE
-DOCTYPE = Node.DOCUMENT_TYPE_NODE
-TEXT = Node.TEXT_NODE
-ELEMENT = Node.ELEMENT_NODE
-COMMENT = Node.COMMENT_NODE
-ENTITY = Node.ENTITY_NODE
-UNKNOWN = "<#UNKNOWN#>"
-
-
class NonRecursiveTreeWalker(TreeWalker):
def getNodeDetails(self, node):
raise NotImplementedError