diff options
Diffstat (limited to 'pip/_vendor/html5lib/treewalkers/_base.py')
-rw-r--r-- | pip/_vendor/html5lib/treewalkers/_base.py | 122 |
1 files changed, 63 insertions, 59 deletions
diff --git a/pip/_vendor/html5lib/treewalkers/_base.py b/pip/_vendor/html5lib/treewalkers/_base.py index a20235961..dda3cd74e 100644 --- a/pip/_vendor/html5lib/treewalkers/_base.py +++ b/pip/_vendor/html5lib/treewalkers/_base.py @@ -1,13 +1,41 @@ from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import text_type +from pip._vendor.six import text_type, string_types import gettext _ = gettext.gettext +from xml.dom import Node + +DOCUMENT = Node.DOCUMENT_NODE +DOCTYPE = Node.DOCUMENT_TYPE_NODE +TEXT = Node.TEXT_NODE +ELEMENT = Node.ELEMENT_NODE +COMMENT = Node.COMMENT_NODE +ENTITY = Node.ENTITY_NODE +UNKNOWN = "<#UNKNOWN#>" + from ..constants import voidElements, spaceCharacters spaceCharacters = "".join(spaceCharacters) +def to_text(s, blank_if_none=True): + """Wrapper around six.text_type to convert None to empty string""" + if s is None: + if blank_if_none: + return "" + else: + return None + elif isinstance(s, text_type): + return s + else: + return text_type(s) + + +def is_text_or_none(string): + """Wrapper around isinstance(string_types) or is None""" + return string is None or isinstance(string, string_types) + + class TreeWalker(object): def __init__(self, tree): self.tree = tree @@ -19,45 +47,47 @@ class TreeWalker(object): return {"type": "SerializeError", "data": msg} def emptyTag(self, namespace, name, attrs, hasChildren=False): - assert namespace is None or isinstance(namespace, text_type), type(namespace) - assert isinstance(name, text_type), type(name) - assert all((namespace is None or isinstance(namespace, text_type)) and - isinstance(name, text_type) and - isinstance(value, text_type) + assert namespace is None or isinstance(namespace, string_types), type(namespace) + assert isinstance(name, string_types), type(name) + assert all((namespace is None or isinstance(namespace, string_types)) and + isinstance(name, string_types) and + isinstance(value, string_types) for (namespace, name), value in attrs.items()) - yield {"type": "EmptyTag", "name": name, - "namespace": namespace, + yield {"type": "EmptyTag", "name": to_text(name, False), + "namespace": to_text(namespace), "data": attrs} if hasChildren: yield self.error(_("Void element has children")) def startTag(self, namespace, name, attrs): - assert namespace is None or isinstance(namespace, text_type), type(namespace) - assert isinstance(name, text_type), type(name) - assert all((namespace is None or isinstance(namespace, text_type)) and - isinstance(name, text_type) and - isinstance(value, text_type) + assert namespace is None or isinstance(namespace, string_types), type(namespace) + assert isinstance(name, string_types), type(name) + assert all((namespace is None or isinstance(namespace, string_types)) and + isinstance(name, string_types) and + isinstance(value, string_types) for (namespace, name), value in attrs.items()) return {"type": "StartTag", - "name": name, - "namespace": namespace, - "data": attrs} + "name": text_type(name), + "namespace": to_text(namespace), + "data": dict(((to_text(namespace, False), to_text(name)), + to_text(value, False)) + for (namespace, name), value in attrs.items())} def endTag(self, namespace, name): - assert namespace is None or isinstance(namespace, text_type), type(namespace) - assert isinstance(name, text_type), type(namespace) + assert namespace is None or isinstance(namespace, string_types), type(namespace) + assert isinstance(name, string_types), type(namespace) return {"type": "EndTag", - "name": name, - "namespace": namespace, + "name": to_text(name, False), + "namespace": to_text(namespace), "data": {}} def text(self, data): - assert isinstance(data, text_type), type(data) + assert isinstance(data, string_types), type(data) - data = data + data = to_text(data) middle = data.lstrip(spaceCharacters) left = data[:len(data) - len(middle)] if left: @@ -71,56 +101,30 @@ class TreeWalker(object): yield {"type": "SpaceCharacters", "data": right} def comment(self, data): - assert isinstance(data, text_type), type(data) + assert isinstance(data, string_types), type(data) - return {"type": "Comment", "data": data} + return {"type": "Comment", "data": text_type(data)} def doctype(self, name, publicId=None, systemId=None, correct=True): - assert name is None or isinstance(name, text_type), type(name) - assert publicId is None or isinstance(publicId, text_type), type(publicId) - assert systemId is None or isinstance(systemId, text_type), type(systemId) + assert is_text_or_none(name), type(name) + assert is_text_or_none(publicId), type(publicId) + assert is_text_or_none(systemId), type(systemId) return {"type": "Doctype", - "name": name if name is not None else "", - "publicId": publicId, - "systemId": systemId, - "correct": correct} + "name": to_text(name), + "publicId": to_text(publicId), + "systemId": to_text(systemId), + "correct": to_text(correct)} def entity(self, name): - assert isinstance(name, text_type), type(name) + assert isinstance(name, string_types), type(name) - return {"type": "Entity", "name": name} + return {"type": "Entity", "name": text_type(name)} def unknown(self, nodeType): return self.error(_("Unknown node type: ") + nodeType) -class RecursiveTreeWalker(TreeWalker): - def walkChildren(self, node): - raise NotImplementedError - - def element(self, node, namespace, name, attrs, hasChildren): - if name in voidElements: - for token in self.emptyTag(namespace, name, attrs, hasChildren): - yield token - else: - yield self.startTag(name, attrs) - if hasChildren: - for token in self.walkChildren(node): - yield token - yield self.endTag(name) - -from xml.dom import Node - -DOCUMENT = Node.DOCUMENT_NODE -DOCTYPE = Node.DOCUMENT_TYPE_NODE -TEXT = Node.TEXT_NODE -ELEMENT = Node.ELEMENT_NODE -COMMENT = Node.COMMENT_NODE -ENTITY = Node.ENTITY_NODE -UNKNOWN = "<#UNKNOWN#>" - - class NonRecursiveTreeWalker(TreeWalker): def getNodeDetails(self, node): raise NotImplementedError |