diff options
Diffstat (limited to 'src/pip/_vendor/html5lib/treewalkers/base.py')
-rw-r--r-- | src/pip/_vendor/html5lib/treewalkers/base.py | 252 |
1 files changed, 0 insertions, 252 deletions
diff --git a/src/pip/_vendor/html5lib/treewalkers/base.py b/src/pip/_vendor/html5lib/treewalkers/base.py deleted file mode 100644 index 80c474c4e..000000000 --- a/src/pip/_vendor/html5lib/treewalkers/base.py +++ /dev/null @@ -1,252 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from xml.dom import Node -from ..constants import namespaces, voidElements, spaceCharacters - -__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", - "TreeWalker", "NonRecursiveTreeWalker"] - -DOCUMENT = Node.DOCUMENT_NODE -DOCTYPE = Node.DOCUMENT_TYPE_NODE -TEXT = Node.TEXT_NODE -ELEMENT = Node.ELEMENT_NODE -COMMENT = Node.COMMENT_NODE -ENTITY = Node.ENTITY_NODE -UNKNOWN = "<#UNKNOWN#>" - -spaceCharacters = "".join(spaceCharacters) - - -class TreeWalker(object): - """Walks a tree yielding tokens - - Tokens are dicts that all have a ``type`` field specifying the type of the - token. - - """ - def __init__(self, tree): - """Creates a TreeWalker - - :arg tree: the tree to walk - - """ - self.tree = tree - - def __iter__(self): - raise NotImplementedError - - def error(self, msg): - """Generates an error token with the given message - - :arg msg: the error message - - :returns: SerializeError token - - """ - return {"type": "SerializeError", "data": msg} - - def emptyTag(self, namespace, name, attrs, hasChildren=False): - """Generates an EmptyTag token - - :arg namespace: the namespace of the token--can be ``None`` - - :arg name: the name of the element - - :arg attrs: the attributes of the element as a dict - - :arg hasChildren: whether or not to yield a SerializationError because - this tag shouldn't have children - - :returns: EmptyTag token - - """ - yield {"type": "EmptyTag", "name": name, - "namespace": namespace, - "data": attrs} - if hasChildren: - yield self.error("Void element has children") - - def startTag(self, namespace, name, attrs): - """Generates a StartTag token - - :arg namespace: the namespace of the token--can be ``None`` - - :arg name: the name of the element - - :arg attrs: the attributes of the element as a dict - - :returns: StartTag token - - """ - return {"type": "StartTag", - "name": name, - "namespace": namespace, - "data": attrs} - - def endTag(self, namespace, name): - """Generates an EndTag token - - :arg namespace: the namespace of the token--can be ``None`` - - :arg name: the name of the element - - :returns: EndTag token - - """ - return {"type": "EndTag", - "name": name, - "namespace": namespace} - - def text(self, data): - """Generates SpaceCharacters and Characters tokens - - Depending on what's in the data, this generates one or more - ``SpaceCharacters`` and ``Characters`` tokens. - - For example: - - >>> from html5lib.treewalkers.base import TreeWalker - >>> # Give it an empty tree just so it instantiates - >>> walker = TreeWalker([]) - >>> list(walker.text('')) - [] - >>> list(walker.text(' ')) - [{u'data': ' ', u'type': u'SpaceCharacters'}] - >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE - [{u'data': ' ', u'type': u'SpaceCharacters'}, - {u'data': u'abc', u'type': u'Characters'}, - {u'data': u' ', u'type': u'SpaceCharacters'}] - - :arg data: the text data - - :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens - - """ - data = data - middle = data.lstrip(spaceCharacters) - left = data[:len(data) - len(middle)] - if left: - yield {"type": "SpaceCharacters", "data": left} - data = middle - middle = data.rstrip(spaceCharacters) - right = data[len(middle):] - if middle: - yield {"type": "Characters", "data": middle} - if right: - yield {"type": "SpaceCharacters", "data": right} - - def comment(self, data): - """Generates a Comment token - - :arg data: the comment - - :returns: Comment token - - """ - return {"type": "Comment", "data": data} - - def doctype(self, name, publicId=None, systemId=None): - """Generates a Doctype token - - :arg name: - - :arg publicId: - - :arg systemId: - - :returns: the Doctype token - - """ - return {"type": "Doctype", - "name": name, - "publicId": publicId, - "systemId": systemId} - - def entity(self, name): - """Generates an Entity token - - :arg name: the entity name - - :returns: an Entity token - - """ - return {"type": "Entity", "name": name} - - def unknown(self, nodeType): - """Handles unknown node types""" - return self.error("Unknown node type: " + nodeType) - - -class NonRecursiveTreeWalker(TreeWalker): - def getNodeDetails(self, node): - raise NotImplementedError - - def getFirstChild(self, node): - raise NotImplementedError - - def getNextSibling(self, node): - raise NotImplementedError - - def getParentNode(self, node): - raise NotImplementedError - - def __iter__(self): - currentNode = self.tree - while currentNode is not None: - details = self.getNodeDetails(currentNode) - type, details = details[0], details[1:] - hasChildren = False - - if type == DOCTYPE: - yield self.doctype(*details) - - elif type == TEXT: - for token in self.text(*details): - yield token - - elif type == ELEMENT: - namespace, name, attributes, hasChildren = details - if (not namespace or namespace == namespaces["html"]) and name in voidElements: - for token in self.emptyTag(namespace, name, attributes, - hasChildren): - yield token - hasChildren = False - else: - yield self.startTag(namespace, name, attributes) - - elif type == COMMENT: - yield self.comment(details[0]) - - elif type == ENTITY: - yield self.entity(details[0]) - - elif type == DOCUMENT: - hasChildren = True - - else: - yield self.unknown(details[0]) - - if hasChildren: - firstChild = self.getFirstChild(currentNode) - else: - firstChild = None - - if firstChild is not None: - currentNode = firstChild - else: - while currentNode is not None: - details = self.getNodeDetails(currentNode) - type, details = details[0], details[1:] - if type == ELEMENT: - namespace, name, attributes, hasChildren = details - if (namespace and namespace != namespaces["html"]) or name not in voidElements: - yield self.endTag(namespace, name) - if self.tree is currentNode: - currentNode = None - break - nextSibling = self.getNextSibling(currentNode) - if nextSibling is not None: - currentNode = nextSibling - break - else: - currentNode = self.getParentNode(currentNode) |