summaryrefslogtreecommitdiff
path: root/sphinx/pycode
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2008-12-30 01:29:49 +0100
committerGeorg Brandl <georg@python.org>2008-12-30 01:29:49 +0100
commit484625cc327f4fab73313f785b9aaeb94080e5be (patch)
tree29cdb2acf7de519231447941bab9ba6251f80f77 /sphinx/pycode
parente177eeb750aad12d964a16b750a8839f194a03bd (diff)
downloadsphinx-484625cc327f4fab73313f785b9aaeb94080e5be.tar.gz
Some speedups in pytree.
Add Cython parse.py replacement, yielding a 2x speedup in parsing.
Diffstat (limited to 'sphinx/pycode')
-rw-r--r--sphinx/pycode/pgen2/parse.pyx156
-rw-r--r--sphinx/pycode/pytree.py23
2 files changed, 165 insertions, 14 deletions
diff --git a/sphinx/pycode/pgen2/parse.pyx b/sphinx/pycode/pgen2/parse.pyx
new file mode 100644
index 00000000..6a11ee6b
--- /dev/null
+++ b/sphinx/pycode/pgen2/parse.pyx
@@ -0,0 +1,156 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser engine for the grammar tables generated by pgen.
+
+The grammar table must be loaded first.
+
+See Parser/parser.c in the Python distribution for additional info on
+how this parsing engine works.
+
+"""
+
+from sphinx.pycode.pytree import Node, Leaf
+
+DEF NAME = 1
+
+class ParseError(Exception):
+ """Exception to signal the parser is stuck."""
+
+ def __init__(self, msg, type, value, context):
+ Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
+ (msg, type, value, context))
+ self.msg = msg
+ self.type = type
+ self.value = value
+ self.context = context
+
+
+cdef class Parser:
+ cdef public grammar, stack, rootnode, used_names
+ cdef _grammar_dfas, _grammar_labels, _grammar_keywords, _grammar_tokens
+ cdef _grammar_number2symbol
+
+ def __init__(self, grammar, convert=None):
+ self.grammar = grammar
+ #self.convert = convert or noconvert
+
+ self._grammar_dfas = grammar.dfas
+ self._grammar_labels = grammar.labels
+ self._grammar_keywords = grammar.keywords
+ self._grammar_tokens = grammar.tokens
+ self._grammar_number2symbol = grammar.number2symbol
+
+ def setup(self, start=None):
+ if start is None:
+ start = self.grammar.start
+ # Each stack entry is a tuple: (dfa, state, node).
+ # A node is a tuple: (type, value, context, children),
+ # where children is a list of nodes or None, and context may be None.
+ newnode = (start, None, None, [])
+ stackentry = (self._grammar_dfas[start], 0, newnode)
+ self.stack = [stackentry]
+ self.rootnode = None
+ self.used_names = set() # Aliased to self.rootnode.used_names in pop()
+
+ def addtoken(self, type, value, context):
+ """Add a token; return True iff this is the end of the program."""
+ cdef int ilabel, i, t, state, newstate
+ # Map from token to label
+ ilabel = self.classify(type, value, context)
+ # Loop until the token is shifted; may raise exceptions
+ while True:
+ dfa, state, node = self.stack[-1]
+ states, first = dfa
+ arcs = states[state]
+ # Look for a state with this label
+ for i, newstate in arcs:
+ t, v = self._grammar_labels[i]
+ if ilabel == i:
+ # Look it up in the list of labels
+ ## assert t < 256
+ # Shift a token; we're done with it
+ self.shift(type, value, newstate, context)
+ # Pop while we are in an accept-only state
+ state = newstate
+ while states[state] == [(0, state)]:
+ self.pop()
+ if not self.stack:
+ # Done parsing!
+ return True
+ dfa, state, node = self.stack[-1]
+ states, first = dfa
+ # Done with this token
+ return False
+ elif t >= 256:
+ # See if it's a symbol and if we're in its first set
+ itsdfa = self._grammar_dfas[t]
+ itsstates, itsfirst = itsdfa
+ if ilabel in itsfirst:
+ # Push a symbol
+ self.push(t, itsdfa, newstate, context)
+ break # To continue the outer while loop
+ else:
+ if (0, state) in arcs:
+ # An accepting state, pop it and try something else
+ self.pop()
+ if not self.stack:
+ # Done parsing, but another token is input
+ raise ParseError("too much input",
+ type, value, context)
+ else:
+ # No success finding a transition
+ raise ParseError("bad input", type, value, context)
+
+ cdef int classify(self, type, value, context):
+ """Turn a token into a label. (Internal)"""
+ if type == NAME:
+ # Keep a listing of all used names
+ self.used_names.add(value)
+ # Check for reserved words
+ ilabel = self._grammar_keywords.get(value)
+ if ilabel is not None:
+ return ilabel
+ ilabel = self._grammar_tokens.get(type)
+ if ilabel is None:
+ raise ParseError("bad token", type, value, context)
+ return ilabel
+
+ cdef void shift(self, type, value, newstate, context):
+ """Shift a token. (Internal)"""
+ dfa, state, node = self.stack[-1]
+ newnode = (type, value, context, None)
+ newnode = self.convert(newnode)
+ if newnode is not None:
+ node[-1].append(newnode)
+ self.stack[-1] = (dfa, newstate, node)
+
+ cdef void push(self, type, newdfa, newstate, context):
+ """Push a nonterminal. (Internal)"""
+ dfa, state, node = self.stack[-1]
+ newnode = (type, None, context, [])
+ self.stack[-1] = (dfa, newstate, node)
+ self.stack.append((newdfa, 0, newnode))
+
+ cdef void pop(self):
+ """Pop a nonterminal. (Internal)"""
+ popdfa, popstate, popnode = self.stack.pop()
+ newnode = self.convert(popnode)
+ if newnode is not None:
+ if self.stack:
+ dfa, state, node = self.stack[-1]
+ node[-1].append(newnode)
+ else:
+ self.rootnode = newnode
+ self.rootnode.used_names = self.used_names
+
+ cdef convert(self, raw_node):
+ type, value, context, children = raw_node
+ if children or type in self._grammar_number2symbol:
+ # If there's exactly one child, return that child instead of
+ # creating a new node.
+ if len(children) == 1:
+ return children[0]
+ return Node(type, children, context=context)
+ else:
+ return Leaf(type, value, context=context)
diff --git a/sphinx/pycode/pytree.py b/sphinx/pycode/pytree.py
index 46017a95..063e39ec 100644
--- a/sphinx/pycode/pytree.py
+++ b/sphinx/pycode/pytree.py
@@ -29,11 +29,6 @@ class Base(object):
children = () # Tuple of subnodes
was_changed = False
- def __new__(cls, *args, **kwds):
- """Constructor that prevents Base from being instantiated."""
- assert cls is not Base, "Cannot instantiate Base"
- return object.__new__(cls)
-
def __eq__(self, other):
"""Compares two nodes for equality.
@@ -132,7 +127,7 @@ class Node(Base):
"""Concrete implementation for interior nodes."""
- def __init__(self, type, children, context=None, prefix=None):
+ def __init__(self, type, children, context=None):
"""Initializer.
Takes a type constant (a symbol number >= 256), a sequence of
@@ -140,14 +135,14 @@ class Node(Base):
As a side effect, the parent pointers of the children are updated.
"""
- assert type >= 256, type
+ # assert type >= 256, type
self.type = type
self.children = list(children)
for ch in self.children:
- assert ch.parent is None, repr(ch)
+ # assert ch.parent is None, repr(ch)
ch.parent = self
- if prefix is not None:
- self.set_prefix(prefix)
+ # if prefix is not None:
+ # self.set_prefix(prefix)
def __repr__(self):
return "%s(%s, %r)" % (self.__class__.__name__,
@@ -206,19 +201,19 @@ class Leaf(Base):
lineno = 0 # Line where this token starts in the input
column = 0 # Column where this token tarts in the input
- def __init__(self, type, value, context=None, prefix=None):
+ def __init__(self, type, value, context=None):
"""Initializer.
Takes a type constant (a token number < 256), a string value,
and an optional context keyword argument.
"""
- assert 0 <= type < 256, type
+ # assert 0 <= type < 256, type
if context is not None:
self.prefix, (self.lineno, self.column) = context
self.type = type
self.value = value
- if prefix is not None:
- self.prefix = prefix
+ # if prefix is not None:
+ # self.prefix = prefix
def __repr__(self):
return "%s(%r, %r, %r)" % (self.__class__.__name__,