Some speedups in pytree.

Add Cython parse.py replacement, yielding a 2x speedup in parsing.
author: Georg Brandl <georg@python.org> 2008-12-30 01:29:49 +0100
committer: Georg Brandl <georg@python.org> 2008-12-30 01:29:49 +0100
commit: 484625cc327f4fab73313f785b9aaeb94080e5be (patch)
tree: 29cdb2acf7de519231447941bab9ba6251f80f77 /sphinx/pycode
parent: e177eeb750aad12d964a16b750a8839f194a03bd (diff)
download: sphinx-484625cc327f4fab73313f785b9aaeb94080e5be.tar.gz
2 files changed, 165 insertions, 14 deletions
diff --git a/sphinx/pycode/pgen2/parse.pyx b/sphinx/pycode/pgen2/parse.pyx
new file mode 100644
index 00000000..6a11ee6b
--- /dev/null
+++ b/sphinx/pycode/pgen2/parse.pyx
@@ -0,0 +1,156 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser engine for the grammar tables generated by pgen.
+
+The grammar table must be loaded first.
+
+See Parser/parser.c in the Python distribution for additional info on
+how this parsing engine works.
+
+"""
+
+from sphinx.pycode.pytree import Node, Leaf
+
+DEF NAME = 1
+
+class ParseError(Exception):
+    """Exception to signal the parser is stuck."""
+
+    def __init__(self, msg, type, value, context):
+        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
+                           (msg, type, value, context))
+        self.msg = msg
+        self.type = type
+        self.value = value
+        self.context = context
+
+
+cdef class Parser:
+    cdef public grammar, stack, rootnode, used_names
+    cdef _grammar_dfas, _grammar_labels, _grammar_keywords, _grammar_tokens
+    cdef _grammar_number2symbol
+
+    def __init__(self, grammar, convert=None):
+        self.grammar = grammar
+        #self.convert = convert or noconvert
+
+        self._grammar_dfas = grammar.dfas
+        self._grammar_labels = grammar.labels
+        self._grammar_keywords = grammar.keywords
+        self._grammar_tokens = grammar.tokens
+        self._grammar_number2symbol = grammar.number2symbol
+
+    def setup(self, start=None):
+        if start is None:
+            start = self.grammar.start
+        # Each stack entry is a tuple: (dfa, state, node).
+        # A node is a tuple: (type, value, context, children),
+        # where children is a list of nodes or None, and context may be None.
+        newnode = (start, None, None, [])
+        stackentry = (self._grammar_dfas[start], 0, newnode)
+        self.stack = [stackentry]
+        self.rootnode = None
+        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
+
+    def addtoken(self, type, value, context):
+        """Add a token; return True iff this is the end of the program."""
+        cdef int ilabel, i, t, state, newstate
+        # Map from token to label
+        ilabel = self.classify(type, value, context)
+        # Loop until the token is shifted; may raise exceptions
+        while True:
+            dfa, state, node = self.stack[-1]
+            states, first = dfa
+            arcs = states[state]
+            # Look for a state with this label
+            for i, newstate in arcs:
+                t, v = self._grammar_labels[i]
+                if ilabel == i:
+                    # Look it up in the list of labels
+                    ## assert t < 256
+                    # Shift a token; we're done with it
+                    self.shift(type, value, newstate, context)
+                    # Pop while we are in an accept-only state
+                    state = newstate
+                    while states[state] == [(0, state)]:
+                        self.pop()
+                        if not self.stack:
+                            # Done parsing!
+                            return True
+                        dfa, state, node = self.stack[-1]
+                        states, first = dfa
+                    # Done with this token
+                    return False
+                elif t >= 256:
+                    # See if it's a symbol and if we're in its first set
+                    itsdfa = self._grammar_dfas[t]
+                    itsstates, itsfirst = itsdfa
+                    if ilabel in itsfirst:
+                        # Push a symbol
+                        self.push(t, itsdfa, newstate, context)
+                        break # To continue the outer while loop
+            else:
+                if (0, state) in arcs:
+                    # An accepting state, pop it and try something else
+                    self.pop()
+                    if not self.stack:
+                        # Done parsing, but another token is input
+                        raise ParseError("too much input",
+                                         type, value, context)
+                else:
+                    # No success finding a transition
+                    raise ParseError("bad input", type, value, context)
+
+    cdef int classify(self, type, value, context):
+        """Turn a token into a label.  (Internal)"""
+        if type == NAME:
+            # Keep a listing of all used names
+            self.used_names.add(value)
+            # Check for reserved words
+            ilabel = self._grammar_keywords.get(value)
+            if ilabel is not None:
+                return ilabel
+        ilabel = self._grammar_tokens.get(type)
+        if ilabel is None:
+            raise ParseError("bad token", type, value, context)
+        return ilabel
+
+    cdef void shift(self, type, value, newstate, context):
+        """Shift a token.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, value, context, None)
+        newnode = self.convert(newnode)
+        if newnode is not None:
+            node[-1].append(newnode)
+        self.stack[-1] = (dfa, newstate, node)
+
+    cdef void push(self, type, newdfa, newstate, context):
+        """Push a nonterminal.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, None, context, [])
+        self.stack[-1] = (dfa, newstate, node)
+        self.stack.append((newdfa, 0, newnode))
+
+    cdef void pop(self):
+        """Pop a nonterminal.  (Internal)"""
+        popdfa, popstate, popnode = self.stack.pop()
+        newnode = self.convert(popnode)
+        if newnode is not None:
+            if self.stack:
+                dfa, state, node = self.stack[-1]
+                node[-1].append(newnode)
+            else:
+                self.rootnode = newnode
+                self.rootnode.used_names = self.used_names
+
+    cdef convert(self, raw_node):
+        type, value, context, children = raw_node
+        if children or type in self._grammar_number2symbol:
+            # If there's exactly one child, return that child instead of
+            # creating a new node.
+            if len(children) == 1:
+                return children[0]
+            return Node(type, children, context=context)
+        else:
+            return Leaf(type, value, context=context)
diff --git a/sphinx/pycode/pytree.py b/sphinx/pycode/pytree.py
index 46017a95..063e39ec 100644
--- a/sphinx/pycode/pytree.py
+++ b/sphinx/pycode/pytree.py
@@ -29,11 +29,6 @@ class Base(object):
     children = ()  # Tuple of subnodes
     was_changed = False
 
-    def __new__(cls, *args, **kwds):
-        """Constructor that prevents Base from being instantiated."""
-        assert cls is not Base, "Cannot instantiate Base"
-        return object.__new__(cls)
-
     def __eq__(self, other):
         """Compares two nodes for equality.
 
@@ -132,7 +127,7 @@ class Node(Base):
 
     """Concrete implementation for interior nodes."""
 
-    def __init__(self, type, children, context=None, prefix=None):
+    def __init__(self, type, children, context=None):
         """Initializer.
 
         Takes a type constant (a symbol number >= 256), a sequence of
@@ -140,14 +135,14 @@ class Node(Base):
 
         As a side effect, the parent pointers of the children are updated.
         """
-        assert type >= 256, type
+        # assert type >= 256, type
         self.type = type
         self.children = list(children)
         for ch in self.children:
-            assert ch.parent is None, repr(ch)
+            # assert ch.parent is None, repr(ch)
             ch.parent = self
-        if prefix is not None:
-            self.set_prefix(prefix)
+        # if prefix is not None:
+        #     self.set_prefix(prefix)
 
     def __repr__(self):
         return "%s(%s, %r)" % (self.__class__.__name__,
@@ -206,19 +201,19 @@ class Leaf(Base):
     lineno = 0   # Line where this token starts in the input
     column = 0   # Column where this token tarts in the input
 
-    def __init__(self, type, value, context=None, prefix=None):
+    def __init__(self, type, value, context=None):
         """Initializer.
 
         Takes a type constant (a token number < 256), a string value,
         and an optional context keyword argument.
         """
-        assert 0 <= type < 256, type
+        # assert 0 <= type < 256, type
         if context is not None:
             self.prefix, (self.lineno, self.column) = context
         self.type = type
         self.value = value
-        if prefix is not None:
-            self.prefix = prefix
+        # if prefix is not None:
+        #     self.prefix = prefix
 
     def __repr__(self):
         return "%s(%r, %r, %r)" % (self.__class__.__name__,
author	Georg Brandl <georg@python.org>	2008-12-30 01:29:49 +0100
committer	Georg Brandl <georg@python.org>	2008-12-30 01:29:49 +0100
commit	484625cc327f4fab73313f785b9aaeb94080e5be (patch)
tree	29cdb2acf7de519231447941bab9ba6251f80f77 /sphinx/pycode
parent	e177eeb750aad12d964a16b750a8839f194a03bd (diff)
download	sphinx-484625cc327f4fab73313f785b9aaeb94080e5be.tar.gz