diff options
author | Georg Brandl <georg@python.org> | 2008-12-30 01:29:49 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2008-12-30 01:29:49 +0100 |
commit | 484625cc327f4fab73313f785b9aaeb94080e5be (patch) | |
tree | 29cdb2acf7de519231447941bab9ba6251f80f77 /sphinx/pycode | |
parent | e177eeb750aad12d964a16b750a8839f194a03bd (diff) | |
download | sphinx-484625cc327f4fab73313f785b9aaeb94080e5be.tar.gz |
Some speedups in pytree.
Add Cython parse.py replacement, yielding a 2x speedup in parsing.
Diffstat (limited to 'sphinx/pycode')
-rw-r--r-- | sphinx/pycode/pgen2/parse.pyx | 156 | ||||
-rw-r--r-- | sphinx/pycode/pytree.py | 23 |
2 files changed, 165 insertions, 14 deletions
diff --git a/sphinx/pycode/pgen2/parse.pyx b/sphinx/pycode/pgen2/parse.pyx new file mode 100644 index 00000000..6a11ee6b --- /dev/null +++ b/sphinx/pycode/pgen2/parse.pyx @@ -0,0 +1,156 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Parser engine for the grammar tables generated by pgen. + +The grammar table must be loaded first. + +See Parser/parser.c in the Python distribution for additional info on +how this parsing engine works. + +""" + +from sphinx.pycode.pytree import Node, Leaf + +DEF NAME = 1 + +class ParseError(Exception): + """Exception to signal the parser is stuck.""" + + def __init__(self, msg, type, value, context): + Exception.__init__(self, "%s: type=%r, value=%r, context=%r" % + (msg, type, value, context)) + self.msg = msg + self.type = type + self.value = value + self.context = context + + +cdef class Parser: + cdef public grammar, stack, rootnode, used_names + cdef _grammar_dfas, _grammar_labels, _grammar_keywords, _grammar_tokens + cdef _grammar_number2symbol + + def __init__(self, grammar, convert=None): + self.grammar = grammar + #self.convert = convert or noconvert + + self._grammar_dfas = grammar.dfas + self._grammar_labels = grammar.labels + self._grammar_keywords = grammar.keywords + self._grammar_tokens = grammar.tokens + self._grammar_number2symbol = grammar.number2symbol + + def setup(self, start=None): + if start is None: + start = self.grammar.start + # Each stack entry is a tuple: (dfa, state, node). + # A node is a tuple: (type, value, context, children), + # where children is a list of nodes or None, and context may be None. + newnode = (start, None, None, []) + stackentry = (self._grammar_dfas[start], 0, newnode) + self.stack = [stackentry] + self.rootnode = None + self.used_names = set() # Aliased to self.rootnode.used_names in pop() + + def addtoken(self, type, value, context): + """Add a token; return True iff this is the end of the program.""" + cdef int ilabel, i, t, state, newstate + # Map from token to label + ilabel = self.classify(type, value, context) + # Loop until the token is shifted; may raise exceptions + while True: + dfa, state, node = self.stack[-1] + states, first = dfa + arcs = states[state] + # Look for a state with this label + for i, newstate in arcs: + t, v = self._grammar_labels[i] + if ilabel == i: + # Look it up in the list of labels + ## assert t < 256 + # Shift a token; we're done with it + self.shift(type, value, newstate, context) + # Pop while we are in an accept-only state + state = newstate + while states[state] == [(0, state)]: + self.pop() + if not self.stack: + # Done parsing! + return True + dfa, state, node = self.stack[-1] + states, first = dfa + # Done with this token + return False + elif t >= 256: + # See if it's a symbol and if we're in its first set + itsdfa = self._grammar_dfas[t] + itsstates, itsfirst = itsdfa + if ilabel in itsfirst: + # Push a symbol + self.push(t, itsdfa, newstate, context) + break # To continue the outer while loop + else: + if (0, state) in arcs: + # An accepting state, pop it and try something else + self.pop() + if not self.stack: + # Done parsing, but another token is input + raise ParseError("too much input", + type, value, context) + else: + # No success finding a transition + raise ParseError("bad input", type, value, context) + + cdef int classify(self, type, value, context): + """Turn a token into a label. (Internal)""" + if type == NAME: + # Keep a listing of all used names + self.used_names.add(value) + # Check for reserved words + ilabel = self._grammar_keywords.get(value) + if ilabel is not None: + return ilabel + ilabel = self._grammar_tokens.get(type) + if ilabel is None: + raise ParseError("bad token", type, value, context) + return ilabel + + cdef void shift(self, type, value, newstate, context): + """Shift a token. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, value, context, None) + newnode = self.convert(newnode) + if newnode is not None: + node[-1].append(newnode) + self.stack[-1] = (dfa, newstate, node) + + cdef void push(self, type, newdfa, newstate, context): + """Push a nonterminal. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, None, context, []) + self.stack[-1] = (dfa, newstate, node) + self.stack.append((newdfa, 0, newnode)) + + cdef void pop(self): + """Pop a nonterminal. (Internal)""" + popdfa, popstate, popnode = self.stack.pop() + newnode = self.convert(popnode) + if newnode is not None: + if self.stack: + dfa, state, node = self.stack[-1] + node[-1].append(newnode) + else: + self.rootnode = newnode + self.rootnode.used_names = self.used_names + + cdef convert(self, raw_node): + type, value, context, children = raw_node + if children or type in self._grammar_number2symbol: + # If there's exactly one child, return that child instead of + # creating a new node. + if len(children) == 1: + return children[0] + return Node(type, children, context=context) + else: + return Leaf(type, value, context=context) diff --git a/sphinx/pycode/pytree.py b/sphinx/pycode/pytree.py index 46017a95..063e39ec 100644 --- a/sphinx/pycode/pytree.py +++ b/sphinx/pycode/pytree.py @@ -29,11 +29,6 @@ class Base(object): children = () # Tuple of subnodes was_changed = False - def __new__(cls, *args, **kwds): - """Constructor that prevents Base from being instantiated.""" - assert cls is not Base, "Cannot instantiate Base" - return object.__new__(cls) - def __eq__(self, other): """Compares two nodes for equality. @@ -132,7 +127,7 @@ class Node(Base): """Concrete implementation for interior nodes.""" - def __init__(self, type, children, context=None, prefix=None): + def __init__(self, type, children, context=None): """Initializer. Takes a type constant (a symbol number >= 256), a sequence of @@ -140,14 +135,14 @@ class Node(Base): As a side effect, the parent pointers of the children are updated. """ - assert type >= 256, type + # assert type >= 256, type self.type = type self.children = list(children) for ch in self.children: - assert ch.parent is None, repr(ch) + # assert ch.parent is None, repr(ch) ch.parent = self - if prefix is not None: - self.set_prefix(prefix) + # if prefix is not None: + # self.set_prefix(prefix) def __repr__(self): return "%s(%s, %r)" % (self.__class__.__name__, @@ -206,19 +201,19 @@ class Leaf(Base): lineno = 0 # Line where this token starts in the input column = 0 # Column where this token tarts in the input - def __init__(self, type, value, context=None, prefix=None): + def __init__(self, type, value, context=None): """Initializer. Takes a type constant (a token number < 256), a string value, and an optional context keyword argument. """ - assert 0 <= type < 256, type + # assert 0 <= type < 256, type if context is not None: self.prefix, (self.lineno, self.column) = context self.type = type self.value = value - if prefix is not None: - self.prefix = prefix + # if prefix is not None: + # self.prefix = prefix def __repr__(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, |