1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
|
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Adapted from parse.py to be compiled with Cython by Georg Brandl.
"""Parser engine for the grammar tables generated by pgen.
The grammar table must be loaded first.
See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
from sphinx.pycode.nodes import Node, Leaf
DEF NAME = 1
class ParseError(Exception):
"""Exception to signal the parser is stuck."""
def __init__(self, msg, type, value, context):
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
(msg, type, value, context))
self.msg = msg
self.type = type
self.value = value
self.context = context
cdef class Parser:
cdef public object grammar
cdef public object rootnode
cdef public list stack
cdef public set used_names
cdef int _grammar_start
cdef list _grammar_labels
cdef dict _grammar_dfas
cdef dict _grammar_keywords
cdef dict _grammar_tokens
cdef dict _grammar_number2symbol
def __init__(self, grammar, convert=None):
self.grammar = grammar
#self.convert = convert or noconvert
self._grammar_dfas = grammar.dfas
self._grammar_labels = grammar.labels
self._grammar_keywords = grammar.keywords
self._grammar_tokens = grammar.tokens
self._grammar_number2symbol = grammar.number2symbol
self._grammar_start = grammar.start
def setup(self, start=None):
if start is None:
start = self._grammar_start
# Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, value, context, children),
# where children is a list of nodes or None, and context may be None.
newnode = (start, None, None, [])
stackentry = (self._grammar_dfas[start], 0, newnode)
self.stack = [stackentry]
self.rootnode = None
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
def addtoken(self, int type, value, context):
"""Add a token; return True iff this is the end of the program."""
cdef int ilabel, i, t, state, newstate
# Map from token to label
ilabel = self.classify(type, value, context)
# Loop until the token is shifted; may raise exceptions
while True:
dfa, state, node = self.stack[-1]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
t, v = self._grammar_labels[i]
if ilabel == i:
# Look it up in the list of labels
## assert t < 256
# Shift a token; we're done with it
self.shift(type, value, newstate, context)
# Pop while we are in an accept-only state
state = newstate
while states[state] == [(0, state)]:
self.pop()
if not self.stack:
# Done parsing!
return True
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
return False
elif t >= 256:
# See if it's a symbol and if we're in its first set
itsdfa = self._grammar_dfas[t]
itsstates, itsfirst = itsdfa
if ilabel in itsfirst:
# Push a symbol
self.push(t, itsdfa, newstate, context)
break # To continue the outer while loop
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
self.pop()
if not self.stack:
# Done parsing, but another token is input
raise ParseError("too much input",
type, value, context)
else:
# No success finding a transition
raise ParseError("bad input", type, value, context)
cdef int classify(self, int type, value, context):
"""Turn a token into a label. (Internal)"""
if type == NAME:
# Keep a listing of all used names
self.used_names.add(value)
# Check for reserved words
if value in self._grammar_keywords:
return self._grammar_keywords[value]
if type not in self._grammar_tokens:
raise ParseError("bad token", type, value, context)
return self._grammar_tokens[type]
cdef void shift(self, type, value, newstate, context):
"""Shift a token. (Internal)"""
cdef tuple node
dfa, state, node = self.stack[-1]
newnode = (type, value, context, None)
newnode = self.convert(newnode)
if newnode is not None:
node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node)
cdef void push(self, type, newdfa, newstate, context):
"""Push a nonterminal. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type, None, context, [])
self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode))
cdef void pop(self):
"""Pop a nonterminal. (Internal)"""
popdfa, popstate, popnode = self.stack.pop()
newnode = self.convert(popnode)
if newnode is not None:
if self.stack:
dfa, state, node = self.stack[-1]
node[-1].append(newnode)
else:
self.rootnode = newnode
self.rootnode.used_names = self.used_names
cdef convert(self, tuple raw_node):
type, value, context, children = raw_node
if children or type in self._grammar_number2symbol:
# If there's exactly one child, return that child instead of
# creating a new node.
if len(children) == 1:
return children[0]
return Node(type, children, context=context)
else:
return Leaf(type, value, context=context)
|