# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import os
import logging

import sphinx

# Pgen imports
from sphinx.pycode.pgen2 import grammar, parse, token, tokenize, pgen
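
# A minimal usage sketch (not part of the original module; the grammar file
# name is hypothetical and must exist on disk):
#
#     g = load_grammar("Grammar.txt")       # build or unpickle the tables
#     driver = Driver(g)                    # no convert callback: raw nodes
#     tree = driver.parse_string("x = 1\n")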


class Driver(object):
    """Drives a pgen2 parser: tokenizes input and feeds tokens to it."""

    def __init__(self, grammar, convert=None, logger=None):
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
        opmap = grammar.opmap
        for type, value, start, end, line_text in tokens:
            if start != (lineno, column):
                # The token starts later than where we left off: everything
                # in between (whitespace and blank lines) becomes prefix text.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines are not fed to the
                # parser; they accumulate into the next token's prefix.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = opmap[value]
            # if debug:
            #     self.logger.debug("%s %r (prefix=%r)",
            #                       token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                # if debug:
                #     self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input", type, value, line_text)
        return p.rootnode
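
    # Note on prefixes: for input like "x = 1  # trailing\n", the comment and
    # surrounding whitespace are never parsed as grammar tokens; they are
    # carried as the prefix of the next significant token, which is how the
    # original layout can be reconstructed from the tree.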

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, debug=False):
        """Parse a file and return the syntax tree."""
        with open(filename) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(generate_lines(text).__next__)
        return self.parse_tokens(tokens, debug)


def generate_lines(text):
    """Generator that behaves like readline without using StringIO."""
    for line in text.splitlines(True):
        yield line
    while True:
        yield ""


def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger()
    if gp is None:
        head, tail = os.path.splitext(gt)
        if tail == ".txt":
            tail = ""
        # Embed the Sphinx major/minor version in the pickle name, for the
        # case we ever change the grammar...
        gp = head + tail + "-sphinx" + \
             ".".join(map(str, sphinx.version_info[:2])) + ".pickle"
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except IOError as e:
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g
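
# Worked example of the cache naming: with sphinx.version_info[:2] == (1, 0),
# gt="Grammar.txt" yields gp="Grammar-sphinx1.0.pickle" next to the grammar.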


def _newer(a, b):
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)
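

if __name__ == "__main__":
    # Ad-hoc smoke test (a sketch, not part of the original module): assumes
    # a pgen-style "Grammar.txt" exists in the current directory.
    import pprint
    g = load_grammar("Grammar.txt", save=False)
    tree = Driver(g).parse_string("x = 1\n")
    pprint.pprint(tree)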