diff options
author | Georg Brandl <georg@python.org> | 2008-12-30 12:42:26 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2008-12-30 12:42:26 +0100 |
commit | 998660e41e99b171d029a0e6aa7b85fc0ba62373 (patch) | |
tree | 452521e7761a7fa32465d1486ece58c3aea6d5a0 /sphinx/pycode | |
parent | 6bec10bbcf957d4a26dc5b3db2f4a099382abf56 (diff) | |
download | sphinx-998660e41e99b171d029a0e6aa7b85fc0ba62373.tar.gz |
* Add a tag-finding method based on tokens.
* Don't parse immediately if tokenizing suffices.
* Also cache by file name.
Diffstat (limited to 'sphinx/pycode')
-rw-r--r-- | sphinx/pycode/__init__.py | 139 | ||||
-rw-r--r-- | sphinx/pycode/pgen2/driver.py | 18 |
2 files changed, 118 insertions, 39 deletions
```diff
diff --git a/sphinx/pycode/__init__.py b/sphinx/pycode/__init__.py
index e52a231d..f456cfe6 100644
--- a/sphinx/pycode/__init__.py
+++ b/sphinx/pycode/__init__.py
@@ -11,9 +11,10 @@
 
 import sys
 from os import path
+from cStringIO import StringIO
 
 from sphinx.pycode import pytree
-from sphinx.pycode.pgen2 import driver, token, parse, literals
+from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
 from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
 
 
@@ -22,9 +23,12 @@ _grammarfile = path.join(path.dirname(__file__), 'Grammar.txt')
 pygrammar = driver.load_grammar(_grammarfile)
 pydriver = driver.Driver(pygrammar, convert=pytree.convert)
 
+# an object with attributes corresponding to token and symbol names
 class sym: pass
 for k, v in pygrammar.symbol2number.iteritems():
     setattr(sym, k, v)
+for k, v in token.tok_name.iteritems():
+    setattr(sym, v, k)
 
 # a dict mapping terminal and nonterminal numbers to their names
 number2name = pygrammar.number2symbol.copy()
@@ -110,36 +114,29 @@ class PycodeError(Exception):
 
 
 class ModuleAnalyzer(object):
-    # cache for analyzer objects
+    # cache for analyzer objects -- caches both by module and file name
     cache = {}
 
-    def __init__(self, tree, modname, srcname):
-        self.tree = tree
-        self.modname = modname
-        self.srcname = srcname
-
     @classmethod
     def for_string(cls, string, modname, srcname='<string>'):
-        return cls(pydriver.parse_string(string), modname, srcname)
+        return cls(StringIO(string), modname, srcname)
 
     @classmethod
     def for_file(cls, filename, modname):
+        if ('file', filename) in cls.cache:
+            return cls.cache['file', filename]
         try:
             fileobj = open(filename, 'r')
         except Exception, err:
             raise PycodeError('error opening %r' % filename, err)
-        try:
-            try:
-                return cls(pydriver.parse_stream(fileobj), modname, filename)
-            except parse.ParseError, err:
-                raise PycodeError('error parsing %r' % filename, err)
-        finally:
-            fileobj.close()
+        obj = cls(fileobj, modname, filename)
+        cls.cache['file', filename] = obj
+        return obj
 
     @classmethod
     def for_module(cls, modname):
-        if modname in cls.cache:
-            return cls.cache[modname]
+        if ('module', modname) in cls.cache:
+            return cls.cache['module', modname]
         if modname not in sys.modules:
             try:
                 __import__(modname)
@@ -152,37 +149,119 @@ class ModuleAnalyzer(object):
             except Exception, err:
                 raise PycodeError('error getting source for %r' % modname, err)
             obj = cls.for_string(source, modname)
-            cls.cache[modname] = obj
+            cls.cache['module', modname] = obj
             return obj
         filename = getattr(mod, '__file__', None)
         if filename is None:
             raise PycodeError('no source found for module %r' % modname)
-        if filename.lower().endswith('.pyo') or \
-           filename.lower().endswith('.pyc'):
+        filename = path.normpath(filename)
+        lfilename = filename.lower()
+        if lfilename.endswith('.pyo') or lfilename.endswith('.pyc'):
             filename = filename[:-1]
-        elif not filename.lower().endswith('.py'):
+        elif not lfilename.endswith('.py'):
             raise PycodeError('source is not a .py file: %r' % filename)
         if not path.isfile(filename):
             raise PycodeError('source file is not present: %r' % filename)
         obj = cls.for_file(filename, modname)
-        cls.cache[modname] = obj
+        cls.cache['module', modname] = obj
         return obj
 
+    def __init__(self, source, modname, srcname):
+        self.modname = modname
+        self.srcname = srcname
+        # file-like object yielding source lines
+        self.source = source
+
+        # will be filled by tokenize()
+        self.tokens = None
+        # will be filled by parse()
+        self.parsetree = None
+
+    def tokenize(self):
+        """Generate tokens from the source."""
+        if self.tokens is not None:
+            return
+        self.tokens = list(tokenize.generate_tokens(self.source.readline))
+        self.source.close()
+
+    def parse(self):
+        """Parse the generated source tokens."""
+        if self.parsetree is not None:
+            return
+        self.tokenize()
+        self.parsetree = pydriver.parse_tokens(self.tokens)
+
     def find_attr_docs(self, scope=''):
+        """Find class and module-level attributes and their documentation."""
+        self.parse()
         attr_visitor = AttrDocVisitor(number2name, scope)
-        attr_visitor.visit(self.tree)
+        attr_visitor.visit(self.parsetree)
         return attr_visitor.collected
 
+    def find_tags(self):
+        """Find class, function and method definitions and their location."""
+        self.tokenize()
+        result = {}
+        namespace = []
+        stack = []
+        indent = 0
+        defline = False
+        expect_indent = False
+        def tokeniter(ignore = (token.COMMENT, token.NL)):
+            for tokentup in self.tokens:
+                if tokentup[0] not in ignore:
+                    yield tokentup
+        tokeniter = tokeniter()
+        for type, tok, spos, epos, line in tokeniter:
+            if expect_indent:
+                if type != token.INDENT:
+                    # no suite -- one-line definition
+                    assert stack
+                    dtype, fullname, startline, _ = stack.pop()
+                    endline = epos[0]
+                    namespace.pop()
+                    result[dtype, fullname] = (startline, endline)
+                expect_indent = False
+            if tok in ('def', 'class'):
+                name = tokeniter.next()[1]
+                namespace.append(name)
+                fullname = '.'.join(namespace)
+                stack.append((tok, fullname, spos[0], indent))
+                defline = True
+            elif type == token.INDENT:
+                expect_indent = False
+                indent += 1
+            elif type == token.DEDENT:
+                indent -= 1
+                # if the stacklevel is the same as it was before the last def/class block,
+                # this dedent closes that block
+                if stack and indent == stack[-1][3]:
+                    dtype, fullname, startline, _ = stack.pop()
+                    endline = spos[0]
+                    namespace.pop()
+                    result[dtype, fullname] = (startline, endline)
+            elif type == token.NEWLINE:
+                # if this line contained a definition, expect an INDENT to start the
+                # suite; if there is no such INDENT it's a one-line definition
+                if defline:
+                    defline = False
+                    expect_indent = True
+        return result
+
 
 if __name__ == '__main__':
-    import time
+    import time, pprint
     x0 = time.time()
-    ma = ModuleAnalyzer.for_file('sphinx/builders/html.py', 'sphinx.builders.html')
     #ma = ModuleAnalyzer.for_file(__file__.rstrip('c'), 'sphinx.builders.html')
+    ma = ModuleAnalyzer.for_file('sphinx/builders/html.py', 'sphinx.builders.html')
+    ma.tokenize()
     x1 = time.time()
-    for (ns, name), doc in ma.find_attr_docs().iteritems():
-        print '>>', ns, name
-        print '\n'.join(doc)
+    ma.parse()
     x2 = time.time()
-    #print pytree.nice_repr(ma.tree, number2name)
-    print "parsing %.4f, finding %.4f" % (x1-x0, x2-x1)
+    #for (ns, name), doc in ma.find_attr_docs().iteritems():
+    #    print '>>', ns, name
+    #    print '\n'.join(doc)
+    pprint.pprint(ma.find_tags())
+    x3 = time.time()
+    #print pytree.nice_repr(ma.parsetree, number2name)
+    print "tokenizing %.4f, parsing %.4f, finding %.4f" % (x1-x0, x2-x1, x3-x2)
diff --git a/sphinx/pycode/pgen2/driver.py b/sphinx/pycode/pgen2/driver.py
index 3e9e1043..edc882fa 100644
--- a/sphinx/pycode/pgen2/driver.py
+++ b/sphinx/pycode/pgen2/driver.py
@@ -42,8 +42,8 @@ class Driver(object):
         column = 0
         type = value = start = end = line_text = None
         prefix = ""
-        for quintuple in tokens:
-            type, value, start, end, line_text = quintuple
+        opmap = grammar.opmap
+        for type, value, start, end, line_text in tokens:
             if start != (lineno, column):
                 assert (lineno, column) <= start, ((lineno, column), start)
                 s_lineno, s_column = start
@@ -62,13 +62,13 @@ class Driver(object):
                     column = 0
                 continue
             if type == token.OP:
-                type = grammar.opmap[value]
-            if debug:
-                self.logger.debug("%s %r (prefix=%r)",
-                                  token.tok_name[type], value, prefix)
+                type = opmap[value]
+            # if debug:
+            #     self.logger.debug("%s %r (prefix=%r)",
+            #                       token.tok_name[type], value, prefix)
             if p.addtoken(type, value, (prefix, start)):
-                if debug:
-                    self.logger.debug("Stop.")
+                # if debug:
+                #     self.logger.debug("Stop.")
                 break
             prefix = ""
             lineno, column = end
@@ -77,7 +77,7 @@ class Driver(object):
                 column = 0
         else:
             # We never broke out -- EOF is too soon (how can this happen???)
-            raise parse.ParseError("incomplete input", t, v, x)
+            raise parse.ParseError("incomplete input", type, value, line_text)
         return p.rootnode
 
     def parse_stream_raw(self, stream, debug=False):
```