diff options
| author | goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2002-12-14 01:38:31 +0000 |
|---|---|---|
| committer | goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2002-12-14 01:38:31 +0000 |
| commit | 47ff214eaec8f59ac08c78614811e12eb9b06fde (patch) | |
| tree | e5aa4cc02cc2a69f42321a9976cc7c746f6f2431 /docutils/readers | |
| parent | b75e46e8a06dd8170c9fbabaee4c4aaa22050e67 (diff) | |
| download | docutils-47ff214eaec8f59ac08c78614811e12eb9b06fde.tar.gz | |
making good progress
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1020 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/readers')
| -rw-r--r-- | docutils/readers/python/moduleparser.py | 293 |
1 file changed, 199 insertions, 94 deletions
diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 9ab3eea79..cbca876a7 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -12,7 +12,7 @@ Ideas: * Tokenize the module in parallel to extract initial values, comments, etc. * Merge the compiler & tokenize output such that the raw text hangs off of - nodes? Especially assignment expressions (RHS). + nodes. Useful for assignment expressions (RHS). What I'd like to do is to take a module, read in the text, run it through the module parser (using compiler.py and tokenize.py) and produce a high-level AST @@ -79,7 +79,12 @@ The module parser should produce a high-level AST, something like this:: 1 <Docstring> class_attribute's docstring - <Method name="__init__" argnames=['self', ('text', 'None')]> + <Method name="__init__"> + <Parameters> + <Parameter name="self"> + <Parameter name="text"> + <Expression> + None <Docstring> __init__'s docstring <Attribute name="instance_attribute" instance=True> @@ -109,10 +114,10 @@ The module parser should produce a high-level AST, something like this:: <Docstring> f.function_attribute's docstring -compiler.parse() provides most of what's needed for this AST. I think that -"tokenize" can be used to get the rest, and all that's left is to hunker down -and figure out how. We can determine the line number from the -compiler.parse() AST, and a get_rhs(lineno) method would provide the rest. +compiler.parse() provides most of what's needed for this AST, and "tokenize" +can be used to get the rest. We can determine the line number from the +compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the +rest. 
The Docutils Python reader component will transform this AST into a Python-specific doctree, and then a `stylist transform`_ would further @@ -174,17 +179,17 @@ from types import StringType, UnicodeType def parse_module(module_text, filename): ast = compiler.parse(module_text) - visitor = ModuleVisitor(filename) + token_parser = TokenParser(module_text) + visitor = ModuleVisitor(filename, token_parser) compiler.walk(ast, visitor, walker=visitor) return visitor.module -class ModuleVisitor(ASTVisitor): +class BaseVisitor(ASTVisitor): - def __init__(self, filename): + def __init__(self, token_parser): ASTVisitor.__init__(self) - self.filename = filename - self.module = None + self.token_parser = token_parser self.context = [] self.documentable = None @@ -193,22 +198,12 @@ class ModuleVisitor(ASTVisitor): #print 'in default (%s)' % node.__class__.__name__ #ASTVisitor.default(self, node, *args) - def default_ignore(self, node, *args): - #print 'in default_ignore (%s)' % node.__class__.__name__ + def default_visit(self, node, *args): + #print 'in default_visit (%s)' % node.__class__.__name__ ASTVisitor.default(self, node, *args) - def visitModule(self, node): - #print dir(node) - self.module = module = Module(node, self.filename) - if node.doc is not None: - module.append(Docstring(node, node.doc)) - self.context.append(module) - self.documentable = module - self.visit(node.node) - self.context.pop() - def visitStmt(self, node): - self.default_ignore(node) +class DocstringVisitor(BaseVisitor): def visitDiscard(self, node): if self.documentable: @@ -221,17 +216,14 @@ class ModuleVisitor(ASTVisitor): else: self.documentable = None - def visitImport(self, node): - self.context[-1].append(Import(node, node.names)) - self.documentable = None + def visitStmt(self, node): + self.default_visit(node) - def visitFrom(self, node): - self.context[-1].append( - Import(node, node.names, from_name=node.modname)) - self.documentable = None + +class AssignmentVisitor(DocstringVisitor): 
def visitAssign(self, node): - visitor = AssignmentVisitor() + visitor = AttributeVisitor(self.token_parser) compiler.walk(node, visitor, walker=visitor) if visitor.attributes: self.context[-1].extend(visitor.attributes) @@ -241,66 +233,111 @@ class ModuleVisitor(ASTVisitor): self.documentable = None -class AssignmentVisitor(ASTVisitor): +class ModuleVisitor(AssignmentVisitor): - """ - Tried reconstructing expressions (the RHS of assignments) by - visiting the compiler.parse() tree, but a lot of information is - missing, like parenthesis-grouping of expressions. + def __init__(self, filename, token_parser): + AssignmentVisitor.__init__(self, token_parser) + self.filename = filename + self.module = None - Gotta do it by parsing tokens. - """ + def visitModule(self, node): + self.module = module = Module(node, self.filename) + if node.doc is not None: + module.append(Docstring(node, node.doc)) + self.context.append(module) + self.documentable = module + self.visit(node.node) + self.context.pop() - def __init__(self): - ASTVisitor.__init__(self) - self.attributes = [] - self.parts = [] + def visitImport(self, node): + self.context[-1].append(Import(node, node.names)) + self.documentable = None - def default(self, node, *args): - print >>sys.stderr, '%s not visited!' 
% node.__class__.__name__ - ASTVisitor.default(self, node) + def visitFrom(self, node): + self.context[-1].append( + Import(node, node.names, from_name=node.modname)) + self.documentable = None + + def visitFunction(self, node): + visitor = FunctionVisitor(self.token_parser) + compiler.walk(node, visitor, walker=visitor) + self.context[-1].append(visitor.function) + + +class AttributeVisitor(BaseVisitor): + + def __init__(self, token_parser): + BaseVisitor.__init__(self, token_parser) + self.attributes = [] def visitAssign(self, node): - ASTVisitor.default(self, node) - self.attributes[-1].append(Expression(node, ''.join(self.parts))) + # Don't visit the expression itself, just the attribute nodes: + for child in node.nodes: + self.dispatch(child) + expression_text = self.token_parser.rhs(node.lineno) + expression = Expression(node, expression_text) + for attribute in self.attributes: + attribute.append(expression) def visitAssName(self, node): self.attributes.append(Attribute(node, node.name)) - def visitAdd(self, node): - ASTVisitor.default(self, node) - self.parts[-2:] = ' + '.join(self.parts[-2:]) - - def visitAnd(self, node): - ASTVisitor.default(self, node) - self.parts.insert(len(self.parts) - 1, ' and ') + def visitAssTuple(self, node): + attributes = self.attributes + self.attributes = [] + self.default_visit(node) + names = [attribute.name for attribute in self.attributes] + att_tuple = AttributeTuple(node, names) + att_tuple.lineno = self.attributes[0].lineno + self.attributes = attributes + self.attributes.append(att_tuple) - def visitBackquote(self, node): - self.parts.append('`') - ASTVisitor.default(self, node) - self.parts.append('`') + def visitAssAttr(self, node): + self.default_visit(node, node.attrname) - def visitBitand(self, node): - ASTVisitor.default(self, node) - self.parts.insert(len(self.parts) - 1, ' & ') + def visitGetattr(self, node, suffix): + self.default_visit(node, node.attrname + '.' 
+ suffix) - def visitBitor(self, node): - ASTVisitor.default(self, node) - self.parts.insert(len(self.parts) - 1, ' | ') + def visitName(self, node, suffix): + self.attributes.append(Attribute(node, node.name + '.' + suffix)) - def visitBitxor(self, node): - ASTVisitor.default(self, node) - self.parts.insert(len(self.parts) - 1, ' ^ ') - def visitConst(self, node): - self.parts.append(repr(node.value)) +class FunctionVisitor(DocstringVisitor): - def visitConst(self, node): - self.parts.append(repr(node.value)) + def visitFunction(self, node): + self.function = function = Function(node, node.name) + if node.doc is not None: + function.append(Docstring(node, node.doc)) + self.context.append(function) + self.documentable = function + self.parse_parameter_list(node) + self.visit(node.code) + self.context.pop() - def visitInvert(self, node): - self.parts.append('~ ') - ASTVisitor.default(self, node) + def parse_parameter_list(self, node): + parameters = [] + special = [] + argnames = list(node.argnames) + if node.kwargs: + special.append(ExcessKeywordArguments(node, argnames[-1])) + argnames.pop() + if node.varargs: + special.append(ExcessPositionalArguments(node, argnames[-1])) + argnames.pop() + defaults = list(node.defaults) + defaults = [None] * (len(argnames) - len(defaults)) + defaults + for argname, default in zip(argnames, defaults): + parameter = Parameter(node, argname) + if default: + default_text = self.token_parser.default(node.lineno) + parameter.append(Default(node, default_text)) + parameters.append(parameter) + if parameters or special: + special.reverse() + parameters.extend(special) + parameter_list = ParameterList(node) + parameter_list.extend(parameters) + self.function.append(parameter_list) class Node: @@ -395,6 +432,16 @@ class Attribute(Node): return Node.attlist(self, name=self.name) +class AttributeTuple(Node): + + def __init__(self, node, names): + Node.__init__(self, node) + self.names = names + + def attlist(self): + return 
Node.attlist(self, names=' '.join(self.names)) + + class Expression(Node): def __init__(self, node, text): @@ -404,40 +451,98 @@ class Expression(Node): def __str__(self, indent=' ', level=0): prefix = indent * (level + 1) return '%s%s%s\n' % (Node.__str__(self, indent, level), - prefix, self.text) + prefix, self.text.encode('unicode-escape')) -class TokenReader: +class Function(Attribute): pass + + +class ParameterList(Node): pass + + +class Parameter(Attribute): pass + + +class ExcessPositionalArguments(Parameter): pass + + +class ExcessKeywordArguments(Parameter): pass + + +class Default(Expression): pass + + +class TokenParser: def __init__(self, text): - self.text = text - self.lines = text.splitlines(1) + self.text = text + '\n\n' + self.lines = self.text.splitlines(1) self.generator = tokenize.generate_tokens(iter(self.lines).next) + self.next() def __iter__(self): return self def next(self): - token = self.generator.next() - self.type, self.string, self.start, self.end, self.line = token - return token + self.token = self.generator.next() + self.type, self.string, self.start, self.end, self.line = self.token + return self.token def goto_line(self, lineno): - for token in self: - if self.start[0] >= lineno: - return token - else: - raise IndexError + while self.start[0] < lineno: + self.next() + return token - def rhs(self, name, lineno): + def rhs(self, lineno): + """ + Return a whitespace-normalized expression string from the right-hand + side of an assignment at line `lineno`. 
+ """ self.goto_line(lineno) - while self.start[0] == lineno: - if self.type == token.OP and self.string == '=': - break + while self.string != '=': self.next() - else: - raise IndexError - + while self.type != token.NEWLINE and self.string != ';': + append = 1 + append_ws = 1 + del_ws = 0 + if self.string == '=': + start_row, start_col = self.end + tokens = [] + last_type = None + last_string = None + backquote = 0 + append = 0 + elif self.string == '.': + del_ws = 1 + append_ws = 0 + elif self.string in ('(', '[', '{'): + append_ws = 0 + if self.string in '([' and (last_type == token.NAME or + last_string in (')', ']', '}')): + del_ws = 1 + elif self.string in (')', ']', '}', ':', ','): + del_ws = 1 + elif self.string == '`': + if backquote: + del_ws = 1 + else: + append_ws = 0 + backquote = not backquote + elif self.type == tokenize.NL: + append = 0 + if append: + if del_ws and tokens and tokens[-1] == ' ': + del tokens[-1] + tokens.append(self.string) + last_type = self.type + last_string = self.string + if append_ws: + tokens.append(' ') + self.next() + self.next() + text = ''.join(tokens) + return text.strip() + def trim_docstring(text): """ |
