From 9635f4c1c9ba56761da76c853eaa53bde44655ca Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Thu, 5 Dec 2002 02:26:03 +0000
Subject: Parser for Python modules

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@990 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 244 ++++++++++++++++++++++++++++++++
 1 file changed, 244 insertions(+)
 create mode 100644 docutils/readers/python/moduleparser.py

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
new file mode 100644
index 000000000..a4ef25298
--- /dev/null
+++ b/docutils/readers/python/moduleparser.py
@@ -0,0 +1,244 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Parser for Python modules.
+
+Ideas:
+
+* Tokenize the module in parallel to extract initial values, comments, etc.
+
+* Merge the compiler & tokenize output such that the raw text hangs off of
+  nodes?  Especially assignment expressions (RHS).
+
+"""
+
+__docformat__ = 'reStructuredText'
+
+import sys
+import compiler
+import compiler.ast
+import compiler.visitor
+from compiler.consts import OP_ASSIGN
+from types import StringType, UnicodeType
+
+
+def parse_module(module_text, filename):
+    ast = compiler.parse(module_text)
+    visitor = ModuleVisitor(filename)
+    compiler.walk(ast, visitor, walker=visitor)
+    return visitor.module
+
+
+class ModuleVisitor(compiler.visitor.ASTVisitor):
+
+    def __init__(self, filename):
+        compiler.visitor.ASTVisitor.__init__(self)
+        self.filename = filename
+        self.module = None
+        self.context = []
+        self.documentable = None
+
+    def default(self, node, *args):
+        self.documentable = None
+        #print 'in default (%s)' % node.__class__.__name__
+        #compiler.visitor.ASTVisitor.default(self, node, *args)
+
+    def default_ignore(self, node, *args):
+        #print 'in default_ignore (%s)' % node.__class__.__name__
+        compiler.visitor.ASTVisitor.default(self, node, *args)
+
+    def visitModule(self, node):
+        #print dir(node)
+        self.module = module = Module(node, self.filename)
+        if node.doc is not None:
+            module.append(Docstring(node, node.doc))
+        self.context.append(module)
+        self.documentable = module
+        self.visit(node.node)
+        self.context.pop()
+
+    def visitStmt(self, node):
+        self.default_ignore(node)
+
+    def visitDiscard(self, node):
+        if self.documentable:
+            self.visit(node.expr)
+
+    def visitConst(self, node):
+        if self.documentable:
+            if type(node.value) in (StringType, UnicodeType):
+                self.documentable.append(Docstring(node, node.value))
+            else:
+                self.documentable = None
+
+    def visitImport(self, node):
+        self.context[-1].append(Import(node, node.names))
+        self.documentable = None
+
+    def visitFrom(self, node):
+        self.context[-1].append(
+            Import(node, node.names, from_name=node.modname))
+        self.documentable = None
+
+    def visitAssign(self, node):
+        visitor = AssignmentVisitor()
+        compiler.walk(node, visitor, walker=visitor)
+        if visitor.attributes:
+            self.context[-1].extend(visitor.attributes)
+        if len(visitor.attributes) == 1:
+            self.documentable = visitor.attributes[0]
+        else:
+            self.documentable = None
+
+
+class AssignmentVisitor(compiler.visitor.ASTVisitor):
+
+    def __init__(self):
+        compiler.visitor.ASTVisitor.__init__(self)
+        self.attributes = []
+
+    def default(self, node, *args):
+        pass
+
+    def visitAssign(self, node):
+        compiler.visitor.ASTVisitor.default(self, node)
+
+    def visitAssName(self, node):
+        self.attributes.append(Attribute(node, node.name))
+
+    def get_rhs(self, node):
+        return "'TBD'"
+
+
+class Node:                             # (compiler.ast.Node)
+
+    def __init__(self, node):
+        self.children = []
+        """List of child nodes."""
+
+        self.lineno = node.lineno
+        """Line number of this node (or ``None``)."""
+
+    def __str__(self, indent='    ', level=0):
+        return ''.join(['%s%s\n' % (indent * level, repr(self))] +
+                       [child.__str__(indent, level+1)
+                        for child in self.children])
+
+    def __repr__(self):
+        parts = [self.__class__.__name__]
+        for name, value in self.attlist():
+            parts.append('%s="%s"' % (name, value))
+        return '<%s>' % ' '.join(parts)
+
+    def attlist(self, **atts):
+        if self.lineno is not None:
+            atts['lineno'] = self.lineno
+        attlist = atts.items()
+        attlist.sort()
+        return attlist
+
+    def append(self, node):
+        self.children.append(node)
+
+    def extend(self, node_list):
+        self.children.extend(node_list)
+
+
+class Module(Node):
+
+    def __init__(self, node, filename):
+        Node.__init__(self, node)
+        self.filename = filename
+
+    def attlist(self):
+        return Node.attlist(self, filename=self.filename)
+
+
+class Docstring(Node):
+
+    def __init__(self, node, text):
+        Node.__init__(self, node)
+        self.text = trim_docstring(text)
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        text = '\n'.join([prefix + line for line in self.text.splitlines()])
+        return Node.__str__(self, indent, level) + text + '\n'
+
+
+class Import(Node):
+
+    def __init__(self, node, names, from_name=None):
+        Node.__init__(self, node)
+        self.names = names
+        self.from_name = from_name
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        lines = []
+        for name, as in self.names:
+            if as:
+                lines.append('%s%s as %s' % (prefix, name, as))
+            else:
+                lines.append('%s%s' % (prefix, name))
+        text = '\n'.join(lines)
+        return Node.__str__(self, indent, level) + text + '\n'
+
+    def attlist(self):
+        if self.from_name:
+            atts = {'from': self.from_name}
+        else:
+            atts = {}
+        return Node.attlist(self, **atts)
+
+
+class Attribute(Node):
+
+    def __init__(self, node, name):
+        Node.__init__(self, node)
+        self.name = name
+
+    def attlist(self):
+        return Node.attlist(self, name=self.name)
+
+
+class Expression(Node):
+
+    def __init__(self, node, text):
+        Node.__init__(self, node)
+        self.text = text
+    
+
+def trim_docstring(text):
+    """
+    Trim indentation and blank lines from docstring text & return it.
+
+    See PEP 257.
+    """
+    if not text:
+        return ''
+    # Convert tabs to spaces (following the normal Python rules)
+    # and split into a list of lines:
+    lines = text.expandtabs().splitlines()
+    # Determine minimum indentation (first line doesn't count):
+    indent = sys.maxint
+    for line in lines[1:]:
+        stripped = line.lstrip()
+        if stripped:
+            indent = min(indent, len(line) - len(stripped))
+    # Remove indentation (first line is special):
+    trimmed = [lines[0].strip()]
+    if indent < sys.maxint:
+        for line in lines[1:]:
+            trimmed.append(line[indent:].rstrip())
+    # Strip off trailing and leading blank lines:
+    while trimmed and not trimmed[-1]:
+        trimmed.pop()
+    while trimmed and not trimmed[0]:
+        trimmed.pop(0)
+    # Return a single string:
+    return '\n'.join(trimmed)
-- 
cgit v1.2.1


From 241b81278dfcb2caddefea8ce1ed648a48f720d4 Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Sat, 7 Dec 2002 03:13:24 +0000
Subject: update

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1006 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index a4ef25298..5aab372b2 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -114,7 +114,7 @@ class AssignmentVisitor(compiler.visitor.ASTVisitor):
         return "'TBD'"
 
 
-class Node:                             # (compiler.ast.Node)
+class Node:
 
     def __init__(self, node):
         self.children = []
-- 
cgit v1.2.1


From b75e46e8a06dd8170c9fbabaee4c4aaa22050e67 Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Thu, 12 Dec 2002 03:26:55 +0000
Subject: Updated.  Dead-end with AssignmentVisitor reconstructing expressions.
  TokenReader seems to be the way to go.

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1017 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 250 ++++++++++++++++++++++++++++++--
 1 file changed, 238 insertions(+), 12 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 5aab372b2..9ab3eea79 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -14,6 +14,150 @@ Ideas:
 * Merge the compiler & tokenize output such that the raw text hangs off of
   nodes?  Especially assignment expressions (RHS).
 
+What I'd like to do is to take a module, read in the text, run it through the
+module parser (using compiler.py and tokenize.py) and produce a high-level AST
+full of nodes that are interesting from an auto-documentation standpoint.  For
+example, given this module (x.py)::
+
+    # comment
+
+    '''Docstring'''
+
+    '''Additional docstring'''
+
+    __docformat__ = 'reStructuredText'
+
+    a = 1
+    '''Attribute docstring'''
+
+    class C(Super):
+
+        '''C's docstring'''
+
+        class_attribute = 1
+        '''class_attribute's docstring'''
+
+        def __init__(self, text=None):
+            '''__init__'s docstring'''
+
+            self.instance_attribute = (text * 7
+                                       + ' whaddyaknow')
+            '''instance_attribute's docstring'''
+
+
+    def f(x,                            # parameter x
+          y=a*5,                        # parameter y
+          *args):                       # parameter args
+        '''f's docstring'''
+        return [x + item for item in args]
+
+    f.function_attribute = 1
+    '''f.function_attribute's docstring'''
+
+The module parser should produce a high-level AST, something like this::
+
+    <Module filename="x.py">
+        <Comment lineno=1>
+            comment
+        <Docstring lineno=3>
+            Docstring
+        <Docstring lineno=...>           (I'll leave out the lineno's)
+            Additional docstring
+        <Attribute name="__docformat__">
+            <Expression>
+                'reStructuredText'
+        <Attribute name="a">
+            <Expression>
+                1
+            <Docstring>
+                Attribute docstring
+        <Class name="C" inheritance="Super">
+            <Docstring>
+                C's docstring
+            <Attribute name="class_attribute">
+                <Expression>
+                    1
+                <Docstring>
+                    class_attribute's docstring
+            <Method name="__init__" argnames=['self', ('text', 'None')]>
+                <Docstring>
+                    __init__'s docstring
+                <Attribute name="instance_attribute" instance=True>
+                    <Expression>
+                        (text * 7
+                         + ' whaddyaknow')
+                    <Docstring>
+                        instance_attribute's docstring
+        <Function name="f">
+            <Parameters>
+                <Parameter name="x">
+                    <Comment>
+                        # parameter x
+                <Parameter name="y">
+                    <Expression>
+                        a*5
+                    <Comment>
+                        # parameter y
+                <Parameter name="args" varargs=True>
+                    <Comment>
+                        # parameter args
+            <Docstring>
+                f's docstring
+            <Attribute name="function_attribute">
+                <Expression>
+                    1
+                <Docstring>
+                    f.function_attribute's docstring
+
+compiler.parse() provides most of what's needed for this AST.  I think that
+"tokenize" can be used to get the rest, and all that's left is to hunker down
+and figure out how.  We can determine the line number from the
+compiler.parse() AST, and a get_rhs(lineno) method would provide the rest.
+
+The Docutils Python reader component will transform this AST into a
+Python-specific doctree, and then a `stylist transform`_ would further
+transform it into a generic doctree.  Namespaces will have to be compiled for
+each of the scopes, but I'm not certain at what stage of processing.
+
+It's very important to keep all docstring processing out of this, so that it's
+completely generic and not tool-specific.
+
+> Why perform all of those transformations?  Why not go from the AST to a
+> generic doctree?  Or, even from the AST to the final output?
+
+I want the docutils.readers.python.moduleparser.parse_module() function to
+produce a standard documentation-oriented AST that can be used by any tool.
+We can develop it together without having to compromise on the rest of our
+design (i.e., HappyDoc doesn't have to be made to work like Docutils, and
+vice-versa).  It would be a higher-level version of what compiler.py provides.
+
+The Python reader component transforms this generic AST into a Python-specific
+doctree (it knows about modules, classes, functions, etc.), but this is
+specific to Docutils and cannot be used by HappyDoc or others.  The stylist
+transform does the final layout, converting Python-specific structures
+("class" sections, etc.) into a generic doctree using primitives (tables,
+sections, lists, etc.).  This generic doctree does *not* know about Python
+structures any more.  The advantage is that this doctree can be handed off to
+any of the output writers to create any output format we like.
+
+The latter two transforms are separate because I want to be able to have
+multiple independent layout styles (multiple runtime-selectable "stylist
+transforms").  Each of the existing tools (HappyDoc, pydoc, epydoc, Crystal,
+etc.) has its own fixed format.  I personally don't like the tables-based
+format produced by these tools, and I'd like to be able to customize the
+format easily.  That's the goal of stylist transforms, which are independent
+from the Reader component itself.  One stylist transform could produce
+HappyDoc-like output, another could produce output similar to module docs in
+the Python library reference manual, and so on.
+
+It's for exactly this reason:
+
+>> It's very important to keep all docstring processing out of this, so that
+>> it's completely generic and not tool-specific.
+
+... but it goes past docstring processing.  It's also important to keep style
+decisions and tool-specific data transforms out of this module parser.
+
 """
 
 __docformat__ = 'reStructuredText'
@@ -21,8 +165,10 @@ __docformat__ = 'reStructuredText'
 import sys
 import compiler
 import compiler.ast
-import compiler.visitor
+import tokenize
+import token
 from compiler.consts import OP_ASSIGN
+from compiler.visitor import ASTVisitor
 from types import StringType, UnicodeType
 
 
@@ -33,10 +179,10 @@ def parse_module(module_text, filename):
     return visitor.module
 
 
-class ModuleVisitor(compiler.visitor.ASTVisitor):
+class ModuleVisitor(ASTVisitor):
 
     def __init__(self, filename):
-        compiler.visitor.ASTVisitor.__init__(self)
+        ASTVisitor.__init__(self)
         self.filename = filename
         self.module = None
         self.context = []
@@ -45,11 +191,11 @@ class ModuleVisitor(compiler.visitor.ASTVisitor):
     def default(self, node, *args):
         self.documentable = None
         #print 'in default (%s)' % node.__class__.__name__
-        #compiler.visitor.ASTVisitor.default(self, node, *args)
+        #ASTVisitor.default(self, node, *args)
 
     def default_ignore(self, node, *args):
         #print 'in default_ignore (%s)' % node.__class__.__name__
-        compiler.visitor.ASTVisitor.default(self, node, *args)
+        ASTVisitor.default(self, node, *args)
 
     def visitModule(self, node):
         #print dir(node)
@@ -95,23 +241,66 @@ class ModuleVisitor(compiler.visitor.ASTVisitor):
             self.documentable = None
 
 
-class AssignmentVisitor(compiler.visitor.ASTVisitor):
+class AssignmentVisitor(ASTVisitor):
+
+    """
+    Tried reconstructing expressions (the RHS of assignments) by
+    visiting the compiler.parse() tree, but a lot of information is
+    missing, like parenthesis-grouping of expressions.
+
+    Gotta do it by parsing tokens.
+    """
 
     def __init__(self):
-        compiler.visitor.ASTVisitor.__init__(self)
+        ASTVisitor.__init__(self)
         self.attributes = []
+        self.parts = []
 
     def default(self, node, *args):
-        pass
+        print >>sys.stderr, '%s not visited!' % node.__class__.__name__
+        ASTVisitor.default(self, node)
 
     def visitAssign(self, node):
-        compiler.visitor.ASTVisitor.default(self, node)
+        ASTVisitor.default(self, node)
+        self.attributes[-1].append(Expression(node, ''.join(self.parts)))
 
     def visitAssName(self, node):
         self.attributes.append(Attribute(node, node.name))
 
-    def get_rhs(self, node):
-        return "'TBD'"
+    def visitAdd(self, node):
+        ASTVisitor.default(self, node)
+        self.parts[-2:] = ' + '.join(self.parts[-2:])
+
+    def visitAnd(self, node):
+        ASTVisitor.default(self, node)
+        self.parts.insert(len(self.parts) - 1, ' and ')
+
+    def visitBackquote(self, node):
+        self.parts.append('`')
+        ASTVisitor.default(self, node)
+        self.parts.append('`')
+
+    def visitBitand(self, node):
+        ASTVisitor.default(self, node)
+        self.parts.insert(len(self.parts) - 1, ' & ')
+
+    def visitBitor(self, node):
+        ASTVisitor.default(self, node)
+        self.parts.insert(len(self.parts) - 1, ' | ')
+
+    def visitBitxor(self, node):
+        ASTVisitor.default(self, node)
+        self.parts.insert(len(self.parts) - 1, ' ^ ')
+
+    def visitConst(self, node):
+        self.parts.append(repr(node.value))
+
+    def visitConst(self, node):
+        self.parts.append(repr(node.value))
+
+    def visitInvert(self, node):
+        self.parts.append('~ ')
+        ASTVisitor.default(self, node)
 
 
 class Node:
@@ -211,7 +400,44 @@ class Expression(Node):
     def __init__(self, node, text):
         Node.__init__(self, node)
         self.text = text
-    
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        return '%s%s%s\n' % (Node.__str__(self, indent, level),
+                             prefix, self.text)
+
+
+class TokenReader:
+
+    def __init__(self, text):
+        self.text = text
+        self.lines = text.splitlines(1)
+        self.generator = tokenize.generate_tokens(iter(self.lines).next)
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        token = self.generator.next()
+        self.type, self.string, self.start, self.end, self.line = token
+        return token
+
+    def goto_line(self, lineno):
+        for token in self:
+            if self.start[0] >= lineno:
+                return token
+        else:
+            raise IndexError
+
+    def rhs(self, name, lineno):
+        self.goto_line(lineno)
+        while self.start[0] == lineno:
+            if self.type == token.OP and self.string == '=':
+                break
+            self.next()
+        else:
+            raise IndexError
+        
 
 def trim_docstring(text):
     """
-- 
cgit v1.2.1


From 47ff214eaec8f59ac08c78614811e12eb9b06fde Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Sat, 14 Dec 2002 01:38:31 +0000
Subject: making good progress

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1020 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 293 ++++++++++++++++++++++----------
 1 file changed, 199 insertions(+), 94 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 9ab3eea79..cbca876a7 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -12,7 +12,7 @@ Ideas:
 * Tokenize the module in parallel to extract initial values, comments, etc.
 
 * Merge the compiler & tokenize output such that the raw text hangs off of
-  nodes?  Especially assignment expressions (RHS).
+  nodes.  Useful for assignment expressions (RHS).
 
 What I'd like to do is to take a module, read in the text, run it through the
 module parser (using compiler.py and tokenize.py) and produce a high-level AST
@@ -79,7 +79,12 @@ The module parser should produce a high-level AST, something like this::
                     1
                 <Docstring>
                     class_attribute's docstring
-            <Method name="__init__" argnames=['self', ('text', 'None')]>
+            <Method name="__init__">
+                <Parameters>
+                    <Parameter name="self">
+                    <Parameter name="text">
+                        <Expression>
+                            None
                 <Docstring>
                     __init__'s docstring
                 <Attribute name="instance_attribute" instance=True>
@@ -109,10 +114,10 @@ The module parser should produce a high-level AST, something like this::
                 <Docstring>
                     f.function_attribute's docstring
 
-compiler.parse() provides most of what's needed for this AST.  I think that
-"tokenize" can be used to get the rest, and all that's left is to hunker down
-and figure out how.  We can determine the line number from the
-compiler.parse() AST, and a get_rhs(lineno) method would provide the rest.
+compiler.parse() provides most of what's needed for this AST, and "tokenize"
+can be used to get the rest.  We can determine the line number from the
+compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the
+rest.
 
 The Docutils Python reader component will transform this AST into a
 Python-specific doctree, and then a `stylist transform`_ would further
@@ -174,17 +179,17 @@ from types import StringType, UnicodeType
 
 def parse_module(module_text, filename):
     ast = compiler.parse(module_text)
-    visitor = ModuleVisitor(filename)
+    token_parser = TokenParser(module_text)
+    visitor = ModuleVisitor(filename, token_parser)
     compiler.walk(ast, visitor, walker=visitor)
     return visitor.module
 
 
-class ModuleVisitor(ASTVisitor):
+class BaseVisitor(ASTVisitor):
 
-    def __init__(self, filename):
+    def __init__(self, token_parser):
         ASTVisitor.__init__(self)
-        self.filename = filename
-        self.module = None
+        self.token_parser = token_parser
         self.context = []
         self.documentable = None
 
@@ -193,22 +198,12 @@ class ModuleVisitor(ASTVisitor):
         #print 'in default (%s)' % node.__class__.__name__
         #ASTVisitor.default(self, node, *args)
 
-    def default_ignore(self, node, *args):
-        #print 'in default_ignore (%s)' % node.__class__.__name__
+    def default_visit(self, node, *args):
+        #print 'in default_visit (%s)' % node.__class__.__name__
         ASTVisitor.default(self, node, *args)
 
-    def visitModule(self, node):
-        #print dir(node)
-        self.module = module = Module(node, self.filename)
-        if node.doc is not None:
-            module.append(Docstring(node, node.doc))
-        self.context.append(module)
-        self.documentable = module
-        self.visit(node.node)
-        self.context.pop()
 
-    def visitStmt(self, node):
-        self.default_ignore(node)
+class DocstringVisitor(BaseVisitor):
 
     def visitDiscard(self, node):
         if self.documentable:
@@ -221,17 +216,14 @@ class ModuleVisitor(ASTVisitor):
             else:
                 self.documentable = None
 
-    def visitImport(self, node):
-        self.context[-1].append(Import(node, node.names))
-        self.documentable = None
+    def visitStmt(self, node):
+        self.default_visit(node)
 
-    def visitFrom(self, node):
-        self.context[-1].append(
-            Import(node, node.names, from_name=node.modname))
-        self.documentable = None
+
+class AssignmentVisitor(DocstringVisitor):
 
     def visitAssign(self, node):
-        visitor = AssignmentVisitor()
+        visitor = AttributeVisitor(self.token_parser)
         compiler.walk(node, visitor, walker=visitor)
         if visitor.attributes:
             self.context[-1].extend(visitor.attributes)
@@ -241,66 +233,111 @@ class ModuleVisitor(ASTVisitor):
             self.documentable = None
 
 
-class AssignmentVisitor(ASTVisitor):
+class ModuleVisitor(AssignmentVisitor):
 
-    """
-    Tried reconstructing expressions (the RHS of assignments) by
-    visiting the compiler.parse() tree, but a lot of information is
-    missing, like parenthesis-grouping of expressions.
+    def __init__(self, filename, token_parser):
+        AssignmentVisitor.__init__(self, token_parser)
+        self.filename = filename
+        self.module = None
 
-    Gotta do it by parsing tokens.
-    """
+    def visitModule(self, node):
+        self.module = module = Module(node, self.filename)
+        if node.doc is not None:
+            module.append(Docstring(node, node.doc))
+        self.context.append(module)
+        self.documentable = module
+        self.visit(node.node)
+        self.context.pop()
 
-    def __init__(self):
-        ASTVisitor.__init__(self)
-        self.attributes = []
-        self.parts = []
+    def visitImport(self, node):
+        self.context[-1].append(Import(node, node.names))
+        self.documentable = None
 
-    def default(self, node, *args):
-        print >>sys.stderr, '%s not visited!' % node.__class__.__name__
-        ASTVisitor.default(self, node)
+    def visitFrom(self, node):
+        self.context[-1].append(
+            Import(node, node.names, from_name=node.modname))
+        self.documentable = None
+
+    def visitFunction(self, node):
+        visitor = FunctionVisitor(self.token_parser)
+        compiler.walk(node, visitor, walker=visitor)
+        self.context[-1].append(visitor.function)
+
+
+class AttributeVisitor(BaseVisitor):
+
+    def __init__(self, token_parser):
+        BaseVisitor.__init__(self, token_parser)
+        self.attributes = []
 
     def visitAssign(self, node):
-        ASTVisitor.default(self, node)
-        self.attributes[-1].append(Expression(node, ''.join(self.parts)))
+        # Don't visit the expression itself, just the attribute nodes:
+        for child in node.nodes:
+            self.dispatch(child)
+        expression_text = self.token_parser.rhs(node.lineno)
+        expression = Expression(node, expression_text)
+        for attribute in self.attributes:
+            attribute.append(expression)
 
     def visitAssName(self, node):
         self.attributes.append(Attribute(node, node.name))
 
-    def visitAdd(self, node):
-        ASTVisitor.default(self, node)
-        self.parts[-2:] = ' + '.join(self.parts[-2:])
-
-    def visitAnd(self, node):
-        ASTVisitor.default(self, node)
-        self.parts.insert(len(self.parts) - 1, ' and ')
+    def visitAssTuple(self, node):
+        attributes = self.attributes
+        self.attributes = []
+        self.default_visit(node)
+        names = [attribute.name for attribute in self.attributes]
+        att_tuple = AttributeTuple(node, names)
+        att_tuple.lineno = self.attributes[0].lineno
+        self.attributes = attributes
+        self.attributes.append(att_tuple)
 
-    def visitBackquote(self, node):
-        self.parts.append('`')
-        ASTVisitor.default(self, node)
-        self.parts.append('`')
+    def visitAssAttr(self, node):
+        self.default_visit(node, node.attrname)
 
-    def visitBitand(self, node):
-        ASTVisitor.default(self, node)
-        self.parts.insert(len(self.parts) - 1, ' & ')
+    def visitGetattr(self, node, suffix):
+        self.default_visit(node, node.attrname + '.' + suffix)
 
-    def visitBitor(self, node):
-        ASTVisitor.default(self, node)
-        self.parts.insert(len(self.parts) - 1, ' | ')
+    def visitName(self, node, suffix):
+        self.attributes.append(Attribute(node, node.name + '.' + suffix))
 
-    def visitBitxor(self, node):
-        ASTVisitor.default(self, node)
-        self.parts.insert(len(self.parts) - 1, ' ^ ')
 
-    def visitConst(self, node):
-        self.parts.append(repr(node.value))
+class FunctionVisitor(DocstringVisitor):
 
-    def visitConst(self, node):
-        self.parts.append(repr(node.value))
+    def visitFunction(self, node):
+        self.function = function = Function(node, node.name)
+        if node.doc is not None:
+            function.append(Docstring(node, node.doc))
+        self.context.append(function)
+        self.documentable = function
+        self.parse_parameter_list(node)
+        self.visit(node.code)
+        self.context.pop()
 
-    def visitInvert(self, node):
-        self.parts.append('~ ')
-        ASTVisitor.default(self, node)
+    def parse_parameter_list(self, node):
+        parameters = []
+        special = []
+        argnames = list(node.argnames)
+        if node.kwargs:
+            special.append(ExcessKeywordArguments(node, argnames[-1]))
+            argnames.pop()
+        if node.varargs:
+            special.append(ExcessPositionalArguments(node, argnames[-1]))
+            argnames.pop()
+        defaults = list(node.defaults)
+        defaults = [None] * (len(argnames) - len(defaults)) + defaults
+        for argname, default in zip(argnames, defaults):
+            parameter = Parameter(node, argname)
+            if default:
+                default_text = self.token_parser.default(node.lineno)
+                parameter.append(Default(node, default_text))
+            parameters.append(parameter)
+        if parameters or special:
+            special.reverse()
+            parameters.extend(special)
+            parameter_list = ParameterList(node)
+            parameter_list.extend(parameters)
+            self.function.append(parameter_list)
 
 
 class Node:
@@ -395,6 +432,16 @@ class Attribute(Node):
         return Node.attlist(self, name=self.name)
 
 
+class AttributeTuple(Node):
+
+    def __init__(self, node, names):
+        Node.__init__(self, node)
+        self.names = names
+
+    def attlist(self):
+        return Node.attlist(self, names=' '.join(self.names))
+
+
 class Expression(Node):
 
     def __init__(self, node, text):
@@ -404,40 +451,98 @@ class Expression(Node):
     def __str__(self, indent='    ', level=0):
         prefix = indent * (level + 1)
         return '%s%s%s\n' % (Node.__str__(self, indent, level),
-                             prefix, self.text)
+                             prefix, self.text.encode('unicode-escape'))
 
 
-class TokenReader:
+class Function(Attribute): pass
+
+
+class ParameterList(Node): pass
+
+
+class Parameter(Attribute): pass
+
+
+class ExcessPositionalArguments(Parameter): pass
+
+
+class ExcessKeywordArguments(Parameter): pass
+
+
+class Default(Expression): pass
+
+
+class TokenParser:
 
     def __init__(self, text):
-        self.text = text
-        self.lines = text.splitlines(1)
+        self.text = text + '\n\n'
+        self.lines = self.text.splitlines(1)
         self.generator = tokenize.generate_tokens(iter(self.lines).next)
+        self.next()
 
     def __iter__(self):
         return self
 
     def next(self):
-        token = self.generator.next()
-        self.type, self.string, self.start, self.end, self.line = token
-        return token
+        self.token = self.generator.next()
+        self.type, self.string, self.start, self.end, self.line = self.token
+        return self.token
 
     def goto_line(self, lineno):
-        for token in self:
-            if self.start[0] >= lineno:
-                return token
-        else:
-            raise IndexError
+        while self.start[0] < lineno:
+            self.next()
+        return self.token
 
-    def rhs(self, name, lineno):
+    def rhs(self, lineno):
+        """
+        Return a whitespace-normalized expression string from the right-hand
+        side of an assignment at line `lineno`.
+        """
         self.goto_line(lineno)
-        while self.start[0] == lineno:
-            if self.type == token.OP and self.string == '=':
-                break
+        while self.string != '=':
             self.next()
-        else:
-            raise IndexError
-        
+        while self.type != token.NEWLINE and self.string != ';':
+            append = 1
+            append_ws = 1
+            del_ws = 0
+            if self.string == '=':
+                start_row, start_col = self.end
+                tokens = []
+                last_type = None
+                last_string = None
+                backquote = 0
+                append = 0
+            elif self.string == '.':
+                del_ws = 1
+                append_ws = 0
+            elif self.string in ('(', '[', '{'):
+                append_ws = 0
+                if self.string in '([' and (last_type == token.NAME or
+                                            last_string in (')', ']', '}')):
+                    del_ws = 1
+            elif self.string in (')', ']', '}', ':', ','):
+                    del_ws = 1
+            elif self.string == '`':
+                if backquote:
+                    del_ws = 1
+                else:
+                    append_ws = 0
+                backquote = not backquote
+            elif self.type == tokenize.NL:
+                append = 0
+            if append:
+                if del_ws and tokens and tokens[-1] == ' ':
+                    del tokens[-1]
+                tokens.append(self.string)
+                last_type = self.type
+                last_string = self.string
+                if append_ws:
+                    tokens.append(' ')
+            self.next()
+        self.next()
+        text = ''.join(tokens)
+        return text.strip()
+
 
 def trim_docstring(text):
     """
-- 
cgit v1.2.1


From 173c7a539ca4e2b3a9f8c3a06f67d4a868fd850c Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Wed, 18 Dec 2002 01:44:49 +0000
Subject: More progress; functions done.

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1027 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 197 +++++++++++++++++++++++++-------
 1 file changed, 157 insertions(+), 40 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index cbca876a7..8dcf432b2 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -163,6 +163,14 @@ It's for exactly this reason:
 ... but it goes past docstring processing.  It's also important to keep style
 decisions and tool-specific data transforms out of this module parser.
 
+
+Issues
+======
+
+* At what point should namespaces be computed?  Should they be part of the
+  basic AST produced by the ASTVisitor walk, or generated by another tree
+  traversal?
+
 """
 
 __docformat__ = 'reStructuredText'
@@ -174,7 +182,7 @@ import tokenize
 import token
 from compiler.consts import OP_ASSIGN
 from compiler.visitor import ASTVisitor
-from types import StringType, UnicodeType
+from types import StringType, UnicodeType, TupleType
 
 
 def parse_module(module_text, filename):
@@ -304,7 +312,13 @@ class AttributeVisitor(BaseVisitor):
 
 class FunctionVisitor(DocstringVisitor):
 
+    in_function = 0
+
     def visitFunction(self, node):
+        if self.in_function:
+            # Don't bother with nested function definitions.
+            return
+        self.in_function = 1
         self.function = function = Function(node, node.name)
         if node.doc is not None:
             function.append(Docstring(node, node.doc))
@@ -326,11 +340,17 @@ class FunctionVisitor(DocstringVisitor):
             argnames.pop()
         defaults = list(node.defaults)
         defaults = [None] * (len(argnames) - len(defaults)) + defaults
+        function_parameters = self.token_parser.function_parameters(
+            node.lineno)
+        #print >>sys.stderr, function_parameters
         for argname, default in zip(argnames, defaults):
-            parameter = Parameter(node, argname)
+            if type(argname) is TupleType:
+                parameter = ParameterTuple(node, argname)
+                argname = normalize_parameter_name(argname)
+            else:
+                parameter = Parameter(node, argname)
             if default:
-                default_text = self.token_parser.default(node.lineno)
-                parameter.append(Default(node, default_text))
+                parameter.append(Default(node, function_parameters[argname]))
             parameters.append(parameter)
         if parameters or special:
             special.reverse()
@@ -463,6 +483,12 @@ class ParameterList(Node): pass
 class Parameter(Attribute): pass
 
 
+class ParameterTuple(AttributeTuple):
+
+    def attlist(self):
+        return Node.attlist(self, names=normalize_parameter_name(self.names))
+
+
 class ExcessPositionalArguments(Parameter): pass
 
 
@@ -502,47 +528,129 @@ class TokenParser:
         while self.string != '=':
             self.next()
         while self.type != token.NEWLINE and self.string != ';':
-            append = 1
-            append_ws = 1
-            del_ws = 0
             if self.string == '=':
-                start_row, start_col = self.end
-                tokens = []
-                last_type = None
-                last_string = None
-                backquote = 0
-                append = 0
-            elif self.string == '.':
-                del_ws = 1
-                append_ws = 0
-            elif self.string in ('(', '[', '{'):
-                append_ws = 0
-                if self.string in '([' and (last_type == token.NAME or
-                                            last_string in (')', ']', '}')):
-                    del_ws = 1
-            elif self.string in (')', ']', '}', ':', ','):
-                    del_ws = 1
-            elif self.string == '`':
-                if backquote:
-                    del_ws = 1
-                else:
-                    append_ws = 0
-                backquote = not backquote
-            elif self.type == tokenize.NL:
-                append = 0
-            if append:
-                if del_ws and tokens and tokens[-1] == ' ':
-                    del tokens[-1]
-                tokens.append(self.string)
-                last_type = self.type
-                last_string = self.string
-                if append_ws:
-                    tokens.append(' ')
+                self.tokens = []
+                self.stack = []
+                self._type = None
+                self._string = None
+                self._backquote = 0
+            else:
+                self.note_token()
             self.next()
         self.next()
-        text = ''.join(tokens)
+        text = ''.join(self.tokens)
         return text.strip()
 
+    openers = {')': '(', ']': '[', '}': '{'}
+
+    def note_token(self):
+        append = 1
+        append_ws = 1
+        del_ws = 0
+        if self.string == '.':
+            del_ws = 1
+            append_ws = 0
+        elif self.string in ('(', '[', '{'):
+            append_ws = 0
+            if self.string in '([' and (self._type == token.NAME or
+                                        self._string in (')', ']', '}')):
+                del_ws = 1
+            self.stack.append(self.string)
+        elif self.string in (')', ']', '}'):
+            del_ws = 1
+            assert self.stack[-1] == self.openers[self.string]
+            self.stack.pop()
+        elif self.string in (':', ','):
+            del_ws = 1
+        elif self.string == '`':
+            if self._backquote:
+                del_ws = 1
+                assert self.stack[-1] == self.string
+                self.stack.pop()
+            else:
+                append_ws = 0
+                self.stack.append(self.string)
+            self._backquote = not self._backquote
+        elif self.type == tokenize.NL:
+            append = 0
+        if append:
+            if del_ws and self.tokens and self.tokens[-1] == ' ':
+                del self.tokens[-1]
+            self.tokens.append(self.string)
+            self._type = self.type
+            self._string = self.string
+            if append_ws:
+                self.tokens.append(' ')
+
+    def function_parameters(self, lineno):
+        """
+        Return a dictionary mapping parameters to defaults
+        (whitespace-normalized strings).
+        """
+        self.goto_line(lineno)
+        while self.string != 'def':
+            self.next()
+        while self.string != '(':
+            self.next()
+        name = None
+        default = None
+        parameter_tuple = None
+        self.tokens = []
+        parameters = {}
+        self.stack = [self.string]
+        self.next()
+        while 1:
+            if len(self.stack) == 1:
+                if parameter_tuple:
+                    # Just encountered ")".
+                    #print >>sys.stderr, 'parameter_tuple: %r' % self.tokens
+                    name = ''.join(self.tokens).strip()
+                    self.tokens = []
+                    parameter_tuple = None
+                if self.string in (')', ','):
+                    if name:
+                        if self.tokens:
+                            default_text = ''.join(self.tokens).strip()
+                        else:
+                            default_text = None
+                        parameters[name] = default_text
+                        self.tokens = []
+                        name = None
+                        default = None
+                    if self.string == ')':
+                        break
+                elif self.type == token.NAME:
+                    if name and default:
+                        self.note_token()
+                    else:
+                        assert name is None, (
+                            'token=%r name=%r parameters=%r stack=%r'
+                            % (self.token, name, parameters, self.stack))
+                        name = self.string
+                        #print >>sys.stderr, 'name=%r' % name
+                elif self.string == '=':
+                    assert name is not None, 'token=%r' % (self.token,)
+                    assert default is None, 'token=%r' % (self.token,)
+                    assert self.tokens == [], 'token=%r' % (self.token,)
+                    default = 1
+                    self._type = None
+                    self._string = None
+                    self._backquote = 0
+                elif name:
+                    self.note_token()
+                elif self.string == '(':
+                    parameter_tuple = 1
+                    self._type = None
+                    self._string = None
+                    self._backquote = 0
+                    self.note_token()
+                else:                   # ignore these tokens:
+                    assert self.string in ('*', '**', '\n'), (
+                        'token=%r' % (self.token,))
+            else:
+                self.note_token()
+            self.next()
+        return parameters
 
 def trim_docstring(text):
     """
@@ -573,3 +681,12 @@ def trim_docstring(text):
         trimmed.pop(0)
     # Return a single string:
     return '\n'.join(trimmed)
+
+def normalize_parameter_name(name):
+    """
+    Converts a tuple like ``('a', ('b', 'c'), 'd')`` into ``'(a, (b, c), d)'``
+    """
+    if type(name) is TupleType:
+        return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name])
+    else:
+        return name
-- 
cgit v1.2.1


From 34762d707e5fbe502b302027e4318c8301d7a34d Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Thu, 19 Dec 2002 01:08:01 +0000
Subject: Added classes & methods.

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1032 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 464 +++++++++++++++++++-------------
 1 file changed, 277 insertions(+), 187 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 8dcf432b2..dee7810ad 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -7,17 +7,11 @@
 """
 Parser for Python modules.
 
-Ideas:
-
-* Tokenize the module in parallel to extract initial values, comments, etc.
-
-* Merge the compiler & tokenize output such that the raw text hangs off of
-  nodes.  Useful for assignment expressions (RHS).
-
-What I'd like to do is to take a module, read in the text, run it through the
-module parser (using compiler.py and tokenize.py) and produce a high-level AST
-full of nodes that are interesting from an auto-documentation standpoint.  For
-example, given this module (x.py)::
+The `parse_module()` function takes a module's text and file name, runs it
+through the module parser (using compiler.py and tokenize.py) and produces a
+"module documentation tree": a high-level AST full of nodes that are
+interesting from an auto-documentation standpoint.  For example, given this
+module (x.py)::
 
     # comment
 
@@ -54,75 +48,77 @@ example, given this module (x.py)::
     f.function_attribute = 1
     '''f.function_attribute's docstring'''
 
-The module parser should produce a high-level AST, something like this::
+The module parser will produce this module documentation tree::
 
-    <Module filename="x.py">
+    <Module filename="test data">
         <Comment lineno=1>
             comment
-        <Docstring lineno=3>
+        <Docstring>
             Docstring
-        <Docstring lineno=...>           (I'll leave out the lineno's)
+        <Docstring lineno="5">
             Additional docstring
-        <Attribute name="__docformat__">
-            <Expression>
+        <Attribute lineno="7" name="__docformat__">
+            <Expression lineno="7">
                 'reStructuredText'
-        <Attribute name="a">
-            <Expression>
+        <Attribute lineno="9" name="a">
+            <Expression lineno="9">
                 1
-            <Docstring>
+            <Docstring lineno="10">
                 Attribute docstring
-        <Class name="C" inheritance="Super">
-            <Docstring>
+        <Class bases="Super" lineno="12" name="C">
+            <Docstring lineno="12">
                 C's docstring
-            <Attribute name="class_attribute">
-                <Expression>
+            <Attribute lineno="16" name="class_attribute">
+                <Expression lineno="16">
                     1
-                <Docstring>
+                <Docstring lineno="17">
                     class_attribute's docstring
-            <Method name="__init__">
-                <Parameters>
-                    <Parameter name="self">
-                    <Parameter name="text">
-                        <Expression>
-                            None
-                <Docstring>
+            <Method lineno="19" name="__init__">
+                <Docstring lineno="19">
                     __init__'s docstring
-                <Attribute name="instance_attribute" instance=True>
-                    <Expression>
-                        (text * 7
-                         + ' whaddyaknow')
-                    <Docstring>
-                        class_attribute's docstring
-        <Function name="f">
-            <Parameters>
-                <Parameter name="x">
+                <ParameterList lineno="19">
+                    <Parameter lineno="19" name="self">
+                    <Parameter lineno="19" name="text">
+                        <Default lineno="19">
+                            None
+                <Attribute lineno="22" name="self.instance_attribute">
+                    <Expression lineno="22">
+                        (text * 7 + ' whaddyaknow')
+                    <Docstring lineno="24">
+                        instance_attribute's docstring
+        <Function lineno="27" name="f">
+            <Docstring lineno="27">
+                f's docstring
+            <ParameterList lineno="27">
+                <Parameter lineno="27" name="x">
                     <Comment>
                         # parameter x
-                <Parameter name="y">
-                    <Expression>
-                        a*5
+                <Parameter lineno="27" name="y">
+                    <Default lineno="27">
+                        a * 5
                     <Comment>
                         # parameter y
-                <Parameter name="args" varargs=True>
+                <ExcessPositionalArguments lineno="27" name="args">
                     <Comment>
                         # parameter args
-            <Docstring>
-                f's docstring
-            <Attribute name="function_attribute">
-                <Expression>
-                    1
-                <Docstring>
-                    f.function_attribute's docstring
+        <Attribute lineno="33" name="f.function_attribute">
+            <Expression lineno="33">
+                1
+            <Docstring lineno="34">
+                f.function_attribute's docstring
+
+(Comments are not implemented yet.)
 
-compiler.parse() provides most of what's needed for this AST, and "tokenize"
-can be used to get the rest.  We can determine the line number from the
-compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the
+compiler.parse() provides most of what's needed for this doctree, and
+"tokenize" can be used to get the rest.  We can determine the line number from
+the compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the
 rest.
 
-The Docutils Python reader component will transform this AST into a
-Python-specific doctree, and then a `stylist transform`_ would further
-transform it into a generic doctree.  Namespaces will have to be compiled for
-each of the scopes, but I'm not certain at what stage of processing.
+The Docutils Python reader component will transform this module doctree into a
+Python-specific Docutils doctree, and then a `stylist transform`_ will
+further transform it into a generic doctree.  Namespaces will have to be
+compiled for each of the scopes, but I'm not certain at what stage of
+processing.
 
 It's very important to keep all docstring processing out of this, so that it's
 a completely generic and not tool-specific.
@@ -131,7 +127,7 @@ a completely generic and not tool-specific.
 > generic doctree?  Or, even from the AST to the final output?
 
 I want the docutils.readers.python.moduleparser.parse_module() function to
-produce a standard documentation-oriented AST that can be used by any tool.
+produce a standard documentation-oriented tree that can be used by any tool.
 We can develop it together without having to compromise on the rest of our
 design (i.e., HappyDoc doesn't have to be made to work like Docutils, and
 vice-versa).  It would be a higher-level version of what compiler.py provides.
@@ -171,6 +167,17 @@ Issues
   basic AST produced by the ASTVisitor walk, or generated by another tree
   traversal?
 
+* At what point should a distinction be made between local variables &
+  instance attributes in __init__ methods?
+
+* Docstrings are getting their lineno from their parents.  Should the
+  TokenParser find the real line no's?
+
+* Comments: include them?  How and when?  Only full-line comments, or
+  parameter comments too?  (See function "f" above for an example.)
+
+* Module could use more docstrings & refactoring in places.
+
 """
 
 __docformat__ = 'reStructuredText'
@@ -186,6 +193,7 @@ from types import StringType, UnicodeType, TupleType
 
 
 def parse_module(module_text, filename):
+    """Return a module documentation tree from `module_text`."""
     ast = compiler.parse(module_text)
     token_parser = TokenParser(module_text)
     visitor = ModuleVisitor(filename, token_parser)
@@ -193,6 +201,161 @@ def parse_module(module_text, filename):
     return visitor.module
 
 
+class Node:
+
+    def __init__(self, node):
+        self.children = []
+        """List of child nodes."""
+
+        self.lineno = node.lineno
+        """Line number of this node (or ``None``)."""
+
+    def __str__(self, indent='    ', level=0):
+        return ''.join(['%s%s\n' % (indent * level, repr(self))] +
+                       [child.__str__(indent, level+1)
+                        for child in self.children])
+
+    def __repr__(self):
+        parts = [self.__class__.__name__]
+        for name, value in self.attlist():
+            parts.append('%s="%s"' % (name, value))
+        return '<%s>' % ' '.join(parts)
+
+    def attlist(self, **atts):
+        if self.lineno is not None:
+            atts['lineno'] = self.lineno
+        attlist = atts.items()
+        attlist.sort()
+        return attlist
+
+    def append(self, node):
+        self.children.append(node)
+
+    def extend(self, node_list):
+        self.children.extend(node_list)
+
+
+class Module(Node):
+
+    def __init__(self, node, filename):
+        Node.__init__(self, node)
+        self.filename = filename
+
+    def attlist(self):
+        return Node.attlist(self, filename=self.filename)
+
+
+class Docstring(Node):
+
+    def __init__(self, node, text):
+        Node.__init__(self, node)
+        self.text = trim_docstring(text)
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        text = '\n'.join([prefix + line for line in self.text.splitlines()])
+        return Node.__str__(self, indent, level) + text + '\n'
+
+
+class Import(Node):
+
+    def __init__(self, node, names, from_name=None):
+        Node.__init__(self, node)
+        self.names = names
+        self.from_name = from_name
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        lines = []
+        for name, as in self.names:
+            if as:
+                lines.append('%s%s as %s' % (prefix, name, as))
+            else:
+                lines.append('%s%s' % (prefix, name))
+        text = '\n'.join(lines)
+        return Node.__str__(self, indent, level) + text + '\n'
+
+    def attlist(self):
+        if self.from_name:
+            atts = {'from': self.from_name}
+        else:
+            atts = {}
+        return Node.attlist(self, **atts)
+
+
+class Attribute(Node):
+
+    def __init__(self, node, name):
+        Node.__init__(self, node)
+        self.name = name
+
+    def attlist(self):
+        return Node.attlist(self, name=self.name)
+
+
+class AttributeTuple(Node):
+
+    def __init__(self, node, names):
+        Node.__init__(self, node)
+        self.names = names
+
+    def attlist(self):
+        return Node.attlist(self, names=' '.join(self.names))
+
+
+class Expression(Node):
+
+    def __init__(self, node, text):
+        Node.__init__(self, node)
+        self.text = text
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        return '%s%s%s\n' % (Node.__str__(self, indent, level),
+                             prefix, self.text.encode('unicode-escape'))
+
+
+class Function(Attribute): pass
+
+
+class ParameterList(Node): pass
+
+
+class Parameter(Attribute): pass
+
+
+class ParameterTuple(AttributeTuple):
+
+    def attlist(self):
+        return Node.attlist(self, names=normalize_parameter_name(self.names))
+
+
+class ExcessPositionalArguments(Parameter): pass
+
+
+class ExcessKeywordArguments(Parameter): pass
+
+
+class Default(Expression): pass
+
+
+class Class(Node):
+
+    def __init__(self, node, name, bases=None):
+        Node.__init__(self, node)
+        self.name = name
+        self.bases = bases or []
+
+    def attlist(self):
+        atts = {'name': self.name}
+        if self.bases:
+            atts['bases'] = ' '.join(self.bases)
+        return Node.attlist(self, **atts)
+
+
+class Method(Function): pass
+
+
 class BaseVisitor(ASTVisitor):
 
     def __init__(self, token_parser):
@@ -271,6 +434,11 @@ class ModuleVisitor(AssignmentVisitor):
         compiler.walk(node, visitor, walker=visitor)
         self.context[-1].append(visitor.function)
 
+    def visitClass(self, node):
+        visitor = ClassVisitor(self.token_parser)
+        compiler.walk(node, visitor, walker=visitor)
+        self.context[-1].append(visitor.klass)
+
 
 class AttributeVisitor(BaseVisitor):
 
@@ -313,13 +481,15 @@ class AttributeVisitor(BaseVisitor):
 class FunctionVisitor(DocstringVisitor):
 
     in_function = 0
+    function_class = Function
 
     def visitFunction(self, node):
         if self.in_function:
+            self.documentable = None
             # Don't bother with nested function definitions.
             return
         self.in_function = 1
-        self.function = function = Function(node, node.name)
+        self.function = function = self.function_class(node, node.name)
         if node.doc is not None:
             function.append(Docstring(node, node.doc))
         self.context.append(function)
@@ -360,142 +530,61 @@ class FunctionVisitor(DocstringVisitor):
             self.function.append(parameter_list)
 
 
-class Node:
-
-    def __init__(self, node):
-        self.children = []
-        """List of child nodes."""
-
-        self.lineno = node.lineno
-        """Line number of this node (or ``None``)."""
-
-    def __str__(self, indent='    ', level=0):
-        return ''.join(['%s%s\n' % (indent * level, repr(self))] +
-                       [child.__str__(indent, level+1)
-                        for child in self.children])
+class ClassVisitor(AssignmentVisitor):
 
-    def __repr__(self):
-        parts = [self.__class__.__name__]
-        for name, value in self.attlist():
-            parts.append('%s="%s"' % (name, value))
-        return '<%s>' % ' '.join(parts)
+    in_class = 0
 
-    def attlist(self, **atts):
-        if self.lineno is not None:
-            atts['lineno'] = self.lineno
-        attlist = atts.items()
-        attlist.sort()
-        return attlist
-
-    def append(self, node):
-        self.children.append(node)
-
-    def extend(self, node_list):
-        self.children.extend(node_list)
-
-
-class Module(Node):
-
-    def __init__(self, node, filename):
-        Node.__init__(self, node)
-        self.filename = filename
-
-    def attlist(self):
-        return Node.attlist(self, filename=self.filename)
-
-
-class Docstring(Node):
-
-    def __init__(self, node, text):
-        Node.__init__(self, node)
-        self.text = trim_docstring(text)
-
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        text = '\n'.join([prefix + line for line in self.text.splitlines()])
-        return Node.__str__(self, indent, level) + text + '\n'
-
-
-class Import(Node):
-
-    def __init__(self, node, names, from_name=None):
-        Node.__init__(self, node)
-        self.names = names
-        self.from_name = from_name
+    def __init__(self, token_parser):
+        AssignmentVisitor.__init__(self, token_parser)
+        self.bases = []
 
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        lines = []
-        for name, as in self.names:
-            if as:
-                lines.append('%s%s as %s' % (prefix, name, as))
-            else:
-                lines.append('%s%s' % (prefix, name))
-        text = '\n'.join(lines)
-        return Node.__str__(self, indent, level) + text + '\n'
+    def visitClass(self, node):
+        if self.in_class:
+            self.documentable = None
+            # Don't bother with nested class definitions.
+            return
+        self.in_class = 1
+        #import mypdb as pdb
+        #pdb.set_trace()
+        for base in node.bases:
+            self.visit(base)
+        self.klass = klass = Class(node, node.name, self.bases)
+        if node.doc is not None:
+            klass.append(Docstring(node, node.doc))
+        self.context.append(klass)
+        self.documentable = klass
+        self.visit(node.code)
+        self.context.pop()
 
-    def attlist(self):
-        if self.from_name:
-            atts = {'from': self.from_name}
+    def visitGetattr(self, node, suffix=None):
+        if suffix:
+            name = node.attrname + '.' + suffix
         else:
-            atts = {}
-        return Node.attlist(self, **atts)
-
-
-class Attribute(Node):
-
-    def __init__(self, node, name):
-        Node.__init__(self, node)
-        self.name = name
-
-    def attlist(self):
-        return Node.attlist(self, name=self.name)
-
-
-class AttributeTuple(Node):
-
-    def __init__(self, node, names):
-        Node.__init__(self, node)
-        self.names = names
+            name = node.attrname
+        self.default_visit(node, name)
 
-    def attlist(self):
-        return Node.attlist(self, names=' '.join(self.names))
-
-
-class Expression(Node):
-
-    def __init__(self, node, text):
-        Node.__init__(self, node)
-        self.text = text
-
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        return '%s%s%s\n' % (Node.__str__(self, indent, level),
-                             prefix, self.text.encode('unicode-escape'))
-
-
-class Function(Attribute): pass
-
-
-class ParameterList(Node): pass
-
-
-class Parameter(Attribute): pass
-
-
-class ParameterTuple(AttributeTuple):
-
-    def attlist(self):
-        return Node.attlist(self, names=normalize_parameter_name(self.names))
+    def visitName(self, node, suffix=None):
+        if suffix:
+            name = node.name + '.' + suffix
+        else:
+            name = node.name
+        self.bases.append(name)
 
+    def visitFunction(self, node):
+        if node.name == '__init__':
+            visitor = InitMethodVisitor(self.token_parser)
+        else:
+            visitor = MethodVisitor(self.token_parser)
+        compiler.walk(node, visitor, walker=visitor)
+        self.context[-1].append(visitor.function)
 
-class ExcessPositionalArguments(Parameter): pass
 
+class MethodVisitor(FunctionVisitor):
 
-class ExcessKeywordArguments(Parameter): pass
+    function_class = Method
 
 
-class Default(Expression): pass
+class InitMethodVisitor(MethodVisitor, AssignmentVisitor): pass
 
 
 class TokenParser:
@@ -645,7 +734,8 @@ class TokenParser:
                     self._backquote = 0
                     self.note_token()
                 else:                   # ignore these tokens:
-                    assert self.string in ('*', '**', '\n'), (
+                    assert (self.string in ('*', '**', '\n') 
+                            or self.type == tokenize.COMMENT), (
                         'token=%r' % (self.token,))
             else:
                 self.note_token()
@@ -659,7 +749,7 @@ def trim_docstring(text):
     See PEP 257.
     """
     if not text:
-        return ''
+        return text
     # Convert tabs to spaces (following the normal Python rules)
     # and split into a list of lines:
     lines = text.expandtabs().splitlines()
-- 
cgit v1.2.1


From 6c7003a050d919f5b10d85b28c0dcafc45f4686f Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Thu, 19 Dec 2002 04:40:45 +0000
Subject: fixed RHS parse bug, found by Richard Jones

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1035 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index dee7810ad..15f05a869 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -616,8 +616,9 @@ class TokenParser:
         self.goto_line(lineno)
         while self.string != '=':
             self.next()
+        self.stack = None
         while self.type != token.NEWLINE and self.string != ';':
-            if self.string == '=':
+            if self.string == '=' and not self.stack:
                 self.tokens = []
                 self.stack = []
                 self._type = None
-- 
cgit v1.2.1


From acb0feec0ebec5110c719a661b253cd37bf9a721 Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Sun, 29 Dec 2002 18:37:18 +0000
Subject: refactored a bit

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1045 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 81 ++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 42 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 15f05a869..2262ddbd5 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -235,6 +235,18 @@ class Node:
         self.children.extend(node_list)
 
 
+class TextNode(Node):
+
+    def __init__(self, node, text):
+        Node.__init__(self, node)
+        self.text = trim_docstring(text)
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        text = '\n'.join([prefix + line for line in self.text.splitlines()])
+        return Node.__str__(self, indent, level) + text + '\n'
+
+
 class Module(Node):
 
     def __init__(self, node, filename):
@@ -245,16 +257,10 @@ class Module(Node):
         return Node.attlist(self, filename=self.filename)
 
 
-class Docstring(Node):
+class Docstring(TextNode): pass
 
-    def __init__(self, node, text):
-        Node.__init__(self, node)
-        self.text = trim_docstring(text)
 
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        text = '\n'.join([prefix + line for line in self.text.splitlines()])
-        return Node.__str__(self, indent, level) + text + '\n'
+class Comment(TextNode): pass
 
 
 class Import(Node):
@@ -303,11 +309,7 @@ class AttributeTuple(Node):
         return Node.attlist(self, names=' '.join(self.names))
 
 
-class Expression(Node):
-
-    def __init__(self, node, text):
-        Node.__init__(self, node)
-        self.text = text
+class Expression(TextNode):
 
     def __str__(self, indent='    ', level=0):
         prefix = indent * (level + 1)
@@ -631,46 +633,40 @@ class TokenParser:
         text = ''.join(self.tokens)
         return text.strip()
 
-    openers = {')': '(', ']': '[', '}': '{'}
+    closers = {')': '(', ']': '[', '}': '{'}
+    openers = {'(': 1, '[': 1, '{': 1}
+    del_ws_prefix = {'.': 1, '=': 1, ')': 1, ']': 1, '}': 1, ':': 1, ',': 1}
+    no_ws_suffix = {'.': 1, '=': 1, '(': 1, '[': 1, '{': 1}
 
     def note_token(self):
-        append = 1
-        append_ws = 1
-        del_ws = 0
-        if self.string == '.':
-            del_ws = 1
-            append_ws = 0
-        elif self.string in ('(', '[', '{'):
-            append_ws = 0
-            if self.string in '([' and (self._type == token.NAME or
-                                        self._string in (')', ']', '}')):
-                del_ws = 1
+        if self.type == tokenize.NL:
+            return
+        del_ws = self.del_ws_prefix.has_key(self.string)
+        append_ws = not self.no_ws_suffix.has_key(self.string)
+        if self.openers.has_key(self.string):
             self.stack.append(self.string)
-        elif self.string in (')', ']', '}'):
-            del_ws = 1
-            assert self.stack[-1] == self.openers[self.string]
+            if (self._type == token.NAME
+                or self.closers.has_key(self._string)):
+                del_ws = 1
+        elif self.closers.has_key(self.string):
+            assert self.stack[-1] == self.closers[self.string]
             self.stack.pop()
-        elif self.string in (':', ','):
-            del_ws = 1
         elif self.string == '`':
             if self._backquote:
                 del_ws = 1
-                assert self.stack[-1] == self.string
+                assert self.stack[-1] == '`'
                 self.stack.pop()
             else:
                 append_ws = 0
-                self.stack.append(self.string)
+                self.stack.append('`')
             self._backquote = not self._backquote
-        elif self.type == tokenize.NL:
-            append = 0
-        if append:
-            if del_ws and self.tokens and self.tokens[-1] == ' ':
-                del self.tokens[-1]
-            self.tokens.append(self.string)
-            self._type = self.type
-            self._string = self.string
-            if append_ws:
-                self.tokens.append(' ')
+        if del_ws and self.tokens and self.tokens[-1] == ' ':
+            del self.tokens[-1]
+        self.tokens.append(self.string)
+        self._type = self.type
+        self._string = self.string
+        if append_ws:
+            self.tokens.append(' ')
 
     def function_parameters(self, lineno):
         """
@@ -743,6 +739,7 @@ class TokenParser:
             self.next()
         return parameters
 
+
 def trim_docstring(text):
     """
     Trim indentation and blank lines from docstring text & return it.
-- 
cgit v1.2.1


From df2ba997c490308d849dfc5c67f3ed550dd8138c Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Sat, 4 Jan 2003 00:18:58 +0000
Subject: docstring

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1060 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 2262ddbd5..9fcd1ec07 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -203,6 +203,10 @@ def parse_module(module_text, filename):
 
 class Node:
 
+    """
+    Base class for module documentation tree nodes.
+    """
+
     def __init__(self, node):
         self.children = []
         """List of child nodes."""
-- 
cgit v1.2.1


From 9f31157a98faacf4c2b8f2bbdf2bbbd5078d6093 Mon Sep 17 00:00:00 2001
From: ianbicking <ianbicking@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Sun, 21 Mar 2004 20:18:22 +0000
Subject: Incomplete changes to the moduleparser python source reader

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1847 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 9fcd1ec07..a425d2738 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -190,6 +190,7 @@ import token
 from compiler.consts import OP_ASSIGN
 from compiler.visitor import ASTVisitor
 from types import StringType, UnicodeType, TupleType
+from docutils.readers.python import pynodes
 
 
 def parse_module(module_text, filename):
@@ -418,6 +419,7 @@ class ModuleVisitor(AssignmentVisitor):
         self.module = None
 
     def visitModule(self, node):
+        
         self.module = module = Module(node, self.filename)
         if node.doc is not None:
             module.append(Docstring(node, node.doc))
@@ -782,3 +784,9 @@ def normalize_parameter_name(name):
         return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name])
     else:
         return name
+
+if __name__ == '__main__':
+    import sys
+    filename = sys.argv[1]
+    content = open(filename).read()
+    print parse_module(content, filename)
-- 
cgit v1.2.1


From db10b42915077db1a6cbb936145d0d803142b12a Mon Sep 17 00:00:00 2001
From: ianbicking <ianbicking@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Tue, 23 Mar 2004 19:57:14 +0000
Subject: * Bug fixes to python reader * Getting tests up-to-date * Trimming
 unused nodes from pynodes

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1876 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 343 +++++++++++++-------------------
 1 file changed, 140 insertions(+), 203 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index a425d2738..c95d997c8 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -191,6 +191,7 @@ from compiler.consts import OP_ASSIGN
 from compiler.visitor import ASTVisitor
 from types import StringType, UnicodeType, TupleType
 from docutils.readers.python import pynodes
+from docutils.nodes import Text
 
 
 def parse_module(module_text, filename):
@@ -201,168 +202,6 @@ def parse_module(module_text, filename):
     compiler.walk(ast, visitor, walker=visitor)
     return visitor.module
 
-
-class Node:
-
-    """
-    Base class for module documentation tree nodes.
-    """
-
-    def __init__(self, node):
-        self.children = []
-        """List of child nodes."""
-
-        self.lineno = node.lineno
-        """Line number of this node (or ``None``)."""
-
-    def __str__(self, indent='    ', level=0):
-        return ''.join(['%s%s\n' % (indent * level, repr(self))] +
-                       [child.__str__(indent, level+1)
-                        for child in self.children])
-
-    def __repr__(self):
-        parts = [self.__class__.__name__]
-        for name, value in self.attlist():
-            parts.append('%s="%s"' % (name, value))
-        return '<%s>' % ' '.join(parts)
-
-    def attlist(self, **atts):
-        if self.lineno is not None:
-            atts['lineno'] = self.lineno
-        attlist = atts.items()
-        attlist.sort()
-        return attlist
-
-    def append(self, node):
-        self.children.append(node)
-
-    def extend(self, node_list):
-        self.children.extend(node_list)
-
-
-class TextNode(Node):
-
-    def __init__(self, node, text):
-        Node.__init__(self, node)
-        self.text = trim_docstring(text)
-
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        text = '\n'.join([prefix + line for line in self.text.splitlines()])
-        return Node.__str__(self, indent, level) + text + '\n'
-
-
-class Module(Node):
-
-    def __init__(self, node, filename):
-        Node.__init__(self, node)
-        self.filename = filename
-
-    def attlist(self):
-        return Node.attlist(self, filename=self.filename)
-
-
-class Docstring(TextNode): pass
-
-
-class Comment(TextNode): pass
-
-
-class Import(Node):
-
-    def __init__(self, node, names, from_name=None):
-        Node.__init__(self, node)
-        self.names = names
-        self.from_name = from_name
-
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        lines = []
-        for name, as in self.names:
-            if as:
-                lines.append('%s%s as %s' % (prefix, name, as))
-            else:
-                lines.append('%s%s' % (prefix, name))
-        text = '\n'.join(lines)
-        return Node.__str__(self, indent, level) + text + '\n'
-
-    def attlist(self):
-        if self.from_name:
-            atts = {'from': self.from_name}
-        else:
-            atts = {}
-        return Node.attlist(self, **atts)
-
-
-class Attribute(Node):
-
-    def __init__(self, node, name):
-        Node.__init__(self, node)
-        self.name = name
-
-    def attlist(self):
-        return Node.attlist(self, name=self.name)
-
-
-class AttributeTuple(Node):
-
-    def __init__(self, node, names):
-        Node.__init__(self, node)
-        self.names = names
-
-    def attlist(self):
-        return Node.attlist(self, names=' '.join(self.names))
-
-
-class Expression(TextNode):
-
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        return '%s%s%s\n' % (Node.__str__(self, indent, level),
-                             prefix, self.text.encode('unicode-escape'))
-
-
-class Function(Attribute): pass
-
-
-class ParameterList(Node): pass
-
-
-class Parameter(Attribute): pass
-
-
-class ParameterTuple(AttributeTuple):
-
-    def attlist(self):
-        return Node.attlist(self, names=normalize_parameter_name(self.names))
-
-
-class ExcessPositionalArguments(Parameter): pass
-
-
-class ExcessKeywordArguments(Parameter): pass
-
-
-class Default(Expression): pass
-
-
-class Class(Node):
-
-    def __init__(self, node, name, bases=None):
-        Node.__init__(self, node)
-        self.name = name
-        self.bases = bases or []
-
-    def attlist(self):
-        atts = {'name': self.name}
-        if self.bases:
-            atts['bases'] = ' '.join(self.bases)
-        return Node.attlist(self, **atts)
-
-
-class Method(Function): pass
-
-
 class BaseVisitor(ASTVisitor):
 
     def __init__(self, token_parser):
@@ -390,7 +229,7 @@ class DocstringVisitor(BaseVisitor):
     def visitConst(self, node):
         if self.documentable:
             if type(node.value) in (StringType, UnicodeType):
-                self.documentable.append(Docstring(node, node.value))
+                self.documentable.append(make_docstring(node.value, node.lineno))
             else:
                 self.documentable = None
 
@@ -419,26 +258,28 @@ class ModuleVisitor(AssignmentVisitor):
         self.module = None
 
     def visitModule(self, node):
-        
-        self.module = module = Module(node, self.filename)
-        if node.doc is not None:
-            module.append(Docstring(node, node.doc))
+        self.module = module = pynodes.module_section()
+        module['filename'] = self.filename
+        append_docstring(module, node.doc, node.lineno)
         self.context.append(module)
         self.documentable = module
         self.visit(node.node)
         self.context.pop()
 
     def visitImport(self, node):
-        self.context[-1].append(Import(node, node.names))
+        self.context[-1] += make_import_group(names=node.names,
+                                              lineno=node.lineno)
         self.documentable = None
 
     def visitFrom(self, node):
         self.context[-1].append(
-            Import(node, node.names, from_name=node.modname))
+            make_import_group(names=node.names, from_name=node.modname,
+                              lineno=node.lineno))
         self.documentable = None
 
     def visitFunction(self, node):
-        visitor = FunctionVisitor(self.token_parser)
+        visitor = FunctionVisitor(self.token_parser,
+                                  function_class=pynodes.function_section)
         compiler.walk(node, visitor, walker=visitor)
         self.context[-1].append(visitor.function)
 
@@ -452,29 +293,32 @@ class AttributeVisitor(BaseVisitor):
 
     def __init__(self, token_parser):
         BaseVisitor.__init__(self, token_parser)
-        self.attributes = []
+        self.attributes = pynodes.class_attribute_section()
 
     def visitAssign(self, node):
         # Don't visit the expression itself, just the attribute nodes:
         for child in node.nodes:
             self.dispatch(child)
         expression_text = self.token_parser.rhs(node.lineno)
-        expression = Expression(node, expression_text)
+        expression = pynodes.expression_value()
+        expression.append(Text(expression_text))
         for attribute in self.attributes:
             attribute.append(expression)
 
     def visitAssName(self, node):
-        self.attributes.append(Attribute(node, node.name))
+        self.attributes.append(make_attribute(node.name,
+                                              lineno=node.lineno))
 
     def visitAssTuple(self, node):
         attributes = self.attributes
         self.attributes = []
         self.default_visit(node)
-        names = [attribute.name for attribute in self.attributes]
-        att_tuple = AttributeTuple(node, names)
-        att_tuple.lineno = self.attributes[0].lineno
+        n = pynodes.attribute_tuple()
+        n.extend(self.attributes)
+        n['lineno'] = self.attributes[0]['lineno']
+        attributes.append(n)
         self.attributes = attributes
-        self.attributes.append(att_tuple)
+        #self.attributes.append(att_tuple)
 
     def visitAssAttr(self, node):
         self.default_visit(node, node.attrname)
@@ -483,13 +327,17 @@ class AttributeVisitor(BaseVisitor):
         self.default_visit(node, node.attrname + '.' + suffix)
 
     def visitName(self, node, suffix):
-        self.attributes.append(Attribute(node, node.name + '.' + suffix))
+        self.attributes.append(make_attribute(node.name + '.' + suffix,
+                                              lineno=node.lineno))
 
 
 class FunctionVisitor(DocstringVisitor):
 
     in_function = 0
-    function_class = Function
+
+    def __init__(self, token_parser, function_class):
+        DocstringVisitor.__init__(self, token_parser)
+        self.function_class = function_class
 
     def visitFunction(self, node):
         if self.in_function:
@@ -497,9 +345,11 @@ class FunctionVisitor(DocstringVisitor):
             # Don't bother with nested function definitions.
             return
         self.in_function = 1
-        self.function = function = self.function_class(node, node.name)
-        if node.doc is not None:
-            function.append(Docstring(node, node.doc))
+        self.function = function = make_function_like_section(
+            name=node.name,
+            lineno=node.lineno,
+            doc=node.doc,
+            function_class=self.function_class)
         self.context.append(function)
         self.documentable = function
         self.parse_parameter_list(node)
@@ -511,10 +361,11 @@ class FunctionVisitor(DocstringVisitor):
         special = []
         argnames = list(node.argnames)
         if node.kwargs:
-            special.append(ExcessKeywordArguments(node, argnames[-1]))
+            special.append(make_parameter(argnames[-1], excess_keyword=True))
             argnames.pop()
         if node.varargs:
-            special.append(ExcessPositionalArguments(node, argnames[-1]))
+            special.append(make_parameter(argnames[-1],
+                                          excess_positional=True))
             argnames.pop()
         defaults = list(node.defaults)
         defaults = [None] * (len(argnames) - len(defaults)) + defaults
@@ -523,17 +374,21 @@ class FunctionVisitor(DocstringVisitor):
         #print >>sys.stderr, function_parameters
         for argname, default in zip(argnames, defaults):
             if type(argname) is TupleType:
-                parameter = ParameterTuple(node, argname)
+                parameter = pynodes.parameter_tuple()
+                for tuplearg in argname:
+                    parameter.append(make_parameter(tuplearg))
                 argname = normalize_parameter_name(argname)
             else:
-                parameter = Parameter(node, argname)
+                parameter = make_parameter(argname)
             if default:
-                parameter.append(Default(node, function_parameters[argname]))
+                n_default = pynodes.parameter_default()
+                n_default.append(Text(function_parameters[argname]))
+                parameter.append(n_default)
             parameters.append(parameter)
         if parameters or special:
             special.reverse()
             parameters.extend(special)
-            parameter_list = ParameterList(node)
+            parameter_list = pynodes.parameter_list()
             parameter_list.extend(parameters)
             self.function.append(parameter_list)
 
@@ -556,9 +411,9 @@ class ClassVisitor(AssignmentVisitor):
         #pdb.set_trace()
         for base in node.bases:
             self.visit(base)
-        self.klass = klass = Class(node, node.name, self.bases)
-        if node.doc is not None:
-            klass.append(Docstring(node, node.doc))
+        self.klass = klass = make_class_section(node.name, self.bases,
+                                                doc=node.doc,
+                                                lineno=node.lineno)
         self.context.append(klass)
         self.documentable = klass
         self.visit(node.code)
@@ -580,19 +435,17 @@ class ClassVisitor(AssignmentVisitor):
 
     def visitFunction(self, node):
         if node.name == '__init__':
-            visitor = InitMethodVisitor(self.token_parser)
+            visitor = InitMethodVisitor(self.token_parser,
+                                        function_class=pynodes.method_section)
+            compiler.walk(node, visitor, walker=visitor)
         else:
-            visitor = MethodVisitor(self.token_parser)
-        compiler.walk(node, visitor, walker=visitor)
+            visitor = FunctionVisitor(self.token_parser,
+                                      function_class=pynodes.method_section)
+            compiler.walk(node, visitor, walker=visitor)
         self.context[-1].append(visitor.function)
 
 
-class MethodVisitor(FunctionVisitor):
-
-    function_class = Method
-
-
-class InitMethodVisitor(MethodVisitor, AssignmentVisitor): pass
+class InitMethodVisitor(FunctionVisitor, AssignmentVisitor): pass
 
 
 class TokenParser:
@@ -746,6 +599,81 @@ class TokenParser:
         return parameters
 
 
+def make_docstring(doc, lineno):
+    n = pynodes.docstring()
+    if lineno:
+        # Really, only module docstrings don't have a line
+        # (@@: but maybe they should)
+        n['lineno'] = lineno
+    n.append(Text(doc))
+    return n
+
+def append_docstring(node, doc, lineno):
+    if doc:
+        node.append(make_docstring(doc, lineno))
+
+def make_class_section(name, bases, lineno, doc):
+    n = pynodes.class_section()
+    n['lineno'] = lineno
+    n.append(make_object_name(name))
+    for base in bases:
+        b = pynodes.class_base()
+        b.append(make_object_name(base))
+        n.append(b)
+    append_docstring(n, doc, lineno)
+    return n
+
+def make_object_name(name):
+    n = pynodes.object_name()
+    n.append(Text(name))
+    return n
+
+def make_function_like_section(name, lineno, doc, function_class):
+    n = function_class()
+    n['lineno'] = lineno
+    n.append(make_object_name(name))
+    append_docstring(n, doc, lineno)
+    return n
+
+def make_import_group(names, lineno, from_name=None):
+    n = pynodes.import_group()
+    n['lineno'] = lineno
+    if from_name:
+        n_from = pynodes.import_from()
+        n_from.append(Text(from_name))
+        n.append(n_from)
+    for name, alias in names:
+        n_name = pynodes.import_name()
+        n_name.append(Text(name))
+        if alias:
+            n_alias = pynodes.import_alias()
+            n_alias.append(Text(alias))
+            n_name.append(n_alias)
+        n.append(n_name)
+    return n
+
+def make_class_attribute(name, lineno):
+    n = pynodes.class_attribute()
+    n['lineno'] = lineno
+    n.append(Text(name))
+    return n
+
+def make_attribute(name, lineno):
+    n = pynodes.attribute()
+    n['lineno'] = lineno
+    n.append(make_object_name(name))
+    return n
+
+def make_parameter(name, excess_keyword=False, excess_positional=False):
+    n = pynodes.parameter()
+    n.append(make_object_name(name))
+    assert not excess_keyword or not excess_positional
+    if excess_keyword:
+        n['excess_keyword'] = 1
+    if excess_positional:
+        n['excess_positional'] = 1
+    return n
+
 def trim_docstring(text):
     """
     Trim indentation and blank lines from docstring text & return it.
@@ -787,6 +715,15 @@ def normalize_parameter_name(name):
 
 if __name__ == '__main__':
     import sys
-    filename = sys.argv[1]
-    content = open(filename).read()
-    print parse_module(content, filename)
+    args = sys.argv[1:]
+    if args[0] == '-v':
+        filename = args[1]
+        module_text = open(filename).read()
+        ast = compiler.parse(module_text)
+        visitor = compiler.visitor.ExampleASTVisitor()
+        compiler.walk(ast, visitor, walker=visitor, verbose=1)
+    else:
+        filename = args[0]
+        content = open(filename).read()
+        print parse_module(content, filename).pformat()
+
-- 
cgit v1.2.1


From c75f5e75d567181ab0267cf4224a3cea59adc53f Mon Sep 17 00:00:00 2001
From: ianbicking <ianbicking@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Tue, 23 Mar 2004 23:21:11 +0000
Subject: Reader parses docstrings (according to __docformat__) and produces
 full output.  The reader should thus be "done".  Run
 readers/python/__init__.py with a filename argument to get output in the DOM
 format.

A transformer will be necessary to translate this into the standard
docutils DOM.


git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1881 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 115 +++++++++++++++++++-------------
 1 file changed, 70 insertions(+), 45 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index c95d997c8..7f965e6e2 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -7,11 +7,10 @@
 """
 Parser for Python modules.
 
-The `parse_module()` function takes a module's text and file name, runs it
-through the module parser (using compiler.py and tokenize.py) and produces a
-"module documentation tree": a high-level AST full of nodes that are
-interesting from an auto-documentation standpoint.  For example, given this
-module (x.py)::
+The `parse_module()` function takes a module's text and file name,
+runs it through the module parser (using compiler.py and tokenize.py)
+and produces a parse tree of the source code, using the nodes as found
+in pynodes.py.  For example, given this module (x.py)::
 
     # comment
 
@@ -50,69 +49,95 @@ module (x.py)::
 
 The module parser will produce this module documentation tree::
 
-    <Module filename="test data">
-        <Comment lineno=1>
-            comment
-        <Docstring>
+    <module_section filename="test data">
+        <docstring>
             Docstring
-        <Docstring lineno="5">
+        <docstring lineno="5">
             Additional docstring
-        <Attribute lineno="7" name="__docformat__">
-            <Expression lineno="7">
+        <attribute lineno="7">
+            <object_name>
+                __docformat__
+            <expression_value lineno="7">
                 'reStructuredText'
-        <Attribute lineno="9" name="a">
-            <Expression lineno="9">
+        <attribute lineno="9">
+            <object_name>
+                a
+            <expression_value lineno="9">
                 1
-            <Docstring lineno="10">
+            <docstring lineno="10">
                 Attribute docstring
-        <Class bases="Super" lineno="12" name="C">
-            <Docstring lineno="12">
+        <class_section lineno="12">
+            <object_name>
+                C
+            <class_base>
+                Super
+            <docstring lineno="12">
                 C's docstring
-            <Attribute lineno="16" name="class_attribute">
-                <Expression lineno="16">
+            <attribute lineno="16">
+                <object_name>
+                    class_attribute
+                <expression_value lineno="16">
                     1
-                <Docstring lineno="17">
+                <docstring lineno="17">
                     class_attribute's docstring
-            <Method lineno="19" name="__init__">
-                <Docstring lineno="19">
+            <method_section lineno="19">
+                <object_name>
+                    __init__
+                <docstring lineno="19">
                     __init__'s docstring
-                <ParameterList lineno="19">
-                    <Parameter lineno="19" name="self">
-                    <Parameter lineno="19" name="text">
-                        <Default lineno="19">
+                <parameter_list lineno="19">
+                    <parameter lineno="19">
+                        <object_name>
+                            self
+                    <parameter lineno="19">
+                        <object_name>
+                            text
+                        <parameter_default lineno="19">
                             None
-                <Attribute lineno="22" name="self.instance_attribute">
-                    <Expression lineno="22">
+                <attribute lineno="22">
+                    <object_name>
+                        self.instance_attribute
+                    <expression_value lineno="22">
                         (text * 7 + ' whaddyaknow')
-                    <Docstring lineno="24">
+                    <docstring lineno="24">
                         instance_attribute's docstring
-        <Function lineno="27" name="f">
-            <Docstring lineno="27">
+        <function_section lineno="27">
+            <object_name>
+                f
+            <docstring lineno="27">
                 f's docstring
-            <ParameterList lineno="27">
-                <Parameter lineno="27" name="x">
-                    <Comment>
+            <parameter_list lineno="27">
+                <parameter lineno="27">
+                    <object_name>
+                        x
+                    <comment>
                         # parameter x
-                <Parameter lineno="27" name="y">
-                    <Default lineno="27">
+                <parameter lineno="27">
+                    <object_name>
+                        y
+                    <parameter_default lineno="27">
                         a * 5
-                    <Comment>
+                    <comment>
                         # parameter y
-                <ExcessPositionalArguments lineno="27" name="args">
-                    <Comment>
+                <parameter excess_positional="1" lineno="27">
+                    <object_name>
+                        args
+                    <comment>
                         # parameter args
-        <Attribute lineno="33" name="f.function_attribute">
-            <Expression lineno="33">
+        <attribute lineno="33">
+            <object_name>
+                f.function_attribute
+            <expression_value lineno="33">
                 1
-            <Docstring lineno="34">
+            <docstring lineno="34">
                 f.function_attribute's docstring
 
 (Comments are not implemented yet.)
 
 compiler.parse() provides most of what's needed for this doctree, and
-"tokenize" can be used to get the rest.  We can determine the line number from
-the compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the
-rest.
+"tokenize" can be used to get the rest.  We can determine the line
+number from the compiler.parse() AST, and the TokenParser.rhs(lineno)
+method provides the rest.
 
 The Docutils Python reader component will transform this module doctree into a
 Python-specific Docutils doctree, and then a `stylist transform`_ will
-- 
cgit v1.2.1


From 00a18ecf4a86081753e27d394473a70dc287e9ed Mon Sep 17 00:00:00 2001
From: wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Fri, 7 May 2004 12:07:30 +0000
Subject: Python 2.1 compatibility fix

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2037 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 7f965e6e2..ddfe21ea7 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -386,11 +386,11 @@ class FunctionVisitor(DocstringVisitor):
         special = []
         argnames = list(node.argnames)
         if node.kwargs:
-            special.append(make_parameter(argnames[-1], excess_keyword=True))
+            special.append(make_parameter(argnames[-1], excess_keyword=1))
             argnames.pop()
         if node.varargs:
             special.append(make_parameter(argnames[-1],
-                                          excess_positional=True))
+                                          excess_positional=1))
             argnames.pop()
         defaults = list(node.defaults)
         defaults = [None] * (len(argnames) - len(defaults)) + defaults
@@ -689,7 +689,11 @@ def make_attribute(name, lineno):
     n.append(make_object_name(name))
     return n
 
-def make_parameter(name, excess_keyword=False, excess_positional=False):
+def make_parameter(name, excess_keyword=0, excess_positional=0):
+    """
+    excess_keyword and excess_positional must each be either 1 or 0,
+    and they cannot both be 1.
+    """
     n = pynodes.parameter()
     n.append(make_object_name(name))
     assert not excess_keyword or not excess_positional
-- 
cgit v1.2.1


From 2384d0cf6cd5f596511784ff8b54c8ae1e57e8fa Mon Sep 17 00:00:00 2001
From: cben <cben@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Sun, 25 Jul 2004 01:45:27 +0000
Subject: Allow the test suite to survive unimportable test modules.

Notably, this fixes a crash on importing `moduleparser` under Python 2.1 from
``test/test_readers/test_python/test_functions.py``.  (This shouldn't happen
anyway, added to BUGS.txt)


git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2449 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index ddfe21ea7..8fd7ed67b 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -5,7 +5,7 @@
 # Copyright: This module has been placed in the public domain.
 
 """
-Parser for Python modules.
+Parser for Python modules.  Requires Python 2.2 or higher.
 
 The `parse_module()` function takes a module's text and file name,
 runs it through the module parser (using compiler.py and tokenize.py)
-- 
cgit v1.2.1


From c6a87ac3b3db32003899bd2a9d45f96c0fee2193 Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Thu, 5 Jan 2006 23:28:53 +0000
Subject: fixed markup bugs in docstrings; now works with Endo

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@4242 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')

diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 8fd7ed67b..03d57c948 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -140,7 +140,7 @@ number from the compiler.parse() AST, and the TokenParser.rhs(lineno)
 method provides the rest.
 
 The Docutils Python reader component will transform this module doctree into a
-Python-specific Docutils doctree, and then a `stylist transform`_ will
+Python-specific Docutils doctree, and then a "stylist transform" will
 further transform it into a generic doctree.  Namespaces will have to be
 compiled for each of the scopes, but I'm not certain at what stage of
 processing.
@@ -148,6 +148,8 @@ processing.
 It's very important to keep all docstring processing out of this, so that it's
 a completely generic and not tool-specific.
 
+::
+
 > Why perform all of those transformations?  Why not go from the AST to a
 > generic doctree?  Or, even from the AST to the final output?
 
@@ -176,7 +178,7 @@ from the Reader component itself.  One stylist transform could produce
 HappyDoc-like output, another could produce output similar to module docs in
 the Python library reference manual, and so on.
 
-It's for exactly this reason:
+It's for exactly this reason::
 
 >> It's very important to keep all docstring processing out of this, so that
 >> it's a completely generic and not tool-specific.
-- 
cgit v1.2.1