sandbox/blais/elisp-reader/elisp2rst


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181

#!/usr/bin/env python

"""elisp2rst [<options>] <file.el>

Emacs-LISP to reStructuredText converter.
"""

import sys, re, os


# reStructuredText section adornment characters, indexed by heading depth:
# elisp2rst uses index 0 for the first heading it emits and moves to the next
# character for each deeper level.
hierarchy = ('=', '-', '~', '+', '^')

def underline(title, char, outfile):
    """
    Outputs 'title' with underline character 'char' to file 'outfile'.

    The title is written on one line, followed by a second line made of
    'char' repeated once per character of the title.

    :param title: heading text, without a trailing newline.
    :param char: the adornment character to repeat under the title.
    :param outfile: any object with a ``write`` method (an open text file).
    """
    # Use write() instead of the Python 2-only "print >> file" statement: on
    # Python 3 that form parses as a shift expression and raises TypeError.
    outfile.write('%s\n' % title)
    outfile.write('%s\n' % (char * len(title)))

def printpar(paragraph, outfile, indent=None):
    """
    Output a paragraph's lines to 'outfile', preceded by one blank line.

    :param paragraph: sequence of text lines; trailing whitespace on each
        line is stripped before it is written.
    :param outfile: any object with a ``write`` method (an open text file).
    :param indent: optional string written in front of every line; nothing
        is prepended when it is None or empty.
    """
    write = outfile.write
    # Always write '\n', never os.linesep: a text-mode file already translates
    # '\n' to the platform line ending, so os.linesep would be doubled up on
    # platforms where it is not '\n'.
    write('\n')
    for line in paragraph:
        if indent:
            write(indent)
        write(line.rstrip())
        write('\n')

        
class ElispFormatError(Exception):
    """
    Error raised during the parsing of Emacs-LISP code.

    elisp2rst raises it when it meets a line that is not a comment, is not
    blank, and does not look like the start of a LISP form.
    """


def elisp2rst(infile, outfile):
    """
    Reads Emacs-LISP text and converts its leading comments into
    reStructuredText.

    :param infile: an open file object, or any other iterable of text lines.
    :param outfile: an object with a ``write`` method (an open text file).
    :raises ElispFormatError: if a line is not a comment, not blank, and
        does not look like the start of a LISP form.

    This code does the following transformations:

    - the first line (``;;; name --- description``) becomes a title and a
      subtitle;
    - the first paragraph, if it starts with 'Copyright', and the other
      paragraphs found before the ';;; Commentary:' section are placed in a
      ``.. note::`` directive;
    - the second paragraph, if it looks like RFC822 headers, is converted
      into a field list;
    - the ``;;; Section:`` titles are given underlines (the trailing colon is
      removed);
    - text quoted like `this' becomes an inline literal.

    Processing stops at the ';;; Code:' section or at the first line that
    looks like LISP code.  Nothing is written for an empty input.

    FIXME TODO:
    - adds an extra colon at the end of appropriate lines for creating literal
      blocks
    """
    write = outfile.write
    hidx = 0 # index in the hierarchy

    # Accept any iterable of lines; for a file object iter() returns the file
    # itself, so the loop below carries on right after the title line.
    infile = iter(infile)

    # Get the title and subtitle lines.
    try:
        titleline = next(infile)
    except StopIteration:
        # Empty input: there is nothing to convert.
        return
    mo = re.match(r'^;;; (.*) --- (.*)$', titleline)
    if mo:
        title, subtitle = [part.strip() for part in mo.group(1, 2)]
    else:
        # Drop the line ending, otherwise it is printed as part of the title
        # and makes the underline one character too long.
        title, subtitle = titleline[4:].rstrip(), None
    if title:
        underline(title, hierarchy[hidx], outfile)
        hidx += 1
    if subtitle:
        underline(subtitle, hierarchy[hidx], outfile)
        hidx += 1

    # List of paragraphs that appear before the commentary, and whether the
    # ';;; Commentary:' section has been reached.
    precommentary, done_pre = [], False

    # Copyright years and RFC822 field list, if present in the pre-commentary.
    copyright_years, fields = None, None

    # Process the rest of the lines until we hit a ';;; Code:' section.
    parnum = 0
    paragraph = []
    for line in infile:
        section = re.match(r';;; (.*)', line)
        if section is None:
            # Detect LISP code and exit if we find some.
            if re.match(r'\([a-zA-Z-]+( |$)', line):
                break

            # Normal text contents.
            text = re.match(r';;? ?(.*)$', line)
            if text:
                contents = re.sub("`(.*?)'", "``\\1``", text.group(1))
                paragraph.append(contents)
                continue

            if not re.match(r'^[ \t\f]*$', line):
                raise ElispFormatError("Unknown line format")
        elif section.group(1).startswith('Code:'):
            # If we reached the code marker, stop processing.
            break

        # Here the line is either blank or a section title: both end the
        # current paragraph.  Flushing before a title keeps text that comes
        # right before a heading from being emitted after it.
        if paragraph:
            if done_pre:
                printpar(paragraph, outfile)
            elif parnum == 0 and paragraph[0].startswith('Copyright'):
                copyright_years = paragraph
            elif parnum == 1 and re.match(r'[A-Za-z]+: ', paragraph[0]):
                fields = paragraph
            else:
                precommentary.append(paragraph)
            paragraph = []
            parnum += 1

        if section is None:
            continue

        # Section title.
        title = section.group(1)
        if title.startswith('Commentary:'):
            done_pre = True

            # Output the pre-commentary stuff.
            if fields:
                for field in fields:
                    if field.startswith((' ', '\t')):
                        # Continuation line of the previous field.
                        write(' %s\n' % field)
                    else:
                        write(':%s\n' % field)
                write('\n')

            prepars = []
            if copyright_years:
                prepars.append(copyright_years)
            prepars.extend(precommentary)
            if prepars:
                write('.. note::\n')
                write('\n')
                for par in prepars:
                    printpar(par, outfile, indent='   ')

        if title.endswith(':'):
            title = title[:-1]

        write('\n')
        underline(title, hierarchy[hidx], outfile)

    # NOTE(review): when the input has no ';;; Commentary:' section, the
    # collected pre-commentary paragraphs are never written; only a paragraph
    # still pending here is.  Left as is -- confirm whether that is intended.
    if paragraph:
        printpar(paragraph, outfile)
                    
                            
def main():
    """
    Command-line entry point.

    Expects exactly one positional argument, the Emacs-LISP file to convert,
    and writes the reStructuredText result to standard output.  The module
    docstring is used as the usage message.
    """
    import optparse
    parser = optparse.OptionParser(__doc__.strip())
    _, args = parser.parse_args()

    if len(args) != 1:
        parser.error("You must specify a single Emacs-LISP file as input.")
    elispfn = args[0]

    # Close the input file deterministically, even if the conversion raises.
    with open(elispfn, 'r') as elispf:
        elisp2rst(elispf, sys.stdout)


# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()