macro.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

#! /usr/bin/env python

from __future__ import with_statement
from contextlib import closing
import re
import os.path as path
import sys
import textwrap
import encodings

def loadFile(path, encoding):
  """Return the whole content of the file `path`, decoded with `encoding`.

  path     -- name of the file to read
  encoding -- codec name understood by the codec registry (e.g. "latin1")

  Raises LookupError if `encoding` does not name a known codec, and
  whatever open() raises if the file cannot be opened.
  """
  # Function-scope import: codecs is not among the module-level imports.
  import codecs
  # codecs.lookup() is the public face of the codec registry and raises
  # LookupError for an unknown name, where encodings.search_function()
  # silently returns None. The stream reader decodes raw bytes, so the file
  # has to be opened in binary mode; a text-mode handle would either hand it
  # already-decoded text or apply newline translation to the encoded bytes.
  with closing( codecs.lookup(encoding).streamreader(open(path, "rb")) ) as fd:
    return fd.read()

def writeFile(path, encoding, buffer):
  """Write the text `buffer` to the file `path`, encoded with `encoding`.

  path     -- name of the file to create or overwrite
  encoding -- codec name understood by the codec registry (e.g. "latin1")
  buffer   -- text to write

  Raises LookupError if `encoding` does not name a known codec, and
  whatever open() raises if the file cannot be opened for writing.
  """
  # Function-scope import: codecs is not among the module-level imports.
  import codecs
  # The stream writer emits encoded bytes, so the target must be opened in
  # binary mode; codecs.lookup() raises LookupError for an unknown codec
  # instead of returning None like encodings.search_function().
  with closing( codecs.lookup(encoding).streamwriter(open(path, "wb")) ) as fd:
    fd.write(buffer)

def splitSections(buffer):
  """Split header lines into sections, yielding one (name, body) pair each.

  buffer -- iterable of header lines (strings without line terminator)

  A section consists of a name line that does not start with whitespace,
  one empty separator line, and then body lines that are each either empty
  or start with whitespace. The yielded name is lower-cased and the first
  two characters of every body line are dropped.

  A section that is followed by another section is always yielded; the
  last section is yielded only if it has at least one body line. Empty
  input yields nothing.

  The input is copied rather than consumed, so any iterable is accepted and
  a list passed by the caller is left unchanged.

  Raises AssertionError when fewer than three lines remain at the start of
  a section or when the line after the name is not empty.
  """
  lines = list(buffer)
  total = len(lines)
  pos = 0
  while pos < total:
    assert total - pos >= 3, "truncated section at line %d" % (pos + 1)
    name = lines[pos].lower()
    # The separator is read unconditionally and only then checked, so that
    # parsing does not depend on assert statements being executed.
    separator = lines[pos + 1]
    assert separator == '', "section name must be followed by an empty line"
    pos += 2
    body = []
    while pos < total:
      line = lines[pos]
      if line != '' and not line[0].isspace():
        # a non-indented line is the name of the next section
        break
      body.append(line[2:])
      pos += 1
    # pos < total means the section was ended by the next section's name.
    if pos < total or body:
      yield (name, body)

def collapseText(lines, width = 72):
  """Group `lines` into blocks and re-flow running text to `width` columns.

  lines -- sequence of strings without line terminator
  width -- maximum line length handed to textwrap.TextWrapper

  Returns a list of blocks, each block being a list of lines:
    - an empty line ends the current block and is not part of the result;
    - consecutive lines starting with whitespace form a "quote" block and
      are kept verbatim;
    - consecutive other lines form a "text" block that is re-wrapped each
      time a line is added to it.
  """
  wrapper = textwrap.TextWrapper( width = width
                                , expand_tabs = False
                                , break_on_hyphens = False
                                , break_long_words = False
                                )
  blocks = []
  mode = None       # kind of the block still open: None, "quote" or "text"
  for current in lines:
    if current == '':
      mode = None
      continue
    kind = "quote" if current[0].isspace() else "text"
    if kind != mode:
      # a change of kind (or a preceding empty line) starts a new block
      if kind == "quote":
        blocks.append([current])
      else:
        blocks.append(wrapper.wrap(current))
      mode = kind
    elif kind == "quote":
      blocks[-1].append(current)
    else:
      # re-flow the paragraph collected so far together with the new line
      blocks[-1] = wrapper.wrap(' '.join(blocks[-1] + [current]))
  return blocks

class Macro:
  """In-memory representation of one macro file.

  The file consists of a commented header and a body, separated by the
  first empty line. The header is split into sections by splitSections().

  Attributes set by the constructor:
    name      -- file name without directory and extension
    body      -- list of the lines following the header
    authors   -- list of dicts with the keys "year", "name" and, when the
                 copyright line has a "<...>" part, "email"
    m4header  -- three-line comment banner containing the macro's URL
  In addition every header section is stored under its lower-cased name;
  the recognized sections are "synopsis" (list of lines), "description" and
  "license" (both as returned by collapseText()).
  """

  def __init__(self, filePath, encoding):
    """Load and parse the macro file `filePath`, decoding it with `encoding`.

    Raises ValueError if the file has no empty line separating header and
    body, and Exception if a section is empty, malformed or unknown.
    """
    self.name = path.splitext(path.basename(filePath))[0]
    # header and body are separated by an empty line.
    parts = loadFile(filePath, encoding).split("\n\n", 1)
    if len(parts) != 2:
      raise ValueError("%s: no empty line between header and body" % filePath)
    (header, body) = parts
    self.body = body.split('\n')
    # drop initial header (if present)
    header = re.sub(r"^\n*# =+\n#[^\n]*\n# =+\n(#\n)+", '', header, count = 1)
    # split buffer into lines and drop initial "# " prefix in the process;
    # a real list is built because the section parser needs a sequence
    header = [line[2:] for line in header.split('\n')]
    # set defaults
    self.authors = []
    self.m4header = self._formatM4Header(self.name)
    # parse each section in the remaining list
    for (key, section) in splitSections(header):
      # drop empty lines at beginning and end of the section
      while section and section[0]  == '': section.pop(0)
      while section and section[-1] == '': section.pop()
      if not section:
        raise Exception("%s: empty %s section" % (filePath, key))
      # each section has its own parser
      if key == "synopsis":
        if '' in section:
          raise Exception("%s: malformed synopsis section" % filePath)
      elif key == "description":
        section = collapseText(section)
      elif key == "license":
        section = collapseText(self._parseLicense(filePath, section))
      else:
        raise Exception("%s: unknown section %r in macro" % (filePath, key))
      self.__dict__[key] = section

  @staticmethod
  def _formatM4Header(name):
    """Return the comment banner that shows the URL of macro `name` centered."""
    url = "http://www.nongnu.org/autoconf-archive/%s" % (name + ".html")
    lineLen = max(75,len(url) + 2)
    separator = '=' * lineLen
    # floor division: the padding width must be an integer on every Python
    padding = ' ' * ((lineLen - len(url)) // 2)
    return "# %s\n"*3 % (separator, padding + url, separator)

  def _parseLicense(self, filePath, section):
    """Move the leading copyright lines of `section` into self.authors.

    Returns the lines following the empty line after the copyright lines.
    Raises Exception if there is no copyright line or if the copyright
    lines are not followed by an empty line.
    """
    while section:
      match = re.match(r"Copyright \(c\) ([0-9.,-]+) (.*)", section[0])
      if not match: break
      (year,name) = (match.group(1), match.group(2))
      match = re.match(r"(.*) <(.*)>", name)
      if match:
        (name,email) = (match.group(1), match.group(2))
        self.authors.append(dict(year = year, name = name, email = email))
      else:
        self.authors.append(dict(year = year, name = name))
      section.pop(0)
    if not self.authors or not section or section.pop(0) != '':
      raise Exception("%s: malformed license section" % filePath)
    return section

  def __repr__(self):
    """Return the repr() of the attribute dictionary."""
    return repr(self.__dict__)

if __name__ == "__main__":
  # Command line: [options] TEMPLATE-GROUP-FILE MACRO.m4 ...
  # Every macro file is parsed and rendered through the "canon" template of
  # the template group; the result is written into the output directory.
  from stringtemplate3 import StringTemplateGroup
  from optparse import OptionParser

  opts = OptionParser()
  opts.add_option('', "--input-encoding", dest = "inEncode", default = "latin1")
  opts.add_option('', "--output-encoding", dest = "outEncode", default = "latin1")
  opts.add_option('', "--template-lexer",  dest = "lexer", default = "angle-bracket")
  opts.add_option('', "--output-dir", dest = "outDir", default = "stage")
  opts.add_option('', "--output-suffix", dest = "suffix", default = ".m4")
  (options, args) = opts.parse_args()
  if not args:
    # opts.error() prints the usage message and exits with status 2
    opts.error("missing template group file argument")
  stFile = args.pop(0)
  # NOTE(review): this assumes StringTemplateGroup reads the whole group file
  # inside its constructor, so the handle can be closed right afterwards --
  # confirm against the stringtemplate3 version in use.
  with open(stFile) as stFd:
    formatter = StringTemplateGroup(file = stFd, lexer = options.lexer)
  for m4File in args:
    (stem,suff) = path.splitext(path.basename(m4File))
    if suff != ".m4":
      opts.error("%s: macro file name must end in .m4" % m4File)
    outFile = path.join(options.outDir, stem + options.suffix)
    # explicit check instead of an assert: it must also protect the input
    # file from being overwritten when Python runs with -O
    if outFile == m4File:
      opts.error("%s: output file would overwrite the input file" % m4File)
    print("%s -> %s" % (m4File, outFile))
    m = Macro(m4File, options.inEncode)
    f = formatter.getInstanceOf("canon")
    # every attribute of the macro becomes a template attribute
    for (k,v) in m.__dict__.items():
      f[k] = v
    writeFile(outFile, options.outEncode, f.toString().strip() + '\n')