#! /usr/bin/env python
#
# Parser for the structured comment header of an m4 macro file.
#
# A macro file starts with a "# "-prefixed header made of titled sections
# (synopsis, description, license, obsolete macro), followed by an empty
# line and the macro body.  The code below is written to run unchanged on
# both Python 2 and Python 3.

from __future__ import with_statement

from contextlib import closing
import re
import os.path as path
import sys
import textwrap


def _require(condition, message):
    """Raise AssertionError(message) unless *condition* is true.

    Unlike the ``assert`` statement this check is not stripped by
    ``python -O``; AssertionError is kept so that callers relying on the
    exception type of the former ``assert`` statements keep working.
    """
    if not condition:
        raise AssertionError(message)


def loadFile(path):
    """Return the complete contents of the file at *path* as a string."""
    with closing(open(path)) as fd:
        return fd.read()


def writeFile(path, buffer):
    """Write the string *buffer* to the file at *path*, replacing its contents."""
    with closing(open(path, "w")) as fd:
        fd.write(buffer)


def splitSections(buffer):
    """Split a list of header lines into ``(name, body)`` sections.

    Each section consists of a title line (not starting with whitespace),
    an empty line, and body lines that are either empty or start with
    whitespace.  The title is lower-cased and the first two characters of
    every body line are dropped.

    *buffer* must be a list; it is consumed in place while iterating.

    Yields:
        ``(name, body)`` tuples where *body* is a list of strings.

    Raises:
        AssertionError: if fewer than three lines remain at the start of a
            section, or if the title is not followed by an empty line.
    """
    while buffer:
        _require(len(buffer) >= 3,
                 "section needs a title, an empty line and a body")
        name = buffer.pop(0).lower()
        # Consume the separator outside of any ``assert`` so that the line
        # is removed even when assertions are disabled.
        separator = buffer.pop(0)
        _require(separator == '',
                 "section title %r must be followed by an empty line" % name)
        body = []
        # Body lines are empty or indented; the first other line is the
        # title of the next section and is left in the buffer.
        while buffer and (buffer[0] == '' or buffer[0][0].isspace()):
            body.append(buffer.pop(0)[2:])
        if buffer or body:
            yield (name, body)


def collapseText(lines, width=72):
    """Group *lines* into paragraphs and re-wrap plain text.

    An empty line ends the current paragraph.  Lines starting with
    whitespace are kept verbatim, and consecutive such lines form one
    paragraph.  Any other line is plain text: it is joined with the text
    lines directly preceding it and the result is wrapped to *width*
    columns (long words and hyphenated words are never broken).

    Returns:
        A list of paragraphs, each paragraph being a list of lines.
    """
    wrapper = textwrap.TextWrapper(
        width=width,
        expand_tabs=False,
        break_on_hyphens=False,
        break_long_words=False,
    )
    body = []
    prev = None     # kind of the paragraph being built: None, "quote", "text"
    for line in lines:
        if line == '':
            prev = None
        elif line[0].isspace():
            if prev == "quote":
                body[-1].append(line)
            else:
                body.append([line])
            prev = "quote"
        else:
            if prev == "text":
                # Re-wrap the whole paragraph including the new line.
                newtext = ' '.join(body[-1]) + ' ' + line
                body[-1] = wrapper.wrap(newtext)
            else:
                body.append(wrapper.wrap(line))
            prev = "text"
    return body


class Macro:
    """Parsed representation of one macro file.

    Attributes set by the constructor:
        name      -- file name without directory and extension
        body      -- lines of the file after the header
        authors   -- list of dicts with ``year``, ``name`` and, when
                     present in the copyright line, ``email``
        m4header  -- three-line "# " comment banner containing the
                     macro's URL between two separator lines
    In addition, every section found in the header is stored as an
    attribute named after it (``synopsis``, ``description``, ``license``,
    ``obsolete``).
    """

    def __init__(self, filePath):
        """Load and parse the macro file at *filePath*.

        Raises:
            ValueError: if the file has no empty line separating header
                and body.
            AssertionError: if the header contains a tab character, a
                section is structurally broken, or the license section
                names no copyright holder.
            Exception: if a section is malformed or unknown.
        """
        self.name = path.splitext(path.basename(filePath))[0]
        # header and body are separated by an empty line.
        (header, body) = loadFile(filePath).split("\n\n", 1)
        self.body = body.split('\n')
        # headers may not contain tab characters
        _require('\t' not in header,
                 "%s: header contains a tab character" % filePath)
        # drop initial header (if present)
        header = re.sub(r"^\n*# =+\n#[^\n]*\n# =+\n(#\n)+", '', header,
                        count=1)
        # split buffer into lines and drop initial "# " prefix in the
        # process; this must be a real list because splitSections() pops
        # from it.
        header = [line[2:] for line in header.split('\n')]
        # set defaults
        self.authors = []
        url = "http://www.nongnu.org/autoconf-archive/%s" % (self.name + ".html")
        lineLen = max(75, len(url) + 2)
        separator = '=' * lineLen
        # Integer division: a string can only be repeated an int number
        # of times.
        padding = ' ' * ((lineLen - len(url)) // 2)
        self.m4header = ("# %s\n" * 3) % (separator, padding + url, separator)
        # parse each section in the remaining list
        for (key, body) in splitSections(header):
            # drop empty lines at beginning and end of body
            while body and body[0] == '':
                body.pop(0)
            while body and body[-1] == '':
                body.pop(-1)
            # each section has its own parser
            if key == "synopsis":
                if '' in body:
                    raise Exception("%s: malformed synopsis section" % filePath)
            elif key == "description":
                body = collapseText(body)
            elif key == "license":
                # Leading "Copyright (c) YEARS NAME [<EMAIL>]" lines name
                # the authors; they end at the first non-matching line.
                while body:
                    match = re.match(r"Copyright \(c\) ([0-9.,-]+) (.*)", body[0])
                    if not match:
                        break
                    (year, name) = (match.group(1), match.group(2))
                    match = re.match(r"(.*) <(.*)>", name)
                    if match:
                        (name, email) = (match.group(1), match.group(2))
                        self.authors.append(dict(year=year, name=name, email=email))
                    else:
                        self.authors.append(dict(year=year, name=name))
                    body.pop(0)
                _require(self.authors,
                         "%s: license section names no copyright holder" % filePath)
                # An empty line must separate the copyright lines from the
                # license text.
                if not body or body.pop(0) != '':
                    raise Exception("%s: malformed license section" % filePath)
                body = collapseText(body)
            elif key == "obsolete macro":
                if '' in body:
                    raise Exception("%s: malformed obsoleted section" % filePath)
                key = "obsolete"
                body = collapseText(body)
            else:
                raise Exception("%s: unknown section %r in macro" % (filePath, key))
            setattr(self, key, body)

    def __repr__(self):
        """Return the repr of the instance's attribute dictionary."""
        return repr(self.__dict__)