#! /usr/bin/env python

from contextlib import closing
import os, sys, subprocess, re, textwrap

def loadFile(path):
  """Return the complete contents of the file at `path` as one string."""
  # file objects are context managers themselves, so the handle is closed
  # as soon as the with-block is left.
  with open(path) as handle:
    contents = handle.read()
  return contents

def writeFile(path, buffer):
  """Write the string `buffer` to the file at `path`, replacing its contents."""
  # mode "w" truncates an existing file (or creates a new one); the handle
  # is closed when the with-block is left.
  with open(path, "w") as handle:
    handle.write(buffer)

def splitSections(buffer):
  """Split a list of header lines into (name, body) sections.

  A section consists of a title line, one empty separator line, and any
  number of body lines.  A body line is either empty or starts with
  whitespace; the first two characters of every body line are dropped.
  The first line that is neither empty nor indented starts the next
  section.

  This is a generator yielding tuples (name, body), where name is the
  lower-cased title and body is a list of strings.  The list passed as
  `buffer` is consumed in place while the generator runs.  An empty
  buffer yields nothing.

  Raises AssertionError if fewer than three lines remain at the start of a
  section, or if the title is not followed by an empty line.
  """
  # pre-set both names so that an empty buffer falls through to the final
  # check instead of hitting unbound locals.
  name = None
  body = []
  while buffer:
    assert len(buffer) >= 3
    name = buffer.pop(0).lower()
    # consume the separator outside of the assert statement: asserts are
    # removed under "python -O", and the line must be popped regardless.
    separator = buffer.pop(0)
    assert separator == ''
    body = []
    while buffer:
      line = buffer.pop(0)
      if line == '' or line[0].isspace():
        body.append(line[2:])
      else:
        # this line is the title of the next section: put it back, emit
        # the finished section, and restart the outer loop.
        buffer.insert(0, line)
        yield (name, body)
        body = []
        break
  # the last section is terminated by the end of the buffer
  if body:
    yield (name, body)

def collapseText(lines, width = 72):
  """Group `lines` into re-wrapped paragraphs and verbatim quote blocks.

  Every input line is one of three kinds:

    - an empty line, which ends the current block;
    - a line starting with whitespace, which is kept unchanged and
      collected together with adjacent lines of the same kind;
    - any other line, which is joined with the adjacent lines of the same
      kind and wrapped to at most `width` columns.

  Returns a list of blocks, each block being a list of strings.
  """
  TEXT, QUOTE = "text", "quote"
  wrap = textwrap.TextWrapper( width = width
                             , expand_tabs = False
                             , break_on_hyphens = False
                             , break_long_words = False
                             ).wrap
  blocks = []
  kind = None                   # kind of the block currently being extended
  for line in lines:
    if line == '':
      kind = None
      continue
    if line[0].isspace():
      if kind != QUOTE:
        blocks.append([])
        kind = QUOTE
      blocks[-1].append(line)
      continue
    if kind == TEXT:
      # re-flow the paragraph gathered so far together with the new line
      blocks[-1] = wrap("%s %s" % (' '.join(blocks[-1]), line))
    else:
      blocks.append(wrap(line))
      kind = TEXT
  return blocks

class Macro:
  """A macro file split into its documentation header and its code body.

  Attributes set by the constructor:

    name     file name of the macro without directory and extension
    body     list of code lines, without "#serial" lines and without
             leading or trailing empty lines
    authors  list of dicts with the keys "year", "name" and (if the
             copyright line carries an address) "email"
    serial   serial number of the macro as an int

  In addition, every header section that is present is stored as an
  attribute named after the section: "synopsis" (list of lines),
  "description", "license" and "obsolete" (each a list of blocks as
  returned by collapseText).
  """

  # revision at which the git-based serial number count starts
  _SERIAL_BASE_COMMIT = "054e8ad8c766afa7059d8cd4a81bbfa99133ef5e"

  def __init__(self, filePath, computeSerialNumber=False):
    """Load and parse the macro file at `filePath`.

    If `computeSerialNumber` is true, the serial number is derived from
    the git history of the file; otherwise it is read from the "#serial"
    line that must be the first line of the macro body.

    Raises Exception for empty, malformed, or unknown header sections,
    AssertionError if the header contains a tab character, if the license
    section names no copyright holder, or if the "#serial" line is
    missing, and ValueError if the file has no empty line separating the
    header from the body.
    """
    self.name = os.path.splitext(os.path.basename(filePath))[0]
    # header and body are separated by an empty line.
    (header, macroBody) = loadFile(filePath).split("\n\n", 1)
    self.body = macroBody.split('\n')
    # headers may not contain tab characters
    assert '\t' not in header, "%s: header contains a tab character" % filePath
    # drop initial header (if present); count is passed by keyword because
    # passing it positionally is deprecated in recent Python versions.
    header = re.sub(r"^\n*# =+\n#[^\n]*\n# =+\n(#\n)+", '', header, count=1)
    # split buffer into lines and drop initial "# " prefix in the process
    header = [l[2:] for l in header.split('\n')]
    # set defaults
    self.authors = []
    # parse each section in the remaining list
    for (key, section) in splitSections(header):
      # drop empty lines at beginning and end of the section
      section = self._stripBlankLines(section)
      if not section:
        raise Exception("%s: empty %r section" % (filePath, key))
      # each section has its own parser
      if key == "synopsis":
        if '' in section:
          raise Exception("%s: malformed synopsis section" % filePath)
      elif key == "description":
        section = collapseText(section)
      elif key == "license":
        section = collapseText(self._parseLicense(section, filePath))
      elif key == "obsolete macro":
        key = "obsolete"
        section = collapseText(section)
      else:
        raise Exception("%s: unknown section %r in macro" % (filePath, key))
      self.__dict__[key] = section
    # determine the macro's serial number
    if computeSerialNumber:     # compute the number from git
      logMessages = subprocess.check_output(
        [ "git", "log", "--oneline"
        , self._SERIAL_BASE_COMMIT + "..HEAD"
        , "--", filePath
        ])
      # NOTE(review): splitting an empty log still gives one element, so
      # the serial number is at least 1 -- confirm that this is intended.
      logLines = logMessages.rstrip(b'\n').split(b"\n")
      self.serial = len(logLines)
      # "git diff --quiet --exit-code" returns non-zero if the file
      # differs from HEAD; such a file counts as one more revision.
      modified = subprocess.call(["git", "diff", "--quiet", "--exit-code", "HEAD", "--", filePath])
      if modified:
        self.serial += 1
    else:                       # trust the m4 file
      assert self.body[0].startswith("#serial"), "%s: missing #serial line" % filePath
      self.serial = int(self.body[0].split()[1])
    # drop the original serial number from the body
    codeLines = [ l for l in self.body if not l.startswith("#serial") ]
    # drop whitespace from beginning and end of body
    self.body = self._stripBlankLines(codeLines)

  @staticmethod
  def _stripBlankLines(lines):
    """Return a copy of `lines` without leading and trailing empty strings."""
    first, last = 0, len(lines)
    while first < last and lines[first] == '':
      first += 1
    while last > first and lines[last - 1] == '':
      last -= 1
    return lines[first:last]

  def _parseLicense(self, body, filePath):
    """Record the copyright holders and return the remaining license text.

    The leading "Copyright (c) YEARS NAME [<EMAIL>]" lines of `body` are
    appended to self.authors.  They must be followed by an empty line,
    which is dropped; everything after it is returned as a list of lines.
    """
    while body:
      match = re.match(r"Copyright \([cC]\) ([0-9.,-]+) (.*)", body[0])
      if not match: break
      (year, name) = (match.group(1), match.group(2))
      match = re.match(r"(.*) <(.*)>", name)
      if match:
        (name, email) = (match.group(1), match.group(2))
        self.authors.append(dict(year = year, name = name, email = email))
      else:
        self.authors.append(dict(year = year, name = name))
      body.pop(0)
    assert self.authors, "%s: license section names no copyright holder" % filePath
    # the copyright lines must be separated from the license text by an
    # empty line; a section that ends right after them is malformed, too.
    if not body or body.pop(0) != '':
      raise Exception("%s: malformed license section" % filePath)
    return body

  def __repr__(self):
    """Return the repr of the instance's attribute dictionary."""
    return repr(self.__dict__)