summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorstrank <strank@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-03-05 10:02:30 +0000
committerstrank <strank@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-03-05 10:02:30 +0000
commita758fd20ba593453f163e8375ac2c895de090038 (patch)
tree5ce296a53da323264c4be0a7849ff8a6daac7741
parent7e55210d99b28027bb29b0e58f02f315dfd88605 (diff)
downloaddocutils-lossless-rst-writer.tar.gz
Lossless rst parser part (not yet functional)lossless-rst-writer
git-svn-id: http://svn.code.sf.net/p/docutils/code/branches/lossless-rst-writer@6260 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r--docutils/parsers/losslessrst/__init__.py54
-rw-r--r--docutils/parsers/losslessrst/states.py2886
-rw-r--r--test/test_functional_losslessrst.py (renamed from test/test_rst2rst_functional.py)4
3 files changed, 2942 insertions, 2 deletions
diff --git a/docutils/parsers/losslessrst/__init__.py b/docutils/parsers/losslessrst/__init__.py
new file mode 100644
index 000000000..07eddabd5
--- /dev/null
+++ b/docutils/parsers/losslessrst/__init__.py
@@ -0,0 +1,54 @@
+# $Id$
+# Author: Stefan Rank <strank(AT)strank(DOT)info>
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is the ``docutils.parsers.losslessrst`` package.
+A subclass of the rst parser that retains additional information
+during parsing to allow exact reconstruction of the input.
+(See the ``docutils.parsers.rst`` parser for general documentation.)
+
+The node structure created is fully compatible with the rst parser.
+Additional information is stored in one attribute per node.
+(named lossless?)
+
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import docutils.parsers
+import docutils.statemachine
+from docutils.parsers.losslessrst import states
+from docutils import frontend, nodes
+
+
+class Parser(docutils.parsers.rst.Parser):
+
+ """The lossless reStructuredText parser."""
+
+ supported = ('losslessrst', 'lossless')
+ """Aliases this parser supports."""
+
+ def __init__(self, rfc2822=None, inliner=None):
+ if rfc2822:
+ self.initial_state = 'RFC2822Body'
+ else:
+ self.initial_state = 'Body'
+ self.state_classes = states.state_classes
+ self.inliner = inliner
+
+ def parse(self, inputstring, document):
+ """Parse `inputstring` and populate `document`, a document tree."""
+ self.setup_parse(inputstring, document)
+ self.statemachine = states.RSTStateMachine(
+ state_classes=self.state_classes,
+ initial_state=self.initial_state,
+ debug=document.reporter.debug_flag)
+ inputlines = docutils.statemachine.string2lines(
+ inputstring, tab_width=document.settings.tab_width,
+ convert_whitespace=1)
+ self.statemachine.run(inputlines, document, inliner=self.inliner)
+ self.finish_parse()
+
+
diff --git a/docutils/parsers/losslessrst/states.py b/docutils/parsers/losslessrst/states.py
new file mode 100644
index 000000000..ab736ac4a
--- /dev/null
+++ b/docutils/parsers/losslessrst/states.py
@@ -0,0 +1,2886 @@
+# $Id$
+# Author: Stefan Rank <strank(AT)strank(DOT)info>
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is the ``docutils.parsers.losslessrst.states`` module,
+subclass of the rst.states module.
+It subclasses some of the state classes as necessary for losslessrst.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+# import all imported names from original states module
+from docutils.parsers.rst.states import (sys, re, roman,
+ TupleType, FunctionType, MethodType,
+ nodes, statemachine, utils, urischemes,
+ ApplicationError, DataError,
+ StateMachineWS, StateWS,
+ normalize_name,
+ whitespace_normalize_name,
+ escape2null, unescape, column_width,
+ directives, languages, tableparser, roles,
+ _fallback_language_module)
+
+from docutils.parsers.rst.states import (MarkupError,
+ UnknownInterpretedRoleError,
+ InterpretedRoleNotImplementedError,
+ ParserError,
+ MarkupMismatch,
+ Struct)
+
+
+class RSTStateMachine(StateMachineWS):
+
+ """
+ reStructuredText's master StateMachine.
+
+ The entry point to reStructuredText parsing is the `run()` method.
+ """
+
+ def run(self, input_lines, document, input_offset=0, match_titles=1,
+ inliner=None):
+ """
+ Parse `input_lines` and modify the `document` node in place.
+
+ Extend `StateMachineWS.run()`: set up parse-global data and
+ run the StateMachine.
+ """
+ self.language = languages.get_language(
+ document.settings.language_code)
+ self.match_titles = match_titles
+ if inliner is None:
+ inliner = Inliner()
+ inliner.init_customizations(document.settings)
+ self.memo = Struct(document=document,
+ reporter=document.reporter,
+ language=self.language,
+ title_styles=[],
+ section_level=0,
+ section_bubble_up_kludge=0,
+ inliner=inliner)
+ self.document = document
+ self.attach_observer(document.note_source)
+ self.reporter = self.memo.reporter
+ self.node = document
+ results = StateMachineWS.run(self, input_lines, input_offset,
+ input_source=document['source'])
+ assert results == [], 'RSTStateMachine.run() results should be empty!'
+ self.node = self.memo = None # remove unneeded references
+
+
+class NestedStateMachine(StateMachineWS):
+
+ """
+ StateMachine run from within other StateMachine runs, to parse nested
+ document structures.
+ """
+
+ def run(self, input_lines, input_offset, memo, node, match_titles=1):
+ """
+ Parse `input_lines` and populate a `docutils.nodes.document` instance.
+
+ Extend `StateMachineWS.run()`: set up document-wide data.
+ """
+ self.match_titles = match_titles
+ self.memo = memo
+ self.document = memo.document
+ self.attach_observer(self.document.note_source)
+ self.reporter = memo.reporter
+ self.language = memo.language
+ self.node = node
+ results = StateMachineWS.run(self, input_lines, input_offset)
+ assert results == [], ('NestedStateMachine.run() results should be '
+ 'empty!')
+ return results
+
+
+class RSTState(StateWS):
+
+ """
+ reStructuredText State superclass.
+
+ Contains methods used by all State subclasses.
+ """
+
+ nested_sm = NestedStateMachine
+
+ def __init__(self, state_machine, debug=0):
+ self.nested_sm_kwargs = {'state_classes': state_classes,
+ 'initial_state': 'Body'}
+ StateWS.__init__(self, state_machine, debug)
+
+ def runtime_init(self):
+ StateWS.runtime_init(self)
+ memo = self.state_machine.memo
+ self.memo = memo
+ self.reporter = memo.reporter
+ self.inliner = memo.inliner
+ self.document = memo.document
+ self.parent = self.state_machine.node
+
+ def goto_line(self, abs_line_offset):
+ """
+ Jump to input line `abs_line_offset`, ignoring jumps past the end.
+ """
+ try:
+ self.state_machine.goto_line(abs_line_offset)
+ except EOFError:
+ pass
+
+ def no_match(self, context, transitions):
+ """
+ Override `StateWS.no_match` to generate a system message.
+
+ This code should never be run.
+ """
+ self.reporter.severe(
+ 'Internal error: no transition pattern match. State: "%s"; '
+ 'transitions: %s; context: %s; current line: %r.'
+ % (self.__class__.__name__, transitions, context,
+ self.state_machine.line),
+ line=self.state_machine.abs_line_number())
+ return context, None, []
+
+ def bof(self, context):
+ """Called at beginning of file."""
+ return [], []
+
+ def nested_parse(self, block, input_offset, node, match_titles=0,
+ state_machine_class=None, state_machine_kwargs=None):
+ """
+ Create a new StateMachine rooted at `node` and run it over the input
+ `block`.
+ """
+ if state_machine_class is None:
+ state_machine_class = self.nested_sm
+ if state_machine_kwargs is None:
+ state_machine_kwargs = self.nested_sm_kwargs
+ block_length = len(block)
+ state_machine = state_machine_class(debug=self.debug,
+ **state_machine_kwargs)
+ state_machine.run(block, input_offset, memo=self.memo,
+ node=node, match_titles=match_titles)
+ state_machine.unlink()
+ new_offset = state_machine.abs_line_offset()
+ # No `block.parent` implies disconnected -- lines aren't in sync:
+ if block.parent and (len(block) - block_length) != 0:
+ # Adjustment for block if modified in nested parse:
+ self.state_machine.next_line(len(block) - block_length)
+ return new_offset
+
+ def nested_list_parse(self, block, input_offset, node, initial_state,
+ blank_finish,
+ blank_finish_state=None,
+ extra_settings={},
+ match_titles=0,
+ state_machine_class=None,
+ state_machine_kwargs=None):
+ """
+ Create a new StateMachine rooted at `node` and run it over the input
+ `block`. Also keep track of optional intermediate blank lines and the
+ required final one.
+ """
+ if state_machine_class is None:
+ state_machine_class = self.nested_sm
+ if state_machine_kwargs is None:
+ state_machine_kwargs = self.nested_sm_kwargs.copy()
+ state_machine_kwargs['initial_state'] = initial_state
+ state_machine = state_machine_class(debug=self.debug,
+ **state_machine_kwargs)
+ if blank_finish_state is None:
+ blank_finish_state = initial_state
+ state_machine.states[blank_finish_state].blank_finish = blank_finish
+ for key, value in extra_settings.items():
+ setattr(state_machine.states[initial_state], key, value)
+ state_machine.run(block, input_offset, memo=self.memo,
+ node=node, match_titles=match_titles)
+ blank_finish = state_machine.states[blank_finish_state].blank_finish
+ state_machine.unlink()
+ return state_machine.abs_line_offset(), blank_finish
+
+ def section(self, title, source, style, lineno, messages):
+ """Check for a valid subsection and create one if it checks out."""
+ if self.check_subsection(source, style, lineno):
+ self.new_subsection(title, lineno, messages)
+
+ def check_subsection(self, source, style, lineno):
+ """
+ Check for a valid subsection header. Return 1 (true) or None (false).
+
+ When a new section is reached that isn't a subsection of the current
+ section, back up the line count (use ``previous_line(-x)``), then
+ ``raise EOFError``. The current StateMachine will finish, then the
+ calling StateMachine can re-examine the title. This will work its way
+ back up the calling chain until the correct section level is reached.
+
+ @@@ Alternative: Evaluate the title, store the title info & level, and
+ back up the chain until that level is reached. Store in memo? Or
+ return in results?
+
+ :Exception: `EOFError` when a sibling or supersection encountered.
+ """
+ memo = self.memo
+ title_styles = memo.title_styles
+ mylevel = memo.section_level
+ try: # check for existing title style
+ level = title_styles.index(style) + 1
+ except ValueError: # new title style
+ if len(title_styles) == memo.section_level: # new subsection
+ title_styles.append(style)
+ return 1
+ else: # not at lowest level
+ self.parent += self.title_inconsistent(source, lineno)
+ return None
+ if level <= mylevel: # sibling or supersection
+ memo.section_level = level # bubble up to parent section
+ if len(style) == 2:
+ memo.section_bubble_up_kludge = 1
+ # back up 2 lines for underline title, 3 for overline title
+ self.state_machine.previous_line(len(style) + 1)
+ raise EOFError # let parent section re-evaluate
+ if level == mylevel + 1: # immediate subsection
+ return 1
+ else: # invalid subsection
+ self.parent += self.title_inconsistent(source, lineno)
+ return None
+
+ def title_inconsistent(self, sourcetext, lineno):
+ error = self.reporter.severe(
+ 'Title level inconsistent:', nodes.literal_block('', sourcetext),
+ line=lineno)
+ return error
+
+ def new_subsection(self, title, lineno, messages):
+ """Append new subsection to document tree. On return, check level."""
+ memo = self.memo
+ mylevel = memo.section_level
+ memo.section_level += 1
+ section_node = nodes.section()
+ self.parent += section_node
+ textnodes, title_messages = self.inline_text(title, lineno)
+ titlenode = nodes.title(title, '', *textnodes)
+ name = normalize_name(titlenode.astext())
+ section_node['names'].append(name)
+ section_node += titlenode
+ section_node += messages
+ section_node += title_messages
+ self.document.note_implicit_target(section_node, section_node)
+ offset = self.state_machine.line_offset + 1
+ absoffset = self.state_machine.abs_line_offset() + 1
+ newabsoffset = self.nested_parse(
+ self.state_machine.input_lines[offset:], input_offset=absoffset,
+ node=section_node, match_titles=1)
+ self.goto_line(newabsoffset)
+ if memo.section_level <= mylevel: # can't handle next section?
+ raise EOFError # bubble up to supersection
+ # reset section_level; next pass will detect it properly
+ memo.section_level = mylevel
+
+ def paragraph(self, lines, lineno):
+ """
+ Return a list (paragraph & messages) & a boolean: literal_block next?
+ """
+ data = '\n'.join(lines).rstrip()
+ if re.search(r'(?<!\\)(\\\\)*::$', data):
+ if len(data) == 2:
+ return [], 1
+ elif data[-3] in ' \n':
+ text = data[:-3].rstrip()
+ else:
+ text = data[:-1]
+ literalnext = 1
+ else:
+ text = data
+ literalnext = 0
+ textnodes, messages = self.inline_text(text, lineno)
+ p = nodes.paragraph(data, '', *textnodes)
+ p.line = lineno
+ return [p] + messages, literalnext
+
+ def inline_text(self, text, lineno):
+ """
+ Return 2 lists: nodes (text and inline elements), and system_messages.
+ """
+ return self.inliner.parse(text, lineno, self.memo, self.parent)
+
+ def unindent_warning(self, node_name):
+ return self.reporter.warning(
+ '%s ends without a blank line; unexpected unindent.' % node_name,
+ line=(self.state_machine.abs_line_number() + 1))
+
+
+def build_regexp(definition, compile=1):
+ """
+ Build, compile and return a regular expression based on `definition`.
+
+ :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
+ where "parts" is a list of regular expressions and/or regular
+ expression definitions to be joined into an or-group.
+ """
+ name, prefix, suffix, parts = definition
+ part_strings = []
+ for part in parts:
+ if type(part) is TupleType:
+ part_strings.append(build_regexp(part, None))
+ else:
+ part_strings.append(part)
+ or_group = '|'.join(part_strings)
+ regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
+ if compile:
+ return re.compile(regexp, re.UNICODE)
+ else:
+ return regexp
+
+
+class Inliner:
+
+ """
+ Parse inline markup; call the `parse()` method.
+ """
+
+ def __init__(self):
+ self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
+ """List of (pattern, bound method) tuples, used by
+ `self.implicit_inline`."""
+
+ def init_customizations(self, settings):
+ """Setting-based customizations; run when parsing begins."""
+ if settings.pep_references:
+ self.implicit_dispatch.append((self.patterns.pep,
+ self.pep_reference))
+ if settings.rfc_references:
+ self.implicit_dispatch.append((self.patterns.rfc,
+ self.rfc_reference))
+
+ def parse(self, text, lineno, memo, parent):
+ # Needs to be refactored for nested inline markup.
+ # Add nested_parse() method?
+ """
+ Return 2 lists: nodes (text and inline elements), and system_messages.
+
+ Using `self.patterns.initial`, a pattern which matches start-strings
+ (emphasis, strong, interpreted, phrase reference, literal,
+ substitution reference, and inline target) and complete constructs
+ (simple reference, footnote reference), search for a candidate. When
+ one is found, check for validity (e.g., not a quoted '*' character).
+ If valid, search for the corresponding end string if applicable, and
+ check it for validity. If not found or invalid, generate a warning
+ and ignore the start-string. Implicit inline markup (e.g. standalone
+ URIs) is found last.
+ """
+ self.reporter = memo.reporter
+ self.document = memo.document
+ self.language = memo.language
+ self.parent = parent
+ pattern_search = self.patterns.initial.search
+ dispatch = self.dispatch
+ remaining = escape2null(text)
+ processed = []
+ unprocessed = []
+ messages = []
+ while remaining:
+ match = pattern_search(remaining)
+ if match:
+ groups = match.groupdict()
+ method = dispatch[groups['start'] or groups['backquote']
+ or groups['refend'] or groups['fnend']]
+ before, inlines, remaining, sysmessages = method(self, match,
+ lineno)
+ unprocessed.append(before)
+ messages += sysmessages
+ if inlines:
+ processed += self.implicit_inline(''.join(unprocessed),
+ lineno)
+ processed += inlines
+ unprocessed = []
+ else:
+ break
+ remaining = ''.join(unprocessed) + remaining
+ if remaining:
+ processed += self.implicit_inline(remaining, lineno)
+ return processed, messages
+
+ openers = '\'"([{<'
+ closers = '\'")]}>'
+ start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers))
+ end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))'
+ % re.escape(closers))
+ non_whitespace_before = r'(?<![ \n])'
+ non_whitespace_escape_before = r'(?<![ \n\x00])'
+ non_whitespace_after = r'(?![ \n])'
+ # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
+ simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
+ # Valid URI characters (see RFC 2396 & RFC 2732);
+ # final \x00 allows backslash escapes in URIs:
+ uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
+ # Delimiter indicating the end of a URI (not part of the URI):
+ uri_end_delim = r"""[>]"""
+ # Last URI character; same as uric but no punctuation:
+ urilast = r"""[_~*/=+a-zA-Z0-9]"""
+ # End of a URI (either 'urilast' or 'uric followed by a
+ # uri_end_delim'):
+ uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
+ emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
+ email_pattern = r"""
+ %(emailc)s+(?:\.%(emailc)s+)* # name
+ (?<!\x00)@ # at
+ %(emailc)s+(?:\.%(emailc)s*)* # host
+ %(uri_end)s # final URI char
+ """
+ parts = ('initial_inline', start_string_prefix, '',
+ [('start', '', non_whitespace_after, # simple start-strings
+ [r'\*\*', # strong
+ r'\*(?!\*)', # emphasis but not strong
+ r'``', # literal
+ r'_`', # inline internal target
+ r'\|(?!\|)'] # substitution reference
+ ),
+ ('whole', '', end_string_suffix, # whole constructs
+ [# reference name & end-string
+ r'(?P<refname>%s)(?P<refend>__?)' % simplename,
+ ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
+ [r'[0-9]+', # manually numbered
+ r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
+ r'\*', # auto-symbol
+ r'(?P<citationlabel>%s)' % simplename] # citation reference
+ )
+ ]
+ ),
+ ('backquote', # interpreted text or phrase reference
+ '(?P<role>(:%s:)?)' % simplename, # optional role
+ non_whitespace_after,
+ ['`(?!`)'] # but not literal
+ )
+ ]
+ )
+ patterns = Struct(
+ initial=build_regexp(parts),
+ emphasis=re.compile(non_whitespace_escape_before
+ + r'(\*)' + end_string_suffix),
+ strong=re.compile(non_whitespace_escape_before
+ + r'(\*\*)' + end_string_suffix),
+ interpreted_or_phrase_ref=re.compile(
+ r"""
+ %(non_whitespace_escape_before)s
+ (
+ `
+ (?P<suffix>
+ (?P<role>:%(simplename)s:)?
+ (?P<refend>__?)?
+ )
+ )
+ %(end_string_suffix)s
+ """ % locals(), re.VERBOSE | re.UNICODE),
+ embedded_uri=re.compile(
+ r"""
+ (
+ (?:[ \n]+|^) # spaces or beginning of line/string
+ < # open bracket
+ %(non_whitespace_after)s
+ ([^<>\x00]+) # anything but angle brackets & nulls
+ %(non_whitespace_before)s
+ > # close bracket w/o whitespace before
+ )
+ $ # end of string
+ """ % locals(), re.VERBOSE),
+ literal=re.compile(non_whitespace_before + '(``)'
+ + end_string_suffix),
+ target=re.compile(non_whitespace_escape_before
+ + r'(`)' + end_string_suffix),
+ substitution_ref=re.compile(non_whitespace_escape_before
+ + r'(\|_{0,2})'
+ + end_string_suffix),
+ email=re.compile(email_pattern % locals() + '$', re.VERBOSE),
+ uri=re.compile(
+ (r"""
+ %(start_string_prefix)s
+ (?P<whole>
+ (?P<absolute> # absolute URI
+ (?P<scheme> # scheme (http, ftp, mailto)
+ [a-zA-Z][a-zA-Z0-9.+-]*
+ )
+ :
+ (
+ ( # either:
+ (//?)? # hierarchical URI
+ %(uric)s* # URI characters
+ %(uri_end)s # final URI char
+ )
+ ( # optional query
+ \?%(uric)s*
+ %(uri_end)s
+ )?
+ ( # optional fragment
+ \#%(uric)s*
+ %(uri_end)s
+ )?
+ )
+ )
+ | # *OR*
+ (?P<email> # email address
+ """ + email_pattern + r"""
+ )
+ )
+ %(end_string_suffix)s
+ """) % locals(), re.VERBOSE),
+ pep=re.compile(
+ r"""
+ %(start_string_prefix)s
+ (
+ (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
+ |
+ (PEP\s+(?P<pepnum2>\d+)) # reference by name
+ )
+ %(end_string_suffix)s""" % locals(), re.VERBOSE),
+ rfc=re.compile(
+ r"""
+ %(start_string_prefix)s
+ (RFC(-|\s+)?(?P<rfcnum>\d+))
+ %(end_string_suffix)s""" % locals(), re.VERBOSE))
+
+ def quoted_start(self, match):
+ """Return 1 if inline markup start-string is 'quoted', 0 if not."""
+ string = match.string
+ start = match.start()
+ end = match.end()
+ if start == 0: # start-string at beginning of text
+ return 0
+ prestart = string[start - 1]
+ try:
+ poststart = string[end]
+ if self.openers.index(prestart) \
+ == self.closers.index(poststart): # quoted
+ return 1
+ except IndexError: # start-string at end of text
+ return 1
+ except ValueError: # not quoted
+ pass
+ return 0
+
+ def inline_obj(self, match, lineno, end_pattern, nodeclass,
+ restore_backslashes=0):
+ string = match.string
+ matchstart = match.start('start')
+ matchend = match.end('start')
+ if self.quoted_start(match):
+ return (string[:matchend], [], string[matchend:], [], '')
+ endmatch = end_pattern.search(string[matchend:])
+ if endmatch and endmatch.start(1): # 1 or more chars
+ text = unescape(endmatch.string[:endmatch.start(1)],
+ restore_backslashes)
+ textend = matchend + endmatch.end(1)
+ rawsource = unescape(string[matchstart:textend], 1)
+ return (string[:matchstart], [nodeclass(rawsource, text)],
+ string[textend:], [], endmatch.group(1))
+ msg = self.reporter.warning(
+ 'Inline %s start-string without end-string.'
+ % nodeclass.__name__, line=lineno)
+ text = unescape(string[matchstart:matchend], 1)
+ rawsource = unescape(string[matchstart:matchend], 1)
+ prb = self.problematic(text, rawsource, msg)
+ return string[:matchstart], [prb], string[matchend:], [msg], ''
+
+ def problematic(self, text, rawsource, message):
+ msgid = self.document.set_id(message, self.parent)
+ problematic = nodes.problematic(rawsource, text, refid=msgid)
+ prbid = self.document.set_id(problematic)
+ message.add_backref(prbid)
+ return problematic
+
+ def emphasis(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.emphasis, nodes.emphasis)
+ return before, inlines, remaining, sysmessages
+
+ def strong(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.strong, nodes.strong)
+ return before, inlines, remaining, sysmessages
+
+ def interpreted_or_phrase_ref(self, match, lineno):
+ end_pattern = self.patterns.interpreted_or_phrase_ref
+ string = match.string
+ matchstart = match.start('backquote')
+ matchend = match.end('backquote')
+ rolestart = match.start('role')
+ role = match.group('role')
+ position = ''
+ if role:
+ role = role[1:-1]
+ position = 'prefix'
+ elif self.quoted_start(match):
+ return (string[:matchend], [], string[matchend:], [])
+ endmatch = end_pattern.search(string[matchend:])
+ if endmatch and endmatch.start(1): # 1 or more chars
+ textend = matchend + endmatch.end()
+ if endmatch.group('role'):
+ if role:
+ msg = self.reporter.warning(
+ 'Multiple roles in interpreted text (both '
+ 'prefix and suffix present; only one allowed).',
+ line=lineno)
+ text = unescape(string[rolestart:textend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:rolestart], [prb], string[textend:], [msg]
+ role = endmatch.group('suffix')[1:-1]
+ position = 'suffix'
+ escaped = endmatch.string[:endmatch.start(1)]
+ rawsource = unescape(string[matchstart:textend], 1)
+ if rawsource[-1:] == '_':
+ if role:
+ msg = self.reporter.warning(
+ 'Mismatch: both interpreted text role %s and '
+ 'reference suffix.' % position, line=lineno)
+ text = unescape(string[rolestart:textend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:rolestart], [prb], string[textend:], [msg]
+ return self.phrase_ref(string[:matchstart], string[textend:],
+ rawsource, escaped, unescape(escaped))
+ else:
+ rawsource = unescape(string[rolestart:textend], 1)
+ nodelist, messages = self.interpreted(rawsource, escaped, role,
+ lineno)
+ return (string[:rolestart], nodelist,
+ string[textend:], messages)
+ msg = self.reporter.warning(
+ 'Inline interpreted text or phrase reference start-string '
+ 'without end-string.', line=lineno)
+ text = unescape(string[matchstart:matchend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:matchstart], [prb], string[matchend:], [msg]
+
+ def phrase_ref(self, before, after, rawsource, escaped, text):
+ match = self.patterns.embedded_uri.search(escaped)
+ if match:
+ text = unescape(escaped[:match.start(0)])
+ uri_text = match.group(2)
+ uri = ''.join(uri_text.split())
+ uri = self.adjust_uri(uri)
+ if uri:
+ target = nodes.target(match.group(1), refuri=uri)
+ else:
+ raise ApplicationError('problem with URI: %r' % uri_text)
+ if not text:
+ text = uri
+ else:
+ target = None
+ refname = normalize_name(text)
+ reference = nodes.reference(rawsource, text,
+ name=whitespace_normalize_name(text))
+ node_list = [reference]
+ if rawsource[-2:] == '__':
+ if target:
+ reference['refuri'] = uri
+ else:
+ reference['anonymous'] = 1
+ else:
+ if target:
+ reference['refuri'] = uri
+ target['names'].append(refname)
+ self.document.note_explicit_target(target, self.parent)
+ node_list.append(target)
+ else:
+ reference['refname'] = refname
+ self.document.note_refname(reference)
+ return before, node_list, after, []
+
+ def adjust_uri(self, uri):
+ match = self.patterns.email.match(uri)
+ if match:
+ return 'mailto:' + uri
+ else:
+ return uri
+
+ def interpreted(self, rawsource, text, role, lineno):
+ role_fn, messages = roles.role(role, self.language, lineno,
+ self.reporter)
+ if role_fn:
+ nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
+ return nodes, messages + messages2
+ else:
+ msg = self.reporter.error(
+ 'Unknown interpreted text role "%s".' % role,
+ line=lineno)
+ return ([self.problematic(rawsource, rawsource, msg)],
+ messages + [msg])
+
+ def literal(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.literal, nodes.literal,
+ restore_backslashes=1)
+ return before, inlines, remaining, sysmessages
+
+ def inline_internal_target(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.target, nodes.target)
+ if inlines and isinstance(inlines[0], nodes.target):
+ assert len(inlines) == 1
+ target = inlines[0]
+ name = normalize_name(target.astext())
+ target['names'].append(name)
+ self.document.note_explicit_target(target, self.parent)
+ return before, inlines, remaining, sysmessages
+
+ def substitution_reference(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.substitution_ref,
+ nodes.substitution_reference)
+ if len(inlines) == 1:
+ subref_node = inlines[0]
+ if isinstance(subref_node, nodes.substitution_reference):
+ subref_text = subref_node.astext()
+ self.document.note_substitution_ref(subref_node, subref_text)
+ if endstring[-1:] == '_':
+ reference_node = nodes.reference(
+ '|%s%s' % (subref_text, endstring), '')
+ if endstring[-2:] == '__':
+ reference_node['anonymous'] = 1
+ else:
+ reference_node['refname'] = normalize_name(subref_text)
+ self.document.note_refname(reference_node)
+ reference_node += subref_node
+ inlines = [reference_node]
+ return before, inlines, remaining, sysmessages
+
+ def footnote_reference(self, match, lineno):
+ """
+ Handles `nodes.footnote_reference` and `nodes.citation_reference`
+ elements.
+ """
+ label = match.group('footnotelabel')
+ refname = normalize_name(label)
+ string = match.string
+ before = string[:match.start('whole')]
+ remaining = string[match.end('whole'):]
+ if match.group('citationlabel'):
+ refnode = nodes.citation_reference('[%s]_' % label,
+ refname=refname)
+ refnode += nodes.Text(label)
+ self.document.note_citation_ref(refnode)
+ else:
+ refnode = nodes.footnote_reference('[%s]_' % label)
+ if refname[0] == '#':
+ refname = refname[1:]
+ refnode['auto'] = 1
+ self.document.note_autofootnote_ref(refnode)
+ elif refname == '*':
+ refname = ''
+ refnode['auto'] = '*'
+ self.document.note_symbol_footnote_ref(
+ refnode)
+ else:
+ refnode += nodes.Text(label)
+ if refname:
+ refnode['refname'] = refname
+ self.document.note_footnote_ref(refnode)
+ if utils.get_trim_footnote_ref_space(self.document.settings):
+ before = before.rstrip()
+ return (before, [refnode], remaining, [])
+
+ def reference(self, match, lineno, anonymous=None):
+ referencename = match.group('refname')
+ refname = normalize_name(referencename)
+ referencenode = nodes.reference(
+ referencename + match.group('refend'), referencename,
+ name=whitespace_normalize_name(referencename))
+ if anonymous:
+ referencenode['anonymous'] = 1
+ else:
+ referencenode['refname'] = refname
+ self.document.note_refname(referencenode)
+ string = match.string
+ matchstart = match.start('whole')
+ matchend = match.end('whole')
+ return (string[:matchstart], [referencenode], string[matchend:], [])
+
+ def anonymous_reference(self, match, lineno):
+ return self.reference(match, lineno, anonymous=1)
+
+ def standalone_uri(self, match, lineno):
+ if not match.group('scheme') or urischemes.schemes.has_key(
+ match.group('scheme').lower()):
+ if match.group('email'):
+ addscheme = 'mailto:'
+ else:
+ addscheme = ''
+ text = match.group('whole')
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped,
+ refuri=addscheme + unescaped)]
+ else: # not a valid scheme
+ raise MarkupMismatch
+
+ def pep_reference(self, match, lineno):
+ text = match.group(0)
+ if text.startswith('pep-'):
+ pepnum = int(match.group('pepnum1'))
+ elif text.startswith('PEP'):
+ pepnum = int(match.group('pepnum2'))
+ else:
+ raise MarkupMismatch
+ ref = (self.document.settings.pep_base_url
+ + self.document.settings.pep_file_url_template % pepnum)
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
+
+ rfc_url = 'rfc%d.html'
+
+ def rfc_reference(self, match, lineno):
+ text = match.group(0)
+ if text.startswith('RFC'):
+ rfcnum = int(match.group('rfcnum'))
+ ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
+ else:
+ raise MarkupMismatch
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
+
+ def implicit_inline(self, text, lineno):
+ """
+ Check each of the patterns in `self.implicit_dispatch` for a match,
+ and dispatch to the stored method for the pattern. Recursively check
+ the text before and after the match. Return a list of `nodes.Text`
+ and inline element nodes.
+ """
+ if not text:
+ return []
+ for pattern, method in self.implicit_dispatch:
+ match = pattern.search(text)
+ if match:
+ try:
+ # Must recurse on strings before *and* after the match;
+ # there may be multiple patterns.
+ return (self.implicit_inline(text[:match.start()], lineno)
+ + method(match, lineno) +
+ self.implicit_inline(text[match.end():], lineno))
+ except MarkupMismatch:
+ pass
+ return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
+
+ dispatch = {'*': emphasis,
+ '**': strong,
+ '`': interpreted_or_phrase_ref,
+ '``': literal,
+ '_`': inline_internal_target,
+ ']_': footnote_reference,
+ '|': substitution_reference,
+ '_': reference,
+ '__': anonymous_reference}
+
+
+def _loweralpha_to_int(s, _zero=(ord('a')-1)):
+ return ord(s) - _zero
+
+def _upperalpha_to_int(s, _zero=(ord('A')-1)):
+ return ord(s) - _zero
+
+def _lowerroman_to_int(s):
+ return roman.fromRoman(s.upper())
+
+
class Body(RSTState):

    """
    Generic classifier of the first line of a block.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    # Per-format info: prefix/suffix characters and the slice (start:end)
    # that strips them from the matched enumerator text.
    enum.formatinfo = {
        'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
        'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
        'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman'] # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+',}
    # Convert enumerator text to its ordinal value, per sequence type.
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    # Anchored ('$'-terminated) regexps for full-string sequence checks.
    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$')

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern? Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format, e.g. (?P<parens>\((...)\)).
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    # Transition patterns: the first line of a block is classified by the
    # first of these that matches (in `initial_transitions` order).
    patterns = {
        'bullet': ur'[-+*\u2022\u2023\u2043]( +|$)',
        'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
        'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
        'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
        'doctest': r'>>>( +|$)',
        'line_block': r'\|( +|$)',
        'grid_table_top': grid_table_top_pat,
        'simple_table_top': simple_table_top_pat,
        'explicit_markup': r'\.\.( +|$)',
        'anonymous': r'__( +|$)',
        'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
        'text': r''}
    initial_transitions = (
        'bullet',
        'enumerator',
        'field_marker',
        'option_marker',
        'doctest',
        'line_block',
        'grid_table_top',
        'simple_table_top',
        'explicit_markup',
        'anonymous',
        'line',
        'text')
+
+ def indent(self, match, context, next_state):
+ """Block quote."""
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_indented()
+ elements = self.block_quote(indented, line_offset)
+ self.parent += elements
+ if not blank_finish:
+ self.parent += self.unindent_warning('Block quote')
+ return context, next_state, []
+
    def block_quote(self, indented, line_offset):
        """
        Parse `indented` as one or more block quotes (each with an
        optional trailing attribution); return a list of elements.
        """
        elements = []
        while indented:
            # Split off one quote plus its attribution (if any); the
            # remainder (after the attribution) is processed next pass.
            (blockquote_lines,
             attribution_lines,
             attribution_offset,
             indented,
             new_line_offset) = self.split_attribution(indented, line_offset)
            blockquote = nodes.block_quote()
            self.nested_parse(blockquote_lines, line_offset, blockquote)
            elements.append(blockquote)
            if attribution_lines:
                attribution, messages = self.parse_attribution(
                    attribution_lines, attribution_offset)
                blockquote += attribution
                elements += messages
            line_offset = new_line_offset
            # Skip blank lines before a possible following quote.
            while indented and not indented[0]:
                indented = indented[1:]
                line_offset += 1
        return elements
+
    # U+2014 is an em-dash:
    attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')

    def split_attribution(self, indented, line_offset):
        """
        Check for a block quote attribution and split it off:

        * First line after a blank line must begin with a dash ("--", "---",
          em-dash; matches `self.attribution_pattern`).
        * Every line after that must have consistent indentation.
        * Attributions must be preceded by block quote content.

        Return a tuple of: (block quote content lines, attribution lines,
        attribution offset within `indented`, remaining indented lines,
        new line offset).  When no attribution is found, the content
        lines are returned with ``None`` for the other four members.
        """
        blank = None
        nonblank_seen = False
        for i in range(len(indented)):
            line = indented[i].rstrip()
            if line:
                if nonblank_seen and blank == i - 1: # last line blank
                    match = self.attribution_pattern.match(line)
                    if match:
                        attribution_end, indent = self.check_attribution(
                            indented, i)
                        if attribution_end:
                            # Strip the dash from the first attribution
                            # line and the shared indent from the rest.
                            a_lines = indented[i:attribution_end]
                            a_lines.trim_left(match.end(), end=1)
                            a_lines.trim_left(indent, start=1)
                            return (indented[:i], a_lines,
                                    i, indented[attribution_end:],
                                    line_offset + attribution_end)
                nonblank_seen = True
            else:
                blank = i
        else:
            return (indented, None, None, None, None)
+
    def check_attribution(self, indented, attribution_start):
        """
        Check attribution shape.
        Return the index past the end of the attribution, and the indent.

        Continuation lines (if any) must all share one consistent indent;
        otherwise (None, None) is returned: not an attribution after all.
        """
        indent = None
        # Ensure `i` is defined if the loop body never runs (attribution
        # is the very last line of `indented`).
        i = attribution_start + 1
        for i in range(attribution_start + 1, len(indented)):
            line = indented[i].rstrip()
            if not line:
                break
            if indent is None:
                indent = len(line) - len(line.lstrip())
            elif len(line) - len(line.lstrip()) != indent:
                return None, None # bad shape; not an attribution
        else:
            # return index of line after last attribution line:
            i += 1
        return i, (indent or 0)
+
+ def parse_attribution(self, indented, line_offset):
+ text = '\n'.join(indented).rstrip()
+ lineno = self.state_machine.abs_line_number() + line_offset
+ textnodes, messages = self.inline_text(text, lineno)
+ node = nodes.attribution(text, '', *textnodes)
+ node.line = lineno
+ return node, messages
+
    def bullet(self, match, context, next_state):
        """Bullet list item."""
        bulletlist = nodes.bullet_list()
        self.parent += bulletlist
        bulletlist['bullet'] = match.string[0]
        i, blank_finish = self.list_item(match.end())
        bulletlist += i
        offset = self.state_machine.line_offset + 1 # next line
        # Collect sibling items by parsing the remaining lines in the
        # 'BulletList' state; control returns here when the list ends.
        new_line_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=bulletlist, initial_state='BulletList',
            blank_finish=blank_finish)
        self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Bullet list')
        return [], next_state, []
+
    def list_item(self, indent):
        """
        Parse one list item; return (list_item node, blank_finish).

        `indent` is the column just past the bullet/enumerator marker.
        """
        if self.state_machine.line[indent:]:
            # Content follows the marker on the same line: indent known.
            indented, line_offset, blank_finish = (
                self.state_machine.get_known_indented(indent))
        else:
            # Marker alone on its line: measure indent from the next line.
            indented, indent, line_offset, blank_finish = (
                self.state_machine.get_first_known_indented(indent))
        listitem = nodes.list_item('\n'.join(indented))
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=listitem)
        return listitem, blank_finish
+
    def enumerator(self, match, context, next_state):
        """Enumerated List Item"""
        format, sequence, text, ordinal = self.parse_enumerator(match)
        if not self.is_enumerated_list_item(ordinal, sequence, format):
            # Not a plausible list item; reparse this line as plain text.
            raise statemachine.TransitionCorrection('text')
        enumlist = nodes.enumerated_list()
        self.parent += enumlist
        if sequence == '#':
            enumlist['enumtype'] = 'arabic'
        else:
            enumlist['enumtype'] = sequence
        enumlist['prefix'] = self.enum.formatinfo[format].prefix
        enumlist['suffix'] = self.enum.formatinfo[format].suffix
        if ordinal != 1:
            enumlist['start'] = ordinal
            msg = self.reporter.info(
                'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
                % (text, ordinal), line=self.state_machine.abs_line_number())
            self.parent += msg
        listitem, blank_finish = self.list_item(match.end())
        enumlist += listitem
        offset = self.state_machine.line_offset + 1 # next line
        # Collect sibling items in the 'EnumeratedList' state; pass along
        # the current ordinal/format so consistency can be enforced.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=enumlist, initial_state='EnumeratedList',
            blank_finish=blank_finish,
            extra_settings={'lastordinal': ordinal,
                            'format': format,
                            'auto': sequence == '#'})
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Enumerated list')
        return [], next_state, []
+
+ def parse_enumerator(self, match, expected_sequence=None):
+ """
+ Analyze an enumerator and return the results.
+
+ :Return:
+ - the enumerator format ('period', 'parens', or 'rparen'),
+ - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
+ - the text of the enumerator, stripped of formatting, and
+ - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
+ ``None`` is returned for invalid enumerator text).
+
+ The enumerator format has already been determined by the regular
+ expression match. If `expected_sequence` is given, that sequence is
+ tried first. If not, we check for Roman numeral 1. This way,
+ single-character Roman numerals (which are also alphabetical) can be
+ matched. If no sequence has been matched, all sequences are checked in
+ order.
+ """
+ groupdict = match.groupdict()
+ sequence = ''
+ for format in self.enum.formats:
+ if groupdict[format]: # was this the format matched?
+ break # yes; keep `format`
+ else: # shouldn't happen
+ raise ParserError('enumerator format not matched')
+ text = groupdict[format][self.enum.formatinfo[format].start
+ :self.enum.formatinfo[format].end]
+ if text == '#':
+ sequence = '#'
+ elif expected_sequence:
+ try:
+ if self.enum.sequenceregexps[expected_sequence].match(text):
+ sequence = expected_sequence
+ except KeyError: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: %s'
+ % sequence)
+ elif text == 'i':
+ sequence = 'lowerroman'
+ elif text == 'I':
+ sequence = 'upperroman'
+ if not sequence:
+ for sequence in self.enum.sequences:
+ if self.enum.sequenceregexps[sequence].match(text):
+ break
+ else: # shouldn't happen
+ raise ParserError('enumerator sequence not matched')
+ if sequence == '#':
+ ordinal = 1
+ else:
+ try:
+ ordinal = self.enum.converters[sequence](text)
+ except roman.InvalidRomanNumeralError:
+ ordinal = None
+ return format, sequence, text, ordinal
+
    def is_enumerated_list_item(self, ordinal, sequence, format):
        """
        Check validity based on the ordinal value and the second line.

        Return true iff the ordinal is valid and the second line is blank,
        indented, or starts with the next enumerator or an auto-enumerator.
        """
        if ordinal is None:
            return None
        try:
            # Peek at the following line; position is restored either way.
            next_line = self.state_machine.next_line()
        except EOFError: # end of input lines
            self.state_machine.previous_line()
            return 1
        else:
            self.state_machine.previous_line()
        if not next_line[:1].strip(): # blank or indented
            return 1
        result = self.make_enumerator(ordinal + 1, sequence, format)
        if result:
            next_enumerator, auto_enumerator = result
            try:
                if ( next_line.startswith(next_enumerator) or
                     next_line.startswith(auto_enumerator) ):
                    return 1
            except TypeError:
                pass
        return None
+
+ def make_enumerator(self, ordinal, sequence, format):
+ """
+ Construct and return the next enumerated list item marker, and an
+ auto-enumerator ("#" instead of the regular enumerator).
+
+ Return ``None`` for invalid (out of range) ordinals.
+ """ #"
+ if sequence == '#':
+ enumerator = '#'
+ elif sequence == 'arabic':
+ enumerator = str(ordinal)
+ else:
+ if sequence.endswith('alpha'):
+ if ordinal > 26:
+ return None
+ enumerator = chr(ordinal + ord('a') - 1)
+ elif sequence.endswith('roman'):
+ try:
+ enumerator = roman.toRoman(ordinal)
+ except roman.RomanError:
+ return None
+ else: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: "%s"'
+ % sequence)
+ if sequence.startswith('lower'):
+ enumerator = enumerator.lower()
+ elif sequence.startswith('upper'):
+ enumerator = enumerator.upper()
+ else: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: "%s"'
+ % sequence)
+ formatinfo = self.enum.formatinfo[format]
+ next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
+ + ' ')
+ auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
+ return next_enumerator, auto_enumerator
+
+ def field_marker(self, match, context, next_state):
+ """Field list item."""
+ field_list = nodes.field_list()
+ self.parent += field_list
+ field, blank_finish = self.field(match)
+ field_list += field
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=field_list, initial_state='FieldList',
+ blank_finish=blank_finish)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Field list')
+ return [], next_state, []
+
    def field(self, match):
        """Parse one ":name: body" field; return (field node, blank_finish)."""
        name = self.parse_field_marker(match)
        lineno = self.state_machine.abs_line_number()
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        field_node = nodes.field()
        field_node.line = lineno
        # The field name may itself contain inline markup.
        name_nodes, name_messages = self.inline_text(name, lineno)
        field_node += nodes.field_name(name, '', *name_nodes)
        field_body = nodes.field_body('\n'.join(indented), *name_messages)
        field_node += field_body
        if indented:
            self.parse_field_body(indented, line_offset, field_body)
        return field_node, blank_finish
+
+ def parse_field_marker(self, match):
+ """Extract & return field name from a field marker match."""
+ field = match.group()[1:] # strip off leading ':'
+ field = field[:field.rfind(':')] # strip off trailing ':' etc.
+ return field
+
    def parse_field_body(self, indented, offset, node):
        """Parse the indented field body as nested body elements of `node`."""
        self.nested_parse(indented, input_offset=offset, node=node)
+
    def option_marker(self, match, context, next_state):
        """Option list item."""
        optionlist = nodes.option_list()
        try:
            listitem, blank_finish = self.option_list_item(match)
        except MarkupError, (message, lineno):
            # This shouldn't happen; pattern won't match.
            msg = self.reporter.error(
                'Invalid option list marker: %s' % message, line=lineno)
            self.parent += msg
            # Fall back: treat the indented text as a block quote.
            indented, indent, line_offset, blank_finish = \
                  self.state_machine.get_first_known_indented(match.end())
            elements = self.block_quote(indented, line_offset)
            self.parent += elements
            if not blank_finish:
                self.parent += self.unindent_warning('Option list')
            return [], next_state, []
        self.parent += optionlist
        optionlist += listitem
        offset = self.state_machine.line_offset + 1 # next line
        # Gather the remaining items in the 'OptionList' state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=optionlist, initial_state='OptionList',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
+
    def option_list_item(self, match):
        """
        Parse one option list item; return (option_list_item node,
        blank_finish).  Raises TransitionCorrection when the "item" has
        no description and is therefore not an option list item at all.
        """
        offset = self.state_machine.abs_line_offset()
        options = self.parse_option_marker(match)
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        if not indented: # not an option list item
            # Rewind and reparse the line as plain text.
            self.goto_line(offset)
            raise statemachine.TransitionCorrection('text')
        option_group = nodes.option_group('', *options)
        description = nodes.description('\n'.join(indented))
        option_list_item = nodes.option_list_item('', option_group,
                                                  description)
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=description)
        return option_list_item, blank_finish
+
    def parse_option_marker(self, match):
        """
        Return a list of `node.option` and `node.option_argument` objects,
        parsed from an option marker match.

        :Exception: `MarkupError` for invalid option markers.
        """
        optlist = []
        # Multiple synonymous options are comma-separated: "-o, --output".
        optionstrings = match.group().rstrip().split(', ')
        for optionstring in optionstrings:
            tokens = optionstring.split()
            delimiter = ' '
            firstopt = tokens[0].split('=')
            if len(firstopt) > 1:
                # "--opt=value" form
                tokens[:1] = firstopt
                delimiter = '='
            elif (len(tokens[0]) > 2
                  and ((tokens[0].startswith('-')
                        and not tokens[0].startswith('--'))
                       or tokens[0].startswith('+'))):
                # "-ovalue" form
                tokens[:1] = [tokens[0][:2], tokens[0][2:]]
                delimiter = ''
            if len(tokens) > 1 and (tokens[1].startswith('<')
                                    and tokens[-1].endswith('>')):
                # "-o <value1 value2>" form; join all values into one token
                tokens[1:] = [' '.join(tokens[1:])]
            if 0 < len(tokens) <= 2:
                # Token 0 is the option string, optional token 1 its argument.
                option = nodes.option(optionstring)
                option += nodes.option_string(tokens[0], tokens[0])
                if len(tokens) > 1:
                    option += nodes.option_argument(tokens[1], tokens[1],
                                                    delimiter=delimiter)
                optlist.append(option)
            else:
                raise MarkupError(
                    'wrong number of option tokens (=%s), should be 1 or 2: '
                    '"%s"' % (len(tokens), optionstring),
                    self.state_machine.abs_line_number() + 1)
        return optlist
+
+ def doctest(self, match, context, next_state):
+ data = '\n'.join(self.state_machine.get_text_block())
+ self.parent += nodes.doctest_block(data, data)
+ return [], next_state, []
+
    def line_block(self, match, context, next_state):
        """First line of a line block."""
        block = nodes.line_block()
        self.parent += block
        lineno = self.state_machine.abs_line_number()
        line, messages, blank_finish = self.line_block_line(match, lineno)
        block += line
        self.parent += messages
        if not blank_finish:
            # Collect the remaining "|" lines in the 'LineBlock' state.
            offset = self.state_machine.line_offset + 1 # next line
            new_line_offset, blank_finish = self.nested_list_parse(
                self.state_machine.input_lines[offset:],
                input_offset=self.state_machine.abs_line_offset() + 1,
                node=block, initial_state='LineBlock',
                blank_finish=0)
            self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.reporter.warning(
                'Line block ends without a blank line.',
                line=(self.state_machine.abs_line_number() + 1))
        if len(block):
            # First line with unknown indent defaults to 0, then nest
            # the whole block by relative indentation.
            if block[0].indent is None:
                block[0].indent = 0
            self.nest_line_block_lines(block)
        return [], next_state, []
+
    def line_block_line(self, match, lineno):
        """Return one line element of a line_block."""
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end(),
                                                          until_blank=1)
        text = u'\n'.join(indented)
        text_nodes, messages = self.inline_text(text, lineno)
        line = nodes.line(text, '', *text_nodes)
        if match.string.rstrip() != '|': # not empty
            # Record the indent after the "|" for later nesting;
            # empty lines keep indent == None (filled in afterwards).
            line.indent = len(match.group(1)) - 1
        return line, messages, blank_finish
+
+ def nest_line_block_lines(self, block):
+ for index in range(1, len(block)):
+ if block[index].indent is None:
+ block[index].indent = block[index - 1].indent
+ self.nest_line_block_segment(block)
+
    def nest_line_block_segment(self, block):
        """
        Recursively group consecutive lines indented more than the least
        indent into nested `line_block` children, in place.
        """
        indents = [item.indent for item in block]
        least = min(indents)
        new_items = []
        new_block = nodes.line_block()
        for item in block:
            if item.indent > least:
                # Deeper-indented line: accumulate into the pending child.
                new_block.append(item)
            else:
                if len(new_block):
                    # Close off the pending deeper run, nesting it first.
                    self.nest_line_block_segment(new_block)
                    new_items.append(new_block)
                    new_block = nodes.line_block()
                new_items.append(item)
        if len(new_block):
            self.nest_line_block_segment(new_block)
            new_items.append(new_block)
        block[:] = new_items
+
+ def grid_table_top(self, match, context, next_state):
+ """Top border of a full table."""
+ return self.table_top(match, context, next_state,
+ self.isolate_grid_table,
+ tableparser.GridTableParser)
+
+ def simple_table_top(self, match, context, next_state):
+ """Top border of a simple table."""
+ return self.table_top(match, context, next_state,
+ self.isolate_simple_table,
+ tableparser.SimpleTableParser)
+
+ def table_top(self, match, context, next_state,
+ isolate_function, parser_class):
+ """Top border of a generic table."""
+ nodelist, blank_finish = self.table(isolate_function, parser_class)
+ self.parent += nodelist
+ if not blank_finish:
+ msg = self.reporter.warning(
+ 'Blank line required after table.',
+ line=self.state_machine.abs_line_number() + 1)
+ self.parent += msg
+ return [], next_state, []
+
    def table(self, isolate_function, parser_class):
        """Parse a table; return (node list, blank_finish)."""
        block, messages, blank_finish = isolate_function()
        if block:
            try:
                parser = parser_class()
                tabledata = parser.parse(block)
                # Absolute line number of the table's first line.
                tableline = (self.state_machine.abs_line_number() - len(block)
                             + 1)
                table = self.build_table(tabledata, tableline)
                nodelist = [table] + messages
            except tableparser.TableMarkupError, detail:
                nodelist = self.malformed_table(
                    block, ' '.join(detail.args)) + messages
        else:
            nodelist = messages
        return nodelist, blank_finish
+
    def isolate_grid_table(self):
        """
        Extract the text block of a grid table, checking its edges.
        Return (block, messages, blank_finish); block is empty on error.
        """
        messages = []
        blank_finish = 1
        try:
            block = self.state_machine.get_text_block(flush_left=1)
        except statemachine.UnexpectedIndentationError, instance:
            block, source, lineno = instance.args
            messages.append(self.reporter.error('Unexpected indentation.',
                                                source=source, line=lineno))
            blank_finish = 0
        block.disconnect()
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        width = len(block[0].strip())
        for i in range(len(block)):
            block[i] = block[i].strip()
            if block[i][0] not in '+|': # check left edge
                # Table ends here; rewind past the non-table lines.
                blank_finish = 0
                self.state_machine.previous_line(len(block) - i)
                del block[i:]
                break
        if not self.grid_table_top_pat.match(block[-1]): # find bottom
            blank_finish = 0
            # from second-last to third line of table:
            for i in range(len(block) - 2, 1, -1):
                if self.grid_table_top_pat.match(block[i]):
                    self.state_machine.previous_line(len(block) - i + 1)
                    del block[i+1:]
                    break
            else:
                messages.extend(self.malformed_table(block))
                return [], messages, blank_finish
        for i in range(len(block)): # check right edge
            if len(block[i]) != width or block[i][-1] not in '+|':
                messages.extend(self.malformed_table(block))
                return [], messages, blank_finish
        return block, messages, blank_finish
+
    def isolate_simple_table(self):
        """
        Extract the text block of a simple table by scanning for its
        border lines.  Return (block, messages, blank_finish); block is
        empty on error.
        """
        start = self.state_machine.line_offset
        lines = self.state_machine.input_lines
        limit = len(lines) - 1
        toplen = len(lines[start].strip())
        pattern_match = self.simple_table_border_pat.match
        found = 0
        found_at = None
        i = start + 1
        while i <= limit:
            line = lines[i]
            match = pattern_match(line)
            if match:
                if len(line.strip()) != toplen:
                    # Border widths disagree: reject the whole candidate.
                    self.state_machine.next_line(i - start)
                    messages = self.malformed_table(
                        lines[start:i+1], 'Bottom/header table border does '
                        'not match top border.')
                    return [], messages, i == limit or not lines[i+1].strip()
                found += 1
                found_at = i
                # Second border (or end of input / following blank line)
                # closes the table.
                if found == 2 or i == limit or not lines[i+1].strip():
                    end = i
                    break
            i += 1
        else: # reached end of input_lines
            if found:
                extra = ' or no blank line after table bottom'
                self.state_machine.next_line(found_at - start)
                block = lines[start:found_at+1]
            else:
                extra = ''
                self.state_machine.next_line(i - start - 1)
                block = lines[start:]
            messages = self.malformed_table(
                block, 'No bottom table border found%s.' % extra)
            return [], messages, not extra
        self.state_machine.next_line(end - start)
        block = lines[start:end+1]
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        return block, [], end == limit or not lines[end+1].strip()
+
+ def malformed_table(self, block, detail=''):
+ block.replace(self.double_width_pad_char, '')
+ data = '\n'.join(block)
+ message = 'Malformed table.'
+ lineno = self.state_machine.abs_line_number() - len(block) + 1
+ if detail:
+ message += '\n' + detail
+ error = self.reporter.error(message, nodes.literal_block(data, data),
+ line=lineno)
+ return [error]
+
+ def build_table(self, tabledata, tableline, stub_columns=0):
+ colwidths, headrows, bodyrows = tabledata
+ table = nodes.table()
+ tgroup = nodes.tgroup(cols=len(colwidths))
+ table += tgroup
+ for colwidth in colwidths:
+ colspec = nodes.colspec(colwidth=colwidth)
+ if stub_columns:
+ colspec.attributes['stub'] = 1
+ stub_columns -= 1
+ tgroup += colspec
+ if headrows:
+ thead = nodes.thead()
+ tgroup += thead
+ for row in headrows:
+ thead += self.build_table_row(row, tableline)
+ tbody = nodes.tbody()
+ tgroup += tbody
+ for row in bodyrows:
+ tbody += self.build_table_row(row, tableline)
+ return table
+
+ def build_table_row(self, rowdata, tableline):
+ row = nodes.row()
+ for cell in rowdata:
+ if cell is None:
+ continue
+ morerows, morecols, offset, cellblock = cell
+ attributes = {}
+ if morerows:
+ attributes['morerows'] = morerows
+ if morecols:
+ attributes['morecols'] = morecols
+ entry = nodes.entry(**attributes)
+ row += entry
+ if ''.join(cellblock):
+ self.nested_parse(cellblock, input_offset=tableline+offset,
+ node=entry)
+ return row
+
+
    explicit = Struct()
    """Patterns and constants used for explicit markup recognition."""

    # Verbose regexps; %(non_whitespace_escape_before)s etc. are pattern
    # fragments interpolated from the Inliner class namespace.
    explicit.patterns = Struct(
          target=re.compile(r"""
                            (
                              _               # anonymous target
                            |               # *OR*
                              (?!_)           # no underscore at the beginning
                              (?P<quote>`?)   # optional open quote
                              (?![ `])        # first char. not space or
                                              # backquote
                              (?P<name>       # reference name
                                .+?
                              )
                              %(non_whitespace_escape_before)s
                              (?P=quote)      # close quote if open quote used
                            )
                            (?<!(?<!\x00):) # no unescaped colon at end
                            %(non_whitespace_escape_before)s
                            [ ]?            # optional space
                            :               # end of reference name
                            ([ ]+|$)        # followed by whitespace
                            """ % vars(Inliner), re.VERBOSE),
          reference=re.compile(r"""
                               (
                                 (?P<simple>%(simplename)s)_
                               |                  # *OR*
                                 `                  # open backquote
                                 (?![ ])            # not space
                                 (?P<phrase>.+?)    # hyperlink phrase
                                 %(non_whitespace_escape_before)s
                                 `_                 # close backquote,
                                                    # reference mark
                               )
                               $                  # end of string
                               """ % vars(Inliner), re.VERBOSE | re.UNICODE),
          substitution=re.compile(r"""
                                  (
                                    (?![ ])          # first char. not space
                                    (?P<name>.+?)    # substitution text
                                    %(non_whitespace_escape_before)s
                                    \|               # close delimiter
                                  )
                                  ([ ]+|$)           # followed by whitespace
                                  """ % vars(Inliner), re.VERBOSE),)
+
    def footnote(self, match):
        """Parse a footnote (".. [label] body"); return (nodes, blank_finish)."""
        lineno = self.state_machine.abs_line_number()
        indented, indent, offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        label = match.group(1)
        name = normalize_name(label)
        footnote = nodes.footnote('\n'.join(indented))
        footnote.line = lineno
        if name[0] == '#': # auto-numbered
            name = name[1:] # autonumber label
            footnote['auto'] = 1
            if name:
                footnote['names'].append(name)
            self.document.note_autofootnote(footnote)
        elif name == '*': # auto-symbol
            name = ''
            footnote['auto'] = '*'
            self.document.note_symbol_footnote(footnote)
        else: # manually numbered
            footnote += nodes.label('', label)
            footnote['names'].append(name)
            self.document.note_footnote(footnote)
        if name:
            self.document.note_explicit_target(footnote, footnote)
        else:
            # Unnamed (auto) footnotes still need a document-unique id.
            self.document.set_id(footnote, footnote)
        if indented:
            self.nested_parse(indented, input_offset=offset, node=footnote)
        return [footnote], blank_finish
+
+ def citation(self, match):
+ lineno = self.state_machine.abs_line_number()
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ label = match.group(1)
+ name = normalize_name(label)
+ citation = nodes.citation('\n'.join(indented))
+ citation.line = lineno
+ citation += nodes.label('', label)
+ citation['names'].append(name)
+ self.document.note_citation(citation)
+ self.document.note_explicit_target(citation, citation)
+ if indented:
+ self.nested_parse(indented, input_offset=offset, node=citation)
+ return [citation], blank_finish
+
    def hyperlink_target(self, match):
        """Parse a hyperlink target (".. _name: URI")."""
        pattern = self.explicit.patterns.target
        lineno = self.state_machine.abs_line_number()
        block, indent, offset, blank_finish = \
              self.state_machine.get_first_known_indented(
              match.end(), until_blank=1, strip_indent=0)
        blocktext = match.string[:match.end()] + '\n'.join(block)
        # Null-escape backslashes so the pattern's escape checks work.
        block = [escape2null(line) for line in block]
        escaped = block[0]
        blockindex = 0
        # The target name may wrap over several lines; accumulate lines
        # until the target pattern matches.
        while 1:
            targetmatch = pattern.match(escaped)
            if targetmatch:
                break
            blockindex += 1
            try:
                escaped += block[blockindex]
            except IndexError:
                raise MarkupError('malformed hyperlink target.', lineno)
        del block[:blockindex]
        # Strip the matched name portion from the first remaining line.
        block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
        target = self.make_target(block, blocktext, lineno,
                                  targetmatch.group('name'))
        return [target], blank_finish
+
+ def make_target(self, block, block_text, lineno, target_name):
+ target_type, data = self.parse_target(block, block_text, lineno)
+ if target_type == 'refname':
+ target = nodes.target(block_text, '', refname=normalize_name(data))
+ target.indirect_reference_name = data
+ self.add_target(target_name, '', target, lineno)
+ self.document.note_indirect_target(target)
+ return target
+ elif target_type == 'refuri':
+ target = nodes.target(block_text, '')
+ self.add_target(target_name, data, target, lineno)
+ return target
+ else:
+ return data
+
    def parse_target(self, block, block_text, lineno):
        """
        Determine the type of reference of a target.

        :Return: A 2-tuple, one of:

            - 'refname' and the indirect reference name
            - 'refuri' and the URI
            - 'malformed' and a system_message node

        NOTE(review): the code visible here only ever returns 'refname'
        or 'refuri'; the 'malformed' case documented above is presumably
        produced by a subclass or another version — confirm.
        """
        if block and block[-1].strip()[-1:] == '_': # possible indirect target
            reference = ' '.join([line.strip() for line in block])
            refname = self.is_reference(reference)
            if refname:
                return 'refname', refname
        # Treat as a URI: join all lines, stripping internal whitespace.
        reference = ''.join([''.join(line.split()) for line in block])
        return 'refuri', unescape(reference)
+
+ def is_reference(self, reference):
+ match = self.explicit.patterns.reference.match(
+ whitespace_normalize_name(reference))
+ if not match:
+ return None
+ return unescape(match.group('simple') or match.group('phrase'))
+
    def add_target(self, targetname, refuri, target, lineno):
        """Register `target` (named or anonymous) with the document."""
        target.line = lineno
        if targetname:
            name = normalize_name(unescape(targetname))
            target['names'].append(name)
            if refuri:
                # Validate/normalize the URI before recording it.
                uri = self.inliner.adjust_uri(refuri)
                if uri:
                    target['refuri'] = uri
                else:
                    raise ApplicationError('problem with URI: %r' % refuri)
            self.document.note_explicit_target(target, self.parent)
        else: # anonymous target
            if refuri:
                target['refuri'] = refuri
            target['anonymous'] = 1
            self.document.note_anonymous_target(target)
+
    def substitution_def(self, match):
        """Parse a substitution definition (".. |name| directive::")."""
        pattern = self.explicit.patterns.substitution
        lineno = self.state_machine.abs_line_number()
        block, indent, offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end(),
                                                          strip_indent=0)
        blocktext = (match.string[:match.end()] + '\n'.join(block))
        block.disconnect()
        escaped = escape2null(block[0].rstrip())
        blockindex = 0
        # The |name| may wrap over several lines; accumulate until the
        # substitution pattern matches.
        while 1:
            subdefmatch = pattern.match(escaped)
            if subdefmatch:
                break
            blockindex += 1
            try:
                escaped = escaped + ' ' + escape2null(block[blockindex].strip())
            except IndexError:
                raise MarkupError('malformed substitution definition.',
                                  lineno)
        del block[:blockindex] # strip out the substitution marker
        block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
        if not block[0]:
            del block[0]
            offset += 1
        while block and not block[-1].strip():
            block.pop()
        subname = subdefmatch.group('name')
        substitution_node = nodes.substitution_definition(blocktext)
        substitution_node.line = lineno
        if not block:
            msg = self.reporter.warning(
                'Substitution definition "%s" missing contents.' % subname,
                nodes.literal_block(blocktext, blocktext), line=lineno)
            return [msg], blank_finish
        block[0] = block[0].strip()
        substitution_node['names'].append(
            nodes.whitespace_normalize_name(subname))
        # Parse the embedded directive in the 'SubstitutionDef' state.
        new_abs_offset, blank_finish = self.nested_list_parse(
            block, input_offset=offset, node=substitution_node,
            initial_state='SubstitutionDef', blank_finish=blank_finish)
        i = 0
        # Move any non-inline children (e.g. system messages) out of the
        # substitution definition and into the parent.
        for node in substitution_node[:]:
            if not (isinstance(node, nodes.Inline) or
                    isinstance(node, nodes.Text)):
                self.parent += substitution_node[i]
                del substitution_node[i]
            else:
                i += 1
        for node in substitution_node.traverse(nodes.Element):
            if self.disallowed_inside_substitution_definitions(node):
                pformat = nodes.literal_block('', node.pformat().rstrip())
                msg = self.reporter.error(
                    'Substitution definition contains illegal element:',
                    pformat, nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                return [msg], blank_finish
        if len(substitution_node) == 0:
            msg = self.reporter.warning(
                'Substitution definition "%s" empty or invalid.'
                % subname,
                nodes.literal_block(blocktext, blocktext), line=lineno)
            return [msg], blank_finish
        self.document.note_substitution_def(
            substitution_node, subname, self.parent)
        return [substitution_node], blank_finish
+
+ def disallowed_inside_substitution_definitions(self, node):
+ """Return 1 if `node` may not appear in a substitution definition."""
+ # IDs, anonymous references, and auto footnote references depend on
+ # document-wide state and cannot be duplicated per substitution use.
+ if (node['ids'] or
+ isinstance(node, nodes.reference) and node.get('anonymous') or
+ isinstance(node, nodes.footnote_reference) and node.get('auto')):
+ return 1
+ else:
+ return 0
+
+ def directive(self, match, **option_presets):
+ """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
+ type_name = match.group(1)
+ # Look up the directive class for the current language; `messages`
+ # holds any system messages produced by the lookup itself.
+ directive_class, messages = directives.directive(
+ type_name, self.memo.language, self.document)
+ self.parent += messages
+ if directive_class:
+ return self.run_directive(
+ directive_class, match, type_name, option_presets)
+ else:
+ return self.unknown_directive(type_name)
+
+ def run_directive(self, directive, match, type_name, option_presets):
+ """
+ Parse a directive then run its directive function.
+
+ Parameters:
+
+ - `directive`: The class implementing the directive. Must be
+ a subclass of `rst.Directive`.
+
+ - `match`: A regular expression match object which matched the first
+ line of the directive.
+
+ - `type_name`: The directive name, as used in the source text.
+
+ - `option_presets`: A dictionary of preset options, defaults for the
+ directive options. Currently, only an "alt" option is passed by
+ substitution definitions (value: the substitution name), which may
+ be used by an embedded image directive.
+
+ Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
+ """
+ # Legacy directives implemented as plain functions are wrapped in a
+ # Directive subclass on the fly.
+ if isinstance(directive, (FunctionType, MethodType)):
+ from docutils.parsers.rst import convert_directive_function
+ directive = convert_directive_function(directive)
+ lineno = self.state_machine.abs_line_number()
+ initial_line_offset = self.state_machine.line_offset
+ indented, indent, line_offset, blank_finish \
+ = self.state_machine.get_first_known_indented(match.end(),
+ strip_top=0)
+ block_text = '\n'.join(self.state_machine.input_lines[
+ initial_line_offset : self.state_machine.line_offset + 1])
+ try:
+ arguments, options, content, content_offset = (
+ self.parse_directive_block(indented, line_offset,
+ directive, option_presets))
+ except MarkupError, detail:
+ error = self.reporter.error(
+ 'Error in "%s" directive:\n%s.' % (type_name,
+ ' '.join(detail.args)),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error], blank_finish
+ directive_instance = directive(
+ type_name, arguments, options, content, lineno,
+ content_offset, block_text, self, self.state_machine)
+ try:
+ result = directive_instance.run()
+ except docutils.parsers.rst.DirectiveError, directive_error:
+ # Convert a DirectiveError raised by run() into a system message.
+ msg_node = self.reporter.system_message(directive_error.level,
+ directive_error.message)
+ msg_node += nodes.literal_block(block_text, block_text)
+ msg_node['line'] = lineno
+ result = [msg_node]
+ assert isinstance(result, list), \
+ 'Directive "%s" must return a list of nodes.' % type_name
+ for i in range(len(result)):
+ assert isinstance(result[i], nodes.Node), \
+ ('Directive "%s" returned non-Node object (index %s): %r'
+ % (type_name, i, result[i]))
+ return (result,
+ blank_finish or self.state_machine.is_next_line_blank())
+
+ def parse_directive_block(self, indented, line_offset, directive,
+ option_presets):
+ """
+ Split a directive's indented block into arguments, options, and
+ content; return (arguments, options, content, content_offset).
+ Raises MarkupError on invalid structure.
+ """
+ option_spec = directive.option_spec
+ has_content = directive.has_content
+ if indented and not indented[0].strip():
+ indented.trim_start()
+ line_offset += 1
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ if indented and (directive.required_arguments
+ or directive.optional_arguments
+ or option_spec):
+ # The argument/option block ends at the first blank line.
+ for i in range(len(indented)):
+ if not indented[i].strip():
+ break
+ else:
+ i += 1
+ arg_block = indented[:i]
+ content = indented[i+1:]
+ content_offset = line_offset + i + 1
+ else:
+ content = indented
+ content_offset = line_offset
+ arg_block = []
+ while content and not content[0].strip():
+ content.trim_start()
+ content_offset += 1
+ if option_spec:
+ options, arg_block = self.parse_directive_options(
+ option_presets, option_spec, arg_block)
+ if arg_block and not (directive.required_arguments
+ or directive.optional_arguments):
+ raise MarkupError('no arguments permitted; blank line '
+ 'required before content block')
+ else:
+ options = {}
+ if directive.required_arguments or directive.optional_arguments:
+ arguments = self.parse_directive_arguments(
+ directive, arg_block)
+ else:
+ arguments = []
+ if content and not has_content:
+ raise MarkupError('no content permitted')
+ return (arguments, options, content, content_offset)
+
+ def parse_directive_options(self, option_presets, option_spec, arg_block):
+ """
+ Split `arg_block` at the first ":option:" line; return (options dict,
+ remaining argument lines). Raises MarkupError on bad option data.
+ """
+ options = option_presets.copy()
+ for i in range(len(arg_block)):
+ if arg_block[i][:1] == ':':
+ opt_block = arg_block[i:]
+ arg_block = arg_block[:i]
+ break
+ else:
+ opt_block = []
+ if opt_block:
+ success, data = self.parse_extension_options(option_spec,
+ opt_block)
+ if success: # data is a dict of options
+ options.update(data)
+ else: # data is an error string
+ raise MarkupError(data)
+ return options, arg_block
+
+ def parse_directive_arguments(self, directive, arg_block):
+ """
+ Return the list of directive arguments parsed from `arg_block`,
+ enforcing the directive's required/optional argument counts.
+ """
+ required = directive.required_arguments
+ optional = directive.optional_arguments
+ arg_text = '\n'.join(arg_block)
+ arguments = arg_text.split()
+ if len(arguments) < required:
+ raise MarkupError('%s argument(s) required, %s supplied'
+ % (required, len(arguments)))
+ elif len(arguments) > required + optional:
+ if directive.final_argument_whitespace:
+ # Surplus words are folded into the last argument.
+ arguments = arg_text.split(None, required + optional - 1)
+ else:
+ raise MarkupError(
+ 'maximum %s argument(s) allowed, %s supplied'
+ % (required + optional, len(arguments)))
+ return arguments
+
+ def parse_extension_options(self, option_spec, datalines):
+ """
+ Parse `datalines` for a field list containing extension options
+ matching `option_spec`.
+
+ :Parameters:
+ - `option_spec`: a mapping of option name to conversion
+ function, which should raise an exception on bad input.
+ - `datalines`: a list of input strings.
+
+ :Return:
+ - Success value, 1 or 0.
+ - An option dictionary on success, an error string on failure.
+ """
+ node = nodes.field_list()
+ newline_offset, blank_finish = self.nested_list_parse(
+ datalines, 0, node, initial_state='ExtensionOptions',
+ blank_finish=1)
+ if newline_offset != len(datalines): # incomplete parse of block
+ return 0, 'invalid option block'
+ try:
+ options = utils.extract_extension_options(node, option_spec)
+ except KeyError, detail:
+ return 0, ('unknown option: "%s"' % detail.args[0])
+ except (ValueError, TypeError), detail:
+ return 0, ('invalid option value: %s' % ' '.join(detail.args))
+ except utils.ExtensionOptionError, detail:
+ return 0, ('invalid option data: %s' % ' '.join(detail.args))
+ if blank_finish:
+ return 1, options
+ else:
+ return 0, 'option data incompletely parsed'
+
+ def unknown_directive(self, type_name):
+ """Report an unknown directive; consume its block as literal text."""
+ lineno = self.state_machine.abs_line_number()
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(0, strip_indent=0)
+ text = '\n'.join(indented)
+ error = self.reporter.error(
+ 'Unknown directive type "%s".' % type_name,
+ nodes.literal_block(text, text), line=lineno)
+ return [error], blank_finish
+
+ def comment(self, match):
+ """Parse a comment block; fallback for all explicit markup."""
+ if not match.string[match.end():].strip() \
+ and self.state_machine.is_next_line_blank(): # an empty comment?
+ return [nodes.comment()], 1 # "A tiny but practical wart."
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ # Drop trailing blank lines from the comment body.
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ text = '\n'.join(indented)
+ return [nodes.comment(text, text)], blank_finish
+
+ # (method, pattern) pairs, tried in order by explicit_construct().
+ explicit.constructs = [
+ (footnote,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \[
+ ( # footnote label:
+ [0-9]+ # manually numbered footnote
+ | # *OR*
+ \# # anonymous auto-numbered footnote
+ | # *OR*
+ \#%s # auto-number ed?) footnote label
+ | # *OR*
+ \* # auto-symbol footnote
+ )
+ \]
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
+ (citation,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \[(%s)\] # citation label
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
+ (hyperlink_target,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ _ # target indicator
+ (?![ ]|$) # first char. not space or EOL
+ """, re.VERBOSE)),
+ (substitution_def,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \| # substitution indicator
+ (?![ ]|$) # first char. not space or EOL
+ """, re.VERBOSE)),
+ (directive,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ (%s) # directive name
+ [ ]? # optional space
+ :: # directive delimiter
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
+
+ def explicit_markup(self, match, context, next_state):
+ """Footnotes, hyperlink targets, directives, comments."""
+ nodelist, blank_finish = self.explicit_construct(match)
+ self.parent += nodelist
+ # Continue parsing any immediately following explicit constructs.
+ self.explicit_list(blank_finish)
+ return [], next_state, []
+
+ def explicit_construct(self, match):
+ """Determine which explicit construct this is, parse & return it."""
+ errors = []
+ for method, pattern in self.explicit.constructs:
+ expmatch = pattern.match(match.string)
+ if expmatch:
+ try:
+ return method(self, expmatch)
+ except MarkupError, (message, lineno): # never reached?
+ errors.append(self.reporter.warning(message, line=lineno))
+ break
+ # No construct matched (or one failed): fall back to a comment.
+ nodelist, blank_finish = self.comment(match)
+ return nodelist + errors, blank_finish
+
+ def explicit_list(self, blank_finish):
+ """
+ Create a nested state machine for a series of explicit markup
+ constructs (including anonymous hyperlink targets).
+ """
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=self.parent, initial_state='Explicit',
+ blank_finish=blank_finish,
+ match_titles=self.state_machine.match_titles)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Explicit markup')
+
+ def anonymous(self, match, context, next_state):
+ """Anonymous hyperlink targets."""
+ nodelist, blank_finish = self.anonymous_target(match)
+ self.parent += nodelist
+ # Continue parsing any immediately following explicit constructs.
+ self.explicit_list(blank_finish)
+ return [], next_state, []
+
+ def anonymous_target(self, match):
+ """Parse one anonymous hyperlink target; return ([target], flag)."""
+ lineno = self.state_machine.abs_line_number()
+ block, indent, offset, blank_finish \
+ = self.state_machine.get_first_known_indented(match.end(),
+ until_blank=1)
+ blocktext = match.string[:match.end()] + '\n'.join(block)
+ block = [escape2null(line) for line in block]
+ target = self.make_target(block, blocktext, lineno, '')
+ return [target], blank_finish
+
+ def line(self, match, context, next_state):
+ """Section title overline or transition marker."""
+ if self.state_machine.match_titles:
+ # Let the Line state decide between title and transition.
+ return [match.string], 'Line', []
+ elif match.string.strip() == '::':
+ raise statemachine.TransitionCorrection('text')
+ elif len(match.string.strip()) < 4:
+ msg = self.reporter.info(
+ 'Unexpected possible title overline or transition.\n'
+ "Treating it as ordinary text because it's so short.",
+ line=self.state_machine.abs_line_number())
+ self.parent += msg
+ raise statemachine.TransitionCorrection('text')
+ else:
+ # Titles are not allowed here (e.g. inside a directive).
+ blocktext = self.state_machine.line
+ msg = self.reporter.severe(
+ 'Unexpected section title or transition.',
+ nodes.literal_block(blocktext, blocktext),
+ line=self.state_machine.abs_line_number())
+ self.parent += msg
+ return [], next_state, []
+
+ def text(self, match, context, next_state):
+ """Titles, definition lists, paragraphs."""
+ # Pass the first line to the Text state to classify the block.
+ return [match.string], 'Text', []
+
+
+class RFC2822Body(Body):
+
+ """
+ RFC2822 headers are only valid as the first constructs in documents. As
+ soon as anything else appears, the `Body` state should take over.
+ """
+
+ patterns = Body.patterns.copy() # can't modify the original
+ patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
+ initial_transitions = [(name, 'Body')
+ for name in Body.initial_transitions]
+ initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
+
+ def rfc2822(self, match, context, next_state):
+ """RFC2822-style field list item."""
+ fieldlist = nodes.field_list(classes=['rfc2822'])
+ self.parent += fieldlist
+ field, blank_finish = self.rfc2822_field(match)
+ fieldlist += field
+ offset = self.state_machine.line_offset + 1 # next line
+ # Subsequent headers are parsed by RFC2822List; any other input
+ # ends the list.
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=fieldlist, initial_state='RFC2822List',
+ blank_finish=blank_finish)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning(
+ 'RFC2822-style field list')
+ return [], next_state, []
+
+ def rfc2822_field(self, match):
+ """Parse one "Name: value" header; return (field node, flag)."""
+ name = match.string[:match.string.find(':')]
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end(),
+ until_blank=1)
+ fieldnode = nodes.field()
+ fieldnode += nodes.field_name(name, name)
+ fieldbody = nodes.field_body('\n'.join(indented))
+ fieldnode += fieldbody
+ if indented:
+ self.nested_parse(indented, input_offset=line_offset,
+ node=fieldbody)
+ return fieldnode, blank_finish
+
+
+class SpecializedBody(Body):
+
+ """
+ Superclass for second and subsequent compound element members. Compound
+ elements are lists and list-like constructs.
+
+ All transition methods are disabled (redefined as `invalid_input`).
+ Override individual methods in subclasses to re-enable.
+
+ For example, once an initial bullet list item, say, is recognized, the
+ `BulletList` subclass takes over, with a "bullet_list" node as its
+ container. Upon encountering the initial bullet list item, `Body.bullet`
+ calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
+ starts up a nested parsing session with `BulletList` as the initial state.
+ Only the ``bullet`` transition method is enabled in `BulletList`; as long
+ as only bullet list items are encountered, they are parsed and inserted
+ into the container. The first construct which is *not* a bullet list item
+ triggers the `invalid_input` method, which ends the nested parse and
+ closes the container. `BulletList` needs to recognize input that is
+ invalid in the context of a bullet list, which means everything *other
+ than* bullet list items, so it inherits the transition list created in
+ `Body`.
+ """
+
+ def invalid_input(self, match=None, context=None, next_state=None):
+ """Not a compound element member. Abort this state machine."""
+ self.state_machine.previous_line() # back up so parent SM can reassess
+ raise EOFError
+
+ # Every Body transition is disabled by default; subclasses re-enable
+ # only the one(s) valid for their compound element.
+ indent = invalid_input
+ bullet = invalid_input
+ enumerator = invalid_input
+ field_marker = invalid_input
+ option_marker = invalid_input
+ doctest = invalid_input
+ line_block = invalid_input
+ grid_table_top = invalid_input
+ simple_table_top = invalid_input
+ explicit_markup = invalid_input
+ anonymous = invalid_input
+ line = invalid_input
+ text = invalid_input
+
+
+class BulletList(SpecializedBody):
+
+ """Second and subsequent bullet_list list_items."""
+
+ def bullet(self, match, context, next_state):
+ """Bullet list item."""
+ if match.string[0] != self.parent['bullet']:
+ # different bullet: new list
+ self.invalid_input()
+ listitem, blank_finish = self.list_item(match.end())
+ self.parent += listitem
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class DefinitionList(SpecializedBody):
+
+ """Second and subsequent definition_list_items."""
+
+ def text(self, match, context, next_state):
+ """Definition lists."""
+ # Hand the term line to the Definition state for the body.
+ return [match.string], 'Definition', []
+
+
+class EnumeratedList(SpecializedBody):
+
+ """Second and subsequent enumerated_list list_items."""
+
+ def enumerator(self, match, context, next_state):
+ """Enumerated list item."""
+ format, sequence, text, ordinal = self.parse_enumerator(
+ match, self.parent['enumtype'])
+ # A change of format, sequence type, or a non-consecutive ordinal
+ # ends this list and starts a new one ('#' is the auto-enumerator).
+ if ( format != self.format
+ or (sequence != '#' and (sequence != self.parent['enumtype']
+ or self.auto
+ or ordinal != (self.lastordinal + 1)))
+ or not self.is_enumerated_list_item(ordinal, sequence, format)):
+ # different enumeration: new list
+ self.invalid_input()
+ if sequence == '#':
+ self.auto = 1
+ listitem, blank_finish = self.list_item(match.end())
+ self.parent += listitem
+ self.blank_finish = blank_finish
+ self.lastordinal = ordinal
+ return [], next_state, []
+
+
+class FieldList(SpecializedBody):
+
+ """Second and subsequent field_list fields."""
+
+ def field_marker(self, match, context, next_state):
+ """Field list field."""
+ field, blank_finish = self.field(match)
+ self.parent += field
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class OptionList(SpecializedBody):
+
+ """Second and subsequent option_list option_list_items."""
+
+ def option_marker(self, match, context, next_state):
+ """Option list item."""
+ try:
+ option_list_item, blank_finish = self.option_list_item(match)
+ except MarkupError, (message, lineno):
+ # A malformed item ends the list rather than erroring out.
+ self.invalid_input()
+ self.parent += option_list_item
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class RFC2822List(SpecializedBody, RFC2822Body):
+
+ """Second and subsequent RFC2822-style field_list fields."""
+
+ # Reuse RFC2822Body's patterns/transitions; all other transitions
+ # remain disabled via SpecializedBody.
+ patterns = RFC2822Body.patterns
+ initial_transitions = RFC2822Body.initial_transitions
+
+ def rfc2822(self, match, context, next_state):
+ """RFC2822-style field list item."""
+ field, blank_finish = self.rfc2822_field(match)
+ self.parent += field
+ self.blank_finish = blank_finish
+ return [], 'RFC2822List', []
+
+ blank = SpecializedBody.invalid_input
+
+
+class ExtensionOptions(FieldList):
+
+ """
+ Parse field_list fields for extension options.
+
+ No nested parsing is done (including inline markup parsing).
+ """
+
+ def parse_field_body(self, indented, offset, node):
+ """Override `Body.parse_field_body` for simpler parsing."""
+ lines = []
+ # The appended '' sentinel flushes the final paragraph.
+ for line in list(indented) + ['']:
+ if line.strip():
+ lines.append(line)
+ elif lines:
+ text = '\n'.join(lines)
+ node += nodes.paragraph(text, text)
+ lines = []
+
+
+class LineBlock(SpecializedBody):
+
+ """Second and subsequent lines of a line_block."""
+
+ blank = SpecializedBody.invalid_input
+
+ def line_block(self, match, context, next_state):
+ """New line of line block."""
+ lineno = self.state_machine.abs_line_number()
+ line, messages, blank_finish = self.line_block_line(match, lineno)
+ self.parent += line
+ # Messages belong to the line_block's container, not the block.
+ self.parent.parent += messages
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class Explicit(SpecializedBody):
+
+ """Second and subsequent explicit markup construct."""
+
+ def explicit_markup(self, match, context, next_state):
+ """Footnotes, hyperlink targets, directives, comments."""
+ nodelist, blank_finish = self.explicit_construct(match)
+ self.parent += nodelist
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+ def anonymous(self, match, context, next_state):
+ """Anonymous hyperlink targets."""
+ nodelist, blank_finish = self.anonymous_target(match)
+ self.parent += nodelist
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+ blank = SpecializedBody.invalid_input
+
+
+class SubstitutionDef(Body):
+
+ """
+ Parser for the contents of a substitution_definition element.
+ """
+
+ patterns = {
+ 'embedded_directive': re.compile(r'(%s)::( +|$)'
+ % Inliner.simplename, re.UNICODE),
+ 'text': r''}
+ initial_transitions = ['embedded_directive', 'text']
+
+ def embedded_directive(self, match, context, next_state):
+ """Parse the directive embedded in the substitution definition."""
+ # The substitution name is passed along as the default "alt" option.
+ nodelist, blank_finish = self.directive(match,
+ alt=self.parent['names'][0])
+ self.parent += nodelist
+ if not self.state_machine.at_eof():
+ self.blank_finish = blank_finish
+ # A substitution definition holds exactly one construct.
+ raise EOFError
+
+ def text(self, match, context, next_state):
+ """Non-directive content: end the nested parse immediately."""
+ if not self.state_machine.at_eof():
+ self.blank_finish = self.state_machine.is_next_line_blank()
+ raise EOFError
+
+
+class Text(RSTState):
+
+ """
+ Classifier of second line of a text block.
+
+ Could be a paragraph, a definition list item, or a title.
+ """
+
+ patterns = {'underline': Body.patterns['line'],
+ 'text': r''}
+ initial_transitions = [('underline', 'Body'), ('text', 'Body')]
+
+ def blank(self, match, context, next_state):
+ """End of paragraph."""
+ paragraph, literalnext = self.paragraph(
+ context, self.state_machine.abs_line_number() - 1)
+ self.parent += paragraph
+ if literalnext:
+ self.parent += self.literal_block()
+ return [], 'Body', []
+
+ def eof(self, context):
+ # A pending paragraph at EOF is flushed like a blank line.
+ if context:
+ self.blank(None, context, None)
+ return []
+
+ def indent(self, match, context, next_state):
+ """Definition list item."""
+ definitionlist = nodes.definition_list()
+ definitionlistitem, blank_finish = self.definition_list_item(context)
+ definitionlist += definitionlistitem
+ self.parent += definitionlist
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=definitionlist, initial_state='DefinitionList',
+ blank_finish=blank_finish, blank_finish_state='Definition')
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Definition list')
+ return [], 'Body', []
+
+ def underline(self, match, context, next_state):
+ """Section title."""
+ lineno = self.state_machine.abs_line_number()
+ title = context[0].rstrip()
+ underline = match.string.rstrip()
+ source = title + '\n' + underline
+ messages = []
+ # column_width accounts for wide (east-asian) characters.
+ if column_width(title) > len(underline):
+ if len(underline) < 4:
+ if self.state_machine.match_titles:
+ msg = self.reporter.info(
+ 'Possible title underline, too short for the title.\n'
+ "Treating it as ordinary text because it's so short.",
+ line=lineno)
+ self.parent += msg
+ raise statemachine.TransitionCorrection('text')
+ else:
+ blocktext = context[0] + '\n' + self.state_machine.line
+ msg = self.reporter.warning(
+ 'Title underline too short.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ messages.append(msg)
+ if not self.state_machine.match_titles:
+ blocktext = context[0] + '\n' + self.state_machine.line
+ msg = self.reporter.severe(
+ 'Unexpected section title.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += messages
+ self.parent += msg
+ return [], next_state, []
+ style = underline[0]
+ context[:] = []
+ self.section(title, source, style, lineno - 1, messages)
+ return [], next_state, []
+
+ def text(self, match, context, next_state):
+ """Paragraph."""
+ startline = self.state_machine.abs_line_number() - 1
+ msg = None
+ try:
+ block = self.state_machine.get_text_block(flush_left=1)
+ except statemachine.UnexpectedIndentationError, instance:
+ block, source, lineno = instance.args
+ msg = self.reporter.error('Unexpected indentation.',
+ source=source, line=lineno)
+ lines = context + list(block)
+ paragraph, literalnext = self.paragraph(lines, startline)
+ self.parent += paragraph
+ self.parent += msg
+ if literalnext:
+ try:
+ self.state_machine.next_line()
+ except EOFError:
+ pass
+ self.parent += self.literal_block()
+ return [], next_state, []
+
+ def literal_block(self):
+ """Return a list of nodes."""
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_indented()
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ if not indented:
+ # No indented block: the literal block must be quoted instead.
+ return self.quoted_literal_block()
+ data = '\n'.join(indented)
+ literal_block = nodes.literal_block(data, data)
+ literal_block.line = offset + 1
+ nodelist = [literal_block]
+ if not blank_finish:
+ nodelist.append(self.unindent_warning('Literal block'))
+ return nodelist
+
+ def quoted_literal_block(self):
+ """Parse a quoted (unindented) literal block; return its nodes."""
+ abs_line_offset = self.state_machine.abs_line_offset()
+ offset = self.state_machine.line_offset
+ parent_node = nodes.Element()
+ new_abs_offset = self.nested_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=abs_line_offset, node=parent_node, match_titles=0,
+ state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
+ 'initial_state': 'QuotedLiteralBlock'})
+ self.goto_line(new_abs_offset)
+ return parent_node.children
+
+ def definition_list_item(self, termline):
+ """Parse one term + definition; return (list item node, flag)."""
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_indented()
+ definitionlistitem = nodes.definition_list_item(
+ '\n'.join(termline + list(indented)))
+ lineno = self.state_machine.abs_line_number() - 1
+ definitionlistitem.line = lineno
+ termlist, messages = self.term(termline, lineno)
+ definitionlistitem += termlist
+ definition = nodes.definition('', *messages)
+ definitionlistitem += definition
+ if termline[0][-2:] == '::':
+ definition += self.reporter.info(
+ 'Blank line missing before literal block (after the "::")? '
+ 'Interpreted as a definition list item.', line=line_offset+1)
+ self.nested_parse(indented, input_offset=line_offset, node=definition)
+ return definitionlistitem, blank_finish
+
+ classifier_delimiter = re.compile(' +: +')
+
+ def term(self, lines, lineno):
+ """Return a definition_list's term and optional classifiers."""
+ assert len(lines) == 1
+ text_nodes, messages = self.inline_text(lines[0], lineno)
+ term_node = nodes.term()
+ node_list = [term_node]
+ for i in range(len(text_nodes)):
+ node = text_nodes[i]
+ if isinstance(node, nodes.Text):
+ # " : " inside a Text node separates term from classifiers.
+ parts = self.classifier_delimiter.split(node.rawsource)
+ if len(parts) == 1:
+ node_list[-1] += node
+ else:
+
+ node_list[-1] += nodes.Text(parts[0].rstrip())
+ for part in parts[1:]:
+ classifier_node = nodes.classifier('', part)
+ node_list.append(classifier_node)
+ else:
+ node_list[-1] += node
+ return node_list, messages
+
+
+class SpecializedText(Text):
+
+ """
+ Superclass for second and subsequent lines of Text-variants.
+
+ All transition methods are disabled. Override individual methods in
+ subclasses to re-enable.
+ """
+
+ def eof(self, context):
+ """Incomplete construct."""
+ return []
+
+ def invalid_input(self, match=None, context=None, next_state=None):
+ """Not a compound element member. Abort this state machine."""
+ raise EOFError
+
+ # All Text transitions are disabled by default.
+ blank = invalid_input
+ indent = invalid_input
+ underline = invalid_input
+ text = invalid_input
+
+
+class Definition(SpecializedText):
+
+ """Second line of potential definition_list_item."""
+
+ def eof(self, context):
+ """Not a definition."""
+ self.state_machine.previous_line(2) # so parent SM can reassess
+ return []
+
+ def indent(self, match, context, next_state):
+ """Definition list item."""
+ definitionlistitem, blank_finish = self.definition_list_item(context)
+ self.parent += definitionlistitem
+ self.blank_finish = blank_finish
+ return [], 'DefinitionList', []
+
+
+class Line(SpecializedText):
+
+ """
+ Second line of over- & underlined section title or transition marker.
+ """
+
+ eofcheck = 1 # @@@ ???
+ """Set to 0 while parsing sections, so that we don't catch the EOF."""
+
+ def eof(self, context):
+ """Transition marker at end of section or document."""
+ marker = context[0].strip()
+ if self.memo.section_bubble_up_kludge:
+ self.memo.section_bubble_up_kludge = 0
+ elif len(marker) < 4:
+ self.state_correction(context)
+ if self.eofcheck: # ignore EOFError with sections
+ lineno = self.state_machine.abs_line_number() - 1
+ transition = nodes.transition(rawsource=context[0])
+ transition.line = lineno
+ self.parent += transition
+ self.eofcheck = 1
+ return []
+
+ def blank(self, match, context, next_state):
+ """Transition marker."""
+ lineno = self.state_machine.abs_line_number() - 1
+ marker = context[0].strip()
+ if len(marker) < 4:
+ # Too short for a transition: re-parse as ordinary text.
+ self.state_correction(context)
+ transition = nodes.transition(rawsource=marker)
+ transition.line = lineno
+ self.parent += transition
+ return [], 'Body', []
+
+ def text(self, match, context, next_state):
+ """Potential over- & underlined title."""
+ lineno = self.state_machine.abs_line_number() - 1
+ overline = context[0]
+ title = match.string
+ underline = ''
+ try:
+ underline = self.state_machine.next_line()
+ except EOFError:
+ # Overline + title but no underline: incomplete.
+ blocktext = overline + '\n' + title
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Incomplete section title.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ source = '%s\n%s\n%s' % (overline, title, underline)
+ overline = overline.rstrip()
+ underline = underline.rstrip()
+ if not self.transitions['underline'][0].match(underline):
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Missing matching underline for section title overline.',
+ nodes.literal_block(source, source), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ elif overline != underline:
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Title overline & underline mismatch.',
+ nodes.literal_block(source, source), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ title = title.rstrip()
+ messages = []
+ if column_width(title) > len(overline):
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.warning(
+ 'Title overline too short.',
+ nodes.literal_block(source, source), line=lineno)
+ messages.append(msg)
+ style = (overline[0], underline[0])
+ self.eofcheck = 0 # @@@ not sure this is correct
+ self.section(title.lstrip(), source, style, lineno + 1, messages)
+ self.eofcheck = 1
+ return [], 'Body', []
+
+ indent = text # indented title
+
+ def underline(self, match, context, next_state):
+ """A second line-like line: invalid title/transition combination."""
+ overline = context[0]
+ blocktext = overline + '\n' + self.state_machine.line
+ lineno = self.state_machine.abs_line_number() - 1
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 1)
+ msg = self.reporter.error(
+ 'Invalid section title or transition marker.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+
+ def short_overline(self, context, blocktext, lineno, lines=1):
+ """Report a too-short overline and re-parse the block as text."""
+ msg = self.reporter.info(
+ 'Possible incomplete section title.\nTreating the overline as '
+ "ordinary text because it's so short.", line=lineno)
+ self.parent += msg
+ self.state_correction(context, lines)
+
+ def state_correction(self, context, lines=1):
+ """Back up `lines` lines and restart in Body's "text" transition."""
+ self.state_machine.previous_line(lines)
+ context[:] = []
+ raise statemachine.StateCorrection('Body', 'text')
+
+
+class QuotedLiteralBlock(RSTState):
+
+ """
+ Nested parse handler for quoted (unindented) literal blocks.
+
+ Special-purpose. Not for inclusion in `state_classes`.
+ """
+
+ patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
+ 'text': r''}
+ initial_transitions = ('initial_quoted', 'text')
+
+ def __init__(self, state_machine, debug=0):
+ RSTState.__init__(self, state_machine, debug)
+ # Deferred system messages, emitted by eof().
+ self.messages = []
+ # Line number of the block's first line, set by initial_quoted().
+ self.initial_lineno = None
+
+ def blank(self, match, context, next_state):
+ """A blank line ends the block once content has been collected."""
+ if context:
+ raise EOFError
+ else:
+ return context, next_state, []
+
+ def eof(self, context):
+ """Build the literal_block node from the collected lines."""
+ if context:
+ text = '\n'.join(context)
+ literal_block = nodes.literal_block(text, text)
+ literal_block.line = self.initial_lineno
+ self.parent += literal_block
+ else:
+ self.parent += self.reporter.warning(
+ 'Literal block expected; none found.',
+ line=self.state_machine.abs_line_number())
+ self.state_machine.previous_line()
+ self.parent += self.messages
+ return []
+
+ def indent(self, match, context, next_state):
+ """Indented input inside a quoted block is an error; end the parse."""
+ assert context, ('QuotedLiteralBlock.indent: context should not '
+ 'be empty!')
+ self.messages.append(
+ self.reporter.error('Unexpected indentation.',
+ line=self.state_machine.abs_line_number()))
+ self.state_machine.previous_line()
+ raise EOFError
+
+ def initial_quoted(self, match, context, next_state):
+ """Match arbitrary quote character on the first line only."""
+ self.remove_transition('initial_quoted')
+ quote = match.string[0]
+ pattern = re.compile(re.escape(quote))
+ # New transition matches consistent quotes only:
+ self.add_transition('quoted',
+ (pattern, self.quoted, self.__class__.__name__))
+ self.initial_lineno = self.state_machine.abs_line_number()
+ return [match.string], next_state, []
+
+ def quoted(self, match, context, next_state):
+ """Match consistent quotes on subsequent lines."""
+ context.append(match.string)
+ return context, next_state, []
+
+ def text(self, match, context, next_state):
+ """A differently-quoted line ends the block (with an error if any
+ content was collected)."""
+ if context:
+ self.messages.append(
+ self.reporter.error('Inconsistent literal block quoting.',
+ line=self.state_machine.abs_line_number()))
+ self.state_machine.previous_line()
+ raise EOFError
+
+
+# QuotedLiteralBlock is deliberately excluded (special-purpose only).
+state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
+ OptionList, LineBlock, ExtensionOptions, Explicit, Text,
+ Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
+"""Standard set of State classes used to start `RSTStateMachine`."""
diff --git a/test/test_rst2rst_functional.py b/test/test_functional_losslessrst.py
index f7dd1b5dc..334829ccd 100644
--- a/test/test_rst2rst_functional.py
+++ b/test/test_functional_losslessrst.py
@@ -25,7 +25,7 @@ import test_functional
from test_functional import datadir, join_path
### use this to limit the tests run:
-testsubset = (0,2,3,4,5, ) # 1 == dangerous.txt
+testsubset = (0,)#2,3,4,5, ) # 1 == dangerous.txt
#testsubset = (5, ) #(0,1,2,3,4,)
@@ -86,7 +86,7 @@ class FunctionalRst2RstTestCase(DocutilsTestSupport.CustomTestCase):
'docinfo_xform': 0,
}
namespace['reader_name'] = "standalone"
- namespace['parser_name'] = "rst"
+ namespace['parser_name'] = "losslessrst"
namespace['writer_name'] = "rst"
# Read the variables set in the default config file:
execfile(join_path(datadir, 'tests', '_default.py'), namespace)