summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorstrank <strank@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-03-05 10:02:30 +0000
committerstrank <strank@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-03-05 10:02:30 +0000
commita758fd20ba593453f163e8375ac2c895de090038 (patch)
tree5ce296a53da323264c4be0a7849ff8a6daac7741
parent7e55210d99b28027bb29b0e58f02f315dfd88605 (diff)
downloaddocutils-lossless-rst-writer.tar.gz
Lossless rst parser part (not yet functional)lossless-rst-writer
git-svn-id: http://svn.code.sf.net/p/docutils/code/branches/lossless-rst-writer@6260 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r--docutils/parsers/losslessrst/__init__.py54
-rw-r--r--docutils/parsers/losslessrst/states.py2886
-rw-r--r--test/test_functional_losslessrst.py (renamed from test/test_rst2rst_functional.py)4
3 files changed, 2942 insertions, 2 deletions
diff --git a/docutils/parsers/losslessrst/__init__.py b/docutils/parsers/losslessrst/__init__.py
new file mode 100644
index 000000000..07eddabd5
--- /dev/null
+++ b/docutils/parsers/losslessrst/__init__.py
@@ -0,0 +1,54 @@
+# $Id$
+# Author: Stefan Rank <strank(AT)strank(DOT)info>
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is the ``docutils.parsers.losslessrst`` package.
+A subclass of the rst parser that retains additional information
+during parsing to allow exact reconstruction of the input.
+(See the ``docutils.parsers.rst`` parser for general documentation.)
+
+The node structure created is fully compatible with the rst parser.
+Additional information is stored in one attribute per node.
+(named lossless?)
+
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import docutils.parsers
+import docutils.statemachine
+from docutils.parsers.losslessrst import states
+from docutils import frontend, nodes
+
+
+class Parser(docutils.parsers.rst.Parser):
+
+ """The lossless reStructuredText parser."""
+
+ supported = ('losslessrst', 'lossless')
+ """Aliases this parser supports."""
+
+ def __init__(self, rfc2822=None, inliner=None):
+ if rfc2822:
+ self.initial_state = 'RFC2822Body'
+ else:
+ self.initial_state = 'Body'
+ self.state_classes = states.state_classes
+ self.inliner = inliner
+
+ def parse(self, inputstring, document):
+ """Parse `inputstring` and populate `document`, a document tree."""
+ self.setup_parse(inputstring, document)
+ self.statemachine = states.RSTStateMachine(
+ state_classes=self.state_classes,
+ initial_state=self.initial_state,
+ debug=document.reporter.debug_flag)
+ inputlines = docutils.statemachine.string2lines(
+ inputstring, tab_width=document.settings.tab_width,
+ convert_whitespace=1)
+ self.statemachine.run(inputlines, document, inliner=self.inliner)
+ self.finish_parse()
+
+
diff --git a/docutils/parsers/losslessrst/states.py b/docutils/parsers/losslessrst/states.py
new file mode 100644
index 000000000..ab736ac4a
--- /dev/null
+++ b/docutils/parsers/losslessrst/states.py
@@ -0,0 +1,2886 @@
+# $Id$
+# Author: Stefan Rank <strank(AT)strank(DOT)info>
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is the ``docutils.parsers.losslessrst.states`` module,
+subclass of the rst.states module.
+It subclasses some of the state classes as necessary for losslessrst.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+# import all imported names from original states module
+from docutils.parsers.rst.states import (sys, re, roman,
+ TupleType, FunctionType, MethodType,
+ nodes, statemachine, utils, urischemes,
+ ApplicationError, DataError,
+ StateMachineWS, StateWS,
+ normalize_name,
+ whitespace_normalize_name,
+ escape2null, unescape, column_width,
+ directives, languages, tableparser, roles,
+ _fallback_language_module)
+
+from docutils.parsers.rst.states import (MarkupError,
+ UnknownInterpretedRoleError,
+ InterpretedRoleNotImplementedError,
+ ParserError,
+ MarkupMismatch,
+ Struct)
+
+
+class RSTStateMachine(StateMachineWS):
+
+ """
+ reStructuredText's master StateMachine.
+
+ The entry point to reStructuredText parsing is the `run()` method.
+ """
+
+ def run(self, input_lines, document, input_offset=0, match_titles=1,
+ inliner=None):
+ """
+ Parse `input_lines` and modify the `document` node in place.
+
+ Extend `StateMachineWS.run()`: set up parse-global data and
+ run the StateMachine.
+ """
+ self.language = languages.get_language(
+ document.settings.language_code)
+ self.match_titles = match_titles
+ if inliner is None:
+ inliner = Inliner()
+ inliner.init_customizations(document.settings)
+ self.memo = Struct(document=document,
+ reporter=document.reporter,
+ language=self.language,
+ title_styles=[],
+ section_level=0,
+ section_bubble_up_kludge=0,
+ inliner=inliner)
+ self.document = document
+ self.attach_observer(document.note_source)
+ self.reporter = self.memo.reporter
+ self.node = document
+ results = StateMachineWS.run(self, input_lines, input_offset,
+ input_source=document['source'])
+ assert results == [], 'RSTStateMachine.run() results should be empty!'
+ self.node = self.memo = None # remove unneeded references
+
+
+class NestedStateMachine(StateMachineWS):
+
+ """
+ StateMachine run from within other StateMachine runs, to parse nested
+ document structures.
+ """
+
+ def run(self, input_lines, input_offset, memo, node, match_titles=1):
+ """
+ Parse `input_lines` and populate a `docutils.nodes.document` instance.
+
+ Extend `StateMachineWS.run()`: set up document-wide data.
+ """
+ self.match_titles = match_titles
+ self.memo = memo
+ self.document = memo.document
+ self.attach_observer(self.document.note_source)
+ self.reporter = memo.reporter
+ self.language = memo.language
+ self.node = node
+ results = StateMachineWS.run(self, input_lines, input_offset)
+ assert results == [], ('NestedStateMachine.run() results should be '
+ 'empty!')
+ return results
+
+
+class RSTState(StateWS):
+
+ """
+ reStructuredText State superclass.
+
+ Contains methods used by all State subclasses.
+ """
+
+ nested_sm = NestedStateMachine
+
+ def __init__(self, state_machine, debug=0):
+ self.nested_sm_kwargs = {'state_classes': state_classes,
+ 'initial_state': 'Body'}
+ StateWS.__init__(self, state_machine, debug)
+
+ def runtime_init(self):
+ StateWS.runtime_init(self)
+ memo = self.state_machine.memo
+ self.memo = memo
+ self.reporter = memo.reporter
+ self.inliner = memo.inliner
+ self.document = memo.document
+ self.parent = self.state_machine.node
+
+ def goto_line(self, abs_line_offset):
+ """
+ Jump to input line `abs_line_offset`, ignoring jumps past the end.
+ """
+ try:
+ self.state_machine.goto_line(abs_line_offset)
+ except EOFError:
+ pass
+
+ def no_match(self, context, transitions):
+ """
+ Override `StateWS.no_match` to generate a system message.
+
+ This code should never be run.
+ """
+ self.reporter.severe(
+ 'Internal error: no transition pattern match. State: "%s"; '
+ 'transitions: %s; context: %s; current line: %r.'
+ % (self.__class__.__name__, transitions, context,
+ self.state_machine.line),
+ line=self.state_machine.abs_line_number())
+ return context, None, []
+
+ def bof(self, context):
+ """Called at beginning of file."""
+ return [], []
+
+ def nested_parse(self, block, input_offset, node, match_titles=0,
+ state_machine_class=None, state_machine_kwargs=None):
+ """
+ Create a new StateMachine rooted at `node` and run it over the input
+ `block`.
+ """
+ if state_machine_class is None:
+ state_machine_class = self.nested_sm
+ if state_machine_kwargs is None:
+ state_machine_kwargs = self.nested_sm_kwargs
+ block_length = len(block)
+ state_machine = state_machine_class(debug=self.debug,
+ **state_machine_kwargs)
+ state_machine.run(block, input_offset, memo=self.memo,
+ node=node, match_titles=match_titles)
+ state_machine.unlink()
+ new_offset = state_machine.abs_line_offset()
+ # No `block.parent` implies disconnected -- lines aren't in sync:
+ if block.parent and (len(block) - block_length) != 0:
+ # Adjustment for block if modified in nested parse:
+ self.state_machine.next_line(len(block) - block_length)
+ return new_offset
+
+ def nested_list_parse(self, block, input_offset, node, initial_state,
+ blank_finish,
+ blank_finish_state=None,
+ extra_settings={},
+ match_titles=0,
+ state_machine_class=None,
+ state_machine_kwargs=None):
+ """
+ Create a new StateMachine rooted at `node` and run it over the input
+ `block`. Also keep track of optional intermediate blank lines and the
+ required final one.
+ """
+ if state_machine_class is None:
+ state_machine_class = self.nested_sm
+ if state_machine_kwargs is None:
+ state_machine_kwargs = self.nested_sm_kwargs.copy()
+ state_machine_kwargs['initial_state'] = initial_state
+ state_machine = state_machine_class(debug=self.debug,
+ **state_machine_kwargs)
+ if blank_finish_state is None:
+ blank_finish_state = initial_state
+ state_machine.states[blank_finish_state].blank_finish = blank_finish
+ for key, value in extra_settings.items():
+ setattr(state_machine.states[initial_state], key, value)
+ state_machine.run(block, input_offset, memo=self.memo,
+ node=node, match_titles=match_titles)
+ blank_finish = state_machine.states[blank_finish_state].blank_finish
+ state_machine.unlink()
+ return state_machine.abs_line_offset(), blank_finish
+
+ def section(self, title, source, style, lineno, messages):
+ """Check for a valid subsection and create one if it checks out."""
+ if self.check_subsection(source, style, lineno):
+ self.new_subsection(title, lineno, messages)
+
+ def check_subsection(self, source, style, lineno):
+ """
+ Check for a valid subsection header. Return 1 (true) or None (false).
+
+ When a new section is reached that isn't a subsection of the current
+ section, back up the line count (use ``previous_line(-x)``), then
+ ``raise EOFError``. The current StateMachine will finish, then the
+ calling StateMachine can re-examine the title. This will work its way
+ back up the calling chain until the correct section level is reached.
+
+ @@@ Alternative: Evaluate the title, store the title info & level, and
+ back up the chain until that level is reached. Store in memo? Or
+ return in results?
+
+ :Exception: `EOFError` when a sibling or supersection encountered.
+ """
+ memo = self.memo
+ title_styles = memo.title_styles
+ mylevel = memo.section_level
+ try: # check for existing title style
+ level = title_styles.index(style) + 1
+ except ValueError: # new title style
+ if len(title_styles) == memo.section_level: # new subsection
+ title_styles.append(style)
+ return 1
+ else: # not at lowest level
+ self.parent += self.title_inconsistent(source, lineno)
+ return None
+ if level <= mylevel: # sibling or supersection
+ memo.section_level = level # bubble up to parent section
+ if len(style) == 2:
+ memo.section_bubble_up_kludge = 1
+ # back up 2 lines for underline title, 3 for overline title
+ self.state_machine.previous_line(len(style) + 1)
+ raise EOFError # let parent section re-evaluate
+ if level == mylevel + 1: # immediate subsection
+ return 1
+ else: # invalid subsection
+ self.parent += self.title_inconsistent(source, lineno)
+ return None
+
+ def title_inconsistent(self, sourcetext, lineno):
+ error = self.reporter.severe(
+ 'Title level inconsistent:', nodes.literal_block('', sourcetext),
+ line=lineno)
+ return error
+
+ def new_subsection(self, title, lineno, messages):
+ """Append new subsection to document tree. On return, check level."""
+ memo = self.memo
+ mylevel = memo.section_level
+ memo.section_level += 1
+ section_node = nodes.section()
+ self.parent += section_node
+ textnodes, title_messages = self.inline_text(title, lineno)
+ titlenode = nodes.title(title, '', *textnodes)
+ name = normalize_name(titlenode.astext())
+ section_node['names'].append(name)
+ section_node += titlenode
+ section_node += messages
+ section_node += title_messages
+ self.document.note_implicit_target(section_node, section_node)
+ offset = self.state_machine.line_offset + 1
+ absoffset = self.state_machine.abs_line_offset() + 1
+ newabsoffset = self.nested_parse(
+ self.state_machine.input_lines[offset:], input_offset=absoffset,
+ node=section_node, match_titles=1)
+ self.goto_line(newabsoffset)
+ if memo.section_level <= mylevel: # can't handle next section?
+ raise EOFError # bubble up to supersection
+ # reset section_level; next pass will detect it properly
+ memo.section_level = mylevel
+
+ def paragraph(self, lines, lineno):
+ """
+ Return a list (paragraph & messages) & a boolean: literal_block next?
+ """
+ data = '\n'.join(lines).rstrip()
+ if re.search(r'(?<!\\)(\\\\)*::$', data):
+ if len(data) == 2:
+ return [], 1
+ elif data[-3] in ' \n':
+ text = data[:-3].rstrip()
+ else:
+ text = data[:-1]
+ literalnext = 1
+ else:
+ text = data
+ literalnext = 0
+ textnodes, messages = self.inline_text(text, lineno)
+ p = nodes.paragraph(data, '', *textnodes)
+ p.line = lineno
+ return [p] + messages, literalnext
+
+ def inline_text(self, text, lineno):
+ """
+ Return 2 lists: nodes (text and inline elements), and system_messages.
+ """
+ return self.inliner.parse(text, lineno, self.memo, self.parent)
+
+ def unindent_warning(self, node_name):
+ return self.reporter.warning(
+ '%s ends without a blank line; unexpected unindent.' % node_name,
+ line=(self.state_machine.abs_line_number() + 1))
+
+
+def build_regexp(definition, compile=1):
+ """
+ Build, compile and return a regular expression based on `definition`.
+
+ :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
+ where "parts" is a list of regular expressions and/or regular
+ expression definitions to be joined into an or-group.
+ """
+ name, prefix, suffix, parts = definition
+ part_strings = []
+ for part in parts:
+ if type(part) is TupleType:
+ part_strings.append(build_regexp(part, None))
+ else:
+ part_strings.append(part)
+ or_group = '|'.join(part_strings)
+ regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
+ if compile:
+ return re.compile(regexp, re.UNICODE)
+ else:
+ return regexp
+
+
+class Inliner:
+
+ """
+ Parse inline markup; call the `parse()` method.
+ """
+
+ def __init__(self):
+ self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
+ """List of (pattern, bound method) tuples, used by
+ `self.implicit_inline`."""
+
+ def init_customizations(self, settings):
+ """Setting-based customizations; run when parsing begins."""
+ if settings.pep_references:
+ self.implicit_dispatch.append((self.patterns.pep,
+ self.pep_reference))
+ if settings.rfc_references:
+ self.implicit_dispatch.append((self.patterns.rfc,
+ self.rfc_reference))
+
+ def parse(self, text, lineno, memo, parent):
+ # Needs to be refactored for nested inline markup.
+ # Add nested_parse() method?
+ """
+ Return 2 lists: nodes (text and inline elements), and system_messages.
+
+ Using `self.patterns.initial`, a pattern which matches start-strings
+ (emphasis, strong, interpreted, phrase reference, literal,
+ substitution reference, and inline target) and complete constructs
+ (simple reference, footnote reference), search for a candidate. When
+ one is found, check for validity (e.g., not a quoted '*' character).
+ If valid, search for the corresponding end string if applicable, and
+ check it for validity. If not found or invalid, generate a warning
+ and ignore the start-string. Implicit inline markup (e.g. standalone
+ URIs) is found last.
+ """
+ self.reporter = memo.reporter
+ self.document = memo.document
+ self.language = memo.language
+ self.parent = parent
+ pattern_search = self.patterns.initial.search
+ dispatch = self.dispatch
+ remaining = escape2null(text)
+ processed = []
+ unprocessed = []
+ messages = []
+ while remaining:
+ match = pattern_search(remaining)
+ if match:
+ groups = match.groupdict()
+ method = dispatch[groups['start'] or groups['backquote']
+ or groups['refend'] or groups['fnend']]
+ before, inlines, remaining, sysmessages = method(self, match,
+ lineno)
+ unprocessed.append(before)
+ messages += sysmessages
+ if inlines:
+ processed += self.implicit_inline(''.join(unprocessed),
+ lineno)
+ processed += inlines
+ unprocessed = []
+ else:
+ break
+ remaining = ''.join(unprocessed) + remaining
+ if remaining:
+ processed += self.implicit_inline(remaining, lineno)
+ return processed, messages
+
+ openers = '\'"([{<'
+ closers = '\'")]}>'
+ start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers))
+ end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))'
+ % re.escape(closers))
+ non_whitespace_before = r'(?<![ \n])'
+ non_whitespace_escape_before = r'(?<![ \n\x00])'
+ non_whitespace_after = r'(?![ \n])'
+ # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
+ simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
+ # Valid URI characters (see RFC 2396 & RFC 2732);
+ # final \x00 allows backslash escapes in URIs:
+ uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
+ # Delimiter indicating the end of a URI (not part of the URI):
+ uri_end_delim = r"""[>]"""
+ # Last URI character; same as uric but no punctuation:
+ urilast = r"""[_~*/=+a-zA-Z0-9]"""
+ # End of a URI (either 'urilast' or 'uric followed by a
+ # uri_end_delim'):
+ uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
+ emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
+ email_pattern = r"""
+ %(emailc)s+(?:\.%(emailc)s+)* # name
+ (?<!\x00)@ # at
+ %(emailc)s+(?:\.%(emailc)s*)* # host
+ %(uri_end)s # final URI char
+ """
+ parts = ('initial_inline', start_string_prefix, '',
+ [('start', '', non_whitespace_after, # simple start-strings
+ [r'\*\*', # strong
+ r'\*(?!\*)', # emphasis but not strong
+ r'``', # literal
+ r'_`', # inline internal target
+ r'\|(?!\|)'] # substitution reference
+ ),
+ ('whole', '', end_string_suffix, # whole constructs
+ [# reference name & end-string
+ r'(?P<refname>%s)(?P<refend>__?)' % simplename,
+ ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
+ [r'[0-9]+', # manually numbered
+ r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
+ r'\*', # auto-symbol
+ r'(?P<citationlabel>%s)' % simplename] # citation reference
+ )
+ ]
+ ),
+ ('backquote', # interpreted text or phrase reference
+ '(?P<role>(:%s:)?)' % simplename, # optional role
+ non_whitespace_after,
+ ['`(?!`)'] # but not literal
+ )
+ ]
+ )
+ patterns = Struct(
+ initial=build_regexp(parts),
+ emphasis=re.compile(non_whitespace_escape_before
+ + r'(\*)' + end_string_suffix),
+ strong=re.compile(non_whitespace_escape_before
+ + r'(\*\*)' + end_string_suffix),
+ interpreted_or_phrase_ref=re.compile(
+ r"""
+ %(non_whitespace_escape_before)s
+ (
+ `
+ (?P<suffix>
+ (?P<role>:%(simplename)s:)?
+ (?P<refend>__?)?
+ )
+ )
+ %(end_string_suffix)s
+ """ % locals(), re.VERBOSE | re.UNICODE),
+ embedded_uri=re.compile(
+ r"""
+ (
+ (?:[ \n]+|^) # spaces or beginning of line/string
+ < # open bracket
+ %(non_whitespace_after)s
+ ([^<>\x00]+) # anything but angle brackets & nulls
+ %(non_whitespace_before)s
+ > # close bracket w/o whitespace before
+ )
+ $ # end of string
+ """ % locals(), re.VERBOSE),
+ literal=re.compile(non_whitespace_before + '(``)'
+ + end_string_suffix),
+ target=re.compile(non_whitespace_escape_before
+ + r'(`)' + end_string_suffix),
+ substitution_ref=re.compile(non_whitespace_escape_before
+ + r'(\|_{0,2})'
+ + end_string_suffix),
+ email=re.compile(email_pattern % locals() + '$', re.VERBOSE),
+ uri=re.compile(
+ (r"""
+ %(start_string_prefix)s
+ (?P<whole>
+ (?P<absolute> # absolute URI
+ (?P<scheme> # scheme (http, ftp, mailto)
+ [a-zA-Z][a-zA-Z0-9.+-]*
+ )
+ :
+ (
+ ( # either:
+ (//?)? # hierarchical URI
+ %(uric)s* # URI characters
+ %(uri_end)s # final URI char
+ )
+ ( # optional query
+ \?%(uric)s*
+ %(uri_end)s
+ )?
+ ( # optional fragment
+ \#%(uric)s*
+ %(uri_end)s
+ )?
+ )
+ )
+ | # *OR*
+ (?P<email> # email address
+ """ + email_pattern + r"""
+ )
+ )
+ %(end_string_suffix)s
+ """) % locals(), re.VERBOSE),
+ pep=re.compile(
+ r"""
+ %(start_string_prefix)s
+ (
+ (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
+ |
+ (PEP\s+(?P<pepnum2>\d+)) # reference by name
+ )
+ %(end_string_suffix)s""" % locals(), re.VERBOSE),
+ rfc=re.compile(
+ r"""
+ %(start_string_prefix)s
+ (RFC(-|\s+)?(?P<rfcnum>\d+))
+ %(end_string_suffix)s""" % locals(), re.VERBOSE))
+
+ def quoted_start(self, match):
+ """Return 1 if inline markup start-string is 'quoted', 0 if not."""
+ string = match.string
+ start = match.start()
+ end = match.end()
+ if start == 0: # start-string at beginning of text
+ return 0
+ prestart = string[start - 1]
+ try:
+ poststart = string[end]
+ if self.openers.index(prestart) \
+ == self.closers.index(poststart): # quoted
+ return 1
+ except IndexError: # start-string at end of text
+ return 1
+ except ValueError: # not quoted
+ pass
+ return 0
+
+ def inline_obj(self, match, lineno, end_pattern, nodeclass,
+ restore_backslashes=0):
+ string = match.string
+ matchstart = match.start('start')
+ matchend = match.end('start')
+ if self.quoted_start(match):
+ return (string[:matchend], [], string[matchend:], [], '')
+ endmatch = end_pattern.search(string[matchend:])
+ if endmatch and endmatch.start(1): # 1 or more chars
+ text = unescape(endmatch.string[:endmatch.start(1)],
+ restore_backslashes)
+ textend = matchend + endmatch.end(1)
+ rawsource = unescape(string[matchstart:textend], 1)
+ return (string[:matchstart], [nodeclass(rawsource, text)],
+ string[textend:], [], endmatch.group(1))
+ msg = self.reporter.warning(
+ 'Inline %s start-string without end-string.'
+ % nodeclass.__name__, line=lineno)
+ text = unescape(string[matchstart:matchend], 1)
+ rawsource = unescape(string[matchstart:matchend], 1)
+ prb = self.problematic(text, rawsource, msg)
+ return string[:matchstart], [prb], string[matchend:], [msg], ''
+
+ def problematic(self, text, rawsource, message):
+ msgid = self.document.set_id(message, self.parent)
+ problematic = nodes.problematic(rawsource, text, refid=msgid)
+ prbid = self.document.set_id(problematic)
+ message.add_backref(prbid)
+ return problematic
+
+ def emphasis(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.emphasis, nodes.emphasis)
+ return before, inlines, remaining, sysmessages
+
+ def strong(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.strong, nodes.strong)
+ return before, inlines, remaining, sysmessages
+
+ def interpreted_or_phrase_ref(self, match, lineno):
+ end_pattern = self.patterns.interpreted_or_phrase_ref
+ string = match.string
+ matchstart = match.start('backquote')
+ matchend = match.end('backquote')
+ rolestart = match.start('role')
+ role = match.group('role')
+ position = ''
+ if role:
+ role = role[1:-1]
+ position = 'prefix'
+ elif self.quoted_start(match):
+ return (string[:matchend], [], string[matchend:], [])
+ endmatch = end_pattern.search(string[matchend:])
+ if endmatch and endmatch.start(1): # 1 or more chars
+ textend = matchend + endmatch.end()
+ if endmatch.group('role'):
+ if role:
+ msg = self.reporter.warning(
+ 'Multiple roles in interpreted text (both '
+ 'prefix and suffix present; only one allowed).',
+ line=lineno)
+ text = unescape(string[rolestart:textend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:rolestart], [prb], string[textend:], [msg]
+ role = endmatch.group('suffix')[1:-1]
+ position = 'suffix'
+ escaped = endmatch.string[:endmatch.start(1)]
+ rawsource = unescape(string[matchstart:textend], 1)
+ if rawsource[-1:] == '_':
+ if role:
+ msg = self.reporter.warning(
+ 'Mismatch: both interpreted text role %s and '
+ 'reference suffix.' % position, line=lineno)
+ text = unescape(string[rolestart:textend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:rolestart], [prb], string[textend:], [msg]
+ return self.phrase_ref(string[:matchstart], string[textend:],
+ rawsource, escaped, unescape(escaped))
+ else:
+ rawsource = unescape(string[rolestart:textend], 1)
+ nodelist, messages = self.interpreted(rawsource, escaped, role,
+ lineno)
+ return (string[:rolestart], nodelist,
+ string[textend:], messages)
+ msg = self.reporter.warning(
+ 'Inline interpreted text or phrase reference start-string '
+ 'without end-string.', line=lineno)
+ text = unescape(string[matchstart:matchend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:matchstart], [prb], string[matchend:], [msg]
+
+ def phrase_ref(self, before, after, rawsource, escaped, text):
+ match = self.patterns.embedded_uri.search(escaped)
+ if match:
+ text = unescape(escaped[:match.start(0)])
+ uri_text = match.group(2)
+ uri = ''.join(uri_text.split())
+ uri = self.adjust_uri(uri)
+ if uri:
+ target = nodes.target(match.group(1), refuri=uri)
+ else:
+ raise ApplicationError('problem with URI: %r' % uri_text)
+ if not text:
+ text = uri
+ else:
+ target = None
+ refname = normalize_name(text)
+ reference = nodes.reference(rawsource, text,
+ name=whitespace_normalize_name(text))
+ node_list = [reference]
+ if rawsource[-2:] == '__':
+ if target:
+ reference['refuri'] = uri
+ else:
+ reference['anonymous'] = 1
+ else:
+ if target:
+ reference['refuri'] = uri
+ target['names'].append(refname)
+ self.document.note_explicit_target(target, self.parent)
+ node_list.append(target)
+ else:
+ reference['refname'] = refname
+ self.document.note_refname(reference)
+ return before, node_list, after, []
+
+ def adjust_uri(self, uri):
+ match = self.patterns.email.match(uri)
+ if match:
+ return 'mailto:' + uri
+ else:
+ return uri
+
+ def interpreted(self, rawsource, text, role, lineno):
+ role_fn, messages = roles.role(role, self.language, lineno,
+ self.reporter)
+ if role_fn:
+ nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
+ return nodes, messages + messages2
+ else:
+ msg = self.reporter.error(
+ 'Unknown interpreted text role "%s".' % role,
+ line=lineno)
+ return ([self.problematic(rawsource, rawsource, msg)],
+ messages + [msg])
+
+ def literal(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.literal, nodes.literal,
+ restore_backslashes=1)
+ return before, inlines, remaining, sysmessages
+
+ def inline_internal_target(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.target, nodes.target)
+ if inlines and isinstance(inlines[0], nodes.target):
+ assert len(inlines) == 1
+ target = inlines[0]
+ name = normalize_name(target.astext())
+ target['names'].append(name)
+ self.document.note_explicit_target(target, self.parent)
+ return before, inlines, remaining, sysmessages
+
+ def substitution_reference(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.substitution_ref,
+ nodes.substitution_reference)
+ if len(inlines) == 1:
+ subref_node = inlines[0]
+ if isinstance(subref_node, nodes.substitution_reference):
+ subref_text = subref_node.astext()
+ self.document.note_substitution_ref(subref_node, subref_text)
+ if endstring[-1:] == '_':
+ reference_node = nodes.reference(
+ '|%s%s' % (subref_text, endstring), '')
+ if endstring[-2:] == '__':
+ reference_node['anonymous'] = 1
+ else:
+ reference_node['refname'] = normalize_name(subref_text)
+ self.document.note_refname(reference_node)
+ reference_node += subref_node
+ inlines = [reference_node]
+ return before, inlines, remaining, sysmessages
+
+ def footnote_reference(self, match, lineno):
+ """
+ Handles `nodes.footnote_reference` and `nodes.citation_reference`
+ elements.
+ """
+ label = match.group('footnotelabel')
+ refname = normalize_name(label)
+ string = match.string
+ before = string[:match.start('whole')]
+ remaining = string[match.end('whole'):]
+ if match.group('citationlabel'):
+ refnode = nodes.citation_reference('[%s]_' % label,
+ refname=refname)
+ refnode += nodes.Text(label)
+ self.document.note_citation_ref(refnode)
+ else:
+ refnode = nodes.footnote_reference('[%s]_' % label)
+ if refname[0] == '#':
+ refname = refname[1:]
+ refnode['auto'] = 1
+ self.document.note_autofootnote_ref(refnode)
+ elif refname == '*':
+ refname = ''
+ refnode['auto'] = '*'
+ self.document.note_symbol_footnote_ref(
+ refnode)
+ else:
+ refnode += nodes.Text(label)
+ if refname:
+ refnode['refname'] = refname
+ self.document.note_footnote_ref(refnode)
+ if utils.get_trim_footnote_ref_space(self.document.settings):
+ before = before.rstrip()
+ return (before, [refnode], remaining, [])
+
+ def reference(self, match, lineno, anonymous=None):
+ referencename = match.group('refname')
+ refname = normalize_name(referencename)
+ referencenode = nodes.reference(
+ referencename + match.group('refend'), referencename,
+ name=whitespace_normalize_name(referencename))
+ if anonymous:
+ referencenode['anonymous'] = 1
+ else:
+ referencenode['refname'] = refname
+ self.document.note_refname(referencenode)
+ string = match.string
+ matchstart = match.start('whole')
+ matchend = match.end('whole')
+ return (string[:matchstart], [referencenode], string[matchend:], [])
+
+ def anonymous_reference(self, match, lineno):
+ return self.reference(match, lineno, anonymous=1)
+
+ def standalone_uri(self, match, lineno):
+ if not match.group('scheme') or urischemes.schemes.has_key(
+ match.group('scheme').lower()):
+ if match.group('email'):
+ addscheme = 'mailto:'
+ else:
+ addscheme = ''
+ text = match.group('whole')
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped,
+ refuri=addscheme + unescaped)]
+ else: # not a valid scheme
+ raise MarkupMismatch
+
+ def pep_reference(self, match, lineno):
+ text = match.group(0)
+ if text.startswith('pep-'):
+ pepnum = int(match.group('pepnum1'))
+ elif text.startswith('PEP'):
+ pepnum = int(match.group('pepnum2'))
+ else:
+ raise MarkupMismatch
+ ref = (self.document.settings.pep_base_url
+ + self.document.settings.pep_file_url_template % pepnum)
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
+
+ rfc_url = 'rfc%d.html'
+
+ def rfc_reference(self, match, lineno):
+ text = match.group(0)
+ if text.startswith('RFC'):
+ rfcnum = int(match.group('rfcnum'))
+ ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
+ else:
+ raise MarkupMismatch
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
+
+ def implicit_inline(self, text, lineno):
+ """
+ Check each of the patterns in `self.implicit_dispatch` for a match,
+ and dispatch to the stored method for the pattern. Recursively check
+ the text before and after the match. Return a list of `nodes.Text`
+ and inline element nodes.
+ """
+ if not text:
+ return []
+ for pattern, method in self.implicit_dispatch:
+ match = pattern.search(text)
+ if match:
+ try:
+ # Must recurse on strings before *and* after the match;
+ # there may be multiple patterns.
+ return (self.implicit_inline(text[:match.start()], lineno)
+ + method(match, lineno) +
+ self.implicit_inline(text[match.end():], lineno))
+ except MarkupMismatch:
+ pass
+ return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
+
+ dispatch = {'*': emphasis,
+ '**': strong,
+ '`': interpreted_or_phrase_ref,
+ '``': literal,
+ '_`': inline_internal_target,
+ ']_': footnote_reference,
+ '|': substitution_reference,
+ '_': reference,
+ '__': anonymous_reference}
+
+
+def _loweralpha_to_int(s, _zero=(ord('a')-1)):
+ return ord(s) - _zero
+
+def _upperalpha_to_int(s, _zero=(ord('A')-1)):
+ return ord(s) - _zero
+
+def _lowerroman_to_int(s):
+ return roman.fromRoman(s.upper())
+
+
class Body(RSTState):

    """
    Generic classifier of the first line of a block.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    # Per-format info: prefix/suffix characters and the slice (start:end)
    # that strips them from the matched enumerator text.
    enum.formatinfo = {
        'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
        'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
        'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman'] # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+',}
    # Convert enumerator text to its ordinal value, per sequence type.
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    # Anchored ('$'-terminated) regexps for full-string sequence checks.
    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$')

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern? Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format, e.g. (?P<parens>\((...)\)).
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    # Transition patterns: the first line of a block is classified by the
    # first of these that matches (in `initial_transitions` order).
    patterns = {
        'bullet': ur'[-+*\u2022\u2023\u2043]( +|$)',
        'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
        'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
        'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
        'doctest': r'>>>( +|$)',
        'line_block': r'\|( +|$)',
        'grid_table_top': grid_table_top_pat,
        'simple_table_top': simple_table_top_pat,
        'explicit_markup': r'\.\.( +|$)',
        'anonymous': r'__( +|$)',
        'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
        'text': r''}
    initial_transitions = (
        'bullet',
        'enumerator',
        'field_marker',
        'option_marker',
        'doctest',
        'line_block',
        'grid_table_top',
        'simple_table_top',
        'explicit_markup',
        'anonymous',
        'line',
        'text')
+
+ def indent(self, match, context, next_state):
+ """Block quote."""
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_indented()
+ elements = self.block_quote(indented, line_offset)
+ self.parent += elements
+ if not blank_finish:
+ self.parent += self.unindent_warning('Block quote')
+ return context, next_state, []
+
    def block_quote(self, indented, line_offset):
        """
        Parse `indented` as one or more block quotes (each with an
        optional trailing attribution); return a list of elements.
        """
        elements = []
        while indented:
            # Split off one quote plus its attribution (if any); the
            # remainder (after the attribution) is processed next pass.
            (blockquote_lines,
             attribution_lines,
             attribution_offset,
             indented,
             new_line_offset) = self.split_attribution(indented, line_offset)
            blockquote = nodes.block_quote()
            self.nested_parse(blockquote_lines, line_offset, blockquote)
            elements.append(blockquote)
            if attribution_lines:
                attribution, messages = self.parse_attribution(
                    attribution_lines, attribution_offset)
                blockquote += attribution
                elements += messages
            line_offset = new_line_offset
            # Skip blank lines before a possible following quote.
            while indented and not indented[0]:
                indented = indented[1:]
                line_offset += 1
        return elements
+
    # U+2014 is an em-dash:
    attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')

    def split_attribution(self, indented, line_offset):
        """
        Check for a block quote attribution and split it off:

        * First line after a blank line must begin with a dash ("--", "---",
          em-dash; matches `self.attribution_pattern`).
        * Every line after that must have consistent indentation.
        * Attributions must be preceded by block quote content.

        Return a tuple of: (block quote content lines, attribution lines,
        attribution offset within `indented`, remaining indented lines,
        new line offset).  When no attribution is found, the content
        lines are returned with ``None`` for the other four members.
        """
        blank = None
        nonblank_seen = False
        for i in range(len(indented)):
            line = indented[i].rstrip()
            if line:
                if nonblank_seen and blank == i - 1: # last line blank
                    match = self.attribution_pattern.match(line)
                    if match:
                        attribution_end, indent = self.check_attribution(
                            indented, i)
                        if attribution_end:
                            # Strip the dash from the first attribution
                            # line and the shared indent from the rest.
                            a_lines = indented[i:attribution_end]
                            a_lines.trim_left(match.end(), end=1)
                            a_lines.trim_left(indent, start=1)
                            return (indented[:i], a_lines,
                                    i, indented[attribution_end:],
                                    line_offset + attribution_end)
                nonblank_seen = True
            else:
                blank = i
        else:
            return (indented, None, None, None, None)
+
    def check_attribution(self, indented, attribution_start):
        """
        Check attribution shape.
        Return the index past the end of the attribution, and the indent.

        Continuation lines (if any) must all share one consistent indent;
        otherwise (None, None) is returned: not an attribution after all.
        """
        indent = None
        # Ensure `i` is defined if the loop body never runs (attribution
        # is the very last line of `indented`).
        i = attribution_start + 1
        for i in range(attribution_start + 1, len(indented)):
            line = indented[i].rstrip()
            if not line:
                break
            if indent is None:
                indent = len(line) - len(line.lstrip())
            elif len(line) - len(line.lstrip()) != indent:
                return None, None # bad shape; not an attribution
        else:
            # return index of line after last attribution line:
            i += 1
        return i, (indent or 0)
+
+ def parse_attribution(self, indented, line_offset):
+ text = '\n'.join(indented).rstrip()
+ lineno = self.state_machine.abs_line_number() + line_offset
+ textnodes, messages = self.inline_text(text, lineno)
+ node = nodes.attribution(text, '', *textnodes)
+ node.line = lineno
+ return node, messages
+
    def bullet(self, match, context, next_state):
        """Bullet list item."""
        bulletlist = nodes.bullet_list()
        self.parent += bulletlist
        bulletlist['bullet'] = match.string[0]
        i, blank_finish = self.list_item(match.end())
        bulletlist += i
        offset = self.state_machine.line_offset + 1 # next line
        # Collect sibling items by parsing the remaining lines in the
        # 'BulletList' state; control returns here when the list ends.
        new_line_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=bulletlist, initial_state='BulletList',
            blank_finish=blank_finish)
        self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Bullet list')
        return [], next_state, []
+
    def list_item(self, indent):
        """
        Parse one list item; return (list_item node, blank_finish).

        `indent` is the column just past the bullet/enumerator marker.
        """
        if self.state_machine.line[indent:]:
            # Content follows the marker on the same line: indent known.
            indented, line_offset, blank_finish = (
                self.state_machine.get_known_indented(indent))
        else:
            # Marker alone on its line: measure indent from the next line.
            indented, indent, line_offset, blank_finish = (
                self.state_machine.get_first_known_indented(indent))
        listitem = nodes.list_item('\n'.join(indented))
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=listitem)
        return listitem, blank_finish
+
    def enumerator(self, match, context, next_state):
        """Enumerated List Item"""
        format, sequence, text, ordinal = self.parse_enumerator(match)
        if not self.is_enumerated_list_item(ordinal, sequence, format):
            # Not a plausible list item; reparse this line as plain text.
            raise statemachine.TransitionCorrection('text')
        enumlist = nodes.enumerated_list()
        self.parent += enumlist
        if sequence == '#':
            enumlist['enumtype'] = 'arabic'
        else:
            enumlist['enumtype'] = sequence
        enumlist['prefix'] = self.enum.formatinfo[format].prefix
        enumlist['suffix'] = self.enum.formatinfo[format].suffix
        if ordinal != 1:
            enumlist['start'] = ordinal
            msg = self.reporter.info(
                'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
                % (text, ordinal), line=self.state_machine.abs_line_number())
            self.parent += msg
        listitem, blank_finish = self.list_item(match.end())
        enumlist += listitem
        offset = self.state_machine.line_offset + 1 # next line
        # Collect sibling items in the 'EnumeratedList' state; pass along
        # the current ordinal/format so consistency can be enforced.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=enumlist, initial_state='EnumeratedList',
            blank_finish=blank_finish,
            extra_settings={'lastordinal': ordinal,
                            'format': format,
                            'auto': sequence == '#'})
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Enumerated list')
        return [], next_state, []
+
+ def parse_enumerator(self, match, expected_sequence=None):
+ """
+ Analyze an enumerator and return the results.
+
+ :Return:
+ - the enumerator format ('period', 'parens', or 'rparen'),
+ - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
+ - the text of the enumerator, stripped of formatting, and
+ - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
+ ``None`` is returned for invalid enumerator text).
+
+ The enumerator format has already been determined by the regular
+ expression match. If `expected_sequence` is given, that sequence is
+ tried first. If not, we check for Roman numeral 1. This way,
+ single-character Roman numerals (which are also alphabetical) can be
+ matched. If no sequence has been matched, all sequences are checked in
+ order.
+ """
+ groupdict = match.groupdict()
+ sequence = ''
+ for format in self.enum.formats:
+ if groupdict[format]: # was this the format matched?
+ break # yes; keep `format`
+ else: # shouldn't happen
+ raise ParserError('enumerator format not matched')
+ text = groupdict[format][self.enum.formatinfo[format].start
+ :self.enum.formatinfo[format].end]
+ if text == '#':
+ sequence = '#'
+ elif expected_sequence:
+ try:
+ if self.enum.sequenceregexps[expected_sequence].match(text):
+ sequence = expected_sequence
+ except KeyError: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: %s'
+ % sequence)
+ elif text == 'i':
+ sequence = 'lowerroman'
+ elif text == 'I':
+ sequence = 'upperroman'
+ if not sequence:
+ for sequence in self.enum.sequences:
+ if self.enum.sequenceregexps[sequence].match(text):
+ break
+ else: # shouldn't happen
+ raise ParserError('enumerator sequence not matched')
+ if sequence == '#':
+ ordinal = 1
+ else:
+ try:
+ ordinal = self.enum.converters[sequence](text)
+ except roman.InvalidRomanNumeralError:
+ ordinal = None
+ return format, sequence, text, ordinal
+
    def is_enumerated_list_item(self, ordinal, sequence, format):
        """
        Check validity based on the ordinal value and the second line.

        Return true iff the ordinal is valid and the second line is blank,
        indented, or starts with the next enumerator or an auto-enumerator.
        """
        if ordinal is None:
            return None
        try:
            # Peek at the following line; position is restored either way.
            next_line = self.state_machine.next_line()
        except EOFError: # end of input lines
            self.state_machine.previous_line()
            return 1
        else:
            self.state_machine.previous_line()
        if not next_line[:1].strip(): # blank or indented
            return 1
        result = self.make_enumerator(ordinal + 1, sequence, format)
        if result:
            next_enumerator, auto_enumerator = result
            try:
                if ( next_line.startswith(next_enumerator) or
                     next_line.startswith(auto_enumerator) ):
                    return 1
            except TypeError:
                pass
        return None
+
+ def make_enumerator(self, ordinal, sequence, format):
+ """
+ Construct and return the next enumerated list item marker, and an
+ auto-enumerator ("#" instead of the regular enumerator).
+
+ Return ``None`` for invalid (out of range) ordinals.
+ """ #"
+ if sequence == '#':
+ enumerator = '#'
+ elif sequence == 'arabic':
+ enumerator = str(ordinal)
+ else:
+ if sequence.endswith('alpha'):
+ if ordinal > 26:
+ return None
+ enumerator = chr(ordinal + ord('a') - 1)
+ elif sequence.endswith('roman'):
+ try:
+ enumerator = roman.toRoman(ordinal)
+ except roman.RomanError:
+ return None
+ else: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: "%s"'
+ % sequence)
+ if sequence.startswith('lower'):
+ enumerator = enumerator.lower()
+ elif sequence.startswith('upper'):
+ enumerator = enumerator.upper()
+ else: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: "%s"'
+ % sequence)
+ formatinfo = self.enum.formatinfo[format]
+ next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
+ + ' ')
+ auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
+ return next_enumerator, auto_enumerator
+
+ def field_marker(self, match, context, next_state):
+ """Field list item."""
+ field_list = nodes.field_list()
+ self.parent += field_list
+ field, blank_finish = self.field(match)
+ field_list += field
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=field_list, initial_state='FieldList',
+ blank_finish=blank_finish)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Field list')
+ return [], next_state, []
+
    def field(self, match):
        """Parse one ":name: body" field; return (field node, blank_finish)."""
        name = self.parse_field_marker(match)
        lineno = self.state_machine.abs_line_number()
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        field_node = nodes.field()
        field_node.line = lineno
        # The field name may itself contain inline markup.
        name_nodes, name_messages = self.inline_text(name, lineno)
        field_node += nodes.field_name(name, '', *name_nodes)
        field_body = nodes.field_body('\n'.join(indented), *name_messages)
        field_node += field_body
        if indented:
            self.parse_field_body(indented, line_offset, field_body)
        return field_node, blank_finish
+
+ def parse_field_marker(self, match):
+ """Extract & return field name from a field marker match."""
+ field = match.group()[1:] # strip off leading ':'
+ field = field[:field.rfind(':')] # strip off trailing ':' etc.
+ return field
+
    def parse_field_body(self, indented, offset, node):
        """Parse the indented field body as nested body elements of `node`."""
        self.nested_parse(indented, input_offset=offset, node=node)
+
    def option_marker(self, match, context, next_state):
        """Option list item."""
        optionlist = nodes.option_list()
        try:
            listitem, blank_finish = self.option_list_item(match)
        except MarkupError, (message, lineno):
            # This shouldn't happen; pattern won't match.
            msg = self.reporter.error(
                'Invalid option list marker: %s' % message, line=lineno)
            self.parent += msg
            # Fall back: treat the indented text as a block quote.
            indented, indent, line_offset, blank_finish = \
                  self.state_machine.get_first_known_indented(match.end())
            elements = self.block_quote(indented, line_offset)
            self.parent += elements
            if not blank_finish:
                self.parent += self.unindent_warning('Option list')
            return [], next_state, []
        self.parent += optionlist
        optionlist += listitem
        offset = self.state_machine.line_offset + 1 # next line
        # Gather the remaining items in the 'OptionList' state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=optionlist, initial_state='OptionList',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
+
    def option_list_item(self, match):
        """
        Parse one option list item; return (option_list_item node,
        blank_finish).  Raises TransitionCorrection when the "item" has
        no description and is therefore not an option list item at all.
        """
        offset = self.state_machine.abs_line_offset()
        options = self.parse_option_marker(match)
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        if not indented: # not an option list item
            # Rewind and reparse the line as plain text.
            self.goto_line(offset)
            raise statemachine.TransitionCorrection('text')
        option_group = nodes.option_group('', *options)
        description = nodes.description('\n'.join(indented))
        option_list_item = nodes.option_list_item('', option_group,
                                                  description)
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=description)
        return option_list_item, blank_finish
+
    def parse_option_marker(self, match):
        """
        Return a list of `node.option` and `node.option_argument` objects,
        parsed from an option marker match.

        :Exception: `MarkupError` for invalid option markers.
        """
        optlist = []
        # Multiple synonymous options are comma-separated: "-o, --output".
        optionstrings = match.group().rstrip().split(', ')
        for optionstring in optionstrings:
            tokens = optionstring.split()
            delimiter = ' '
            firstopt = tokens[0].split('=')
            if len(firstopt) > 1:
                # "--opt=value" form
                tokens[:1] = firstopt
                delimiter = '='
            elif (len(tokens[0]) > 2
                  and ((tokens[0].startswith('-')
                        and not tokens[0].startswith('--'))
                       or tokens[0].startswith('+'))):
                # "-ovalue" form
                tokens[:1] = [tokens[0][:2], tokens[0][2:]]
                delimiter = ''
            if len(tokens) > 1 and (tokens[1].startswith('<')
                                    and tokens[-1].endswith('>')):
                # "-o <value1 value2>" form; join all values into one token
                tokens[1:] = [' '.join(tokens[1:])]
            if 0 < len(tokens) <= 2:
                # Token 0 is the option string, optional token 1 its argument.
                option = nodes.option(optionstring)
                option += nodes.option_string(tokens[0], tokens[0])
                if len(tokens) > 1:
                    option += nodes.option_argument(tokens[1], tokens[1],
                                                    delimiter=delimiter)
                optlist.append(option)
            else:
                raise MarkupError(
                    'wrong number of option tokens (=%s), should be 1 or 2: '
                    '"%s"' % (len(tokens), optionstring),
                    self.state_machine.abs_line_number() + 1)
        return optlist
+
+ def doctest(self, match, context, next_state):
+ data = '\n'.join(self.state_machine.get_text_block())
+ self.parent += nodes.doctest_block(data, data)
+ return [], next_state, []
+
    def line_block(self, match, context, next_state):
        """First line of a line block."""
        block = nodes.line_block()
        self.parent += block
        lineno = self.state_machine.abs_line_number()
        line, messages, blank_finish = self.line_block_line(match, lineno)
        block += line
        self.parent += messages
        if not blank_finish:
            # Collect the remaining "|" lines in the 'LineBlock' state.
            offset = self.state_machine.line_offset + 1 # next line
            new_line_offset, blank_finish = self.nested_list_parse(
                self.state_machine.input_lines[offset:],
                input_offset=self.state_machine.abs_line_offset() + 1,
                node=block, initial_state='LineBlock',
                blank_finish=0)
            self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.reporter.warning(
                'Line block ends without a blank line.',
                line=(self.state_machine.abs_line_number() + 1))
        if len(block):
            # First line with unknown indent defaults to 0, then nest
            # the whole block by relative indentation.
            if block[0].indent is None:
                block[0].indent = 0
            self.nest_line_block_lines(block)
        return [], next_state, []
+
    def line_block_line(self, match, lineno):
        """Return one line element of a line_block."""
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end(),
                                                          until_blank=1)
        text = u'\n'.join(indented)
        text_nodes, messages = self.inline_text(text, lineno)
        line = nodes.line(text, '', *text_nodes)
        if match.string.rstrip() != '|': # not empty
            # Record the indent after the "|" for later nesting;
            # empty lines keep indent == None (filled in afterwards).
            line.indent = len(match.group(1)) - 1
        return line, messages, blank_finish
+
+ def nest_line_block_lines(self, block):
+ for index in range(1, len(block)):
+ if block[index].indent is None:
+ block[index].indent = block[index - 1].indent
+ self.nest_line_block_segment(block)
+
    def nest_line_block_segment(self, block):
        """
        Recursively group consecutive lines indented more than the least
        indent into nested `line_block` children, in place.
        """
        indents = [item.indent for item in block]
        least = min(indents)
        new_items = []
        new_block = nodes.line_block()
        for item in block:
            if item.indent > least:
                # Deeper-indented line: accumulate into the pending child.
                new_block.append(item)
            else:
                if len(new_block):
                    # Close off the pending deeper run, nesting it first.
                    self.nest_line_block_segment(new_block)
                    new_items.append(new_block)
                    new_block = nodes.line_block()
                new_items.append(item)
        if len(new_block):
            self.nest_line_block_segment(new_block)
            new_items.append(new_block)
        block[:] = new_items
+
+ def grid_table_top(self, match, context, next_state):
+ """Top border of a full table."""
+ return self.table_top(match, context, next_state,
+ self.isolate_grid_table,
+ tableparser.GridTableParser)
+
+ def simple_table_top(self, match, context, next_state):
+ """Top border of a simple table."""
+ return self.table_top(match, context, next_state,
+ self.isolate_simple_table,
+ tableparser.SimpleTableParser)
+
+ def table_top(self, match, context, next_state,
+ isolate_function, parser_class):
+ """Top border of a generic table."""
+ nodelist, blank_finish = self.table(isolate_function, parser_class)
+ self.parent += nodelist
+ if not blank_finish:
+ msg = self.reporter.warning(
+ 'Blank line required after table.',
+ line=self.state_machine.abs_line_number() + 1)
+ self.parent += msg
+ return [], next_state, []
+
    def table(self, isolate_function, parser_class):
        """Parse a table; return (node list, blank_finish)."""
        block, messages, blank_finish = isolate_function()
        if block:
            try:
                parser = parser_class()
                tabledata = parser.parse(block)
                # Absolute line number of the table's first line.
                tableline = (self.state_machine.abs_line_number() - len(block)
                             + 1)
                table = self.build_table(tabledata, tableline)
                nodelist = [table] + messages
            except tableparser.TableMarkupError, detail:
                nodelist = self.malformed_table(
                    block, ' '.join(detail.args)) + messages
        else:
            nodelist = messages
        return nodelist, blank_finish
+
    def isolate_grid_table(self):
        """
        Extract the text block of a grid table, checking its edges.
        Return (block, messages, blank_finish); block is empty on error.
        """
        messages = []
        blank_finish = 1
        try:
            block = self.state_machine.get_text_block(flush_left=1)
        except statemachine.UnexpectedIndentationError, instance:
            block, source, lineno = instance.args
            messages.append(self.reporter.error('Unexpected indentation.',
                                                source=source, line=lineno))
            blank_finish = 0
        block.disconnect()
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        width = len(block[0].strip())
        for i in range(len(block)):
            block[i] = block[i].strip()
            if block[i][0] not in '+|': # check left edge
                # Table ends here; rewind past the non-table lines.
                blank_finish = 0
                self.state_machine.previous_line(len(block) - i)
                del block[i:]
                break
        if not self.grid_table_top_pat.match(block[-1]): # find bottom
            blank_finish = 0
            # from second-last to third line of table:
            for i in range(len(block) - 2, 1, -1):
                if self.grid_table_top_pat.match(block[i]):
                    self.state_machine.previous_line(len(block) - i + 1)
                    del block[i+1:]
                    break
            else:
                messages.extend(self.malformed_table(block))
                return [], messages, blank_finish
        for i in range(len(block)): # check right edge
            if len(block[i]) != width or block[i][-1] not in '+|':
                messages.extend(self.malformed_table(block))
                return [], messages, blank_finish
        return block, messages, blank_finish
+
    def isolate_simple_table(self):
        """
        Extract the text block of a simple table by scanning for its
        border lines.  Return (block, messages, blank_finish); block is
        empty on error.
        """
        start = self.state_machine.line_offset
        lines = self.state_machine.input_lines
        limit = len(lines) - 1
        toplen = len(lines[start].strip())
        pattern_match = self.simple_table_border_pat.match
        found = 0
        found_at = None
        i = start + 1
        while i <= limit:
            line = lines[i]
            match = pattern_match(line)
            if match:
                if len(line.strip()) != toplen:
                    # Border widths disagree: reject the whole candidate.
                    self.state_machine.next_line(i - start)
                    messages = self.malformed_table(
                        lines[start:i+1], 'Bottom/header table border does '
                        'not match top border.')
                    return [], messages, i == limit or not lines[i+1].strip()
                found += 1
                found_at = i
                # Second border (or end of input / following blank line)
                # closes the table.
                if found == 2 or i == limit or not lines[i+1].strip():
                    end = i
                    break
            i += 1
        else: # reached end of input_lines
            if found:
                extra = ' or no blank line after table bottom'
                self.state_machine.next_line(found_at - start)
                block = lines[start:found_at+1]
            else:
                extra = ''
                self.state_machine.next_line(i - start - 1)
                block = lines[start:]
            messages = self.malformed_table(
                block, 'No bottom table border found%s.' % extra)
            return [], messages, not extra
        self.state_machine.next_line(end - start)
        block = lines[start:end+1]
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        return block, [], end == limit or not lines[end+1].strip()
+
+ def malformed_table(self, block, detail=''):
+ block.replace(self.double_width_pad_char, '')
+ data = '\n'.join(block)
+ message = 'Malformed table.'
+ lineno = self.state_machine.abs_line_number() - len(block) + 1
+ if detail:
+ message += '\n' + detail
+ error = self.reporter.error(message, nodes.literal_block(data, data),
+ line=lineno)
+ return [error]
+
+ def build_table(self, tabledata, tableline, stub_columns=0):
+ colwidths, headrows, bodyrows = tabledata
+ table = nodes.table()
+ tgroup = nodes.tgroup(cols=len(colwidths))
+ table += tgroup
+ for colwidth in colwidths:
+ colspec = nodes.colspec(colwidth=colwidth)
+ if stub_columns:
+ colspec.attributes['stub'] = 1
+ stub_columns -= 1
+ tgroup += colspec
+ if headrows:
+ thead = nodes.thead()
+ tgroup += thead
+ for row in headrows:
+ thead += self.build_table_row(row, tableline)
+ tbody = nodes.tbody()
+ tgroup += tbody
+ for row in bodyrows:
+ tbody += self.build_table_row(row, tableline)
+ return table
+
+ def build_table_row(self, rowdata, tableline):
+ row = nodes.row()
+ for cell in rowdata:
+ if cell is None:
+ continue
+ morerows, morecols, offset, cellblock = cell
+ attributes = {}
+ if morerows:
+ attributes['morerows'] = morerows
+ if morecols:
+ attributes['morecols'] = morecols
+ entry = nodes.entry(**attributes)
+ row += entry
+ if ''.join(cellblock):
+ self.nested_parse(cellblock, input_offset=tableline+offset,
+ node=entry)
+ return row
+
+
    explicit = Struct()
    """Patterns and constants used for explicit markup recognition."""

    # Verbose regexps; %(non_whitespace_escape_before)s etc. are pattern
    # fragments interpolated from the Inliner class namespace.
    explicit.patterns = Struct(
          target=re.compile(r"""
                            (
                              _               # anonymous target
                            |               # *OR*
                              (?!_)           # no underscore at the beginning
                              (?P<quote>`?)   # optional open quote
                              (?![ `])        # first char. not space or
                                              # backquote
                              (?P<name>       # reference name
                                .+?
                              )
                              %(non_whitespace_escape_before)s
                              (?P=quote)      # close quote if open quote used
                            )
                            (?<!(?<!\x00):) # no unescaped colon at end
                            %(non_whitespace_escape_before)s
                            [ ]?            # optional space
                            :               # end of reference name
                            ([ ]+|$)        # followed by whitespace
                            """ % vars(Inliner), re.VERBOSE),
          reference=re.compile(r"""
                               (
                                 (?P<simple>%(simplename)s)_
                               |                  # *OR*
                                 `                  # open backquote
                                 (?![ ])            # not space
                                 (?P<phrase>.+?)    # hyperlink phrase
                                 %(non_whitespace_escape_before)s
                                 `_                 # close backquote,
                                                    # reference mark
                               )
                               $                  # end of string
                               """ % vars(Inliner), re.VERBOSE | re.UNICODE),
          substitution=re.compile(r"""
                                  (
                                    (?![ ])          # first char. not space
                                    (?P<name>.+?)    # substitution text
                                    %(non_whitespace_escape_before)s
                                    \|               # close delimiter
                                  )
                                  ([ ]+|$)           # followed by whitespace
                                  """ % vars(Inliner), re.VERBOSE),)
+
    def footnote(self, match):
        """Parse a footnote (".. [label] body"); return (nodes, blank_finish)."""
        lineno = self.state_machine.abs_line_number()
        indented, indent, offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        label = match.group(1)
        name = normalize_name(label)
        footnote = nodes.footnote('\n'.join(indented))
        footnote.line = lineno
        if name[0] == '#': # auto-numbered
            name = name[1:] # autonumber label
            footnote['auto'] = 1
            if name:
                footnote['names'].append(name)
            self.document.note_autofootnote(footnote)
        elif name == '*': # auto-symbol
            name = ''
            footnote['auto'] = '*'
            self.document.note_symbol_footnote(footnote)
        else: # manually numbered
            footnote += nodes.label('', label)
            footnote['names'].append(name)
            self.document.note_footnote(footnote)
        if name:
            self.document.note_explicit_target(footnote, footnote)
        else:
            # Unnamed (auto) footnotes still need a document-unique id.
            self.document.set_id(footnote, footnote)
        if indented:
            self.nested_parse(indented, input_offset=offset, node=footnote)
        return [footnote], blank_finish
+
+ def citation(self, match):
+ lineno = self.state_machine.abs_line_number()
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ label = match.group(1)
+ name = normalize_name(label)
+ citation = nodes.citation('\n'.join(indented))
+ citation.line = lineno
+ citation += nodes.label('', label)
+ citation['names'].append(name)
+ self.document.note_citation(citation)
+ self.document.note_explicit_target(citation, citation)
+ if indented:
+ self.nested_parse(indented, input_offset=offset, node=citation)
+ return [citation], blank_finish
+
    def hyperlink_target(self, match):
        """Parse a hyperlink target (".. _name: URI")."""
        pattern = self.explicit.patterns.target
        lineno = self.state_machine.abs_line_number()
        block, indent, offset, blank_finish = \
              self.state_machine.get_first_known_indented(
              match.end(), until_blank=1, strip_indent=0)
        blocktext = match.string[:match.end()] + '\n'.join(block)
        # Null-escape backslashes so the pattern's escape checks work.
        block = [escape2null(line) for line in block]
        escaped = block[0]
        blockindex = 0
        # The target name may wrap over several lines; accumulate lines
        # until the target pattern matches.
        while 1:
            targetmatch = pattern.match(escaped)
            if targetmatch:
                break
            blockindex += 1
            try:
                escaped += block[blockindex]
            except IndexError:
                raise MarkupError('malformed hyperlink target.', lineno)
        del block[:blockindex]
        # Strip the matched name portion from the first remaining line.
        block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
        target = self.make_target(block, blocktext, lineno,
                                  targetmatch.group('name'))
        return [target], blank_finish
+
+ def make_target(self, block, block_text, lineno, target_name):
+ target_type, data = self.parse_target(block, block_text, lineno)
+ if target_type == 'refname':
+ target = nodes.target(block_text, '', refname=normalize_name(data))
+ target.indirect_reference_name = data
+ self.add_target(target_name, '', target, lineno)
+ self.document.note_indirect_target(target)
+ return target
+ elif target_type == 'refuri':
+ target = nodes.target(block_text, '')
+ self.add_target(target_name, data, target, lineno)
+ return target
+ else:
+ return data
+
    def parse_target(self, block, block_text, lineno):
        """
        Determine the type of reference of a target.

        :Return: A 2-tuple, one of:

            - 'refname' and the indirect reference name
            - 'refuri' and the URI
            - 'malformed' and a system_message node

        NOTE(review): the code visible here only ever returns 'refname'
        or 'refuri'; the 'malformed' case documented above is presumably
        produced by a subclass or another version — confirm.
        """
        if block and block[-1].strip()[-1:] == '_': # possible indirect target
            reference = ' '.join([line.strip() for line in block])
            refname = self.is_reference(reference)
            if refname:
                return 'refname', refname
        # Treat as a URI: join all lines, stripping internal whitespace.
        reference = ''.join([''.join(line.split()) for line in block])
        return 'refuri', unescape(reference)
+
+ def is_reference(self, reference):
+ match = self.explicit.patterns.reference.match(
+ whitespace_normalize_name(reference))
+ if not match:
+ return None
+ return unescape(match.group('simple') or match.group('phrase'))
+
    def add_target(self, targetname, refuri, target, lineno):
        """Register `target` (named or anonymous) with the document."""
        target.line = lineno
        if targetname:
            name = normalize_name(unescape(targetname))
            target['names'].append(name)
            if refuri:
                # Validate/normalize the URI before recording it.
                uri = self.inliner.adjust_uri(refuri)
                if uri:
                    target['refuri'] = uri
                else:
                    raise ApplicationError('problem with URI: %r' % refuri)
            self.document.note_explicit_target(target, self.parent)
        else: # anonymous target
            if refuri:
                target['refuri'] = refuri
            target['anonymous'] = 1
            self.document.note_anonymous_target(target)
+
    def substitution_def(self, match):
        """Parse a substitution definition (".. |name| directive::")."""
        pattern = self.explicit.patterns.substitution
        lineno = self.state_machine.abs_line_number()
        block, indent, offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end(),
                                                          strip_indent=0)
        blocktext = (match.string[:match.end()] + '\n'.join(block))
        block.disconnect()
        escaped = escape2null(block[0].rstrip())
        blockindex = 0
        # The |name| may wrap over several lines; accumulate until the
        # substitution pattern matches.
        while 1:
            subdefmatch = pattern.match(escaped)
            if subdefmatch:
                break
            blockindex += 1
            try:
                escaped = escaped + ' ' + escape2null(block[blockindex].strip())
            except IndexError:
                raise MarkupError('malformed substitution definition.',
                                  lineno)
        del block[:blockindex] # strip out the substitution marker
        block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
        if not block[0]:
            del block[0]
            offset += 1
        while block and not block[-1].strip():
            block.pop()
        subname = subdefmatch.group('name')
        substitution_node = nodes.substitution_definition(blocktext)
        substitution_node.line = lineno
        if not block:
            msg = self.reporter.warning(
                'Substitution definition "%s" missing contents.' % subname,
                nodes.literal_block(blocktext, blocktext), line=lineno)
            return [msg], blank_finish
        block[0] = block[0].strip()
        substitution_node['names'].append(
            nodes.whitespace_normalize_name(subname))
        # Parse the embedded directive in the 'SubstitutionDef' state.
        new_abs_offset, blank_finish = self.nested_list_parse(
            block, input_offset=offset, node=substitution_node,
            initial_state='SubstitutionDef', blank_finish=blank_finish)
        i = 0
        # Move any non-inline children (e.g. system messages) out of the
        # substitution definition and into the parent.
        for node in substitution_node[:]:
            if not (isinstance(node, nodes.Inline) or
                    isinstance(node, nodes.Text)):
                self.parent += substitution_node[i]
                del substitution_node[i]
            else:
                i += 1
        for node in substitution_node.traverse(nodes.Element):
            if self.disallowed_inside_substitution_definitions(node):
                pformat = nodes.literal_block('', node.pformat().rstrip())
                msg = self.reporter.error(
                    'Substitution definition contains illegal element:',
                    pformat, nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                return [msg], blank_finish
        if len(substitution_node) == 0:
            msg = self.reporter.warning(
                'Substitution definition "%s" empty or invalid.'
                % subname,
                nodes.literal_block(blocktext, blocktext), line=lineno)
            return [msg], blank_finish
        self.document.note_substitution_def(
            substitution_node, subname, self.parent)
        return [substitution_node], blank_finish
+
+ def disallowed_inside_substitution_definitions(self, node):
+ """Return 1 if `node` may not appear in a substitution definition."""
+ # IDs, anonymous references, and auto footnote references depend on
+ # document-wide state and cannot be duplicated per substitution use.
+ if (node['ids'] or
+ isinstance(node, nodes.reference) and node.get('anonymous') or
+ isinstance(node, nodes.footnote_reference) and node.get('auto')):
+ return 1
+ else:
+ return 0
+
+ def directive(self, match, **option_presets):
+ """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
+ type_name = match.group(1)
+ # Look up the directive class for the current language; `messages`
+ # holds any system messages produced by the lookup itself.
+ directive_class, messages = directives.directive(
+ type_name, self.memo.language, self.document)
+ self.parent += messages
+ if directive_class:
+ return self.run_directive(
+ directive_class, match, type_name, option_presets)
+ else:
+ return self.unknown_directive(type_name)
+
+ def run_directive(self, directive, match, type_name, option_presets):
+ """
+ Parse a directive then run its directive function.
+
+ Parameters:
+
+ - `directive`: The class implementing the directive. Must be
+ a subclass of `rst.Directive`.
+
+ - `match`: A regular expression match object which matched the first
+ line of the directive.
+
+ - `type_name`: The directive name, as used in the source text.
+
+ - `option_presets`: A dictionary of preset options, defaults for the
+ directive options. Currently, only an "alt" option is passed by
+ substitution definitions (value: the substitution name), which may
+ be used by an embedded image directive.
+
+ Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
+ """
+ # Legacy directives implemented as plain functions are wrapped in a
+ # Directive subclass on the fly.
+ if isinstance(directive, (FunctionType, MethodType)):
+ from docutils.parsers.rst import convert_directive_function
+ directive = convert_directive_function(directive)
+ lineno = self.state_machine.abs_line_number()
+ initial_line_offset = self.state_machine.line_offset
+ indented, indent, line_offset, blank_finish \
+ = self.state_machine.get_first_known_indented(match.end(),
+ strip_top=0)
+ block_text = '\n'.join(self.state_machine.input_lines[
+ initial_line_offset : self.state_machine.line_offset + 1])
+ try:
+ arguments, options, content, content_offset = (
+ self.parse_directive_block(indented, line_offset,
+ directive, option_presets))
+ except MarkupError, detail:
+ error = self.reporter.error(
+ 'Error in "%s" directive:\n%s.' % (type_name,
+ ' '.join(detail.args)),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error], blank_finish
+ directive_instance = directive(
+ type_name, arguments, options, content, lineno,
+ content_offset, block_text, self, self.state_machine)
+ try:
+ result = directive_instance.run()
+ except docutils.parsers.rst.DirectiveError, directive_error:
+ # Convert a DirectiveError raised by run() into a system message.
+ msg_node = self.reporter.system_message(directive_error.level,
+ directive_error.message)
+ msg_node += nodes.literal_block(block_text, block_text)
+ msg_node['line'] = lineno
+ result = [msg_node]
+ assert isinstance(result, list), \
+ 'Directive "%s" must return a list of nodes.' % type_name
+ for i in range(len(result)):
+ assert isinstance(result[i], nodes.Node), \
+ ('Directive "%s" returned non-Node object (index %s): %r'
+ % (type_name, i, result[i]))
+ return (result,
+ blank_finish or self.state_machine.is_next_line_blank())
+
+ def parse_directive_block(self, indented, line_offset, directive,
+ option_presets):
+ """
+ Split a directive's indented block into arguments, options, and
+ content; return (arguments, options, content, content_offset).
+ Raises MarkupError on invalid structure.
+ """
+ option_spec = directive.option_spec
+ has_content = directive.has_content
+ if indented and not indented[0].strip():
+ indented.trim_start()
+ line_offset += 1
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ if indented and (directive.required_arguments
+ or directive.optional_arguments
+ or option_spec):
+ # The argument/option block ends at the first blank line.
+ for i in range(len(indented)):
+ if not indented[i].strip():
+ break
+ else:
+ i += 1
+ arg_block = indented[:i]
+ content = indented[i+1:]
+ content_offset = line_offset + i + 1
+ else:
+ content = indented
+ content_offset = line_offset
+ arg_block = []
+ while content and not content[0].strip():
+ content.trim_start()
+ content_offset += 1
+ if option_spec:
+ options, arg_block = self.parse_directive_options(
+ option_presets, option_spec, arg_block)
+ if arg_block and not (directive.required_arguments
+ or directive.optional_arguments):
+ raise MarkupError('no arguments permitted; blank line '
+ 'required before content block')
+ else:
+ options = {}
+ if directive.required_arguments or directive.optional_arguments:
+ arguments = self.parse_directive_arguments(
+ directive, arg_block)
+ else:
+ arguments = []
+ if content and not has_content:
+ raise MarkupError('no content permitted')
+ return (arguments, options, content, content_offset)
+
+ def parse_directive_options(self, option_presets, option_spec, arg_block):
+ """
+ Split `arg_block` at the first ":option:" line; return (options dict,
+ remaining argument lines). Raises MarkupError on bad option data.
+ """
+ options = option_presets.copy()
+ for i in range(len(arg_block)):
+ if arg_block[i][:1] == ':':
+ opt_block = arg_block[i:]
+ arg_block = arg_block[:i]
+ break
+ else:
+ opt_block = []
+ if opt_block:
+ success, data = self.parse_extension_options(option_spec,
+ opt_block)
+ if success: # data is a dict of options
+ options.update(data)
+ else: # data is an error string
+ raise MarkupError(data)
+ return options, arg_block
+
+ def parse_directive_arguments(self, directive, arg_block):
+ """
+ Return the list of directive arguments parsed from `arg_block`,
+ enforcing the directive's required/optional argument counts.
+ """
+ required = directive.required_arguments
+ optional = directive.optional_arguments
+ arg_text = '\n'.join(arg_block)
+ arguments = arg_text.split()
+ if len(arguments) < required:
+ raise MarkupError('%s argument(s) required, %s supplied'
+ % (required, len(arguments)))
+ elif len(arguments) > required + optional:
+ if directive.final_argument_whitespace:
+ # Surplus words are folded into the last argument.
+ arguments = arg_text.split(None, required + optional - 1)
+ else:
+ raise MarkupError(
+ 'maximum %s argument(s) allowed, %s supplied'
+ % (required + optional, len(arguments)))
+ return arguments
+
+ def parse_extension_options(self, option_spec, datalines):
+ """
+ Parse `datalines` for a field list containing extension options
+ matching `option_spec`.
+
+ :Parameters:
+ - `option_spec`: a mapping of option name to conversion
+ function, which should raise an exception on bad input.
+ - `datalines`: a list of input strings.
+
+ :Return:
+ - Success value, 1 or 0.
+ - An option dictionary on success, an error string on failure.
+ """
+ node = nodes.field_list()
+ newline_offset, blank_finish = self.nested_list_parse(
+ datalines, 0, node, initial_state='ExtensionOptions',
+ blank_finish=1)
+ if newline_offset != len(datalines): # incomplete parse of block
+ return 0, 'invalid option block'
+ try:
+ options = utils.extract_extension_options(node, option_spec)
+ except KeyError, detail:
+ return 0, ('unknown option: "%s"' % detail.args[0])
+ except (ValueError, TypeError), detail:
+ return 0, ('invalid option value: %s' % ' '.join(detail.args))
+ except utils.ExtensionOptionError, detail:
+ return 0, ('invalid option data: %s' % ' '.join(detail.args))
+ if blank_finish:
+ return 1, options
+ else:
+ return 0, 'option data incompletely parsed'
+
+ def unknown_directive(self, type_name):
+ """Report an unknown directive; consume its block as literal text."""
+ lineno = self.state_machine.abs_line_number()
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(0, strip_indent=0)
+ text = '\n'.join(indented)
+ error = self.reporter.error(
+ 'Unknown directive type "%s".' % type_name,
+ nodes.literal_block(text, text), line=lineno)
+ return [error], blank_finish
+
+ def comment(self, match):
+ """Parse a comment block; fallback for all explicit markup."""
+ if not match.string[match.end():].strip() \
+ and self.state_machine.is_next_line_blank(): # an empty comment?
+ return [nodes.comment()], 1 # "A tiny but practical wart."
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ # Drop trailing blank lines from the comment body.
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ text = '\n'.join(indented)
+ return [nodes.comment(text, text)], blank_finish
+
+ # (method, pattern) pairs, tried in order by explicit_construct().
+ explicit.constructs = [
+ (footnote,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \[
+ ( # footnote label:
+ [0-9]+ # manually numbered footnote
+ | # *OR*
+ \# # anonymous auto-numbered footnote
+ | # *OR*
+ \#%s # auto-number ed?) footnote label
+ | # *OR*
+ \* # auto-symbol footnote
+ )
+ \]
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
+ (citation,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \[(%s)\] # citation label
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
+ (hyperlink_target,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ _ # target indicator
+ (?![ ]|$) # first char. not space or EOL
+ """, re.VERBOSE)),
+ (substitution_def,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \| # substitution indicator
+ (?![ ]|$) # first char. not space or EOL
+ """, re.VERBOSE)),
+ (directive,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ (%s) # directive name
+ [ ]? # optional space
+ :: # directive delimiter
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
+
+ def explicit_markup(self, match, context, next_state):
+ """Footnotes, hyperlink targets, directives, comments."""
+ nodelist, blank_finish = self.explicit_construct(match)
+ self.parent += nodelist
+ # Continue parsing any immediately following explicit constructs.
+ self.explicit_list(blank_finish)
+ return [], next_state, []
+
+ def explicit_construct(self, match):
+ """Determine which explicit construct this is, parse & return it."""
+ errors = []
+ for method, pattern in self.explicit.constructs:
+ expmatch = pattern.match(match.string)
+ if expmatch:
+ try:
+ return method(self, expmatch)
+ except MarkupError, (message, lineno): # never reached?
+ errors.append(self.reporter.warning(message, line=lineno))
+ break
+ # No construct matched (or one failed): fall back to a comment.
+ nodelist, blank_finish = self.comment(match)
+ return nodelist + errors, blank_finish
+
+ def explicit_list(self, blank_finish):
+ """
+ Create a nested state machine for a series of explicit markup
+ constructs (including anonymous hyperlink targets).
+ """
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=self.parent, initial_state='Explicit',
+ blank_finish=blank_finish,
+ match_titles=self.state_machine.match_titles)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Explicit markup')
+
+ def anonymous(self, match, context, next_state):
+ """Anonymous hyperlink targets."""
+ nodelist, blank_finish = self.anonymous_target(match)
+ self.parent += nodelist
+ # Continue parsing any immediately following explicit constructs.
+ self.explicit_list(blank_finish)
+ return [], next_state, []
+
+ def anonymous_target(self, match):
+ """Parse one anonymous hyperlink target; return ([target], flag)."""
+ lineno = self.state_machine.abs_line_number()
+ block, indent, offset, blank_finish \
+ = self.state_machine.get_first_known_indented(match.end(),
+ until_blank=1)
+ blocktext = match.string[:match.end()] + '\n'.join(block)
+ block = [escape2null(line) for line in block]
+ target = self.make_target(block, blocktext, lineno, '')
+ return [target], blank_finish
+
+ def line(self, match, context, next_state):
+ """Section title overline or transition marker."""
+ if self.state_machine.match_titles:
+ # Let the Line state decide between title and transition.
+ return [match.string], 'Line', []
+ elif match.string.strip() == '::':
+ raise statemachine.TransitionCorrection('text')
+ elif len(match.string.strip()) < 4:
+ msg = self.reporter.info(
+ 'Unexpected possible title overline or transition.\n'
+ "Treating it as ordinary text because it's so short.",
+ line=self.state_machine.abs_line_number())
+ self.parent += msg
+ raise statemachine.TransitionCorrection('text')
+ else:
+ # Titles are not allowed here (e.g. inside a directive).
+ blocktext = self.state_machine.line
+ msg = self.reporter.severe(
+ 'Unexpected section title or transition.',
+ nodes.literal_block(blocktext, blocktext),
+ line=self.state_machine.abs_line_number())
+ self.parent += msg
+ return [], next_state, []
+
+ def text(self, match, context, next_state):
+ """Titles, definition lists, paragraphs."""
+ # Pass the first line to the Text state to classify the block.
+ return [match.string], 'Text', []
+
+
+class RFC2822Body(Body):
+
+ """
+ RFC2822 headers are only valid as the first constructs in documents. As
+ soon as anything else appears, the `Body` state should take over.
+ """
+
+ patterns = Body.patterns.copy() # can't modify the original
+ patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
+ initial_transitions = [(name, 'Body')
+ for name in Body.initial_transitions]
+ initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
+
+ def rfc2822(self, match, context, next_state):
+ """RFC2822-style field list item."""
+ fieldlist = nodes.field_list(classes=['rfc2822'])
+ self.parent += fieldlist
+ field, blank_finish = self.rfc2822_field(match)
+ fieldlist += field
+ offset = self.state_machine.line_offset + 1 # next line
+ # Subsequent headers are parsed by RFC2822List; any other input
+ # ends the list.
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=fieldlist, initial_state='RFC2822List',
+ blank_finish=blank_finish)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning(
+ 'RFC2822-style field list')
+ return [], next_state, []
+
+ def rfc2822_field(self, match):
+ """Parse one "Name: value" header; return (field node, flag)."""
+ name = match.string[:match.string.find(':')]
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end(),
+ until_blank=1)
+ fieldnode = nodes.field()
+ fieldnode += nodes.field_name(name, name)
+ fieldbody = nodes.field_body('\n'.join(indented))
+ fieldnode += fieldbody
+ if indented:
+ self.nested_parse(indented, input_offset=line_offset,
+ node=fieldbody)
+ return fieldnode, blank_finish
+
+
+class SpecializedBody(Body):
+
+ """
+ Superclass for second and subsequent compound element members. Compound
+ elements are lists and list-like constructs.
+
+ All transition methods are disabled (redefined as `invalid_input`).
+ Override individual methods in subclasses to re-enable.
+
+ For example, once an initial bullet list item, say, is recognized, the
+ `BulletList` subclass takes over, with a "bullet_list" node as its
+ container. Upon encountering the initial bullet list item, `Body.bullet`
+ calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
+ starts up a nested parsing session with `BulletList` as the initial state.
+ Only the ``bullet`` transition method is enabled in `BulletList`; as long
+ as only bullet list items are encountered, they are parsed and inserted
+ into the container. The first construct which is *not* a bullet list item
+ triggers the `invalid_input` method, which ends the nested parse and
+ closes the container. `BulletList` needs to recognize input that is
+ invalid in the context of a bullet list, which means everything *other
+ than* bullet list items, so it inherits the transition list created in
+ `Body`.
+ """
+
+ def invalid_input(self, match=None, context=None, next_state=None):
+ """Not a compound element member. Abort this state machine."""
+ self.state_machine.previous_line() # back up so parent SM can reassess
+ raise EOFError
+
+ # Every Body transition is disabled by default; subclasses re-enable
+ # only the one(s) valid for their compound element.
+ indent = invalid_input
+ bullet = invalid_input
+ enumerator = invalid_input
+ field_marker = invalid_input
+ option_marker = invalid_input
+ doctest = invalid_input
+ line_block = invalid_input
+ grid_table_top = invalid_input
+ simple_table_top = invalid_input
+ explicit_markup = invalid_input
+ anonymous = invalid_input
+ line = invalid_input
+ text = invalid_input
+
+
+class BulletList(SpecializedBody):
+
+ """Second and subsequent bullet_list list_items."""
+
+ def bullet(self, match, context, next_state):
+ """Bullet list item."""
+ if match.string[0] != self.parent['bullet']:
+ # different bullet: new list
+ self.invalid_input()
+ listitem, blank_finish = self.list_item(match.end())
+ self.parent += listitem
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class DefinitionList(SpecializedBody):
+
+ """Second and subsequent definition_list_items."""
+
+ def text(self, match, context, next_state):
+ """Definition lists."""
+ # Hand the term line to the Definition state for the body.
+ return [match.string], 'Definition', []
+
+
+class EnumeratedList(SpecializedBody):
+
+ """Second and subsequent enumerated_list list_items."""
+
+ def enumerator(self, match, context, next_state):
+ """Enumerated list item."""
+ format, sequence, text, ordinal = self.parse_enumerator(
+ match, self.parent['enumtype'])
+ # A change of format, sequence type, or a non-consecutive ordinal
+ # ends this list and starts a new one ('#' is the auto-enumerator).
+ if ( format != self.format
+ or (sequence != '#' and (sequence != self.parent['enumtype']
+ or self.auto
+ or ordinal != (self.lastordinal + 1)))
+ or not self.is_enumerated_list_item(ordinal, sequence, format)):
+ # different enumeration: new list
+ self.invalid_input()
+ if sequence == '#':
+ self.auto = 1
+ listitem, blank_finish = self.list_item(match.end())
+ self.parent += listitem
+ self.blank_finish = blank_finish
+ self.lastordinal = ordinal
+ return [], next_state, []
+
+
+class FieldList(SpecializedBody):
+
+ """Second and subsequent field_list fields."""
+
+ def field_marker(self, match, context, next_state):
+ """Field list field."""
+ field, blank_finish = self.field(match)
+ self.parent += field
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class OptionList(SpecializedBody):
+
+ """Second and subsequent option_list option_list_items."""
+
+ def option_marker(self, match, context, next_state):
+ """Option list item."""
+ try:
+ option_list_item, blank_finish = self.option_list_item(match)
+ except MarkupError, (message, lineno):
+ # A malformed item ends the list rather than erroring out.
+ self.invalid_input()
+ self.parent += option_list_item
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class RFC2822List(SpecializedBody, RFC2822Body):
+
+ """Second and subsequent RFC2822-style field_list fields."""
+
+ # Reuse RFC2822Body's patterns/transitions; all other transitions
+ # remain disabled via SpecializedBody.
+ patterns = RFC2822Body.patterns
+ initial_transitions = RFC2822Body.initial_transitions
+
+ def rfc2822(self, match, context, next_state):
+ """RFC2822-style field list item."""
+ field, blank_finish = self.rfc2822_field(match)
+ self.parent += field
+ self.blank_finish = blank_finish
+ return [], 'RFC2822List', []
+
+ blank = SpecializedBody.invalid_input
+
+
+class ExtensionOptions(FieldList):
+
+ """
+ Parse field_list fields for extension options.
+
+ No nested parsing is done (including inline markup parsing).
+ """
+
+ def parse_field_body(self, indented, offset, node):
+ """Override `Body.parse_field_body` for simpler parsing."""
+ lines = []
+ # The appended '' sentinel flushes the final paragraph.
+ for line in list(indented) + ['']:
+ if line.strip():
+ lines.append(line)
+ elif lines:
+ text = '\n'.join(lines)
+ node += nodes.paragraph(text, text)
+ lines = []
+
+
+class LineBlock(SpecializedBody):
+
+ """Second and subsequent lines of a line_block."""
+
+ blank = SpecializedBody.invalid_input
+
+ def line_block(self, match, context, next_state):
+ """New line of line block."""
+ lineno = self.state_machine.abs_line_number()
+ line, messages, blank_finish = self.line_block_line(match, lineno)
+ self.parent += line
+ # Messages belong to the line_block's container, not the block.
+ self.parent.parent += messages
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class Explicit(SpecializedBody):
+
+ """Second and subsequent explicit markup construct."""
+
+ def explicit_markup(self, match, context, next_state):
+ """Footnotes, hyperlink targets, directives, comments."""
+ nodelist, blank_finish = self.explicit_construct(match)
+ self.parent += nodelist
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+ def anonymous(self, match, context, next_state):
+ """Anonymous hyperlink targets."""
+ nodelist, blank_finish = self.anonymous_target(match)
+ self.parent += nodelist
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+ blank = SpecializedBody.invalid_input
+
+
+class SubstitutionDef(Body):
+
+ """
+ Parser for the contents of a substitution_definition element.
+ """
+
+ patterns = {
+ 'embedded_directive': re.compile(r'(%s)::( +|$)'
+ % Inliner.simplename, re.UNICODE),
+ 'text': r''}
+ initial_transitions = ['embedded_directive', 'text']
+
+ def embedded_directive(self, match, context, next_state):
+ """Parse the directive embedded in the substitution definition."""
+ # The substitution name is passed along as the default "alt" option.
+ nodelist, blank_finish = self.directive(match,
+ alt=self.parent['names'][0])
+ self.parent += nodelist
+ if not self.state_machine.at_eof():
+ self.blank_finish = blank_finish
+ # A substitution definition holds exactly one construct.
+ raise EOFError
+
+ def text(self, match, context, next_state):
+ """Non-directive content: end the nested parse immediately."""
+ if not self.state_machine.at_eof():
+ self.blank_finish = self.state_machine.is_next_line_blank()
+ raise EOFError
+
+
+class Text(RSTState):
+
+ """
+ Classifier of second line of a text block.
+
+ Could be a paragraph, a definition list item, or a title.
+ """
+
+ patterns = {'underline': Body.patterns['line'],
+ 'text': r''}
+ initial_transitions = [('underline', 'Body'), ('text', 'Body')]
+
+ def blank(self, match, context, next_state):
+ """End of paragraph."""
+ paragraph, literalnext = self.paragraph(
+ context, self.state_machine.abs_line_number() - 1)
+ self.parent += paragraph
+ if literalnext:
+ self.parent += self.literal_block()
+ return [], 'Body', []
+
+ def eof(self, context):
+ # A pending paragraph at EOF is flushed like a blank line.
+ if context:
+ self.blank(None, context, None)
+ return []
+
+ def indent(self, match, context, next_state):
+ """Definition list item."""
+ definitionlist = nodes.definition_list()
+ definitionlistitem, blank_finish = self.definition_list_item(context)
+ definitionlist += definitionlistitem
+ self.parent += definitionlist
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=definitionlist, initial_state='DefinitionList',
+ blank_finish=blank_finish, blank_finish_state='Definition')
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Definition list')
+ return [], 'Body', []
+
+ def underline(self, match, context, next_state):
+ """Section title."""
+ lineno = self.state_machine.abs_line_number()
+ title = context[0].rstrip()
+ underline = match.string.rstrip()
+ source = title + '\n' + underline
+ messages = []
+ # column_width accounts for wide (east-asian) characters.
+ if column_width(title) > len(underline):
+ if len(underline) < 4:
+ if self.state_machine.match_titles:
+ msg = self.reporter.info(
+ 'Possible title underline, too short for the title.\n'
+ "Treating it as ordinary text because it's so short.",
+ line=lineno)
+ self.parent += msg
+ raise statemachine.TransitionCorrection('text')
+ else:
+ blocktext = context[0] + '\n' + self.state_machine.line
+ msg = self.reporter.warning(
+ 'Title underline too short.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ messages.append(msg)
+ if not self.state_machine.match_titles:
+ blocktext = context[0] + '\n' + self.state_machine.line
+ msg = self.reporter.severe(
+ 'Unexpected section title.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += messages
+ self.parent += msg
+ return [], next_state, []
+ style = underline[0]
+ context[:] = []
+ self.section(title, source, style, lineno - 1, messages)
+ return [], next_state, []
+
+ def text(self, match, context, next_state):
+ """Paragraph."""
+ startline = self.state_machine.abs_line_number() - 1
+ msg = None
+ try:
+ block = self.state_machine.get_text_block(flush_left=1)
+ except statemachine.UnexpectedIndentationError, instance:
+ block, source, lineno = instance.args
+ msg = self.reporter.error('Unexpected indentation.',
+ source=source, line=lineno)
+ lines = context + list(block)
+ paragraph, literalnext = self.paragraph(lines, startline)
+ self.parent += paragraph
+ self.parent += msg
+ if literalnext:
+ try:
+ self.state_machine.next_line()
+ except EOFError:
+ pass
+ self.parent += self.literal_block()
+ return [], next_state, []
+
+ def literal_block(self):
+ """Return a list of nodes."""
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_indented()
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ if not indented:
+ # No indented block: the literal block must be quoted instead.
+ return self.quoted_literal_block()
+ data = '\n'.join(indented)
+ literal_block = nodes.literal_block(data, data)
+ literal_block.line = offset + 1
+ nodelist = [literal_block]
+ if not blank_finish:
+ nodelist.append(self.unindent_warning('Literal block'))
+ return nodelist
+
+ def quoted_literal_block(self):
+ """Parse a quoted (unindented) literal block; return its nodes."""
+ abs_line_offset = self.state_machine.abs_line_offset()
+ offset = self.state_machine.line_offset
+ parent_node = nodes.Element()
+ new_abs_offset = self.nested_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=abs_line_offset, node=parent_node, match_titles=0,
+ state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
+ 'initial_state': 'QuotedLiteralBlock'})
+ self.goto_line(new_abs_offset)
+ return parent_node.children
+
+ def definition_list_item(self, termline):
+ """Parse one term + definition; return (list item node, flag)."""
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_indented()
+ definitionlistitem = nodes.definition_list_item(
+ '\n'.join(termline + list(indented)))
+ lineno = self.state_machine.abs_line_number() - 1
+ definitionlistitem.line = lineno
+ termlist, messages = self.term(termline, lineno)
+ definitionlistitem += termlist
+ definition = nodes.definition('', *messages)
+ definitionlistitem += definition
+ if termline[0][-2:] == '::':
+ definition += self.reporter.info(
+ 'Blank line missing before literal block (after the "::")? '
+ 'Interpreted as a definition list item.', line=line_offset+1)
+ self.nested_parse(indented, input_offset=line_offset, node=definition)
+ return definitionlistitem, blank_finish
+
+ classifier_delimiter = re.compile(' +: +')
+
+ def term(self, lines, lineno):
+ """Return a definition_list's term and optional classifiers."""
+ assert len(lines) == 1
+ text_nodes, messages = self.inline_text(lines[0], lineno)
+ term_node = nodes.term()
+ node_list = [term_node]
+ for i in range(len(text_nodes)):
+ node = text_nodes[i]
+ if isinstance(node, nodes.Text):
+ # " : " inside a Text node separates term from classifiers.
+ parts = self.classifier_delimiter.split(node.rawsource)
+ if len(parts) == 1:
+ node_list[-1] += node
+ else:
+
+ node_list[-1] += nodes.Text(parts[0].rstrip())
+ for part in parts[1:]:
+ classifier_node = nodes.classifier('', part)
+ node_list.append(classifier_node)
+ else:
+ node_list[-1] += node
+ return node_list, messages
+
+
+class SpecializedText(Text):
+
+ """
+ Superclass for second and subsequent lines of Text-variants.
+
+ All transition methods are disabled. Override individual methods in
+ subclasses to re-enable.
+ """
+
+ def eof(self, context):
+ """Incomplete construct."""
+ return []
+
+ def invalid_input(self, match=None, context=None, next_state=None):
+ """Not a compound element member. Abort this state machine."""
+ raise EOFError
+
+ # All Text transitions are disabled by default.
+ blank = invalid_input
+ indent = invalid_input
+ underline = invalid_input
+ text = invalid_input
+
+
+class Definition(SpecializedText):
+
+ """Second line of potential definition_list_item."""
+
+ def eof(self, context):
+ """Not a definition."""
+ self.state_machine.previous_line(2) # so parent SM can reassess
+ return []
+
+ def indent(self, match, context, next_state):
+ """Definition list item."""
+ definitionlistitem, blank_finish = self.definition_list_item(context)
+ self.parent += definitionlistitem
+ self.blank_finish = blank_finish
+ return [], 'DefinitionList', []
+
+
+class Line(SpecializedText):
+
+ """
+ Second line of over- & underlined section title or transition marker.
+ """
+
+ eofcheck = 1 # @@@ ???
+ """Set to 0 while parsing sections, so that we don't catch the EOF."""
+
+ def eof(self, context):
+ """Transition marker at end of section or document."""
+ marker = context[0].strip()
+ if self.memo.section_bubble_up_kludge:
+ self.memo.section_bubble_up_kludge = 0
+ elif len(marker) < 4:
+ self.state_correction(context)
+ if self.eofcheck: # ignore EOFError with sections
+ lineno = self.state_machine.abs_line_number() - 1
+ transition = nodes.transition(rawsource=context[0])
+ transition.line = lineno
+ self.parent += transition
+ self.eofcheck = 1
+ return []
+
+ def blank(self, match, context, next_state):
+ """Transition marker."""
+ lineno = self.state_machine.abs_line_number() - 1
+ marker = context[0].strip()
+ if len(marker) < 4:
+ # Too short for a transition: re-parse as ordinary text.
+ self.state_correction(context)
+ transition = nodes.transition(rawsource=marker)
+ transition.line = lineno
+ self.parent += transition
+ return [], 'Body', []
+
+ def text(self, match, context, next_state):
+ """Potential over- & underlined title."""
+ lineno = self.state_machine.abs_line_number() - 1
+ overline = context[0]
+ title = match.string
+ underline = ''
+ try:
+ underline = self.state_machine.next_line()
+ except EOFError:
+ # Overline + title but no underline: incomplete.
+ blocktext = overline + '\n' + title
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Incomplete section title.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ source = '%s\n%s\n%s' % (overline, title, underline)
+ overline = overline.rstrip()
+ underline = underline.rstrip()
+ if not self.transitions['underline'][0].match(underline):
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Missing matching underline for section title overline.',
+ nodes.literal_block(source, source), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ elif overline != underline:
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Title overline & underline mismatch.',
+ nodes.literal_block(source, source), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ title = title.rstrip()
+ messages = []
+ if column_width(title) > len(overline):
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.warning(
+ 'Title overline too short.',
+ nodes.literal_block(source, source), line=lineno)
+ messages.append(msg)
+ style = (overline[0], underline[0])
+ self.eofcheck = 0 # @@@ not sure this is correct
+ self.section(title.lstrip(), source, style, lineno + 1, messages)
+ self.eofcheck = 1
+ return [], 'Body', []
+
+ indent = text # indented title
+
+ def underline(self, match, context, next_state):
+ """A second line-like line: invalid title/transition combination."""
+ overline = context[0]
+ blocktext = overline + '\n' + self.state_machine.line
+ lineno = self.state_machine.abs_line_number() - 1
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 1)
+ msg = self.reporter.error(
+ 'Invalid section title or transition marker.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+
+ def short_overline(self, context, blocktext, lineno, lines=1):
+ """Report a too-short overline and re-parse the block as text."""
+ msg = self.reporter.info(
+ 'Possible incomplete section title.\nTreating the overline as '
+ "ordinary text because it's so short.", line=lineno)
+ self.parent += msg
+ self.state_correction(context, lines)
+
+ def state_correction(self, context, lines=1):
+ """Back up `lines` lines and restart in Body's "text" transition."""
+ self.state_machine.previous_line(lines)
+ context[:] = []
+ raise statemachine.StateCorrection('Body', 'text')
+
+
+class QuotedLiteralBlock(RSTState):
+
+ """
+ Nested parse handler for quoted (unindented) literal blocks.
+
+ Special-purpose. Not for inclusion in `state_classes`.
+ """
+
+ patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
+ 'text': r''}
+ initial_transitions = ('initial_quoted', 'text')
+
+ def __init__(self, state_machine, debug=0):
+ RSTState.__init__(self, state_machine, debug)
+ # Deferred system messages, emitted by eof().
+ self.messages = []
+ # Line number of the block's first line, set by initial_quoted().
+ self.initial_lineno = None
+
+ def blank(self, match, context, next_state):
+ """A blank line ends the block once content has been collected."""
+ if context:
+ raise EOFError
+ else:
+ return context, next_state, []
+
+ def eof(self, context):
+ """Build the literal_block node from the collected lines."""
+ if context:
+ text = '\n'.join(context)
+ literal_block = nodes.literal_block(text, text)
+ literal_block.line = self.initial_lineno
+ self.parent += literal_block
+ else:
+ self.parent += self.reporter.warning(
+ 'Literal block expected; none found.',
+ line=self.state_machine.abs_line_number())
+ self.state_machine.previous_line()
+ self.parent += self.messages
+ return []
+
+ def indent(self, match, context, next_state):
+ """Indented input inside a quoted block is an error; end the parse."""
+ assert context, ('QuotedLiteralBlock.indent: context should not '
+ 'be empty!')
+ self.messages.append(
+ self.reporter.error('Unexpected indentation.',
+ line=self.state_machine.abs_line_number()))
+ self.state_machine.previous_line()
+ raise EOFError
+
+ def initial_quoted(self, match, context, next_state):
+ """Match arbitrary quote character on the first line only."""
+ self.remove_transition('initial_quoted')
+ quote = match.string[0]
+ pattern = re.compile(re.escape(quote))
+ # New transition matches consistent quotes only:
+ self.add_transition('quoted',
+ (pattern, self.quoted, self.__class__.__name__))
+ self.initial_lineno = self.state_machine.abs_line_number()
+ return [match.string], next_state, []
+
+ def quoted(self, match, context, next_state):
+ """Match consistent quotes on subsequent lines."""
+ context.append(match.string)
+ return context, next_state, []
+
+ def text(self, match, context, next_state):
+ """A differently-quoted line ends the block (with an error if any
+ content was collected)."""
+ if context:
+ self.messages.append(
+ self.reporter.error('Inconsistent literal block quoting.',
+ line=self.state_machine.abs_line_number()))
+ self.state_machine.previous_line()
+ raise EOFError
+
+
+# QuotedLiteralBlock is deliberately excluded (special-purpose only).
+state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
+ OptionList, LineBlock, ExtensionOptions, Explicit, Text,
+ Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
+"""Standard set of State classes used to start `RSTStateMachine`."""
diff --git a/test/test_rst2rst_functional.py b/test/test_functional_losslessrst.py
index f7dd1b5dc..334829ccd 100644
--- a/test/test_rst2rst_functional.py
+++ b/test/test_functional_losslessrst.py
@@ -25,7 +25,7 @@ import test_functional
from test_functional import datadir, join_path
### use this to limit the tests run:
-testsubset = (0,2,3,4,5, ) # 1 == dangerous.txt
+testsubset = (0,)#2,3,4,5, ) # 1 == dangerous.txt
#testsubset = (5, ) #(0,1,2,3,4,)
@@ -86,7 +86,7 @@ class FunctionalRst2RstTestCase(DocutilsTestSupport.CustomTestCase):
'docinfo_xform': 0,
}
namespace['reader_name'] = "standalone"
- namespace['parser_name'] = "rst"
+ namespace['parser_name'] = "losslessrst"
namespace['writer_name'] = "rst"
# Read the variables set in the default config file:
execfile(join_path(datadir, 'tests', '_default.py'), namespace)