From 101671ae44e1686680c80cd07b452aabeb88fb63 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 20 Apr 2002 03:01:52 +0000 Subject: Initial revision git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@18 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2115 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2115 insertions(+) create mode 100644 docutils/parsers/rst/states.py (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py new file mode 100644 index 000000000..b2dbf9b3e --- /dev/null +++ b/docutils/parsers/rst/states.py @@ -0,0 +1,2115 @@ +""" +:Author: David Goodger +:Contact: goodger@users.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This module has been placed in the public domain. + +This is the ``docutils.parsers.rst.states`` module, the core of +the reStructuredText parser. It defines the following: + +:Classes: + - `RSTStateMachine`: reStructuredText parser's entry point. + - `NestedStateMachine`: recursive StateMachine. + - `RSTState`: reStructuredText State superclass. + - `Body`: Generic classifier of the first line of a block. + - `BulletList`: Second and subsequent bullet_list list_items. + - `DefinitionList`: Second and subsequent definition_list_items. + - `EnumeratedList`: Second and subsequent enumerated_list list_items. + - `FieldList`: Second and subsequent fields. + - `OptionList`: Second and subsequent option_list_items. + - `Explicit`: Second and subsequent explicit markup constructs. + - `SubstitutionDef`: For embedded directives in substitution definitions. + - `Text`: Classifier of second line of a text block. + - `Definition`: Second line of potential definition_list_item. + - `Line`: Second line of overlined section title or transition marker. + - `Stuff`: An auxiliary collection class. 
+ +:Exception classes: + - `MarkupError` + - `ParserError` + - `TransformationError` + +:Functions: + - `escape2null()`: Return a string, escape-backslashes converted to nulls. + - `unescape()`: Return a string, nulls removed or restored to backslashes. + - `normname()`: Return a case- and whitespace-normalized name. + +:Attributes: + - `stateclasses`: set of State classes used with `RSTStateMachine`. + +Parser Overview +=============== + +The reStructuredText parser is implemented as a state machine, examining its +input one line at a time. To understand how the parser works, please first +become familiar with the `docutils.statemachine` module. In the description +below, references are made to classes defined in this module; please see the +individual classes for details. + +Parsing proceeds as follows: + +1. The state machine examines each line of input, checking each of the + transition patterns of the state `Body`, in order, looking for a match. The + implicit transitions (blank lines and indentation) are checked before any + others. The 'text' transition is a catch-all (matches anything). + +2. The method associated with the matched transition pattern is called. + + A. Some transition methods are self-contained, appending elements to the + document tree ('doctest' parses a doctest block). The parser's current + line index is advanced to the end of the element, and parsing continues + with step 1. + + B. Others trigger the creation of a nested state machine, whose job is to + parse a compound construct ('indent' does a block quote, 'bullet' does a + bullet list, 'overline' does a section [first checking for a valid + section header]). + + - In the case of lists and explicit markup, a new state machine is + created and run to parse the first item. + + - A new state machine is created and its initial state is set to the + appropriate specialized state (`BulletList` in the case of the + 'bullet' transition). 
This state machine is run to parse the compound + element (or series of explicit markup elements), and returns as soon + as a non-member element is encountered. For example, the `BulletList` + state machine aborts as soon as it encounters an element which is not + a list item of that bullet list. The optional omission of + inter-element blank lines is handled by the nested state machine. + + - The current line index is advanced to the end of the elements parsed, + and parsing continues with step 1. + + C. The result of the 'text' transition depends on the next line of text. + The current state is changed to `Text`, under which the second line is + examined. If the second line is: + + - Indented: The element is a definition list item, and parsing proceeds + similarly to step 2.B, using the `DefinitionList` state. + + - A line of uniform punctuation characters: The element is a section + header; again, parsing proceeds as in step 2.B, and `Body` is still + used. + + - Anything else: The element is a paragraph, which is examined for + inline markup and appended to the parent element. Processing continues + with step 1. +""" + +__docformat__ = 'reStructuredText' + + +import sys, re, string +from docutils import nodes, statemachine, utils, roman, urischemes +from docutils.statemachine import StateMachineWS, StateWS +from docutils.utils import normname +import directives, languages +from tableparser import TableParser, TableMarkupError + + +class MarkupError(Exception): pass +class ParserError(Exception): pass + + +class Stuff: + + """Stores a bunch of stuff for dotted-attribute access.""" + + def __init__(self, **keywordargs): + self.__dict__.update(keywordargs) + + +class RSTStateMachine(StateMachineWS): + + """ + reStructuredText's master StateMachine. + + The entry point to reStructuredText parsing is the `run()` method. 
+ """ + + def run(self, inputlines, docroot, inputoffset=0, matchtitles=1): + """ + Parse `inputlines` and return a `docutils.nodes.document` instance. + + Extend `StateMachineWS.run()`: set up parse-global data, run the + StateMachine, and return the resulting + document. + """ + self.language = languages.getlanguage(docroot.languagecode) + self.matchtitles = matchtitles + self.memo = Stuff(document=docroot, + reporter=docroot.reporter, + language=self.language, + titlestyles=[], + sectionlevel=0) + self.node = docroot + results = StateMachineWS.run(self, inputlines, inputoffset) + assert results == [], 'RSTStateMachine.run() results should be empty.' + self.node = self.memo = None # remove unneeded references + + +class NestedStateMachine(StateMachineWS): + + """ + StateMachine run from within other StateMachine runs, to parse nested + document structures. + """ + + def run(self, inputlines, inputoffset, memo, node, matchtitles=1): + """ + Parse `inputlines` and populate a `docutils.nodes.document` instance. + + Extend `StateMachineWS.run()`: set up document-wide data. + """ + self.matchtitles = matchtitles + self.memo = memo + self.node = node + results = StateMachineWS.run(self, inputlines, inputoffset) + assert results == [], 'NestedStateMachine.run() results should be empty' + return results + + +class RSTState(StateWS): + + """ + reStructuredText State superclass. + + Contains methods used by all State subclasses. 
+ """ + + nestedSM = NestedStateMachine + + def __init__(self, statemachine, debug=0): + self.nestedSMkwargs = {'stateclasses': stateclasses, + 'initialstate': 'Body'} + StateWS.__init__(self, statemachine, debug) + + def gotoline(self, abslineoffset): + """Jump to input line `abslineoffset`, ignoring jumps past the end.""" + try: + self.statemachine.gotoline(abslineoffset) + except IndexError: + pass + + def bof(self, context): + """Called at beginning of file.""" + return [], [] + + def nestedparse(self, block, inputoffset, node, matchtitles=0, + statemachineclass=None, statemachinekwargs=None): + """ + Create a new StateMachine rooted at `node` and run it over the input + `block`. + """ + if statemachineclass is None: + statemachineclass = self.nestedSM + if statemachinekwargs is None: + statemachinekwargs = self.nestedSMkwargs + statemachine = statemachineclass(debug=self.debug, **statemachinekwargs) + statemachine.run(block, inputoffset, memo=self.statemachine.memo, + node=node, matchtitles=matchtitles) + statemachine.unlink() + return statemachine.abslineoffset() + + def nestedlistparse(self, block, inputoffset, node, initialstate, + blankfinish, blankfinishstate=None, extrasettings={}, + matchtitles=0, statemachineclass=None, + statemachinekwargs=None): + """ + Create a new StateMachine rooted at `node` and run it over the input + `block`. Also keep track of optional intermdediate blank lines and the + required final one. 
+ """ + if statemachineclass is None: + statemachineclass = self.nestedSM + if statemachinekwargs is None: + statemachinekwargs = self.nestedSMkwargs.copy() + statemachinekwargs['initialstate'] = initialstate + statemachine = statemachineclass(debug=self.debug, **statemachinekwargs) + if blankfinishstate is None: + blankfinishstate = initialstate + statemachine.states[blankfinishstate].blankfinish = blankfinish + for key, value in extrasettings.items(): + setattr(statemachine.states[initialstate], key, value) + statemachine.run(block, inputoffset, memo=self.statemachine.memo, + node=node, matchtitles=matchtitles) + blankfinish = statemachine.states[blankfinishstate].blankfinish + statemachine.unlink() + return statemachine.abslineoffset(), blankfinish + + def section(self, title, source, style, lineno): + """ + When a new section is reached that isn't a subsection of the current + section, back up the line count (use previousline(-x)), then raise + EOFError. The current StateMachine will finish, then the calling + StateMachine can re-examine the title. This will work its way back up + the calling chain until the correct section level isreached. + + Alternative: Evaluate the title, store the title info & level, and + back up the chain until that level is reached. Store in memo? Or + return in results? + """ + if self.checksubsection(source, style, lineno): + self.newsubsection(title, lineno) + + def checksubsection(self, source, style, lineno): + """ + Check for a valid subsection header. Return 1 (true) or None (false). + + :Exception: `EOFError` when a sibling or supersection encountered. 
+ """ + memo = self.statemachine.memo + titlestyles = memo.titlestyles + mylevel = memo.sectionlevel + try: # check for existing title style + level = titlestyles.index(style) + 1 + except ValueError: # new title style + if len(titlestyles) == memo.sectionlevel: # new subsection + titlestyles.append(style) + return 1 + else: # not at lowest level + self.statemachine.node += self.titleinconsistent(source, lineno) + return None + if level <= mylevel: # sibling or supersection + memo.sectionlevel = level # bubble up to parent section + # back up 2 lines for underline title, 3 for overline title + self.statemachine.previousline(len(style) + 1) + raise EOFError # let parent section re-evaluate + if level == mylevel + 1: # immediate subsection + return 1 + else: # invalid subsection + self.statemachine.node += self.titleinconsistent(source, lineno) + return None + + def titleinconsistent(self, sourcetext, lineno): + literalblock = nodes.literal_block('', sourcetext) + error = self.statemachine.memo.reporter.severe( + 'Title level inconsistent at line %s:' % lineno, '', literalblock) + return error + + def newsubsection(self, title, lineno): + """Append new subsection to document tree. On return, check level.""" + memo = self.statemachine.memo + mylevel = memo.sectionlevel + memo.sectionlevel += 1 + sectionnode = nodes.section() + self.statemachine.node += sectionnode + textnodes, messages = self.inline_text(title, lineno) + titlenode = nodes.title(title, '', *textnodes) + name = normname(titlenode.astext()) + sectionnode['name'] = name + sectionnode += titlenode + sectionnode += messages + memo.document.note_implicit_target(sectionnode, sectionnode) + offset = self.statemachine.lineoffset + 1 + absoffset = self.statemachine.abslineoffset() + 1 + newabsoffset = self.nestedparse( + self.statemachine.inputlines[offset:], inputoffset=absoffset, + node=sectionnode, matchtitles=1) + self.gotoline(newabsoffset) + if memo.sectionlevel <= mylevel: # can't handle next section? 
+ raise EOFError # bubble up to supersection + # reset sectionlevel; next pass will detect it properly + memo.sectionlevel = mylevel + + def paragraph(self, lines, lineno): + """ + Return a list (paragraph & messages) and a boolean: literal_block next? + """ + data = '\n'.join(lines).rstrip() + if data[-2:] == '::': + if len(data) == 2: + return [], 1 + elif data[-3] == ' ': + text = data[:-3].rstrip() + else: + text = data[:-1] + literalnext = 1 + else: + text = data + literalnext = 0 + textnodes, messages = self.inline_text(text, lineno) + p = nodes.paragraph(data, '', *textnodes) + return [p] + messages, literalnext + + inline = Stuff() + """Patterns and constants used for inline markup recognition.""" + + inline.openers = '\'"([{<' + inline.closers = '\'")]}>' + inline.start_string_prefix = (r'(?:(?<=^)|(?<=[ \n%s]))' + % re.escape(inline.openers)) + inline.end_string_suffix = (r'(?:(?=$)|(?=[- \n.,:;!?%s]))' + % re.escape(inline.closers)) + inline.non_whitespace_before = r'(? 0: + textnodes.append(nodes.Text(unescape( + remainder[:match.start(whole)]))) + if match.group(email): + addscheme = 'mailto:' + else: + addscheme = '' + text = match.group(whole) + unescaped = unescape(text, 0) + textnodes.append( + nodes.reference(unescape(text, 1), unescaped, + refuri=addscheme + unescaped)) + remainder = remainder[match.end(whole):] + start = 0 + else: # not a valid scheme + start = match.end(whole) + else: + if remainder: + textnodes.append(nodes.Text(unescape(remainder))) + break + return textnodes + + inline.dispatch = {'*': emphasis, + '**': strong, + '`': interpreted_or_phrase_ref, + '``': literal, + '_`': inline_target, + ']_': footnote_reference, + '|': substitution_reference, + '_': reference, + '__': anonymous_reference} + + def inline_text(self, text, lineno): + """ + Return 2 lists: nodes (text and inline elements), and system_messages. 
+ + Using a `pattern` matching start-strings (for emphasis, strong, + interpreted, phrase reference, literal, substitution reference, and + inline target) or complete constructs (simple reference, footnote + reference) we search for a candidate. When one is found, we check for + validity (e.g., not a quoted '*' character). If valid, search for the + corresponding end string if applicable, and check for validity. If not + found or invalid, generate a warning and ignore the start-string. + Standalone hyperlinks are found last. + """ + pattern = self.inline.patterns.initial + dispatch = self.inline.dispatch + start = self.inline.groups.initial.start - 1 + backquote = self.inline.groups.initial.backquote - 1 + refend = self.inline.groups.initial.refend - 1 + fnend = self.inline.groups.initial.fnend - 1 + remaining = escape2null(text) + processed = [] + unprocessed = [] + messages = [] + while remaining: + match = pattern.search(remaining) + if match: + groups = match.groups() + before, inlines, remaining, sysmessages = \ + dispatch[groups[start] or groups[backquote] + or groups[refend] + or groups[fnend]](self, match, lineno) + unprocessed.append(before) + messages += sysmessages + if inlines: + processed += self.standalone_uri(''.join(unprocessed), + lineno) + processed += inlines + unprocessed = [] + else: + break + remaining = ''.join(unprocessed) + remaining + if remaining: + processed += self.standalone_uri(remaining, lineno) + return processed, messages + + def unindentwarning(self): + return self.statemachine.memo.reporter.warning( + ('Unindent without blank line at line %s.' + % (self.statemachine.abslineno() + 1))) + + +class Body(RSTState): + + """ + Generic classifier of the first line of a block. 
+ """ + + enum = Stuff() + """Enumerated list parsing information.""" + + enum.formatinfo = { + 'parens': Stuff(prefix='(', suffix=')', start=1, end=-1), + 'rparen': Stuff(prefix='', suffix=')', start=0, end=-1), + 'period': Stuff(prefix='', suffix='.', start=0, end=-1)} + enum.formats = enum.formatinfo.keys() + enum.sequences = ['arabic', 'loweralpha', 'upperalpha', + 'lowerroman', 'upperroman'] # ORDERED! + enum.sequencepats = {'arabic': '[0-9]+', + 'loweralpha': '[a-z]', + 'upperalpha': '[A-Z]', + 'lowerroman': '[ivxlcdm]+', + 'upperroman': '[IVXLCDM]+',} + enum.converters = {'arabic': int, + 'loweralpha': + lambda s, zero=(ord('a')-1): ord(s) - zero, + 'upperalpha': + lambda s, zero=(ord('A')-1): ord(s) - zero, + 'lowerroman': + lambda s: roman.fromRoman(s.upper()), + 'upperroman': roman.fromRoman} + + enum.sequenceregexps = {} + for sequence in enum.sequences: + enum.sequenceregexps[sequence] = re.compile(enum.sequencepats[sequence] + + '$') + + tabletoppat = re.compile(r'\+-[-+]+-\+ *$') + """Matches the top (& bottom) of a table).""" + + tableparser = TableParser() + + pats = {} + """Fragments of patterns used by transitions.""" + + pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]' + pats['alpha'] = '[a-zA-Z]' + pats['alphanum'] = '[a-zA-Z0-9]' + pats['alphanumplus'] = '[a-zA-Z0-9_-]' + pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s' + '|%(upperroman)s)' % enum.sequencepats) + pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats + pats['optarg'] = '%(alpha)s%(alphanumplus)s*' % pats + pats['option'] = r'(--?|\+|/)%(optname)s([ =]%(optarg)s)?' 
% pats + + for format in enum.formats: + pats[format] = '(?P<%s>%s%s%s)' % ( + format, re.escape(enum.formatinfo[format].prefix), + pats['enum'], re.escape(enum.formatinfo[format].suffix)) + + patterns = {'bullet': r'[-+*]( +|$)', + 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' + % pats, + 'field_marker': r':[^: ]([^:]*[^: ])?:( +|$)', + 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats, + 'doctest': r'>>>( +|$)', + 'tabletop': tabletoppat, + 'explicit_markup': r'\.\.( +|$)', + 'anonymous': r'__( +|$)', + 'line': r'(%(nonalphanum7bit)s)\1\1\1+ *$' % pats, + #'rfc822': r'[!-9;-~]+:( +|$)', + 'text': r''} + initialtransitions = ['bullet', + 'enumerator', + 'field_marker', + 'option_marker', + 'doctest', + 'tabletop', + 'explicit_markup', + 'anonymous', + 'line', + 'text'] + + def indent(self, match, context, nextstate): + """Block quote.""" + indented, indent, lineoffset, blankfinish = \ + self.statemachine.getindented() + blockquote = self.block_quote(indented, lineoffset) + self.statemachine.node += blockquote + if not blankfinish: + self.statemachine.node += self.unindentwarning() + return context, nextstate, [] + + def block_quote(self, indented, lineoffset): + blockquote = nodes.block_quote() + self.nestedparse(indented, lineoffset, blockquote) + return blockquote + + def bullet(self, match, context, nextstate): + """Bullet list item.""" + bulletlist = nodes.bullet_list() + self.statemachine.node += bulletlist + bulletlist['bullet'] = match.string[0] + i, blankfinish = self.list_item(match.end()) + bulletlist += i + offset = self.statemachine.lineoffset + 1 # next line + newlineoffset, blankfinish = self.nestedlistparse( + self.statemachine.inputlines[offset:], + inputoffset=self.statemachine.abslineoffset() + 1, + node=bulletlist, initialstate='BulletList', + blankfinish=blankfinish) + if not blankfinish: + self.statemachine.node += self.unindentwarning() + self.gotoline(newlineoffset) + return [], nextstate, [] + + def list_item(self, 
indent): + indented, lineoffset, blankfinish = \ + self.statemachine.getknownindented(indent) + listitem = nodes.list_item('\n'.join(indented)) + if indented: + self.nestedparse(indented, inputoffset=lineoffset, node=listitem) + return listitem, blankfinish + + def enumerator(self, match, context, nextstate): + """Enumerated List Item""" + format, sequence, text, ordinal = self.parse_enumerator(match) + if ordinal is None: + msg = self.statemachine.memo.reporter.error( + ('Enumerated list start value invalid at line %s: ' + '%r (sequence %r)' % (self.statemachine.abslineno(), + text, sequence))) + self.statemachine.node += msg + indented, lineoffset, blankfinish = \ + self.statemachine.getknownindented(match.end()) + bq = self.block_quote(indented, lineoffset) + self.statemachine.node += bq + if not blankfinish: + self.statemachine.node += self.unindentwarning() + return [], nextstate, [] + if ordinal != 1: + msg = self.statemachine.memo.reporter.info( + ('Enumerated list start value not ordinal-1 at line %s: ' + '%r (ordinal %s)' % (self.statemachine.abslineno(), + text, ordinal))) + self.statemachine.node += msg + enumlist = nodes.enumerated_list() + self.statemachine.node += enumlist + enumlist['enumtype'] = sequence + if ordinal != 1: + enumlist['start'] = ordinal + enumlist['prefix'] = self.enum.formatinfo[format].prefix + enumlist['suffix'] = self.enum.formatinfo[format].suffix + listitem, blankfinish = self.list_item(match.end()) + enumlist += listitem + offset = self.statemachine.lineoffset + 1 # next line + newlineoffset, blankfinish = self.nestedlistparse( + self.statemachine.inputlines[offset:], + inputoffset=self.statemachine.abslineoffset() + 1, + node=enumlist, initialstate='EnumeratedList', + blankfinish=blankfinish, + extrasettings={'lastordinal': ordinal, 'format': format}) + if not blankfinish: + self.statemachine.node += self.unindentwarning() + self.gotoline(newlineoffset) + return [], nextstate, [] + + def parse_enumerator(self, match, 
expectedsequence=None): + """ + Analyze an enumerator and return the results. + + :Return: + - the enumerator format ('period', 'parens', or 'rparen'), + - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.), + - the text of the enumerator, stripped of formatting, and + - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.; + ``None`` is returned for invalid enumerator text). + + The enumerator format has already been determined by the regular + expression match. If `expectedsequence` is given, that sequence is + tried first. If not, we check for Roman numeral 1. This way, + single-character Roman numerals (which are also alphabetical) can be + matched. If no sequence has been matched, all sequences are checked in + order. + """ + groupdict = match.groupdict() + sequence = '' + for format in self.enum.formats: + if groupdict[format]: # was this the format matched? + break # yes; keep `format` + else: # shouldn't happen + raise ParserError, 'enumerator format not matched' + text = groupdict[format][self.enum.formatinfo[format].start + :self.enum.formatinfo[format].end] + if expectedsequence: + try: + if self.enum.sequenceregexps[expectedsequence].match(text): + sequence = expectedsequence + except KeyError: # shouldn't happen + raise ParserError, 'unknown sequence: %s' % sequence + else: + if text == 'i': + sequence = 'lowerroman' + elif text == 'I': + sequence = 'upperroman' + if not sequence: + for sequence in self.enum.sequences: + if self.enum.sequenceregexps[sequence].match(text): + break + else: # shouldn't happen + raise ParserError, 'enumerator sequence not matched' + try: + ordinal = self.enum.converters[sequence](text) + except roman.InvalidRomanNumeralError: + ordinal = None + return format, sequence, text, ordinal + + def field_marker(self, match, context, nextstate): + """Field list item.""" + fieldlist = nodes.field_list() + self.statemachine.node += fieldlist + field, blankfinish = self.field(match) + fieldlist += field + offset 
= self.statemachine.lineoffset + 1 # next line + newlineoffset, blankfinish = self.nestedlistparse( + self.statemachine.inputlines[offset:], + inputoffset=self.statemachine.abslineoffset() + 1, + node=fieldlist, initialstate='FieldList', + blankfinish=blankfinish) + if not blankfinish: + self.statemachine.node += self.unindentwarning() + self.gotoline(newlineoffset) + return [], nextstate, [] + + def field(self, match): + name, args = self.parse_field_marker(match) + indented, indent, lineoffset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end()) + fieldnode = nodes.field() + fieldnode += nodes.field_name(name, name) + for arg in args: + fieldnode += nodes.field_argument(arg, arg) + fieldbody = nodes.field_body('\n'.join(indented)) + fieldnode += fieldbody + if indented: + self.nestedparse(indented, inputoffset=lineoffset, node=fieldbody) + return fieldnode, blankfinish + + def parse_field_marker(self, match): + """Extract & return name & argument list from a field marker match.""" + field = match.string[1:] # strip off leading ':' + field = field[:field.find(':')] # strip off trailing ':' etc. 
+ tokens = field.split() + return tokens[0], tokens[1:] # first == name, others == args + + def option_marker(self, match, context, nextstate): + """Option list item.""" + optionlist = nodes.option_list() + try: + listitem, blankfinish = self.option_list_item(match) + except MarkupError, detail: # shouldn't happen; won't match pattern + msg = self.statemachine.memo.reporter.error( + ('Invalid option list marker at line %s: %s' + % (self.statemachine.abslineno(), detail))) + self.statemachine.node += msg + indented, indent, lineoffset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end()) + blockquote = self.block_quote(indented, lineoffset) + self.statemachine.node += blockquote + if not blankfinish: + self.statemachine.node += self.unindentwarning() + return [], nextstate, [] + self.statemachine.node += optionlist + optionlist += listitem + offset = self.statemachine.lineoffset + 1 # next line + newlineoffset, blankfinish = self.nestedlistparse( + self.statemachine.inputlines[offset:], + inputoffset=self.statemachine.abslineoffset() + 1, + node=optionlist, initialstate='OptionList', + blankfinish=blankfinish) + if not blankfinish: + self.statemachine.node += self.unindentwarning() + self.gotoline(newlineoffset) + return [], nextstate, [] + + def option_list_item(self, match): + options = self.parse_option_marker(match) + indented, indent, lineoffset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end()) + if not indented: # not an option list item + raise statemachine.TransitionCorrection('text') + option_group = nodes.option_group('', *options) + description = nodes.description('\n'.join(indented)) + option_list_item = nodes.option_list_item('', option_group, description) + if indented: + self.nestedparse(indented, inputoffset=lineoffset, node=description) + return option_list_item, blankfinish + + def parse_option_marker(self, match): + """ + Return a list of `node.option` and `node.option_argument` objects, + parsed from an 
option marker match. + + :Exception: `MarkupError` for invalid option markers. + """ + optlist = [] + optionstrings = match.group().rstrip().split(', ') + for optionstring in optionstrings: + tokens = optionstring.split() + delimiter = ' ' + firstopt = tokens[0].split('=') + if len(firstopt) > 1: + tokens[:1] = firstopt + delimiter = '=' + if 0 < len(tokens) <= 2: + option = nodes.option(optionstring) + option += nodes.option_string(tokens[0], tokens[0]) + if len(tokens) > 1: + option += nodes.option_argument(tokens[1], tokens[1], + delimiter=delimiter) + optlist.append(option) + else: + raise MarkupError('wrong numer of option tokens (=%s), ' + 'should be 1 or 2: %r' % (len(tokens), + optionstring)) + return optlist + + def doctest(self, match, context, nextstate): + data = '\n'.join(self.statemachine.gettextblock()) + self.statemachine.node += nodes.doctest_block(data, data) + return [], nextstate, [] + + def tabletop(self, match, context, nextstate): + """Top border of a table.""" + nodelist, blankfinish = self.table() + self.statemachine.node += nodelist + if not blankfinish: + msg = self.statemachine.memo.reporter.warning( + 'Blank line required after table at line %s.' 
+ % (self.statemachine.abslineno() + 1)) + self.statemachine.node += msg + return [], nextstate, [] + + def table(self): + """Parse a table.""" + block, messages, blankfinish = self.isolatetable() + if block: + try: + tabledata = self.tableparser.parse(block) + tableline = self.statemachine.abslineno() - len(block) + 1 + table = self.buildtable(tabledata, tableline) + nodelist = [table] + messages + except TableMarkupError, detail: + nodelist = self.malformedtable(block, str(detail)) + messages + else: + nodelist = messages + return nodelist, blankfinish + + def isolatetable(self): + messages = [] + blankfinish = 1 + try: + block = self.statemachine.getunindented() + except statemachine.UnexpectedIndentationError, instance: + block, lineno = instance.args + messages.append(self.statemachine.memo.reporter.error( + 'Unexpected indentation at line %s.' % lineno)) + blankfinish = 0 + width = len(block[0].strip()) + for i in range(len(block)): + block[i] = block[i].strip() + if block[i][0] not in '+|': # check left edge + blankfinish = 0 + self.statemachine.previousline(len(block) - i) + del block[i:] + break + if not self.tabletoppat.match(block[-1]): # find bottom + blankfinish = 0 + # from second-last to third line of table: + for i in range(len(block) - 2, 1, -1): + if self.tabletoppat.match(block[i]): + self.statemachine.previousline(len(block) - i + 1) + del block[i+1:] + break + else: + messages.extend(self.malformedtable(block)) + return [], messages, blankfinish + for i in range(len(block)): # check right edge + if len(block[i]) != width or block[i][-1] not in '+|': + messages.extend(self.malformedtable(block)) + return [], messages, blankfinish + return block, messages, blankfinish + + def malformedtable(self, block, detail=''): + data = '\n'.join(block) + message = 'Malformed table at line %s; formatting as a ' \ + 'literal block.' 
% (self.statemachine.abslineno() + - len(block) + 1) + if detail: + message += '\n' + detail + nodelist = [self.statemachine.memo.reporter.error(message), + nodes.literal_block(data, data)] + return nodelist + + def buildtable(self, tabledata, tableline): + colspecs, headrows, bodyrows = tabledata + table = nodes.table() + tgroup = nodes.tgroup(cols=len(colspecs)) + table += tgroup + for colspec in colspecs: + tgroup += nodes.colspec(colwidth=colspec) + if headrows: + thead = nodes.thead() + tgroup += thead + for row in headrows: + thead += self.buildtablerow(row, tableline) + tbody = nodes.tbody() + tgroup += tbody + for row in bodyrows: + tbody += self.buildtablerow(row, tableline) + return table + + def buildtablerow(self, rowdata, tableline): + row = nodes.row() + for cell in rowdata: + if cell is None: + continue + morerows, morecols, offset, cellblock = cell + attributes = {} + if morerows: + attributes['morerows'] = morerows + if morecols: + attributes['morecols'] = morecols + entry = nodes.entry(**attributes) + row += entry + if ''.join(cellblock): + self.nestedparse(cellblock, inputoffset=tableline+offset, + node=entry) + return row + + + explicit = Stuff() + """Patterns and constants used for explicit markup recognition.""" + + explicit.patterns = Stuff( + target=re.compile(r""" + (?: + _ # anonymous target + | # *OR* + (`?) # optional open quote + (?![ `]) # first char. not space or backquote + ( # reference name + .+? + ) + %s # not whitespace or escape + \1 # close quote if open quote used + ) + %s # not whitespace or escape + : # end of reference name + (?:[ ]+|$) # followed by whitespace + """ + % (RSTState.inline.non_whitespace_escape_before, + RSTState.inline.non_whitespace_escape_before), + re.VERBOSE), + reference=re.compile(r""" + (?: + (%s)_ # simple reference name + | # *OR* + ` # open backquote + (?![ ]) # not space + (.+?) 
# hyperlink phrase + %s # not whitespace or escape + `_ # close backquote & reference mark + ) + $ # end of string + """ % + (RSTState.inline.simplename, + RSTState.inline.non_whitespace_escape_before,), + re.VERBOSE), + substitution=re.compile(r""" + (?: + (?![ ]) # first char. not space + (.+?) # substitution text + %s # not whitespace or escape + \| # close delimiter + ) + (?:[ ]+|$) # followed by whitespace + """ % + RSTState.inline.non_whitespace_escape_before, + re.VERBOSE),) + explicit.groups = Stuff( + target=Stuff(quote=1, name=2), + reference=Stuff(simple=1, phrase=2), + substitution=Stuff(name=1)) + + def footnote(self, match): + indented, indent, offset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end()) + label = match.group(1) + name = normname(label) + footnote = nodes.footnote('\n'.join(indented)) + if name[0] == '#': # auto-numbered + name = name[1:] # autonumber label + footnote['auto'] = 1 + if name: + footnote['name'] = name + self.statemachine.memo.document.note_autofootnote(footnote) + elif name == '*': # auto-symbol + name = '' + footnote['auto'] = '*' + self.statemachine.memo.document.note_symbol_footnote(footnote) + else: # manually numbered + footnote += nodes.label('', label) + footnote['name'] = name + self.statemachine.memo.document.note_footnote(footnote) + if name: + self.statemachine.memo.document.note_explicit_target(footnote, + footnote) + if indented: + self.nestedparse(indented, inputoffset=offset, node=footnote) + return [footnote], blankfinish + + def citation(self, match): + indented, indent, offset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end()) + label = match.group(1) + name = normname(label) + citation = nodes.citation('\n'.join(indented)) + citation += nodes.label('', label) + citation['name'] = name + self.statemachine.memo.document.note_citation(citation) + self.statemachine.memo.document.note_explicit_target(citation, citation) + if indented: + self.nestedparse(indented, 
inputoffset=offset, node=citation) + return [citation], blankfinish + + def hyperlink_target(self, match): + pattern = self.explicit.patterns.target + namegroup = self.explicit.groups.target.name + lineno = self.statemachine.abslineno() + block, indent, offset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end(), uptoblank=1, + stripindent=0) + blocktext = match.string[:match.end()] + '\n'.join(block) + block = [escape2null(line) for line in block] + escaped = block[0] + blockindex = 0 + while 1: + targetmatch = pattern.match(escaped) + if targetmatch: + break + blockindex += 1 + try: + escaped += block[blockindex] + except (IndexError, MarkupError): + raise MarkupError('malformed hyperlink target at line %s.' + % lineno) + del block[:blockindex] + block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip() + if block and block[-1].strip()[-1:] == '_': # possible indirect target + reference = ' '.join([line.strip() for line in block]) + refname = self.isreference(reference) + if refname: + target = nodes.target(blocktext, '', refname=refname) + self.addtarget(targetmatch.group(namegroup), '', target) + self.statemachine.memo.document.note_indirect_target(target) + return [target], blankfinish + nodelist = [] + reference = ''.join([line.strip() for line in block]) + if reference.find(' ') != -1: + warning = self.statemachine.memo.reporter.warning( + 'Hyperlink target at line %s contains whitespace. ' + 'Perhaps a footnote was intended?' 
+ % (self.statemachine.abslineno() - len(block) + 1), '', + nodes.literal_block(blocktext, blocktext)) + nodelist.append(warning) + else: + unescaped = unescape(reference) + target = nodes.target(blocktext, '') + self.addtarget(targetmatch.group(namegroup), unescaped, target) + nodelist.append(target) + return nodelist, blankfinish + + def isreference(self, reference): + match = self.explicit.patterns.reference.match(normname(reference)) + if not match: + return None + return unescape(match.group(self.explicit.groups.reference.simple) + or match.group(self.explicit.groups.reference.phrase)) + + def addtarget(self, targetname, refuri, target): + if targetname: + name = normname(unescape(targetname)) + target['name'] = name + if refuri: + target['refuri'] = refuri + self.statemachine.memo.document.note_external_target(target) + else: + self.statemachine.memo.document.note_internal_target(target) + self.statemachine.memo.document.note_explicit_target( + target, self.statemachine.node) + else: # anonymous target + if refuri: + target['refuri'] = refuri + target['anonymous'] = 1 + self.statemachine.memo.document.note_anonymous_target(target) + + def substitutiondef(self, match): + pattern = self.explicit.patterns.substitution + lineno = self.statemachine.abslineno() + block, indent, offset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end(), + stripindent=0) + blocktext = (match.string[:match.end()] + '\n'.join(block)) + block = [escape2null(line) for line in block] + escaped = block[0].rstrip() + blockindex = 0 + while 1: + subdefmatch = pattern.match(escaped) + if subdefmatch: + break + blockindex += 1 + try: + escaped = escaped + ' ' + block[blockindex].strip() + except (IndexError, MarkupError): + raise MarkupError('malformed substitution definition ' + 'at line %s.' 
% lineno) + del block[:blockindex] # strip out the substitution marker + block[0] = (block[0] + ' ')[subdefmatch.end()-len(escaped)-1:].strip() + if not block[0]: + del block[0] + offset += 1 + subname = subdefmatch.group(self.explicit.groups.substitution.name) + name = normname(subname) + substitutionnode = nodes.substitution_definition( + blocktext, name=name, alt=subname) + if block: + block[0] = block[0].strip() + newabsoffset, blankfinish = self.nestedlistparse( + block, inputoffset=offset, node=substitutionnode, + initialstate='SubstitutionDef', blankfinish=blankfinish) + self.statemachine.previousline( + len(block) + offset - newabsoffset - 1) + i = 0 + for node in substitutionnode[:]: + if not (isinstance(node, nodes.Inline) or + isinstance(node, nodes.Text)): + self.statemachine.node += substitutionnode[i] + del substitutionnode[i] + else: + i += 1 + if len(substitutionnode) == 0: + msg = self.statemachine.memo.reporter.warning( + 'Substitution definition "%s" empty or invalid at line ' + '%s.' % (subname, self.statemachine.abslineno()), '', + nodes.literal_block(blocktext, blocktext)) + self.statemachine.node += msg + else: + del substitutionnode['alt'] + self.statemachine.memo.document.note_substitution_def( + substitutionnode, self.statemachine.node) + return [substitutionnode], blankfinish + else: + msg = self.statemachine.memo.reporter.warning( + 'Substitution definition "%s" missing contents at line %s.' 
+ % (subname, self.statemachine.abslineno()), '', + nodes.literal_block(blocktext, blocktext)) + self.statemachine.node += msg + return [], blankfinish + + def directive(self, match, **attributes): + typename = match.group(1) + directivefunction = directives.directive( + typename, self.statemachine.memo.language) + data = match.string[match.end():].strip() + if directivefunction: + return directivefunction(match, typename, data, self, + self.statemachine, attributes) + else: + return self.unknowndirective(typename, data) + + def unknowndirective(self, typename, data): + lineno = self.statemachine.abslineno() + indented, indent, offset, blankfinish = \ + self.statemachine.getfirstknownindented(0, stripindent=0) + text = '\n'.join(indented) + error = self.statemachine.memo.reporter.error( + 'Unknown directive type "%s" at line %s.' % (typename, lineno), + '', nodes.literal_block(text, text)) + return [error], blankfinish + + def parse_extension_attributes(self, attribute_spec, datalines, blankfinish): + """ + Parse `datalines` for a field list containing extension attributes + matching `attribute_spec`. + + :Parameters: + - `attribute_spec`: a mapping of attribute name to conversion + function, which should raise an exception on bad input. + - `datalines`: a list of input strings. + - `blankfinish`: + + :Return: + - Success value, 1 or 0. + - An attribute dictionary on success, an error string on failure. + - Updated `blankfinish` flag. 
+ """ + node = nodes.field_list() + newlineoffset, blankfinish = self.nestedlistparse( + datalines, 0, node, initialstate='FieldList', + blankfinish=blankfinish) + if newlineoffset != len(datalines): # incomplete parse of block + return 0, 'invalid attribute block', blankfinish + try: + attributes = utils.extract_extension_attributes(node, attribute_spec) + except KeyError, detail: + return 0, ('unknown attribute: "%s"' % detail), blankfinish + except (ValueError, TypeError), detail: + return 0, ('invalid attribute value:\n%s' % detail), blankfinish + except utils.ExtensionAttributeError, detail: + return 0, ('invalid attribute data: %s' % detail), blankfinish + return 1, attributes, blankfinish + + def comment(self, match): + if not match.string[match.end():].strip() \ + and self.statemachine.nextlineblank(): # an empty comment? + return [nodes.comment()], 1 # "A tiny but practical wart." + indented, indent, offset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end()) + text = '\n'.join(indented) + return [nodes.comment(text, text)], blankfinish + + explicit.constructs = [ + (footnote, + re.compile(r""" + \.\.[ ]+ # explicit markup start + \[ + ( # footnote label: + [0-9]+ # manually numbered footnote + | # *OR* + \# # anonymous auto-numbered footnote + | # *OR* + \#%s # auto-number ed?) footnote label + | # *OR* + \* # auto-symbol footnote + ) + \] + (?:[ ]+|$) # whitespace or end of line + """ % RSTState.inline.simplename, re.VERBOSE)), + (citation, + re.compile(r""" + \.\.[ ]+ # explicit markup start + \[(%s)\] # citation label + (?:[ ]+|$) # whitespace or end of line + """ % RSTState.inline.simplename, re.VERBOSE)), + (hyperlink_target, + re.compile(r""" + \.\.[ ]+ # explicit markup start + _ # target indicator + (?![ ]) # first char. not space + """, re.VERBOSE)), + (substitutiondef, + re.compile(r""" + \.\.[ ]+ # explicit markup start + \| # substitution indicator + (?![ ]) # first char. 
not space + """, re.VERBOSE)), + (directive, + re.compile(r""" + \.\.[ ]+ # explicit markup start + (%s) # directive name + :: # directive delimiter + (?:[ ]+|$) # whitespace or end of line + """ % RSTState.inline.simplename, re.VERBOSE))] + + def explicit_markup(self, match, context, nextstate): + """Footnotes, hyperlink targets, directives, comments.""" + nodelist, blankfinish = self.explicit_construct(match) + self.statemachine.node += nodelist + self.explicitlist(blankfinish) + return [], nextstate, [] + + def explicit_construct(self, match): + """Determine which explicit construct this is, parse & return it.""" + errors = [] + for method, pattern in self.explicit.constructs: + expmatch = pattern.match(match.string) + if expmatch: + try: + return method(self, expmatch) + except MarkupError, detail: # never reached? + errors.append( + self.statemachine.memo.reporter.warning('%s: %s' + % (detail.__class__.__name__, detail))) + break + nodelist, blankfinish = self.comment(match) + return nodelist + errors, blankfinish + + def explicitlist(self, blankfinish): + """ + Create a nested state machine for a series of explicit markup constructs + (including anonymous hyperlink targets). 
+ """ + offset = self.statemachine.lineoffset + 1 # next line + newlineoffset, blankfinish = self.nestedlistparse( + self.statemachine.inputlines[offset:], + inputoffset=self.statemachine.abslineoffset() + 1, + node=self.statemachine.node, initialstate='Explicit', + blankfinish=blankfinish) + self.gotoline(newlineoffset) + if not blankfinish: + self.statemachine.node += self.unindentwarning() + + def anonymous(self, match, context, nextstate): + """Anonymous hyperlink targets.""" + nodelist, blankfinish = self.anonymous_target(match) + self.statemachine.node += nodelist + self.explicitlist(blankfinish) + return [], nextstate, [] + + def anonymous_target(self, match): + block, indent, offset, blankfinish \ + = self.statemachine.getfirstknownindented(match.end(), + uptoblank=1) + blocktext = match.string[:match.end()] + '\n'.join(block) + if block and block[-1].strip()[-1:] == '_': # possible indirect target + reference = escape2null(' '.join([line.strip() for line in block])) + refname = self.isreference(reference) + if refname: + target = nodes.target(blocktext, '', refname=refname, + anonymous=1) + self.statemachine.memo.document.note_anonymous_target(target) + self.statemachine.memo.document.note_indirect_target(target) + return [target], blankfinish + nodelist = [] + reference = escape2null(''.join([line.strip() for line in block])) + if reference.find(' ') != -1: + warning = self.statemachine.memo.reporter.warning( + 'Anonymous hyperlink target at line %s contains whitespace. ' + 'Perhaps a footnote was intended?' 
+ % (self.statemachine.abslineno() - len(block) + 1), '', + nodes.literal_block(blocktext, blocktext)) + nodelist.append(warning) + else: + target = nodes.target(blocktext, '', anonymous=1) + if reference: + unescaped = unescape(reference) + target['refuri'] = unescaped + self.statemachine.memo.document.note_anonymous_target(target) + nodelist.append(target) + return nodelist, blankfinish + + def line(self, match, context, nextstate): + """Section title overline or transition marker.""" + if self.statemachine.matchtitles: + return [match.string], 'Line', [] + else: + blocktext = self.statemachine.line + msg = self.statemachine.memo.reporter.severe( + 'Unexpected section title or transition at line %s.' + % self.statemachine.abslineno(), '', + nodes.literal_block(blocktext, blocktext)) + self.statemachine.node += msg + return [], nextstate, [] + + def text(self, match, context, nextstate): + """Titles, definition lists, paragraphs.""" + return [match.string], 'Text', [] + + +class SpecializedBody(Body): + + """ + Superclass for second and subsequent compound element members. + + All transition methods are disabled. Override individual methods in + subclasses to re-enable. + """ + + def invalid_input(self, match=None, context=None, nextstate=None): + """Not a compound element member. 
Abort this state machine.""" + self.statemachine.previousline() # back up so parent SM can reassess + raise EOFError + + indent = invalid_input + bullet = invalid_input + enumerator = invalid_input + field_marker = invalid_input + option_marker = invalid_input + doctest = invalid_input + tabletop = invalid_input + explicit_markup = invalid_input + anonymous = invalid_input + line = invalid_input + text = invalid_input + + +class BulletList(SpecializedBody): + + """Second and subsequent bullet_list list_items.""" + + def bullet(self, match, context, nextstate): + """Bullet list item.""" + if match.string[0] != self.statemachine.node['bullet']: + # different bullet: new list + self.invalid_input() + listitem, blankfinish = self.list_item(match.end()) + self.statemachine.node += listitem + self.blankfinish = blankfinish + return [], 'BulletList', [] + + +class DefinitionList(SpecializedBody): + + """Second and subsequent definition_list_items.""" + + def text(self, match, context, nextstate): + """Definition lists.""" + return [match.string], 'Definition', [] + + +class EnumeratedList(SpecializedBody): + + """Second and subsequent enumerated_list list_items.""" + + def enumerator(self, match, context, nextstate): + """Enumerated list item.""" + format, sequence, text, ordinal = self.parse_enumerator( + match, self.statemachine.node['enumtype']) + if (sequence != self.statemachine.node['enumtype'] or + format != self.format or + ordinal != self.lastordinal + 1): + # different enumeration: new list + self.invalid_input() + listitem, blankfinish = self.list_item(match.end()) + self.statemachine.node += listitem + self.blankfinish = blankfinish + self.lastordinal = ordinal + return [], 'EnumeratedList', [] + + +class FieldList(SpecializedBody): + + """Second and subsequent field_list fields.""" + + def field_marker(self, match, context, nextstate): + """Field list field.""" + field, blankfinish = self.field(match) + self.statemachine.node += field + self.blankfinish = 
blankfinish + return [], 'FieldList', [] + + +class OptionList(SpecializedBody): + + """Second and subsequent option_list option_list_items.""" + + def option_marker(self, match, context, nextstate): + """Option list item.""" + try: + option_list_item, blankfinish = self.option_list_item(match) + except MarkupError, detail: + self.invalid_input() + self.statemachine.node += option_list_item + self.blankfinish = blankfinish + return [], 'OptionList', [] + + +class RFC822List(SpecializedBody): + + """Second and subsequent RFC822 field_list fields.""" + + pass + + +class Explicit(SpecializedBody): + + """Second and subsequent explicit markup construct.""" + + def explicit_markup(self, match, context, nextstate): + """Footnotes, hyperlink targets, directives, comments.""" + nodelist, blankfinish = self.explicit_construct(match) + self.statemachine.node += nodelist + self.blankfinish = blankfinish + return [], nextstate, [] + + def anonymous(self, match, context, nextstate): + """Anonymous hyperlink targets.""" + nodelist, blankfinish = self.anonymous_target(match) + self.statemachine.node += nodelist + self.blankfinish = blankfinish + return [], nextstate, [] + + +class SubstitutionDef(Body): + + """ + Parser for the contents of a substitution_definition element. 
+ """ + + patterns = { + 'embedded_directive': r'(%s)::( +|$)' % RSTState.inline.simplename, + 'text': r''} + initialtransitions = ['embedded_directive', 'text'] + + def embedded_directive(self, match, context, nextstate): + if self.statemachine.node.has_key('alt'): + attributes = {'alt': self.statemachine.node['alt']} + else: + attributes = {} + nodelist, blankfinish = self.directive(match, **attributes) + self.statemachine.node += nodelist + if not self.statemachine.ateof(): + self.blankfinish = blankfinish + raise EOFError + + def text(self, match, context, nextstate): + if not self.statemachine.ateof(): + self.blankfinish = self.statemachine.nextlineblank() + raise EOFError + + +class Text(RSTState): + + """ + Classifier of second line of a text block. + + Could be a paragraph, a definition list item, or a title. + """ + + patterns = {'underline': Body.patterns['line'], + 'text': r''} + initialtransitions = [('underline', 'Body'), ('text', 'Body')] + + def blank(self, match, context, nextstate): + """End of paragraph.""" + paragraph, literalnext = self.paragraph( + context, self.statemachine.abslineno() - 1) + self.statemachine.node += paragraph + if literalnext: + self.statemachine.node += self.literal_block() + return [], 'Body', [] + + def eof(self, context): + if context: + paragraph, literalnext = self.paragraph( + context, self.statemachine.abslineno() - 1) + self.statemachine.node += paragraph + if literalnext: + self.statemachine.node += self.literal_block() + return [] + + def indent(self, match, context, nextstate): + """Definition list item.""" + definitionlist = nodes.definition_list() + definitionlistitem, blankfinish = self.definition_list_item(context) + definitionlist += definitionlistitem + self.statemachine.node += definitionlist + offset = self.statemachine.lineoffset + 1 # next line + newlineoffset, blankfinish = self.nestedlistparse( + self.statemachine.inputlines[offset:], + inputoffset=self.statemachine.abslineoffset() + 1, + 
node=definitionlist, initialstate='DefinitionList', + blankfinish=blankfinish, blankfinishstate='Definition') + if not blankfinish: + self.statemachine.node += self.unindentwarning() + self.gotoline(newlineoffset) + return [], 'Body', [] + + def underline(self, match, context, nextstate): + """Section title.""" + lineno = self.statemachine.abslineno() + if not self.statemachine.matchtitles: + blocktext = context[0] + '\n' + self.statemachine.line + msg = self.statemachine.memo.reporter.severe( + 'Unexpected section title at line %s.' % lineno, '', + nodes.literal_block(blocktext, blocktext)) + self.statemachine.node += msg + return [], nextstate, [] + title = context[0].rstrip() + underline = match.string.rstrip() + source = title + '\n' + underline + if len(title) > len(underline): + blocktext = context[0] + '\n' + self.statemachine.line + msg = self.statemachine.memo.reporter.info( + 'Title underline too short at line %s.' % lineno, '', + nodes.literal_block(blocktext, blocktext)) + self.statemachine.node += msg + style = underline[0] + context[:] = [] + self.section(title, source, style, lineno - 1) + return [], nextstate, [] + + def text(self, match, context, nextstate): + """Paragraph.""" + startline = self.statemachine.abslineno() - 1 + msg = None + try: + block = self.statemachine.getunindented() + except statemachine.UnexpectedIndentationError, instance: + block, lineno = instance.args + msg = self.statemachine.memo.reporter.error( + 'Unexpected indentation at line %s.' 
% lineno) + lines = context + block + paragraph, literalnext = self.paragraph(lines, startline) + self.statemachine.node += paragraph + self.statemachine.node += msg + if literalnext: + try: + self.statemachine.nextline() + except IndexError: + pass + self.statemachine.node += self.literal_block() + return [], nextstate, [] + + def literal_block(self): + """Return a list of nodes.""" + indented, indent, offset, blankfinish = \ + self.statemachine.getindented() + nodelist = [] + while indented and not indented[-1].strip(): + indented.pop() + if indented: + data = '\n'.join(indented) + nodelist.append(nodes.literal_block(data, data)) + if not blankfinish: + nodelist.append(self.unindentwarning()) + else: + nodelist.append(self.statemachine.memo.reporter.warning( + 'Literal block expected at line %s; none found.' + % self.statemachine.abslineno())) + return nodelist + + def definition_list_item(self, termline): + indented, indent, lineoffset, blankfinish = \ + self.statemachine.getindented() + definitionlistitem = nodes.definition_list_item('\n'.join(termline + + indented)) + termlist, messages = self.term(termline, + self.statemachine.abslineno() - 1) + definitionlistitem += termlist + definition = nodes.definition('', *messages) + definitionlistitem += definition + if termline[0][-2:] == '::': + definition += self.statemachine.memo.reporter.info( + 'Blank line missing before literal block? Interpreted as a ' + 'definition list item. At line %s.' 
% (lineoffset + 1)) + self.nestedparse(indented, inputoffset=lineoffset, node=definition) + return definitionlistitem, blankfinish + + def term(self, lines, lineno): + """Return a definition_list's term and optional classifier.""" + assert len(lines) == 1 + nodelist = [] + parts = lines[0].split(' : ', 1) # split into 1 or 2 parts + termpart = parts[0].rstrip() + textnodes, messages = self.inline_text(termpart, lineno) + nodelist = [nodes.term(termpart, '', *textnodes)] + if len(parts) == 2: + classifierpart = parts[1].lstrip() + textnodes, cpmessages = self.inline_text(classifierpart, lineno) + nodelist.append(nodes.classifier(classifierpart, '', *textnodes)) + messages += cpmessages + return nodelist, messages + + +class SpecializedText(Text): + + """ + Superclass for second and subsequent lines of Text-variants. + + All transition methods are disabled. Override individual methods in + subclasses to re-enable. + """ + + def eof(self, context): + """Incomplete construct.""" + return [] + + def invalid_input(self, match=None, context=None, nextstate=None): + """Not a compound element member. Abort this state machine.""" + raise EOFError + + blank = invalid_input + indent = invalid_input + underline = invalid_input + text = invalid_input + + +class Definition(SpecializedText): + + """Second line of potential definition_list_item.""" + + def eof(self, context): + """Not a definition.""" + self.statemachine.previousline(2) # back up so parent SM can reassess + return [] + + def indent(self, match, context, nextstate): + """Definition list item.""" + definitionlistitem, blankfinish = self.definition_list_item(context) + self.statemachine.node += definitionlistitem + self.blankfinish = blankfinish + return [], 'DefinitionList', [] + + +class Line(SpecializedText): + + """Second line of over- & underlined section title or transition marker.""" + + eofcheck = 1 # @@@ ??? 
+ """Set to 0 while parsing sections, so that we don't catch the EOF.""" + + def eof(self, context): + """Transition marker at end of section or document.""" + if self.eofcheck: # ignore EOFError with sections + transition = nodes.transition(context[0]) + self.statemachine.node += transition + msg = self.statemachine.memo.reporter.error( + 'Document or section may not end with a transition ' + '(line %s).' % (self.statemachine.abslineno() - 1)) + self.statemachine.node += msg + self.eofcheck = 1 + return [] + + def blank(self, match, context, nextstate): + """Transition marker.""" + transition = nodes.transition(context[0]) + if len(self.statemachine.node) == 0: + msg = self.statemachine.memo.reporter.error( + 'Document or section may not begin with a transition ' + '(line %s).' % (self.statemachine.abslineno() - 1)) + self.statemachine.node += msg + elif isinstance(self.statemachine.node[-1], nodes.transition): + msg = self.statemachine.memo.reporter.error( + 'At least one body element must separate transitions; ' + 'adjacent transitions at line %s.' + % (self.statemachine.abslineno() - 1)) + self.statemachine.node += msg + self.statemachine.node += transition + return [], 'Body', [] + + def text(self, match, context, nextstate): + """Potential over- & underlined title.""" + lineno = self.statemachine.abslineno() - 1 + overline = context[0] + title = match.string + underline = '' + try: + underline = self.statemachine.nextline() + except IndexError: + blocktext = overline + '\n' + title + msg = self.statemachine.memo.reporter.severe( + 'Incomplete section title at line %s.' % lineno, '', + nodes.literal_block(blocktext, blocktext)) + self.statemachine.node += msg + return [], 'Body', [] + source = '%s\n%s\n%s' % (overline, title, underline) + overline = overline.rstrip() + underline = underline.rstrip() + if not self.transitions['underline'][0].match(underline): + msg = self.statemachine.memo.reporter.severe( + 'Missing underline for overline at line %s.' 
% lineno, '', + nodes.literal_block(source, source)) + self.statemachine.node += msg + return [], 'Body', [] + elif overline != underline: + msg = self.statemachine.memo.reporter.severe( + 'Title overline & underline mismatch at ' 'line %s.' % lineno, + '', nodes.literal_block(source, source)) + self.statemachine.node += msg + return [], 'Body', [] + title = title.rstrip() + if len(title) > len(overline): + msg = self.statemachine.memo.reporter.info( + 'Title overline too short at line %s.'% lineno, '', + nodes.literal_block(source, source)) + self.statemachine.node += msg + style = (overline[0], underline[0]) + self.eofcheck = 0 # @@@ not sure this is correct + self.section(title.lstrip(), source, style, lineno + 1) + self.eofcheck = 1 + return [], 'Body', [] + + indent = text # indented title + + def underline(self, match=None, context=None, nextstate=None): + blocktext = context[0] + '\n' + self.statemachine.line + msg = self.statemachine.memo.reporter.error( + 'Invalid section title or transition marker at line %s.' 
+ % (self.statemachine.abslineno() - 1), '', + nodes.literal_block(blocktext, blocktext)) + self.statemachine.node += msg + return [], 'Body', [] + + +stateclasses = [Body, BulletList, DefinitionList, EnumeratedList, FieldList, + OptionList, RFC822List, Explicit, Text, Definition, Line, + SubstitutionDef] +"""Standard set of State classes used to start `RSTStateMachine`.""" + + +def escape2null(text): + """Return a string with escape-backslashes converted to nulls.""" + parts = [] + start = 0 + while 1: + found = text.find('\\', start) + if found == -1: + parts.append(text[start:]) + return ''.join(parts) + parts.append(text[start:found]) + parts.append('\x00' + text[found+1:found+2]) + start = found + 2 # skip character after escape + +def unescape(text, restorebackslashes=0): + """Return a string with nulls removed or restored to backslashes.""" + if restorebackslashes: + return text.translate(RSTState.inline.null2backslash) + else: + return text.translate(RSTState.inline.identity, '\x00') -- cgit v1.2.1 From 6381b74ce6924bc0f14b7f377ee57496e2ebb2d9 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 20 Apr 2002 16:36:24 +0000 Subject: - Improved diagnostic system messages for missing blank lines. - Fixed substitution_reference bug. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@29 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 68 ++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 32 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index b2dbf9b3e..706ed6106 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -626,24 +626,27 @@ class RSTState(StateWS): before, inlines, remaining, sysmessages, endstring = self.inlineobj( match, lineno, self.inline.patterns.substitution_ref, nodes.substitution_reference) - if inlines: - assert len(inlines) == 1 + if len(inlines) == 1: subrefnode = inlines[0] - assert isinstance(subrefnode, nodes.substitution_reference) - subreftext = subrefnode.astext() - refname = normname(subreftext) - subrefnode['refname'] = refname - self.statemachine.memo.document.note_substitution_ref(subrefnode) - if endstring[-1:] == '_': - referencenode = nodes.reference('|%s%s' % (subreftext, endstring), '') - if endstring[-2:] == '__': - referencenode['anonymous'] = 1 - self.statemachine.memo.document.note_anonymous_ref(referencenode) - else: - referencenode['refname'] = refname - self.statemachine.memo.document.note_refname(referencenode) - referencenode += subrefnode - inlines = [referencenode] + if isinstance(subrefnode, nodes.substitution_reference): + subreftext = subrefnode.astext() + refname = normname(subreftext) + subrefnode['refname'] = refname + self.statemachine.memo.document.note_substitution_ref( + subrefnode) + if endstring[-1:] == '_': + referencenode = nodes.reference( + '|%s%s' % (subreftext, endstring), '') + if endstring[-2:] == '__': + referencenode['anonymous'] = 1 + self.statemachine.memo.document.note_anonymous_ref( + referencenode) + else: + referencenode['refname'] = refname + self.statemachine.memo.document.note_refname( + referencenode) + referencenode += subrefnode + 
inlines = [referencenode] return before, inlines, remaining, sysmessages def footnote_reference(self, match, lineno): @@ -788,10 +791,10 @@ class RSTState(StateWS): processed += self.standalone_uri(remaining, lineno) return processed, messages - def unindentwarning(self): + def unindent_warning(self, node_name): return self.statemachine.memo.reporter.warning( - ('Unindent without blank line at line %s.' - % (self.statemachine.abslineno() + 1))) + ('%s ends without a blank line; unexpected unindent at line %s.' + % (node_name, self.statemachine.abslineno() + 1))) class Body(RSTState): @@ -882,7 +885,7 @@ class Body(RSTState): blockquote = self.block_quote(indented, lineoffset) self.statemachine.node += blockquote if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning('Block quote') return context, nextstate, [] def block_quote(self, indented, lineoffset): @@ -904,7 +907,7 @@ class Body(RSTState): node=bulletlist, initialstate='BulletList', blankfinish=blankfinish) if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning('Bullet list') self.gotoline(newlineoffset) return [], nextstate, [] @@ -930,7 +933,8 @@ class Body(RSTState): bq = self.block_quote(indented, lineoffset) self.statemachine.node += bq if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning( + 'Enumerated list') return [], nextstate, [] if ordinal != 1: msg = self.statemachine.memo.reporter.info( @@ -955,7 +959,7 @@ class Body(RSTState): blankfinish=blankfinish, extrasettings={'lastordinal': ordinal, 'format': format}) if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning('Enumerated list') self.gotoline(newlineoffset) return [], nextstate, [] @@ -1022,7 +1026,7 @@ class Body(RSTState): node=fieldlist, initialstate='FieldList', 
blankfinish=blankfinish) if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning('Field list') self.gotoline(newlineoffset) return [], nextstate, [] @@ -1062,7 +1066,7 @@ class Body(RSTState): blockquote = self.block_quote(indented, lineoffset) self.statemachine.node += blockquote if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning('Option list') return [], nextstate, [] self.statemachine.node += optionlist optionlist += listitem @@ -1073,7 +1077,7 @@ class Body(RSTState): node=optionlist, initialstate='OptionList', blankfinish=blankfinish) if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning('Option list') self.gotoline(newlineoffset) return [], nextstate, [] @@ -1592,8 +1596,8 @@ class Body(RSTState): def explicitlist(self, blankfinish): """ - Create a nested state machine for a series of explicit markup constructs - (including anonymous hyperlink targets). + Create a nested state machine for a series of explicit markup + constructs (including anonymous hyperlink targets). 
""" offset = self.statemachine.lineoffset + 1 # next line newlineoffset, blankfinish = self.nestedlistparse( @@ -1603,7 +1607,7 @@ class Body(RSTState): blankfinish=blankfinish) self.gotoline(newlineoffset) if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning('Explicit markup') def anonymous(self, match, context, nextstate): """Anonymous hyperlink targets.""" @@ -1857,7 +1861,7 @@ class Text(RSTState): node=definitionlist, initialstate='DefinitionList', blankfinish=blankfinish, blankfinishstate='Definition') if not blankfinish: - self.statemachine.node += self.unindentwarning() + self.statemachine.node += self.unindent_warning('Definition list') self.gotoline(newlineoffset) return [], 'Body', [] @@ -1918,7 +1922,7 @@ class Text(RSTState): data = '\n'.join(indented) nodelist.append(nodes.literal_block(data, data)) if not blankfinish: - nodelist.append(self.unindentwarning()) + nodelist.append(self.unindent_warning('Literal block')) else: nodelist.append(self.statemachine.memo.reporter.warning( 'Literal block expected at line %s; none found.' -- cgit v1.2.1 From 630e79abef4ea3263ee70235cd85e1d8159e0321 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 25 Apr 2002 03:25:44 +0000 Subject: - Added RFC-2822 header support. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@36 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 105 +++++++++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 26 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 706ed6106..efce3b4f1 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -6,21 +6,24 @@ :Copyright: This module has been placed in the public domain. This is the ``docutils.parsers.restructuredtext.states`` module, the core of -the reStructuredText parser. 
It defines the following: +the reStructuredText parser. It defines the following: :Classes: - `RSTStateMachine`: reStructuredText parser's entry point. - `NestedStateMachine`: recursive StateMachine. - `RSTState`: reStructuredText State superclass. - `Body`: Generic classifier of the first line of a block. + - `SpecializedBody`: Superclass for compound element members. - `BulletList`: Second and subsequent bullet_list list_items - - `DefinitionList`: Second and subsequent definition_list_items. - - `EnumeratedList`: Second and subsequent enumerated_list list_items. - - `FieldList`: Second and subsequent fields. - - `OptionList`: Second and subsequent option_list_items. - - `Explicit`: Second and subsequent explicit markup constructs. + - `DefinitionList`: Second+ definition_list_items. + - `EnumeratedList`: Second+ enumerated_list list_items. + - `FieldList`: Second+ fields. + - `OptionList`: Second+ option_list_items. + - `RFC2822List`: Second+ RFC2822-style fields. + - `Explicit`: Second+ explicit markup constructs. - `SubstitutionDef`: For embedded directives in substitution definitions. - `Text`: Classifier of second line of a text block. + - `SpecializedText`: Superclass for continuation lines of Text-variants. - `Definition`: Second line of potential definition_list_item. - `Line`: Second line of overlined section title or transition marker. - `Stuff`: An auxilliary collection class. @@ -28,12 +31,10 @@ the reStructuredText parser. It defines the following: :Exception classes: - `MarkupError` - `ParserError` - - `TransformationError` :Functions: - `escape2null()`: Return a string, escape-backslashes converted to nulls. - `unescape()`: Return a string, nulls removed or restored to backslashes. - - `normname()`: Return a case- and whitespace-normalized name. :Attributes: - `stateclasses`: set of State classes used with `RSTStateMachine`. 
@@ -42,22 +43,22 @@ Parser Overview =============== The reStructuredText parser is implemented as a state machine, examining its -input one line at a time. To understand how the parser works, please first -become familiar with the `docutils.statemachine` module. In the description +input one line at a time. To understand how the parser works, please first +become familiar with the `docutils.statemachine` module. In the description below, references are made to classes defined in this module; please see the individual classes for details. Parsing proceeds as follows: 1. The state machine examines each line of input, checking each of the - transition patterns of the state `Body`, in order, looking for a match. The - implicit transitions (blank lines and indentation) are checked before any - others. The 'text' transition is a catch-all (matches anything). + transition patterns of the state `Body`, in order, looking for a match. + The implicit transitions (blank lines and indentation) are checked before + any others. The 'text' transition is a catch-all (matches anything). 2. The method associated with the matched transition pattern is called. A. Some transition methods are self-contained, appending elements to the - document tree ('doctest' parses a doctest block). The parser's current + document tree ('doctest' parses a doctest block). The parser's current line index is advanced to the end of the element, and parsing continues with step 1. @@ -71,11 +72,11 @@ Parsing proceeds as follows: - A new state machine is created and its initial state is set to the appropriate specialized state (`BulletList` in the case of the - 'bullet' transition). This state machine is run to parse the compound + 'bullet' transition). This state machine is run to parse the compound element (or series of explicit markup elements), and returns as soon - as a non-member element is encountered. For example, the `BulletList` + as a non-member element is encountered. 
For example, the `BulletList` state machine aborts as soon as it encounters an element which is not - a list item of that bullet list. The optional omission of + a list item of that bullet list. The optional omission of inter-element blank lines is handled by the nested state machine. - The current line index is advanced to the end of the elements parsed, @@ -83,7 +84,7 @@ Parsing proceeds as follows: C. The result of the 'text' transition depends on the next line of text. The current state is changed to `Text`, under which the second line is - examined. If the second line is: + examined. If the second line is: - Indented: The element is a definition list item, and parsing proceeds similarly to step 2.B, using the `DefinitionList` state. @@ -93,8 +94,8 @@ Parsing proceeds as follows: used. - Anything else: The element is a paragraph, which is examined for - inline markup and appended to the parent element. Processing continues - with step 1. + inline markup and appended to the parent element. Processing + continues with step 1. """ __docformat__ = 'reStructuredText' @@ -865,7 +866,6 @@ class Body(RSTState): 'explicit_markup': r'\.\.( +|$)', 'anonymous': r'__( +|$)', 'line': r'(%(nonalphanum7bit)s)\1\1\1+ *$' % pats, - #'rfc822': r'[!-9;-~]+:( +|$)', 'text': r''} initialtransitions = ['bullet', 'enumerator', @@ -1666,6 +1666,49 @@ class Body(RSTState): return [match.string], 'Text', [] +class RFC2822Body(Body): + + """ + RFC2822 headers are only valid as the first constructs in documents. As + soon as anything else appears, the `Body` state should take over. 
+ """ + + patterns = Body.patterns.copy() # can't modify the original + patterns['rfc2822'] = r'[!-9;-~]+:( +|$)' + initialtransitions = [(name, 'Body') for name in Body.initialtransitions] + initialtransitions.insert(-1, ('rfc2822', 'Body')) # just before 'text' + + def rfc2822(self, match, context, nextstate): + """RFC2822-style field list item.""" + fieldlist = nodes.field_list() + self.statemachine.node += fieldlist + field, blankfinish = self.rfc2822_field(match) + fieldlist += field + offset = self.statemachine.lineoffset + 1 # next line + newlineoffset, blankfinish = self.nestedlistparse( + self.statemachine.inputlines[offset:], + inputoffset=self.statemachine.abslineoffset() + 1, + node=fieldlist, initialstate='RFC2822List', + blankfinish=blankfinish) + if not blankfinish: + self.statemachine.node += self.unindent_warning( + 'RFC2822-style field list') + self.gotoline(newlineoffset) + return [], nextstate, [] + + def rfc2822_field(self, match): + name = match.string[:match.string.find(':')] + indented, indent, lineoffset, blankfinish = \ + self.statemachine.getfirstknownindented(match.end()) + fieldnode = nodes.field() + fieldnode += nodes.field_name(name, name) + fieldbody = nodes.field_body('\n'.join(indented)) + fieldnode += fieldbody + if indented: + self.nestedparse(indented, inputoffset=lineoffset, node=fieldbody) + return fieldnode, blankfinish + + class SpecializedBody(Body): """ @@ -1764,11 +1807,21 @@ class OptionList(SpecializedBody): return [], 'OptionList', [] -class RFC822List(SpecializedBody): +class RFC2822List(SpecializedBody, RFC2822Body): - """Second and subsequent RFC822 field_list fields.""" + """Second and subsequent RFC2822-style field_list fields.""" + + patterns = RFC2822Body.patterns + initialtransitions = RFC2822Body.initialtransitions + + def rfc2822(self, match, context, nextstate): + """RFC2822-style field list item.""" + field, blankfinish = self.rfc2822_field(match) + self.statemachine.node += field + self.blankfinish = 
blankfinish + return [], 'RFC2822List', [] - pass + blank = SpecializedBody.invalid_input class Explicit(SpecializedBody): @@ -2093,8 +2146,8 @@ class Line(SpecializedText): stateclasses = [Body, BulletList, DefinitionList, EnumeratedList, FieldList, - OptionList, RFC822List, Explicit, Text, Definition, Line, - SubstitutionDef] + OptionList, Explicit, Text, Definition, Line, SubstitutionDef, + RFC2822Body, RFC2822List] """Standard set of State classes used to start `RSTStateMachine`.""" -- cgit v1.2.1 From c854d0803dd10573ea908840b85d12499904b060 Mon Sep 17 00:00:00 2001 From: goodger Date: Sun, 5 May 2002 15:22:30 +0000 Subject: - Added underscores to improve many awkward names. - Extracted the inline parsing code from ``RSTState`` to a separate class, ``Inliner``, which will allow easy subclassing. - Made local bindings for ``memo`` container & often-used contents (reduces code complexity a lot). See ``RSTState.runtime_init()``. - ``RSTState.parent`` replaces ``RSTState.statemachine.node``. - Added ``MarkupMismatch`` exception; for late corrections. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@75 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 1787 +++++++++++++++++++++------------------- 1 file changed, 919 insertions(+), 868 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index efce3b4f1..d2a133198 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -31,13 +31,14 @@ the reStructuredText parser. It defines the following: :Exception classes: - `MarkupError` - `ParserError` + - `MarkupMismatch` :Functions: - `escape2null()`: Return a string, escape-backslashes converted to nulls. - `unescape()`: Return a string, nulls removed or restored to backslashes. :Attributes: - - `stateclasses`: set of State classes used with `RSTStateMachine`. 
+ - `state_classes`: set of State classes used with `RSTStateMachine`. Parser Overview =============== @@ -102,15 +103,17 @@ __docformat__ = 'reStructuredText' import sys, re, string -from docutils import nodes, statemachine, utils, roman, urischemes +from docutils import nodes, statemachine, utils, roman, urischemes, \ + ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS -from docutils.utils import normname +from docutils.utils import normalize_name import directives, languages from tableparser import TableParser, TableMarkupError -class MarkupError(Exception): pass -class ParserError(Exception): pass +class MarkupError(DataError): pass +class ParserError(ApplicationError): pass +class MarkupMismatch(Exception): pass class Stuff: @@ -129,24 +132,30 @@ class RSTStateMachine(StateMachineWS): The entry point to reStructuredText parsing is the `run()` method. """ - def run(self, inputlines, docroot, inputoffset=0, matchtitles=1): + def run(self, input_lines, document, input_offset=0, match_titles=1, + inliner=None): """ - Parse `inputlines` and return a `docutils.nodes.document` instance. + Parse `input_lines` and return a `docutils.nodes.document` instance. Extend `StateMachineWS.run()`: set up parse-global data, run the StateMachine, and return the resulting document. """ - self.language = languages.getlanguage(docroot.languagecode) - self.matchtitles = matchtitles - self.memo = Stuff(document=docroot, - reporter=docroot.reporter, + self.language = languages.getlanguage(document.language_code) + self.match_titles = match_titles + if inliner is None: + inliner = Inliner() + self.memo = Stuff(document=document, + reporter=document.reporter, language=self.language, - titlestyles=[], - sectionlevel=0) - self.node = docroot - results = StateMachineWS.run(self, inputlines, inputoffset) - assert results == [], 'RSTStateMachine.run() results should be empty.' 
+ title_styles=[], + section_level=0, + inliner=inliner) + self.document = self.memo.document + self.reporter = self.memo.reporter + self.node = document + results = StateMachineWS.run(self, input_lines, input_offset) + assert results == [], 'RSTStateMachine.run() results should be empty!' self.node = self.memo = None # remove unneeded references @@ -157,17 +166,20 @@ class NestedStateMachine(StateMachineWS): document structures. """ - def run(self, inputlines, inputoffset, memo, node, matchtitles=1): + def run(self, input_lines, input_offset, memo, node, match_titles=1): """ - Parse `inputlines` and populate a `docutils.nodes.document` instance. + Parse `input_lines` and populate a `docutils.nodes.document` instance. Extend `StateMachineWS.run()`: set up document-wide data. """ - self.matchtitles = matchtitles + self.match_titles = match_titles self.memo = memo + self.document = memo.document + self.reporter = memo.reporter self.node = node - results = StateMachineWS.run(self, inputlines, inputoffset) - assert results == [], 'NestedStateMachine.run() results should be empty' + results = StateMachineWS.run(self, input_lines, input_offset) + assert results == [], ('NestedStateMachine.run() results should be ' + 'empty!') return results @@ -179,17 +191,28 @@ class RSTState(StateWS): Contains methods used by all State subclasses. 
""" - nestedSM = NestedStateMachine + nested_sm = NestedStateMachine - def __init__(self, statemachine, debug=0): - self.nestedSMkwargs = {'stateclasses': stateclasses, - 'initialstate': 'Body'} - StateWS.__init__(self, statemachine, debug) + def __init__(self, state_machine, debug=0): + self.nested_sm_kwargs = {'state_classes': state_classes, + 'initial_state': 'Body'} + StateWS.__init__(self, state_machine, debug) - def gotoline(self, abslineoffset): - """Jump to input line `abslineoffset`, ignoring jumps past the end.""" + def runtime_init(self): + StateWS.runtime_init(self) + memo = self.state_machine.memo + self.memo = memo + self.reporter = memo.reporter + self.inliner = memo.inliner + self.document = memo.document + self.parent = self.state_machine.node + + def goto_line(self, abs_line_offset): + """ + Jump to input line `abs_line_offset`, ignoring jumps past the end. + """ try: - self.statemachine.gotoline(abslineoffset) + self.state_machine.goto_line(abs_line_offset) except IndexError: pass @@ -197,126 +220,131 @@ class RSTState(StateWS): """Called at beginning of file.""" return [], [] - def nestedparse(self, block, inputoffset, node, matchtitles=0, - statemachineclass=None, statemachinekwargs=None): + def nested_parse(self, block, input_offset, node, match_titles=0, + state_machine_class=None, state_machine_kwargs=None): """ Create a new StateMachine rooted at `node` and run it over the input `block`. 
""" - if statemachineclass is None: - statemachineclass = self.nestedSM - if statemachinekwargs is None: - statemachinekwargs = self.nestedSMkwargs - statemachine = statemachineclass(debug=self.debug, **statemachinekwargs) - statemachine.run(block, inputoffset, memo=self.statemachine.memo, - node=node, matchtitles=matchtitles) - statemachine.unlink() - return statemachine.abslineoffset() - - def nestedlistparse(self, block, inputoffset, node, initialstate, - blankfinish, blankfinishstate=None, extrasettings={}, - matchtitles=0, statemachineclass=None, - statemachinekwargs=None): + if state_machine_class is None: + state_machine_class = self.nested_sm + if state_machine_kwargs is None: + state_machine_kwargs = self.nested_sm_kwargs + state_machine = state_machine_class(debug=self.debug, + **state_machine_kwargs) + state_machine.run(block, input_offset, memo=self.memo, + node=node, match_titles=match_titles) + state_machine.unlink() + return state_machine.abs_line_offset() + + def nested_list_parse(self, block, input_offset, node, initial_state, + blank_finish, + blank_finish_state=None, + extra_settings={}, + match_titles=0, + state_machine_class=None, + state_machine_kwargs=None): """ Create a new StateMachine rooted at `node` and run it over the input `block`. Also keep track of optional intermdediate blank lines and the required final one. 
""" - if statemachineclass is None: - statemachineclass = self.nestedSM - if statemachinekwargs is None: - statemachinekwargs = self.nestedSMkwargs.copy() - statemachinekwargs['initialstate'] = initialstate - statemachine = statemachineclass(debug=self.debug, **statemachinekwargs) - if blankfinishstate is None: - blankfinishstate = initialstate - statemachine.states[blankfinishstate].blankfinish = blankfinish - for key, value in extrasettings.items(): - setattr(statemachine.states[initialstate], key, value) - statemachine.run(block, inputoffset, memo=self.statemachine.memo, - node=node, matchtitles=matchtitles) - blankfinish = statemachine.states[blankfinishstate].blankfinish - statemachine.unlink() - return statemachine.abslineoffset(), blankfinish + if state_machine_class is None: + state_machine_class = self.nested_sm + if state_machine_kwargs is None: + state_machine_kwargs = self.nested_sm_kwargs.copy() + state_machine_kwargs['initial_state'] = initial_state + state_machine = state_machine_class(debug=self.debug, + **state_machine_kwargs) + if blank_finish_state is None: + blank_finish_state = initial_state + state_machine.states[blank_finish_state].blank_finish = blank_finish + for key, value in extra_settings.items(): + setattr(state_machine.states[initial_state], key, value) + state_machine.run(block, input_offset, memo=self.memo, + node=node, match_titles=match_titles) + blank_finish = state_machine.states[blank_finish_state].blank_finish + state_machine.unlink() + return state_machine.abs_line_offset(), blank_finish def section(self, title, source, style, lineno): + """Check for a valid subsection and create one if it checks out.""" + if self.check_subsection(source, style, lineno): + self.new_subsection(title, lineno) + + def check_subsection(self, source, style, lineno): """ + Check for a valid subsection header. Return 1 (true) or None (false). 
+ When a new section is reached that isn't a subsection of the current - section, back up the line count (use previousline(-x)), then raise - EOFError. The current StateMachine will finish, then the calling - StateMachine can re-examine the title. This will work its way back up - the calling chain until the correct section level isreached. + section, back up the line count (use ``previous_line(-x)``), then + ``raise EOFError``. The current StateMachine will finish, then the + calling StateMachine can re-examine the title. This will work its way + back up the calling chain until the correct section level isreached. - Alternative: Evaluate the title, store the title info & level, and - back up the chain until that level is reached. Store in memo? Or + @@@ Alternative: Evaluate the title, store the title info & level, and + back up the chain until that level is reached. Store in memo? Or return in results? - """ - if self.checksubsection(source, style, lineno): - self.newsubsection(title, lineno) - - def checksubsection(self, source, style, lineno): - """ - Check for a valid subsection header. Return 1 (true) or None (false). :Exception: `EOFError` when a sibling or supersection encountered. 
""" - memo = self.statemachine.memo - titlestyles = memo.titlestyles - mylevel = memo.sectionlevel + memo = self.memo + title_styles = memo.title_styles + mylevel = memo.section_level try: # check for existing title style - level = titlestyles.index(style) + 1 + level = title_styles.index(style) + 1 except ValueError: # new title style - if len(titlestyles) == memo.sectionlevel: # new subsection - titlestyles.append(style) + if len(title_styles) == memo.section_level: # new subsection + title_styles.append(style) return 1 else: # not at lowest level - self.statemachine.node += self.titleinconsistent(source, lineno) + self.parent += self.title_inconsistent(source, lineno) return None if level <= mylevel: # sibling or supersection - memo.sectionlevel = level # bubble up to parent section + memo.section_level = level # bubble up to parent section # back up 2 lines for underline title, 3 for overline title - self.statemachine.previousline(len(style) + 1) + self.state_machine.previous_line(len(style) + 1) raise EOFError # let parent section re-evaluate if level == mylevel + 1: # immediate subsection return 1 else: # invalid subsection - self.statemachine.node += self.titleinconsistent(source, lineno) + self.parent += self.title_inconsistent(source, lineno) return None - def titleinconsistent(self, sourcetext, lineno): + def title_inconsistent(self, sourcetext, lineno): literalblock = nodes.literal_block('', sourcetext) - error = self.statemachine.memo.reporter.severe( - 'Title level inconsistent at line %s:' % lineno, '', literalblock) + error = self.reporter.severe('Title level inconsistent at line %s:' + % lineno, '', literalblock) return error - def newsubsection(self, title, lineno): + def new_subsection(self, title, lineno): """Append new subsection to document tree. 
On return, check level.""" - memo = self.statemachine.memo - mylevel = memo.sectionlevel - memo.sectionlevel += 1 + memo = self.memo + mylevel = memo.section_level + memo.section_level += 1 sectionnode = nodes.section() - self.statemachine.node += sectionnode + self.parent += sectionnode textnodes, messages = self.inline_text(title, lineno) titlenode = nodes.title(title, '', *textnodes) - name = normname(titlenode.astext()) + name = normalize_name(titlenode.astext()) sectionnode['name'] = name sectionnode += titlenode sectionnode += messages - memo.document.note_implicit_target(sectionnode, sectionnode) - offset = self.statemachine.lineoffset + 1 - absoffset = self.statemachine.abslineoffset() + 1 - newabsoffset = self.nestedparse( - self.statemachine.inputlines[offset:], inputoffset=absoffset, - node=sectionnode, matchtitles=1) - self.gotoline(newabsoffset) - if memo.sectionlevel <= mylevel: # can't handle next section? + self.document.note_implicit_target(sectionnode, sectionnode) + offset = self.state_machine.line_offset + 1 + absoffset = self.state_machine.abs_line_offset() + 1 + newabsoffset = self.nested_parse( + self.state_machine.input_lines[offset:], input_offset=absoffset, + node=sectionnode, match_titles=1) + self.goto_line(newabsoffset) + if memo.section_level <= mylevel: # can't handle next section? raise EOFError # bubble up to supersection - # reset sectionlevel; next pass will detect it properly - memo.sectionlevel = mylevel + # reset section_level; next pass will detect it properly + memo.section_level = mylevel def paragraph(self, lines, lineno): """ - Return a list (paragraph & messages) and a boolean: literal_block next? + Return a list (paragraph & messages) & a boolean: literal_block next? 
""" data = '\n'.join(lines).rstrip() if data[-2:] == '::': @@ -334,143 +362,203 @@ class RSTState(StateWS): p = nodes.paragraph(data, '', *textnodes) return [p] + messages, literalnext - inline = Stuff() - """Patterns and constants used for inline markup recognition.""" - - inline.openers = '\'"([{<' - inline.closers = '\'")]}>' - inline.start_string_prefix = (r'(?:(?<=^)|(?<=[ \n%s]))' - % re.escape(inline.openers)) - inline.end_string_suffix = (r'(?:(?=$)|(?=[- \n.,:;!?%s]))' - % re.escape(inline.closers)) - inline.non_whitespace_before = r'(?' + start_string_prefix = (r'(?:(?<=^)|(?<=[ \n%s]))' + % re.escape(openers)) + end_string_suffix = (r'(?:(?=$)|(?=[- \n.,:;!?%s]))' + % re.escape(closers)) + non_whitespace_before = r'(? 0: - textnodes.append(nodes.Text(unescape( - remainder[:match.start(whole)]))) - if match.group(email): - addscheme = 'mailto:' - else: - addscheme = '' - text = match.group(whole) - unescaped = unescape(text, 0) - textnodes.append( - nodes.reference(unescape(text, 1), unescaped, - refuri=addscheme + unescaped)) - remainder = remainder[match.end(whole):] - start = 0 - else: # not a valid scheme - start = match.end(whole) + def standalone_uri(self, match, lineno): + scheme = self.groups.uri.scheme + if not match.group(scheme) or urischemes.schemes.has_key( + match.group(scheme).lower()): + if match.group(self.groups.uri.email): + addscheme = 'mailto:' else: - if remainder: - textnodes.append(nodes.Text(unescape(remainder))) - break - return textnodes - - inline.dispatch = {'*': emphasis, - '**': strong, - '`': interpreted_or_phrase_ref, - '``': literal, - '_`': inline_target, - ']_': footnote_reference, - '|': substitution_reference, - '_': reference, - '__': anonymous_reference} - - def inline_text(self, text, lineno): + addscheme = '' + text = match.group(self.groups.uri.whole) + unescaped = unescape(text, 0) + return [nodes.reference(unescape(text, 1), unescaped, + refuri=addscheme + unescaped)] + else: # not a valid scheme + raise 
MarkupMismatch + + implicit = ((patterns.uri, standalone_uri),) + """List of (pattern, dispatch method) pairs.""" + + def implicit_inline(self, text, lineno): """ - Return 2 lists: nodes (text and inline elements), and system_messages. - - Using a `pattern` matching start-strings (for emphasis, strong, - interpreted, phrase reference, literal, substitution reference, and - inline target) or complete constructs (simple reference, footnote - reference) we search for a candidate. When one is found, we check for - validity (e.g., not a quoted '*' character). If valid, search for the - corresponding end string if applicable, and check for validity. If not - found or invalid, generate a warning and ignore the start-string. - Standalone hyperlinks are found last. + Check each of the patterns in `self.implicit` for a match, and + dispatch to the stored method for the pattern. Recursively check the + text before and after the match. """ - pattern = self.inline.patterns.initial - dispatch = self.inline.dispatch - start = self.inline.groups.initial.start - 1 - backquote = self.inline.groups.initial.backquote - 1 - refend = self.inline.groups.initial.refend - 1 - fnend = self.inline.groups.initial.fnend - 1 - remaining = escape2null(text) - processed = [] - unprocessed = [] - messages = [] - while remaining: - match = pattern.search(remaining) + if not text: + return [] + for pattern, dispatch in self.implicit: + match = pattern.search(text) if match: - groups = match.groups() - before, inlines, remaining, sysmessages = \ - dispatch[groups[start] or groups[backquote] - or groups[refend] - or groups[fnend]](self, match, lineno) - unprocessed.append(before) - messages += sysmessages - if inlines: - processed += self.standalone_uri(''.join(unprocessed), - lineno) - processed += inlines - unprocessed = [] - else: - break - remaining = ''.join(unprocessed) + remaining - if remaining: - processed += self.standalone_uri(remaining, lineno) - return processed, messages - - def 
unindent_warning(self, node_name): - return self.statemachine.memo.reporter.warning( - ('%s ends without a blank line; unexpected unindent at line %s.' - % (node_name, self.statemachine.abslineno() + 1))) + try: + return (self.implicit_inline(text[:match.start()], lineno) + + dispatch(self, match, lineno) + + self.implicit_inline(text[match.end():], lineno)) + except MarkupMismatch: + pass + return [nodes.Text(unescape(text))] + + dispatch = {'*': emphasis, + '**': strong, + '`': interpreted_or_phrase_ref, + '``': literal, + '_`': inline_target, + ']_': footnote_reference, + '|': substitution_reference, + '_': reference, + '__': anonymous_reference} class Body(RSTState): @@ -830,10 +870,10 @@ class Body(RSTState): enum.sequenceregexps = {} for sequence in enum.sequences: - enum.sequenceregexps[sequence] = re.compile(enum.sequencepats[sequence] - + '$') + enum.sequenceregexps[sequence] = re.compile( + enum.sequencepats[sequence] + '$') - tabletoppat = re.compile(r'\+-[-+]+-\+ *$') + table_top_pat = re.compile(r'\+-[-+]+-\+ *$') """Matches the top (& bottom) of a table).""" tableparser = TableParser() @@ -856,114 +896,116 @@ class Body(RSTState): format, re.escape(enum.formatinfo[format].prefix), pats['enum'], re.escape(enum.formatinfo[format].suffix)) - patterns = {'bullet': r'[-+*]( +|$)', - 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' - % pats, - 'field_marker': r':[^: ]([^:]*[^: ])?:( +|$)', - 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats, - 'doctest': r'>>>( +|$)', - 'tabletop': tabletoppat, - 'explicit_markup': r'\.\.( +|$)', - 'anonymous': r'__( +|$)', - 'line': r'(%(nonalphanum7bit)s)\1\1\1+ *$' % pats, - 'text': r''} - initialtransitions = ['bullet', - 'enumerator', - 'field_marker', - 'option_marker', - 'doctest', - 'tabletop', - 'explicit_markup', - 'anonymous', - 'line', - 'text'] - - def indent(self, match, context, nextstate): + patterns = { + 'bullet': r'[-+*]( +|$)', + 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( 
+|$)' % pats, + 'field_marker': r':[^: ]([^:]*[^: ])?:( +|$)', + 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats, + 'doctest': r'>>>( +|$)', + 'table_top': table_top_pat, + 'explicit_markup': r'\.\.( +|$)', + 'anonymous': r'__( +|$)', + 'line': r'(%(nonalphanum7bit)s)\1\1\1+ *$' % pats, + 'text': r''} + initial_transitions = ( + 'bullet', + 'enumerator', + 'field_marker', + 'option_marker', + 'doctest', + 'table_top', + 'explicit_markup', + 'anonymous', + 'line', + 'text') + + def indent(self, match, context, next_state): """Block quote.""" - indented, indent, lineoffset, blankfinish = \ - self.statemachine.getindented() - blockquote = self.block_quote(indented, lineoffset) - self.statemachine.node += blockquote - if not blankfinish: - self.statemachine.node += self.unindent_warning('Block quote') - return context, nextstate, [] - - def block_quote(self, indented, lineoffset): + indented, indent, line_offset, blank_finish = \ + self.state_machine.get_indented() + blockquote = self.block_quote(indented, line_offset) + self.parent += blockquote + if not blank_finish: + self.parent += self.unindent_warning('Block quote') + return context, next_state, [] + + def block_quote(self, indented, line_offset): blockquote = nodes.block_quote() - self.nestedparse(indented, lineoffset, blockquote) + self.nested_parse(indented, line_offset, blockquote) return blockquote - def bullet(self, match, context, nextstate): + def bullet(self, match, context, next_state): """Bullet list item.""" bulletlist = nodes.bullet_list() - self.statemachine.node += bulletlist + self.parent += bulletlist bulletlist['bullet'] = match.string[0] - i, blankfinish = self.list_item(match.end()) + i, blank_finish = self.list_item(match.end()) bulletlist += i - offset = self.statemachine.lineoffset + 1 # next line - newlineoffset, blankfinish = self.nestedlistparse( - self.statemachine.inputlines[offset:], - inputoffset=self.statemachine.abslineoffset() + 1, - node=bulletlist, 
initialstate='BulletList', - blankfinish=blankfinish) - if not blankfinish: - self.statemachine.node += self.unindent_warning('Bullet list') - self.gotoline(newlineoffset) - return [], nextstate, [] + offset = self.state_machine.line_offset + 1 # next line + newline_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=bulletlist, initial_state='BulletList', + blank_finish=blank_finish) + if not blank_finish: + self.parent += self.unindent_warning('Bullet list') + self.goto_line(newline_offset) + return [], next_state, [] def list_item(self, indent): - indented, lineoffset, blankfinish = \ - self.statemachine.getknownindented(indent) + indented, line_offset, blank_finish = \ + self.state_machine.get_known_indented(indent) listitem = nodes.list_item('\n'.join(indented)) if indented: - self.nestedparse(indented, inputoffset=lineoffset, node=listitem) - return listitem, blankfinish + self.nested_parse(indented, input_offset=line_offset, + node=listitem) + return listitem, blank_finish - def enumerator(self, match, context, nextstate): + def enumerator(self, match, context, next_state): """Enumerated List Item""" format, sequence, text, ordinal = self.parse_enumerator(match) if ordinal is None: - msg = self.statemachine.memo.reporter.error( + msg = self.reporter.error( ('Enumerated list start value invalid at line %s: ' - '%r (sequence %r)' % (self.statemachine.abslineno(), + '%r (sequence %r)' % (self.state_machine.abs_line_number(), text, sequence))) - self.statemachine.node += msg - indented, lineoffset, blankfinish = \ - self.statemachine.getknownindented(match.end()) - bq = self.block_quote(indented, lineoffset) - self.statemachine.node += bq - if not blankfinish: - self.statemachine.node += self.unindent_warning( + self.parent += msg + indented, line_offset, blank_finish = \ + self.state_machine.get_known_indented(match.end()) + bq = self.block_quote(indented, 
line_offset) + self.parent += bq + if not blank_finish: + self.parent += self.unindent_warning( 'Enumerated list') - return [], nextstate, [] + return [], next_state, [] if ordinal != 1: - msg = self.statemachine.memo.reporter.info( + msg = self.reporter.info( ('Enumerated list start value not ordinal-1 at line %s: ' - '%r (ordinal %s)' % (self.statemachine.abslineno(), - text, ordinal))) - self.statemachine.node += msg + '%r (ordinal %s)' % (self.state_machine.abs_line_number(), + text, ordinal))) + self.parent += msg enumlist = nodes.enumerated_list() - self.statemachine.node += enumlist + self.parent += enumlist enumlist['enumtype'] = sequence if ordinal != 1: enumlist['start'] = ordinal enumlist['prefix'] = self.enum.formatinfo[format].prefix enumlist['suffix'] = self.enum.formatinfo[format].suffix - listitem, blankfinish = self.list_item(match.end()) + listitem, blank_finish = self.list_item(match.end()) enumlist += listitem - offset = self.statemachine.lineoffset + 1 # next line - newlineoffset, blankfinish = self.nestedlistparse( - self.statemachine.inputlines[offset:], - inputoffset=self.statemachine.abslineoffset() + 1, - node=enumlist, initialstate='EnumeratedList', - blankfinish=blankfinish, - extrasettings={'lastordinal': ordinal, 'format': format}) - if not blankfinish: - self.statemachine.node += self.unindent_warning('Enumerated list') - self.gotoline(newlineoffset) - return [], nextstate, [] - - def parse_enumerator(self, match, expectedsequence=None): + offset = self.state_machine.line_offset + 1 # next line + newline_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=enumlist, initial_state='EnumeratedList', + blank_finish=blank_finish, + extra_settings={'lastordinal': ordinal, 'format': format}) + if not blank_finish: + self.parent += self.unindent_warning('Enumerated list') + self.goto_line(newline_offset) + return [], next_state, [] + + def 
parse_enumerator(self, match, expected_sequence=None): """ Analyze an enumerator and return the results. @@ -975,7 +1017,7 @@ class Body(RSTState): ``None`` is returned for invalid enumerator text). The enumerator format has already been determined by the regular - expression match. If `expectedsequence` is given, that sequence is + expression match. If `expected_sequence` is given, that sequence is tried first. If not, we check for Roman numeral 1. This way, single-character Roman numerals (which are also alphabetical) can be matched. If no sequence has been matched, all sequences are checked in @@ -990,10 +1032,10 @@ class Body(RSTState): raise ParserError, 'enumerator format not matched' text = groupdict[format][self.enum.formatinfo[format].start :self.enum.formatinfo[format].end] - if expectedsequence: + if expected_sequence: try: - if self.enum.sequenceregexps[expectedsequence].match(text): - sequence = expectedsequence + if self.enum.sequenceregexps[expected_sequence].match(text): + sequence = expected_sequence except KeyError: # shouldn't happen raise ParserError, 'unknown sequence: %s' % sequence else: @@ -1013,27 +1055,27 @@ class Body(RSTState): ordinal = None return format, sequence, text, ordinal - def field_marker(self, match, context, nextstate): + def field_marker(self, match, context, next_state): """Field list item.""" fieldlist = nodes.field_list() - self.statemachine.node += fieldlist - field, blankfinish = self.field(match) + self.parent += fieldlist + field, blank_finish = self.field(match) fieldlist += field - offset = self.statemachine.lineoffset + 1 # next line - newlineoffset, blankfinish = self.nestedlistparse( - self.statemachine.inputlines[offset:], - inputoffset=self.statemachine.abslineoffset() + 1, - node=fieldlist, initialstate='FieldList', - blankfinish=blankfinish) - if not blankfinish: - self.statemachine.node += self.unindent_warning('Field list') - self.gotoline(newlineoffset) - return [], nextstate, [] + offset = 
self.state_machine.line_offset + 1 # next line + newline_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=fieldlist, initial_state='FieldList', + blank_finish=blank_finish) + if not blank_finish: + self.parent += self.unindent_warning('Field list') + self.goto_line(newline_offset) + return [], next_state, [] def field(self, match): name, args = self.parse_field_marker(match) - indented, indent, lineoffset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end()) + indented, indent, line_offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end()) fieldnode = nodes.field() fieldnode += nodes.field_name(name, name) for arg in args: @@ -1041,8 +1083,9 @@ class Body(RSTState): fieldbody = nodes.field_body('\n'.join(indented)) fieldnode += fieldbody if indented: - self.nestedparse(indented, inputoffset=lineoffset, node=fieldbody) - return fieldnode, blankfinish + self.nested_parse(indented, input_offset=line_offset, + node=fieldbody) + return fieldnode, blank_finish def parse_field_marker(self, match): """Extract & return name & argument list from a field marker match.""" @@ -1051,48 +1094,50 @@ class Body(RSTState): tokens = field.split() return tokens[0], tokens[1:] # first == name, others == args - def option_marker(self, match, context, nextstate): + def option_marker(self, match, context, next_state): """Option list item.""" optionlist = nodes.option_list() try: - listitem, blankfinish = self.option_list_item(match) - except MarkupError, detail: # shouldn't happen; won't match pattern - msg = self.statemachine.memo.reporter.error( + listitem, blank_finish = self.option_list_item(match) + except MarkupError, detail: # shouldn't happen; won't match pattern + msg = self.reporter.error( ('Invalid option list marker at line %s: %s' - % (self.statemachine.abslineno(), detail))) - self.statemachine.node += msg - indented, indent, 
lineoffset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end()) - blockquote = self.block_quote(indented, lineoffset) - self.statemachine.node += blockquote - if not blankfinish: - self.statemachine.node += self.unindent_warning('Option list') - return [], nextstate, [] - self.statemachine.node += optionlist + % (self.state_machine.abs_line_number(), detail))) + self.parent += msg + indented, indent, line_offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end()) + blockquote = self.block_quote(indented, line_offset) + self.parent += blockquote + if not blank_finish: + self.parent += self.unindent_warning('Option list') + return [], next_state, [] + self.parent += optionlist optionlist += listitem - offset = self.statemachine.lineoffset + 1 # next line - newlineoffset, blankfinish = self.nestedlistparse( - self.statemachine.inputlines[offset:], - inputoffset=self.statemachine.abslineoffset() + 1, - node=optionlist, initialstate='OptionList', - blankfinish=blankfinish) - if not blankfinish: - self.statemachine.node += self.unindent_warning('Option list') - self.gotoline(newlineoffset) - return [], nextstate, [] + offset = self.state_machine.line_offset + 1 # next line + newline_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=optionlist, initial_state='OptionList', + blank_finish=blank_finish) + if not blank_finish: + self.parent += self.unindent_warning('Option list') + self.goto_line(newline_offset) + return [], next_state, [] def option_list_item(self, match): options = self.parse_option_marker(match) - indented, indent, lineoffset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end()) + indented, indent, line_offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end()) if not indented: # not an option list item raise statemachine.TransitionCorrection('text') option_group = 
nodes.option_group('', *options) description = nodes.description('\n'.join(indented)) - option_list_item = nodes.option_list_item('', option_group, description) + option_list_item = nodes.option_list_item('', option_group, + description) if indented: - self.nestedparse(indented, inputoffset=lineoffset, node=description) - return option_list_item, blankfinish + self.nested_parse(indented, input_offset=line_offset, + node=description) + return option_list_item, blank_finish def parse_option_marker(self, match): """ @@ -1123,84 +1168,85 @@ class Body(RSTState): optionstring)) return optlist - def doctest(self, match, context, nextstate): - data = '\n'.join(self.statemachine.gettextblock()) - self.statemachine.node += nodes.doctest_block(data, data) - return [], nextstate, [] + def doctest(self, match, context, next_state): + data = '\n'.join(self.state_machine.get_text_block()) + self.parent += nodes.doctest_block(data, data) + return [], next_state, [] - def tabletop(self, match, context, nextstate): + def table_top(self, match, context, next_state): """Top border of a table.""" - nodelist, blankfinish = self.table() - self.statemachine.node += nodelist - if not blankfinish: - msg = self.statemachine.memo.reporter.warning( + nodelist, blank_finish = self.table() + self.parent += nodelist + if not blank_finish: + msg = self.reporter.warning( 'Blank line required after table at line %s.' 
- % (self.statemachine.abslineno() + 1)) - self.statemachine.node += msg - return [], nextstate, [] + % (self.state_machine.abs_line_number() + 1)) + self.parent += msg + return [], next_state, [] def table(self): """Parse a table.""" - block, messages, blankfinish = self.isolatetable() + block, messages, blank_finish = self.isolate_table() if block: try: tabledata = self.tableparser.parse(block) - tableline = self.statemachine.abslineno() - len(block) + 1 - table = self.buildtable(tabledata, tableline) + tableline = (self.state_machine.abs_line_number() - len(block) + + 1) + table = self.build_table(tabledata, tableline) nodelist = [table] + messages except TableMarkupError, detail: - nodelist = self.malformedtable(block, str(detail)) + messages + nodelist = self.malformed_table(block, str(detail)) + messages else: nodelist = messages - return nodelist, blankfinish + return nodelist, blank_finish - def isolatetable(self): + def isolate_table(self): messages = [] - blankfinish = 1 + blank_finish = 1 try: - block = self.statemachine.getunindented() + block = self.state_machine.get_text_block(flush_left=1) except statemachine.UnexpectedIndentationError, instance: block, lineno = instance.args - messages.append(self.statemachine.memo.reporter.error( + messages.append(self.reporter.error( 'Unexpected indentation at line %s.' 
% lineno)) - blankfinish = 0 + blank_finish = 0 width = len(block[0].strip()) for i in range(len(block)): block[i] = block[i].strip() if block[i][0] not in '+|': # check left edge - blankfinish = 0 - self.statemachine.previousline(len(block) - i) + blank_finish = 0 + self.state_machine.previous_line(len(block) - i) del block[i:] break - if not self.tabletoppat.match(block[-1]): # find bottom - blankfinish = 0 + if not self.table_top_pat.match(block[-1]): # find bottom + blank_finish = 0 # from second-last to third line of table: for i in range(len(block) - 2, 1, -1): - if self.tabletoppat.match(block[i]): - self.statemachine.previousline(len(block) - i + 1) + if self.table_top_pat.match(block[i]): + self.state_machine.previous_line(len(block) - i + 1) del block[i+1:] break else: - messages.extend(self.malformedtable(block)) - return [], messages, blankfinish + messages.extend(self.malformed_table(block)) + return [], messages, blank_finish for i in range(len(block)): # check right edge if len(block[i]) != width or block[i][-1] not in '+|': - messages.extend(self.malformedtable(block)) - return [], messages, blankfinish - return block, messages, blankfinish + messages.extend(self.malformed_table(block)) + return [], messages, blank_finish + return block, messages, blank_finish - def malformedtable(self, block, detail=''): + def malformed_table(self, block, detail=''): data = '\n'.join(block) message = 'Malformed table at line %s; formatting as a ' \ - 'literal block.' % (self.statemachine.abslineno() + 'literal block.' 
% (self.state_machine.abs_line_number() - len(block) + 1) if detail: message += '\n' + detail - nodelist = [self.statemachine.memo.reporter.error(message), + nodelist = [self.reporter.error(message), nodes.literal_block(data, data)] return nodelist - def buildtable(self, tabledata, tableline): + def build_table(self, tabledata, tableline): colspecs, headrows, bodyrows = tabledata table = nodes.table() tgroup = nodes.tgroup(cols=len(colspecs)) @@ -1211,14 +1257,14 @@ class Body(RSTState): thead = nodes.thead() tgroup += thead for row in headrows: - thead += self.buildtablerow(row, tableline) + thead += self.build_table_row(row, tableline) tbody = nodes.tbody() tgroup += tbody for row in bodyrows: - tbody += self.buildtablerow(row, tableline) + tbody += self.build_table_row(row, tableline) return table - def buildtablerow(self, rowdata, tableline): + def build_table_row(self, rowdata, tableline): row = nodes.row() for cell in rowdata: if cell is None: @@ -1232,8 +1278,8 @@ class Body(RSTState): entry = nodes.entry(**attributes) row += entry if ''.join(cellblock): - self.nestedparse(cellblock, inputoffset=tableline+offset, - node=entry) + self.nested_parse(cellblock, input_offset=tableline+offset, + node=entry) return row @@ -1257,8 +1303,8 @@ class Body(RSTState): : # end of reference name (?:[ ]+|$) # followed by whitespace """ - % (RSTState.inline.non_whitespace_escape_before, - RSTState.inline.non_whitespace_escape_before), + % (Inliner.non_whitespace_escape_before, + Inliner.non_whitespace_escape_before), re.VERBOSE), reference=re.compile(r""" (?: @@ -1268,12 +1314,12 @@ class Body(RSTState): (?![ ]) # not space (.+?) 
# hyperlink phrase %s # not whitespace or escape - `_ # close backquote & reference mark + `_ # close backquote, reference mark ) $ # end of string """ % - (RSTState.inline.simplename, - RSTState.inline.non_whitespace_escape_before,), + (Inliner.simplename, + Inliner.non_whitespace_escape_before,), re.VERBOSE), substitution=re.compile(r""" (?: @@ -1284,7 +1330,7 @@ class Body(RSTState): ) (?:[ ]+|$) # followed by whitespace """ % - RSTState.inline.non_whitespace_escape_before, + Inliner.non_whitespace_escape_before, re.VERBOSE),) explicit.groups = Stuff( target=Stuff(quote=1, name=2), @@ -1292,53 +1338,53 @@ class Body(RSTState): substitution=Stuff(name=1)) def footnote(self, match): - indented, indent, offset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end()) + indented, indent, offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end()) label = match.group(1) - name = normname(label) + name = normalize_name(label) footnote = nodes.footnote('\n'.join(indented)) if name[0] == '#': # auto-numbered name = name[1:] # autonumber label footnote['auto'] = 1 if name: footnote['name'] = name - self.statemachine.memo.document.note_autofootnote(footnote) + self.document.note_autofootnote(footnote) elif name == '*': # auto-symbol name = '' footnote['auto'] = '*' - self.statemachine.memo.document.note_symbol_footnote(footnote) + self.document.note_symbol_footnote(footnote) else: # manually numbered footnote += nodes.label('', label) footnote['name'] = name - self.statemachine.memo.document.note_footnote(footnote) + self.document.note_footnote(footnote) if name: - self.statemachine.memo.document.note_explicit_target(footnote, + self.document.note_explicit_target(footnote, footnote) if indented: - self.nestedparse(indented, inputoffset=offset, node=footnote) - return [footnote], blankfinish + self.nested_parse(indented, input_offset=offset, node=footnote) + return [footnote], blank_finish def citation(self, match): - indented, indent, 
offset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end()) + indented, indent, offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end()) label = match.group(1) - name = normname(label) + name = normalize_name(label) citation = nodes.citation('\n'.join(indented)) citation += nodes.label('', label) citation['name'] = name - self.statemachine.memo.document.note_citation(citation) - self.statemachine.memo.document.note_explicit_target(citation, citation) + self.document.note_citation(citation) + self.document.note_explicit_target(citation, citation) if indented: - self.nestedparse(indented, inputoffset=offset, node=citation) - return [citation], blankfinish + self.nested_parse(indented, input_offset=offset, node=citation) + return [citation], blank_finish def hyperlink_target(self, match): pattern = self.explicit.patterns.target namegroup = self.explicit.groups.target.name - lineno = self.statemachine.abslineno() - block, indent, offset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end(), uptoblank=1, - stripindent=0) + lineno = self.state_machine.abs_line_number() + block, indent, offset, blank_finish = \ + self.state_machine.get_first_known_indented( + match.end(), until_blank=1, strip_indent=0) blocktext = match.string[:match.end()] + '\n'.join(block) block = [escape2null(line) for line in block] escaped = block[0] @@ -1357,58 +1403,58 @@ class Body(RSTState): block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip() if block and block[-1].strip()[-1:] == '_': # possible indirect target reference = ' '.join([line.strip() for line in block]) - refname = self.isreference(reference) + refname = self.is_reference(reference) if refname: target = nodes.target(blocktext, '', refname=refname) - self.addtarget(targetmatch.group(namegroup), '', target) - self.statemachine.memo.document.note_indirect_target(target) - return [target], blankfinish + self.add_target(targetmatch.group(namegroup), '', 
target) + self.document.note_indirect_target(target) + return [target], blank_finish nodelist = [] reference = ''.join([line.strip() for line in block]) if reference.find(' ') != -1: - warning = self.statemachine.memo.reporter.warning( + warning = self.reporter.warning( 'Hyperlink target at line %s contains whitespace. ' 'Perhaps a footnote was intended?' - % (self.statemachine.abslineno() - len(block) + 1), '', - nodes.literal_block(blocktext, blocktext)) + % (self.state_machine.abs_line_number() - len(block) + 1), + '', nodes.literal_block(blocktext, blocktext)) nodelist.append(warning) else: unescaped = unescape(reference) target = nodes.target(blocktext, '') - self.addtarget(targetmatch.group(namegroup), unescaped, target) + self.add_target(targetmatch.group(namegroup), unescaped, target) nodelist.append(target) - return nodelist, blankfinish + return nodelist, blank_finish - def isreference(self, reference): - match = self.explicit.patterns.reference.match(normname(reference)) + def is_reference(self, reference): + match = self.explicit.patterns.reference.match(normalize_name(reference)) if not match: return None return unescape(match.group(self.explicit.groups.reference.simple) or match.group(self.explicit.groups.reference.phrase)) - def addtarget(self, targetname, refuri, target): + def add_target(self, targetname, refuri, target): if targetname: - name = normname(unescape(targetname)) + name = normalize_name(unescape(targetname)) target['name'] = name if refuri: target['refuri'] = refuri - self.statemachine.memo.document.note_external_target(target) + self.document.note_external_target(target) else: - self.statemachine.memo.document.note_internal_target(target) - self.statemachine.memo.document.note_explicit_target( - target, self.statemachine.node) + self.document.note_internal_target(target) + self.document.note_explicit_target( + target, self.parent) else: # anonymous target if refuri: target['refuri'] = refuri target['anonymous'] = 1 - 
self.statemachine.memo.document.note_anonymous_target(target) + self.document.note_anonymous_target(target) - def substitutiondef(self, match): + def substitution_def(self, match): pattern = self.explicit.patterns.substitution - lineno = self.statemachine.abslineno() - block, indent, offset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end(), - stripindent=0) + lineno = self.state_machine.abs_line_number() + block, indent, offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end(), + strip_indent=0) blocktext = (match.string[:match.end()] + '\n'.join(block)) block = [escape2null(line) for line in block] escaped = block[0].rstrip() @@ -1429,65 +1475,66 @@ class Body(RSTState): del block[0] offset += 1 subname = subdefmatch.group(self.explicit.groups.substitution.name) - name = normname(subname) + name = normalize_name(subname) substitutionnode = nodes.substitution_definition( blocktext, name=name, alt=subname) if block: block[0] = block[0].strip() - newabsoffset, blankfinish = self.nestedlistparse( - block, inputoffset=offset, node=substitutionnode, - initialstate='SubstitutionDef', blankfinish=blankfinish) - self.statemachine.previousline( + newabsoffset, blank_finish = self.nested_list_parse( + block, input_offset=offset, node=substitutionnode, + initial_state='SubstitutionDef', blank_finish=blank_finish) + self.state_machine.previous_line( len(block) + offset - newabsoffset - 1) i = 0 for node in substitutionnode[:]: if not (isinstance(node, nodes.Inline) or isinstance(node, nodes.Text)): - self.statemachine.node += substitutionnode[i] + self.parent += substitutionnode[i] del substitutionnode[i] else: i += 1 if len(substitutionnode) == 0: - msg = self.statemachine.memo.reporter.warning( + msg = self.reporter.warning( 'Substitution definition "%s" empty or invalid at line ' - '%s.' % (subname, self.statemachine.abslineno()), '', - nodes.literal_block(blocktext, blocktext)) - self.statemachine.node += msg + '%s.' 
% (subname, self.state_machine.abs_line_number()), + '', nodes.literal_block(blocktext, blocktext)) + self.parent += msg else: del substitutionnode['alt'] - self.statemachine.memo.document.note_substitution_def( - substitutionnode, self.statemachine.node) - return [substitutionnode], blankfinish + self.document.note_substitution_def( + substitutionnode, self.parent) + return [substitutionnode], blank_finish else: - msg = self.statemachine.memo.reporter.warning( + msg = self.reporter.warning( 'Substitution definition "%s" missing contents at line %s.' - % (subname, self.statemachine.abslineno()), '', + % (subname, self.state_machine.abs_line_number()), '', nodes.literal_block(blocktext, blocktext)) - self.statemachine.node += msg - return [], blankfinish + self.parent += msg + return [], blank_finish def directive(self, match, **attributes): - typename = match.group(1) - directivefunction = directives.directive( - typename, self.statemachine.memo.language) + type_name = match.group(1) + directivefunction = directives.directive(type_name, + self.memo.language) data = match.string[match.end():].strip() if directivefunction: - return directivefunction(match, typename, data, self, - self.statemachine, attributes) + return directivefunction(match, type_name, data, self, + self.state_machine, attributes) else: - return self.unknowndirective(typename, data) + return self.unknown_directive(type_name, data) - def unknowndirective(self, typename, data): - lineno = self.statemachine.abslineno() - indented, indent, offset, blankfinish = \ - self.statemachine.getfirstknownindented(0, stripindent=0) + def unknown_directive(self, type_name, data): + lineno = self.state_machine.abs_line_number() + indented, indent, offset, blank_finish = \ + self.state_machine.get_first_known_indented(0, strip_indent=0) text = '\n'.join(indented) - error = self.statemachine.memo.reporter.error( - 'Unknown directive type "%s" at line %s.' 
% (typename, lineno), + error = self.reporter.error( + 'Unknown directive type "%s" at line %s.' % (type_name, lineno), '', nodes.literal_block(text, text)) - return [error], blankfinish + return [error], blank_finish - def parse_extension_attributes(self, attribute_spec, datalines, blankfinish): + def parse_extension_attributes(self, attribute_spec, datalines, + blank_finish): """ Parse `datalines` for a field list containing extension attributes matching `attribute_spec`. @@ -1496,37 +1543,38 @@ class Body(RSTState): - `attribute_spec`: a mapping of attribute name to conversion function, which should raise an exception on bad input. - `datalines`: a list of input strings. - - `blankfinish`: + - `blank_finish`: :Return: - Success value, 1 or 0. - An attribute dictionary on success, an error string on failure. - - Updated `blankfinish` flag. + - Updated `blank_finish` flag. """ node = nodes.field_list() - newlineoffset, blankfinish = self.nestedlistparse( - datalines, 0, node, initialstate='FieldList', - blankfinish=blankfinish) - if newlineoffset != len(datalines): # incomplete parse of block - return 0, 'invalid attribute block', blankfinish + newline_offset, blank_finish = self.nested_list_parse( + datalines, 0, node, initial_state='FieldList', + blank_finish=blank_finish) + if newline_offset != len(datalines): # incomplete parse of block + return 0, 'invalid attribute block', blank_finish try: - attributes = utils.extract_extension_attributes(node, attribute_spec) + attributes = utils.extract_extension_attributes(node, + attribute_spec) except KeyError, detail: - return 0, ('unknown attribute: "%s"' % detail), blankfinish + return 0, ('unknown attribute: "%s"' % detail), blank_finish except (ValueError, TypeError), detail: - return 0, ('invalid attribute value:\n%s' % detail), blankfinish + return 0, ('invalid attribute value:\n%s' % detail), blank_finish except utils.ExtensionAttributeError, detail: - return 0, ('invalid attribute data: %s' % detail), 
blankfinish - return 1, attributes, blankfinish + return 0, ('invalid attribute data: %s' % detail), blank_finish + return 1, attributes, blank_finish def comment(self, match): if not match.string[match.end():].strip() \ - and self.statemachine.nextlineblank(): # an empty comment? + and self.state_machine.is_next_line_blank(): # an empty comment? return [nodes.comment()], 1 # "A tiny but practical wart." - indented, indent, offset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end()) + indented, indent, offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end()) text = '\n'.join(indented) - return [nodes.comment(text, text)], blankfinish + return [nodes.comment(text, text)], blank_finish explicit.constructs = [ (footnote, @@ -1544,20 +1592,20 @@ class Body(RSTState): ) \] (?:[ ]+|$) # whitespace or end of line - """ % RSTState.inline.simplename, re.VERBOSE)), + """ % Inliner.simplename, re.VERBOSE)), (citation, re.compile(r""" \.\.[ ]+ # explicit markup start \[(%s)\] # citation label (?:[ ]+|$) # whitespace or end of line - """ % RSTState.inline.simplename, re.VERBOSE)), + """ % Inliner.simplename, re.VERBOSE)), (hyperlink_target, re.compile(r""" \.\.[ ]+ # explicit markup start _ # target indicator (?![ ]) # first char. 
not space """, re.VERBOSE)), - (substitutiondef, + (substitution_def, re.compile(r""" \.\.[ ]+ # explicit markup start \| # substitution indicator @@ -1569,14 +1617,14 @@ class Body(RSTState): (%s) # directive name :: # directive delimiter (?:[ ]+|$) # whitespace or end of line - """ % RSTState.inline.simplename, re.VERBOSE))] + """ % Inliner.simplename, re.VERBOSE))] - def explicit_markup(self, match, context, nextstate): + def explicit_markup(self, match, context, next_state): """Footnotes, hyperlink targets, directives, comments.""" - nodelist, blankfinish = self.explicit_construct(match) - self.statemachine.node += nodelist - self.explicitlist(blankfinish) - return [], nextstate, [] + nodelist, blank_finish = self.explicit_construct(match) + self.parent += nodelist + self.explicit_list(blank_finish) + return [], next_state, [] def explicit_construct(self, match): """Determine which explicit construct this is, parse & return it.""" @@ -1588,80 +1636,81 @@ class Body(RSTState): return method(self, expmatch) except MarkupError, detail: # never reached? errors.append( - self.statemachine.memo.reporter.warning('%s: %s' + self.reporter.warning('%s: %s' % (detail.__class__.__name__, detail))) break - nodelist, blankfinish = self.comment(match) - return nodelist + errors, blankfinish + nodelist, blank_finish = self.comment(match) + return nodelist + errors, blank_finish - def explicitlist(self, blankfinish): + def explicit_list(self, blank_finish): """ Create a nested state machine for a series of explicit markup constructs (including anonymous hyperlink targets). 
""" - offset = self.statemachine.lineoffset + 1 # next line - newlineoffset, blankfinish = self.nestedlistparse( - self.statemachine.inputlines[offset:], - inputoffset=self.statemachine.abslineoffset() + 1, - node=self.statemachine.node, initialstate='Explicit', - blankfinish=blankfinish) - self.gotoline(newlineoffset) - if not blankfinish: - self.statemachine.node += self.unindent_warning('Explicit markup') - - def anonymous(self, match, context, nextstate): + offset = self.state_machine.line_offset + 1 # next line + newline_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=self.parent, initial_state='Explicit', + blank_finish=blank_finish) + self.goto_line(newline_offset) + if not blank_finish: + self.parent += self.unindent_warning('Explicit markup') + + def anonymous(self, match, context, next_state): """Anonymous hyperlink targets.""" - nodelist, blankfinish = self.anonymous_target(match) - self.statemachine.node += nodelist - self.explicitlist(blankfinish) - return [], nextstate, [] + nodelist, blank_finish = self.anonymous_target(match) + self.parent += nodelist + self.explicit_list(blank_finish) + return [], next_state, [] def anonymous_target(self, match): - block, indent, offset, blankfinish \ - = self.statemachine.getfirstknownindented(match.end(), - uptoblank=1) + block, indent, offset, blank_finish \ + = self.state_machine.get_first_known_indented(match.end(), + until_blank=1) blocktext = match.string[:match.end()] + '\n'.join(block) if block and block[-1].strip()[-1:] == '_': # possible indirect target - reference = escape2null(' '.join([line.strip() for line in block])) - refname = self.isreference(reference) + reference = escape2null(' '.join([line.strip() + for line in block])) + refname = self.is_reference(reference) if refname: target = nodes.target(blocktext, '', refname=refname, anonymous=1) - 
self.statemachine.memo.document.note_anonymous_target(target) - self.statemachine.memo.document.note_indirect_target(target) - return [target], blankfinish + self.document.note_anonymous_target(target) + self.document.note_indirect_target(target) + return [target], blank_finish nodelist = [] reference = escape2null(''.join([line.strip() for line in block])) if reference.find(' ') != -1: - warning = self.statemachine.memo.reporter.warning( - 'Anonymous hyperlink target at line %s contains whitespace. ' - 'Perhaps a footnote was intended?' - % (self.statemachine.abslineno() - len(block) + 1), '', - nodes.literal_block(blocktext, blocktext)) + warning = self.reporter.warning( + 'Anonymous hyperlink target at line %s contains ' + 'whitespace. Perhaps a footnote was intended?' + % (self.state_machine.abs_line_number() - len(block) + 1), + '', nodes.literal_block(blocktext, blocktext)) nodelist.append(warning) else: target = nodes.target(blocktext, '', anonymous=1) if reference: unescaped = unescape(reference) target['refuri'] = unescaped - self.statemachine.memo.document.note_anonymous_target(target) + self.document.note_anonymous_target(target) nodelist.append(target) - return nodelist, blankfinish + return nodelist, blank_finish - def line(self, match, context, nextstate): + def line(self, match, context, next_state): """Section title overline or transition marker.""" - if self.statemachine.matchtitles: + if self.state_machine.match_titles: return [match.string], 'Line', [] else: - blocktext = self.statemachine.line - msg = self.statemachine.memo.reporter.severe( + blocktext = self.state_machine.line + msg = self.reporter.severe( 'Unexpected section title or transition at line %s.' 
- % self.statemachine.abslineno(), '', + % self.state_machine.abs_line_number(), '', nodes.literal_block(blocktext, blocktext)) - self.statemachine.node += msg - return [], nextstate, [] + self.parent += msg + return [], next_state, [] - def text(self, match, context, nextstate): + def text(self, match, context, next_state): """Titles, definition lists, paragraphs.""" return [match.string], 'Text', [] @@ -1675,38 +1724,40 @@ class RFC2822Body(Body): patterns = Body.patterns.copy() # can't modify the original patterns['rfc2822'] = r'[!-9;-~]+:( +|$)' - initialtransitions = [(name, 'Body') for name in Body.initialtransitions] - initialtransitions.insert(-1, ('rfc2822', 'Body')) # just before 'text' + initial_transitions = [(name, 'Body') + for name in Body.initial_transitions] + initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text' - def rfc2822(self, match, context, nextstate): + def rfc2822(self, match, context, next_state): """RFC2822-style field list item.""" - fieldlist = nodes.field_list() - self.statemachine.node += fieldlist - field, blankfinish = self.rfc2822_field(match) + fieldlist = nodes.field_list(CLASS='rfc2822') + self.parent += fieldlist + field, blank_finish = self.rfc2822_field(match) fieldlist += field - offset = self.statemachine.lineoffset + 1 # next line - newlineoffset, blankfinish = self.nestedlistparse( - self.statemachine.inputlines[offset:], - inputoffset=self.statemachine.abslineoffset() + 1, - node=fieldlist, initialstate='RFC2822List', - blankfinish=blankfinish) - if not blankfinish: - self.statemachine.node += self.unindent_warning( + offset = self.state_machine.line_offset + 1 # next line + newline_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=fieldlist, initial_state='RFC2822List', + blank_finish=blank_finish) + if not blank_finish: + self.parent += self.unindent_warning( 'RFC2822-style field list') - 
self.gotoline(newlineoffset) - return [], nextstate, [] + self.goto_line(newline_offset) + return [], next_state, [] def rfc2822_field(self, match): name = match.string[:match.string.find(':')] - indented, indent, lineoffset, blankfinish = \ - self.statemachine.getfirstknownindented(match.end()) + indented, indent, line_offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end()) fieldnode = nodes.field() fieldnode += nodes.field_name(name, name) fieldbody = nodes.field_body('\n'.join(indented)) fieldnode += fieldbody if indented: - self.nestedparse(indented, inputoffset=lineoffset, node=fieldbody) - return fieldnode, blankfinish + self.nested_parse(indented, input_offset=line_offset, + node=fieldbody) + return fieldnode, blank_finish class SpecializedBody(Body): @@ -1718,9 +1769,9 @@ class SpecializedBody(Body): subclasses to re-enable. """ - def invalid_input(self, match=None, context=None, nextstate=None): + def invalid_input(self, match=None, context=None, next_state=None): """Not a compound element member. 
Abort this state machine.""" - self.statemachine.previousline() # back up so parent SM can reassess + self.state_machine.previous_line() # back up so parent SM can reassess raise EOFError indent = invalid_input @@ -1729,7 +1780,7 @@ class SpecializedBody(Body): field_marker = invalid_input option_marker = invalid_input doctest = invalid_input - tabletop = invalid_input + table_top = invalid_input explicit_markup = invalid_input anonymous = invalid_input line = invalid_input @@ -1740,14 +1791,14 @@ class BulletList(SpecializedBody): """Second and subsequent bullet_list list_items.""" - def bullet(self, match, context, nextstate): + def bullet(self, match, context, next_state): """Bullet list item.""" - if match.string[0] != self.statemachine.node['bullet']: + if match.string[0] != self.parent['bullet']: # different bullet: new list self.invalid_input() - listitem, blankfinish = self.list_item(match.end()) - self.statemachine.node += listitem - self.blankfinish = blankfinish + listitem, blank_finish = self.list_item(match.end()) + self.parent += listitem + self.blank_finish = blank_finish return [], 'BulletList', [] @@ -1755,7 +1806,7 @@ class DefinitionList(SpecializedBody): """Second and subsequent definition_list_items.""" - def text(self, match, context, nextstate): + def text(self, match, context, next_state): """Definition lists.""" return [match.string], 'Definition', [] @@ -1764,18 +1815,18 @@ class EnumeratedList(SpecializedBody): """Second and subsequent enumerated_list list_items.""" - def enumerator(self, match, context, nextstate): + def enumerator(self, match, context, next_state): """Enumerated list item.""" format, sequence, text, ordinal = self.parse_enumerator( - match, self.statemachine.node['enumtype']) - if (sequence != self.statemachine.node['enumtype'] or + match, self.parent['enumtype']) + if (sequence != self.parent['enumtype'] or format != self.format or ordinal != self.lastordinal + 1): # different enumeration: new list self.invalid_input() 
- listitem, blankfinish = self.list_item(match.end()) - self.statemachine.node += listitem - self.blankfinish = blankfinish + listitem, blank_finish = self.list_item(match.end()) + self.parent += listitem + self.blank_finish = blank_finish self.lastordinal = ordinal return [], 'EnumeratedList', [] @@ -1784,11 +1835,11 @@ class FieldList(SpecializedBody): """Second and subsequent field_list fields.""" - def field_marker(self, match, context, nextstate): + def field_marker(self, match, context, next_state): """Field list field.""" - field, blankfinish = self.field(match) - self.statemachine.node += field - self.blankfinish = blankfinish + field, blank_finish = self.field(match) + self.parent += field + self.blank_finish = blank_finish return [], 'FieldList', [] @@ -1796,14 +1847,14 @@ class OptionList(SpecializedBody): """Second and subsequent option_list option_list_items.""" - def option_marker(self, match, context, nextstate): + def option_marker(self, match, context, next_state): """Option list item.""" try: - option_list_item, blankfinish = self.option_list_item(match) + option_list_item, blank_finish = self.option_list_item(match) except MarkupError, detail: self.invalid_input() - self.statemachine.node += option_list_item - self.blankfinish = blankfinish + self.parent += option_list_item + self.blank_finish = blank_finish return [], 'OptionList', [] @@ -1812,13 +1863,13 @@ class RFC2822List(SpecializedBody, RFC2822Body): """Second and subsequent RFC2822-style field_list fields.""" patterns = RFC2822Body.patterns - initialtransitions = RFC2822Body.initialtransitions + initial_transitions = RFC2822Body.initial_transitions - def rfc2822(self, match, context, nextstate): + def rfc2822(self, match, context, next_state): """RFC2822-style field list item.""" - field, blankfinish = self.rfc2822_field(match) - self.statemachine.node += field - self.blankfinish = blankfinish + field, blank_finish = self.rfc2822_field(match) + self.parent += field + self.blank_finish = 
blank_finish return [], 'RFC2822List', [] blank = SpecializedBody.invalid_input @@ -1828,19 +1879,19 @@ class Explicit(SpecializedBody): """Second and subsequent explicit markup construct.""" - def explicit_markup(self, match, context, nextstate): + def explicit_markup(self, match, context, next_state): """Footnotes, hyperlink targets, directives, comments.""" - nodelist, blankfinish = self.explicit_construct(match) - self.statemachine.node += nodelist - self.blankfinish = blankfinish - return [], nextstate, [] + nodelist, blank_finish = self.explicit_construct(match) + self.parent += nodelist + self.blank_finish = blank_finish + return [], next_state, [] - def anonymous(self, match, context, nextstate): + def anonymous(self, match, context, next_state): """Anonymous hyperlink targets.""" - nodelist, blankfinish = self.anonymous_target(match) - self.statemachine.node += nodelist - self.blankfinish = blankfinish - return [], nextstate, [] + nodelist, blank_finish = self.anonymous_target(match) + self.parent += nodelist + self.blank_finish = blank_finish + return [], next_state, [] class SubstitutionDef(Body): @@ -1850,24 +1901,24 @@ class SubstitutionDef(Body): """ patterns = { - 'embedded_directive': r'(%s)::( +|$)' % RSTState.inline.simplename, + 'embedded_directive': r'(%s)::( +|$)' % Inliner.simplename, 'text': r''} - initialtransitions = ['embedded_directive', 'text'] + initial_transitions = ['embedded_directive', 'text'] - def embedded_directive(self, match, context, nextstate): - if self.statemachine.node.has_key('alt'): - attributes = {'alt': self.statemachine.node['alt']} + def embedded_directive(self, match, context, next_state): + if self.parent.has_key('alt'): + attributes = {'alt': self.parent['alt']} else: attributes = {} - nodelist, blankfinish = self.directive(match, **attributes) - self.statemachine.node += nodelist - if not self.statemachine.ateof(): - self.blankfinish = blankfinish + nodelist, blank_finish = self.directive(match, **attributes) + 
self.parent += nodelist + if not self.state_machine.at_eof(): + self.blank_finish = blank_finish raise EOFError - def text(self, match, context, nextstate): - if not self.statemachine.ateof(): - self.blankfinish = self.statemachine.nextlineblank() + def text(self, match, context, next_state): + if not self.state_machine.at_eof(): + self.blank_finish = self.state_machine.is_next_line_blank() raise EOFError @@ -1881,123 +1932,123 @@ class Text(RSTState): patterns = {'underline': Body.patterns['line'], 'text': r''} - initialtransitions = [('underline', 'Body'), ('text', 'Body')] + initial_transitions = [('underline', 'Body'), ('text', 'Body')] - def blank(self, match, context, nextstate): + def blank(self, match, context, next_state): """End of paragraph.""" paragraph, literalnext = self.paragraph( - context, self.statemachine.abslineno() - 1) - self.statemachine.node += paragraph + context, self.state_machine.abs_line_number() - 1) + self.parent += paragraph if literalnext: - self.statemachine.node += self.literal_block() + self.parent += self.literal_block() return [], 'Body', [] def eof(self, context): if context: paragraph, literalnext = self.paragraph( - context, self.statemachine.abslineno() - 1) - self.statemachine.node += paragraph + context, self.state_machine.abs_line_number() - 1) + self.parent += paragraph if literalnext: - self.statemachine.node += self.literal_block() + self.parent += self.literal_block() return [] - def indent(self, match, context, nextstate): + def indent(self, match, context, next_state): """Definition list item.""" definitionlist = nodes.definition_list() - definitionlistitem, blankfinish = self.definition_list_item(context) + definitionlistitem, blank_finish = self.definition_list_item(context) definitionlist += definitionlistitem - self.statemachine.node += definitionlist - offset = self.statemachine.lineoffset + 1 # next line - newlineoffset, blankfinish = self.nestedlistparse( - self.statemachine.inputlines[offset:], - 
inputoffset=self.statemachine.abslineoffset() + 1, - node=definitionlist, initialstate='DefinitionList', - blankfinish=blankfinish, blankfinishstate='Definition') - if not blankfinish: - self.statemachine.node += self.unindent_warning('Definition list') - self.gotoline(newlineoffset) + self.parent += definitionlist + offset = self.state_machine.line_offset + 1 # next line + newline_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=definitionlist, initial_state='DefinitionList', + blank_finish=blank_finish, blank_finish_state='Definition') + if not blank_finish: + self.parent += self.unindent_warning('Definition list') + self.goto_line(newline_offset) return [], 'Body', [] - def underline(self, match, context, nextstate): + def underline(self, match, context, next_state): """Section title.""" - lineno = self.statemachine.abslineno() - if not self.statemachine.matchtitles: - blocktext = context[0] + '\n' + self.statemachine.line - msg = self.statemachine.memo.reporter.severe( + lineno = self.state_machine.abs_line_number() + if not self.state_machine.match_titles: + blocktext = context[0] + '\n' + self.state_machine.line + msg = self.reporter.severe( 'Unexpected section title at line %s.' % lineno, '', nodes.literal_block(blocktext, blocktext)) - self.statemachine.node += msg - return [], nextstate, [] + self.parent += msg + return [], next_state, [] title = context[0].rstrip() underline = match.string.rstrip() source = title + '\n' + underline if len(title) > len(underline): - blocktext = context[0] + '\n' + self.statemachine.line - msg = self.statemachine.memo.reporter.info( + blocktext = context[0] + '\n' + self.state_machine.line + msg = self.reporter.info( 'Title underline too short at line %s.' 
% lineno, '', nodes.literal_block(blocktext, blocktext)) - self.statemachine.node += msg + self.parent += msg style = underline[0] context[:] = [] self.section(title, source, style, lineno - 1) - return [], nextstate, [] + return [], next_state, [] - def text(self, match, context, nextstate): + def text(self, match, context, next_state): """Paragraph.""" - startline = self.statemachine.abslineno() - 1 + startline = self.state_machine.abs_line_number() - 1 msg = None try: - block = self.statemachine.getunindented() + block = self.state_machine.get_text_block(flush_left=1) except statemachine.UnexpectedIndentationError, instance: block, lineno = instance.args - msg = self.statemachine.memo.reporter.error( + msg = self.reporter.error( 'Unexpected indentation at line %s.' % lineno) lines = context + block paragraph, literalnext = self.paragraph(lines, startline) - self.statemachine.node += paragraph - self.statemachine.node += msg + self.parent += paragraph + self.parent += msg if literalnext: try: - self.statemachine.nextline() + self.state_machine.next_line() except IndexError: pass - self.statemachine.node += self.literal_block() - return [], nextstate, [] + self.parent += self.literal_block() + return [], next_state, [] def literal_block(self): """Return a list of nodes.""" - indented, indent, offset, blankfinish = \ - self.statemachine.getindented() + indented, indent, offset, blank_finish = \ + self.state_machine.get_indented() nodelist = [] while indented and not indented[-1].strip(): indented.pop() if indented: data = '\n'.join(indented) nodelist.append(nodes.literal_block(data, data)) - if not blankfinish: + if not blank_finish: nodelist.append(self.unindent_warning('Literal block')) else: - nodelist.append(self.statemachine.memo.reporter.warning( + nodelist.append(self.reporter.warning( 'Literal block expected at line %s; none found.' 
- % self.statemachine.abslineno())) + % self.state_machine.abs_line_number())) return nodelist def definition_list_item(self, termline): - indented, indent, lineoffset, blankfinish = \ - self.statemachine.getindented() + indented, indent, line_offset, blank_finish = \ + self.state_machine.get_indented() definitionlistitem = nodes.definition_list_item('\n'.join(termline + indented)) - termlist, messages = self.term(termline, - self.statemachine.abslineno() - 1) + termlist, messages = self.term( + termline, self.state_machine.abs_line_number() - 1) definitionlistitem += termlist definition = nodes.definition('', *messages) definitionlistitem += definition if termline[0][-2:] == '::': - definition += self.statemachine.memo.reporter.info( + definition += self.reporter.info( 'Blank line missing before literal block? Interpreted as a ' - 'definition list item. At line %s.' % (lineoffset + 1)) - self.nestedparse(indented, inputoffset=lineoffset, node=definition) - return definitionlistitem, blankfinish + 'definition list item. At line %s.' % (line_offset + 1)) + self.nested_parse(indented, input_offset=line_offset, node=definition) + return definitionlistitem, blank_finish def term(self, lines, lineno): """Return a definition_list's term and optional classifier.""" @@ -2028,7 +2079,7 @@ class SpecializedText(Text): """Incomplete construct.""" return [] - def invalid_input(self, match=None, context=None, nextstate=None): + def invalid_input(self, match=None, context=None, next_state=None): """Not a compound element member. 
Abort this state machine.""" raise EOFError @@ -2044,14 +2095,14 @@ class Definition(SpecializedText): def eof(self, context): """Not a definition.""" - self.statemachine.previousline(2) # back up so parent SM can reassess + self.state_machine.previous_line(2) # so parent SM can reassess return [] - def indent(self, match, context, nextstate): + def indent(self, match, context, next_state): """Definition list item.""" - definitionlistitem, blankfinish = self.definition_list_item(context) - self.statemachine.node += definitionlistitem - self.blankfinish = blankfinish + definitionlistitem, blank_finish = self.definition_list_item(context) + self.parent += definitionlistitem + self.blank_finish = blank_finish return [], 'DefinitionList', [] @@ -2066,67 +2117,67 @@ class Line(SpecializedText): """Transition marker at end of section or document.""" if self.eofcheck: # ignore EOFError with sections transition = nodes.transition(context[0]) - self.statemachine.node += transition - msg = self.statemachine.memo.reporter.error( + self.parent += transition + msg = self.reporter.error( 'Document or section may not end with a transition ' - '(line %s).' % (self.statemachine.abslineno() - 1)) - self.statemachine.node += msg + '(line %s).' % (self.state_machine.abs_line_number() - 1)) + self.parent += msg self.eofcheck = 1 return [] - def blank(self, match, context, nextstate): + def blank(self, match, context, next_state): """Transition marker.""" transition = nodes.transition(context[0]) - if len(self.statemachine.node) == 0: - msg = self.statemachine.memo.reporter.error( + if len(self.parent) == 0: + msg = self.reporter.error( 'Document or section may not begin with a transition ' - '(line %s).' % (self.statemachine.abslineno() - 1)) - self.statemachine.node += msg - elif isinstance(self.statemachine.node[-1], nodes.transition): - msg = self.statemachine.memo.reporter.error( + '(line %s).' 
% (self.state_machine.abs_line_number() - 1)) + self.parent += msg + elif isinstance(self.parent[-1], nodes.transition): + msg = self.reporter.error( 'At least one body element must separate transitions; ' 'adjacent transitions at line %s.' - % (self.statemachine.abslineno() - 1)) - self.statemachine.node += msg - self.statemachine.node += transition + % (self.state_machine.abs_line_number() - 1)) + self.parent += msg + self.parent += transition return [], 'Body', [] - def text(self, match, context, nextstate): + def text(self, match, context, next_state): """Potential over- & underlined title.""" - lineno = self.statemachine.abslineno() - 1 + lineno = self.state_machine.abs_line_number() - 1 overline = context[0] title = match.string underline = '' try: - underline = self.statemachine.nextline() + underline = self.state_machine.next_line() except IndexError: blocktext = overline + '\n' + title - msg = self.statemachine.memo.reporter.severe( + msg = self.reporter.severe( 'Incomplete section title at line %s.' % lineno, '', nodes.literal_block(blocktext, blocktext)) - self.statemachine.node += msg + self.parent += msg return [], 'Body', [] source = '%s\n%s\n%s' % (overline, title, underline) overline = overline.rstrip() underline = underline.rstrip() if not self.transitions['underline'][0].match(underline): - msg = self.statemachine.memo.reporter.severe( + msg = self.reporter.severe( 'Missing underline for overline at line %s.' % lineno, '', nodes.literal_block(source, source)) - self.statemachine.node += msg + self.parent += msg return [], 'Body', [] elif overline != underline: - msg = self.statemachine.memo.reporter.severe( - 'Title overline & underline mismatch at ' 'line %s.' % lineno, - '', nodes.literal_block(source, source)) - self.statemachine.node += msg + msg = self.reporter.severe( + 'Title overline & underline mismatch at ' 'line %s.' 
+ % lineno, '', nodes.literal_block(source, source)) + self.parent += msg return [], 'Body', [] title = title.rstrip() if len(title) > len(overline): - msg = self.statemachine.memo.reporter.info( + msg = self.reporter.info( 'Title overline too short at line %s.'% lineno, '', nodes.literal_block(source, source)) - self.statemachine.node += msg + self.parent += msg style = (overline[0], underline[0]) self.eofcheck = 0 # @@@ not sure this is correct self.section(title.lstrip(), source, style, lineno + 1) @@ -2135,19 +2186,19 @@ class Line(SpecializedText): indent = text # indented title - def underline(self, match=None, context=None, nextstate=None): - blocktext = context[0] + '\n' + self.statemachine.line - msg = self.statemachine.memo.reporter.error( + def underline(self, match=None, context=None, next_state=None): + blocktext = context[0] + '\n' + self.state_machine.line + msg = self.reporter.error( 'Invalid section title or transition marker at line %s.' - % (self.statemachine.abslineno() - 1), '', + % (self.state_machine.abs_line_number() - 1), '', nodes.literal_block(blocktext, blocktext)) - self.statemachine.node += msg + self.parent += msg return [], 'Body', [] -stateclasses = [Body, BulletList, DefinitionList, EnumeratedList, FieldList, - OptionList, Explicit, Text, Definition, Line, SubstitutionDef, - RFC2822Body, RFC2822List] +state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList, + OptionList, Explicit, Text, Definition, Line, + SubstitutionDef, RFC2822Body, RFC2822List) """Standard set of State classes used to start `RSTStateMachine`.""" @@ -2164,9 +2215,9 @@ def escape2null(text): parts.append('\x00' + text[found+1:found+2]) start = found + 2 # skip character after escape -def unescape(text, restorebackslashes=0): +def unescape(text, restore_backslashes=0): """Return a string with nulls removed or restored to backslashes.""" - if restorebackslashes: - return text.translate(RSTState.inline.null2backslash) + if restore_backslashes: + 
return text.translate(Inliner.null2backslash) else: - return text.translate(RSTState.inline.identity, '\x00') + return text.translate(Inliner.identity, '\x00') -- cgit v1.2.1 From 5acb5943d2383d82e4e180a49905f6d7f7eb7337 Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 7 May 2002 04:18:13 +0000 Subject: - Fixed bug reporting (line #) when no blank line. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@107 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index d2a133198..434888417 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -141,7 +141,7 @@ class RSTStateMachine(StateMachineWS): StateMachine, and return the resulting document. """ - self.language = languages.getlanguage(document.language_code) + self.language = languages.get_language(document.language_code) self.match_titles = match_titles if inliner is None: inliner = Inliner() @@ -947,9 +947,9 @@ class Body(RSTState): input_offset=self.state_machine.abs_line_offset() + 1, node=bulletlist, initial_state='BulletList', blank_finish=blank_finish) + self.goto_line(newline_offset) if not blank_finish: self.parent += self.unindent_warning('Bullet list') - self.goto_line(newline_offset) return [], next_state, [] def list_item(self, indent): @@ -1000,9 +1000,9 @@ class Body(RSTState): node=enumlist, initial_state='EnumeratedList', blank_finish=blank_finish, extra_settings={'lastordinal': ordinal, 'format': format}) + self.goto_line(newline_offset) if not blank_finish: self.parent += self.unindent_warning('Enumerated list') - self.goto_line(newline_offset) return [], next_state, [] def parse_enumerator(self, match, expected_sequence=None): @@ -1067,9 +1067,9 @@ class Body(RSTState): input_offset=self.state_machine.abs_line_offset() + 1, node=fieldlist, 
initial_state='FieldList', blank_finish=blank_finish) + self.goto_line(newline_offset) if not blank_finish: self.parent += self.unindent_warning('Field list') - self.goto_line(newline_offset) return [], next_state, [] def field(self, match): @@ -1119,9 +1119,9 @@ class Body(RSTState): input_offset=self.state_machine.abs_line_offset() + 1, node=optionlist, initial_state='OptionList', blank_finish=blank_finish) + self.goto_line(newline_offset) if not blank_finish: self.parent += self.unindent_warning('Option list') - self.goto_line(newline_offset) return [], next_state, [] def option_list_item(self, match): @@ -1740,10 +1740,10 @@ class RFC2822Body(Body): input_offset=self.state_machine.abs_line_offset() + 1, node=fieldlist, initial_state='RFC2822List', blank_finish=blank_finish) + self.goto_line(newline_offset) if not blank_finish: self.parent += self.unindent_warning( 'RFC2822-style field list') - self.goto_line(newline_offset) return [], next_state, [] def rfc2822_field(self, match): @@ -1964,9 +1964,9 @@ class Text(RSTState): input_offset=self.state_machine.abs_line_offset() + 1, node=definitionlist, initial_state='DefinitionList', blank_finish=blank_finish, blank_finish_state='Definition') + self.goto_line(newline_offset) if not blank_finish: self.parent += self.unindent_warning('Definition list') - self.goto_line(newline_offset) return [], 'Body', [] def underline(self, match, context, next_state): -- cgit v1.2.1 From 9c4cc2885cf24d4bf4c5edb3d968ecb3dae2ef0d Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 30 May 2002 02:15:20 +0000 Subject: - Added ``-/:`` characters to inline markup's start string prefix, ``/`` to end string suffix. - Added support for an option values object which carries default settings and overrides (from command-line options and library use). - Cleaned up imports: no more relative package imports or comma-separated lists of top-level modules. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@153 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 434888417..3ee0e0534 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -12,6 +12,7 @@ the reStructuredText parser. It defines the following: - `RSTStateMachine`: reStructuredText parser's entry point. - `NestedStateMachine`: recursive StateMachine. - `RSTState`: reStructuredText State superclass. + - `Inliner`: For parsing inline markup. - `Body`: Generic classifier of the first line of a block. - `SpecializedBody`: Superclass for compound element members. - `BulletList`: Second and subsequent bullet_list list_items @@ -102,13 +103,15 @@ Parsing proceeds as follows: __docformat__ = 'reStructuredText' -import sys, re, string -from docutils import nodes, statemachine, utils, roman, urischemes, \ - ApplicationError, DataError +import sys +import re +import string +from docutils import nodes, statemachine, utils, roman, urischemes +from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.utils import normalize_name -import directives, languages -from tableparser import TableParser, TableMarkupError +from docutils.parsers.rst import directives, languages +from docutils.parsers.rst.tableparser import TableParser, TableMarkupError class MarkupError(DataError): pass @@ -141,7 +144,7 @@ class RSTStateMachine(StateMachineWS): StateMachine, and return the resulting document. 
""" - self.language = languages.get_language(document.language_code) + self.language = languages.get_language(document.options.language_code) self.match_titles = match_titles if inliner is None: inliner = Inliner() @@ -431,9 +434,9 @@ class Inliner: openers = '\'"([{<' closers = '\'")]}>' - start_string_prefix = (r'(?:(?<=^)|(?<=[ \n%s]))' + start_string_prefix = (r'(?:(?<=^)|(?<=[-/: \n%s]))' % re.escape(openers)) - end_string_suffix = (r'(?:(?=$)|(?=[- \n.,:;!?%s]))' + end_string_suffix = (r'(?:(?=$)|(?=[-/:.,;!? \n%s]))' % re.escape(closers)) non_whitespace_before = r'(? Date: Thu, 27 Jun 2002 01:20:25 +0000 Subject: Updated & cleaned up (a bit). git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@208 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 3ee0e0534..543bc1108 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1361,8 +1361,9 @@ class Body(RSTState): footnote['name'] = name self.document.note_footnote(footnote) if name: - self.document.note_explicit_target(footnote, - footnote) + self.document.note_explicit_target(footnote, footnote) + else: + self.document.set_id(footnote) if indented: self.nested_parse(indented, input_offset=offset, node=footnote) return [footnote], blank_finish @@ -1444,8 +1445,7 @@ class Body(RSTState): self.document.note_external_target(target) else: self.document.note_internal_target(target) - self.document.note_explicit_target( - target, self.parent) + self.document.note_explicit_target(target, self.parent) else: # anonymous target if refuri: target['refuri'] = refuri -- cgit v1.2.1 From eca63d0c7af69af08c0acc8e54b1d8e9fc97b88d Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 28 Jun 2002 04:19:02 +0000 Subject: - Fixed bug with literal blocks. 
- Added support for input and output encodings and for internal Unicode support. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@221 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 543bc1108..aff499625 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -353,7 +353,7 @@ class RSTState(StateWS): if data[-2:] == '::': if len(data) == 2: return [], 1 - elif data[-3] == ' ': + elif data[-3] in ' \n': text = data[:-3].rstrip() else: text = data[:-1] @@ -445,8 +445,6 @@ class Inliner: uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9]""" urilast = r"""[_~/\]a-zA-Z0-9]""" emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9]""" - identity = string.maketrans('', '') - null2backslash = string.maketrans('\x00', '\\') patterns = Stuff( initial=re.compile( r""" @@ -970,8 +968,8 @@ class Body(RSTState): if ordinal is None: msg = self.reporter.error( ('Enumerated list start value invalid at line %s: ' - '%r (sequence %r)' % (self.state_machine.abs_line_number(), - text, sequence))) + '"%s" (sequence %r)' + % (self.state_machine.abs_line_number(), text, sequence))) self.parent += msg indented, line_offset, blank_finish = \ self.state_machine.get_known_indented(match.end()) @@ -984,8 +982,8 @@ class Body(RSTState): if ordinal != 1: msg = self.reporter.info( ('Enumerated list start value not ordinal-1 at line %s: ' - '%r (ordinal %s)' % (self.state_machine.abs_line_number(), - text, ordinal))) + '"%s" (ordinal %s)' + % (self.state_machine.abs_line_number(), text, ordinal))) self.parent += msg enumlist = nodes.enumerated_list() self.parent += enumlist @@ -1167,8 +1165,8 @@ class Body(RSTState): optlist.append(option) else: raise MarkupError('wrong numer of option tokens (=%s), ' - 'should be 1 or 2: %r' % (len(tokens), - 
optionstring)) + 'should be 1 or 2: "%s"' % (len(tokens), + optionstring)) return optlist def doctest(self, match, context, next_state): @@ -2221,6 +2219,6 @@ def escape2null(text): def unescape(text, restore_backslashes=0): """Return a string with nulls removed or restored to backslashes.""" if restore_backslashes: - return text.translate(Inliner.null2backslash) + return text.replace('\x00', '\\') else: - return text.translate(Inliner.identity, '\x00') + return ''.join(text.split('\x00')) -- cgit v1.2.1 From c06d0f016cf4f539db28a4ea6142855cc7bb92aa Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 29 Jun 2002 23:07:42 +0000 Subject: Applied patch from Simon Budig, simplifying regexps by with symbolic names. Also, Inliner.groups and Body.explicit.groups have been removed. The patch pointed out a bug in interpreted text parsing code, to be resolved. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@232 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 198 ++++++++++++++++++----------------------- 1 file changed, 89 insertions(+), 109 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index aff499625..ffce6171b 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -402,10 +402,6 @@ class Inliner: self.parent = parent pattern = self.patterns.initial dispatch = self.dispatch - start = self.groups.initial.start - 1 - backquote = self.groups.initial.backquote - 1 - refend = self.groups.initial.refend - 1 - fnend = self.groups.initial.fnend - 1 remaining = escape2null(text) processed = [] unprocessed = [] @@ -413,11 +409,11 @@ class Inliner: while remaining: match = pattern.search(remaining) if match: - groups = match.groups() - before, inlines, remaining, sysmessages = \ - dispatch[groups[start] or groups[backquote] - or groups[refend] - or groups[fnend]](self, match, lineno) + groupdict = match.groupdict() + method = 
dispatch[groupdict["start"] or groupdict["backquote"] + or groupdict["refend"] or groupdict["fnend"]] + before, inlines, remaining, sysmessages = method(self, match, + lineno) unprocessed.append(before) messages += sysmessages if inlines: @@ -434,14 +430,12 @@ class Inliner: openers = '\'"([{<' closers = '\'")]}>' - start_string_prefix = (r'(?:(?<=^)|(?<=[-/: \n%s]))' - % re.escape(openers)) - end_string_suffix = (r'(?:(?=$)|(?=[-/:.,;!? \n%s]))' - % re.escape(closers)) + start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers)) + end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n%s]))' % re.escape(closers)) non_whitespace_before = r'(? # start-strings only: \*\* # strong | \* # emphasis @@ -464,27 +458,27 @@ class Inliner: ) %s # no whitespace after | # *OR* - ( # whole constructs (group 3): - (%s) # reference name (4) - (__?) # end-string (5) + (?P # whole constructs: + (?P%s) # reference name + (?P__?) # end-string | \[ # footnote_reference or # citation_reference start - ( # label (group 6): + (?P # label: [0-9]+ # manually numbered | # *OR* - \#(?:%s)? # auto-numbered (w/ label?) + \#(%s)? # auto-numbered (w/ label?) | # *OR* \* # auto-symbol | # *OR* - (%s) # citation reference (group 7) + (?P%s) # citation reference ) - (\]_) # end-string (group 8) + (?P\]_) # end-string ) %s # end-string suffix | # *OR* - ((?::%s:)?) # optional role (group 9) - ( # start-string (group 10) + (?P(:%s:)?) 
# optional role + (?P # start-string ` # interpreted text # or phrase reference (?!`) # but not literal @@ -505,9 +499,9 @@ class Inliner: strong=re.compile(non_whitespace_escape_before + r'(\*\*)' + end_string_suffix), interpreted_or_phrase_ref=re.compile( - '%s(`(:%s:|__?)?)%s' % (non_whitespace_escape_before, - simplename, - end_string_suffix)), + '%s(`(?P:%s:|__?)?)%s' % (non_whitespace_escape_before, + simplename, + end_string_suffix)), literal=re.compile(non_whitespace_before + '(``)' + end_string_suffix), target=re.compile(non_whitespace_escape_before @@ -518,33 +512,33 @@ class Inliner: uri=re.compile( r""" %s # start-string prefix - ( - ( # absolute URI (group 2) - ( # scheme (http, ftp, mailto) - [a-zA-Z][a-zA-Z0-9.+-]* # (group 3) + (?P + (?P # absolute URI + (?P # scheme (http, ftp, mailto) + [a-zA-Z][a-zA-Z0-9.+-]* ) : - (?: - (?: # either: - (?://?)? # hierarchical URI + ( + ( # either: + (//?)? # hierarchical URI %s* # URI characters %s # final URI char ) - (?: # optional query + ( # optional query \?%s* # URI characters %s # final URI char )? - (?: # optional fragment + ( # optional fragment \#%s* # URI characters %s # final URI char )? 
) ) | # *OR* - ( # email address (group 4) - %s+(?:\.%s+)* # name + (?P # email address + %s+(\.%s+)* # name @ # at - %s+(?:\.%s*)* # host + %s+(\.%s*)* # host %s # final URI char ) ) @@ -553,11 +547,6 @@ class Inliner: uric, urilast, emailc, emailc, emailc, emailc, urilast, end_string_suffix,), re.VERBOSE)) - groups = Stuff(initial=Stuff(start=2, whole=3, refname=4, refend=5, - footnotelabel=6, citationlabel=7, - fnend=8, role=9, backquote=10), - interpreted_or_phrase_ref=Stuff(suffix=2), - uri=Stuff(whole=1, absolute=2, scheme=3, email=4)) def quoted_start(self, match): """Return 1 if inline markup start-string is 'quoted', 0 if not.""" @@ -581,8 +570,8 @@ class Inliner: def inline_obj(self, match, lineno, pattern, nodeclass, restore_backslashes=0): string = match.string - matchstart = match.start(self.groups.initial.start) - matchend = match.end(self.groups.initial.start) + matchstart = match.start('start') + matchend = match.end('start') if self.quoted_start(match): return (string[:matchend], [], string[matchend:], [], '') endmatch = pattern.search(string[matchend:]) @@ -621,13 +610,11 @@ class Inliner: def interpreted_or_phrase_ref(self, match, lineno): pattern = self.patterns.interpreted_or_phrase_ref - rolegroup = self.groups.initial.role - backquote = self.groups.initial.backquote string = match.string - matchstart = match.start(backquote) - matchend = match.end(backquote) - rolestart = match.start(rolegroup) - role = match.group(rolegroup) + matchstart = match.start('backquote') + matchend = match.end('backquote') + rolestart = match.start('role') + role = match.group('role') position = '' if role: role = role[1:-1] @@ -680,13 +667,12 @@ class Inliner: def interpreted(self, before, after, endmatch, role, position, lineno, escaped, rawsource, text): - suffix = self.groups.interpreted_or_phrase_ref.suffix - if endmatch.group(suffix): + if endmatch.group('suffix'): if role: msg = self.reporter.warning('Multiple roles in interpreted ' 'text at line %s.' 
% lineno) return (before + rawsource, [], after, [msg]) - role = endmatch.group(suffix)[1:-1] + role = endmatch.group('suffix')[1:-1] position = 'suffix' if role: atts = {'role': role, 'position': position} @@ -743,9 +729,9 @@ class Inliner: Handles `nodes.footnote_reference` and `nodes.citation_reference` elements. """ - label = match.group(self.groups.initial.footnotelabel) + label = match.group('footnotelabel') refname = normalize_name(label) - if match.group(self.groups.initial.citationlabel): + if match.group('citationlabel'): refnode = nodes.citation_reference('[%s]_' % label, refname=refname) refnode += nodes.Text(label) @@ -767,16 +753,15 @@ class Inliner: refnode['refname'] = refname self.document.note_footnote_ref(refnode) string = match.string - matchstart = match.start(self.groups.initial.whole) - matchend = match.end(self.groups.initial.whole) + matchstart = match.start('whole') + matchend = match.end('whole') return (string[:matchstart], [refnode], string[matchend:], []) def reference(self, match, lineno, anonymous=None): - referencename = match.group(self.groups.initial.refname) + referencename = match.group('refname') refname = normalize_name(referencename) - referencenode = nodes.reference( - referencename + match.group(self.groups.initial.refend), - referencename) + referencenode = nodes.reference(referencename + match.group('refend'), + referencename) if anonymous: referencenode['anonymous'] = 1 self.document.note_anonymous_ref(referencenode) @@ -784,22 +769,21 @@ class Inliner: referencenode['refname'] = refname self.document.note_refname(referencenode) string = match.string - matchstart = match.start(self.groups.initial.whole) - matchend = match.end(self.groups.initial.whole) + matchstart = match.start('whole') + matchend = match.end('whole') return (string[:matchstart], [referencenode], string[matchend:], []) def anonymous_reference(self, match, lineno): return self.reference(match, lineno, anonymous=1) def standalone_uri(self, match, lineno): 
- scheme = self.groups.uri.scheme - if not match.group(scheme) or urischemes.schemes.has_key( - match.group(scheme).lower()): - if match.group(self.groups.uri.email): + if not match.group('scheme') or urischemes.schemes.has_key( + match.group('scheme').lower()): + if match.group('email'): addscheme = 'mailto:' else: addscheme = '' - text = match.group(self.groups.uri.whole) + text = match.group('whole') unescaped = unescape(text, 0) return [nodes.reference(unescape(text, 1), unescaped, refuri=addscheme + unescaped)] @@ -1289,54 +1273,52 @@ class Body(RSTState): explicit.patterns = Stuff( target=re.compile(r""" - (?: - _ # anonymous target - | # *OR* - (`?) # optional open quote - (?![ `]) # first char. not space or backquote - ( # reference name + ( + _ # anonymous target + | # *OR* + (?P`?) # optional open quote + (?![ `]) # first char. not space or + # backquote + (?P # reference name .+? ) - %s # not whitespace or escape - \1 # close quote if open quote used + %s # not whitespace or escape + (?P=quote) # close quote if open quote used ) - %s # not whitespace or escape - : # end of reference name - (?:[ ]+|$) # followed by whitespace + %s # not whitespace or escape + : # end of reference name + ([ ]+|$) # followed by whitespace """ % (Inliner.non_whitespace_escape_before, Inliner.non_whitespace_escape_before), re.VERBOSE), reference=re.compile(r""" - (?: - (%s)_ # simple reference name - | # *OR* - ` # open backquote - (?![ ]) # not space - (.+?) # hyperlink phrase - %s # not whitespace or escape - `_ # close backquote, reference mark + ( + (?P%s)_ # simple reference name + | # *OR* + ` # open backquote + (?![ ]) # not space + (?P.+?) # hyperlink phrase + %s # not whitespace or escape + `_ # close backquote, + # reference mark ) - $ # end of string + $ # end of string """ % (Inliner.simplename, Inliner.non_whitespace_escape_before,), re.VERBOSE), substitution=re.compile(r""" - (?: - (?![ ]) # first char. not space - (.+?) 
# substitution text - %s # not whitespace or escape - \| # close delimiter + ( + (?![ ]) # first char. not space + (?P.+?) # substitution text + %s # not whitespace or escape + \| # close delimiter ) - (?:[ ]+|$) # followed by whitespace + ([ ]+|$) # followed by whitespace """ % Inliner.non_whitespace_escape_before, re.VERBOSE),) - explicit.groups = Stuff( - target=Stuff(quote=1, name=2), - reference=Stuff(simple=1, phrase=2), - substitution=Stuff(name=1)) def footnote(self, match): indented, indent, offset, blank_finish = \ @@ -1382,7 +1364,6 @@ class Body(RSTState): def hyperlink_target(self, match): pattern = self.explicit.patterns.target - namegroup = self.explicit.groups.target.name lineno = self.state_machine.abs_line_number() block, indent, offset, blank_finish = \ self.state_machine.get_first_known_indented( @@ -1408,7 +1389,7 @@ class Body(RSTState): refname = self.is_reference(reference) if refname: target = nodes.target(blocktext, '', refname=refname) - self.add_target(targetmatch.group(namegroup), '', target) + self.add_target(targetmatch.group('name'), '', target) self.document.note_indirect_target(target) return [target], blank_finish nodelist = [] @@ -1423,7 +1404,7 @@ class Body(RSTState): else: unescaped = unescape(reference) target = nodes.target(blocktext, '') - self.add_target(targetmatch.group(namegroup), unescaped, target) + self.add_target(targetmatch.group('name'), unescaped, target) nodelist.append(target) return nodelist, blank_finish @@ -1431,8 +1412,7 @@ class Body(RSTState): match = self.explicit.patterns.reference.match(normalize_name(reference)) if not match: return None - return unescape(match.group(self.explicit.groups.reference.simple) - or match.group(self.explicit.groups.reference.phrase)) + return unescape(match.group('simple') or match.group('phrase')) def add_target(self, targetname, refuri, target): if targetname: @@ -1475,7 +1455,7 @@ class Body(RSTState): if not block[0]: del block[0] offset += 1 - subname = 
subdefmatch.group(self.explicit.groups.substitution.name) + subname = subdefmatch.group('name') name = normalize_name(subname) substitutionnode = nodes.substitution_definition( blocktext, name=name, alt=subname) @@ -1592,13 +1572,13 @@ class Body(RSTState): \* # auto-symbol footnote ) \] - (?:[ ]+|$) # whitespace or end of line + ([ ]+|$) # whitespace or end of line """ % Inliner.simplename, re.VERBOSE)), (citation, re.compile(r""" \.\.[ ]+ # explicit markup start \[(%s)\] # citation label - (?:[ ]+|$) # whitespace or end of line + ([ ]+|$) # whitespace or end of line """ % Inliner.simplename, re.VERBOSE)), (hyperlink_target, re.compile(r""" @@ -1617,7 +1597,7 @@ class Body(RSTState): \.\.[ ]+ # explicit markup start (%s) # directive name :: # directive delimiter - (?:[ ]+|$) # whitespace or end of line + ([ ]+|$) # whitespace or end of line """ % Inliner.simplename, re.VERBOSE))] def explicit_markup(self, match, context, next_state): -- cgit v1.2.1 From 606baf1b20ec93778e127d90c11d60321ea2ae4e Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 4 Jul 2002 01:21:21 +0000 Subject: - Converted regexps from ``'%s' % var`` to ``'%(var)s' % locals()``. - Fixed a bug in ``Inliner.interpreted_or_phrase_ref()``. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@240 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 192 ++++++++++++++++++++--------------------- 1 file changed, 93 insertions(+), 99 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index ffce6171b..f3be937d6 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -400,18 +400,18 @@ class Inliner: self.reporter = memo.reporter self.document = memo.document self.parent = parent - pattern = self.patterns.initial + pattern_search = self.patterns.initial.search dispatch = self.dispatch remaining = escape2null(text) processed = [] unprocessed = [] messages = [] while remaining: - match = pattern.search(remaining) + match = pattern_search(remaining) if match: - groupdict = match.groupdict() - method = dispatch[groupdict["start"] or groupdict["backquote"] - or groupdict["refend"] or groupdict["fnend"]] + groups = match.groupdict() + method = dispatch[groups['start'] or groups['backquote'] + or groups['refend'] or groups['fnend']] before, inlines, remaining, sysmessages = method(self, match, lineno) unprocessed.append(before) @@ -437,12 +437,12 @@ class Inliner: non_whitespace_after = r'(?![ \n])' simplename = r'[a-zA-Z0-9]([-_.a-zA-Z0-9]*[a-zA-Z0-9])?' uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9]""" - urilast = r"""[_~/\]a-zA-Z0-9]""" + urilast = r"""[_~/\]a-zA-Z0-9]""" # no punctuation emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9]""" patterns = Stuff( initial=re.compile( r""" - %s # start-string prefix + %(start_string_prefix)s ( (?P # start-strings only: \*\* # strong @@ -456,52 +456,53 @@ class Inliner: | \| # substitution_reference start ) - %s # no whitespace after + %(non_whitespace_after)s | # *OR* (?P # whole constructs: - (?P%s) # reference name - (?P__?) # end-string + (?P%(simplename)s) # reference name + (?P__?) 
# end-string | \[ # footnote_reference or # citation_reference start (?P # label: [0-9]+ # manually numbered | # *OR* - \#(%s)? # auto-numbered (w/ label?) + \#(%(simplename)s)? # auto-numbered (w/ label?) | # *OR* \* # auto-symbol | # *OR* - (?P%s) # citation reference + (?P + %(simplename)s) # citation reference ) (?P\]_) # end-string ) - %s # end-string suffix + %(end_string_suffix)s | # *OR* - (?P(:%s:)?) # optional role + (?P(:%(simplename)s:)?) # optional role (?P # start-string ` # interpreted text # or phrase reference (?!`) # but not literal ) - %s # no whitespace after + %(non_whitespace_after)s # no whitespace after ) - """ % (start_string_prefix, - non_whitespace_after, - simplename, - simplename, - simplename, - end_string_suffix, - simplename, - non_whitespace_after,), - re.VERBOSE), + """ % locals(), re.VERBOSE), emphasis=re.compile(non_whitespace_escape_before + r'(\*)' + end_string_suffix), strong=re.compile(non_whitespace_escape_before + r'(\*\*)' + end_string_suffix), interpreted_or_phrase_ref=re.compile( - '%s(`(?P:%s:|__?)?)%s' % (non_whitespace_escape_before, - simplename, - end_string_suffix)), + r""" + %(non_whitespace_escape_before)s + ( + ` + (?P + (?P:%(simplename)s:)? + (?P__?)? + ) + ) + %(end_string_suffix)s + """ % locals(), re.VERBOSE), literal=re.compile(non_whitespace_before + '(``)' + end_string_suffix), target=re.compile(non_whitespace_escape_before @@ -511,7 +512,7 @@ class Inliner: + end_string_suffix), uri=re.compile( r""" - %s # start-string prefix + %(start_string_prefix)s (?P (?P # absolute URI (?P # scheme (http, ftp, mailto) @@ -521,32 +522,29 @@ class Inliner: ( ( # either: (//?)? # hierarchical URI - %s* # URI characters - %s # final URI char + %(uric)s* # URI characters + %(urilast)s # final URI char ) ( # optional query - \?%s* # URI characters - %s # final URI char + \?%(uric)s* + %(urilast)s )? ( # optional fragment - \#%s* # URI characters - %s # final URI char + \#%(uric)s* + %(urilast)s )? 
) ) | # *OR* (?P # email address - %s+(\.%s+)* # name - @ # at - %s+(\.%s*)* # host - %s # final URI char + %(emailc)s+(\.%(emailc)s+)* # name + @ # at + %(emailc)s+(\.%(emailc)s*)* # host + %(urilast)s # final URI char ) ) - %s # end-string suffix - """ % (start_string_prefix, uric, urilast, uric, urilast, - uric, urilast, emailc, emailc, emailc, emailc, urilast, - end_string_suffix,), - re.VERBOSE)) + %(end_string_suffix)s + """ % locals(), re.VERBOSE)) def quoted_start(self, match): """Return 1 if inline markup start-string is 'quoted', 0 if not.""" @@ -567,22 +565,21 @@ class Inliner: pass return 0 - def inline_obj(self, match, lineno, pattern, nodeclass, + def inline_obj(self, match, lineno, end_pattern, nodeclass, restore_backslashes=0): string = match.string matchstart = match.start('start') matchend = match.end('start') if self.quoted_start(match): return (string[:matchend], [], string[matchend:], [], '') - endmatch = pattern.search(string[matchend:]) + endmatch = end_pattern.search(string[matchend:]) if endmatch and endmatch.start(1): # 1 or more chars text = unescape(endmatch.string[:endmatch.start(1)], restore_backslashes) - rawsource = unescape(string[matchstart:matchend+endmatch.end(1)], - 1) + textend = matchend + endmatch.end(1) + rawsource = unescape(string[matchstart:textend], 1) return (string[:matchstart], [nodeclass(rawsource, text)], - string[matchend:][endmatch.end(1):], [], - endmatch.group(1)) + string[textend:], [], endmatch.group(1)) msg = self.reporter.warning( 'Inline %s start-string without end-string ' 'at line %s.' 
% (nodeclass.__name__, lineno)) @@ -609,7 +606,7 @@ class Inliner: return before, inlines, remaining, sysmessages def interpreted_or_phrase_ref(self, match, lineno): - pattern = self.patterns.interpreted_or_phrase_ref + end_pattern = self.patterns.interpreted_or_phrase_ref string = match.string matchstart = match.start('backquote') matchend = match.end('backquote') @@ -621,40 +618,44 @@ class Inliner: position = 'prefix' elif self.quoted_start(match): return (string[:matchend], [], string[matchend:], []) - endmatch = pattern.search(string[matchend:]) + endmatch = end_pattern.search(string[matchend:]) if endmatch and endmatch.start(1): # 1 or more chars + textend = matchend + endmatch.end() + if endmatch.group('role'): + if role: + msg = self.reporter.warning( + 'Multiple roles in interpreted text at line %s (both ' + 'prefix and suffix present; only one allowed).' + % lineno) + text = unescape(string[rolestart:textend], 1) + prb = self.problematic(text, text, msg) + return string[:rolestart], [prb], string[textend:], [msg] + role = endmatch.group('suffix')[1:-1] + position = 'suffix' escaped = endmatch.string[:endmatch.start(1)] text = unescape(escaped, 0) - rawsource = unescape( - string[match.start():matchend+endmatch.end()], 1) + rawsource = unescape(string[matchstart:textend], 1) if rawsource[-1:] == '_': if role: msg = self.reporter.warning( - 'Mismatch: inline interpreted text start-string and' - ' role with phrase-reference end-string at line %s.' - % lineno) - text = unescape(string[matchstart:matchend], 1) - rawsource = unescape(string[matchstart:matchend], 1) - prb = self.problematic(text, rawsource, msg) - return (string[:matchstart], [prb], string[matchend:], - [msg]) - return self.phrase_ref( - string[:matchstart], string[matchend:][endmatch.end():], - text, rawsource) + 'Mismatch: both interpreted text role %s and ' + 'reference suffix at line %s.' 
% (position, lineno)) + text = unescape(string[rolestart:textend], 1) + prb = self.problematic(text, text, msg) + return string[:rolestart], [prb], string[textend:], [msg] + return self.phrase_ref(string[:matchstart], string[textend:], + rawsource, text) else: - return self.interpreted( - string[:rolestart], string[matchend:][endmatch.end():], - endmatch, role, position, lineno, - escaped, rawsource, text) + return self.interpreted(string[:rolestart], string[textend:], + rawsource, text, role, position) msg = self.reporter.warning( 'Inline interpreted text or phrase reference start-string ' 'without end-string at line %s.' % lineno) text = unescape(string[matchstart:matchend], 1) - rawsource = unescape(string[matchstart:matchend], 1) - prb = self.problematic(text, rawsource, msg) + prb = self.problematic(text, text, msg) return string[:matchstart], [prb], string[matchend:], [msg] - def phrase_ref(self, before, after, text, rawsource): + def phrase_ref(self, before, after, rawsource, text): refname = normalize_name(text) reference = nodes.reference(rawsource, text) if rawsource[-2:] == '__': @@ -665,15 +666,7 @@ class Inliner: self.document.note_refname(reference) return before, [reference], after, [] - def interpreted(self, before, after, endmatch, role, position, lineno, - escaped, rawsource, text): - if endmatch.group('suffix'): - if role: - msg = self.reporter.warning('Multiple roles in interpreted ' - 'text at line %s.' % lineno) - return (before + rawsource, [], after, [msg]) - role = endmatch.group('suffix')[1:-1] - position = 'suffix' + def interpreted(self, before, after, rawsource, text, role, position): if role: atts = {'role': role, 'position': position} else: @@ -797,7 +790,8 @@ class Inliner: """ Check each of the patterns in `self.implicit` for a match, and dispatch to the stored method for the pattern. Recursively check the - text before and after the match. + text before and after the match. Return a list of `nodes.Text` and + inline element nodes. 
""" if not text: return [] @@ -805,13 +799,20 @@ class Inliner: match = pattern.search(text) if match: try: - return (self.implicit_inline(text[:match.start()], lineno) + return (self.text(text[:match.start()]) + dispatch(self, match, lineno) + self.implicit_inline(text[match.end():], lineno)) except MarkupMismatch: pass return [nodes.Text(unescape(text))] + def text(self, text): + """Return a list containing one `nodes.Text` node or nothing.""" + if not text: + return [] + return [nodes.Text(unescape(text))] + + dispatch = {'*': emphasis, '**': strong, '`': interpreted_or_phrase_ref, @@ -1282,43 +1283,35 @@ class Body(RSTState): (?P # reference name .+? ) - %s # not whitespace or escape + %(non_whitespace_escape_before)s (?P=quote) # close quote if open quote used ) - %s # not whitespace or escape + %(non_whitespace_escape_before)s : # end of reference name ([ ]+|$) # followed by whitespace - """ - % (Inliner.non_whitespace_escape_before, - Inliner.non_whitespace_escape_before), - re.VERBOSE), + """ % vars(Inliner), re.VERBOSE), reference=re.compile(r""" ( - (?P%s)_ # simple reference name + (?P%(simplename)s)_ | # *OR* ` # open backquote (?![ ]) # not space (?P.+?) # hyperlink phrase - %s # not whitespace or escape + %(non_whitespace_escape_before)s `_ # close backquote, # reference mark ) $ # end of string - """ % - (Inliner.simplename, - Inliner.non_whitespace_escape_before,), - re.VERBOSE), + """ % vars(Inliner), re.VERBOSE), substitution=re.compile(r""" ( (?![ ]) # first char. not space (?P.+?) 
# substitution text - %s # not whitespace or escape + %(non_whitespace_escape_before)s \| # close delimiter ) ([ ]+|$) # followed by whitespace - """ % - Inliner.non_whitespace_escape_before, - re.VERBOSE),) + """ % vars(Inliner), re.VERBOSE),) def footnote(self, match): indented, indent, offset, blank_finish = \ @@ -1409,7 +1402,8 @@ class Body(RSTState): return nodelist, blank_finish def is_reference(self, reference): - match = self.explicit.patterns.reference.match(normalize_name(reference)) + match = self.explicit.patterns.reference.match( + normalize_name(reference)) if not match: return None return unescape(match.group('simple') or match.group('phrase')) -- cgit v1.2.1 From e521880b4a16e6500b171d673e1c6ef50d92e7e2 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 11 Jul 2002 01:49:35 +0000 Subject: - Allowed non-ASCII in "simple names" (directive names, field names, references, etc.). git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@262 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index f3be937d6..12174c5de 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -435,7 +435,7 @@ class Inliner: non_whitespace_before = r'(? Date: Sat, 13 Jul 2002 03:05:45 +0000 Subject: - Converted ``Inliner.patterns.initial`` to be dynamically built from parts with ``build_regexp()`` function. - Changed ``Inliner.inline_target`` to ``.inline_internal_target``. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@275 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 103 +++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 50 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 12174c5de..6c26b779b 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -105,7 +105,7 @@ __docformat__ = 'reStructuredText' import sys import re -import string +from types import TupleType from docutils import nodes, statemachine, utils, roman, urischemes from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS @@ -377,6 +377,29 @@ class RSTState(StateWS): % (node_name, self.state_machine.abs_line_number() + 1))) +def build_regexp(definition, compile=1): + """ + Build, compile and return a regular expression based on `definition`. + + :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts), + where "parts" is a list of regular expressions and/or regular + expression definitions to be joined into an or-group. 
+ """ + name, prefix, suffix, parts = definition + part_strings = [] + for part in parts: + if type(part) is TupleType: + part_strings.append(build_regexp(part, None)) + else: + part_strings.append(part) + or_group = '|'.join(part_strings) + regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals() + if compile: + return re.compile(regexp, re.UNICODE) + else: + return regexp + + class Inliner: """ @@ -439,54 +462,34 @@ class Inliner: uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9]""" urilast = r"""[_~/\]a-zA-Z0-9]""" # no punctuation emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9]""" + parts = ('initial_inline', start_string_prefix, '', + [('start', '', non_whitespace_after, # simple start-strings + [r'\*\*', # strong + r'\*(?!\*)', # emphasis but not strong + r'``', # literal + r'_`', # inline internal target + r'\|'] # substitution reference + ), + ('whole', '', end_string_suffix, # whole constructs + [# reference name & end-string + r'(?P%s)(?P__?)' % simplename, + ('footnotelabel', r'\[', r'(?P\]_)', + [r'[0-9]+', # manually numbered + r'\#(%s)?' % simplename, # auto-numbered (w/ label?) + r'\*', # auto-symbol + r'(?P%s)' % simplename] # citation reference + ) + ] + ), + ('backquote', # interpreted text or phrase reference + '(?P(:%s:)?)' % simplename, # optional role + non_whitespace_after, + ['`(?!`)'] # but not literal + ) + ] + ) patterns = Stuff( - initial=re.compile( - r""" - %(start_string_prefix)s - ( - (?P # start-strings only: - \*\* # strong - | - \* # emphasis - (?!\*) # but not strong - | - `` # literal - | - _` # inline hyperlink target - | - \| # substitution_reference start - ) - %(non_whitespace_after)s - | # *OR* - (?P # whole constructs: - (?P%(simplename)s) # reference name - (?P__?) # end-string - | - \[ # footnote_reference or - # citation_reference start - (?P # label: - [0-9]+ # manually numbered - | # *OR* - \#(%(simplename)s)? # auto-numbered (w/ label?) 
- | # *OR* - \* # auto-symbol - | # *OR* - (?P - %(simplename)s) # citation reference - ) - (?P\]_) # end-string - ) - %(end_string_suffix)s - | # *OR* - (?P(:%(simplename)s:)?) # optional role - (?P # start-string - ` # interpreted text - # or phrase reference - (?!`) # but not literal - ) - %(non_whitespace_after)s # no whitespace after - ) - """ % locals(), re.VERBOSE | re.UNICODE), + initial=build_regexp(parts), emphasis=re.compile(non_whitespace_escape_before + r'(\*)' + end_string_suffix), strong=re.compile(non_whitespace_escape_before @@ -679,7 +682,7 @@ class Inliner: restore_backslashes=1) return before, inlines, remaining, sysmessages - def inline_target(self, match, lineno): + def inline_internal_target(self, match, lineno): before, inlines, remaining, sysmessages, endstring = self.inline_obj( match, lineno, self.patterns.target, nodes.target) if inlines and isinstance(inlines[0], nodes.target): @@ -817,7 +820,7 @@ class Inliner: '**': strong, '`': interpreted_or_phrase_ref, '``': literal, - '_`': inline_target, + '_`': inline_internal_target, ']_': footnote_reference, '|': substitution_reference, '_': reference, -- cgit v1.2.1 From c21a148798856797bbf693ff1f5c8ba9c604cce1 Mon Sep 17 00:00:00 2001 From: goodger Date: Sun, 14 Jul 2002 02:45:41 +0000 Subject: Undid a mistake. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@278 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 6c26b779b..9b4114b80 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -802,18 +802,14 @@ class Inliner: match = pattern.search(text) if match: try: - return (self.text(text[:match.start()]) + # Must recurse on strings before *and* after the match; + # there may be multiple patterns. 
+ return (self.implicit_inline(text[:match.start()], lineno) + dispatch(self, match, lineno) + self.implicit_inline(text[match.end():], lineno)) except MarkupMismatch: pass return [nodes.Text(unescape(text))] - - def text(self, text): - """Return a list containing one `nodes.Text` node or nothing.""" - if not text: - return [] - return [nodes.Text(unescape(text))] dispatch = {'*': emphasis, -- cgit v1.2.1 From 6b456ac082f235f368eaa37e4b4905222a76028d Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 18 Jul 2002 00:52:40 +0000 Subject: - Updated docstrings. - Changed "table" to "grid_table"; added "simple_table" support. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@300 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 161 ++++++++++++++++++++++++++++++----------- 1 file changed, 119 insertions(+), 42 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 9b4114b80..04a9ea605 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -44,11 +44,11 @@ the reStructuredText parser. It defines the following: Parser Overview =============== -The reStructuredText parser is implemented as a state machine, examining its -input one line at a time. To understand how the parser works, please first -become familiar with the `docutils.statemachine` module. In the description -below, references are made to classes defined in this module; please see the -individual classes for details. +The reStructuredText parser is implemented as a recursive state machine, +examining its input one line at a time. To understand how the parser works, +please first become familiar with the `docutils.statemachine` module. In the +description below, references are made to classes defined in this module; +please see the individual classes for details. Parsing proceeds as follows: @@ -60,26 +60,27 @@ Parsing proceeds as follows: 2. 
The method associated with the matched transition pattern is called. A. Some transition methods are self-contained, appending elements to the - document tree ('doctest' parses a doctest block). The parser's current - line index is advanced to the end of the element, and parsing continues - with step 1. + document tree (`Body.doctest` parses a doctest block). The parser's + current line index is advanced to the end of the element, and parsing + continues with step 1. - B. Others trigger the creation of a nested state machine, whose job is to - parse a compound construct ('indent' does a block quote, 'bullet' does a - bullet list, 'overline' does a section [first checking for a valid - section header]). + B. Other transition methods trigger the creation of a nested state machine, + whose job is to parse a compound construct ('indent' does a block quote, + 'bullet' does a bullet list, 'overline' does a section [first checking + for a valid section header], etc.). - - In the case of lists and explicit markup, a new state machine is - created and run to parse the first item. + - In the case of lists and explicit markup, a one-off state machine is + created and run to parse contents of the first item. - A new state machine is created and its initial state is set to the appropriate specialized state (`BulletList` in the case of the - 'bullet' transition). This state machine is run to parse the compound - element (or series of explicit markup elements), and returns as soon - as a non-member element is encountered. For example, the `BulletList` - state machine aborts as soon as it encounters an element which is not - a list item of that bullet list. The optional omission of - inter-element blank lines is handled by the nested state machine. + 'bullet' transition; see `SpecializedBody` for more detail). This + state machine is run to parse the compound element (or series of + explicit markup elements), and returns as soon as a non-member element + is encountered. 
For example, the `BulletList` state machine ends as + soon as it encounters an element which is not a list item of that + bullet list. The optional omission of inter-element blank lines is + enabled by this nested state machine. - The current line index is advanced to the end of the elements parsed, and parsing continues with step 1. @@ -110,8 +111,7 @@ from docutils import nodes, statemachine, utils, roman, urischemes from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.utils import normalize_name -from docutils.parsers.rst import directives, languages -from docutils.parsers.rst.tableparser import TableParser, TableMarkupError +from docutils.parsers.rst import directives, languages, tableparser class MarkupError(DataError): pass @@ -858,10 +858,14 @@ class Body(RSTState): enum.sequenceregexps[sequence] = re.compile( enum.sequencepats[sequence] + '$') - table_top_pat = re.compile(r'\+-[-+]+-\+ *$') - """Matches the top (& bottom) of a table).""" + grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$') + """Matches the top (& bottom) of a full table).""" - tableparser = TableParser() + simple_table_top_pat = re.compile('=+( +=+)+ *$') + """Matches the top of a simple table.""" + + simple_table_border_pat = re.compile('=+[ =]*$') + """Matches the bottom & header bottom of a simple table.""" pats = {} """Fragments of patterns used by transitions.""" @@ -888,7 +892,8 @@ class Body(RSTState): 'field_marker': r':[^: ]([^:]*[^: ])?:( +|$)', 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats, 'doctest': r'>>>( +|$)', - 'table_top': table_top_pat, + 'grid_table_top': grid_table_top_pat, + 'simple_table_top': simple_table_top_pat, 'explicit_markup': r'\.\.( +|$)', 'anonymous': r'__( +|$)', 'line': r'(%(nonalphanum7bit)s)\1\1\1+ *$' % pats, @@ -899,7 +904,8 @@ class Body(RSTState): 'field_marker', 'option_marker', 'doctest', - 'table_top', + 'grid_table_top', + 'simple_table_top', 'explicit_markup', 
'anonymous', 'line', @@ -1159,9 +1165,22 @@ class Body(RSTState): self.parent += nodes.doctest_block(data, data) return [], next_state, [] - def table_top(self, match, context, next_state): - """Top border of a table.""" - nodelist, blank_finish = self.table() + def grid_table_top(self, match, context, next_state): + """Top border of a full table.""" + return self.table_top(match, context, next_state, + self.isolate_grid_table, + tableparser.GridTableParser) + + def simple_table_top(self, match, context, next_state): + """Top border of a simple table.""" + return self.table_top(match, context, next_state, + self.isolate_simple_table, + tableparser.SimpleTableParser) + + def table_top(self, match, context, next_state, + isolate_function, parser_class): + """Top border of a generic table.""" + nodelist, blank_finish = self.table(isolate_function, parser_class) self.parent += nodelist if not blank_finish: msg = self.reporter.warning( @@ -1170,23 +1189,24 @@ class Body(RSTState): self.parent += msg return [], next_state, [] - def table(self): + def table(self, isolate_function, parser_class): """Parse a table.""" - block, messages, blank_finish = self.isolate_table() + block, messages, blank_finish = isolate_function() if block: try: - tabledata = self.tableparser.parse(block) + parser = parser_class() + tabledata = parser.parse(block) tableline = (self.state_machine.abs_line_number() - len(block) + 1) table = self.build_table(tabledata, tableline) nodelist = [table] + messages - except TableMarkupError, detail: + except tableparser.TableMarkupError, detail: nodelist = self.malformed_table(block, str(detail)) + messages else: nodelist = messages return nodelist, blank_finish - def isolate_table(self): + def isolate_grid_table(self): messages = [] blank_finish = 1 try: @@ -1204,11 +1224,11 @@ class Body(RSTState): self.state_machine.previous_line(len(block) - i) del block[i:] break - if not self.table_top_pat.match(block[-1]): # find bottom + if not 
self.grid_table_top_pat.match(block[-1]): # find bottom blank_finish = 0 # from second-last to third line of table: for i in range(len(block) - 2, 1, -1): - if self.table_top_pat.match(block[i]): + if self.grid_table_top_pat.match(block[i]): self.state_machine.previous_line(len(block) - i + 1) del block[i+1:] break @@ -1221,6 +1241,47 @@ class Body(RSTState): return [], messages, blank_finish return block, messages, blank_finish + def isolate_simple_table(self): + start = self.state_machine.line_offset + lines = self.state_machine.input_lines + limit = len(lines) - 1 + toplen = len(lines[start].strip()) + pattern_match = self.simple_table_border_pat.match + found = 0 + found_at = None + i = start + 1 + while i <= limit: + line = lines[i] + match = pattern_match(line) + if match: + if len(line.strip()) != toplen: + self.state_machine.next_line(i - start) + messages = self.malformed_table( + lines[start:i+1], 'Bottom/header table border does ' + 'not match top border.') + return [], messages, i == limit or not lines[i+1].strip() + found += 1 + found_at = i + if found == 2 or i == limit or not lines[i+1].strip(): + end = i + break + i += 1 + else: # reached end of input_lines + if found: + extra = ' or no blank line after table bottom' + self.state_machine.next_line(found_at - start) + block = lines[start:found_at+1] + else: + extra = '' + self.state_machine.next_line(i - start - 1) + block = lines[start:] + messages = self.malformed_table( + block, 'No bottom table border found%s.' % extra) + return [], messages, not extra + self.state_machine.next_line(end - start) + block = lines[start:end+1] + return block, [], end == limit or not lines[end+1].strip() + def malformed_table(self, block, detail=''): data = '\n'.join(block) message = 'Malformed table at line %s; formatting as a ' \ @@ -1738,10 +1799,25 @@ class RFC2822Body(Body): class SpecializedBody(Body): """ - Superclass for second and subsequent compound element members. - - All transition methods are disabled. 
Override individual methods in - subclasses to re-enable. + Superclass for second and subsequent compound element members. Compound + elements are lists and list-like constructs. + + All transition methods are disabled (redefined as `invalid_input`). + Override individual methods in subclasses to re-enable. + + For example, once an initial bullet list item, say, is recognized, the + `BulletList` subclass takes over, with a "bullet_list" node as its + container. Upon encountering the initial bullet list item, `Body.bullet` + calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which + starts up a nested parsing session with `BulletList` as the initial state. + Only the ``bullet`` transition method is enabled in `BulletList`; as long + as only bullet list items are encountered, they are parsed and inserted + into the container. The first construct which is *not* a bullet list item + triggers the `invalid_input` method, which ends the nested parse and + closes the container. `BulletList` needs to recognize input that is + invalid in the context of a bullet list, which means everything *other + than* bullet list items, so it inherits the transition list created in + `Body`. """ def invalid_input(self, match=None, context=None, next_state=None): @@ -1755,7 +1831,8 @@ class SpecializedBody(Body): field_marker = invalid_input option_marker = invalid_input doctest = invalid_input - table_top = invalid_input + grid_table_top = invalid_input + simple_table_top = invalid_input explicit_markup = invalid_input anonymous = invalid_input line = invalid_input -- cgit v1.2.1 From 54a26a7452be9b2e0f5f0404d383e244fec5376a Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 24 Jul 2002 01:39:23 +0000 Subject: Changed format of directve attribute error.. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@357 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 04a9ea605..cfb934e9d 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1598,7 +1598,7 @@ class Body(RSTState): except KeyError, detail: return 0, ('unknown attribute: "%s"' % detail), blank_finish except (ValueError, TypeError), detail: - return 0, ('invalid attribute value:\n%s' % detail), blank_finish + return 0, ('invalid attribute value: %s' % detail), blank_finish except utils.ExtensionAttributeError, detail: return 0, ('invalid attribute data: %s' % detail), blank_finish return 1, attributes, blank_finish -- cgit v1.2.1 From 4b4b1b9a3615ba979d6c5b933e34a14d30dcf3c2 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 8 Aug 2002 00:25:37 +0000 Subject: Changed "title under/overline too short" system messages from INFO to WARNING, and fixed its insertion location. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@480 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index cfb934e9d..e59102801 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -270,10 +270,10 @@ class RSTState(StateWS): state_machine.unlink() return state_machine.abs_line_offset(), blank_finish - def section(self, title, source, style, lineno): + def section(self, title, source, style, lineno, messages): """Check for a valid subsection and create one if it checks out.""" if self.check_subsection(source, style, lineno): - self.new_subsection(title, lineno) + self.new_subsection(title, lineno, messages) def check_subsection(self, source, style, lineno): """ @@ -320,19 +320,20 @@ class RSTState(StateWS): % lineno, '', literalblock) return error - def new_subsection(self, title, lineno): + def new_subsection(self, title, lineno, messages): """Append new subsection to document tree. 
On return, check level.""" memo = self.memo mylevel = memo.section_level memo.section_level += 1 sectionnode = nodes.section() self.parent += sectionnode - textnodes, messages = self.inline_text(title, lineno) + textnodes, title_messages = self.inline_text(title, lineno) titlenode = nodes.title(title, '', *textnodes) name = normalize_name(titlenode.astext()) sectionnode['name'] = name sectionnode += titlenode sectionnode += messages + sectionnode += title_messages self.document.note_implicit_target(sectionnode, sectionnode) offset = self.state_machine.line_offset + 1 absoffset = self.state_machine.abs_line_offset() + 1 @@ -2035,15 +2036,16 @@ class Text(RSTState): title = context[0].rstrip() underline = match.string.rstrip() source = title + '\n' + underline + messages = [] if len(title) > len(underline): blocktext = context[0] + '\n' + self.state_machine.line - msg = self.reporter.info( + msg = self.reporter.warning( 'Title underline too short at line %s.' % lineno, '', nodes.literal_block(blocktext, blocktext)) - self.parent += msg + messages.append(msg) style = underline[0] context[:] = [] - self.section(title, source, style, lineno - 1) + self.section(title, source, style, lineno - 1, messages) return [], next_state, [] def text(self, match, context, next_state): @@ -2226,14 +2228,15 @@ class Line(SpecializedText): self.parent += msg return [], 'Body', [] title = title.rstrip() + messages = [] if len(title) > len(overline): - msg = self.reporter.info( + msg = self.reporter.warning( 'Title overline too short at line %s.'% lineno, '', nodes.literal_block(source, source)) - self.parent += msg + messages.append(msg) style = (overline[0], underline[0]) self.eofcheck = 0 # @@@ not sure this is correct - self.section(title.lstrip(), source, style, lineno + 1) + self.section(title.lstrip(), source, style, lineno + 1, messages) self.eofcheck = 1 return [], 'Body', [] -- cgit v1.2.1 From 403cb20d5dc1c872855031ed1ea90dc2fa948281 Mon Sep 17 00:00:00 2001 From: goodger 
Date: Sat, 10 Aug 2002 02:31:33 +0000 Subject: docstrings git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@493 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index e59102801..fec37d1c9 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -27,7 +27,7 @@ the reStructuredText parser. It defines the following: - `SpecializedText`: Superclass for continuation lines of Text-variants. - `Definition`: Second line of potential definition_list_item. - `Line`: Second line of overlined section title or transition marker. - - `Stuff`: An auxilliary collection class. + - `Stuff`: An auxiliary collection class. :Exception classes: - `MarkupError` -- cgit v1.2.1 From a7cbc5c1f096f980f20efceb9ca8dd3b7cff1ee0 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 16 Aug 2002 00:34:32 +0000 Subject: Fixed enumerated list item parsing to allow paragraphs & section titles to begin with enumerators. Plus some cleanup. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@538 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 102 +++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 29 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index fec37d1c9..eb545a1f2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -957,20 +957,8 @@ class Body(RSTState): def enumerator(self, match, context, next_state): """Enumerated List Item""" format, sequence, text, ordinal = self.parse_enumerator(match) - if ordinal is None: - msg = self.reporter.error( - ('Enumerated list start value invalid at line %s: ' - '"%s" (sequence %r)' - % (self.state_machine.abs_line_number(), text, sequence))) - self.parent += msg - indented, line_offset, blank_finish = \ - self.state_machine.get_known_indented(match.end()) - bq = self.block_quote(indented, line_offset) - self.parent += bq - if not blank_finish: - self.parent += self.unindent_warning( - 'Enumerated list') - return [], next_state, [] + if not self.is_enumerated_list_item(ordinal, sequence, format): + raise statemachine.TransitionCorrection('text') if ordinal != 1: msg = self.reporter.info( ('Enumerated list start value not ordinal-1 at line %s: ' @@ -1022,7 +1010,7 @@ class Body(RSTState): if groupdict[format]: # was this the format matched? 
break # yes; keep `format` else: # shouldn't happen - raise ParserError, 'enumerator format not matched' + raise ParserError('enumerator format not matched') text = groupdict[format][self.enum.formatinfo[format].start :self.enum.formatinfo[format].end] if expected_sequence: @@ -1030,24 +1018,81 @@ class Body(RSTState): if self.enum.sequenceregexps[expected_sequence].match(text): sequence = expected_sequence except KeyError: # shouldn't happen - raise ParserError, 'unknown sequence: %s' % sequence - else: - if text == 'i': - sequence = 'lowerroman' - elif text == 'I': - sequence = 'upperroman' + raise ParserError('unknown enumerator sequence: %s' + % sequence) + elif text == 'i': + sequence = 'lowerroman' + elif text == 'I': + sequence = 'upperroman' if not sequence: for sequence in self.enum.sequences: if self.enum.sequenceregexps[sequence].match(text): break else: # shouldn't happen - raise ParserError, 'enumerator sequence not matched' + raise ParserError('enumerator sequence not matched') try: ordinal = self.enum.converters[sequence](text) except roman.InvalidRomanNumeralError: ordinal = None return format, sequence, text, ordinal + def is_enumerated_list_item(self, ordinal, sequence, format): + """ + Check validity based on the ordinal value and the second line. + + Return true iff the ordinal is valid and the second line is blank, + indented, or starts with the next enumerator. + """ + if ordinal is None: + return None + try: + next_line = self.state_machine.next_line() + except IndexError: # end of input lines + self.state_machine.previous_line() + return 1 + else: + self.state_machine.previous_line() + if not next_line[:1].strip(): # blank or indented + return 1 + next_enumerator = self.make_enumerator(ordinal + 1, sequence, format) + try: + if next_line.startswith(next_enumerator): + return 1 + except TypeError: + pass + return None + + def make_enumerator(self, ordinal, sequence, format): + """ + Construct and return an enumerated list item marker. 
+ + Return ``None`` for invalid (out of range) ordinals. + """ + if sequence == 'arabic': + enumerator = str(ordinal) + else: + if sequence.endswith('alpha'): + if ordinal > 26: + return None + enumerator = chr(ordinal + ord('a') - 1) + elif sequence.endswith('roman'): + try: + enumerator = roman.toRoman(ordinal) + except roman.RomanError: + return None + else: # shouldn't happen + raise ParserError('unknown enumerator sequence: "%s"' + % sequence) + if sequence.startswith('lower'): + enumerator = enumerator.lower() + elif sequence.startswith('upper'): + enumerator = enumerator.upper() + else: # shouldn't happen + raise ParserError('unknown enumerator sequence: "%s"' + % sequence) + formatinfo = self.enum.formatinfo[format] + return formatinfo.prefix + enumerator + formatinfo.suffix + ' ' + def field_marker(self, match, context, next_state): """Field list item.""" fieldlist = nodes.field_list() @@ -1874,7 +1919,8 @@ class EnumeratedList(SpecializedBody): match, self.parent['enumtype']) if (sequence != self.parent['enumtype'] or format != self.format or - ordinal != self.lastordinal + 1): + ordinal != (self.lastordinal + 1) or + not self.is_enumerated_list_item(ordinal, sequence, format)): # different enumeration: new list self.invalid_input() listitem, blank_finish = self.list_item(match.end()) @@ -1999,11 +2045,7 @@ class Text(RSTState): def eof(self, context): if context: - paragraph, literalnext = self.paragraph( - context, self.state_machine.abs_line_number() - 1) - self.parent += paragraph - if literalnext: - self.parent += self.literal_block() + self.blank(None, context, None) return [] def indent(self, match, context, next_state): @@ -2163,7 +2205,9 @@ class Definition(SpecializedText): class Line(SpecializedText): - """Second line of over- & underlined section title or transition marker.""" + """ + Second line of over- & underlined section title or transition marker. + """ eofcheck = 1 # @@@ ??? 
"""Set to 0 while parsing sections, so that we don't catch the EOF.""" -- cgit v1.2.1 From f14be8cb9b05636ef65e921923ecd74a7cd8304d Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 27 Aug 2002 00:40:51 +0000 Subject: Changed "attribute" to "option" for directives/extensions. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@597 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index eb545a1f2..ed56875cd 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1594,14 +1594,14 @@ class Body(RSTState): self.parent += msg return [], blank_finish - def directive(self, match, **attributes): + def directive(self, match, **option_presets): type_name = match.group(1) directivefunction = directives.directive(type_name, self.memo.language) data = match.string[match.end():].strip() if directivefunction: return directivefunction(match, type_name, data, self, - self.state_machine, attributes) + self.state_machine, option_presets) else: return self.unknown_directive(type_name, data) @@ -1615,21 +1615,20 @@ class Body(RSTState): '', nodes.literal_block(text, text)) return [error], blank_finish - def parse_extension_attributes(self, attribute_spec, datalines, - blank_finish): + def parse_extension_options(self, option_spec, datalines, blank_finish): """ - Parse `datalines` for a field list containing extension attributes - matching `attribute_spec`. + Parse `datalines` for a field list containing extension options + matching `option_spec`. :Parameters: - - `attribute_spec`: a mapping of attribute name to conversion + - `option_spec`: a mapping of option name to conversion function, which should raise an exception on bad input. - `datalines`: a list of input strings. 
- `blank_finish`: :Return: - Success value, 1 or 0. - - An attribute dictionary on success, an error string on failure. + - An option dictionary on success, an error string on failure. - Updated `blank_finish` flag. """ node = nodes.field_list() @@ -1637,17 +1636,16 @@ class Body(RSTState): datalines, 0, node, initial_state='FieldList', blank_finish=blank_finish) if newline_offset != len(datalines): # incomplete parse of block - return 0, 'invalid attribute block', blank_finish + return 0, 'invalid option block', blank_finish try: - attributes = utils.extract_extension_attributes(node, - attribute_spec) + options = utils.extract_extension_options(node, option_spec) except KeyError, detail: - return 0, ('unknown attribute: "%s"' % detail), blank_finish + return 0, ('unknown option: "%s"' % detail), blank_finish except (ValueError, TypeError), detail: - return 0, ('invalid attribute value: %s' % detail), blank_finish - except utils.ExtensionAttributeError, detail: - return 0, ('invalid attribute data: %s' % detail), blank_finish - return 1, attributes, blank_finish + return 0, ('invalid option value: %s' % detail), blank_finish + except utils.ExtensionOptionError, detail: + return 0, ('invalid option data: %s' % detail), blank_finish + return 1, options, blank_finish def comment(self, match): if not match.string[match.end():].strip() \ @@ -2007,10 +2005,10 @@ class SubstitutionDef(Body): def embedded_directive(self, match, context, next_state): if self.parent.has_key('alt'): - attributes = {'alt': self.parent['alt']} + option_presets = {'alt': self.parent['alt']} else: - attributes = {} - nodelist, blank_finish = self.directive(match, **attributes) + option_presets = {} + nodelist, blank_finish = self.directive(match, **option_presets) self.parent += nodelist if not self.state_machine.at_eof(): self.blank_finish = blank_finish -- cgit v1.2.1 From a024b2b067ec8a092d783c9baed4b43dd5898d56 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 4 Sep 2002 01:33:22 +0000 
Subject: Converted system messages to use the new "line" attribute. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@622 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 168 ++++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 85 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index ed56875cd..88d4f3562 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -316,8 +316,8 @@ class RSTState(StateWS): def title_inconsistent(self, sourcetext, lineno): literalblock = nodes.literal_block('', sourcetext) - error = self.reporter.severe('Title level inconsistent at line %s:' - % lineno, '', literalblock) + error = self.reporter.severe('Title level inconsistent:', '', + literalblock, line=lineno) return error def new_subsection(self, title, lineno, messages): @@ -374,8 +374,8 @@ class RSTState(StateWS): def unindent_warning(self, node_name): return self.reporter.warning( - ('%s ends without a blank line; unexpected unindent at line %s.' - % (node_name, self.state_machine.abs_line_number() + 1))) + '%s ends without a blank line; unexpected unindent.' % node_name, + line=(self.state_machine.abs_line_number() + 1)) def build_regexp(definition, compile=1): @@ -585,8 +585,8 @@ class Inliner: return (string[:matchstart], [nodeclass(rawsource, text)], string[textend:], [], endmatch.group(1)) msg = self.reporter.warning( - 'Inline %s start-string without end-string ' - 'at line %s.' % (nodeclass.__name__, lineno)) + 'Inline %s start-string without end-string.' 
+ % nodeclass.__name__, line=lineno) text = unescape(string[matchstart:matchend], 1) rawsource = unescape(string[matchstart:matchend], 1) prb = self.problematic(text, rawsource, msg) @@ -628,9 +628,9 @@ class Inliner: if endmatch.group('role'): if role: msg = self.reporter.warning( - 'Multiple roles in interpreted text at line %s (both ' - 'prefix and suffix present; only one allowed).' - % lineno) + 'Multiple roles in interpreted text (both ' + 'prefix and suffix present; only one allowed).', + line=lineno) text = unescape(string[rolestart:textend], 1) prb = self.problematic(text, text, msg) return string[:rolestart], [prb], string[textend:], [msg] @@ -643,7 +643,7 @@ class Inliner: if role: msg = self.reporter.warning( 'Mismatch: both interpreted text role %s and ' - 'reference suffix at line %s.' % (position, lineno)) + 'reference suffix.' % position, line=lineno) text = unescape(string[rolestart:textend], 1) prb = self.problematic(text, text, msg) return string[:rolestart], [prb], string[textend:], [msg] @@ -654,7 +654,7 @@ class Inliner: rawsource, text, role, position) msg = self.reporter.warning( 'Inline interpreted text or phrase reference start-string ' - 'without end-string at line %s.' 
% lineno) + 'without end-string.', line=lineno) text = unescape(string[matchstart:matchend], 1) prb = self.problematic(text, text, msg) return string[:matchstart], [prb], string[matchend:], [msg] @@ -961,9 +961,8 @@ class Body(RSTState): raise statemachine.TransitionCorrection('text') if ordinal != 1: msg = self.reporter.info( - ('Enumerated list start value not ordinal-1 at line %s: ' - '"%s" (ordinal %s)' - % (self.state_machine.abs_line_number(), text, ordinal))) + 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)' + % (text, ordinal), line=self.state_machine.abs_line_number()) self.parent += msg enumlist = nodes.enumerated_list() self.parent += enumlist @@ -1137,10 +1136,10 @@ class Body(RSTState): optionlist = nodes.option_list() try: listitem, blank_finish = self.option_list_item(match) - except MarkupError, detail: # shouldn't happen; won't match pattern + except MarkupError, (message, lineno): + # This shouldn't happen; pattern won't match. msg = self.reporter.error( - ('Invalid option list marker at line %s: %s' - % (self.state_machine.abs_line_number(), detail))) + 'Invalid option list marker: %s' % message, line=lineno) self.parent += msg indented, indent, line_offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) @@ -1201,9 +1200,10 @@ class Body(RSTState): delimiter=delimiter) optlist.append(option) else: - raise MarkupError('wrong numer of option tokens (=%s), ' - 'should be 1 or 2: "%s"' % (len(tokens), - optionstring)) + raise MarkupError( + 'wrong numer of option tokens (=%s), should be 1 or 2: ' + '"%s"' % (len(tokens), optionstring), + self.state_machine.abs_line_number() + 1) return optlist def doctest(self, match, context, next_state): @@ -1230,8 +1230,8 @@ class Body(RSTState): self.parent += nodelist if not blank_finish: msg = self.reporter.warning( - 'Blank line required after table at line %s.' 
- % (self.state_machine.abs_line_number() + 1)) + 'Blank line required after table.', + line=self.state_machine.abs_line_number() + 1) self.parent += msg return [], next_state, [] @@ -1259,8 +1259,8 @@ class Body(RSTState): block = self.state_machine.get_text_block(flush_left=1) except statemachine.UnexpectedIndentationError, instance: block, lineno = instance.args - messages.append(self.reporter.error( - 'Unexpected indentation at line %s.' % lineno)) + messages.append(self.reporter.error('Unexpected indentation.', + line=lineno)) blank_finish = 0 width = len(block[0].strip()) for i in range(len(block)): @@ -1330,14 +1330,14 @@ class Body(RSTState): def malformed_table(self, block, detail=''): data = '\n'.join(block) - message = 'Malformed table at line %s; formatting as a ' \ - 'literal block.' % (self.state_machine.abs_line_number() - - len(block) + 1) + message = 'Malformed table.' + lineno = self.state_machine.abs_line_number() - len(block) + 1 if detail: message += '\n' + detail - nodelist = [self.reporter.error(message), - nodes.literal_block(data, data)] - return nodelist + error = self.reporter.error(message, '', + nodes.literal_block(data, data), + line=lineno) + return [error] def build_table(self, tabledata, tableline): colspecs, headrows, bodyrows = tabledata @@ -1479,9 +1479,8 @@ class Body(RSTState): blockindex += 1 try: escaped += block[blockindex] - except (IndexError, MarkupError): - raise MarkupError('malformed hyperlink target at line %s.' - % lineno) + except IndexError: + raise MarkupError('malformed hyperlink target.', lineno) del block[:blockindex] block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip() if block and block[-1].strip()[-1:] == '_': # possible indirect target @@ -1496,10 +1495,9 @@ class Body(RSTState): reference = ''.join([line.strip() for line in block]) if reference.find(' ') != -1: warning = self.reporter.warning( - 'Hyperlink target at line %s contains whitespace. ' - 'Perhaps a footnote was intended?' 
- % (self.state_machine.abs_line_number() - len(block) + 1), - '', nodes.literal_block(blocktext, blocktext)) + 'Hyperlink target contains whitespace. Perhaps a footnote ' + 'was intended?', '', + nodes.literal_block(blocktext, blocktext), line=lineno) nodelist.append(warning) else: unescaped = unescape(reference) @@ -1548,9 +1546,9 @@ class Body(RSTState): blockindex += 1 try: escaped = escaped + ' ' + block[blockindex].strip() - except (IndexError, MarkupError): - raise MarkupError('malformed substitution definition ' - 'at line %s.' % lineno) + except IndexError: + raise MarkupError('malformed substitution definition.', + lineno) del block[:blockindex] # strip out the substitution marker block[0] = (block[0] + ' ')[subdefmatch.end()-len(escaped)-1:].strip() if not block[0]: @@ -1577,9 +1575,9 @@ class Body(RSTState): i += 1 if len(substitutionnode) == 0: msg = self.reporter.warning( - 'Substitution definition "%s" empty or invalid at line ' - '%s.' % (subname, self.state_machine.abs_line_number()), - '', nodes.literal_block(blocktext, blocktext)) + 'Substitution definition "%s" empty or invalid.' + % subname, '', + nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg else: del substitutionnode['alt'] @@ -1588,9 +1586,8 @@ class Body(RSTState): return [substitutionnode], blank_finish else: msg = self.reporter.warning( - 'Substitution definition "%s" missing contents at line %s.' - % (subname, self.state_machine.abs_line_number()), '', - nodes.literal_block(blocktext, blocktext)) + 'Substitution definition "%s" missing contents.' % subname, + '', nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg return [], blank_finish @@ -1611,8 +1608,8 @@ class Body(RSTState): self.state_machine.get_first_known_indented(0, strip_indent=0) text = '\n'.join(indented) error = self.reporter.error( - 'Unknown directive type "%s" at line %s.' % (type_name, lineno), - '', nodes.literal_block(text, text)) + 'Unknown directive type "%s".' 
% type_name, '', + nodes.literal_block(text, text), line=lineno) return [error], blank_finish def parse_extension_options(self, option_spec, datalines, blank_finish): @@ -1714,10 +1711,11 @@ class Body(RSTState): if expmatch: try: return method(self, expmatch) - except MarkupError, detail: # never reached? + except MarkupError, (message, lineno): # never reached? errors.append( - self.reporter.warning('%s: %s' - % (detail.__class__.__name__, detail))) + self.reporter.warning( + '%s: %s' % (detail.__class__.__name__, message), + line=lineno)) break nodelist, blank_finish = self.comment(match) return nodelist + errors, blank_finish @@ -1762,11 +1760,12 @@ class Body(RSTState): nodelist = [] reference = escape2null(''.join([line.strip() for line in block])) if reference.find(' ') != -1: + lineno = self.state_machine.abs_line_number() - len(block) + 1 warning = self.reporter.warning( - 'Anonymous hyperlink target at line %s contains ' - 'whitespace. Perhaps a footnote was intended?' - % (self.state_machine.abs_line_number() - len(block) + 1), - '', nodes.literal_block(blocktext, blocktext)) + 'Anonymous hyperlink target contains whitespace. Perhaps a ' + 'footnote was intended?', '', + nodes.literal_block(blocktext, blocktext), + line=lineno) nodelist.append(warning) else: target = nodes.target(blocktext, '', anonymous=1) @@ -1784,9 +1783,9 @@ class Body(RSTState): else: blocktext = self.state_machine.line msg = self.reporter.severe( - 'Unexpected section title or transition at line %s.' 
- % self.state_machine.abs_line_number(), '', - nodes.literal_block(blocktext, blocktext)) + 'Unexpected section title or transition.', '', + nodes.literal_block(blocktext, blocktext), + line=self.state_machine.abs_line_number()) self.parent += msg return [], next_state, [] @@ -1948,7 +1947,7 @@ class OptionList(SpecializedBody): """Option list item.""" try: option_list_item, blank_finish = self.option_list_item(match) - except MarkupError, detail: + except MarkupError, (message, lineno): self.invalid_input() self.parent += option_list_item self.blank_finish = blank_finish @@ -2069,8 +2068,8 @@ class Text(RSTState): if not self.state_machine.match_titles: blocktext = context[0] + '\n' + self.state_machine.line msg = self.reporter.severe( - 'Unexpected section title at line %s.' % lineno, '', - nodes.literal_block(blocktext, blocktext)) + 'Unexpected section title.', '', + nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg return [], next_state, [] title = context[0].rstrip() @@ -2080,8 +2079,8 @@ class Text(RSTState): if len(title) > len(underline): blocktext = context[0] + '\n' + self.state_machine.line msg = self.reporter.warning( - 'Title underline too short at line %s.' % lineno, '', - nodes.literal_block(blocktext, blocktext)) + 'Title underline too short.', '', + nodes.literal_block(blocktext, blocktext), line=lineno) messages.append(msg) style = underline[0] context[:] = [] @@ -2096,8 +2095,7 @@ class Text(RSTState): block = self.state_machine.get_text_block(flush_left=1) except statemachine.UnexpectedIndentationError, instance: block, lineno = instance.args - msg = self.reporter.error( - 'Unexpected indentation at line %s.' 
% lineno) + msg = self.reporter.error('Unexpected indentation.', line=lineno) lines = context + block paragraph, literalnext = self.paragraph(lines, startline) self.parent += paragraph @@ -2124,8 +2122,8 @@ class Text(RSTState): nodelist.append(self.unindent_warning('Literal block')) else: nodelist.append(self.reporter.warning( - 'Literal block expected at line %s; none found.' - % self.state_machine.abs_line_number())) + 'Literal block expected; none found.', + line=self.state_machine.abs_line_number())) return nodelist def definition_list_item(self, termline): @@ -2141,7 +2139,7 @@ class Text(RSTState): if termline[0][-2:] == '::': definition += self.reporter.info( 'Blank line missing before literal block? Interpreted as a ' - 'definition list item. At line %s.' % (line_offset + 1)) + 'definition list item.', line=line_offset + 1) self.nested_parse(indented, input_offset=line_offset, node=definition) return definitionlistitem, blank_finish @@ -2216,8 +2214,8 @@ class Line(SpecializedText): transition = nodes.transition(context[0]) self.parent += transition msg = self.reporter.error( - 'Document or section may not end with a transition ' - '(line %s).' % (self.state_machine.abs_line_number() - 1)) + 'Document or section may not end with a transition.', + line=self.state_machine.abs_line_number() - 1) self.parent += msg self.eofcheck = 1 return [] @@ -2227,14 +2225,14 @@ class Line(SpecializedText): transition = nodes.transition(context[0]) if len(self.parent) == 0: msg = self.reporter.error( - 'Document or section may not begin with a transition ' - '(line %s).' % (self.state_machine.abs_line_number() - 1)) + 'Document or section may not begin with a transition.', + line=self.state_machine.abs_line_number() - 1) self.parent += msg elif isinstance(self.parent[-1], nodes.transition): msg = self.reporter.error( 'At least one body element must separate transitions; ' - 'adjacent transitions at line %s.' 
- % (self.state_machine.abs_line_number() - 1)) + 'adjacent transitions not allowed.', + line=self.state_machine.abs_line_number() - 1) self.parent += msg self.parent += transition return [], 'Body', [] @@ -2250,8 +2248,8 @@ class Line(SpecializedText): except IndexError: blocktext = overline + '\n' + title msg = self.reporter.severe( - 'Incomplete section title at line %s.' % lineno, '', - nodes.literal_block(blocktext, blocktext)) + 'Incomplete section title.', '', + nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg return [], 'Body', [] source = '%s\n%s\n%s' % (overline, title, underline) @@ -2259,22 +2257,22 @@ class Line(SpecializedText): underline = underline.rstrip() if not self.transitions['underline'][0].match(underline): msg = self.reporter.severe( - 'Missing underline for overline at line %s.' % lineno, '', - nodes.literal_block(source, source)) + 'Missing underline for overline.', '', + nodes.literal_block(source, source), line=lineno) self.parent += msg return [], 'Body', [] elif overline != underline: msg = self.reporter.severe( - 'Title overline & underline mismatch at ' 'line %s.' - % lineno, '', nodes.literal_block(source, source)) + 'Title overline & underline mismatch.', '', + nodes.literal_block(source, source), line=lineno) self.parent += msg return [], 'Body', [] title = title.rstrip() messages = [] if len(title) > len(overline): msg = self.reporter.warning( - 'Title overline too short at line %s.'% lineno, '', - nodes.literal_block(source, source)) + 'Title overline too short.', '', + nodes.literal_block(source, source), line=lineno) messages.append(msg) style = (overline[0], underline[0]) self.eofcheck = 0 # @@@ not sure this is correct @@ -2287,9 +2285,9 @@ class Line(SpecializedText): def underline(self, match=None, context=None, next_state=None): blocktext = context[0] + '\n' + self.state_machine.line msg = self.reporter.error( - 'Invalid section title or transition marker at line %s.' 
- % (self.state_machine.abs_line_number() - 1), '', - nodes.literal_block(blocktext, blocktext)) + 'Invalid section title or transition marker.', '', + nodes.literal_block(blocktext, blocktext), + line=self.state_machine.abs_line_number() - 1) self.parent += msg return [], 'Body', [] -- cgit v1.2.1 From 1368e12ba7bc99bbaab878786482331e3cee07ed Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 6 Sep 2002 02:12:54 +0000 Subject: Fixed a substitution reference edge case. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@636 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 88d4f3562..0d5b8ba20 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -469,7 +469,7 @@ class Inliner: r'\*(?!\*)', # emphasis but not strong r'``', # literal r'_`', # inline internal target - r'\|'] # substitution reference + r'\|(?!\|)'] # substitution reference ), ('whole', '', end_string_suffix, # whole constructs [# reference name & end-string -- cgit v1.2.1 From aba194af678474835e94a9c686d6e5a202706af0 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 7 Sep 2002 02:32:37 +0000 Subject: Added support for "--pep-references" and "--rfc-references" options; reworked ``Inliner`` code to make customization easier. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@644 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 77 +++++++++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 12 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 0d5b8ba20..d66e52bbd 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -148,6 +148,7 @@ class RSTStateMachine(StateMachineWS): self.match_titles = match_titles if inliner is None: inliner = Inliner() + inliner.init_customizations(document.options) self.memo = Stuff(document=document, reporter=document.reporter, language=self.language, @@ -407,6 +408,20 @@ class Inliner: Parse inline markup; call the `parse()` method. """ + def __init__(self): + self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),] + """List of (pattern, bound method) tuples, used by + `self.implicit_inline`.""" + + def init_customizations(self, options): + """Option-based customizations; run when parsing begins.""" + if options.pep_references: + self.implicit_dispatch.append((self.patterns.pep, + self.pep_reference)) + if options.rfc_references: + self.implicit_dispatch.append((self.patterns.rfc, + self.rfc_reference)) + def parse(self, text, lineno, memo, parent): """ Return 2 lists: nodes (text and inline elements), and system_messages. @@ -548,7 +563,21 @@ class Inliner: ) ) %(end_string_suffix)s - """ % locals(), re.VERBOSE)) + """ % locals(), re.VERBOSE), + pep=re.compile( + r""" + %(start_string_prefix)s + ( + (pep-(?P<pepnum1>\d+)(.txt)?) 
# reference to source file + | + (PEP\s+(?P<pepnum2>\d+)) # reference by name + ) + %(end_string_suffix)s""" % locals(), re.VERBOSE), + rfc=re.compile( + r""" + %(start_string_prefix)s + (RFC(-|\s+)?(?P<rfcnum>\d+)) + %(end_string_suffix)s""" % locals(), re.VERBOSE)) def quoted_start(self, match): """Return 1 if inline markup start-string is 'quoted', 0 if not.""" @@ -787,31 +816,55 @@ class Inliner: else: # not a valid scheme raise MarkupMismatch - implicit = ((patterns.uri, standalone_uri),) - """List of (pattern, dispatch method) pairs.""" + pep_url_local = 'pep-%04d.html' + pep_url_absolute = 'http://www.python.org/peps/pep-%04d.html' + pep_url = pep_url_absolute + + def pep_reference(self, match, lineno): + text = match.group(0) + if text.startswith('pep-'): + pepnum = int(match.group('pepnum1')) + elif text.startswith('PEP'): + pepnum = int(match.group('pepnum2')) + else: + raise MarkupMismatch + ref = self.pep_url % pepnum + unescaped = unescape(text, 0) + return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)] + + rfc_url = 'http://www.faqs.org/rfcs/rfc%d.html' + + def rfc_reference(self, match, lineno): + text = match.group(0) + if text.startswith('RFC'): + rfcnum = int(match.group('rfcnum')) + ref = self.rfc_url % rfcnum + else: + raise MarkupMismatch + unescaped = unescape(text, 0) + return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)] def implicit_inline(self, text, lineno): """ - Check each of the patterns in `self.implicit` for a match, and - dispatch to the stored method for the pattern. Recursively check the - text before and after the match. Return a list of `nodes.Text` and - inline element nodes. + Check each of the patterns in `self.implicit_dispatch` for a match, + and dispatch to the stored method for the pattern. Recursively check + the text before and after the match. Return a list of `nodes.Text` + and inline element nodes. 
""" if not text: return [] - for pattern, dispatch in self.implicit: + for pattern, method in self.implicit_dispatch: match = pattern.search(text) if match: try: # Must recurse on strings before *and* after the match; # there may be multiple patterns. return (self.implicit_inline(text[:match.start()], lineno) - + dispatch(self, match, lineno) + + + method(match, lineno) + self.implicit_inline(text[match.end():], lineno)) except MarkupMismatch: pass return [nodes.Text(unescape(text))] - dispatch = {'*': emphasis, '**': strong, @@ -1047,10 +1100,10 @@ class Body(RSTState): try: next_line = self.state_machine.next_line() except IndexError: # end of input lines - self.state_machine.previous_line() + self.state_machine.previous_line() return 1 else: - self.state_machine.previous_line() + self.state_machine.previous_line() if not next_line[:1].strip(): # blank or indented return 1 next_enumerator = self.make_enumerator(ordinal + 1, sequence, format) -- cgit v1.2.1 From a07bc663d7654004db7594bb1601525b18d46f4a Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 11 Sep 2002 02:52:01 +0000 Subject: Removed "field_argument" element & support; "field_name" may contain multiple words and whitespace. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@664 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index d66e52bbd..fa2d6260f 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1163,13 +1163,11 @@ class Body(RSTState): return [], next_state, [] def field(self, match): - name, args = self.parse_field_marker(match) + name = self.parse_field_marker(match) indented, indent, line_offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) fieldnode = nodes.field() fieldnode += nodes.field_name(name, name) - for arg in args: - fieldnode += nodes.field_argument(arg, arg) fieldbody = nodes.field_body('\n'.join(indented)) fieldnode += fieldbody if indented: @@ -1178,11 +1176,10 @@ class Body(RSTState): return fieldnode, blank_finish def parse_field_marker(self, match): - """Extract & return name & argument list from a field marker match.""" + """Extract & return field name from a field marker match.""" field = match.string[1:] # strip off leading ':' field = field[:field.find(':')] # strip off trailing ':' etc. - tokens = field.split() - return tokens[0], tokens[1:] # first == name, others == args + return field def option_marker(self, match, context, next_state): """Option list item.""" -- cgit v1.2.1 From 0b774599f04c5ff6c27b3c271019a949500ea002 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 19 Sep 2002 00:41:42 +0000 Subject: Added support for short section title over/underlines. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@689 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 145 +++++++++++++++++++++++++++++------------ 1 file changed, 105 insertions(+), 40 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index fa2d6260f..fa940c49a 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1,10 +1,10 @@ -""" -:Author: David Goodger -:Contact: goodger@users.sourceforge.net -:Revision: $Revision$ -:Date: $Date$ -:Copyright: This module has been placed in the public domain. +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. +""" This is the ``docutils.parsers.restructuredtext.states`` module, the core of the reStructuredText parser. It defines the following: @@ -217,9 +217,23 @@ class RSTState(StateWS): """ try: self.state_machine.goto_line(abs_line_offset) - except IndexError: + except EOFError: pass + def no_match(self, context, transitions): + """ + Override `StateWS.no_match` to generate a system message. + + This code should never be run. + """ + self.reporter.severe( + 'Internal error: no transition pattern match. State: "%s"; ' + 'transitions: %s; context: %s; current line: %r.' 
+ % (self.__class__.__name__, transitions, context, + self.state_machine.line), + line=self.state_machine.abs_line_number()) + return context, None, [] + def bof(self, context): """Called at beginning of file.""" return [], [] @@ -950,7 +964,7 @@ class Body(RSTState): 'simple_table_top': simple_table_top_pat, 'explicit_markup': r'\.\.( +|$)', 'anonymous': r'__( +|$)', - 'line': r'(%(nonalphanum7bit)s)\1\1\1+ *$' % pats, + 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats, 'text': r''} initial_transitions = ( 'bullet', @@ -1099,7 +1113,7 @@ class Body(RSTState): return None try: next_line = self.state_machine.next_line() - except IndexError: # end of input lines + except EOFError: # end of input lines self.state_machine.previous_line() return 1 else: @@ -1830,6 +1844,13 @@ class Body(RSTState): """Section title overline or transition marker.""" if self.state_machine.match_titles: return [match.string], 'Line', [] + elif len(match.string.strip()) < 4: + msg = self.reporter.info( + 'Unexpected possible title overline or transition.\n' + "Treating it as ordinary text because it's so short.", '', + line=self.state_machine.abs_line_number()) + self.parent += msg + raise statemachine.TransitionCorrection('text') else: blocktext = self.state_machine.line msg = self.reporter.severe( @@ -2127,11 +2148,19 @@ class Text(RSTState): source = title + '\n' + underline messages = [] if len(title) > len(underline): - blocktext = context[0] + '\n' + self.state_machine.line - msg = self.reporter.warning( - 'Title underline too short.', '', - nodes.literal_block(blocktext, blocktext), line=lineno) - messages.append(msg) + if len(underline) < 4: + msg = self.reporter.info( + 'Possible title underline, too short for the title.\n' + "Treating it as ordinary text because it's so short.", '', + line=lineno) + self.parent += msg + raise statemachine.TransitionCorrection('text') + else: + blocktext = context[0] + '\n' + self.state_machine.line + msg = self.reporter.warning( + 'Title underline too 
short.', '', + nodes.literal_block(blocktext, blocktext), line=lineno) + messages.append(msg) style = underline[0] context[:] = [] self.section(title, source, style, lineno - 1, messages) @@ -2153,7 +2182,7 @@ class Text(RSTState): if literalnext: try: self.state_machine.next_line() - except IndexError: + except EOFError: pass self.parent += self.literal_block() return [], next_state, [] @@ -2260,6 +2289,9 @@ class Line(SpecializedText): def eof(self, context): """Transition marker at end of section or document.""" + marker = context[0].strip() + if len(marker) < 4: + self.state_correction(context) if self.eofcheck: # ignore EOFError with sections transition = nodes.transition(context[0]) self.parent += transition @@ -2272,7 +2304,10 @@ class Line(SpecializedText): def blank(self, match, context, next_state): """Transition marker.""" - transition = nodes.transition(context[0]) + marker = context[0].strip() + if len(marker) < 4: + self.state_correction(context) + transition = nodes.transition(marker) if len(self.parent) == 0: msg = self.reporter.error( 'Document or section may not begin with a transition.', @@ -2295,35 +2330,50 @@ class Line(SpecializedText): underline = '' try: underline = self.state_machine.next_line() - except IndexError: + except EOFError: blocktext = overline + '\n' + title - msg = self.reporter.severe( - 'Incomplete section title.', '', - nodes.literal_block(blocktext, blocktext), line=lineno) - self.parent += msg - return [], 'Body', [] + if len(overline.rstrip()) < 4: + self.short_overline(context, blocktext, lineno, 2) + else: + msg = self.reporter.severe( + 'Incomplete section title.', '', + nodes.literal_block(blocktext, blocktext), line=lineno) + self.parent += msg + return [], 'Body', [] source = '%s\n%s\n%s' % (overline, title, underline) overline = overline.rstrip() underline = underline.rstrip() if not self.transitions['underline'][0].match(underline): - msg = self.reporter.severe( - 'Missing underline for overline.', '', - 
nodes.literal_block(source, source), line=lineno) - self.parent += msg - return [], 'Body', [] + blocktext = overline + '\n' + title + '\n' + underline + if len(overline.rstrip()) < 4: + self.short_overline(context, blocktext, lineno, 2) + else: + msg = self.reporter.severe( + 'Missing underline for overline.', '', + nodes.literal_block(source, source), line=lineno) + self.parent += msg + return [], 'Body', [] elif overline != underline: - msg = self.reporter.severe( - 'Title overline & underline mismatch.', '', - nodes.literal_block(source, source), line=lineno) - self.parent += msg - return [], 'Body', [] + blocktext = overline + '\n' + title + '\n' + underline + if len(overline.rstrip()) < 4: + self.short_overline(context, blocktext, lineno, 2) + else: + msg = self.reporter.severe( + 'Title overline & underline mismatch.', '', + nodes.literal_block(source, source), line=lineno) + self.parent += msg + return [], 'Body', [] title = title.rstrip() messages = [] if len(title) > len(overline): - msg = self.reporter.warning( - 'Title overline too short.', '', - nodes.literal_block(source, source), line=lineno) - messages.append(msg) + blocktext = overline + '\n' + title + '\n' + underline + if len(overline.rstrip()) < 4: + self.short_overline(context, blocktext, lineno, 2) + else: + msg = self.reporter.warning( + 'Title overline too short.', '', + nodes.literal_block(source, source), line=lineno) + messages.append(msg) style = (overline[0], underline[0]) self.eofcheck = 0 # @@@ not sure this is correct self.section(title.lstrip(), source, style, lineno + 1, messages) @@ -2332,15 +2382,30 @@ class Line(SpecializedText): indent = text # indented title - def underline(self, match=None, context=None, next_state=None): - blocktext = context[0] + '\n' + self.state_machine.line + def underline(self, match, context, next_state): + overline = context[0] + blocktext = overline + '\n' + self.state_machine.line + lineno = self.state_machine.abs_line_number() - 1 + if 
len(overline.rstrip()) < 4: + self.short_overline(context, blocktext, lineno, 1) msg = self.reporter.error( 'Invalid section title or transition marker.', '', - nodes.literal_block(blocktext, blocktext), - line=self.state_machine.abs_line_number() - 1) + nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg return [], 'Body', [] + def short_overline(self, context, blocktext, lineno, lines=1): + msg = self.reporter.info( + 'Possible incomplete section title.\nTreating the overline as ' + "ordinary text because it's so short.", '', line=lineno) + self.parent += msg + self.state_correction(context, lines) + + def state_correction(self, context, lines=1): + self.state_machine.previous_line(lines) + context[:] = [] + raise statemachine.StateCorrection('Body', 'text') + state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList, OptionList, Explicit, Text, Definition, Line, -- cgit v1.2.1 From c42533ff80a5543fb3a207c102cf8655b9cb6ffc Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 20 Sep 2002 02:51:33 +0000 Subject: Fixed "simple reference name" regexp to ignore text like "object.__method__"; not an anonymous reference. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@700 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index fa940c49a..742aa94f2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -488,7 +488,8 @@ class Inliner: non_whitespace_before = r'(? Date: Tue, 24 Sep 2002 02:12:46 +0000 Subject: Initial support for improved diagnostics. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@709 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 742aa94f2..4380b3864 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -156,6 +156,7 @@ class RSTStateMachine(StateMachineWS): section_level=0, inliner=inliner) self.document = self.memo.document + self.attach_observer(self.document.note_state_machine_change) self.reporter = self.memo.reporter self.node = document results = StateMachineWS.run(self, input_lines, input_offset) @@ -179,6 +180,7 @@ class NestedStateMachine(StateMachineWS): self.match_titles = match_titles self.memo = memo self.document = memo.document + self.attach_observer(self.document.note_state_machine_change) self.reporter = memo.reporter self.node = node results = StateMachineWS.run(self, input_lines, input_offset) -- cgit v1.2.1 From 9d50fb6eee483d2125cc5ca65c4b23ff6d51ff0a Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 2 Oct 2002 03:18:24 +0000 Subject: Reworked directive API. Added ``Body.parse_directive()``, ``.parse_directive_options()``, ``.parse_directive_arguments()`` methods. Added ``ExtensionOptions`` class, to parse directive options without parsing field bodies. Factored ``Body.parse_field_body()`` out of ``Body.field()``, overridden in ``ExtensionOptions``. Generalized some state transition return values (``next_state``). 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@745 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 203 ++++++++++++++++++++++++++++++++++------- 1 file changed, 170 insertions(+), 33 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 4380b3864..35d08ca7c 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -21,6 +21,7 @@ the reStructuredText parser. It defines the following: - `FieldList`: Second+ fields. - `OptionList`: Second+ option_list_items. - `RFC2822List`: Second+ RFC2822-style fields. + - `ExtensionOptions`: Parses directive option fields. - `Explicit`: Second+ explicit markup constructs. - `SubstitutionDef`: For embedded directives in substitution definitions. - `Text`: Classifier of second line of a text block. @@ -1188,8 +1189,7 @@ class Body(RSTState): fieldbody = nodes.field_body('\n'.join(indented)) fieldnode += fieldbody if indented: - self.nested_parse(indented, input_offset=line_offset, - node=fieldbody) + self.parse_field_body(indented, line_offset, fieldbody) return fieldnode, blank_finish def parse_field_marker(self, match): @@ -1198,6 +1198,9 @@ class Body(RSTState): field = field[:field.find(':')] # strip off trailing ':' etc. 
return field + def parse_field_body(self, indented, offset, node): + self.nested_parse(indented, input_offset=offset, node=node) + def option_marker(self, match, context, next_state): """Option list item.""" optionlist = nodes.option_list() @@ -1660,26 +1663,129 @@ class Body(RSTState): def directive(self, match, **option_presets): type_name = match.group(1) - directivefunction = directives.directive(type_name, - self.memo.language) - data = match.string[match.end():].strip() - if directivefunction: - return directivefunction(match, type_name, data, self, - self.state_machine, option_presets) + directive_function = directives.directive(type_name, + self.memo.language) + if directive_function: + return self.parse_directive( + directive_function, match, type_name, option_presets) else: - return self.unknown_directive(type_name, data) + return self.unknown_directive(type_name) + + def parse_directive(self, directive_fn, match, type_name, option_presets): + """ + Parse a directive then run its directive function. + + Parameters: + + - `directive_fn`: The function implementing the directive. Must have + function attributes ``arguments``, ``options``, and ``content``. - def unknown_directive(self, type_name, data): + - `match`: A regular expression match object which matched the first + line of the directive. + + - `type_name`: The directive name, as used in the source text. + + - `option_presets`: A dictionary of preset options, defaults for the + directive options. Currently, only an "alt" option is passed by + substitution definitions (value: the substitution name), which may + be used by an embedded image directive. + + Returns a 2-tuple: list of nodes, and a "blank finish" boolean. 
+ """ + arguments = [] + options = {} + content = [] + argument_spec = option_spec = content_spec = None + if hasattr(directive_fn, 'arguments'): + argument_spec = directive_fn.arguments + if argument_spec[:2] == (0, 0): + argument_spec = None + if hasattr(directive_fn, 'options'): + option_spec = directive_fn.options + if hasattr(directive_fn, 'content'): + content_spec = directive_fn.content lineno = self.state_machine.abs_line_number() - indented, indent, offset, blank_finish = \ - self.state_machine.get_first_known_indented(0, strip_indent=0) - text = '\n'.join(indented) - error = self.reporter.error( - 'Unknown directive type "%s".' % type_name, '', - nodes.literal_block(text, text), line=lineno) - return [error], blank_finish + initial_line_offset = self.state_machine.line_offset + indented, indent, line_offset, blank_finish \ + = self.state_machine.get_first_known_indented(match.end(), + strip_top=0) + block_text = '\n'.join(self.state_machine.input_lines[ + initial_line_offset : self.state_machine.line_offset + 1]) + if indented and not indented[0].strip(): + indented.pop(0) + line_offset += 1 + while indented and not indented[-1].strip(): + indented.pop() + if indented and (argument_spec or option_spec): + for i in range(len(indented)): + if not indented[i].strip(): + break + else: + i += 1 + arg_block = indented[:i] + content = indented[i+1:] + content_offset = line_offset + i + 1 + else: + content = indented + content_offset = line_offset + arg_block = [] + while content and not content[0].strip(): + content.pop(0) + content_offset += 1 + try: + if option_spec: + options, arg_block = self.parse_directive_options( + option_presets, option_spec, arg_block) + if argument_spec: + arguments = self.parse_directive_arguments(argument_spec, + arg_block) + if content and not content_spec: + raise MarkupError('no content permitted.') + except MarkupError, detail: + error = self.reporter.error( + 'Error in "%s" directive:\n%s.' 
% (type_name, detail), '', + nodes.literal_block(block_text, block_text), line=lineno) + return [error], blank_finish + result = directive_fn( + type_name, arguments, options, content, lineno, content_offset, + block_text, self, self.state_machine) + return result, blank_finish + + def parse_directive_options(self, option_presets, option_spec, arg_block): + options = option_presets.copy() + for i in range(len(arg_block)): + if arg_block[i][:1] == ':': + opt_block = arg_block[i:] + arg_block = arg_block[:i] + break + else: + opt_block = [] + if opt_block: + success, data = self.parse_extension_options(option_spec, + opt_block) + if success: # data is a dict of options + options.update(data) + else: # data is an error string + raise MarkupError(data) + return options, arg_block + + def parse_directive_arguments(self, argument_spec, arg_block): + required, optional, last_whitespace = argument_spec + arg_text = '\n'.join(arg_block) + arguments = arg_text.split() + if len(arguments) < required: + raise MarkupError('%s argument(s) required, %s supplied' + % (required, len(arguments))) + elif len(arguments) > required + optional: + if last_whitespace: + arguments = arg_text.split(None, required + optional - 1) + else: + raise MarkupError( + 'maximum %s argument(s) allowed, %s supplied' + % (required + optional, len(arguments))) + return arguments - def parse_extension_options(self, option_spec, datalines, blank_finish): + def parse_extension_options(self, option_spec, datalines): """ Parse `datalines` for a field list containing extension options matching `option_spec`. @@ -1688,28 +1794,39 @@ class Body(RSTState): - `option_spec`: a mapping of option name to conversion function, which should raise an exception on bad input. - `datalines`: a list of input strings. - - `blank_finish`: :Return: - Success value, 1 or 0. - An option dictionary on success, an error string on failure. - - Updated `blank_finish` flag. 
""" node = nodes.field_list() newline_offset, blank_finish = self.nested_list_parse( - datalines, 0, node, initial_state='FieldList', - blank_finish=blank_finish) + datalines, 0, node, initial_state='ExtensionOptions', + blank_finish=1) if newline_offset != len(datalines): # incomplete parse of block - return 0, 'invalid option block', blank_finish + return 0, 'invalid option block' try: options = utils.extract_extension_options(node, option_spec) except KeyError, detail: - return 0, ('unknown option: "%s"' % detail), blank_finish + return 0, ('unknown option: "%s"' % detail) except (ValueError, TypeError), detail: - return 0, ('invalid option value: %s' % detail), blank_finish + return 0, ('invalid option value: %s' % detail) except utils.ExtensionOptionError, detail: - return 0, ('invalid option data: %s' % detail), blank_finish - return 1, options, blank_finish + return 0, ('invalid option data: %s' % detail) + if blank_finish: + return 1, options + else: + return 0, 'option data incompletely parsed' + + def unknown_directive(self, type_name): + lineno = self.state_machine.abs_line_number() + indented, indent, offset, blank_finish = \ + self.state_machine.get_first_known_indented(0, strip_indent=0) + text = '\n'.join(indented) + error = self.reporter.error( + 'Unknown directive type "%s".' 
% type_name, '', + nodes.literal_block(text, text), line=lineno) + return [error], blank_finish def comment(self, match): if not match.string[match.end():].strip() \ @@ -1968,7 +2085,7 @@ class BulletList(SpecializedBody): listitem, blank_finish = self.list_item(match.end()) self.parent += listitem self.blank_finish = blank_finish - return [], 'BulletList', [] + return [], next_state, [] class DefinitionList(SpecializedBody): @@ -1998,7 +2115,7 @@ class EnumeratedList(SpecializedBody): self.parent += listitem self.blank_finish = blank_finish self.lastordinal = ordinal - return [], 'EnumeratedList', [] + return [], next_state, [] class FieldList(SpecializedBody): @@ -2010,7 +2127,7 @@ class FieldList(SpecializedBody): field, blank_finish = self.field(match) self.parent += field self.blank_finish = blank_finish - return [], 'FieldList', [] + return [], next_state, [] class OptionList(SpecializedBody): @@ -2025,7 +2142,7 @@ class OptionList(SpecializedBody): self.invalid_input() self.parent += option_list_item self.blank_finish = blank_finish - return [], 'OptionList', [] + return [], next_state, [] class RFC2822List(SpecializedBody, RFC2822Body): @@ -2045,6 +2162,26 @@ class RFC2822List(SpecializedBody, RFC2822Body): blank = SpecializedBody.invalid_input +class ExtensionOptions(FieldList): + + """ + Parse field_list fields for extension options. + + No nested parsing is done (including inline markup parsing). 
+ """ + + def parse_field_body(self, indented, offset, node): + """Override `Body.parse_field_body` for simpler parsing.""" + lines = [] + for line in indented + ['']: + if line.strip(): + lines.append(line) + elif lines: + text = '\n'.join(lines) + node += nodes.paragraph(text, text) + lines = [] + + class Explicit(SpecializedBody): """Second and subsequent explicit markup construct.""" @@ -2411,8 +2548,8 @@ class Line(SpecializedText): state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList, - OptionList, Explicit, Text, Definition, Line, - SubstitutionDef, RFC2822Body, RFC2822List) + OptionList, ExtensionOptions, Explicit, Text, Definition, + Line, SubstitutionDef, RFC2822Body, RFC2822List) """Standard set of State classes used to start `RSTStateMachine`.""" -- cgit v1.2.1 From ed28012e60a40b2a0774a987d0d25240e2f592a8 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 3 Oct 2002 22:21:58 +0000 Subject: Improved definition list term/classifier parsing. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@761 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 35d08ca7c..bc19f07ab 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -882,7 +882,7 @@ class Inliner: self.implicit_inline(text[match.end():], lineno)) except MarkupMismatch: pass - return [nodes.Text(unescape(text))] + return [nodes.Text(unescape(text), rawsource=unescape(text, 1))] dispatch = {'*': emphasis, '**': strong, @@ -2365,17 +2365,24 @@ class Text(RSTState): def term(self, lines, lineno): """Return a definition_list's term and optional classifier.""" assert len(lines) == 1 - nodelist = [] - parts = lines[0].split(' : ', 1) # split into 1 or 2 parts - termpart = parts[0].rstrip() - textnodes, 
messages = self.inline_text(termpart, lineno) - nodelist = [nodes.term(termpart, '', *textnodes)] - if len(parts) == 2: - classifierpart = parts[1].lstrip() - textnodes, cpmessages = self.inline_text(classifierpart, lineno) - nodelist.append(nodes.classifier(classifierpart, '', *textnodes)) - messages += cpmessages - return nodelist, messages + text_nodes, messages = self.inline_text(lines[0], lineno) + term_node = nodes.term() + node_list = [term_node] + for i in range(len(text_nodes)): + node = text_nodes[i] + if isinstance(node, nodes.Text): + parts = node.rawsource.split(' : ', 1) + if len(parts) == 1: + term_node += node + else: + term_node += nodes.Text(parts[0].rstrip()) + classifier_node = nodes.classifier('', parts[1]) + classifier_node += text_nodes[i+1:] + node_list.append(classifier_node) + break + else: + term_node += node + return node_list, messages class SpecializedText(Text): -- cgit v1.2.1 From 9f5cd7076855bb066a91602e60862435a4c63b40 Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 8 Oct 2002 01:25:40 +0000 Subject: Updated for improved diagnostics & Reporter API. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@770 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 75 ++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 35 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index bc19f07ab..f64eb3f5f 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -333,9 +333,9 @@ class RSTState(StateWS): return None def title_inconsistent(self, sourcetext, lineno): - literalblock = nodes.literal_block('', sourcetext) - error = self.reporter.severe('Title level inconsistent:', '', - literalblock, line=lineno) + error = self.reporter.severe( + 'Title level inconsistent:', nodes.literal_block('', sourcetext), + line=lineno) return error def new_subsection(self, title, lineno, messages): @@ -382,6 +382,7 @@ class RSTState(StateWS): literalnext = 0 textnodes, messages = self.inline_text(text, lineno) p = nodes.paragraph(data, '', *textnodes) + p.line = lineno return [p] + messages, literalnext def inline_text(self, text, lineno): @@ -1182,9 +1183,11 @@ class Body(RSTState): def field(self, match): name = self.parse_field_marker(match) + lineno = self.state_machine.abs_line_number() indented, indent, line_offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) fieldnode = nodes.field() + fieldnode.line = lineno fieldnode += nodes.field_name(name, name) fieldbody = nodes.field_body('\n'.join(indented)) fieldnode += fieldbody @@ -1404,8 +1407,7 @@ class Body(RSTState): lineno = self.state_machine.abs_line_number() - len(block) + 1 if detail: message += '\n' + detail - error = self.reporter.error(message, '', - nodes.literal_block(data, data), + error = self.reporter.error(message, nodes.literal_block(data, data), line=lineno) return [error] @@ -1491,11 +1493,13 @@ class Body(RSTState): """ % vars(Inliner), re.VERBOSE),) def footnote(self, 
match): + lineno = self.state_machine.abs_line_number() indented, indent, offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) label = match.group(1) name = normalize_name(label) footnote = nodes.footnote('\n'.join(indented)) + footnote.line = lineno if name[0] == '#': # auto-numbered name = name[1:] # autonumber label footnote['auto'] = 1 @@ -1513,17 +1517,19 @@ class Body(RSTState): if name: self.document.note_explicit_target(footnote, footnote) else: - self.document.set_id(footnote) + self.document.set_id(footnote, footnote) if indented: self.nested_parse(indented, input_offset=offset, node=footnote) return [footnote], blank_finish def citation(self, match): + lineno = self.state_machine.abs_line_number() indented, indent, offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) label = match.group(1) name = normalize_name(label) citation = nodes.citation('\n'.join(indented)) + citation.line = lineno citation += nodes.label('', label) citation['name'] = name self.document.note_citation(citation) @@ -1558,6 +1564,7 @@ class Body(RSTState): refname = self.is_reference(reference) if refname: target = nodes.target(blocktext, '', refname=refname) + target.line = lineno self.add_target(targetmatch.group('name'), '', target) self.document.note_indirect_target(target) return [target], blank_finish @@ -1566,12 +1573,13 @@ class Body(RSTState): if reference.find(' ') != -1: warning = self.reporter.warning( 'Hyperlink target contains whitespace. 
Perhaps a footnote ' - 'was intended?', '', + 'was intended?', nodes.literal_block(blocktext, blocktext), line=lineno) nodelist.append(warning) else: unescaped = unescape(reference) target = nodes.target(blocktext, '') + target.line = lineno self.add_target(targetmatch.group('name'), unescaped, target) nodelist.append(target) return nodelist, blank_finish @@ -1628,6 +1636,7 @@ class Body(RSTState): name = normalize_name(subname) substitutionnode = nodes.substitution_definition( blocktext, name=name, alt=subname) + substitutionnode.line = lineno if block: block[0] = block[0].strip() newabsoffset, blank_finish = self.nested_list_parse( @@ -1646,7 +1655,7 @@ class Body(RSTState): if len(substitutionnode) == 0: msg = self.reporter.warning( 'Substitution definition "%s" empty or invalid.' - % subname, '', + % subname, nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg else: @@ -1657,7 +1666,7 @@ class Body(RSTState): else: msg = self.reporter.warning( 'Substitution definition "%s" missing contents.' % subname, - '', nodes.literal_block(blocktext, blocktext), line=lineno) + nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg return [], blank_finish @@ -1674,7 +1683,7 @@ class Body(RSTState): def parse_directive(self, directive_fn, match, type_name, option_presets): """ Parse a directive then run its directive function. - + Parameters: - `directive_fn`: The function implementing the directive. Must have @@ -1689,21 +1698,17 @@ class Body(RSTState): directive options. Currently, only an "alt" option is passed by substitution definitions (value: the substitution name), which may be used by an embedded image directive. - + Returns a 2-tuple: list of nodes, and a "blank finish" boolean. 
""" arguments = [] options = {} content = [] - argument_spec = option_spec = content_spec = None - if hasattr(directive_fn, 'arguments'): - argument_spec = directive_fn.arguments - if argument_spec[:2] == (0, 0): - argument_spec = None - if hasattr(directive_fn, 'options'): - option_spec = directive_fn.options - if hasattr(directive_fn, 'content'): - content_spec = directive_fn.content + argument_spec = getattr(directive_fn, 'arguments', None) + if argument_spec and argument_spec[:2] == (0, 0): + argument_spec = None + option_spec = getattr(directive_fn, 'options', None) + content_spec = getattr(directive_fn, 'content', None) lineno = self.state_machine.abs_line_number() initial_line_offset = self.state_machine.line_offset indented, indent, line_offset, blank_finish \ @@ -1743,7 +1748,7 @@ class Body(RSTState): raise MarkupError('no content permitted.') except MarkupError, detail: error = self.reporter.error( - 'Error in "%s" directive:\n%s.' % (type_name, detail), '', + 'Error in "%s" directive:\n%s.' % (type_name, detail), nodes.literal_block(block_text, block_text), line=lineno) return [error], blank_finish result = directive_fn( @@ -1824,7 +1829,7 @@ class Body(RSTState): self.state_machine.get_first_known_indented(0, strip_indent=0) text = '\n'.join(indented) error = self.reporter.error( - 'Unknown directive type "%s".' % type_name, '', + 'Unknown directive type "%s".' % type_name, nodes.literal_block(text, text), line=lineno) return [error], blank_finish @@ -1947,7 +1952,7 @@ class Body(RSTState): lineno = self.state_machine.abs_line_number() - len(block) + 1 warning = self.reporter.warning( 'Anonymous hyperlink target contains whitespace. 
Perhaps a ' - 'footnote was intended?', '', + 'footnote was intended?', nodes.literal_block(blocktext, blocktext), line=lineno) nodelist.append(warning) @@ -1967,14 +1972,14 @@ class Body(RSTState): elif len(match.string.strip()) < 4: msg = self.reporter.info( 'Unexpected possible title overline or transition.\n' - "Treating it as ordinary text because it's so short.", '', + "Treating it as ordinary text because it's so short.", line=self.state_machine.abs_line_number()) self.parent += msg raise statemachine.TransitionCorrection('text') else: blocktext = self.state_machine.line msg = self.reporter.severe( - 'Unexpected section title or transition.', '', + 'Unexpected section title or transition.', nodes.literal_block(blocktext, blocktext), line=self.state_machine.abs_line_number()) self.parent += msg @@ -2279,8 +2284,8 @@ class Text(RSTState): if not self.state_machine.match_titles: blocktext = context[0] + '\n' + self.state_machine.line msg = self.reporter.severe( - 'Unexpected section title.', '', - nodes.literal_block(blocktext, blocktext), line=lineno) + 'Unexpected section title.', + nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg return [], next_state, [] title = context[0].rstrip() @@ -2291,14 +2296,14 @@ class Text(RSTState): if len(underline) < 4: msg = self.reporter.info( 'Possible title underline, too short for the title.\n' - "Treating it as ordinary text because it's so short.", '', + "Treating it as ordinary text because it's so short.", line=lineno) self.parent += msg raise statemachine.TransitionCorrection('text') else: blocktext = context[0] + '\n' + self.state_machine.line msg = self.reporter.warning( - 'Title underline too short.', '', + 'Title underline too short.', nodes.literal_block(blocktext, blocktext), line=lineno) messages.append(msg) style = underline[0] @@ -2483,7 +2488,7 @@ class Line(SpecializedText): self.short_overline(context, blocktext, lineno, 2) else: msg = self.reporter.severe( - 'Incomplete section 
title.', '', + 'Incomplete section title.', nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg return [], 'Body', [] @@ -2496,7 +2501,7 @@ class Line(SpecializedText): self.short_overline(context, blocktext, lineno, 2) else: msg = self.reporter.severe( - 'Missing underline for overline.', '', + 'Missing underline for overline.', nodes.literal_block(source, source), line=lineno) self.parent += msg return [], 'Body', [] @@ -2506,7 +2511,7 @@ class Line(SpecializedText): self.short_overline(context, blocktext, lineno, 2) else: msg = self.reporter.severe( - 'Title overline & underline mismatch.', '', + 'Title overline & underline mismatch.', nodes.literal_block(source, source), line=lineno) self.parent += msg return [], 'Body', [] @@ -2518,7 +2523,7 @@ class Line(SpecializedText): self.short_overline(context, blocktext, lineno, 2) else: msg = self.reporter.warning( - 'Title overline too short.', '', + 'Title overline too short.', nodes.literal_block(source, source), line=lineno) messages.append(msg) style = (overline[0], underline[0]) @@ -2536,7 +2541,7 @@ class Line(SpecializedText): if len(overline.rstrip()) < 4: self.short_overline(context, blocktext, lineno, 1) msg = self.reporter.error( - 'Invalid section title or transition marker.', '', + 'Invalid section title or transition marker.', nodes.literal_block(blocktext, blocktext), line=lineno) self.parent += msg return [], 'Body', [] @@ -2544,7 +2549,7 @@ class Line(SpecializedText): def short_overline(self, context, blocktext, lineno, lines=1): msg = self.reporter.info( 'Possible incomplete section title.\nTreating the overline as ' - "ordinary text because it's so short.", '', line=lineno) + "ordinary text because it's so short.", line=lineno) self.parent += msg self.state_correction(context, lines) -- cgit v1.2.1 From 16a25cf1c6c5e5fa9214bce8c3d45e882783215a Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 17 Oct 2002 01:35:34 +0000 Subject: Added warnings for unknown directives. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@808 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index f64eb3f5f..27784dc27 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1672,8 +1672,9 @@ class Body(RSTState): def directive(self, match, **option_presets): type_name = match.group(1) - directive_function = directives.directive(type_name, - self.memo.language) + directive_function, messages = directives.directive( + type_name, self.memo.language, self.document) + self.parent += messages if directive_function: return self.parse_directive( directive_function, match, type_name, option_presets) @@ -1969,6 +1970,8 @@ class Body(RSTState): """Section title overline or transition marker.""" if self.state_machine.match_titles: return [match.string], 'Line', [] + elif match.string.strip() == '::': + raise statemachine.TransitionCorrection('text') elif len(match.string.strip()) < 4: msg = self.reporter.info( 'Unexpected possible title overline or transition.\n' -- cgit v1.2.1 From afeedfb343c2904e9357997d2a50f8f3cabb2568 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 18 Oct 2002 04:55:21 +0000 Subject: Refactored names (options -> settings; .transform() -> .apply(); etc.); updated. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@825 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 27784dc27..6600391ff 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -145,11 +145,12 @@ class RSTStateMachine(StateMachineWS): StateMachine, and return the resulting document. """ - self.language = languages.get_language(document.options.language_code) + self.language = languages.get_language( + document.settings.language_code) self.match_titles = match_titles if inliner is None: inliner = Inliner() - inliner.init_customizations(document.options) + inliner.init_customizations(document.settings) self.memo = Stuff(document=document, reporter=document.reporter, language=self.language, @@ -431,12 +432,12 @@ class Inliner: """List of (pattern, bound method) tuples, used by `self.implicit_inline`.""" - def init_customizations(self, options): - """Option-based customizations; run when parsing begins.""" - if options.pep_references: + def init_customizations(self, settings): + """Setting-based customizations; run when parsing begins.""" + if settings.pep_references: self.implicit_dispatch.append((self.patterns.pep, self.pep_reference)) - if options.rfc_references: + if settings.rfc_references: self.implicit_dispatch.append((self.patterns.rfc, self.rfc_reference)) -- cgit v1.2.1 From a3d5874e951e34373fc39ac5dc502ec4ef01d89e Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 18 Oct 2002 23:44:47 +0000 Subject: Renamed ``Stuff`` to ``Struct``. Fixed a bug. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@830 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 6600391ff..06c6cb1c1 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -28,7 +28,7 @@ the reStructuredText parser. It defines the following: - `SpecializedText`: Superclass for continuation lines of Text-variants. - `Definition`: Second line of potential definition_list_item. - `Line`: Second line of overlined section title or transition marker. - - `Stuff`: An auxiliary collection class. + - `Struct`: An auxiliary collection class. :Exception classes: - `MarkupError` @@ -120,9 +120,9 @@ class ParserError(ApplicationError): pass class MarkupMismatch(Exception): pass -class Stuff: +class Struct: - """Stores a bunch of stuff for dotted-attribute access.""" + """Stores data attributes for dotted-attribute access.""" def __init__(self, **keywordargs): self.__dict__.update(keywordargs) @@ -151,12 +151,12 @@ class RSTStateMachine(StateMachineWS): if inliner is None: inliner = Inliner() inliner.init_customizations(document.settings) - self.memo = Stuff(document=document, - reporter=document.reporter, - language=self.language, - title_styles=[], - section_level=0, - inliner=inliner) + self.memo = Struct(document=document, + reporter=document.reporter, + language=self.language, + title_styles=[], + section_level=0, + inliner=inliner) self.document = self.memo.document self.attach_observer(self.document.note_state_machine_change) self.reporter = self.memo.reporter @@ -524,7 +524,7 @@ class Inliner: ) ] ) - patterns = Stuff( + patterns = Struct( initial=build_regexp(parts), emphasis=re.compile(non_whitespace_escape_before + r'(\*)' + end_string_suffix), @@ -903,13 +903,13 @@ class 
Body(RSTState): Generic classifier of the first line of a block. """ - enum = Stuff() + enum = Struct() """Enumerated list parsing information.""" enum.formatinfo = { - 'parens': Stuff(prefix='(', suffix=')', start=1, end=-1), - 'rparen': Stuff(prefix='', suffix=')', start=0, end=-1), - 'period': Stuff(prefix='', suffix='.', start=0, end=-1)} + 'parens': Struct(prefix='(', suffix=')', start=1, end=-1), + 'rparen': Struct(prefix='', suffix=')', start=0, end=-1), + 'period': Struct(prefix='', suffix='.', start=0, end=-1)} enum.formats = enum.formatinfo.keys() enum.sequences = ['arabic', 'loweralpha', 'upperalpha', 'lowerroman', 'upperroman'] # ORDERED! @@ -1449,10 +1449,10 @@ class Body(RSTState): return row - explicit = Stuff() + explicit = Struct() """Patterns and constants used for explicit markup recognition.""" - explicit.patterns = Stuff( + explicit.patterns = Struct( target=re.compile(r""" ( _ # anonymous target @@ -1903,10 +1903,7 @@ class Body(RSTState): try: return method(self, expmatch) except MarkupError, (message, lineno): # never reached? - errors.append( - self.reporter.warning( - '%s: %s' % (detail.__class__.__name__, message), - line=lineno)) + errors.append(self.reporter.warning(message, line=lineno)) break nodelist, blank_finish = self.comment(match) return nodelist + errors, blank_finish -- cgit v1.2.1 From b4f387efe3f819802be5bdbcecc3e420f244a0b2 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 31 Oct 2002 02:38:00 +0000 Subject: Now flagged as errors: transitions at the beginning or end of sections, empty sections (except title), and empty documents. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@869 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 60 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 11 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 06c6cb1c1..ebfee3430 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -163,8 +163,16 @@ class RSTStateMachine(StateMachineWS): self.node = document results = StateMachineWS.run(self, input_lines, input_offset) assert results == [], 'RSTStateMachine.run() results should be empty!' + self.check_document() self.node = self.memo = None # remove unneeded references + def check_document(self): + """Check for illegal structure: empty document.""" + if len(self.document) == 0: + error = self.reporter.error( + 'Document empty; must have contents.', line=0) + self.document += error + class NestedStateMachine(StateMachineWS): @@ -344,27 +352,53 @@ class RSTState(StateWS): memo = self.memo mylevel = memo.section_level memo.section_level += 1 - sectionnode = nodes.section() - self.parent += sectionnode + section_node = nodes.section() + self.parent += section_node textnodes, title_messages = self.inline_text(title, lineno) titlenode = nodes.title(title, '', *textnodes) name = normalize_name(titlenode.astext()) - sectionnode['name'] = name - sectionnode += titlenode - sectionnode += messages - sectionnode += title_messages - self.document.note_implicit_target(sectionnode, sectionnode) + section_node['name'] = name + section_node += titlenode + section_node += messages + section_node += title_messages + self.document.note_implicit_target(section_node, section_node) offset = self.state_machine.line_offset + 1 absoffset = self.state_machine.abs_line_offset() + 1 newabsoffset = self.nested_parse( self.state_machine.input_lines[offset:], input_offset=absoffset, - node=sectionnode, 
match_titles=1) + node=section_node, match_titles=1) self.goto_line(newabsoffset) + self.check_section(section_node) if memo.section_level <= mylevel: # can't handle next section? raise EOFError # bubble up to supersection # reset section_level; next pass will detect it properly memo.section_level = mylevel + def check_section(self, section): + """ + Check for illegal structure: empty section, misplaced transitions. + """ + lineno = section.line + if len(section) <= 1: + error = self.reporter.error( + 'Section empty; must have contents.', line=lineno) + section += error + return + if not isinstance(section[0], nodes.title): # shouldn't ever happen + error = self.reporter.error( + 'First element of section must be a title.', line=lineno) + section.insert(0, error) + if isinstance(section[1], nodes.transition): + error = self.reporter.error( + 'Section may not begin with a transition.', + line=section[1].line) + section.insert(1, error) + if len(section) > 2 and isinstance(section[-1], nodes.transition): + error = self.reporter.error( + 'Section may not end with a transition.', + line=section[-1].line) + section += error + def paragraph(self, lines, lineno): """ Return a list (paragraph & messages) & a boolean: literal_block next? 
@@ -2446,31 +2480,35 @@ class Line(SpecializedText): if len(marker) < 4: self.state_correction(context) if self.eofcheck: # ignore EOFError with sections + lineno = self.state_machine.abs_line_number() - 1 transition = nodes.transition(context[0]) + transition.line = lineno self.parent += transition msg = self.reporter.error( 'Document or section may not end with a transition.', - line=self.state_machine.abs_line_number() - 1) + line=lineno) self.parent += msg self.eofcheck = 1 return [] def blank(self, match, context, next_state): """Transition marker.""" + lineno = self.state_machine.abs_line_number() - 1 marker = context[0].strip() if len(marker) < 4: self.state_correction(context) transition = nodes.transition(marker) + transition.line = lineno if len(self.parent) == 0: msg = self.reporter.error( 'Document or section may not begin with a transition.', - line=self.state_machine.abs_line_number() - 1) + line=lineno) self.parent += msg elif isinstance(self.parent[-1], nodes.transition): msg = self.reporter.error( 'At least one body element must separate transitions; ' 'adjacent transitions not allowed.', - line=self.state_machine.abs_line_number() - 1) + line=lineno) self.parent += msg self.parent += transition return [], 'Body', [] -- cgit v1.2.1 From 39148f8101ca745fbf08dc91862537dc5b8a1668 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 8 Nov 2002 01:30:20 +0000 Subject: Updated for ``statemachine.StringList``. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@913 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 72 ++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 28 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index ebfee3430..9c4eeafab 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -157,11 +157,12 @@ class RSTStateMachine(StateMachineWS): title_styles=[], section_level=0, inliner=inliner) - self.document = self.memo.document - self.attach_observer(self.document.note_state_machine_change) + self.document = document + self.attach_observer(document.note_source) self.reporter = self.memo.reporter self.node = document - results = StateMachineWS.run(self, input_lines, input_offset) + results = StateMachineWS.run(self, input_lines, input_offset, + input_source=document['source']) assert results == [], 'RSTStateMachine.run() results should be empty!' 
self.check_document() self.node = self.memo = None # remove unneeded references @@ -190,7 +191,7 @@ class NestedStateMachine(StateMachineWS): self.match_titles = match_titles self.memo = memo self.document = memo.document - self.attach_observer(self.document.note_state_machine_change) + self.attach_observer(self.document.note_source) self.reporter = memo.reporter self.node = node results = StateMachineWS.run(self, input_lines, input_offset) @@ -260,12 +261,16 @@ class RSTState(StateWS): state_machine_class = self.nested_sm if state_machine_kwargs is None: state_machine_kwargs = self.nested_sm_kwargs + block_length = len(block) state_machine = state_machine_class(debug=self.debug, **state_machine_kwargs) state_machine.run(block, input_offset, memo=self.memo, node=node, match_titles=match_titles) state_machine.unlink() - return state_machine.abs_line_offset() + new_offset = state_machine.abs_line_offset() + # Adjustment for block if modified in nested parse: + self.state_machine.next_line(len(block) - block_length) + return new_offset def nested_list_parse(self, block, input_offset, node, initial_state, blank_finish, @@ -1270,10 +1275,12 @@ class Body(RSTState): return [], next_state, [] def option_list_item(self, match): + offset = self.state_machine.abs_line_offset() options = self.parse_option_marker(match) indented, indent, line_offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) if not indented: # not an option list item + self.goto_line(offset) raise statemachine.TransitionCorrection('text') option_group = nodes.option_group('', *options) description = nodes.description('\n'.join(indented)) @@ -1366,10 +1373,11 @@ class Body(RSTState): try: block = self.state_machine.get_text_block(flush_left=1) except statemachine.UnexpectedIndentationError, instance: - block, lineno = instance.args + block, source, lineno = instance.args messages.append(self.reporter.error('Unexpected indentation.', - line=lineno)) + source=source, line=lineno)) 
blank_finish = 0 + block.disconnect() width = len(block[0].strip()) for i in range(len(block)): block[i] = block[i].strip() @@ -1649,7 +1657,9 @@ class Body(RSTState): self.state_machine.get_first_known_indented(match.end(), strip_indent=0) blocktext = (match.string[:match.end()] + '\n'.join(block)) - block = [escape2null(line) for line in block] + block.disconnect() + for i in range(len(block)): + block[i] = escape2null(block[i]) escaped = block[0].rstrip() blockindex = 0 while 1: @@ -1667,6 +1677,8 @@ class Body(RSTState): if not block[0]: del block[0] offset += 1 + while block and not block[-1].strip(): + block.pop() subname = subdefmatch.group('name') name = normalize_name(subname) substitutionnode = nodes.substitution_definition( @@ -1674,11 +1686,9 @@ class Body(RSTState): substitutionnode.line = lineno if block: block[0] = block[0].strip() - newabsoffset, blank_finish = self.nested_list_parse( + new_abs_offset, blank_finish = self.nested_list_parse( block, input_offset=offset, node=substitutionnode, initial_state='SubstitutionDef', blank_finish=blank_finish) - self.state_machine.previous_line( - len(block) + offset - newabsoffset - 1) i = 0 for node in substitutionnode[:]: if not (isinstance(node, nodes.Inline) or @@ -1692,7 +1702,7 @@ class Body(RSTState): 'Substitution definition "%s" empty or invalid.' % subname, nodes.literal_block(blocktext, blocktext), line=lineno) - self.parent += msg + return [msg], blank_finish else: del substitutionnode['alt'] self.document.note_substitution_def( @@ -1702,8 +1712,7 @@ class Body(RSTState): msg = self.reporter.warning( 'Substitution definition "%s" missing contents.' % subname, nodes.literal_block(blocktext, blocktext), line=lineno) - self.parent += msg - return [], blank_finish + return [msg], blank_finish def directive(self, match, **option_presets): type_name = match.group(1) @@ -1722,8 +1731,9 @@ class Body(RSTState): Parameters: - - `directive_fn`: The function implementing the directive. 
Must have - function attributes ``arguments``, ``options``, and ``content``. + - `directive_fn`: The function implementing the directive. Uses + function attributes ``arguments``, ``options``, and/or ``content`` + if present. - `match`: A regular expression match object which matched the first line of the directive. @@ -1753,10 +1763,10 @@ class Body(RSTState): block_text = '\n'.join(self.state_machine.input_lines[ initial_line_offset : self.state_machine.line_offset + 1]) if indented and not indented[0].strip(): - indented.pop(0) + indented.trim_start() line_offset += 1 while indented and not indented[-1].strip(): - indented.pop() + indented.trim_end() if indented and (argument_spec or option_spec): for i in range(len(indented)): if not indented[i].strip(): @@ -1771,7 +1781,7 @@ class Body(RSTState): content_offset = line_offset arg_block = [] while content and not content[0].strip(): - content.pop(0) + content.trim_start() content_offset += 1 try: if option_spec: @@ -1790,7 +1800,7 @@ class Body(RSTState): result = directive_fn( type_name, arguments, options, content, lineno, content_offset, block_text, self, self.state_machine) - return result, blank_finish + return result, blank_finish or self.state_machine.is_next_line_blank() def parse_directive_options(self, option_presets, option_spec, arg_block): options = option_presets.copy() @@ -1875,6 +1885,8 @@ class Body(RSTState): return [nodes.comment()], 1 # "A tiny but practical wart." 
indented, indent, offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) + while indented and not indented[-1].strip(): + indented.trim_end() text = '\n'.join(indented) return [nodes.comment(text, text)], blank_finish @@ -2059,7 +2071,8 @@ class RFC2822Body(Body): def rfc2822_field(self, match): name = match.string[:match.string.find(':')] indented, indent, line_offset, blank_finish = \ - self.state_machine.get_first_known_indented(match.end()) + self.state_machine.get_first_known_indented(match.end(), + until_blank=1) fieldnode = nodes.field() fieldnode += nodes.field_name(name, name) fieldbody = nodes.field_body('\n'.join(indented)) @@ -2213,7 +2226,7 @@ class ExtensionOptions(FieldList): def parse_field_body(self, indented, offset, node): """Override `Body.parse_field_body` for simpler parsing.""" lines = [] - for line in indented + ['']: + for line in list(indented) + ['']: if line.strip(): lines.append(line) elif lines: @@ -2240,6 +2253,8 @@ class Explicit(SpecializedBody): self.blank_finish = blank_finish return [], next_state, [] + blank = SpecializedBody.invalid_input + class SubstitutionDef(Body): @@ -2353,9 +2368,10 @@ class Text(RSTState): try: block = self.state_machine.get_text_block(flush_left=1) except statemachine.UnexpectedIndentationError, instance: - block, lineno = instance.args - msg = self.reporter.error('Unexpected indentation.', line=lineno) - lines = context + block + block, source, lineno = instance.args + msg = self.reporter.error('Unexpected indentation.', + source=source, line=lineno) + lines = context + list(block) paragraph, literalnext = self.paragraph(lines, startline) self.parent += paragraph self.parent += msg @@ -2373,7 +2389,7 @@ class Text(RSTState): self.state_machine.get_indented() nodelist = [] while indented and not indented[-1].strip(): - indented.pop() + indented.trim_end() if indented: data = '\n'.join(indented) nodelist.append(nodes.literal_block(data, data)) @@ -2388,8 +2404,8 @@ class 
Text(RSTState): def definition_list_item(self, termline): indented, indent, line_offset, blank_finish = \ self.state_machine.get_indented() - definitionlistitem = nodes.definition_list_item('\n'.join(termline - + indented)) + definitionlistitem = nodes.definition_list_item( + '\n'.join(termline + list(indented))) termlist, messages = self.term( termline, self.state_machine.abs_line_number() - 1) definitionlistitem += termlist -- cgit v1.2.1 From 3d389e7cded3fe068f30fab1546e87ff6e79ccc9 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 16 Nov 2002 02:27:00 +0000 Subject: Enabled recognition of schemeless email addresses in targets. Added support for embedded URIs in hyperlink references. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@954 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 84 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 16 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 9c4eeafab..3f49a9954 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -534,9 +534,17 @@ class Inliner: non_whitespace_after = r'(?![ \n])' # Alphanumerics with isolated internal [-._] chars (i.e. 
not 2 together): simplename = r'(?:(?!_)\w)+(?:[-._](?:(?!_)\w)+)*' + # Valid URI characters (see RFC 2396 & RFC 2732): uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9]""" - urilast = r"""[_~/\]a-zA-Z0-9]""" # no punctuation + # Last URI character; same as uric but no punctuation: + urilast = r"""[_~/a-zA-Z0-9]""" emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9]""" + email_pattern = r""" + %(emailc)s+(?:\.%(emailc)s+)* # name + @ # at + %(emailc)s+(?:\.%(emailc)s*)* # host + %(urilast)s # final URI char + """ parts = ('initial_inline', start_string_prefix, '', [('start', '', non_whitespace_after, # simple start-strings [r'\*\*', # strong @@ -581,6 +589,18 @@ class Inliner: ) %(end_string_suffix)s """ % locals(), re.VERBOSE | re.UNICODE), + embedded_uri=re.compile( + r""" + ( + [ \n]+ # spaces or beginning of line + < # open bracket + %(non_whitespace_after)s + ([^<>\0]+) # anything but angle brackets & nulls + %(non_whitespace_before)s + > # close bracket w/o whitespace before + ) + $ # end of string + """ % locals(), re.VERBOSE), literal=re.compile(non_whitespace_before + '(``)' + end_string_suffix), target=re.compile(non_whitespace_escape_before @@ -588,8 +608,9 @@ class Inliner: substitution_ref=re.compile(non_whitespace_escape_before + r'(\|_{0,2})' + end_string_suffix), + email=re.compile(email_pattern % locals() + '$', re.VERBOSE), uri=re.compile( - r""" + (r""" %(start_string_prefix)s (?P (?P # absolute URI @@ -615,14 +636,11 @@ class Inliner: ) | # *OR* (?P # email address - %(emailc)s+(\.%(emailc)s+)* # name - @ # at - %(emailc)s+(\.%(emailc)s*)* # host - %(urilast)s # final URI char + """ + email_pattern + r""" ) ) %(end_string_suffix)s - """ % locals(), re.VERBOSE), + """) % locals(), re.VERBOSE), pep=re.compile( r""" %(start_string_prefix)s @@ -736,7 +754,7 @@ class Inliner: prb = self.problematic(text, text, msg) return string[:rolestart], [prb], string[textend:], [msg] return self.phrase_ref(string[:matchstart], string[textend:], - rawsource, text) + 
rawsource, escaped, text) else: return self.interpreted(string[:rolestart], string[textend:], rawsource, text, role, position) @@ -747,16 +765,46 @@ class Inliner: prb = self.problematic(text, text, msg) return string[:matchstart], [prb], string[matchend:], [msg] - def phrase_ref(self, before, after, rawsource, text): + def phrase_ref(self, before, after, rawsource, escaped, text): + match = self.patterns.embedded_uri.search(escaped) + if match: + text = unescape(escaped[:match.start(0)]) + uri_text = match.group(2) + uri = ''.join(uri_text.split()) + uri = self.adjust_uri(uri) + if uri: + target = nodes.target(match.group(1), refuri=uri) + else: + raise ApplicationError('problem with URI: %r' % uri_text) + else: + target = None refname = normalize_name(text) reference = nodes.reference(rawsource, text) + node_list = [reference] if rawsource[-2:] == '__': - reference['anonymous'] = 1 - self.document.note_anonymous_ref(reference) + if target: + reference['refuri'] = uri + else: + reference['anonymous'] = 1 + self.document.note_anonymous_ref(reference) else: - reference['refname'] = refname - self.document.note_refname(reference) - return before, [reference], after, [] + if target: + reference['refuri'] = uri + target['name'] = refname + self.document.note_external_target(target) + self.document.note_explicit_target(target, self.parent) + node_list.append(target) + else: + reference['refname'] = refname + self.document.note_refname(reference) + return before, node_list, after, [] + + def adjust_uri(self, uri): + match = self.patterns.email.match(uri) + if match: + return 'mailto:' + uri + else: + return uri def interpreted(self, before, after, rawsource, text, role, position): if role: @@ -1639,8 +1687,12 @@ class Body(RSTState): name = normalize_name(unescape(targetname)) target['name'] = name if refuri: - target['refuri'] = refuri - self.document.note_external_target(target) + uri = self.inliner.adjust_uri(refuri) + if uri: + target['refuri'] = uri + 
self.document.note_external_target(target) + else: + raise ApplicationError('problem with URI: %r' % refuri) else: self.document.note_internal_target(target) self.document.note_explicit_target(target, self.parent) -- cgit v1.2.1 From df0b272dc4cd35d6b1ab106ffd1285c7da2177ac Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 19 Nov 2002 02:37:08 +0000 Subject: updated git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@966 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 1 - 1 file changed, 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 3f49a9954..5ddcbb5bb 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1801,7 +1801,6 @@ class Body(RSTState): """ arguments = [] options = {} - content = [] argument_spec = getattr(directive_fn, 'arguments', None) if argument_spec and argument_spec[:2] == (0, 0): argument_spec = None -- cgit v1.2.1 From a5017ad1128d5d901ce4c80c31cf4d06a43d5637 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 12 Dec 2002 02:44:44 +0000 Subject: fixed a bug with ellipsis etc. 
(<=3 char underline) on second line of a block quote paragraph git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1013 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 5ddcbb5bb..ab1266f9a 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2382,24 +2382,18 @@ class Text(RSTState): def underline(self, match, context, next_state): """Section title.""" lineno = self.state_machine.abs_line_number() - if not self.state_machine.match_titles: - blocktext = context[0] + '\n' + self.state_machine.line - msg = self.reporter.severe( - 'Unexpected section title.', - nodes.literal_block(blocktext, blocktext), line=lineno) - self.parent += msg - return [], next_state, [] title = context[0].rstrip() underline = match.string.rstrip() source = title + '\n' + underline messages = [] if len(title) > len(underline): if len(underline) < 4: - msg = self.reporter.info( - 'Possible title underline, too short for the title.\n' - "Treating it as ordinary text because it's so short.", - line=lineno) - self.parent += msg + if self.state_machine.match_titles: + msg = self.reporter.info( + 'Possible title underline, too short for the title.\n' + "Treating it as ordinary text because it's so short.", + line=lineno) + self.parent += msg raise statemachine.TransitionCorrection('text') else: blocktext = context[0] + '\n' + self.state_machine.line @@ -2407,6 +2401,14 @@ class Text(RSTState): 'Title underline too short.', nodes.literal_block(blocktext, blocktext), line=lineno) messages.append(msg) + if not self.state_machine.match_titles: + blocktext = context[0] + '\n' + self.state_machine.line + msg = self.reporter.severe( + 'Unexpected section title.', + nodes.literal_block(blocktext, blocktext), line=lineno) + 
self.parent += messages + self.parent += msg + return [], next_state, [] style = underline[0] context[:] = [] self.section(title, source, style, lineno - 1, messages) -- cgit v1.2.1 From eb82300e688f6e452b1db58d5abadd76be56025d Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 17 Dec 2002 02:25:19 +0000 Subject: Added support for backslash-escape after inline markup. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1023 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index ab1266f9a..fb95cdd6a 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -528,7 +528,7 @@ class Inliner: openers = '\'"([{<' closers = '\'")]}>' start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers)) - end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n%s]))' % re.escape(closers)) + end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))' % re.escape(closers)) non_whitespace_before = r'(? 
Date: Fri, 3 Jan 2003 00:49:44 +0000 Subject: Python 2.3 compatibility fix git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1053 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index fb95cdd6a..8c3f43a30 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1910,7 +1910,7 @@ class Body(RSTState): try: options = utils.extract_extension_options(node, option_spec) except KeyError, detail: - return 0, ('unknown option: "%s"' % detail) + return 0, ('unknown option: "%s"' % detail.args[0]) except (ValueError, TypeError), detail: return 0, ('invalid option value: %s' % detail) except utils.ExtensionOptionError, detail: -- cgit v1.2.1 From edaeb226e0a505faa5ca54f8bb473a534f3e7b81 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 10 Jan 2003 02:12:36 +0000 Subject: Added support for correct interpreted text processing. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1078 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 148 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 140 insertions(+), 8 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 8c3f43a30..2ddd50809 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -113,9 +113,12 @@ from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.utils import normalize_name from docutils.parsers.rst import directives, languages, tableparser +from docutils.parsers.rst.languages import en as _fallback_language_module class MarkupError(DataError): pass +class UnknownInterpretedRoleError(DataError): pass +class InterpretedRoleNotImplementedError(DataError): pass class ParserError(ApplicationError): pass class MarkupMismatch(Exception): pass @@ -466,11 +469,53 @@ class Inliner: Parse inline markup; call the `parse()` method. """ - def __init__(self): + _interpreted_roles = { + # Values of ``None`` mean "not implemented yet": + 'title-reference': 'title_reference_role', + 'abbreviation': None, + 'acronym': None, + 'index': None, + 'emphasis': None, + 'strong': None, + 'literal': None, + 'named-reference': None, + 'anonymous-reference': None, + 'uri-reference': None, + 'pep-reference': 'pep_reference_role', + 'rfc-reference': 'rfc_reference_role', + 'footnote-reference': None, + 'citation-reference': None, + 'substitution-reference': None, + 'target': None, + } + """Mapping of canonical interpreted text role name to method name. + Initializes a name to bound-method mapping in `__init__`.""" + + default_interpreted_role = 'title-reference' + """The role to use when no explicit role is given. 
+ Override in subclasses.""" + + def __init__(self, roles=None): + """ + `roles` is a mapping of canonical role name to role function or bound + method, which enables additional interpreted text roles. + """ + self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),] """List of (pattern, bound method) tuples, used by `self.implicit_inline`.""" + self.interpreted_roles = {} + """Mapping of canonical role name to role function or bound method. + Items removed from this mapping will be disabled.""" + + for canonical, method in self._interpreted_roles.items(): + if method: + self.interpreted_roles[canonical] = getattr(self, method) + else: + self.interpreted_roles[canonical] = None + self.interpreted_roles.update(roles or {}) + def init_customizations(self, settings): """Setting-based customizations; run when parsing begins.""" if settings.pep_references: @@ -496,6 +541,7 @@ class Inliner: """ self.reporter = memo.reporter self.document = memo.document + self.language = memo.language self.parent = parent pattern_search = self.patterns.initial.search dispatch = self.dispatch @@ -528,7 +574,8 @@ class Inliner: openers = '\'"([{<' closers = '\'")]}>' start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers)) - end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))' % re.escape(closers)) + end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))' + % re.escape(closers)) non_whitespace_before = r'(? 9999: + raise ValueError + except ValueError: + msg = self.reporter.error( + 'PEP number must be a number from 0 to 9999; "%s" is invalid.' 
+ % text, line=lineno) + prb = self.problematic(text, text, msg) + return [prb], [msg] + ref = self.pep_url % pepnum + return [nodes.reference(rawtext, 'PEP ' + text, refuri=ref)], [] + + def rfc_reference_role(self, role, rawtext, text, lineno): + try: + rfcnum = int(text) + if rfcnum <= 0: + raise ValueError + except ValueError: + msg = self.reporter.error( + 'RFC number must be a number greater than or equal to 1; ' + '"%s" is invalid.' % text, line=lineno) + prb = self.problematic(text, text, msg) + return [prb], [msg] + ref = self.rfc_url % rfcnum + return [nodes.reference(rawtext, 'RFC ' + text, refuri=ref)], [] + class Body(RSTState): -- cgit v1.2.1 From c6a8b09215bf080177a35cbd1ad9154be6328970 Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 21 Jan 2003 18:24:54 +0000 Subject: correction git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1151 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 2ddd50809..e7b5a91a7 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1974,7 +1974,7 @@ class Body(RSTState): arguments = self.parse_directive_arguments(argument_spec, arg_block) if content and not content_spec: - raise MarkupError('no content permitted.') + raise MarkupError('no content permitted') except MarkupError, detail: error = self.reporter.error( 'Error in "%s" directive:\n%s.' 
% (type_name, detail), -- cgit v1.2.1 From 813c817e2696f9eba43e10cd147ba4a04244a1d1 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 20 Feb 2003 22:35:49 +0000 Subject: sidebar improvements git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1196 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index e7b5a91a7..d10b07a68 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2147,7 +2147,8 @@ class Body(RSTState): self.state_machine.input_lines[offset:], input_offset=self.state_machine.abs_line_offset() + 1, node=self.parent, initial_state='Explicit', - blank_finish=blank_finish) + blank_finish=blank_finish, + match_titles=self.state_machine.match_titles) self.goto_line(newline_offset) if not blank_finish: self.parent += self.unindent_warning('Explicit markup') -- cgit v1.2.1 From 3cf105681c70001de69a7bc12e5d77dee6329e61 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 22 Mar 2003 06:02:17 +0000 Subject: support & docs for character-level inline markup git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1226 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index d10b07a68..68b3b6853 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2820,8 +2820,13 @@ def escape2null(text): start = found + 2 # skip character after escape def unescape(text, restore_backslashes=0): - """Return a string with nulls removed or restored to backslashes.""" + """ + Return a string with nulls removed or restored to backslashes. + Backslash-escaped spaces are also removed. 
+ """ if restore_backslashes: return text.replace('\x00', '\\') else: - return ''.join(text.split('\x00')) + for sep in ['\x00 ', '\x00\n', '\x00']: + text = ''.join(text.split(sep)) + return text -- cgit v1.2.1 From adf281a00341eae13815afb97acf04ded5fa6f2d Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 27 Mar 2003 00:28:07 +0000 Subject: beginnings of substitution case-sensitizing git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1235 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 68b3b6853..d712cbea2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -111,7 +111,7 @@ from types import TupleType from docutils import nodes, statemachine, utils, roman, urischemes from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS -from docutils.utils import normalize_name +from docutils.nodes import fully_normalize_name as normalize_name from docutils.parsers.rst import directives, languages, tableparser from docutils.parsers.rst.languages import en as _fallback_language_module -- cgit v1.2.1 From 1c87d959eb6be73667ca61bbb48d69b075a846df Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 27 Mar 2003 03:56:37 +0000 Subject: substitutions made case-sensitive but forgiving (case-insensitive fallback) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1236 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 58 +++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index d712cbea2..481ac1259 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -936,26 
+936,22 @@ class Inliner: match, lineno, self.patterns.substitution_ref, nodes.substitution_reference) if len(inlines) == 1: - subrefnode = inlines[0] - if isinstance(subrefnode, nodes.substitution_reference): - subreftext = subrefnode.astext() - refname = normalize_name(subreftext) - subrefnode['refname'] = refname - self.document.note_substitution_ref( - subrefnode) + subref_node = inlines[0] + if isinstance(subref_node, nodes.substitution_reference): + subref_text = subref_node.astext() + self.document.note_substitution_ref(subref_node, subref_text) if endstring[-1:] == '_': - referencenode = nodes.reference( - '|%s%s' % (subreftext, endstring), '') + reference_node = nodes.reference( + '|%s%s' % (subref_text, endstring), '') if endstring[-2:] == '__': - referencenode['anonymous'] = 1 + reference_node['anonymous'] = 1 self.document.note_anonymous_ref( - referencenode) + reference_node) else: - referencenode['refname'] = refname - self.document.note_refname( - referencenode) - referencenode += subrefnode - inlines = [referencenode] + reference_node['refname'] = normalize_name(subref_text) + self.document.note_refname(reference_node) + reference_node += subref_node + inlines = [reference_node] return before, inlines, remaining, sysmessages def footnote_reference(self, match, lineno): @@ -1864,34 +1860,31 @@ class Body(RSTState): while block and not block[-1].strip(): block.pop() subname = subdefmatch.group('name') - name = normalize_name(subname) - substitutionnode = nodes.substitution_definition( - blocktext, name=name, alt=subname) - substitutionnode.line = lineno + substitution_node = nodes.substitution_definition(blocktext) + substitution_node.line = lineno + self.document.note_substitution_def( + substitution_node,subname, self.parent) if block: block[0] = block[0].strip() new_abs_offset, blank_finish = self.nested_list_parse( - block, input_offset=offset, node=substitutionnode, + block, input_offset=offset, node=substitution_node, 
initial_state='SubstitutionDef', blank_finish=blank_finish) i = 0 - for node in substitutionnode[:]: + for node in substitution_node[:]: if not (isinstance(node, nodes.Inline) or isinstance(node, nodes.Text)): - self.parent += substitutionnode[i] - del substitutionnode[i] + self.parent += substitution_node[i] + del substitution_node[i] else: i += 1 - if len(substitutionnode) == 0: + if len(substitution_node) == 0: msg = self.reporter.warning( 'Substitution definition "%s" empty or invalid.' % subname, nodes.literal_block(blocktext, blocktext), line=lineno) return [msg], blank_finish else: - del substitutionnode['alt'] - self.document.note_substitution_def( - substitutionnode, self.parent) - return [substitutionnode], blank_finish + return [substitution_node], blank_finish else: msg = self.reporter.warning( 'Substitution definition "%s" missing contents.' % subname, @@ -2453,11 +2446,8 @@ class SubstitutionDef(Body): initial_transitions = ['embedded_directive', 'text'] def embedded_directive(self, match, context, next_state): - if self.parent.has_key('alt'): - option_presets = {'alt': self.parent['alt']} - else: - option_presets = {} - nodelist, blank_finish = self.directive(match, **option_presets) + nodelist, blank_finish = self.directive(match, + alt=self.parent['name']) self.parent += nodelist if not self.state_machine.at_eof(): self.blank_finish = blank_finish -- cgit v1.2.1 From bd61a20c2fa1a7b09df66561eb26b5fc509806c2 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 24 May 2003 20:46:45 +0000 Subject: Added support for block quote attributions. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1327 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 70 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 481ac1259..838d495c4 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1204,16 +1204,72 @@ class Body(RSTState): """Block quote.""" indented, indent, line_offset, blank_finish = \ self.state_machine.get_indented() - blockquote = self.block_quote(indented, line_offset) + blockquote, messages = self.block_quote(indented, line_offset) self.parent += blockquote + self.parent += messages if not blank_finish: self.parent += self.unindent_warning('Block quote') return context, next_state, [] def block_quote(self, indented, line_offset): + blockquote_lines, attribution_lines, attribution_offset = \ + self.check_attribution(indented, line_offset) blockquote = nodes.block_quote() - self.nested_parse(indented, line_offset, blockquote) - return blockquote + self.nested_parse(blockquote_lines, line_offset, blockquote) + messages = [] + if attribution_lines: + attribution, messages = self.parse_attribution(attribution_lines, + attribution_offset) + blockquote += attribution + return blockquote, messages + + attribution_pattern = re.compile(r'--(?![-\n]) *(?=[^ \n])') + + def check_attribution(self, indented, line_offset): + """ + Check for an attribution in the last contiguous block of `indented`. + + * First line after last blank line must begin with "--" (etc.). + * Every line after that must have consistent indentation. + + Return a 3-tuple: (block quote lines, attribution lines, + attribution offset). 
+ """ + blank = None + nonblank_seen = None + indent = 0 + for i in range(len(indented) - 1, 0, -1): # don't check first line + this_line_blank = not indented[i].strip() + if nonblank_seen and this_line_blank: + match = self.attribution_pattern.match(indented[i + 1]) + if match: + blank = i + break + elif not this_line_blank: + nonblank_seen = 1 + if blank and len(indented) - blank > 2: # multi-line attribution + indent = (len(indented[blank + 2]) + - len(indented[blank + 2].lstrip())) + for j in range(blank + 3, len(indented)): + if indent != (len(indented[j]) + - len(indented[j].lstrip())): # bad shape + blank = None + break + if blank: + a_lines = indented[blank + 1:] + a_lines.strip_indent(match.end(), end=1) + a_lines.strip_indent(indent, start=1) + return (indented[:blank], a_lines, line_offset + blank + 1) + else: + return (indented, None, None) + + def parse_attribution(self, indented, line_offset): + text = '\n'.join(indented).rstrip() + lineno = self.state_machine.abs_line_number() + line_offset + textnodes, messages = self.inline_text(text, lineno) + node = nodes.attribution(text, '', *textnodes) + node.line = lineno + return node, messages def bullet(self, match, context, next_state): """Bullet list item.""" @@ -1432,8 +1488,9 @@ class Body(RSTState): self.parent += msg indented, indent, line_offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) - blockquote = self.block_quote(indented, line_offset) + blockquote, messages = self.block_quote(indented, line_offset) self.parent += blockquote + self.parent += messages if not blank_finish: self.parent += self.unindent_warning('Option list') return [], next_state, [] @@ -2582,8 +2639,9 @@ class Text(RSTState): self.state_machine.get_indented() definitionlistitem = nodes.definition_list_item( '\n'.join(termline + list(indented))) - termlist, messages = self.term( - termline, self.state_machine.abs_line_number() - 1) + lineno = self.state_machine.abs_line_number() - 1 + 
definitionlistitem.line = lineno + termlist, messages = self.term(termline, lineno) definitionlistitem += termlist definition = nodes.definition('', *messages) definitionlistitem += definition -- cgit v1.2.1 From ea87be65128106e35aea589545f563f31e5c13bb Mon Sep 17 00:00:00 2001 From: goodger Date: Sun, 25 May 2003 14:59:30 +0000 Subject: Added a kludge to work-around a conflict between the bubble-up parser strategy and short titles (<= 3 char-long over- & underlines). Fixes SF bug #738803 "infinite loop with multiple titles" submitted by Jason Diamond. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1344 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 838d495c4..3901d6078 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -159,6 +159,7 @@ class RSTStateMachine(StateMachineWS): language=self.language, title_styles=[], section_level=0, + section_bubble_up_kludge=0, inliner=inliner) self.document = document self.attach_observer(document.note_source) @@ -340,6 +341,8 @@ class RSTState(StateWS): return None if level <= mylevel: # sibling or supersection memo.section_level = level # bubble up to parent section + if len(style) == 2: + memo.section_bubble_up_kludge = 1 # back up 2 lines for underline title, 3 for overline title self.state_machine.previous_line(len(style) + 1) raise EOFError # let parent section re-evaluate @@ -2727,7 +2730,9 @@ class Line(SpecializedText): def eof(self, context): """Transition marker at end of section or document.""" marker = context[0].strip() - if len(marker) < 4: + if self.memo.section_bubble_up_kludge: + self.memo.section_bubble_up_kludge = 0 + elif len(marker) < 4: self.state_correction(context) if self.eofcheck: # ignore EOFError with sections lineno = 
self.state_machine.abs_line_number() - 1 -- cgit v1.2.1 From 389a395279a3fa0459a6ebe5dfa992b2973c4956 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 5 Jun 2003 15:14:36 +0000 Subject: Added explicit interpreted text roles for standard inline markup: "emphasis", "strong", "literal". Implemented "superscript" and "subscript" interpreted text roles. Added initial support for "abbreviation" and "acronym" roles; incomplete. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1383 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 97 ++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 37 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 3901d6078..4a4da3478 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -474,13 +474,15 @@ class Inliner: _interpreted_roles = { # Values of ``None`` mean "not implemented yet": - 'title-reference': 'title_reference_role', - 'abbreviation': None, - 'acronym': None, + 'title-reference': 'generic_interpreted_role', + 'abbreviation': 'generic_interpreted_role', + 'acronym': 'generic_interpreted_role', 'index': None, - 'emphasis': None, - 'strong': None, - 'literal': None, + 'subscript': 'generic_interpreted_role', + 'superscript': 'generic_interpreted_role', + 'emphasis': 'generic_interpreted_role', + 'strong': 'generic_interpreted_role', + 'literal': 'generic_interpreted_role', 'named-reference': None, 'anonymous-reference': None, 'uri-reference': None, @@ -490,7 +492,7 @@ class Inliner: 'citation-reference': None, 'substitution-reference': None, 'target': None, - } + 'restructuredtext-unimplemented-role': None} """Mapping of canonical interpreted text role name to method name. Initializes a name to bound-method mapping in `__init__`.""" @@ -498,6 +500,18 @@ class Inliner: """The role to use when no explicit role is given. 
Override in subclasses.""" + generic_roles = {'abbreviation': nodes.abbreviation, + 'acronym': nodes.acronym, + 'emphasis': nodes.emphasis, + 'literal': nodes.literal, + 'strong': nodes.strong, + 'subscript': nodes.subscript, + 'superscript': nodes.superscript, + 'title-reference': nodes.title_reference,} + """Mapping of canonical interpreted text role name to node class. + Used by the `generic_interpreted_role` method for simple, straightforward + roles (simple wrapping; no extra processing).""" + def __init__(self, roles=None): """ `roles` is a mapping of canonical role name to role function or bound @@ -875,9 +889,11 @@ class Inliner: return uri def interpreted(self, before, after, rawsource, text, role, lineno): - role_function, messages = self.get_role_function(role, lineno) + role_function, canonical, messages = self.get_role_function(role, + lineno) if role_function: - nodelist, messages2 = role_function(role, rawsource, text, lineno) + nodelist, messages2 = role_function(canonical, rawsource, text, + lineno) messages.extend(messages2) return before, nodelist, after, messages else: @@ -888,34 +904,34 @@ class Inliner: msg_text = [] if role: name = role.lower() - canonical = None - try: - canonical = self.language.roles[name] - except AttributeError, error: - msg_text.append('Problem retrieving role entry from language ' - 'module %r: %s.' % (self.language, error)) - except KeyError: - msg_text.append('No role entry for "%s" in module "%s".' - % (role, self.language.__name__)) - if not canonical: - try: - canonical = _fallback_language_module.roles[name] - msg_text.append('Using English fallback for role "%s".' - % role) - except KeyError: - msg_text.append('Trying "%s" as canonical role name.' 
- % role) - # Should be an English name, but just in case: - canonical = name - if msg_text: - message = self.reporter.info('\n'.join(msg_text), line=lineno) - messages.append(message) + else: + name = self.default_interpreted_role + canonical = None + try: + canonical = self.language.roles[name] + except AttributeError, error: + msg_text.append('Problem retrieving role entry from language ' + 'module %r: %s.' % (self.language, error)) + except KeyError: + msg_text.append('No role entry for "%s" in module "%s".' + % (name, self.language.__name__)) + if not canonical: try: - return self.interpreted_roles[canonical], messages + canonical = _fallback_language_module.roles[name] + msg_text.append('Using English fallback for role "%s".' + % name) except KeyError: - raise UnknownInterpretedRoleError(messages) - else: - return self.interpreted_roles[self.default_interpreted_role], [] + msg_text.append('Trying "%s" as canonical role name.' + % name) + # Should be an English name, but just in case: + canonical = name + if msg_text: + message = self.reporter.info('\n'.join(msg_text), line=lineno) + messages.append(message) + try: + return self.interpreted_roles[canonical], canonical, messages + except KeyError: + raise UnknownInterpretedRoleError(messages) def literal(self, match, lineno): before, inlines, remaining, sysmessages, endstring = self.inline_obj( @@ -1083,8 +1099,15 @@ class Inliner: '_': reference, '__': anonymous_reference} - def title_reference_role(self, role, rawtext, text, lineno): - return [nodes.title_reference(rawtext, text)], [] + def generic_interpreted_role(self, role, rawtext, text, lineno): + try: + role_class = self.generic_roles[role] + except KeyError: + msg = self.reporter.error('Unknown interpreted text role: "%s".' 
+ % role, line=lineno) + prb = self.problematic(text, text, msg) + return [prb], [msg] + return [role_class(rawtext, text)], [] def pep_reference_role(self, role, rawtext, text, lineno): try: -- cgit v1.2.1 From 0cf867bc975f087ce49b13340e925ff9311b4f8a Mon Sep 17 00:00:00 2001 From: goodger Date: Mon, 9 Jun 2003 15:06:34 +0000 Subject: Support for "--trim-footnote-reference-space" option. Optional space before colons in directives & hyperlink targets. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1404 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 4a4da3478..296bc57b4 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -980,6 +980,9 @@ class Inliner: """ label = match.group('footnotelabel') refname = normalize_name(label) + string = match.string + before = string[:match.start('whole')] + remaining = string[match.end('whole'):] if match.group('citationlabel'): refnode = nodes.citation_reference('[%s]_' % label, refname=refname) @@ -1001,10 +1004,9 @@ class Inliner: if refname: refnode['refname'] = refname self.document.note_footnote_ref(refnode) - string = match.string - matchstart = match.start('whole') - matchend = match.end('whole') - return (string[:matchstart], [refnode], string[matchend:], []) + if self.document.settings.trim_footnote_reference_space: + before = before.rstrip() + return (before, [refnode], remaining, []) def reference(self, match, lineno, anonymous=None): referencename = match.group('refname') @@ -1768,6 +1770,7 @@ class Body(RSTState): (?P=quote) # close quote if open quote used ) %(non_whitespace_escape_before)s + [ ]? 
# optional space : # end of reference name ([ ]+|$) # followed by whitespace """ % vars(Inliner), re.VERBOSE), @@ -2188,6 +2191,7 @@ class Body(RSTState): re.compile(r""" \.\.[ ]+ # explicit markup start (%s) # directive name + [ ]? # optional space :: # directive delimiter ([ ]+|$) # whitespace or end of line """ % Inliner.simplename, re.VERBOSE | re.UNICODE))] -- cgit v1.2.1 From 4d5856ed0b53ed121be993e520271fc0e4a1ce55 Mon Sep 17 00:00:00 2001 From: goodger Date: Mon, 16 Jun 2003 03:26:53 +0000 Subject: updated git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1470 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 296bc57b4..2fc1f8866 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -107,8 +107,9 @@ __docformat__ = 'reStructuredText' import sys import re +import roman from types import TupleType -from docutils import nodes, statemachine, utils, roman, urischemes +from docutils import nodes, statemachine, utils, urischemes from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.nodes import fully_normalize_name as normalize_name -- cgit v1.2.1 From 5a8e95129e0a2603d6b711b5d22b060d071e2021 Mon Sep 17 00:00:00 2001 From: goodger Date: Sun, 29 Jun 2003 15:39:51 +0000 Subject: improved error message; suggestion by Roman Suzi git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1527 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 2fc1f8866..c8d04325f 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ 
-2823,7 +2823,7 @@ class Line(SpecializedText): self.short_overline(context, blocktext, lineno, 2) else: msg = self.reporter.severe( - 'Missing underline for overline.', + 'Missing matching underline for section title overline.', nodes.literal_block(source, source), line=lineno) self.parent += msg return [], 'Body', [] -- cgit v1.2.1 From 2313cb61ce61101b1d6bf7152b273b78196e4bcc Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 5 Jul 2003 19:38:11 +0000 Subject: updated ``RSTState.nested_parse`` for "include" in table cells, but not sure of correctness git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1569 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index c8d04325f..dce993f89 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -273,8 +273,13 @@ class RSTState(StateWS): node=node, match_titles=match_titles) state_machine.unlink() new_offset = state_machine.abs_line_offset() - # Adjustment for block if modified in nested parse: - self.state_machine.next_line(len(block) - block_length) + try: + # Adjustment for block if modified in nested parse: + self.state_machine.next_line(len(block) - block_length) + except EOFError: + # @@@ This accommodates "include" directives in table cells, + # but I'm not sure it's the correct solution. 
+ pass return new_offset def nested_list_parse(self, block, input_offset, node, initial_state, @@ -1286,8 +1291,8 @@ class Body(RSTState): break if blank: a_lines = indented[blank + 1:] - a_lines.strip_indent(match.end(), end=1) - a_lines.strip_indent(indent, start=1) + a_lines.trim_left(match.end(), end=1) + a_lines.trim_left(indent, start=1) return (indented[:blank], a_lines, line_offset + blank + 1) else: return (indented, None, None) -- cgit v1.2.1 From 58f7621cb2d8e18bfebf255eafc705c5a12c3fae Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 5 Jul 2003 22:38:28 +0000 Subject: fixed table cell line sync problem ("include" directive in cells) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1574 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index dce993f89..6c759367c 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -273,13 +273,10 @@ class RSTState(StateWS): node=node, match_titles=match_titles) state_machine.unlink() new_offset = state_machine.abs_line_offset() - try: + # No `block.parent` implies disconnected -- lines aren't in sync: + if block.parent: # Adjustment for block if modified in nested parse: self.state_machine.next_line(len(block) - block_length) - except EOFError: - # @@@ This accommodates "include" directives in table cells, - # but I'm not sure it's the correct solution. - pass return new_offset def nested_list_parse(self, block, input_offset, node, initial_state, -- cgit v1.2.1 From 180fb329f6e70b36a04e3ae54238964eec9adf44 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 11 Jul 2003 02:05:37 +0000 Subject: Allowed true em-dash character as block quote attribution marker. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1584 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 6c759367c..216ea8da2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1254,7 +1254,8 @@ class Body(RSTState): blockquote += attribution return blockquote, messages - attribution_pattern = re.compile(r'--(?![-\n]) *(?=[^ \n])') + # u'\u2014' is an em-dash: + attribution_pattern = re.compile(ur'(--|\u2014)(?![-\n]) *(?=[^ \n])') def check_attribution(self, indented, line_offset): """ -- cgit v1.2.1 From a3418f12380f5ca855453fa104445fcb52ed396c Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 12 Jul 2003 18:59:30 +0000 Subject: allow "---" for block quote attributions too git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1588 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 216ea8da2..6666366b6 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1255,7 +1255,7 @@ class Body(RSTState): return blockquote, messages # u'\u2014' is an em-dash: - attribution_pattern = re.compile(ur'(--|\u2014)(?![-\n]) *(?=[^ \n])') + attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])') def check_attribution(self, indented, line_offset): """ -- cgit v1.2.1 From 296b45ebc25622c11469a55f08ecba9b0b3d8f5e Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 19 Sep 2003 03:01:44 +0000 Subject: Added support for complex option arguments (option lists). 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1695 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 6666366b6..bb84261ae 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1198,8 +1198,10 @@ class Body(RSTState): '|%(upperroman)s)' % enum.sequencepats) pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats # @@@ Loosen up the pattern? Allow Unicode? - pats['optarg'] = '%(alpha)s%(alphanumplus)s*' % pats - pats['option'] = r'(--?|\+|/)%(optname)s([ =]%(optarg)s)?' % pats + pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<%(alphanum)s[^ <>]+>)' % pats + pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats + pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats + pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats for format in enum.formats: pats[format] = '(?P<%s>%s%s%s)' % ( @@ -1572,6 +1574,12 @@ class Body(RSTState): if len(firstopt) > 1: tokens[:1] = firstopt delimiter = '=' + elif (len(tokens[0]) > 2 + and ((tokens[0].startswith('-') + and not tokens[0].startswith('--')) + or tokens[0].startswith('+'))): + tokens[:1] = [tokens[0][:2], tokens[0][2:]] + delimiter = '' if 0 < len(tokens) <= 2: option = nodes.option(optionstring) option += nodes.option_string(tokens[0], tokens[0]) -- cgit v1.2.1 From db535a208f73d2fc6af3887c5cb8d0030558213d Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 20 Sep 2003 17:22:11 +0000 Subject: Fixed handling of backslashes in substitution definitions. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1696 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index bb84261ae..82ae15791 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1936,9 +1936,7 @@ class Body(RSTState): strip_indent=0) blocktext = (match.string[:match.end()] + '\n'.join(block)) block.disconnect() - for i in range(len(block)): - block[i] = escape2null(block[i]) - escaped = block[0].rstrip() + escaped = escape2null(block[0].rstrip()) blockindex = 0 while 1: subdefmatch = pattern.match(escaped) @@ -1946,7 +1944,7 @@ class Body(RSTState): break blockindex += 1 try: - escaped = escaped + ' ' + block[blockindex].strip() + escaped = escaped + ' ' + escape2null(block[blockindex].strip()) except IndexError: raise MarkupError('malformed substitution definition.', lineno) -- cgit v1.2.1 From a64ff7ddc4a8637fd9dda75947e93c005ee4a2de Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 7 Nov 2003 14:48:26 +0000 Subject: fixed off-by-1 error with extra whitespace after substitution definition directive git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1733 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 82ae15791..d85ad8dae 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1949,7 +1949,7 @@ class Body(RSTState): raise MarkupError('malformed substitution definition.', lineno) del block[:blockindex] # strip out the substitution marker - block[0] = (block[0] + ' ')[subdefmatch.end()-len(escaped)-1:].strip() + block[0] = (block[0].strip() + ' 
')[subdefmatch.end()-len(escaped)-1:-1] if not block[0]: del block[0] offset += 1 -- cgit v1.2.1 From c5e249ae47c1c7b96367b911b7aa7817ba85c797 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 19 Dec 2003 14:30:13 +0000 Subject: Added inline markup parsing to field lists' field names git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1771 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index d85ad8dae..be0ab922d 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1494,8 +1494,9 @@ class Body(RSTState): self.state_machine.get_first_known_indented(match.end()) fieldnode = nodes.field() fieldnode.line = lineno - fieldnode += nodes.field_name(name, name) - fieldbody = nodes.field_body('\n'.join(indented)) + name_nodes, name_messages = self.inline_text(name, lineno) + fieldnode += nodes.field_name(name, '', *name_nodes) + fieldbody = nodes.field_body('\n'.join(indented), *name_messages) fieldnode += fieldbody if indented: self.parse_field_body(indented, line_offset, fieldbody) -- cgit v1.2.1 From 19c14c68f9ae90da3cc2595df2eb74a9f56a9115 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 19 Dec 2003 14:35:47 +0000 Subject: improved some naming git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1772 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index be0ab922d..0d4e6ab03 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1472,15 +1472,15 @@ class Body(RSTState): def field_marker(self, match, context, next_state): """Field list item.""" - fieldlist = 
nodes.field_list() - self.parent += fieldlist + field_list = nodes.field_list() + self.parent += field_list field, blank_finish = self.field(match) - fieldlist += field + field_list += field offset = self.state_machine.line_offset + 1 # next line newline_offset, blank_finish = self.nested_list_parse( self.state_machine.input_lines[offset:], input_offset=self.state_machine.abs_line_offset() + 1, - node=fieldlist, initial_state='FieldList', + node=field_list, initial_state='FieldList', blank_finish=blank_finish) self.goto_line(newline_offset) if not blank_finish: @@ -1492,15 +1492,15 @@ class Body(RSTState): lineno = self.state_machine.abs_line_number() indented, indent, line_offset, blank_finish = \ self.state_machine.get_first_known_indented(match.end()) - fieldnode = nodes.field() - fieldnode.line = lineno + field_node = nodes.field() + field_node.line = lineno name_nodes, name_messages = self.inline_text(name, lineno) - fieldnode += nodes.field_name(name, '', *name_nodes) - fieldbody = nodes.field_body('\n'.join(indented), *name_messages) - fieldnode += fieldbody + field_node += nodes.field_name(name, '', *name_nodes) + field_body = nodes.field_body('\n'.join(indented), *name_messages) + field_node += field_body if indented: - self.parse_field_body(indented, line_offset, fieldbody) - return fieldnode, blank_finish + self.parse_field_body(indented, line_offset, field_body) + return field_node, blank_finish def parse_field_marker(self, match): """Extract & return field name from a field marker match.""" -- cgit v1.2.1 From f74494c19b8a9de8a1bb4d6c2acce694b24d802b Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 20 Dec 2003 18:48:23 +0000 Subject: Added support for quoted (and unindented) literal blocks. Driven in part by a bribe from Frank Siebenlist (thanks!). 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1773 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 102 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 91 insertions(+), 11 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 0d4e6ab03..571fe8f42 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -274,7 +274,7 @@ class RSTState(StateWS): state_machine.unlink() new_offset = state_machine.abs_line_offset() # No `block.parent` implies disconnected -- lines aren't in sync: - if block.parent: + if block.parent and (len(block) - block_length) != 0: # Adjustment for block if modified in nested parse: self.state_machine.next_line(len(block) - block_length) return new_offset @@ -2661,20 +2661,29 @@ class Text(RSTState): """Return a list of nodes.""" indented, indent, offset, blank_finish = \ self.state_machine.get_indented() - nodelist = [] while indented and not indented[-1].strip(): indented.trim_end() - if indented: - data = '\n'.join(indented) - nodelist.append(nodes.literal_block(data, data)) - if not blank_finish: - nodelist.append(self.unindent_warning('Literal block')) - else: - nodelist.append(self.reporter.warning( - 'Literal block expected; none found.', - line=self.state_machine.abs_line_number())) + if not indented: + return self.quoted_literal_block() + nodelist = [] + data = '\n'.join(indented) + nodelist.append(nodes.literal_block(data, data)) + if not blank_finish: + nodelist.append(self.unindent_warning('Literal block')) return nodelist + def quoted_literal_block(self): + abs_line_offset = self.state_machine.abs_line_offset() + offset = self.state_machine.line_offset + parent_node = nodes.Element() + new_abs_offset = self.nested_parse( + self.state_machine.input_lines[offset:], + input_offset=abs_line_offset, node=parent_node, match_titles=0, + state_machine_kwargs={'state_classes': 
(QuotedLiteralBlock,), + 'initial_state': 'QuotedLiteralBlock'}) + self.goto_line(new_abs_offset) + return parent_node.children + def definition_list_item(self, termline): indented, indent, line_offset, blank_finish = \ self.state_machine.get_indented() @@ -2891,6 +2900,77 @@ class Line(SpecializedText): raise statemachine.StateCorrection('Body', 'text') +class QuotedLiteralBlock(RSTState): + + """ + Nested parse handler for quoted (unindented) literal blocks. + + Special-purpose. Not for inclusion in `state_classes`. + """ + + patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats, + 'text': r''} + initial_transitions = ('initial_quoted', 'text') + + def __init__(self, state_machine, debug=0): + RSTState.__init__(self, state_machine, debug) + self.messages = [] + self.initial_lineno = None + + def blank(self, match, context, next_state): + if context: + raise EOFError + else: + return context, next_state, [] + + def eof(self, context): + if context: + text = '\n'.join(context) + literal_block = nodes.literal_block(text, text) + literal_block.line = self.initial_lineno + self.parent += literal_block + else: + self.parent += self.reporter.warning( + 'Literal block expected; none found.', + line=self.state_machine.abs_line_number()) + self.state_machine.previous_line() + self.parent += self.messages + return [] + + def indent(self, match, context, next_state): + assert context, ('QuotedLiteralBlock.indent: context should not ' + 'be empty!') + self.messages.append( + self.reporter.error('Unexpected indentation.', + line=self.state_machine.abs_line_number())) + self.state_machine.previous_line() + raise EOFError + + def initial_quoted(self, match, context, next_state): + """Match arbitrary quote character on the first line only.""" + self.remove_transition('initial_quoted') + quote = match.string[0] + pattern = re.compile(re.escape(quote)) + # New transition matches consistent quotes only: + self.add_transition('quoted', + (pattern, self.quoted, 
self.__class__.__name__)) + self.initial_lineno = self.state_machine.abs_line_number() + return [match.string], next_state, [] + + def quoted(self, match, context, next_state): + """Match consistent quotes on subsequent lines.""" + context.append(match.string) + return context, next_state, [] + + def text(self, match, context, next_state): + if context: + self.messages.append( + self.reporter.error('Inconsistent literal block quoting.', + line=self.state_machine.abs_line_number())) + self.state_machine.previous_line() + raise EOFError + + state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList, OptionList, ExtensionOptions, Explicit, Text, Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List) -- cgit v1.2.1 From 1a9d3d2ed89b62a7ed5126355e3f11b70ed1e5c1 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 24 Dec 2003 15:05:18 +0000 Subject: Upon reviewing RFC 2396, I see that asterisks are valid URL characters, sometimes actually used. There's a conflict with emphasis, but backslash escapes should overcome that (they didn't). This fixes a bug in the parser that escaped asterisks in URLs weren't recognized. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1778 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 571fe8f42..1c9b65f20 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -601,11 +601,12 @@ class Inliner: non_whitespace_after = r'(?![ \n])' # Alphanumerics with isolated internal [-._] chars (i.e. 
not 2 together): simplename = r'(?:(?!_)\w)+(?:[-._](?:(?!_)\w)+)*' - # Valid URI characters (see RFC 2396 & RFC 2732): - uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9]""" + # Valid URI characters (see RFC 2396 & RFC 2732); + # final \x00 allows backslash escapes in URIs: + uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]""" # Last URI character; same as uric but no punctuation: urilast = r"""[_~/a-zA-Z0-9]""" - emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9]""" + emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]""" email_pattern = r""" %(emailc)s+(?:\.%(emailc)s+)* # name @ # at -- cgit v1.2.1 From 4e174f882eb88ad4fd1e41e785a5c21e347e0c53 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 24 Dec 2003 15:51:38 +0000 Subject: Made embedded-URIs' reference text omittable. Idea from Beni Cherniavsky. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1781 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 1c9b65f20..9e048099d 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -660,10 +660,10 @@ class Inliner: embedded_uri=re.compile( r""" ( - [ \n]+ # spaces or beginning of line + (?:[ \n]+|^) # spaces or beginning of line/string < # open bracket %(non_whitespace_after)s - ([^<>\0]+) # anything but angle brackets & nulls + ([^<>\x00]+) # anything but angle brackets & nulls %(non_whitespace_before)s > # close bracket w/o whitespace before ) @@ -862,6 +862,8 @@ class Inliner: target = nodes.target(match.group(1), refuri=uri) else: raise ApplicationError('problem with URI: %r' % uri_text) + if not text: + text = uri else: target = None refname = normalize_name(text) -- cgit v1.2.1 From 69e7ed1f66cc19205efaa870de5f76a30fdc09c4 Mon Sep 17 00:00:00 2001 From: goodger Date: Sun, 4 Jan 2004 17:34:17 +0000 Subject: Refactored 
explicit target processing code git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1790 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 85 ++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 45 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 9e048099d..26f55c566 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1880,30 +1880,48 @@ class Body(RSTState): raise MarkupError('malformed hyperlink target.', lineno) del block[:blockindex] block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip() + target = self.make_target(block, blocktext, lineno, + targetmatch.group('name')) + return [target], blank_finish + + def make_target(self, block, block_text, lineno, target_name): + target_type, data = self.parse_target(block, block_text, lineno) + if target_type == 'refname': + target = nodes.target(block_text, '', refname=data) + self.add_target(target_name, '', target, lineno) + self.document.note_indirect_target(target) + return target + elif target_type == 'refuri': + target = nodes.target(block_text, '') + self.add_target(target_name, data, target, lineno) + return target + else: + return data + + def parse_target(self, block, block_text, lineno): + """ + Determine the type of reference of a target. 
+ + :Return: A 2-tuple, one of: + + - 'refname' and the indirect reference name + - 'refuri' and the URI + - 'malformed' and a system_message node + """ if block and block[-1].strip()[-1:] == '_': # possible indirect target reference = ' '.join([line.strip() for line in block]) refname = self.is_reference(reference) if refname: - target = nodes.target(blocktext, '', refname=refname) - target.line = lineno - self.add_target(targetmatch.group('name'), '', target) - self.document.note_indirect_target(target) - return [target], blank_finish - nodelist = [] + return 'refname', refname reference = ''.join([line.strip() for line in block]) - if reference.find(' ') != -1: + if reference.find(' ') == -1: + return 'refuri', unescape(reference) + else: warning = self.reporter.warning( 'Hyperlink target contains whitespace. Perhaps a footnote ' 'was intended?', - nodes.literal_block(blocktext, blocktext), line=lineno) - nodelist.append(warning) - else: - unescaped = unescape(reference) - target = nodes.target(blocktext, '') - target.line = lineno - self.add_target(targetmatch.group('name'), unescaped, target) - nodelist.append(target) - return nodelist, blank_finish + nodes.literal_block(block_text, block_text), line=lineno) + return 'malformed', warning def is_reference(self, reference): match = self.explicit.patterns.reference.match( @@ -1912,7 +1930,8 @@ class Body(RSTState): return None return unescape(match.group('simple') or match.group('phrase')) - def add_target(self, targetname, refuri, target): + def add_target(self, targetname, refuri, target, lineno): + target.line = lineno if targetname: name = normalize_name(unescape(targetname)) target['name'] = name @@ -2255,38 +2274,14 @@ class Body(RSTState): return [], next_state, [] def anonymous_target(self, match): + lineno = self.state_machine.abs_line_number() block, indent, offset, blank_finish \ = self.state_machine.get_first_known_indented(match.end(), until_blank=1) blocktext = match.string[:match.end()] + 
'\n'.join(block) - if block and block[-1].strip()[-1:] == '_': # possible indirect target - reference = escape2null(' '.join([line.strip() - for line in block])) - refname = self.is_reference(reference) - if refname: - target = nodes.target(blocktext, '', refname=refname, - anonymous=1) - self.document.note_anonymous_target(target) - self.document.note_indirect_target(target) - return [target], blank_finish - nodelist = [] - reference = escape2null(''.join([line.strip() for line in block])) - if reference.find(' ') != -1: - lineno = self.state_machine.abs_line_number() - len(block) + 1 - warning = self.reporter.warning( - 'Anonymous hyperlink target contains whitespace. Perhaps a ' - 'footnote was intended?', - nodes.literal_block(blocktext, blocktext), - line=lineno) - nodelist.append(warning) - else: - target = nodes.target(blocktext, '', anonymous=1) - if reference: - unescaped = unescape(reference) - target['refuri'] = unescaped - self.document.note_anonymous_target(target) - nodelist.append(target) - return nodelist, blank_finish + block = [escape2null(line) for line in block] + target = self.make_target(block, blocktext, lineno, '') + return [target], blank_finish def line(self, match, context, next_state): """Section title overline or transition marker.""" -- cgit v1.2.1 From 7aca2d56b7ae2bea193e91efd1531204d53e0195 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 14 Jan 2004 16:07:11 +0000 Subject: clarified system message git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1801 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 26f55c566..252a38f37 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2695,8 +2695,8 @@ class Text(RSTState): definitionlistitem += definition if termline[0][-2:] == '::': 
definition += self.reporter.info( - 'Blank line missing before literal block? Interpreted as a ' - 'definition list item.', line=line_offset + 1) + 'Blank line missing before literal block (after the "::")? ' + 'Interpreted as a definition list item.', line=line_offset+1) self.nested_parse(indented, input_offset=line_offset, node=definition) return definitionlistitem, blank_finish -- cgit v1.2.1 From 00ee5f95c731a0e30a3a2504ce61561b9674dd7b Mon Sep 17 00:00:00 2001 From: mmgilbe Date: Tue, 23 Mar 2004 23:43:54 +0000 Subject: Verifying that external targets are truly targets and not indirect references. This is because we are now adding a "name" attribute to references in addition to targets. Not sure if this is correct! git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1882 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 252a38f37..9b77d5d49 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -113,6 +113,7 @@ from docutils import nodes, statemachine, utils, urischemes from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.nodes import fully_normalize_name as normalize_name +from docutils.nodes import whitespace_normalize_name from docutils.parsers.rst import directives, languages, tableparser from docutils.parsers.rst.languages import en as _fallback_language_module @@ -867,7 +868,8 @@ class Inliner: else: target = None refname = normalize_name(text) - reference = nodes.reference(rawsource, text) + reference = nodes.reference(rawsource, text, + name=whitespace_normalize_name(text)) node_list = [reference] if rawsource[-2:] == '__': if target: @@ -1017,8 +1019,9 @@ class Inliner: def reference(self, match, lineno, anonymous=None): 
referencename = match.group('refname') refname = normalize_name(referencename) - referencenode = nodes.reference(referencename + match.group('refend'), - referencename) + referencenode = nodes.reference( + referencename + match.group('refend'), referencename, + name=whitespace_normalize_name(referencename)) if anonymous: referencenode['anonymous'] = 1 self.document.note_anonymous_ref(referencenode) -- cgit v1.2.1 From 06305ebeb5bb98b2be5f754084ac12cefc58719e Mon Sep 17 00:00:00 2001 From: mmgilbe Date: Wed, 24 Mar 2004 16:53:29 +0000 Subject: Added "origuri" attribute to references which contain an embedded uri. Some writers (maybe other transforms) are interested in the original (non normalized) uri text. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1888 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 9b77d5d49..9a52cafca 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -854,6 +854,7 @@ class Inliner: def phrase_ref(self, before, after, rawsource, escaped, text): match = self.patterns.embedded_uri.search(escaped) + uri_text = None if match: text = unescape(escaped[:match.start(0)]) uri_text = match.group(2) @@ -868,8 +869,15 @@ class Inliner: else: target = None refname = normalize_name(text) - reference = nodes.reference(rawsource, text, - name=whitespace_normalize_name(text)) + # Only add origuri attribute if the rawsource does contain an + # embedded_uri + if uri_text: + reference = nodes.reference(rawsource, text, + name=whitespace_normalize_name(text), + origuri=uri_text) + else: + reference = nodes.reference(rawsource, text, + name=whitespace_normalize_name(text)) node_list = [reference] if rawsource[-2:] == '__': if target: -- cgit v1.2.1 From f9ce4af77d798fd979eee78a8d31d05783f6f6f3 
Mon Sep 17 00:00:00 2001 From: mmgilbe Date: Thu, 25 Mar 2004 23:01:11 +0000 Subject: Added unknown_reference_resolvers list for each transformer. This list holds the list of functions provided by each component of the transformer that help resolve references. Removed all refernces to the origuri code, which was broken. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1894 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 9a52cafca..d1c63f7f1 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -854,7 +854,6 @@ class Inliner: def phrase_ref(self, before, after, rawsource, escaped, text): match = self.patterns.embedded_uri.search(escaped) - uri_text = None if match: text = unescape(escaped[:match.start(0)]) uri_text = match.group(2) @@ -869,15 +868,8 @@ class Inliner: else: target = None refname = normalize_name(text) - # Only add origuri attribute if the rawsource does contain an - # embedded_uri - if uri_text: - reference = nodes.reference(rawsource, text, - name=whitespace_normalize_name(text), - origuri=uri_text) - else: - reference = nodes.reference(rawsource, text, - name=whitespace_normalize_name(text)) + reference = nodes.reference(rawsource, text, + name=whitespace_normalize_name(text)) node_list = [reference] if rawsource[-2:] == '__': if target: @@ -1898,7 +1890,8 @@ class Body(RSTState): def make_target(self, block, block_text, lineno, target_name): target_type, data = self.parse_target(block, block_text, lineno) if target_type == 'refname': - target = nodes.target(block_text, '', refname=data) + target = nodes.target(block_text, '', refname=normalize_name(data)) + target.indirect_reference_name = data self.add_target(target_name, '', target, lineno) 
self.document.note_indirect_target(target) return target @@ -1936,7 +1929,7 @@ class Body(RSTState): def is_reference(self, reference): match = self.explicit.patterns.reference.match( - normalize_name(reference)) + whitespace_normalize_name(reference)) if not match: return None return unescape(match.group('simple') or match.group('phrase')) -- cgit v1.2.1 From d8c8903dfb10818aad93e126a7541877a774b5cd Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 8 Apr 2004 01:58:27 +0000 Subject: Fixed bug relating to role-less interpreted text in non-English contexts. Reported by Lele Gaifax. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1916 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index d1c63f7f1..065eb6ef3 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -912,17 +912,17 @@ class Inliner: msg_text = [] if role: name = role.lower() + canonical = None + try: + canonical = self.language.roles[name] + except AttributeError, error: + msg_text.append('Problem retrieving role entry from language ' + 'module %r: %s.' % (self.language, error)) + except KeyError: + msg_text.append('No role entry for "%s" in module "%s".' + % (name, self.language.__name__)) else: - name = self.default_interpreted_role - canonical = None - try: - canonical = self.language.roles[name] - except AttributeError, error: - msg_text.append('Problem retrieving role entry from language ' - 'module %r: %s.' % (self.language, error)) - except KeyError: - msg_text.append('No role entry for "%s" in module "%s".' 
- % (name, self.language.__name__)) + canonical = self.default_interpreted_role if not canonical: try: canonical = _fallback_language_module.roles[name] -- cgit v1.2.1 From bee6561efe398179e59d465218a0aaefc9bf024b Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 16 Apr 2004 15:14:57 +0000 Subject: Reorganized interpreted text processing; moved code into the new roles.py module. roles.py contains interpreted text role functions, a registry for interpreted text roles, and an API for adding to and retrieving from the registry. Contributed by Edward Loper. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1943 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 169 +++++------------------------------------ 1 file changed, 18 insertions(+), 151 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 065eb6ef3..01075288f 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -114,7 +114,7 @@ from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.nodes import fully_normalize_name as normalize_name from docutils.nodes import whitespace_normalize_name -from docutils.parsers.rst import directives, languages, tableparser +from docutils.parsers.rst import directives, languages, tableparser, roles from docutils.parsers.rst.languages import en as _fallback_language_module @@ -475,47 +475,10 @@ class Inliner: """ Parse inline markup; call the `parse()` method. 
""" - - _interpreted_roles = { - # Values of ``None`` mean "not implemented yet": - 'title-reference': 'generic_interpreted_role', - 'abbreviation': 'generic_interpreted_role', - 'acronym': 'generic_interpreted_role', - 'index': None, - 'subscript': 'generic_interpreted_role', - 'superscript': 'generic_interpreted_role', - 'emphasis': 'generic_interpreted_role', - 'strong': 'generic_interpreted_role', - 'literal': 'generic_interpreted_role', - 'named-reference': None, - 'anonymous-reference': None, - 'uri-reference': None, - 'pep-reference': 'pep_reference_role', - 'rfc-reference': 'rfc_reference_role', - 'footnote-reference': None, - 'citation-reference': None, - 'substitution-reference': None, - 'target': None, - 'restructuredtext-unimplemented-role': None} - """Mapping of canonical interpreted text role name to method name. - Initializes a name to bound-method mapping in `__init__`.""" - default_interpreted_role = 'title-reference' """The role to use when no explicit role is given. Override in subclasses.""" - generic_roles = {'abbreviation': nodes.abbreviation, - 'acronym': nodes.acronym, - 'emphasis': nodes.emphasis, - 'literal': nodes.literal, - 'strong': nodes.strong, - 'subscript': nodes.subscript, - 'superscript': nodes.superscript, - 'title-reference': nodes.title_reference,} - """Mapping of canonical interpreted text role name to node class. - Used by the `generic_interpreted_role` method for simple, straightforward - roles (simple wrapping; no extra processing).""" - def __init__(self, roles=None): """ `roles` is a mapping of canonical role name to role function or bound @@ -526,17 +489,6 @@ class Inliner: """List of (pattern, bound method) tuples, used by `self.implicit_inline`.""" - self.interpreted_roles = {} - """Mapping of canonical role name to role function or bound method. 
- Items removed from this mapping will be disabled.""" - - for canonical, method in self._interpreted_roles.items(): - if method: - self.interpreted_roles[canonical] = getattr(self, method) - else: - self.interpreted_roles[canonical] = None - self.interpreted_roles.update(roles or {}) - def init_customizations(self, settings): """Setting-based customizations; run when parsing begins.""" if settings.pep_references: @@ -825,26 +777,11 @@ class Inliner: return self.phrase_ref(string[:matchstart], string[textend:], rawsource, escaped, text) else: - try: - return self.interpreted( - string[:rolestart], string[textend:], - rawsource, text, role, lineno) - except UnknownInterpretedRoleError, detail: - msg = self.reporter.error( - 'Unknown interpreted text role "%s".' % role, - line=lineno) - text = unescape(string[rolestart:textend], 1) - prb = self.problematic(text, text, msg) - return (string[:rolestart], [prb], string[textend:], - detail.args[0] + [msg]) - except InterpretedRoleNotImplementedError, detail: - msg = self.reporter.error( - 'Interpreted text role "%s" not implemented.' 
% role, - line=lineno) - text = unescape(string[rolestart:textend], 1) - prb = self.problematic(text, text, msg) - return (string[:rolestart], [prb], string[textend:], - detail.args[0] + [msg]) + rawsource = unescape(string[rolestart:textend], 1) + nodelist, messages = self.interpreted(rawsource, text, role, + lineno) + return (string[:rolestart], nodelist, + string[textend:], messages) msg = self.reporter.warning( 'Inline interpreted text or phrase reference start-string ' 'without end-string.', line=lineno) @@ -896,50 +833,19 @@ class Inliner: else: return uri - def interpreted(self, before, after, rawsource, text, role, lineno): - role_function, canonical, messages = self.get_role_function(role, - lineno) - if role_function: - nodelist, messages2 = role_function(canonical, rawsource, text, - lineno) - messages.extend(messages2) - return before, nodelist, after, messages + def interpreted(self, rawsource, text, role, lineno): + if not role: + role = roles.DEFAULT_INTERPRETED_ROLE + role_fn, messages = roles.role(role, self.language, lineno, self) + if role_fn: + nodes, messages2 = role_fn(role, rawsource, text, lineno, self) + return nodes, messages + messages2 else: - raise InterpretedRoleNotImplementedError(messages) - - def get_role_function(self, role, lineno): - messages = [] - msg_text = [] - if role: - name = role.lower() - canonical = None - try: - canonical = self.language.roles[name] - except AttributeError, error: - msg_text.append('Problem retrieving role entry from language ' - 'module %r: %s.' % (self.language, error)) - except KeyError: - msg_text.append('No role entry for "%s" in module "%s".' - % (name, self.language.__name__)) - else: - canonical = self.default_interpreted_role - if not canonical: - try: - canonical = _fallback_language_module.roles[name] - msg_text.append('Using English fallback for role "%s".' - % name) - except KeyError: - msg_text.append('Trying "%s" as canonical role name.' 
- % name) - # Should be an English name, but just in case: - canonical = name - if msg_text: - message = self.reporter.info('\n'.join(msg_text), line=lineno) - messages.append(message) - try: - return self.interpreted_roles[canonical], canonical, messages - except KeyError: - raise UnknownInterpretedRoleError(messages) + msg = self.reporter.error( + 'Unknown interpreted text role "%s".' % role, + line=lineno) + return ([self.problematic(rawsource, rawsource, msg)], + messages + [msg]) def literal(self, match, lineno): before, inlines, remaining, sysmessages, endstring = self.inline_obj( @@ -1110,45 +1016,6 @@ class Inliner: '_': reference, '__': anonymous_reference} - def generic_interpreted_role(self, role, rawtext, text, lineno): - try: - role_class = self.generic_roles[role] - except KeyError: - msg = self.reporter.error('Unknown interpreted text role: "%s".' - % role, line=lineno) - prb = self.problematic(text, text, msg) - return [prb], [msg] - return [role_class(rawtext, text)], [] - - def pep_reference_role(self, role, rawtext, text, lineno): - try: - pepnum = int(text) - if pepnum < 0 or pepnum > 9999: - raise ValueError - except ValueError: - msg = self.reporter.error( - 'PEP number must be a number from 0 to 9999; "%s" is invalid.' - % text, line=lineno) - prb = self.problematic(text, text, msg) - return [prb], [msg] - ref = self.pep_url % pepnum - return [nodes.reference(rawtext, 'PEP ' + text, refuri=ref)], [] - - def rfc_reference_role(self, role, rawtext, text, lineno): - try: - rfcnum = int(text) - if rfcnum <= 0: - raise ValueError - except ValueError: - msg = self.reporter.error( - 'RFC number must be a number greater than or equal to 1; ' - '"%s" is invalid.' 
% text, line=lineno) - prb = self.problematic(text, text, msg) - return [prb], [msg] - ref = self.rfc_url % rfcnum - return [nodes.reference(rawtext, 'RFC ' + text, refuri=ref)], [] - - class Body(RSTState): """ -- cgit v1.2.1 From 3abac3279f845005f6313a8b0d16e14b6c8f8758 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 16 Apr 2004 15:38:21 +0000 Subject: Moved default interpreted text role processing to roles.py (canonical name, not language-dependent). Plus docstrings, whitespace. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1946 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 01075288f..5d7c463dc 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -475,9 +475,6 @@ class Inliner: """ Parse inline markup; call the `parse()` method. """ - default_interpreted_role = 'title-reference' - """The role to use when no explicit role is given. 
- Override in subclasses.""" def __init__(self, roles=None): """ @@ -834,8 +831,6 @@ class Inliner: return uri def interpreted(self, rawsource, text, role, lineno): - if not role: - role = roles.DEFAULT_INTERPRETED_ROLE role_fn, messages = roles.role(role, self.language, lineno, self) if role_fn: nodes, messages2 = role_fn(role, rawsource, text, lineno, self) @@ -926,7 +921,7 @@ class Inliner: referencename = match.group('refname') refname = normalize_name(referencename) referencenode = nodes.reference( - referencename + match.group('refend'), referencename, + referencename + match.group('refend'), referencename, name=whitespace_normalize_name(referencename)) if anonymous: referencenode['anonymous'] = 1 @@ -1016,6 +1011,7 @@ class Inliner: '_': reference, '__': anonymous_reference} + class Body(RSTState): """ -- cgit v1.2.1 From 5f4fc7c0e88fe3f9bdadc52d7ec2906bc02e4839 Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 27 Apr 2004 19:47:28 +0000 Subject: Refactored ``Body.parse_directive`` into ``run_directive`` and ``parse_directive_block``. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2000 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 68 +++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 5d7c463dc..219b8a790 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -496,6 +496,8 @@ class Inliner: self.rfc_reference)) def parse(self, text, lineno, memo, parent): + # Needs to be refactored for nested inline markup. + # Add nested_parse() method? """ Return 2 lists: nodes (text and inline elements), and system_messages. 
@@ -831,7 +833,8 @@ class Inliner: return uri def interpreted(self, rawsource, text, role, lineno): - role_fn, messages = roles.role(role, self.language, lineno, self) + role_fn, messages = roles.role(role, self.language, lineno, + self.reporter) if role_fn: nodes, messages2 = role_fn(role, rawsource, text, lineno, self) return nodes, messages + messages2 @@ -1878,17 +1881,18 @@ class Body(RSTState): return [msg], blank_finish def directive(self, match, **option_presets): + """Returns a 2-tuple: list of nodes, and a "blank finish" boolean.""" type_name = match.group(1) directive_function, messages = directives.directive( type_name, self.memo.language, self.document) self.parent += messages if directive_function: - return self.parse_directive( + return self.run_directive( directive_function, match, type_name, option_presets) else: return self.unknown_directive(type_name) - def parse_directive(self, directive_fn, match, type_name, option_presets): + def run_directive(self, directive_fn, match, type_name, option_presets): """ Parse a directive then run its directive function. @@ -1910,13 +1914,6 @@ class Body(RSTState): Returns a 2-tuple: list of nodes, and a "blank finish" boolean. 
""" - arguments = [] - options = {} - argument_spec = getattr(directive_fn, 'arguments', None) - if argument_spec and argument_spec[:2] == (0, 0): - argument_spec = None - option_spec = getattr(directive_fn, 'options', None) - content_spec = getattr(directive_fn, 'content', None) lineno = self.state_machine.abs_line_number() initial_line_offset = self.state_machine.line_offset indented, indent, line_offset, blank_finish \ @@ -1924,6 +1921,30 @@ class Body(RSTState): strip_top=0) block_text = '\n'.join(self.state_machine.input_lines[ initial_line_offset : self.state_machine.line_offset + 1]) + try: + arguments, options, content, content_offset = ( + self.parse_directive_block(indented, line_offset, + directive_fn, option_presets)) + except MarkupError, detail: + error = self.reporter.error( + 'Error in "%s" directive:\n%s.' % (type_name, detail), + nodes.literal_block(block_text, block_text), line=lineno) + return [error], blank_finish + result = directive_fn(type_name, arguments, options, content, lineno, + content_offset, block_text, self, + self.state_machine) + return (result, + blank_finish or self.state_machine.is_next_line_blank()) + + def parse_directive_block(self, indented, line_offset, directive_fn, + option_presets): + arguments = [] + options = {} + argument_spec = getattr(directive_fn, 'arguments', None) + if argument_spec and argument_spec[:2] == (0, 0): + argument_spec = None + option_spec = getattr(directive_fn, 'options', None) + content_spec = getattr(directive_fn, 'content', None) if indented and not indented[0].strip(): indented.trim_start() line_offset += 1 @@ -1945,24 +1966,15 @@ class Body(RSTState): while content and not content[0].strip(): content.trim_start() content_offset += 1 - try: - if option_spec: - options, arg_block = self.parse_directive_options( - option_presets, option_spec, arg_block) - if argument_spec: - arguments = self.parse_directive_arguments(argument_spec, - arg_block) - if content and not content_spec: - raise 
MarkupError('no content permitted') - except MarkupError, detail: - error = self.reporter.error( - 'Error in "%s" directive:\n%s.' % (type_name, detail), - nodes.literal_block(block_text, block_text), line=lineno) - return [error], blank_finish - result = directive_fn( - type_name, arguments, options, content, lineno, content_offset, - block_text, self, self.state_machine) - return result, blank_finish or self.state_machine.is_next_line_blank() + if option_spec: + options, arg_block = self.parse_directive_options( + option_presets, option_spec, arg_block) + if argument_spec: + arguments = self.parse_directive_arguments( + argument_spec, arg_block) + if content and not content_spec: + raise MarkupError('no content permitted') + return (arguments, options, content, content_offset) def parse_directive_options(self, option_presets, option_spec, arg_block): options = option_presets.copy() -- cgit v1.2.1 From c48260665ecf74131830f99949da7b99b7579348 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 16 Jun 2004 17:21:57 +0000 Subject: added pep_base_url & rfc_base_url settings & support git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2287 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 219b8a790..4025c7e72 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -954,9 +954,7 @@ class Inliner: else: # not a valid scheme raise MarkupMismatch - pep_url_local = 'pep-%04d.html' - pep_url_absolute = 'http://www.python.org/peps/pep-%04d.html' - pep_url = pep_url_absolute + pep_url = 'pep-%04d.html' def pep_reference(self, match, lineno): text = match.group(0) @@ -966,17 +964,17 @@ class Inliner: pepnum = int(match.group('pepnum2')) else: raise MarkupMismatch - ref = self.pep_url % pepnum + ref = 
self.document.settings.pep_base_url + self.pep_url % pepnum unescaped = unescape(text, 0) return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)] - rfc_url = 'http://www.faqs.org/rfcs/rfc%d.html' + rfc_url = 'rfc%d.html' def rfc_reference(self, match, lineno): text = match.group(0) if text.startswith('RFC'): rfcnum = int(match.group('rfcnum')) - ref = self.rfc_url % rfcnum + ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum else: raise MarkupMismatch unescaped = unescape(text, 0) -- cgit v1.2.1 From 9d0496b115ec1e25eecd5a3d34fba4857d9f53bd Mon Sep 17 00:00:00 2001 From: goodger Date: Sun, 20 Jun 2004 21:31:56 +0000 Subject: fixed line number for literal blocks git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2334 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 4025c7e72..a786abbda 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2540,9 +2540,10 @@ class Text(RSTState): indented.trim_end() if not indented: return self.quoted_literal_block() - nodelist = [] data = '\n'.join(indented) - nodelist.append(nodes.literal_block(data, data)) + literal_block = nodes.literal_block(data, data) + literal_block.line = offset + 1 + nodelist = [literal_block] if not blank_finish: nodelist.append(self.unindent_warning('Literal block')) return nodelist -- cgit v1.2.1 From 80fce2bdf1c90e564c0f4b8d062d5349277e33b2 Mon Sep 17 00:00:00 2001 From: wiemann Date: Mon, 28 Jun 2004 19:06:02 +0000 Subject: made URI recognition slightly more aggressive and intelligent git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2407 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 
'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index a786abbda..43caa9711 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -556,14 +556,19 @@ class Inliner: # Valid URI characters (see RFC 2396 & RFC 2732); # final \x00 allows backslash escapes in URIs: uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]""" + # Delimiter indicating the end of a URI (not part of the URI): + uri_end_delim = r"""[>]""" # Last URI character; same as uric but no punctuation: - urilast = r"""[_~/a-zA-Z0-9]""" + urilast = r"""[_~*/=+a-zA-Z0-9]""" + # End of a URI (either 'urilast' or 'uric followed by a + # uri_end_delim'): + uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals() emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]""" email_pattern = r""" %(emailc)s+(?:\.%(emailc)s+)* # name @ # at %(emailc)s+(?:\.%(emailc)s*)* # host - %(urilast)s # final URI char + %(uri_end)s # final URI char """ parts = ('initial_inline', start_string_prefix, '', [('start', '', non_whitespace_after, # simple start-strings @@ -642,15 +647,15 @@ class Inliner: ( # either: (//?)? # hierarchical URI %(uric)s* # URI characters - %(urilast)s # final URI char + %(uri_end)s # final URI char ) ( # optional query \?%(uric)s* - %(urilast)s + %(uri_end)s )? ( # optional fragment \#%(uric)s* - %(urilast)s + %(uri_end)s )? ) ) -- cgit v1.2.1 From 7e01e55a123cbcb5ba89d7cc435ec97e2269f57e Mon Sep 17 00:00:00 2001 From: goodger Date: Mon, 27 Sep 2004 15:28:33 +0000 Subject: Fixed bug where a "role" directive in a nested parse would crash the parser; the "language" attribute was not being copied over to the new state machine. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2654 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 1 + 1 file changed, 1 insertion(+) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 43caa9711..f6a25465f 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -199,6 +199,7 @@ class NestedStateMachine(StateMachineWS): self.document = memo.document self.attach_observer(self.document.note_source) self.reporter = memo.reporter + self.language = memo.language self.node = node results = StateMachineWS.run(self, input_lines, input_offset) assert results == [], ('NestedStateMachine.run() results should be ' -- cgit v1.2.1 From 124f37c038f86eb6c3b1344150840b5fd2a1cdbd Mon Sep 17 00:00:00 2001 From: wiemann Date: Thu, 30 Sep 2004 13:47:59 +0000 Subject: setting footnote_references to "superscript" or "brackets" now causes footnote-reference-space to be trimmed or not, resp. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2681 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index f6a25465f..60e0bcafd 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -922,7 +922,7 @@ class Inliner: if refname: refnode['refname'] = refname self.document.note_footnote_ref(refnode) - if self.document.settings.trim_footnote_reference_space: + if utils.get_trim_footnote_ref_space(self.document.settings): before = before.rstrip() return (before, [refnode], remaining, []) -- cgit v1.2.1 From 81cbe9fdf4f4d5b57902ddc6d7b764b1250b9c96 Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 5 Oct 2004 01:21:26 +0000 Subject: Added support for line block syntax. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2686 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 90 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 60e0bcafd..cdc4258d1 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1090,6 +1090,7 @@ class Body(RSTState): 'field_marker': r':[^: ]([^:]*[^: ])?:( +|$)', 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats, 'doctest': r'>>>( +|$)', + 'line_block': r'\|( +|$)', 'grid_table_top': grid_table_top_pat, 'simple_table_top': simple_table_top_pat, 'explicit_markup': r'\.\.( +|$)', @@ -1102,6 +1103,7 @@ class Body(RSTState): 'field_marker', 'option_marker', 'doctest', + 'line_block', 'grid_table_top', 'simple_table_top', 'explicit_markup', @@ -1189,12 +1191,12 @@ class Body(RSTState): i, blank_finish = self.list_item(match.end()) bulletlist += i offset = self.state_machine.line_offset + 1 # next line - newline_offset, blank_finish = self.nested_list_parse( + new_line_offset, blank_finish = self.nested_list_parse( self.state_machine.input_lines[offset:], input_offset=self.state_machine.abs_line_offset() + 1, node=bulletlist, initial_state='BulletList', blank_finish=blank_finish) - self.goto_line(newline_offset) + self.goto_line(new_line_offset) if not blank_finish: self.parent += self.unindent_warning('Bullet list') return [], next_state, [] @@ -1476,6 +1478,69 @@ class Body(RSTState): self.parent += nodes.doctest_block(data, data) return [], next_state, [] + def line_block(self, match, context, next_state): + """First line of a line block.""" + block = nodes.line_block() + self.parent += block + lineno = self.state_machine.abs_line_number() + line, messages, blank_finish = self.line_block_line(match, lineno) + block += line + self.parent += messages + if not 
blank_finish: + offset = self.state_machine.line_offset + 1 # next line + new_line_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=block, initial_state='LineBlock', + blank_finish=0) + self.goto_line(new_line_offset) + if not blank_finish: + self.parent += self.reporter.warning( + 'Line block ends without a blank line.', + line=(self.state_machine.abs_line_number() + 1)) + if len(block): + if block[0].indent is None: + block[0].indent = 0 + self.nest_line_block_lines(block) + return [], next_state, [] + + def line_block_line(self, match, lineno): + """Return one line element of a line_block.""" + indented, indent, line_offset, blank_finish = \ + self.state_machine.get_first_known_indented(match.end(), + until_blank=1) + text = u'\n'.join(indented) + text_nodes, messages = self.inline_text(text, lineno) + line = nodes.line(text, '', *text_nodes) + if match.string.rstrip() != '|': # not empty + line.indent = len(match.group(1)) - 1 + return line, messages, blank_finish + + def nest_line_block_lines(self, block): + for index in range(1, len(block)): + if block[index].indent is None: + block[index].indent = block[index - 1].indent + self.nest_line_block_segment(block) + + def nest_line_block_segment(self, block): + indents = [item.indent for item in block] + least = min(indents) + new_items = [] + new_block = nodes.line_block() + for item in block: + if item.indent > least: + new_block.append(item) + else: + if len(new_block): + self.nest_line_block_segment(new_block) + new_items.append(new_block) + new_block = nodes.line_block() + new_items.append(item) + if len(new_block): + self.nest_line_block_segment(new_block) + new_items.append(new_block) + block[:] = new_items + def grid_table_top(self, match, context, next_state): """Top border of a full table.""" return self.table_top(match, context, next_state, @@ -2274,6 +2339,7 @@ class SpecializedBody(Body): 
field_marker = invalid_input option_marker = invalid_input doctest = invalid_input + line_block = invalid_input grid_table_top = invalid_input simple_table_top = invalid_input explicit_markup = invalid_input @@ -2391,6 +2457,22 @@ class ExtensionOptions(FieldList): lines = [] +class LineBlock(SpecializedBody): + + """Second and subsequent lines of a line_block.""" + + blank = SpecializedBody.invalid_input + + def line_block(self, match, context, next_state): + """New line of line block.""" + lineno = self.state_machine.abs_line_number() + line, messages, blank_finish = self.line_block_line(match, lineno) + self.parent += line + self.parent.parent += messages + self.blank_finish = blank_finish + return [], next_state, [] + + class Explicit(SpecializedBody): """Second and subsequent explicit markup construct.""" @@ -2854,8 +2936,8 @@ class QuotedLiteralBlock(RSTState): state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList, - OptionList, ExtensionOptions, Explicit, Text, Definition, - Line, SubstitutionDef, RFC2822Body, RFC2822List) + OptionList, LineBlock, ExtensionOptions, Explicit, Text, + Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List) """Standard set of State classes used to start `RSTStateMachine`.""" -- cgit v1.2.1 From 3060e6fbba970093521688e7f665a2ef8e9214d6 Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 19 Oct 2004 23:36:31 +0000 Subject: Fixed directive parsing bug: argument-less directives didn't notice that arguments were present. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2725 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index cdc4258d1..0d6362b51 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2038,6 +2038,9 @@ class Body(RSTState): if option_spec: options, arg_block = self.parse_directive_options( option_presets, option_spec, arg_block) + if arg_block and not argument_spec: + raise MarkupError('no arguments permitted; blank line ' + 'required before content block') if argument_spec: arguments = self.parse_directive_arguments( argument_spec, arg_block) -- cgit v1.2.1 From d2995e5fff30d16439cae71a4f6aa3d173a92b7b Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 22 Oct 2004 01:51:28 +0000 Subject: fixed unwanted interaction of "class" directive and enumerated lists (start != 1) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2761 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 0d6362b51..0f8bdcf83 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1215,13 +1215,13 @@ class Body(RSTState): format, sequence, text, ordinal = self.parse_enumerator(match) if not self.is_enumerated_list_item(ordinal, sequence, format): raise statemachine.TransitionCorrection('text') + enumlist = nodes.enumerated_list() + self.parent += enumlist if ordinal != 1: msg = self.reporter.info( 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)' % (text, ordinal), line=self.state_machine.abs_line_number()) self.parent += msg - enumlist = nodes.enumerated_list() - self.parent += enumlist enumlist['enumtype'] 
= sequence if ordinal != 1: enumlist['start'] = ordinal -- cgit v1.2.1 From 51768bcfb012ab6819afb5f726043f29386e739f Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 22 Oct 2004 02:10:43 +0000 Subject: small simplification git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2762 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 0f8bdcf83..a1e34d5d1 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1217,16 +1217,15 @@ class Body(RSTState): raise statemachine.TransitionCorrection('text') enumlist = nodes.enumerated_list() self.parent += enumlist + enumlist['enumtype'] = sequence + enumlist['prefix'] = self.enum.formatinfo[format].prefix + enumlist['suffix'] = self.enum.formatinfo[format].suffix if ordinal != 1: + enumlist['start'] = ordinal msg = self.reporter.info( 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)' % (text, ordinal), line=self.state_machine.abs_line_number()) self.parent += msg - enumlist['enumtype'] = sequence - if ordinal != 1: - enumlist['start'] = ordinal - enumlist['prefix'] = self.enum.formatinfo[format].prefix - enumlist['suffix'] = self.enum.formatinfo[format].suffix listitem, blank_finish = self.list_item(match.end()) enumlist += listitem offset = self.state_machine.line_offset + 1 # next line -- cgit v1.2.1 From 4513833b27e558a8837877231abb4ce8b886b37b Mon Sep 17 00:00:00 2001 From: wiemann Date: Mon, 25 Oct 2004 13:05:43 +0000 Subject: fixed documentation of RSTStateMachine.run() git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2768 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py 
b/docutils/parsers/rst/states.py index a1e34d5d1..2c33cb081 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -144,11 +144,10 @@ class RSTStateMachine(StateMachineWS): def run(self, input_lines, document, input_offset=0, match_titles=1, inliner=None): """ - Parse `input_lines` and return a `docutils.nodes.document` instance. + Parse `input_lines` and modify the `document` node in place. - Extend `StateMachineWS.run()`: set up parse-global data, run the - StateMachine, and return the resulting - document. + Extend `StateMachineWS.run()`: set up parse-global data and + run the StateMachine. """ self.language = languages.get_language( document.settings.language_code) @@ -290,7 +289,7 @@ class RSTState(StateWS): state_machine_kwargs=None): """ Create a new StateMachine rooted at `node` and run it over the input - `block`. Also keep track of optional intermdediate blank lines and the + `block`. Also keep track of optional intermediate blank lines and the required final one. 
""" if state_machine_class is None: -- cgit v1.2.1 From 88a4789f802608b293da9f7be6af821103ab4222 Mon Sep 17 00:00:00 2001 From: wiemann Date: Sat, 30 Oct 2004 13:53:50 +0000 Subject: insert pending transition instead of real transition; removed error checking code git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2772 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 2c33cb081..b99687505 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -116,6 +116,7 @@ from docutils.nodes import fully_normalize_name as normalize_name from docutils.nodes import whitespace_normalize_name from docutils.parsers.rst import directives, languages, tableparser, roles from docutils.parsers.rst.languages import en as _fallback_language_module +from docutils.transforms.structural import Transition as TransitionTransform class MarkupError(DataError): pass @@ -403,16 +404,6 @@ class RSTState(StateWS): error = self.reporter.error( 'First element of section must be a title.', line=lineno) section.insert(0, error) - if isinstance(section[1], nodes.transition): - error = self.reporter.error( - 'Section may not begin with a transition.', - line=section[1].line) - section.insert(1, error) - if len(section) > 2 and isinstance(section[-1], nodes.transition): - error = self.reporter.error( - 'Section may not end with a transition.', - line=section[-1].line) - section += error def paragraph(self, lines, lineno): """ @@ -2748,13 +2739,11 @@ class Line(SpecializedText): self.state_correction(context) if self.eofcheck: # ignore EOFError with sections lineno = self.state_machine.abs_line_number() - 1 - transition = nodes.transition(context[0]) + transition = nodes.pending(TransitionTransform, + rawsource=context[0]) 
transition.line = lineno + self.document.note_pending(transition) self.parent += transition - msg = self.reporter.error( - 'Document or section may not end with a transition.', - line=lineno) - self.parent += msg self.eofcheck = 1 return [] @@ -2764,19 +2753,9 @@ class Line(SpecializedText): marker = context[0].strip() if len(marker) < 4: self.state_correction(context) - transition = nodes.transition(marker) + transition = nodes.pending(TransitionTransform, rawsource=marker) transition.line = lineno - if len(self.parent) == 0: - msg = self.reporter.error( - 'Document or section may not begin with a transition.', - line=lineno) - self.parent += msg - elif isinstance(self.parent[-1], nodes.transition): - msg = self.reporter.error( - 'At least one body element must separate transitions; ' - 'adjacent transitions not allowed.', - line=lineno) - self.parent += msg + self.document.note_pending(transition) self.parent += transition return [], 'Body', [] -- cgit v1.2.1 From 54bea9c700c038c02df30a191545eb1e4c686ebe Mon Sep 17 00:00:00 2001 From: wiemann Date: Sat, 30 Oct 2004 15:56:50 +0000 Subject: insert real transitions git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2780 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index b99687505..7d0b1bca2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -116,7 +116,6 @@ from docutils.nodes import fully_normalize_name as normalize_name from docutils.nodes import whitespace_normalize_name from docutils.parsers.rst import directives, languages, tableparser, roles from docutils.parsers.rst.languages import en as _fallback_language_module -from docutils.transforms.structural import Transition as TransitionTransform class MarkupError(DataError): pass @@ -2739,10 +2738,8 @@ class 
Line(SpecializedText): self.state_correction(context) if self.eofcheck: # ignore EOFError with sections lineno = self.state_machine.abs_line_number() - 1 - transition = nodes.pending(TransitionTransform, - rawsource=context[0]) + transition = nodes.transition(rawsource=context[0]) transition.line = lineno - self.document.note_pending(transition) self.parent += transition self.eofcheck = 1 return [] @@ -2753,9 +2750,8 @@ class Line(SpecializedText): marker = context[0].strip() if len(marker) < 4: self.state_correction(context) - transition = nodes.pending(TransitionTransform, rawsource=marker) + transition = nodes.transition(rawsource=marker) transition.line = lineno - self.document.note_pending(transition) self.parent += transition return [], 'Body', [] -- cgit v1.2.1 From 097815228a511d2c7da47cf4c4d2141893556dcd Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 3 Nov 2004 20:11:11 +0000 Subject: Added support for multiple classifiers in definition list items. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2802 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 7d0b1bca2..874110a30 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2657,8 +2657,10 @@ class Text(RSTState): self.nested_parse(indented, input_offset=line_offset, node=definition) return definitionlistitem, blank_finish + classifier_delimiter = re.compile(' +: +') + def term(self, lines, lineno): - """Return a definition_list's term and optional classifier.""" + """Return a definition_list's term and optional classifiers.""" assert len(lines) == 1 text_nodes, messages = self.inline_text(lines[0], lineno) term_node = nodes.term() @@ -2666,17 +2668,17 @@ class Text(RSTState): for i in range(len(text_nodes)): node = text_nodes[i] if 
isinstance(node, nodes.Text): - parts = node.rawsource.split(' : ', 1) + parts = self.classifier_delimiter.split(node.rawsource) if len(parts) == 1: - term_node += node + node_list[-1] += node else: - term_node += nodes.Text(parts[0].rstrip()) - classifier_node = nodes.classifier('', parts[1]) - classifier_node += text_nodes[i+1:] - node_list.append(classifier_node) - break + + node_list[-1] += nodes.Text(parts[0].rstrip()) + for part in parts[1:]: + classifier_node = nodes.classifier('', part) + node_list.append(classifier_node) else: - term_node += node + node_list[-1] += node return node_list, messages -- cgit v1.2.1 From a2458fbff98892912f8b2c1b0fb39ebe5cbcb646 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 6 Nov 2004 19:52:19 +0000 Subject: Changed role function API: the "text" parameter now takes null-escaped interpreted text content. Moved ``escape2null`` and ``unescape`` functions from to docutils/utils.py. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2816 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 874110a30..7de9165a8 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -114,6 +114,7 @@ from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.nodes import fully_normalize_name as normalize_name from docutils.nodes import whitespace_normalize_name +from docutils.utils import escape2null, unescape from docutils.parsers.rst import directives, languages, tableparser, roles from docutils.parsers.rst.languages import en as _fallback_language_module @@ -758,7 +759,6 @@ class Inliner: role = endmatch.group('suffix')[1:-1] position = 'suffix' escaped = endmatch.string[:endmatch.start(1)] - text = 
unescape(escaped, 0) rawsource = unescape(string[matchstart:textend], 1) if rawsource[-1:] == '_': if role: @@ -769,10 +769,10 @@ class Inliner: prb = self.problematic(text, text, msg) return string[:rolestart], [prb], string[textend:], [msg] return self.phrase_ref(string[:matchstart], string[textend:], - rawsource, escaped, text) + rawsource, escaped, unescape(escaped)) else: rawsource = unescape(string[rolestart:textend], 1) - nodelist, messages = self.interpreted(rawsource, text, role, + nodelist, messages = self.interpreted(rawsource, escaped, role, lineno) return (string[:rolestart], nodelist, string[textend:], messages) @@ -2917,29 +2917,3 @@ state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList, OptionList, LineBlock, ExtensionOptions, Explicit, Text, Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List) """Standard set of State classes used to start `RSTStateMachine`.""" - - -def escape2null(text): - """Return a string with escape-backslashes converted to nulls.""" - parts = [] - start = 0 - while 1: - found = text.find('\\', start) - if found == -1: - parts.append(text[start:]) - return ''.join(parts) - parts.append(text[start:found]) - parts.append('\x00' + text[found+1:found+2]) - start = found + 2 # skip character after escape - -def unescape(text, restore_backslashes=0): - """ - Return a string with nulls removed or restored to backslashes. - Backslash-escaped spaces are also removed. 
- """ - if restore_backslashes: - return text.replace('\x00', '\\') - else: - for sep in ['\x00 ', '\x00\n', '\x00']: - text = ''.join(text.split(sep)) - return text -- cgit v1.2.1 From a33879a926ba9d3beffcc35e5f8c45cb62e85ce4 Mon Sep 17 00:00:00 2001 From: wiemann Date: Fri, 26 Nov 2004 09:27:55 +0000 Subject: allow empty sections and documents git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2870 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 7de9165a8..b6954fe99 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -170,16 +170,8 @@ class RSTStateMachine(StateMachineWS): results = StateMachineWS.run(self, input_lines, input_offset, input_source=document['source']) assert results == [], 'RSTStateMachine.run() results should be empty!' - self.check_document() self.node = self.memo = None # remove unneeded references - def check_document(self): - """Check for illegal structure: empty document.""" - if len(self.document) == 0: - error = self.reporter.error( - 'Document empty; must have contents.', line=0) - self.document += error - class NestedStateMachine(StateMachineWS): @@ -384,27 +376,11 @@ class RSTState(StateWS): self.state_machine.input_lines[offset:], input_offset=absoffset, node=section_node, match_titles=1) self.goto_line(newabsoffset) - self.check_section(section_node) if memo.section_level <= mylevel: # can't handle next section? raise EOFError # bubble up to supersection # reset section_level; next pass will detect it properly memo.section_level = mylevel - def check_section(self, section): - """ - Check for illegal structure: empty section, misplaced transitions. 
- """ - lineno = section.line - if len(section) <= 1: - error = self.reporter.error( - 'Section empty; must have contents.', line=lineno) - section += error - return - if not isinstance(section[0], nodes.title): # shouldn't ever happen - error = self.reporter.error( - 'First element of section must be a title.', line=lineno) - section.insert(0, error) - def paragraph(self, lines, lineno): """ Return a list (paragraph & messages) & a boolean: literal_block next? -- cgit v1.2.1 From ae854fe9e05524b51690889ae99e1565507ddfda Mon Sep 17 00:00:00 2001 From: wiemann Date: Thu, 13 Jan 2005 20:09:18 +0000 Subject: made email addresses escapable git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2935 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index b6954fe99..ddb2e5100 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -533,7 +533,7 @@ class Inliner: emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]""" email_pattern = r""" %(emailc)s+(?:\.%(emailc)s+)* # name - @ # at + (? 
Date: Thu, 17 Feb 2005 14:39:07 +0000 Subject: typo git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2967 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index ddb2e5100..43a25b5ab 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1432,7 +1432,7 @@ class Body(RSTState): optlist.append(option) else: raise MarkupError( - 'wrong numer of option tokens (=%s), should be 1 or 2: ' + 'wrong number of option tokens (=%s), should be 1 or 2: ' '"%s"' % (len(tokens), optionstring), self.state_machine.abs_line_number() + 1) return optlist -- cgit v1.2.1 From 92b0917ae757987cc88daeefb777df3068c703f5 Mon Sep 17 00:00:00 2001 From: wiemann Date: Thu, 17 Feb 2005 14:50:10 +0000 Subject: allow option arguments in angle brackets to start with a non-alphanumeric character git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2969 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 43a25b5ab..b42d32a71 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1039,7 +1039,7 @@ class Body(RSTState): '|%(upperroman)s)' % enum.sequencepats) pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats # @@@ Loosen up the pattern? Allow Unicode? - pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<%(alphanum)s[^ <>]+>)' % pats + pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^ <>]+>)' % pats pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' 
% pats pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats -- cgit v1.2.1 From 6e59df17d0c4a53e5d5ced08f5ef58852bc66ec8 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 19 Feb 2005 01:23:10 +0000 Subject: Fixed option lists to allow spaces inside ````. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2975 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index b42d32a71..3427c4694 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1039,7 +1039,7 @@ class Body(RSTState): '|%(upperroman)s)' % enum.sequencepats) pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats # @@@ Loosen up the pattern? Allow Unicode? - pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^ <>]+>)' % pats + pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' 
% pats pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats @@ -1415,14 +1415,20 @@ class Body(RSTState): delimiter = ' ' firstopt = tokens[0].split('=') if len(firstopt) > 1: + # "--opt=value" form tokens[:1] = firstopt delimiter = '=' elif (len(tokens[0]) > 2 and ((tokens[0].startswith('-') and not tokens[0].startswith('--')) or tokens[0].startswith('+'))): + # "-ovalue" form tokens[:1] = [tokens[0][:2], tokens[0][2:]] delimiter = '' + if len(tokens) > 1 and (tokens[1].startswith('<') + and tokens[-1].endswith('>')): + # "-o " form; join all values into one token + tokens[1:] = [' '.join(tokens[1:])] if 0 < len(tokens) <= 2: option = nodes.option(optionstring) option += nodes.option_string(tokens[0], tokens[0]) -- cgit v1.2.1 From 6e194ab6b0f7b77f7c8edf53730f52bd8ce5f4a1 Mon Sep 17 00:00:00 2001 From: wiemann Date: Sun, 27 Feb 2005 01:00:59 +0000 Subject: when relocating a target, it now acquires the children of its new parent; fixed bug in recording internal targets so that anonymous targets are relocated as well git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2996 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 3427c4694..f2c2d5144 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1857,6 +1857,8 @@ class Body(RSTState): else: # anonymous target if refuri: target['refuri'] = refuri + else: + self.document.note_internal_target(target) target['anonymous'] = 1 self.document.note_anonymous_target(target) -- cgit v1.2.1 From aa76cea304f331ad416e6f4623c2e5f00f995c58 Mon Sep 17 00:00:00 2001 From: wiemann Date: Fri, 4 Mar 2005 22:52:04 +0000 Subject: removed direct references to Element.children attribute outside nodes.py git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3004 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 
--- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index f2c2d5144..c4dff990a 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2621,7 +2621,7 @@ class Text(RSTState): state_machine_kwargs={'state_classes': (QuotedLiteralBlock,), 'initial_state': 'QuotedLiteralBlock'}) self.goto_line(new_abs_offset) - return parent_node.children + return parent_node.get_children() def definition_list_item(self, termline): indented, indent, line_offset, blank_finish = \ -- cgit v1.2.1 From b17334d09de7211955e834b0243695d6cb637d84 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 5 Mar 2005 18:32:47 +0000 Subject: Allowed whitespace in paths and URLs (targets; "image", "figure", & "include" directive args; ":file:" & ":url:" directive options). git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3018 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index c4dff990a..b9d257924 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1823,14 +1823,7 @@ class Body(RSTState): if refname: return 'refname', refname reference = ''.join([line.strip() for line in block]) - if reference.find(' ') == -1: - return 'refuri', unescape(reference) - else: - warning = self.reporter.warning( - 'Hyperlink target contains whitespace. 
Perhaps a footnote ' - 'was intended?', - nodes.literal_block(block_text, block_text), line=lineno) - return 'malformed', warning + return 'refuri', unescape(reference) def is_reference(self, reference): match = self.explicit.patterns.reference.match( -- cgit v1.2.1 From 43ca6a79125d28fce6c2ab2142f93733319382d4 Mon Sep 17 00:00:00 2001 From: wiemann Date: Mon, 14 Mar 2005 16:16:57 +0000 Subject: removed redundant get_children(); in case we want to change the behavior later, be can use __getattr__ or a descriptor; (the list is modified in place anyway, so there'd be not much to change about get_children) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3038 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index b9d257924..04713fbdc 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2614,7 +2614,7 @@ class Text(RSTState): state_machine_kwargs={'state_classes': (QuotedLiteralBlock,), 'initial_state': 'QuotedLiteralBlock'}) self.goto_line(new_abs_offset) - return parent_node.get_children() + return parent_node.children def definition_list_item(self, termline): indented, indent, line_offset, blank_finish = \ -- cgit v1.2.1 From 0a938a279683da492acd6512d0af6fa636c5a51c Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 15 Mar 2005 00:37:51 +0000 Subject: differentiate between paths and URIs git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3046 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 04713fbdc..628a1b02c 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1822,7 
+1822,7 @@ class Body(RSTState): refname = self.is_reference(reference) if refname: return 'refname', refname - reference = ''.join([line.strip() for line in block]) + reference = ''.join([''.join(line.split()) for line in block]) return 'refuri', unescape(reference) def is_reference(self, reference): -- cgit v1.2.1 From 7d76b140ee974b59e35f1f044949dc032630e15f Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 25 Mar 2005 03:02:32 +0000 Subject: added auto-enumerated list items git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3113 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 59 ++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 19 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 628a1b02c..34c206ed2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1036,7 +1036,7 @@ class Body(RSTState): pats['alphanum'] = '[a-zA-Z0-9]' pats['alphanumplus'] = '[a-zA-Z0-9_-]' pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s' - '|%(upperroman)s)' % enum.sequencepats) + '|%(upperroman)s|#)' % enum.sequencepats) pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats # @@@ Loosen up the pattern? Allow Unicode? 
pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats @@ -1182,7 +1182,10 @@ class Body(RSTState): raise statemachine.TransitionCorrection('text') enumlist = nodes.enumerated_list() self.parent += enumlist - enumlist['enumtype'] = sequence + if sequence == '#': + enumlist['enumtype'] = 'arabic' + else: + enumlist['enumtype'] = sequence enumlist['prefix'] = self.enum.formatinfo[format].prefix enumlist['suffix'] = self.enum.formatinfo[format].suffix if ordinal != 1: @@ -1199,7 +1202,9 @@ class Body(RSTState): input_offset=self.state_machine.abs_line_offset() + 1, node=enumlist, initial_state='EnumeratedList', blank_finish=blank_finish, - extra_settings={'lastordinal': ordinal, 'format': format}) + extra_settings={'lastordinal': ordinal, + 'format': format, + 'auto': sequence == '#'}) self.goto_line(newline_offset) if not blank_finish: self.parent += self.unindent_warning('Enumerated list') @@ -1232,7 +1237,9 @@ class Body(RSTState): raise ParserError('enumerator format not matched') text = groupdict[format][self.enum.formatinfo[format].start :self.enum.formatinfo[format].end] - if expected_sequence: + if text == '#': + sequence = '#' + elif expected_sequence: try: if self.enum.sequenceregexps[expected_sequence].match(text): sequence = expected_sequence @@ -1249,10 +1256,13 @@ class Body(RSTState): break else: # shouldn't happen raise ParserError('enumerator sequence not matched') - try: - ordinal = self.enum.converters[sequence](text) - except roman.InvalidRomanNumeralError: - ordinal = None + if sequence == '#': + ordinal = 1 + else: + try: + ordinal = self.enum.converters[sequence](text) + except roman.InvalidRomanNumeralError: + ordinal = None return format, sequence, text, ordinal def is_enumerated_list_item(self, ordinal, sequence, format): @@ -1260,7 +1270,7 @@ class Body(RSTState): Check validity based on the ordinal value and the second line. 
Return true iff the ordinal is valid and the second line is blank, - indented, or starts with the next enumerator. + indented, or starts with the next enumerator or an auto-enumerator. """ if ordinal is None: return None @@ -1273,9 +1283,11 @@ class Body(RSTState): self.state_machine.previous_line() if not next_line[:1].strip(): # blank or indented return 1 - next_enumerator = self.make_enumerator(ordinal + 1, sequence, format) + next_enumerator, auto_enumerator = self.make_enumerator( + ordinal + 1, sequence, format) try: - if next_line.startswith(next_enumerator): + if ( next_line.startswith(next_enumerator) or + next_line.startswith(auto_enumerator) ): return 1 except TypeError: pass @@ -1283,11 +1295,14 @@ class Body(RSTState): def make_enumerator(self, ordinal, sequence, format): """ - Construct and return an enumerated list item marker. + Construct and return the next enumerated list item marker, and an + auto-enumerator ("#" instead of the regular enumerator). Return ``None`` for invalid (out of range) ordinals. 
- """ - if sequence == 'arabic': + """ #" + if sequence == '#': + enumerator = '#' + elif sequence == 'arabic': enumerator = str(ordinal) else: if sequence.endswith('alpha'): @@ -1310,7 +1325,10 @@ class Body(RSTState): raise ParserError('unknown enumerator sequence: "%s"' % sequence) formatinfo = self.enum.formatinfo[format] - return formatinfo.prefix + enumerator + formatinfo.suffix + ' ' + next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix + + ' ') + auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' ' + return next_enumerator, auto_enumerator def field_marker(self, match, context, next_state): """Field list item.""" @@ -2348,12 +2366,15 @@ class EnumeratedList(SpecializedBody): """Enumerated list item.""" format, sequence, text, ordinal = self.parse_enumerator( match, self.parent['enumtype']) - if (sequence != self.parent['enumtype'] or - format != self.format or - ordinal != (self.lastordinal + 1) or - not self.is_enumerated_list_item(ordinal, sequence, format)): + if ( format != self.format + or (sequence != '#' and (sequence != self.parent['enumtype'] + or self.auto + or ordinal != (self.lastordinal + 1))) + or not self.is_enumerated_list_item(ordinal, sequence, format)): # different enumeration: new list self.invalid_input() + if sequence == '#': + self.auto = 1 listitem, blank_finish = self.list_item(match.end()) self.parent += listitem self.blank_finish = blank_finish -- cgit v1.2.1 From 4df9fad485c1e12e850f9d7ff225d51c538af3dc Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 26 Mar 2005 01:04:09 +0000 Subject: fixed a bug that assumed text follows ".. 
_" git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3119 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 34c206ed2..ddf47df7f 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -2146,13 +2146,13 @@ class Body(RSTState): re.compile(r""" \.\.[ ]+ # explicit markup start _ # target indicator - (?![ ]) # first char. not space + (?![ ]|$) # first char. not space or EOL """, re.VERBOSE)), (substitution_def, re.compile(r""" \.\.[ ]+ # explicit markup start \| # substitution indicator - (?![ ]) # first char. not space + (?![ ]|$) # first char. not space or EOL """, re.VERBOSE)), (directive, re.compile(r""" -- cgit v1.2.1 From 9dbc2adead008935245a396eab10bb7bcb11b226 Mon Sep 17 00:00:00 2001 From: wiemann Date: Sat, 26 Mar 2005 16:21:28 +0000 Subject: merged rev. 
3094:3101 and 3102:HEAD from branches/multiple-ids to trunk git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3129 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index ddf47df7f..a44f47027 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -365,7 +365,7 @@ class RSTState(StateWS): textnodes, title_messages = self.inline_text(title, lineno) titlenode = nodes.title(title, '', *textnodes) name = normalize_name(titlenode.astext()) - section_node['name'] = name + section_node['names'].append(name) section_node += titlenode section_node += messages section_node += title_messages @@ -787,7 +787,7 @@ class Inliner: else: if target: reference['refuri'] = uri - target['name'] = refname + target['names'].append(refname) self.document.note_external_target(target) self.document.note_explicit_target(target, self.parent) node_list.append(target) @@ -829,7 +829,7 @@ class Inliner: assert len(inlines) == 1 target = inlines[0] name = normalize_name(target.astext()) - target['name'] = name + target['names'].append(name) self.document.note_explicit_target(target, self.parent) return before, inlines, remaining, sysmessages @@ -1751,7 +1751,7 @@ class Body(RSTState): name = name[1:] # autonumber label footnote['auto'] = 1 if name: - footnote['name'] = name + footnote['names'].append(name) self.document.note_autofootnote(footnote) elif name == '*': # auto-symbol name = '' @@ -1759,7 +1759,7 @@ class Body(RSTState): self.document.note_symbol_footnote(footnote) else: # manually numbered footnote += nodes.label('', label) - footnote['name'] = name + footnote['names'].append(name) self.document.note_footnote(footnote) if name: self.document.note_explicit_target(footnote, footnote) @@ -1778,7 +1778,7 @@ class Body(RSTState): 
citation = nodes.citation('\n'.join(indented)) citation.line = lineno citation += nodes.label('', label) - citation['name'] = name + citation['names'].append(name) self.document.note_citation(citation) self.document.note_explicit_target(citation, citation) if indented: @@ -1814,7 +1814,6 @@ class Body(RSTState): target_type, data = self.parse_target(block, block_text, lineno) if target_type == 'refname': target = nodes.target(block_text, '', refname=normalize_name(data)) - target.indirect_reference_name = data self.add_target(target_name, '', target, lineno) self.document.note_indirect_target(target) return target @@ -1854,7 +1853,7 @@ class Body(RSTState): target.line = lineno if targetname: name = normalize_name(unescape(targetname)) - target['name'] = name + target['names'].append(name) if refuri: uri = self.inliner.adjust_uri(refuri) if uri: @@ -2259,7 +2258,7 @@ class RFC2822Body(Body): def rfc2822(self, match, context, next_state): """RFC2822-style field list item.""" - fieldlist = nodes.field_list(CLASS='rfc2822') + fieldlist = nodes.field_list(classes=['rfc2822']) self.parent += fieldlist field, blank_finish = self.rfc2822_field(match) fieldlist += field @@ -2497,7 +2496,7 @@ class SubstitutionDef(Body): def embedded_directive(self, match, context, next_state): nodelist, blank_finish = self.directive(match, - alt=self.parent['name']) + alt=self.parent['names'][0]) self.parent += nodelist if not self.state_machine.at_eof(): self.blank_finish = blank_finish -- cgit v1.2.1 From dc3284ca85006e72f0e3b2c4ffbe078aa9148f76 Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 5 Apr 2005 02:55:06 +0000 Subject: added "stub-columns" options to "csv-table" and "list-table" directives, plus support, docs, and tests git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3165 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff 
--git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index a44f47027..f779df8b2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1657,13 +1657,17 @@ class Body(RSTState): line=lineno) return [error] - def build_table(self, tabledata, tableline): - colspecs, headrows, bodyrows = tabledata + def build_table(self, tabledata, tableline, stub_columns=0): + colwidths, headrows, bodyrows = tabledata table = nodes.table() - tgroup = nodes.tgroup(cols=len(colspecs)) + tgroup = nodes.tgroup(cols=len(colwidths)) table += tgroup - for colspec in colspecs: - tgroup += nodes.colspec(colwidth=colspec) + for colwidth in colwidths: + colspec = nodes.colspec(colwidth=colwidth) + if stub_columns: + colspec.attributes['stub'] = 1 + stub_columns -= 1 + tgroup += colspec if headrows: thead = nodes.thead() tgroup += thead -- cgit v1.2.1 From 3da860aec26788ffd148bdbbedeee98b53a1436a Mon Sep 17 00:00:00 2001 From: wiemann Date: Mon, 25 Apr 2005 15:08:01 +0000 Subject: str(Exception) doesn't work for anything but ASCII Exception texts, so '%s' % exception_instance is unsafe unless exception_instance.args contains only byte strings. The change in alltests.py is a first attempt to catch such cases where a str(Exception) is done with an Exception text which is not necessarily ASCII-only (i.e. with a unicode string in Exception.args), but since most input data (in the totest dicts) is passed in as byte strings, it doesn't catch much. 
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3253 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index f779df8b2..1ee9dc6f2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1565,7 +1565,8 @@ class Body(RSTState): table = self.build_table(tabledata, tableline) nodelist = [table] + messages except tableparser.TableMarkupError, detail: - nodelist = self.malformed_table(block, str(detail)) + messages + nodelist = self.malformed_table( + block, ' '.join(detail.args)) + messages else: nodelist = messages return nodelist, blank_finish @@ -1982,7 +1983,8 @@ class Body(RSTState): directive_fn, option_presets)) except MarkupError, detail: error = self.reporter.error( - 'Error in "%s" directive:\n%s.' % (type_name, detail), + 'Error in "%s" directive:\n%s.' 
% (type_name, + ' '.join(detail.args)), nodes.literal_block(block_text, block_text), line=lineno) return [error], blank_finish result = directive_fn(type_name, arguments, options, content, lineno, @@ -2093,9 +2095,9 @@ class Body(RSTState): except KeyError, detail: return 0, ('unknown option: "%s"' % detail.args[0]) except (ValueError, TypeError), detail: - return 0, ('invalid option value: %s' % detail) + return 0, ('invalid option value: %s' % ' '.join(detail.args)) except utils.ExtensionOptionError, detail: - return 0, ('invalid option data: %s' % detail) + return 0, ('invalid option data: %s' % ' '.join(detail.args)) if blank_finish: return 1, options else: -- cgit v1.2.1 From cec037b796bfbd3481a37b0691c68a96f56ba0d9 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 8 Jun 2005 14:08:21 +0000 Subject: removed "from __future__ import" statements and lambda expressions git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3454 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 1ee9dc6f2..b7a359159 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -984,6 +984,16 @@ class Inliner: '__': anonymous_reference} +def _loweralpha_to_int(s, _zero=(ord('a')-1)): + return ord(s) - _zero + +def _upperalpha_to_int(s, _zero=(ord('A')-1)): + return ord(s) - _zero + +def _lowerroman_to_int(s): + return roman.fromRoman(s.upper()) + + class Body(RSTState): """ @@ -1006,12 +1016,9 @@ class Body(RSTState): 'lowerroman': '[ivxlcdm]+', 'upperroman': '[IVXLCDM]+',} enum.converters = {'arabic': int, - 'loweralpha': - lambda s, zero=(ord('a')-1): ord(s) - zero, - 'upperalpha': - lambda s, zero=(ord('A')-1): ord(s) - zero, - 'lowerroman': - lambda s: roman.fromRoman(s.upper()), + 'loweralpha': _loweralpha_to_int, + 
'upperalpha': _upperalpha_to_int, + 'lowerroman': _lowerroman_to_int, 'upperroman': roman.fromRoman} enum.sequenceregexps = {} -- cgit v1.2.1 From c9bdf0b05fc639f4112d0dfd7538ff63a02c79b5 Mon Sep 17 00:00:00 2001 From: wiemann Date: Thu, 16 Jun 2005 00:27:26 +0000 Subject: fixed bug with escaped colons introducing literal block git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3491 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index b7a359159..9f12bd560 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -386,7 +386,7 @@ class RSTState(StateWS): Return a list (paragraph & messages) & a boolean: literal_block next? """ data = '\n'.join(lines).rstrip() - if data[-2:] == '::': + if re.search(r'(? Date: Wed, 29 Jun 2005 22:34:44 +0000 Subject: removed document.internal_targets and document.external_targets; fixed bug (not sure yet if the code is clean -- needs refactoring) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3637 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 6 ------ 1 file changed, 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 9f12bd560..4156fd682 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -788,7 +788,6 @@ class Inliner: if target: reference['refuri'] = uri target['names'].append(refname) - self.document.note_external_target(target) self.document.note_explicit_target(target, self.parent) node_list.append(target) else: @@ -1870,17 +1869,12 @@ class Body(RSTState): uri = self.inliner.adjust_uri(refuri) if uri: target['refuri'] = uri - self.document.note_external_target(target) else: raise ApplicationError('problem with URI: %r' % 
refuri) - else: - self.document.note_internal_target(target) self.document.note_explicit_target(target, self.parent) else: # anonymous target if refuri: target['refuri'] = refuri - else: - self.document.note_internal_target(target) target['anonymous'] = 1 self.document.note_anonymous_target(target) -- cgit v1.2.1 From d854c9bf42f3c21b876ad51e3c691def7e072840 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 10 Aug 2005 02:49:06 +0000 Subject: fixed enumerated list bug (SF#1254145) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3789 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 4156fd682..175334fd2 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1289,14 +1289,15 @@ class Body(RSTState): self.state_machine.previous_line() if not next_line[:1].strip(): # blank or indented return 1 - next_enumerator, auto_enumerator = self.make_enumerator( - ordinal + 1, sequence, format) - try: - if ( next_line.startswith(next_enumerator) or - next_line.startswith(auto_enumerator) ): - return 1 - except TypeError: - pass + result = self.make_enumerator(ordinal + 1, sequence, format) + if result: + next_enumerator, auto_enumerator = result + try: + if ( next_line.startswith(next_enumerator) or + next_line.startswith(auto_enumerator) ): + return 1 + except TypeError: + pass return None def make_enumerator(self, ordinal, sequence, format): -- cgit v1.2.1 From b67ead9a20e534e7bb5fac07d62a8738ea8e5f46 Mon Sep 17 00:00:00 2001 From: goodger Date: Fri, 26 Aug 2005 03:14:19 +0000 Subject: catch unescaped colon at end of hyperlink targets git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3833 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 1 + 1 file changed, 1 insertion(+) 
(limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 175334fd2..06077cfca 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1723,6 +1723,7 @@ class Body(RSTState): %(non_whitespace_escape_before)s (?P=quote) # close quote if open quote used ) + (? Date: Fri, 26 Aug 2005 13:10:47 +0000 Subject: catch another case of malformed hyperlink target git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3834 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 06077cfca..d72a96e0c 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1723,7 +1723,7 @@ class Body(RSTState): %(non_whitespace_escape_before)s (?P=quote) # close quote if open quote used ) - (? 
Date: Tue, 11 Oct 2005 17:51:38 +0000 Subject: disallow targets inside substitution definitions git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3933 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 53 ++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index d72a96e0c..e9fa14d40 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1910,34 +1910,41 @@ class Body(RSTState): subname = subdefmatch.group('name') substitution_node = nodes.substitution_definition(blocktext) substitution_node.line = lineno - self.document.note_substitution_def( - substitution_node,subname, self.parent) - if block: - block[0] = block[0].strip() - new_abs_offset, blank_finish = self.nested_list_parse( - block, input_offset=offset, node=substitution_node, - initial_state='SubstitutionDef', blank_finish=blank_finish) - i = 0 - for node in substitution_node[:]: - if not (isinstance(node, nodes.Inline) or - isinstance(node, nodes.Text)): - self.parent += substitution_node[i] - del substitution_node[i] - else: - i += 1 - if len(substitution_node) == 0: + if not block: + msg = self.reporter.warning( + 'Substitution definition "%s" missing contents.' 
% subname, + nodes.literal_block(blocktext, blocktext), line=lineno) + return [msg], blank_finish + block[0] = block[0].strip() + substitution_node['names'].append( + nodes.whitespace_normalize_name(subname)) + new_abs_offset, blank_finish = self.nested_list_parse( + block, input_offset=offset, node=substitution_node, + initial_state='SubstitutionDef', blank_finish=blank_finish) + i = 0 + for node in substitution_node[:]: + if not (isinstance(node, nodes.Inline) or + isinstance(node, nodes.Text)): + self.parent += substitution_node[i] + del substitution_node[i] + else: + i += 1 + for node in substitution_node.traverse(nodes.Element): + if node['ids']: msg = self.reporter.warning( - 'Substitution definition "%s" empty or invalid.' - % subname, - nodes.literal_block(blocktext, blocktext), line=lineno) + 'Substitution definitions may not contain targets.', + nodes.literal_block(blocktext, blocktext), + line=lineno) return [msg], blank_finish - else: - return [substitution_node], blank_finish - else: + if len(substitution_node) == 0: msg = self.reporter.warning( - 'Substitution definition "%s" missing contents.' % subname, + 'Substitution definition "%s" empty or invalid.' 
+ % subname, nodes.literal_block(blocktext, blocktext), line=lineno) return [msg], blank_finish + self.document.note_substitution_def( + substitution_node, subname, self.parent) + return [substitution_node], blank_finish def directive(self, match, **option_presets): """Returns a 2-tuple: list of nodes, and a "blank finish" boolean.""" -- cgit v1.2.1 From 7068fae26ee297f883adf5f884fd64b0892b2a70 Mon Sep 17 00:00:00 2001 From: wiemann Date: Tue, 11 Oct 2005 20:40:41 +0000 Subject: disallow anonymous hyperlink references and auto-numbered footnotes inside of substitution definitions git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3937 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index e9fa14d40..db19c34b6 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1930,10 +1930,11 @@ class Body(RSTState): else: i += 1 for node in substitution_node.traverse(nodes.Element): - if node['ids']: - msg = self.reporter.warning( - 'Substitution definitions may not contain targets.', - nodes.literal_block(blocktext, blocktext), + if self.disallowed_inside_substitution_definitions(node): + pformat = nodes.literal_block('', node.pformat().rstrip()) + msg = self.reporter.error( + 'Substitution definition contains illegal element:', + pformat, nodes.literal_block(blocktext, blocktext), line=lineno) return [msg], blank_finish if len(substitution_node) == 0: @@ -1946,6 +1947,14 @@ class Body(RSTState): substitution_node, subname, self.parent) return [substitution_node], blank_finish + def disallowed_inside_substitution_definitions(self, node): + if (node['ids'] or + isinstance(node, nodes.reference) and node.get('anonymous') or + isinstance(node, nodes.footnote_reference) and node.get('auto')): + return 1 + else: + return 0 
+ def directive(self, match, **option_presets): """Returns a 2-tuple: list of nodes, and a "blank finish" boolean.""" type_name = match.group(1) -- cgit v1.2.1 From c6e1fdb5efb32575f69e1c0c68cb6715ac896a85 Mon Sep 17 00:00:00 2001 From: wiemann Date: Tue, 11 Oct 2005 21:36:06 +0000 Subject: removed more internal state (`document.anonymous_refs` and `document.anonymous_targets`) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3939 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index db19c34b6..be064db8a 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -783,7 +783,6 @@ class Inliner: reference['refuri'] = uri else: reference['anonymous'] = 1 - self.document.note_anonymous_ref(reference) else: if target: reference['refuri'] = uri @@ -846,8 +845,6 @@ class Inliner: '|%s%s' % (subref_text, endstring), '') if endstring[-2:] == '__': reference_node['anonymous'] = 1 - self.document.note_anonymous_ref( - reference_node) else: reference_node['refname'] = normalize_name(subref_text) self.document.note_refname(reference_node) @@ -898,7 +895,6 @@ class Inliner: name=whitespace_normalize_name(referencename)) if anonymous: referencenode['anonymous'] = 1 - self.document.note_anonymous_ref(referencenode) else: referencenode['refname'] = refname self.document.note_refname(referencenode) -- cgit v1.2.1 From ced6843a40a4b84b50b10c1106edfe80e088fbaa Mon Sep 17 00:00:00 2001 From: wiemann Date: Sat, 19 Nov 2005 23:48:04 +0000 Subject: allow escaped colons inside of fields git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@4083 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git 
a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index be064db8a..3a9c49094 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1054,7 +1054,7 @@ class Body(RSTState): patterns = { 'bullet': r'[-+*]( +|$)', 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats, - 'field_marker': r':[^: ]([^:]*[^: ])?:( +|$)', + 'field_marker': r':(?![: ])([^:\\]|\\.)*(?>>( +|$)', 'line_block': r'\|( +|$)', @@ -1367,8 +1367,8 @@ class Body(RSTState): def parse_field_marker(self, match): """Extract & return field name from a field marker match.""" - field = match.string[1:] # strip off leading ':' - field = field[:field.find(':')] # strip off trailing ':' etc. + field = match.group()[1:] # strip off leading ':' + field = field[:field.rfind(':')] # strip off trailing ':' etc. return field def parse_field_body(self, indented, offset, node): -- cgit v1.2.1 From 2ee75a675d5c74d75b587b2d28de459436ac25e6 Mon Sep 17 00:00:00 2001 From: goodger Date: Tue, 6 Dec 2005 01:06:33 +0000 Subject: Fixed bugs: list items with blank first lines; block quote attributions with indented second lines. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@4147 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 3a9c49094..b149c1888 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1114,6 +1114,7 @@ class Body(RSTState): Return a 3-tuple: (block quote lines, attribution lines, attribution offset). 
""" + #import pdb ; pdb.set_trace() blank = None nonblank_seen = None indent = 0 @@ -1130,8 +1131,10 @@ class Body(RSTState): indent = (len(indented[blank + 2]) - len(indented[blank + 2].lstrip())) for j in range(blank + 3, len(indented)): - if indent != (len(indented[j]) - - len(indented[j].lstrip())): # bad shape + if ( indented[j] # may be blank last line + and indent != (len(indented[j]) + - len(indented[j].lstrip()))): + # bad shape blank = None break if blank: @@ -1169,8 +1172,12 @@ class Body(RSTState): return [], next_state, [] def list_item(self, indent): - indented, line_offset, blank_finish = \ - self.state_machine.get_known_indented(indent) + if self.state_machine.line[indent:]: + indented, line_offset, blank_finish = ( + self.state_machine.get_known_indented(indent)) + else: + indented, indent, line_offset, blank_finish = ( + self.state_machine.get_first_known_indented(indent)) listitem = nodes.list_item('\n'.join(indented)) if indented: self.nested_parse(indented, input_offset=line_offset, -- cgit v1.2.1 From e571eb6f3616063e8cc1096b3451aa6f3e659858 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 7 Dec 2005 23:46:30 +0000 Subject: added East Asian double-width character support; thanks to Frank Bennett for inspiration on ``docutils.utils.east_asian_column_width()`` git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@4152 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index b149c1888..66c4393b8 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -114,7 +114,7 @@ from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.nodes import fully_normalize_name as normalize_name from docutils.nodes import whitespace_normalize_name -from 
docutils.utils import escape2null, unescape +from docutils.utils import escape2null, unescape, column_width from docutils.parsers.rst import directives, languages, tableparser, roles from docutils.parsers.rst.languages import en as _fallback_language_module @@ -995,6 +995,9 @@ class Body(RSTState): Generic classifier of the first line of a block. """ + double_width_pad_char = tableparser.TableParser.double_width_pad_char + """Padding character for East Asian double-width text.""" + enum = Struct() """Enumerated list parsing information.""" @@ -1592,6 +1595,8 @@ class Body(RSTState): source=source, line=lineno)) blank_finish = 0 block.disconnect() + # for East Asian chars: + block.pad_double_width(self.double_width_pad_char) width = len(block[0].strip()) for i in range(len(block)): block[i] = block[i].strip() @@ -1656,9 +1661,12 @@ class Body(RSTState): return [], messages, not extra self.state_machine.next_line(end - start) block = lines[start:end+1] + # for East Asian chars: + block.pad_double_width(self.double_width_pad_char) return block, [], end == limit or not lines[end+1].strip() def malformed_table(self, block, detail=''): + block.replace(self.double_width_pad_char, '') data = '\n'.join(block) message = 'Malformed table.' 
lineno = self.state_machine.abs_line_number() - len(block) + 1 @@ -2586,7 +2594,7 @@ class Text(RSTState): underline = match.string.rstrip() source = title + '\n' + underline messages = [] - if len(title) > len(underline): + if column_width(title) > len(underline): if len(underline) < 4: if self.state_machine.match_titles: msg = self.reporter.info( @@ -2825,7 +2833,7 @@ class Line(SpecializedText): return [], 'Body', [] title = title.rstrip() messages = [] - if len(title) > len(overline): + if column_width(title) > len(overline): blocktext = overline + '\n' + title + '\n' + underline if len(overline.rstrip()) < 4: self.short_overline(context, blocktext, lineno, 2) -- cgit v1.2.1 From 364683bdf6f3be7de65103d10e654afbc82221f5 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 29 Dec 2005 01:14:21 +0000 Subject: Re-introduced ``Targetable.indirect_reference_name``, for MoinMoin/reST compatibility (removed in r3124/r3129; noticed by Matthew Gilbert). git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@4234 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 1 + 1 file changed, 1 insertion(+) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 66c4393b8..5d0fd1c23 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -1838,6 +1838,7 @@ class Body(RSTState): target_type, data = self.parse_target(block, block_text, lineno) if target_type == 'refname': target = nodes.target(block_text, '', refname=normalize_name(data)) + target.indirect_reference_name = data self.add_target(target_name, '', target, lineno) self.document.note_indirect_target(target) return target -- cgit v1.2.1 From d2bdd5c0e3514a7223cc454e89f6ab7d82b54beb Mon Sep 17 00:00:00 2001 From: wiemann Date: Mon, 9 Jan 2006 03:29:23 +0000 Subject: some cleanup: removed unnecessary tearDown method; removed unnecessary roles parameter to Inliner.__init__ git-svn-id: 
http://svn.code.sf.net/p/docutils/code/trunk/docutils@4258 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/parsers/rst/states.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'docutils/parsers/rst/states.py') diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 5d0fd1c23..363ef8fdd 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -443,12 +443,7 @@ class Inliner: Parse inline markup; call the `parse()` method. """ - def __init__(self, roles=None): - """ - `roles` is a mapping of canonical role name to role function or bound - method, which enables additional interpreted text roles. - """ - + def __init__(self): self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),] """List of (pattern, bound method) tuples, used by `self.implicit_inline`.""" -- cgit v1.2.1