summaryrefslogtreecommitdiff
path: root/docutils/statemachine.py
diff options
context:
space:
mode:
authorgoodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2002-11-08 01:32:08 +0000
committergoodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2002-11-08 01:32:08 +0000
commit5dc3617bbe58ef3bbf00aae5f5b5dd4d74afb8a4 (patch)
tree91c4f8c4ebf8cc58404562b56e2775ea16045f58 /docutils/statemachine.py
parent10a51bd456b22e1bedc9721a00c1854e17260d78 (diff)
downloaddocutils-5dc3617bbe58ef3bbf00aae5f5b5dd4d74afb8a4.tar.gz
Added ``ViewList`` & ``StringList`` classes, to allow synchronized updating of parent lists from slices (child lists). ``extract_indented()`` becomes ``StringList.get_indented()``. Added ``StateMachine.insert_input()``.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@915 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/statemachine.py')
-rw-r--r--docutils/statemachine.py510
1 files changed, 409 insertions, 101 deletions
diff --git a/docutils/statemachine.py b/docutils/statemachine.py
index 19c357d06..076a9df2f 100644
--- a/docutils/statemachine.py
+++ b/docutils/statemachine.py
@@ -14,6 +14,8 @@ this module defines the following classes:
- `StateWS`, a state superclass for use with `StateMachineWS`
- `SearchStateMachine`, uses `re.search()` instead of `re.match()`
- `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
+- `ViewList`, extends standard Python lists.
+- `StringList`, string-specific ViewList.
Exception classes:
@@ -31,7 +33,7 @@ Exception classes:
Functions:
- `string2lines()`: split a multi-line string into a list of one-line strings
-- `extract_indented()`: return indented lines with minimum indentation removed
+
How To Use This Module
======================
@@ -136,7 +138,8 @@ class StateMachine:
"""
self.input_lines = None
- """List of strings (without newlines). Filled by `self.run()`."""
+ """`StringList` of input lines (without newlines).
+ Filled by `self.run()`."""
self.input_offset = 0
"""Offset of `self.input_lines` from the beginning of the file."""
@@ -172,7 +175,8 @@ class StateMachine:
state.unlink()
self.states = None
- def run(self, input_lines, input_offset=0, context=None):
+ def run(self, input_lines, input_offset=0, context=None,
+ input_source=None):
"""
Run the state machine on `input_lines`. Return results (a list).
@@ -187,19 +191,24 @@ class StateMachine:
Parameters:
- - `input_lines`: a list of strings without newlines.
+ - `input_lines`: a list of strings without newlines, or `StringList`.
- `input_offset`: the line offset of `input_lines` from the beginning
of the file.
- `context`: application-specific storage.
+ - `input_source`: name or path of source of `input_lines`.
"""
self.runtime_init()
- self.input_lines = input_lines
+ if isinstance(input_lines, StringList):
+ self.input_lines = input_lines
+ else:
+ self.input_lines = StringList(input_lines, source=input_source)
self.input_offset = input_offset
self.line_offset = -1
self.current_state = self.initial_state
if self.debug:
- print >>sys.stderr, ('\nStateMachine.run: input_lines:\n| %s' %
- '\n| '.join(self.input_lines))
+ print >>sys.stderr, (
+ '\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
+ % (self.line_offset, '\n| '.join(self.input_lines)))
transitions = None
results = []
state = self.get_state()
@@ -213,8 +222,12 @@ class StateMachine:
try:
self.next_line()
if self.debug:
- print >>sys.stderr, ('\nStateMachine.run: line:\n'
- '| %s' % self.line)
+ source, offset = self.input_lines.info(
+ self.line_offset)
+ print >>sys.stderr, (
+ '\nStateMachine.run: line (source=%r, '
+ 'offset=%r):\n| %s'
+ % (source, offset, self.line))
context, next_state, result = self.check_line(
context, state, transitions)
except EOFError:
@@ -234,7 +247,7 @@ class StateMachine:
print >>sys.stderr, (
'\nStateMachine.run: TransitionCorrection to '
'state "%s", transition %s.'
- % (state.__class__.__name, transitions[0]))
+ % (state.__class__.__name__, transitions[0]))
continue
except StateCorrection, exception:
self.previous_line() # back up for another try
@@ -337,6 +350,14 @@ class StateMachine:
"""Return line number of current line (counting from 1)."""
return self.line_offset + self.input_offset + 1
+ def insert_input(self, input_lines, source):
+ self.input_lines.insert(self.line_offset + 1, '',
+ source='internal padding')
+ self.input_lines.insert(self.line_offset + 1, '',
+ source='internal padding')
+ self.input_lines.insert(self.line_offset + 2,
+ StringList(input_lines, source))
+
def get_text_block(self, flush_left=0):
"""
Return a contiguous block of text.
@@ -345,17 +366,15 @@ class StateMachine:
indented line is encountered before the text block ends (with a blank
line).
"""
- block = []
- for line in self.input_lines[self.line_offset:]:
- if not line.strip():
- break
- if flush_left and (line[0] == ' '):
- self.next_line(len(block) - 1) # advance to last line of block
- raise UnexpectedIndentationError(block,
- self.abs_line_number() + 1)
- block.append(line)
- self.next_line(len(block) - 1) # advance to last line of block
- return block
+ try:
+ block = self.input_lines.get_text_block(self.line_offset,
+ flush_left)
+ self.next_line(len(block) - 1)
+ return block
+ except UnexpectedIndentationError, error:
+ block, source, lineno = error
+ self.next_line(len(block) - 1) # advance to last line of block
+ raise
def check_line(self, context, state, transitions=None):
"""
@@ -386,10 +405,6 @@ class StateMachine:
% (state.__class__.__name__, transitions))
for name in transitions:
pattern, method, next_state = state.transitions[name]
- if self.debug:
- print >>sys.stderr, (
- '\nStateMachine.check_line: Trying transition "%s" '
- 'in state "%s".' % (name, state.__class__.__name__))
match = self.match(pattern)
if match:
if self.debug:
@@ -399,6 +414,10 @@ class StateMachine:
% (name, state.__class__.__name__))
return method(match, context, next_state)
else:
+ if self.debug:
+ print >>sys.stderr, (
+ '\nStateMachine.check_line: No match in state "%s".'
+ % state.__class__.__name__)
return state.no_match(context, transitions)
def match(self, pattern):
@@ -445,8 +464,8 @@ class StateMachine:
def attach_observer(self, observer):
"""
- The `observer` parameter is a function or bound method which takes one
- argument, ``self`` (this StateMachine object).
+ The `observer` parameter is a function or bound method which takes two
+ arguments, the source and offset of the current line.
"""
self.observers.append(observer)
@@ -455,7 +474,11 @@ class StateMachine:
def notify_observers(self):
for observer in self.observers:
- observer(self)
+ try:
+ info = self.input_lines.info(self.line_offset)
+ except IndexError:
+ info = (None, None)
+ observer(*info)
class State:
@@ -762,12 +785,12 @@ class StateMachineWS(StateMachine):
- whether or not it finished with a blank line.
"""
offset = self.abs_line_offset()
- indented, indent, blank_finish = extract_indented(
- self.input_lines[self.line_offset:], until_blank, strip_indent)
+ indented, indent, blank_finish = self.input_lines.get_indented(
+ self.line_offset, until_blank, strip_indent)
if indented:
self.next_line(len(indented) - 1) # advance to last indented line
while indented and not indented[0].strip():
- indented.pop(0)
+ indented.trim_start()
offset += 1
return indented, indent, offset, blank_finish
@@ -793,24 +816,12 @@ class StateMachineWS(StateMachine):
- whether or not it finished with a blank line.
"""
offset = self.abs_line_offset()
- indented = [self.line[indent:]]
- for line in self.input_lines[self.line_offset + 1:]:
- if line[:indent].strip():
- blank_finish = not indented[-1].strip() and len(indented) > 1
- break
- if until_blank and line.strip():
- blank_finish = 1
- break
- if strip_indent:
- indented.append(line[indent:])
- else:
- indented.append(line)
- else:
- blank_finish = 1
- if indented:
- self.next_line(len(indented) - 1) # advance to last indented line
+ indented, indent, blank_finish = self.input_lines.get_indented(
+ self.line_offset, until_blank, strip_indent,
+ block_indent=indent)
+ self.next_line(len(indented) - 1) # advance to last indented line
while indented and not indented[0].strip():
- indented.pop(0)
+ indented.trim_start()
offset += 1
return indented, offset, blank_finish
@@ -837,14 +848,13 @@ class StateMachineWS(StateMachine):
- whether or not it finished with a blank line.
"""
offset = self.abs_line_offset()
- indented = [self.line[indent:]]
- indented[1:], indent, blank_finish = extract_indented(
- self.input_lines[self.line_offset + 1:], until_blank,
- strip_indent)
+ indented, indent, blank_finish = self.input_lines.get_indented(
+ self.line_offset, until_blank, strip_indent,
+ first_indent=indent)
self.next_line(len(indented) - 1) # advance to last indented line
if strip_top:
while indented and not indented[0].strip():
- indented.pop(0)
+ indented.trim_start()
offset += 1
return indented, indent, offset, blank_finish
@@ -1023,6 +1033,352 @@ class SearchStateMachineWS(_SearchOverride, StateMachineWS):
pass
+class ViewList:
+
+ """
+ List with extended functionality: slices of ViewList objects are child
+ lists, linked to their parents. Changes made to a child list also affect
+ the parent list. A child list is effectively a "view" (in the SQL sense)
+ of the parent list. Changes to parent lists, however, do *not* affect
+ active child lists. If a parent list is changed, any active child lists
+ should be recreated.
+
+ The start and end of the slice can be trimmed using the `trim_start()` and
+ `trim_end()` methods, without affecting the parent list. The link between
+ child and parent lists can be broken by calling `disconnect()` on the
+ child list.
+
+ Also, ViewList objects keep track of the source & offset of each item.
+ This information is accessible via the `source()`, `offset()`, and
+ `info()` methods.
+ """
+
+ def __init__(self, initlist=None, source=None, items=None,
+ parent=None, parent_offset=None):
+ self.data = []
+ """The actual list of data, flattened from various sources."""
+
+ self.items = []
+ """A list of (source, offset) pairs, same length as `self.data`: the
+ source of each line and the offset of each line from the beginning of
+ its source."""
+
+ self.parent = parent
+ """The parent list."""
+
+ self.parent_offset = parent_offset
+ """Offset of this list from the beginning of the parent list."""
+
+ if isinstance(initlist, ViewList):
+ self.data = initlist.data[:]
+ self.items = initlist.items[:]
+ elif initlist is not None:
+ self.data = list(initlist)
+ if items:
+ self.items = items
+ else:
+ self.items = [(source, i) for i in range(len(initlist))]
+ assert len(self.data) == len(self.items), 'data mismatch'
+
+ def __str__(self):
+ return str(self.data)
+
+ def __repr__(self):
+ return '%s(%s, items=%s)' % (self.__class__.__name__,
+ self.data, self.items)
+
+ def __lt__(self, other): return self.data < self.__cast(other)
+ def __le__(self, other): return self.data <= self.__cast(other)
+ def __eq__(self, other): return self.data == self.__cast(other)
+ def __ne__(self, other): return self.data != self.__cast(other)
+ def __gt__(self, other): return self.data > self.__cast(other)
+ def __ge__(self, other): return self.data >= self.__cast(other)
+ def __cmp__(self, other): return cmp(self.data, self.__cast(other))
+
+ def __cast(self, other):
+ if isinstance(other, ViewList):
+ return other.data
+ else:
+ return other
+
+ def __contains__(self, item): return item in self.data
+ def __len__(self): return len(self.data)
+
+ def __getitem__(self, i):
+ try:
+ return self.data[i]
+ except TypeError:
+ assert i.step is None, 'cannot handle slice with stride'
+ return self.__class__(self.data[i.start:i.stop],
+ items=self.items[i.start:i.stop],
+ parent=self, parent_offset=i.start)
+
+ def __setitem__(self, i, item):
+ try:
+ self.data[i] = item
+ if self.parent:
+ self.parent[i + self.parent_offset] = item
+ except TypeError:
+ assert i.step is None, 'cannot handle slice with stride'
+ if not isinstance(item, ViewList):
+ raise TypeError('assigning non-ViewList to ViewList slice')
+ self.data[i.start:i.stop] = item.data
+ self.items[i.start:i.stop] = item.items
+ assert len(self.data) == len(self.items), 'data mismatch'
+ if self.parent:
+ self.parent[i.start + self.parent_offset
+ : i.stop + self.parent_offset] = item
+
+ def __delitem__(self, i):
+ try:
+ del self.data[i]
+ del self.items[i]
+ if self.parent:
+ del self.parent[i + self.parent_offset]
+ except TypeError:
+ assert i.step is None, 'cannot handle slice with stride'
+ del self.data[i.start:i.stop]
+ del self.items[i.start:i.stop]
+ if self.parent:
+ del self.parent[i.start + self.parent_offset
+ : i.stop + self.parent_offset]
+
+ def __add__(self, other):
+ if isinstance(other, ViewList):
+ return self.__class__(self.data + other.data,
+ items=(self.items + other.items))
+ else:
+ raise TypeError('adding non-ViewList to a ViewList')
+
+ def __radd__(self, other):
+ if isinstance(other, ViewList):
+ return self.__class__(other.data + self.data,
+ items=(other.items + self.items))
+ else:
+ raise TypeError('adding ViewList to a non-ViewList')
+
+ def __iadd__(self, other):
+ if isinstance(other, ViewList):
+ self.data += other.data
+ else:
+ raise TypeError('argument to += must be a ViewList')
+ return self
+
+ def __mul__(self, n):
+ return self.__class__(self.data * n, items=(self.items * n))
+
+ __rmul__ = __mul__
+
+ def __imul__(self, n):
+ self.data *= n
+ self.items *= n
+ return self
+
+ def extend(self, other):
+ if not isinstance(other, ViewList):
+ raise TypeError('extending a ViewList with a non-ViewList')
+ if self.parent:
+ self.parent.insert(len(self.data) + self.parent_offset, other)
+ self.data.extend(other.data)
+ self.items.extend(other.items)
+
+ def append(self, item, source=None, offset=0):
+ if source is None:
+ self.extend(item)
+ else:
+ if self.parent:
+ self.parent.insert(len(self.data) + self.parent_offset, item,
+ source, offset)
+ self.data.append(item)
+ self.items.append((source, offset))
+
+ def insert(self, i, item, source=None, offset=0):
+ if source is None:
+ if not isinstance(item, ViewList):
+ raise TypeError('inserting non-ViewList with no source given')
+ self.data[i:i] = item.data
+ self.items[i:i] = item.items
+ if self.parent:
+ index = (len(self.data) + i) % len(self.data)
+ self.parent.insert(index + self.parent_offset, item)
+ else:
+ self.data.insert(i, item)
+ self.items.insert(i, (source, offset))
+ if self.parent:
+ index = (len(self.data) + i) % len(self.data)
+ self.parent.insert(index + self.parent_offset, item,
+ source, offset)
+
+ def pop(self, i=-1):
+ if self.parent:
+ index = (len(self.data) + i) % len(self.data)
+ self.parent.pop(index + self.parent_offset)
+ self.items.pop(i)
+ return self.data.pop(i)
+
+ def trim_start(self, n=1):
+ """
+ Remove items from the start of the list, without touching the parent.
+ """
+ if n > len(self.data):
+ raise IndexError("Size of trim too large; can't trim %s items "
+ "from a list of size %s." % (n, len(self.data)))
+ elif n < 0:
+ raise IndexError('Trim size must be >= 0.')
+ del self.data[:n]
+ del self.items[:n]
+ if self.parent:
+ self.parent_offset += n
+
+ def trim_end(self, n=1):
+ """
+ Remove items from the end of the list, without touching the parent.
+ """
+ if n > len(self.data):
+ raise IndexError("Size of trim too large; can't trim %s items "
+ "from a list of size %s." % (n, len(self.data)))
+ elif n < 0:
+ raise IndexError('Trim size must be >= 0.')
+ del self.data[-n:]
+ del self.items[-n:]
+
+ def remove(self, item):
+ index = self.index(item)
+ del self[index]
+
+ def count(self, item): return self.data.count(item)
+ def index(self, item): return self.data.index(item)
+
+ def reverse(self):
+ self.data.reverse()
+ self.items.reverse()
+ self.parent = None
+
+ def sort(self, *args):
+ tmp = zip(self.data, self.items)
+ tmp.sort(*args)
+ self.data = [entry[0] for entry in tmp]
+ self.items = [entry[1] for entry in tmp]
+ self.parent = None
+
+ def info(self, i):
+ """Return source & offset for index `i`."""
+ try:
+ return self.items[i]
+ except IndexError:
+ if i == len(self.data): # Just past the end
+ return self.items[i - 1][0], None
+ else:
+ raise
+
+ def source(self, i):
+ """Return source for index `i`."""
+ return self.info(i)[0]
+
+ def offset(self, i):
+ """Return offset for index `i`."""
+ return self.info(i)[1]
+
+ def disconnect(self):
+ """Break link between this list and parent list."""
+ self.parent = None
+
+
+class StringList(ViewList):
+
+ """A `ViewList` with string-specific methods."""
+
+ def strip_indent(self, length, start=0, end=sys.maxint):
+ """
+ Strip `length` characters off the beginning of each item, in-place,
+ from index `start` to `end`. No whitespace-checking is done on the
+ stripped text. Does not affect slice parent.
+ """
+ self.data[start:end] = [line[length:]
+ for line in self.data[start:end]]
+
+ def get_text_block(self, start, flush_left=0):
+ """
+ Return a contiguous block of text.
+
+ If `flush_left` is true, raise `UnexpectedIndentationError` if an
+ indented line is encountered before the text block ends (with a blank
+ line).
+ """
+ end = start
+ last = len(self.data)
+ while end < last:
+ line = self.data[end]
+ if not line.strip():
+ break
+ if flush_left and (line[0] == ' '):
+ source, offset = self.info(end)
+ raise UnexpectedIndentationError(self[start:end], source,
+ offset + 1)
+ end += 1
+ return self[start:end]
+
+ def get_indented(self, start=0, until_blank=0, strip_indent=1,
+ block_indent=None, first_indent=None):
+ """
+ Extract and return a StringList of indented lines of text.
+
+ Collect all lines with indentation, determine the minimum indentation,
+ remove the minimum indentation from all indented lines (unless
+ `strip_indent` is false), and return them. All lines up to but not
+ including the first unindented line will be returned.
+
+ :Parameters:
+ - `start`: The index of the first line to examine.
+ - `until_blank`: Stop collecting at the first blank line if true.
+ - `strip_indent`: Strip common leading indent if true (default).
+ - `block_indent`: The indent of the entire block, if known.
+ - `first_indent`: The indent of the first line, if known.
+
+ :Return:
+ - a StringList of indented lines with mininum indent removed;
+ - the amount of the indent;
+ - a boolean: did the indented block finish with a blank line or EOF?
+ """
+ indent = block_indent # start with None if unknown
+ end = start
+ if block_indent is not None and first_indent is None:
+ first_indent = block_indent
+ if first_indent is not None:
+ end += 1
+ last = len(self.data)
+ while end < last:
+ line = self.data[end]
+ if line and (line[0] != ' '
+ or (block_indent is not None
+ and line[:block_indent].strip())):
+ # Line not indented or insufficiently indented.
+ # Block finished properly iff the last indented line blank:
+ blank_finish = ((end > start)
+ and not self.data[end - 1].strip())
+ break
+ stripped = line.lstrip()
+ if not stripped: # blank line
+ if until_blank:
+ blank_finish = 1
+ break
+ elif block_indent is None:
+ line_indent = len(line) - len(stripped)
+ if indent is None:
+ indent = line_indent
+ else:
+ indent = min(indent, line_indent)
+ end += 1
+ else:
+ blank_finish = 1 # block ends at end of lines
+ block = self[start:end]
+ if first_indent is not None and block:
+ block.data[0] = block.data[0][first_indent:]
+ if indent and strip_indent:
+ block.strip_indent(indent, start=(first_indent is not None))
+ return block, indent or 0, blank_finish
+
+
class StateMachineError(Exception): pass
class UnknownStateError(StateMachineError): pass
class DuplicateStateError(StateMachineError): pass
@@ -1070,54 +1426,6 @@ def string2lines(astring, tab_width=8, convert_whitespace=0,
astring = whitespace.sub(' ', astring)
return [s.expandtabs(tab_width) for s in astring.splitlines()]
-def extract_indented(lines, until_blank=0, strip_indent=1):
- """
- Extract and return a list of indented lines of text.
-
- Collect all lines with indentation, determine the minimum indentation,
- remove the minimum indentation from all indented lines (unless
- `strip_indent` is false), and return them. All lines up to but not
- including the first unindented line will be returned.
-
- :Parameters:
- - `lines`: a list of one-line strings without newlines.
- - `until_blank`: Stop collecting at the first blank line if true (1).
- - `strip_indent`: Strip common leading indent if true (1, default).
-
- :Return:
- - a list of indented lines with mininum indent removed;
- - the amount of the indent;
- - whether or not the block finished with a blank line or at the end of
- `lines`.
- """
- source = []
- indent = None
- for line in lines:
- if line and line[0] != ' ': # line not indented
- # block finished properly iff the last indented line was blank
- blank_finish = len(source) and not source[-1].strip()
- break
- stripped = line.lstrip()
- if until_blank and not stripped: # blank line
- blank_finish = 1
- break
- source.append(line)
- if not stripped: # blank line
- continue
- lineindent = len(line) - len(stripped)
- if indent is None:
- indent = lineindent
- else:
- indent = min(indent, lineindent)
- else:
- blank_finish = 1 # block ends at end of lines
- if indent:
- if strip_indent:
- source = [s[indent:] for s in source]
- return source, indent, blank_finish
- else:
- return [], 0, blank_finish
-
def _exception_data():
"""
Return exception information: