diff options
| author | goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2002-11-08 01:32:08 +0000 |
|---|---|---|
| committer | goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2002-11-08 01:32:08 +0000 |
| commit | 5dc3617bbe58ef3bbf00aae5f5b5dd4d74afb8a4 (patch) | |
| tree | 91c4f8c4ebf8cc58404562b56e2775ea16045f58 /docutils/statemachine.py | |
| parent | 10a51bd456b22e1bedc9721a00c1854e17260d78 (diff) | |
| download | docutils-5dc3617bbe58ef3bbf00aae5f5b5dd4d74afb8a4.tar.gz | |
Added ``ViewList`` & ``StringList`` classes, to allow synchronized updating of parent lists from slices (child lists). ``extract_indented()`` becomes ``StringList.get_indented()``. Added ``StateMachine.insert_input()``.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@915 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/statemachine.py')
| -rw-r--r-- | docutils/statemachine.py | 510 |
1 files changed, 409 insertions, 101 deletions
diff --git a/docutils/statemachine.py b/docutils/statemachine.py index 19c357d06..076a9df2f 100644 --- a/docutils/statemachine.py +++ b/docutils/statemachine.py @@ -14,6 +14,8 @@ this module defines the following classes: - `StateWS`, a state superclass for use with `StateMachineWS` - `SearchStateMachine`, uses `re.search()` instead of `re.match()` - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()` +- `ViewList`, extends standard Python lists. +- `StringList`, string-specific ViewList. Exception classes: @@ -31,7 +33,7 @@ Exception classes: Functions: - `string2lines()`: split a multi-line string into a list of one-line strings -- `extract_indented()`: return indented lines with minimum indentation removed + How To Use This Module ====================== @@ -136,7 +138,8 @@ class StateMachine: """ self.input_lines = None - """List of strings (without newlines). Filled by `self.run()`.""" + """`StringList` of input lines (without newlines). + Filled by `self.run()`.""" self.input_offset = 0 """Offset of `self.input_lines` from the beginning of the file.""" @@ -172,7 +175,8 @@ class StateMachine: state.unlink() self.states = None - def run(self, input_lines, input_offset=0, context=None): + def run(self, input_lines, input_offset=0, context=None, + input_source=None): """ Run the state machine on `input_lines`. Return results (a list). @@ -187,19 +191,24 @@ class StateMachine: Parameters: - - `input_lines`: a list of strings without newlines. + - `input_lines`: a list of strings without newlines, or `StringList`. - `input_offset`: the line offset of `input_lines` from the beginning of the file. - `context`: application-specific storage. + - `input_source`: name or path of source of `input_lines`. """ self.runtime_init() - self.input_lines = input_lines + if isinstance(input_lines, StringList): + self.input_lines = input_lines + else: + self.input_lines = StringList(input_lines, source=input_source) self.input_offset = input_offset self.line_offset = -1 self.current_state = self.initial_state if self.debug: - print >>sys.stderr, ('\nStateMachine.run: input_lines:\n| %s' % - '\n| '.join(self.input_lines)) + print >>sys.stderr, ( + '\nStateMachine.run: input_lines (line_offset=%s):\n| %s' + % (self.line_offset, '\n| '.join(self.input_lines))) transitions = None results = [] state = self.get_state() @@ -213,8 +222,12 @@ class StateMachine: try: self.next_line() if self.debug: - print >>sys.stderr, ('\nStateMachine.run: line:\n' - '| %s' % self.line) + source, offset = self.input_lines.info( + self.line_offset) + print >>sys.stderr, ( + '\nStateMachine.run: line (source=%r, ' + 'offset=%r):\n| %s' + % (source, offset, self.line)) context, next_state, result = self.check_line( context, state, transitions) except EOFError: @@ -234,7 +247,7 @@ class StateMachine: print >>sys.stderr, ( '\nStateMachine.run: TransitionCorrection to ' 'state "%s", transition %s.' - % (state.__class__.__name, transitions[0])) + % (state.__class__.__name__, transitions[0])) continue except StateCorrection, exception: self.previous_line() # back up for another try @@ -337,6 +350,14 @@ class StateMachine: """Return line number of current line (counting from 1).""" return self.line_offset + self.input_offset + 1 + def insert_input(self, input_lines, source): + self.input_lines.insert(self.line_offset + 1, '', + source='internal padding') + self.input_lines.insert(self.line_offset + 1, '', + source='internal padding') + self.input_lines.insert(self.line_offset + 2, + StringList(input_lines, source)) + def get_text_block(self, flush_left=0): """ Return a contiguous block of text. @@ -345,17 +366,15 @@ class StateMachine: indented line is encountered before the text block ends (with a blank line). """ - block = [] - for line in self.input_lines[self.line_offset:]: - if not line.strip(): - break - if flush_left and (line[0] == ' '): - self.next_line(len(block) - 1) # advance to last line of block - raise UnexpectedIndentationError(block, - self.abs_line_number() + 1) - block.append(line) - self.next_line(len(block) - 1) # advance to last line of block - return block + try: + block = self.input_lines.get_text_block(self.line_offset, + flush_left) + self.next_line(len(block) - 1) + return block + except UnexpectedIndentationError, error: + block, source, lineno = error + self.next_line(len(block) - 1) # advance to last line of block + raise def check_line(self, context, state, transitions=None): """ @@ -386,10 +405,6 @@ class StateMachine: % (state.__class__.__name__, transitions)) for name in transitions: pattern, method, next_state = state.transitions[name] - if self.debug: - print >>sys.stderr, ( - '\nStateMachine.check_line: Trying transition "%s" ' - 'in state "%s".' % (name, state.__class__.__name__)) match = self.match(pattern) if match: if self.debug: @@ -399,6 +414,10 @@ class StateMachine: % (name, state.__class__.__name__)) return method(match, context, next_state) else: + if self.debug: + print >>sys.stderr, ( + '\nStateMachine.check_line: No match in state "%s".' + % state.__class__.__name__) return state.no_match(context, transitions) def match(self, pattern): @@ -445,8 +464,8 @@ class StateMachine: def attach_observer(self, observer): """ - The `observer` parameter is a function or bound method which takes one - argument, ``self`` (this StateMachine object). + The `observer` parameter is a function or bound method which takes two + arguments, the source and offset of the current line. """ self.observers.append(observer) @@ -455,7 +474,11 @@ class StateMachine: def notify_observers(self): for observer in self.observers: - observer(self) + try: + info = self.input_lines.info(self.line_offset) + except IndexError: + info = (None, None) + observer(*info) class State: @@ -762,12 +785,12 @@ class StateMachineWS(StateMachine): - whether or not it finished with a blank line. """ offset = self.abs_line_offset() - indented, indent, blank_finish = extract_indented( - self.input_lines[self.line_offset:], until_blank, strip_indent) + indented, indent, blank_finish = self.input_lines.get_indented( + self.line_offset, until_blank, strip_indent) if indented: self.next_line(len(indented) - 1) # advance to last indented line while indented and not indented[0].strip(): - indented.pop(0) + indented.trim_start() offset += 1 return indented, indent, offset, blank_finish @@ -793,24 +816,12 @@ class StateMachineWS(StateMachine): - whether or not it finished with a blank line. """ offset = self.abs_line_offset() - indented = [self.line[indent:]] - for line in self.input_lines[self.line_offset + 1:]: - if line[:indent].strip(): - blank_finish = not indented[-1].strip() and len(indented) > 1 - break - if until_blank and line.strip(): - blank_finish = 1 - break - if strip_indent: - indented.append(line[indent:]) - else: - indented.append(line) - else: - blank_finish = 1 - if indented: - self.next_line(len(indented) - 1) # advance to last indented line + indented, indent, blank_finish = self.input_lines.get_indented( + self.line_offset, until_blank, strip_indent, + block_indent=indent) + self.next_line(len(indented) - 1) # advance to last indented line while indented and not indented[0].strip(): - indented.pop(0) + indented.trim_start() offset += 1 return indented, offset, blank_finish @@ -837,14 +848,13 @@ class StateMachineWS(StateMachine): - whether or not it finished with a blank line. """ offset = self.abs_line_offset() - indented = [self.line[indent:]] - indented[1:], indent, blank_finish = extract_indented( - self.input_lines[self.line_offset + 1:], until_blank, - strip_indent) + indented, indent, blank_finish = self.input_lines.get_indented( + self.line_offset, until_blank, strip_indent, + first_indent=indent) self.next_line(len(indented) - 1) # advance to last indented line if strip_top: while indented and not indented[0].strip(): - indented.pop(0) + indented.trim_start() offset += 1 return indented, indent, offset, blank_finish @@ -1023,6 +1033,352 @@ class SearchStateMachineWS(_SearchOverride, StateMachineWS): pass +class ViewList: + + """ + List with extended functionality: slices of ViewList objects are child + lists, linked to their parents. Changes made to a child list also affect + the parent list. A child list is effectively a "view" (in the SQL sense) + of the parent list. Changes to parent lists, however, do *not* affect + active child lists. If a parent list is changed, any active child lists + should be recreated. + + The start and end of the slice can be trimmed using the `trim_start()` and + `trim_end()` methods, without affecting the parent list. The link between + child and parent lists can be broken by calling `disconnect()` on the + child list. + + Also, ViewList objects keep track of the source & offset of each item. + This information is accessible via the `source()`, `offset()`, and + `info()` methods. + """ + + def __init__(self, initlist=None, source=None, items=None, + parent=None, parent_offset=None): + self.data = [] + """The actual list of data, flattened from various sources.""" + + self.items = [] + """A list of (source, offset) pairs, same length as `self.data`: the + source of each line and the offset of each line from the beginning of + its source.""" + + self.parent = parent + """The parent list.""" + + self.parent_offset = parent_offset + """Offset of this list from the beginning of the parent list.""" + + if isinstance(initlist, ViewList): + self.data = initlist.data[:] + self.items = initlist.items[:] + elif initlist is not None: + self.data = list(initlist) + if items: + self.items = items + else: + self.items = [(source, i) for i in range(len(initlist))] + assert len(self.data) == len(self.items), 'data mismatch' + + def __str__(self): + return str(self.data) + + def __repr__(self): + return '%s(%s, items=%s)' % (self.__class__.__name__, + self.data, self.items) + + def __lt__(self, other): return self.data < self.__cast(other) + def __le__(self, other): return self.data <= self.__cast(other) + def __eq__(self, other): return self.data == self.__cast(other) + def __ne__(self, other): return self.data != self.__cast(other) + def __gt__(self, other): return self.data > self.__cast(other) + def __ge__(self, other): return self.data >= self.__cast(other) + def __cmp__(self, other): return cmp(self.data, self.__cast(other)) + + def __cast(self, other): + if isinstance(other, ViewList): + return other.data + else: + return other + + def __contains__(self, item): return item in self.data + def __len__(self): return len(self.data) + + def __getitem__(self, i): + try: + return self.data[i] + except TypeError: + assert i.step is None, 'cannot handle slice with stride' + return self.__class__(self.data[i.start:i.stop], + items=self.items[i.start:i.stop], + parent=self, parent_offset=i.start) + + def __setitem__(self, i, item): + try: + self.data[i] = item + if self.parent: + self.parent[i + self.parent_offset] = item + except TypeError: + assert i.step is None, 'cannot handle slice with stride' + if not isinstance(item, ViewList): + raise TypeError('assigning non-ViewList to ViewList slice') + self.data[i.start:i.stop] = item.data + self.items[i.start:i.stop] = item.items + assert len(self.data) == len(self.items), 'data mismatch' + if self.parent: + self.parent[i.start + self.parent_offset + : i.stop + self.parent_offset] = item + + def __delitem__(self, i): + try: + del self.data[i] + del self.items[i] + if self.parent: + del self.parent[i + self.parent_offset] + except TypeError: + assert i.step is None, 'cannot handle slice with stride' + del self.data[i.start:i.stop] + del self.items[i.start:i.stop] + if self.parent: + del self.parent[i.start + self.parent_offset + : i.stop + self.parent_offset] + + def __add__(self, other): + if isinstance(other, ViewList): + return self.__class__(self.data + other.data, + items=(self.items + other.items)) + else: + raise TypeError('adding non-ViewList to a ViewList') + + def __radd__(self, other): + if isinstance(other, ViewList): + return self.__class__(other.data + self.data, + items=(other.items + self.items)) + else: + raise TypeError('adding ViewList to a non-ViewList') + + def __iadd__(self, other): + if isinstance(other, ViewList): + self.data += other.data + else: + raise TypeError('argument to += must be a ViewList') + return self + + def __mul__(self, n): + return self.__class__(self.data * n, items=(self.items * n)) + + __rmul__ = __mul__ + + def __imul__(self, n): + self.data *= n + self.items *= n + return self + + def extend(self, other): + if not isinstance(other, ViewList): + raise TypeError('extending a ViewList with a non-ViewList') + if self.parent: + self.parent.insert(len(self.data) + self.parent_offset, other) + self.data.extend(other.data) + self.items.extend(other.items) + + def append(self, item, source=None, offset=0): + if source is None: + self.extend(item) + else: + if self.parent: + self.parent.insert(len(self.data) + self.parent_offset, item, + source, offset) + self.data.append(item) + self.items.append((source, offset)) + + def insert(self, i, item, source=None, offset=0): + if source is None: + if not isinstance(item, ViewList): + raise TypeError('inserting non-ViewList with no source given') + self.data[i:i] = item.data + self.items[i:i] = item.items + if self.parent: + index = (len(self.data) + i) % len(self.data) + self.parent.insert(index + self.parent_offset, item) + else: + self.data.insert(i, item) + self.items.insert(i, (source, offset)) + if self.parent: + index = (len(self.data) + i) % len(self.data) + self.parent.insert(index + self.parent_offset, item, + source, offset) + + def pop(self, i=-1): + if self.parent: + index = (len(self.data) + i) % len(self.data) + self.parent.pop(index + self.parent_offset) + self.items.pop(i) + return self.data.pop(i) + + def trim_start(self, n=1): + """ + Remove items from the start of the list, without touching the parent. + """ + if n > len(self.data): + raise IndexError("Size of trim too large; can't trim %s items " + "from a list of size %s." % (n, len(self.data))) + elif n < 0: + raise IndexError('Trim size must be >= 0.') + del self.data[:n] + del self.items[:n] + if self.parent: + self.parent_offset += n + + def trim_end(self, n=1): + """ + Remove items from the end of the list, without touching the parent. + """ + if n > len(self.data): + raise IndexError("Size of trim too large; can't trim %s items " + "from a list of size %s." % (n, len(self.data))) + elif n < 0: + raise IndexError('Trim size must be >= 0.') + del self.data[-n:] + del self.items[-n:] + + def remove(self, item): + index = self.index(item) + del self[index] + + def count(self, item): return self.data.count(item) + def index(self, item): return self.data.index(item) + + def reverse(self): + self.data.reverse() + self.items.reverse() + self.parent = None + + def sort(self, *args): + tmp = zip(self.data, self.items) + tmp.sort(*args) + self.data = [entry[0] for entry in tmp] + self.items = [entry[1] for entry in tmp] + self.parent = None + + def info(self, i): + """Return source & offset for index `i`.""" + try: + return self.items[i] + except IndexError: + if i == len(self.data): # Just past the end + return self.items[i - 1][0], None + else: + raise + + def source(self, i): + """Return source for index `i`.""" + return self.info(i)[0] + + def offset(self, i): + """Return offset for index `i`.""" + return self.info(i)[1] + + def disconnect(self): + """Break link between this list and parent list.""" + self.parent = None + + +class StringList(ViewList): + + """A `ViewList` with string-specific methods.""" + + def strip_indent(self, length, start=0, end=sys.maxint): + """ + Strip `length` characters off the beginning of each item, in-place, + from index `start` to `end`. No whitespace-checking is done on the + stripped text. Does not affect slice parent. + """ + self.data[start:end] = [line[length:] + for line in self.data[start:end]] + + def get_text_block(self, start, flush_left=0): + """ + Return a contiguous block of text. + + If `flush_left` is true, raise `UnexpectedIndentationError` if an + indented line is encountered before the text block ends (with a blank + line). + """ + end = start + last = len(self.data) + while end < last: + line = self.data[end] + if not line.strip(): + break + if flush_left and (line[0] == ' '): + source, offset = self.info(end) + raise UnexpectedIndentationError(self[start:end], source, + offset + 1) + end += 1 + return self[start:end] + + def get_indented(self, start=0, until_blank=0, strip_indent=1, + block_indent=None, first_indent=None): + """ + Extract and return a StringList of indented lines of text. + + Collect all lines with indentation, determine the minimum indentation, + remove the minimum indentation from all indented lines (unless + `strip_indent` is false), and return them. All lines up to but not + including the first unindented line will be returned. + + :Parameters: + - `start`: The index of the first line to examine. + - `until_blank`: Stop collecting at the first blank line if true. + - `strip_indent`: Strip common leading indent if true (default). + - `block_indent`: The indent of the entire block, if known. + - `first_indent`: The indent of the first line, if known. + + :Return: + - a StringList of indented lines with mininum indent removed; + - the amount of the indent; + - a boolean: did the indented block finish with a blank line or EOF? + """ + indent = block_indent # start with None if unknown + end = start + if block_indent is not None and first_indent is None: + first_indent = block_indent + if first_indent is not None: + end += 1 + last = len(self.data) + while end < last: + line = self.data[end] + if line and (line[0] != ' ' + or (block_indent is not None + and line[:block_indent].strip())): + # Line not indented or insufficiently indented. + # Block finished properly iff the last indented line blank: + blank_finish = ((end > start) + and not self.data[end - 1].strip()) + break + stripped = line.lstrip() + if not stripped: # blank line + if until_blank: + blank_finish = 1 + break + elif block_indent is None: + line_indent = len(line) - len(stripped) + if indent is None: + indent = line_indent + else: + indent = min(indent, line_indent) + end += 1 + else: + blank_finish = 1 # block ends at end of lines + block = self[start:end] + if first_indent is not None and block: + block.data[0] = block.data[0][first_indent:] + if indent and strip_indent: + block.strip_indent(indent, start=(first_indent is not None)) + return block, indent or 0, blank_finish + + class StateMachineError(Exception): pass class UnknownStateError(StateMachineError): pass class DuplicateStateError(StateMachineError): pass @@ -1070,54 +1426,6 @@ def string2lines(astring, tab_width=8, convert_whitespace=0, astring = whitespace.sub(' ', astring) return [s.expandtabs(tab_width) for s in astring.splitlines()] -def extract_indented(lines, until_blank=0, strip_indent=1): - """ - Extract and return a list of indented lines of text. - - Collect all lines with indentation, determine the minimum indentation, - remove the minimum indentation from all indented lines (unless - `strip_indent` is false), and return them. All lines up to but not - including the first unindented line will be returned. - - :Parameters: - - `lines`: a list of one-line strings without newlines. - - `until_blank`: Stop collecting at the first blank line if true (1). - - `strip_indent`: Strip common leading indent if true (1, default). - - :Return: - - a list of indented lines with mininum indent removed; - - the amount of the indent; - - whether or not the block finished with a blank line or at the end of - `lines`. - """ - source = [] - indent = None - for line in lines: - if line and line[0] != ' ': # line not indented - # block finished properly iff the last indented line was blank - blank_finish = len(source) and not source[-1].strip() - break - stripped = line.lstrip() - if until_blank and not stripped: # blank line - blank_finish = 1 - break - source.append(line) - if not stripped: # blank line - continue - lineindent = len(line) - len(stripped) - if indent is None: - indent = lineindent - else: - indent = min(indent, lineindent) - else: - blank_finish = 1 # block ends at end of lines - if indent: - if strip_indent: - source = [s[indent:] for s in source] - return source, indent, blank_finish - else: - return [], 0, blank_finish - def _exception_data(): """ Return exception information: |
