summaryrefslogtreecommitdiff
path: root/docutils/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'docutils/utils.py')
-rw-r--r--docutils/utils.py373
1 files changed, 373 insertions, 0 deletions
diff --git a/docutils/utils.py b/docutils/utils.py
new file mode 100644
index 000000000..a92c8fb97
--- /dev/null
+++ b/docutils/utils.py
@@ -0,0 +1,373 @@
+#! /usr/bin/env python
+
+"""
+:Author: David Goodger
+:Contact: goodger@users.sourceforge.net
+:Revision: $Revision$
+:Date: $Date$
+:Copyright: This module has been placed in the public domain.
+
+Miscellaneous utilities for the documentation utilities.
+"""
+
+import sys, re
+import nodes
+
+
+class SystemMessage(Exception):
+
+ def __init__(self, system_message):
+ Exception.__init__(self, system_message.astext())
+
+
+class Reporter:
+
+ """
+ Info/warning/error reporter and ``system_message`` element generator.
+
+ Five levels of system messages are defined, along with corresponding
+ methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
+
+ There is typically one Reporter object per process. A Reporter object is
+ instantiated with thresholds for generating warnings and errors (raising
+ exceptions), a switch to turn debug output on or off, and an I/O stream
+ for warnings. These are stored in the default reporting category, ''
+ (zero-length string).
+
+ Multiple reporting categories [#]_ may be set, each with its own warning
+ and error thresholds, debugging switch, and warning stream (collectively a
+ `ConditionSet`). Categories are hierarchically-named strings that look
+ like attribute references: 'spam', 'spam.eggs', 'neeeow.wum.ping'. The
+ 'spam' category is the ancestor of 'spam.bacon.eggs'. Unset categories
+ inherit stored conditions from their closest ancestor category that has
+ been set.
+
+ When a system message is generated, the stored conditions from its
+ category (or ancestor if unset) are retrieved. The system message level is
+ compared to the thresholds stored in the category, and a warning or error
+ is generated as appropriate. Debug messages are produced iff the stored
+ debug switch is on. Message output is sent to the stored warning stream.
+
+ The default category is '' (empty string). By convention, Writers should
+ retrieve reporting conditions from the 'writer' category (which, unless
+ explicitly set, defaults to the conditions of the default category).
+
+ .. [#] The concept of "categories" was inspired by the log4j project:
+ http://jakarta.apache.org/log4j/.
+ """
+
+ levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
+ """List of names for system message levels, indexed by level."""
+
+ def __init__(self, warninglevel, errorlevel, stream=None, debug=0):
+ """
+ Initialize the `ConditionSet` for the `Reporter`'s default category.
+
+ :Parameters:
+
+ - `warninglevel`: The level at or above which warning output will
+ be sent to `stream`.
+ - `errorlevel`: The level at or above which `SystemMessage`
+ exceptions will be raised.
+ - `debug`: Show debug (level=0) system messages?
+ - `stream`: Where warning output is sent (`None` implies
+ `sys.stderr`).
+ """
+
+ if stream is None:
+ stream = sys.stderr
+
+ self.categories = {'': ConditionSet(debug, warninglevel, errorlevel,
+ stream)}
+ """Mapping of category names to conditions. Default category is ''."""
+
+ def setconditions(self, category, warninglevel, errorlevel,
+ stream=None, debug=0):
+ """
+ Set (or replace) the `ConditionSet` stored for `category`.
+
+ The remaining parameters have the same meaning as for `__init__()`; a
+ `stream` of `None` implies `sys.stderr`.
+ """
+ if stream is None:
+ stream = sys.stderr
+ self.categories[category] = ConditionSet(debug, warninglevel,
+ errorlevel, stream)
+
+ def unsetconditions(self, category):
+ """
+ Remove the conditions stored for `category`, if any.
+
+ The default category ('') is never removed, and a category that has
+ not been set is ignored.
+ """
+ if category and self.categories.has_key(category):
+ del self.categories[category]
+
+ # Allow ``del reporter[category]`` as a spelling of `unsetconditions()`.
+ __delitem__ = unsetconditions
+
+ def getconditions(self, category):
+ """
+ Return the `ConditionSet` stored for `category`, or for its closest
+ ancestor category that has been set (ultimately the default, '').
+ """
+ while not self.categories.has_key(category):
+ # Drop the last dotted component ('a.b.c' -> 'a.b'). When there is no
+ # dot, rfind() returns -1, so the first slice is empty and the result
+ # is '', the default category set by `__init__()`.
+ category = category[:category.rfind('.') + 1][:-1]
+ return self.categories[category]
+
+ # Allow ``reporter[category]`` as a spelling of `getconditions()`.
+ __getitem__ = getconditions
+
+ def system_message(self, level, comment=None, category='',
+ *children, **attributes):
+ """
+ Return a system_message object.
+
+ Raise an exception or generate a warning if appropriate.
+
+ :Parameters:
+
+ - `level`: Integer message level; also used as an index into
+ `levels` to obtain the message's "type" name.
+ - `comment`, `children`, `attributes`: Passed through to
+ `nodes.system_message`.
+ - `category`: The reporting category whose conditions (or whose
+ closest set ancestor's conditions) apply to this message.
+
+ :Exception:
+ `SystemMessage` if `level` is at or above the error threshold.
+ """
+ msg = nodes.system_message(comment, level=level,
+ type=self.levels[level],
+ *children, **attributes)
+ # ``self[category]`` goes through `__getitem__`, i.e. `getconditions()`.
+ debug, warninglevel, errorlevel, stream = self[category].astuple()
+ # "and" binds tighter than "or": output is produced when the level reaches
+ # the warning threshold, or when this is a level-0 message and the debug
+ # switch is on.
+ if level >= warninglevel or debug and level == 0:
+ if category:
+ print >>stream, 'Reporter "%s":' % category, msg.astext()
+ else:
+ print >>stream, 'Reporter:', msg.astext()
+ # NOTE(review): indentation is not visible in this rendering; this check
+ # presumably sits at function level, so that the exception is raised
+ # whether or not the message was printed -- confirm.
+ if level >= errorlevel:
+ raise SystemMessage(msg)
+ return msg
+
+ def debug(self, comment=None, category='', *children, **attributes):
+ """
+ Level-0, "DEBUG": an internal reporting issue. Typically, there is no
+ effect on the processing. Level-0 system messages are handled
+ separately from the others.
+ """
+ return self.system_message(
+ 0, comment, category, *children, **attributes)
+
+ def info(self, comment=None, category='', *children, **attributes):
+ """
+ Level-1, "INFO": a minor issue that can be ignored. Typically there is
+ no effect on processing, and level-1 system messages are not reported.
+ """
+ return self.system_message(
+ 1, comment, category, *children, **attributes)
+
+ def warning(self, comment=None, category='', *children, **attributes):
+ """
+ Level-2, "WARNING": an issue that should be addressed. If ignored,
+ there may be unpredictable problems with the output.
+ """
+ return self.system_message(
+ 2, comment, category, *children, **attributes)
+
+ def error(self, comment=None, category='', *children, **attributes):
+ """
+ Level-3, "ERROR": an error that should be addressed. If ignored, the
+ output will contain errors.
+ """
+ return self.system_message(
+ 3, comment, category, *children, **attributes)
+
+ def severe(self, comment=None, category='', *children, **attributes):
+ """
+ Level-4, "SEVERE": a severe error that must be addressed. If ignored,
+ the output will contain severe errors. Typically level-4 system
+ messages are turned into exceptions which halt processing.
+ """
+ return self.system_message(
+ 4, comment, category, *children, **attributes)
+
+
+class ConditionSet:
+
+ """
+ A set of thresholds, switches, and streams corresponding to one `Reporter`
+ category.
+ """
+
+ def __init__(self, debug, warninglevel, errorlevel, stream):
+ self.debug = debug
+ self.warninglevel = warninglevel
+ self.errorlevel = errorlevel
+ self.stream = stream
+
+ def astuple(self):
+ return (self.debug, self.warninglevel, self.errorlevel,
+ self.stream)
+
+
+class ExtensionAttributeError(Exception): pass
+class BadAttributeError(ExtensionAttributeError): pass
+class BadAttributeDataError(ExtensionAttributeError): pass
+class DuplicateAttributeError(ExtensionAttributeError): pass
+
+
+def extract_extension_attributes(field_list, attribute_spec):
+ """
+ Return a dictionary mapping extension attribute names to converted values.
+
+ :Parameters:
+ - `field_list`: A flat field list without field arguments, where each
+ field body consists of a single paragraph only.
+ - `attribute_spec`: Dictionary mapping known attribute names to a
+ conversion function such as `int` or `float`.
+
+ :Exceptions:
+ - `KeyError` for unknown attribute names.
+ - `ValueError` for invalid attribute values (raised by the conversion
+ function).
+ - `DuplicateAttributeError` for duplicate attributes.
+ - `BadAttributeError` for invalid fields.
+ - `BadAttributeDataError` for invalid attribute data (missing name,
+ missing data, bad quotes, etc.).
+ """
+ attlist = extract_attributes(field_list)
+ attdict = assemble_attribute_dict(attlist, attribute_spec)
+ return attdict
+
+def extract_attributes(field_list):
+ """
+ Return a list of attribute (name, value) pairs from field names & bodies.
+
+ :Parameter:
+ `field_list`: A flat field list without field arguments, where each
+ field body consists of a single paragraph only.
+
+ :Exceptions:
+ - `BadAttributeError` for invalid fields.
+ - `BadAttributeDataError` for invalid attribute data (missing name,
+ missing data, bad quotes, etc.).
+ """
+ attlist = []
+ for field in field_list:
+ if len(field) != 2:
+ raise BadAttributeError(
+ 'extension attribute field may not contain field arguments')
+ name = field[0].astext().lower()
+ body = field[1]
+ if len(body) == 0:
+ data = None
+ elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
+ or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
+ raise BadAttributeDataError(
+ 'extension attribute field body may contain\n'
+ 'a single paragraph only (attribute "%s")' % name)
+ else:
+ data = body[0][0].astext()
+ attlist.append((name, data))
+ return attlist
+
+def assemble_attribute_dict(attlist, attspec):
+ """
+ Return a mapping of attribute names to values.
+
+ :Parameters:
+ - `attlist`: A list of (name, value) pairs (the output of
+ `extract_attributes()`).
+ - `attspec`: Dictionary mapping known attribute names to a
+ conversion function such as `int` or `float`.
+
+ :Exceptions:
+ - `KeyError` for unknown attribute names.
+ - `DuplicateAttributeError` for duplicate attributes.
+ - `ValueError` for invalid attribute values (raised by conversion
+ function).
+ """
+ attributes = {}
+ for name, value in attlist:
+ convertor = attspec[name] # raises KeyError if unknown
+ if attributes.has_key(name):
+ raise DuplicateAttributeError('duplicate attribute "%s"' % name)
+ try:
+ attributes[name] = convertor(value)
+ except (ValueError, TypeError), detail:
+ raise detail.__class__('(attribute "%s", value "%r") %s'
+ % (name, value, detail))
+ return attributes
+
+
+class NameValueError(Exception): pass
+
+
+def extract_name_value(line):
+ """
+ Return a list of (name, value) from a line of the form "name=value ...".
+
+ :Exception:
+ `NameValueError` for invalid input (missing name, missing data, bad
+ quotes, etc.).
+ """
+ attlist = []
+ while line:
+ equals = line.find('=')
+ if equals == -1:
+ raise NameValueError('missing "="')
+ attname = line[:equals].strip()
+ if equals == 0 or not attname:
+ raise NameValueError(
+ 'missing attribute name before "="')
+ line = line[equals+1:].lstrip()
+ if not line:
+ raise NameValueError(
+ 'missing value after "%s="' % attname)
+ if line[0] in '\'"':
+ endquote = line.find(line[0], 1)
+ if endquote == -1:
+ raise NameValueError(
+ 'attribute "%s" missing end quote (%s)'
+ % (attname, line[0]))
+ if len(line) > endquote + 1 and line[endquote + 1].strip():
+ raise NameValueError(
+ 'attribute "%s" end quote (%s) not followed by '
+ 'whitespace' % (attname, line[0]))
+ data = line[1:endquote]
+ line = line[endquote+1:].lstrip()
+ else:
+ space = line.find(' ')
+ if space == -1:
+ data = line
+ line = ''
+ else:
+ data = line[:space]
+ line = line[space+1:].lstrip()
+ attlist.append((attname.lower(), data))
+ return attlist
+
+
+def normname(name):
+ """Return a case- and whitespace-normalized name."""
+ return ' '.join(name.lower().split())
+
+def id(string):
+ """
+ Convert `string` into an identifier and return it.
+
+ Docutils identifiers will conform to the regular expression
+ ``[a-z][-a-z0-9]*``. For CSS compatibility, identifiers (the "class" and
+ "id" attributes) should have no underscores, colons, or periods. Hyphens
+ may be used.
+
+ - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:
+
+ ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
+ followed by any number of letters, digits ([0-9]), hyphens ("-"),
+ underscores ("_"), colons (":"), and periods (".").
+
+ - However the `CSS1 spec`_ defines identifiers based on the "name" token,
+ a tighter interpretation ("flex" tokenizer notation; "latin1" and
+ "escape" 8-bit characters have been replaced with entities)::
+
+ unicode \\[0-9a-f]{1,4}
+ latin1 [¡-ÿ]
+ escape {unicode}|\\[ -~¡-ÿ]
+ nmchar [-a-z0-9]|{latin1}|{escape}
+ name {nmchar}+
+
+ The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
+ or periods ("."), therefore "class" and "id" attributes should not contain
+ these characters. They should be replaced with hyphens ("-"). Combined
+ with HTML's requirements (the first character must be a letter; no
+ "unicode", "latin1", or "escape" characters), this results in the
+ ``[a-z][-a-z0-9]*`` pattern.
+
+ .. _HTML 4.01 spec: http://www.w3.org/TR/html401
+ .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
+ """
+ id = non_id_chars.sub('-', normname(string))
+ id = non_id_at_ends.sub('', id)
+ return str(id)
+
+non_id_chars = re.compile('[^a-z0-9]+')
+non_id_at_ends = re.compile('^[-0-9]+|-+$')
+
+def newdocument(languagecode='en', warninglevel=2, errorlevel=4,
+ stream=None, debug=0):
+ reporter = Reporter(warninglevel, errorlevel, stream, debug)
+ document = nodes.document(languagecode=languagecode, reporter=reporter)
+ return document