diff options
Diffstat (limited to 'docutils/utils.py')
-rw-r--r-- | docutils/utils.py | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/docutils/utils.py b/docutils/utils.py new file mode 100644 index 000000000..a92c8fb97 --- /dev/null +++ b/docutils/utils.py @@ -0,0 +1,373 @@ +#! /usr/bin/env python + +""" +:Author: David Goodger +:Contact: goodger@users.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This module has been placed in the public domain. + +Miscellaneous utilities for the documentation utilities. +""" + +import sys, re +import nodes + + +class SystemMessage(Exception): + + def __init__(self, system_message): + Exception.__init__(self, system_message.astext()) + + +class Reporter: + + """ + Info/warning/error reporter and ``system_message`` element generator. + + Five levels of system messages are defined, along with corresponding + methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`. + + There is typically one Reporter object per process. A Reporter object is + instantiated with thresholds for generating warnings and errors (raising + exceptions), a switch to turn debug output on or off, and an I/O stream + for warnings. These are stored in the default reporting category, '' + (zero-length string). + + Multiple reporting categories [#]_ may be set, each with its own warning + and error thresholds, debugging switch, and warning stream (collectively a + `ConditionSet`). Categories are hierarchically-named strings that look + like attribute references: 'spam', 'spam.eggs', 'neeeow.wum.ping'. The + 'spam' category is the ancestor of 'spam.bacon.eggs'. Unset categories + inherit stored conditions from their closest ancestor category that has + been set. + + When a system message is generated, the stored conditions from its + category (or ancestor if unset) are retrieved. The system message level is + compared to the thresholds stored in the category, and a warning or error + is generated as appropriate. Debug messages are produced iff the stored + debug switch is on. Message output is sent to the stored warning stream. + + The default category is '' (empty string). By convention, Writers should + retrieve reporting conditions from the 'writer' category (which, unless + explicitly set, defaults to the conditions of the default category). + + .. [#] The concept of "categories" was inspired by the log4j project: + http://jakarta.apache.org/log4j/. + """ + + levels = 'DEBUG INFO WARNING ERROR SEVERE'.split() + """List of names for system message levels, indexed by level.""" + + def __init__(self, warninglevel, errorlevel, stream=None, debug=0): + """ + Initialize the `ConditionSet` forthe `Reporter`'s default category. + + :Parameters: + + - `warninglevel`: The level at or above which warning output will + be sent to `stream`. + - `errorlevel`: The level at or above which `SystemMessage` + exceptions will be raised. + - `debug`: Show debug (level=0) system messages? + - `stream`: Where warning output is sent (`None` implies + `sys.stderr`). + """ + + if stream is None: + stream = sys.stderr + + self.categories = {'': ConditionSet(debug, warninglevel, errorlevel, + stream)} + """Mapping of category names to conditions. Default category is ''.""" + + def setconditions(self, category, warninglevel, errorlevel, + stream=None, debug=0): + if stream is None: + stream = sys.stderr + self.categories[category] = ConditionSet(debug, warninglevel, + errorlevel, stream) + + def unsetconditions(self, category): + if category and self.categories.has_key(category): + del self.categories[category] + + __delitem__ = unsetconditions + + def getconditions(self, category): + while not self.categories.has_key(category): + category = category[:category.rfind('.') + 1][:-1] + return self.categories[category] + + __getitem__ = getconditions + + def system_message(self, level, comment=None, category='', + *children, **attributes): + """ + Return a system_message object. + + Raise an exception or generate a warning if appropriate. + """ + msg = nodes.system_message(comment, level=level, + type=self.levels[level], + *children, **attributes) + debug, warninglevel, errorlevel, stream = self[category].astuple() + if level >= warninglevel or debug and level == 0: + if category: + print >>stream, 'Reporter "%s":' % category, msg.astext() + else: + print >>stream, 'Reporter:', msg.astext() + if level >= errorlevel: + raise SystemMessage(msg) + return msg + + def debug(self, comment=None, category='', *children, **attributes): + """ + Level-0, "DEBUG": an internal reporting issue. Typically, there is no + effect on the processing. Level-0 system messages are handled + separately from the others. + """ + return self.system_message( + 0, comment, category, *children, **attributes) + + def info(self, comment=None, category='', *children, **attributes): + """ + Level-1, "INFO": a minor issue that can be ignored. Typically there is + no effect on processing, and level-1 system messages are not reported. + """ + return self.system_message( + 1, comment, category, *children, **attributes) + + def warning(self, comment=None, category='', *children, **attributes): + """ + Level-2, "WARNING": an issue that should be addressed. If ignored, + there may be unpredictable problems with the output. + """ + return self.system_message( + 2, comment, category, *children, **attributes) + + def error(self, comment=None, category='', *children, **attributes): + """ + Level-3, "ERROR": an error that should be addressed. If ignored, the + output will contain errors. + """ + return self.system_message( + 3, comment, category, *children, **attributes) + + def severe(self, comment=None, category='', *children, **attributes): + """ + Level-4, "SEVERE": a severe error that must be addressed. If ignored, + the output will contain severe errors. Typically level-4 system + messages are turned into exceptions which halt processing. + """ + return self.system_message( + 4, comment, category, *children, **attributes) + + +class ConditionSet: + + """ + A set of thresholds, switches, and streams corresponding to one `Reporter` + category. + """ + + def __init__(self, debug, warninglevel, errorlevel, stream): + self.debug = debug + self.warninglevel = warninglevel + self.errorlevel = errorlevel + self.stream = stream + + def astuple(self): + return (self.debug, self.warninglevel, self.errorlevel, + self.stream) + + +class ExtensionAttributeError(Exception): pass +class BadAttributeError(ExtensionAttributeError): pass +class BadAttributeDataError(ExtensionAttributeError): pass +class DuplicateAttributeError(ExtensionAttributeError): pass + + +def extract_extension_attributes(field_list, attribute_spec): + """ + Return a dictionary mapping extension attribute names to converted values. + + :Parameters: + - `field_list`: A flat field list without field arguments, where each + field body consists of a single paragraph only. + - `attribute_spec`: Dictionary mapping known attribute names to a + conversion function such as `int` or `float`. + + :Exceptions: + - `KeyError` for unknown attribute names. + - `ValueError` for invalid attribute values (raised by the conversion + function). + - `DuplicateAttributeError` for duplicate attributes. + - `BadAttributeError` for invalid fields. + - `BadAttributeDataError` for invalid attribute data (missing name, + missing data, bad quotes, etc.). + """ + attlist = extract_attributes(field_list) + attdict = assemble_attribute_dict(attlist, attribute_spec) + return attdict + +def extract_attributes(field_list): + """ + Return a list of attribute (name, value) pairs from field names & bodies. + + :Parameter: + `field_list`: A flat field list without field arguments, where each + field body consists of a single paragraph only. + + :Exceptions: + - `BadAttributeError` for invalid fields. + - `BadAttributeDataError` for invalid attribute data (missing name, + missing data, bad quotes, etc.). + """ + attlist = [] + for field in field_list: + if len(field) != 2: + raise BadAttributeError( + 'extension attribute field may not contain field arguments') + name = field[0].astext().lower() + body = field[1] + if len(body) == 0: + data = None + elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \ + or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text): + raise BadAttributeDataError( + 'extension attribute field body may contain\n' + 'a single paragraph only (attribute "%s")' % name) + else: + data = body[0][0].astext() + attlist.append((name, data)) + return attlist + +def assemble_attribute_dict(attlist, attspec): + """ + Return a mapping of attribute names to values. + + :Parameters: + - `attlist`: A list of (name, value) pairs (the output of + `extract_attributes()`). + - `attspec`: Dictionary mapping known attribute names to a + conversion function such as `int` or `float`. + + :Exceptions: + - `KeyError` for unknown attribute names. + - `DuplicateAttributeError` for duplicate attributes. + - `ValueError` for invalid attribute values (raised by conversion + function). + """ + attributes = {} + for name, value in attlist: + convertor = attspec[name] # raises KeyError if unknown + if attributes.has_key(name): + raise DuplicateAttributeError('duplicate attribute "%s"' % name) + try: + attributes[name] = convertor(value) + except (ValueError, TypeError), detail: + raise detail.__class__('(attribute "%s", value "%r") %s' + % (name, value, detail)) + return attributes + + +class NameValueError(Exception): pass + + +def extract_name_value(line): + """ + Return a list of (name, value) from a line of the form "name=value ...". + + :Exception: + `NameValueError` for invalid input (missing name, missing data, bad + quotes, etc.). + """ + attlist = [] + while line: + equals = line.find('=') + if equals == -1: + raise NameValueError('missing "="') + attname = line[:equals].strip() + if equals == 0 or not attname: + raise NameValueError( + 'missing attribute name before "="') + line = line[equals+1:].lstrip() + if not line: + raise NameValueError( + 'missing value after "%s="' % attname) + if line[0] in '\'"': + endquote = line.find(line[0], 1) + if endquote == -1: + raise NameValueError( + 'attribute "%s" missing end quote (%s)' + % (attname, line[0])) + if len(line) > endquote + 1 and line[endquote + 1].strip(): + raise NameValueError( + 'attribute "%s" end quote (%s) not followed by ' + 'whitespace' % (attname, line[0])) + data = line[1:endquote] + line = line[endquote+1:].lstrip() + else: + space = line.find(' ') + if space == -1: + data = line + line = '' + else: + data = line[:space] + line = line[space+1:].lstrip() + attlist.append((attname.lower(), data)) + return attlist + + +def normname(name): + """Return a case- and whitespace-normalized name.""" + return ' '.join(name.lower().split()) + +def id(string): + """ + Convert `string` into an identifier and return it. + + Docutils identifiers will conform to the regular expression + ``[a-z][-a-z0-9]*``. For CSS compatibility, identifiers (the "class" and + "id" attributes) should have no underscores, colons, or periods. Hyphens + may be used. + + - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens: + + ID and NAME tokens must begin with a letter ([A-Za-z]) and may be + followed by any number of letters, digits ([0-9]), hyphens ("-"), + underscores ("_"), colons (":"), and periods ("."). + + - However the `CSS1 spec`_ defines identifiers based on the "name" token, + a tighter interpretation ("flex" tokenizer notation; "latin1" and + "escape" 8-bit characters have been replaced with entities):: + + unicode \\[0-9a-f]{1,4} + latin1 [¡-ÿ] + escape {unicode}|\\[ -~¡-ÿ] + nmchar [-a-z0-9]|{latin1}|{escape} + name {nmchar}+ + + The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"), + or periods ("."), therefore "class" and "id" attributes should not contain + these characters. They should be replaced with hyphens ("-"). Combined + with HTML's requirements (the first character must be a letter; no + "unicode", "latin1", or "escape" characters), this results in the + ``[a-z][-a-z0-9]*`` pattern. + + .. _HTML 4.01 spec: http://www.w3.org/TR/html401 + .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1 + """ + id = non_id_chars.sub('-', normname(string)) + id = non_id_at_ends.sub('', id) + return str(id) + +non_id_chars = re.compile('[^a-z0-9]+') +non_id_at_ends = re.compile('^[-0-9]+|-+$') + +def newdocument(languagecode='en', warninglevel=2, errorlevel=4, + stream=None, debug=0): + reporter = Reporter(warninglevel, errorlevel, stream, debug) + document = nodes.document(languagecode=languagecode, reporter=reporter) + return document |