author     wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>   2006-01-09 20:44:25 +0000
committer  wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>   2006-01-09 20:44:25 +0000
commit     d77fdfef70e08114f57cbef5d91707df8717ea9f (patch)
tree       49444e3486c0c333cb7b33dfa721296c08ee4ece /docutils/parsers
parent     53cd16ca6ca5f638cbe5956988e88f9339e355cf (diff)
parent     3993c4097756e9885bcfbd07cb1cc1e4e95e50e4 (diff)
download   docutils-0.4.tar.gz
Release 0.4: tagging released revision (tag: docutils-0.4)
git-svn-id: http://svn.code.sf.net/p/docutils/code/tags/docutils-0.4@4268 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/parsers')
-rw-r--r--  docutils/parsers/__init__.py   49
-rw-r--r--  docutils/parsers/null.py   22
-rw-r--r--  docutils/parsers/rst/__init__.py   156
-rw-r--r--  docutils/parsers/rst/directives/__init__.py   449
-rw-r--r--  docutils/parsers/rst/directives/admonitions.py   90
-rw-r--r--  docutils/parsers/rst/directives/body.py   196
-rw-r--r--  docutils/parsers/rst/directives/html.py   96
-rw-r--r--  docutils/parsers/rst/directives/images.py   152
-rw-r--r--  docutils/parsers/rst/directives/misc.py   408
-rw-r--r--  docutils/parsers/rst/directives/parts.py   126
-rw-r--r--  docutils/parsers/rst/directives/references.py   27
-rw-r--r--  docutils/parsers/rst/directives/tables.py   444
-rw-r--r--  docutils/parsers/rst/include/README.txt   17
-rw-r--r--  docutils/parsers/rst/include/isoamsa.txt   162
-rw-r--r--  docutils/parsers/rst/include/isoamsb.txt   126
-rw-r--r--  docutils/parsers/rst/include/isoamsc.txt   29
-rw-r--r--  docutils/parsers/rst/include/isoamsn.txt   96
-rw-r--r--  docutils/parsers/rst/include/isoamso.txt   62
-rw-r--r--  docutils/parsers/rst/include/isoamsr.txt   191
-rw-r--r--  docutils/parsers/rst/include/isobox.txt   46
-rw-r--r--  docutils/parsers/rst/include/isocyr1.txt   73
-rw-r--r--  docutils/parsers/rst/include/isocyr2.txt   32
-rw-r--r--  docutils/parsers/rst/include/isodia.txt   20
-rw-r--r--  docutils/parsers/rst/include/isogrk1.txt   55
-rw-r--r--  docutils/parsers/rst/include/isogrk2.txt   26
-rw-r--r--  docutils/parsers/rst/include/isogrk3.txt   52
-rw-r--r--  docutils/parsers/rst/include/isogrk4-wide.txt   49
-rw-r--r--  docutils/parsers/rst/include/isogrk4.txt   8
-rw-r--r--  docutils/parsers/rst/include/isolat1.txt   68
-rw-r--r--  docutils/parsers/rst/include/isolat2.txt   128
-rw-r--r--  docutils/parsers/rst/include/isomfrk-wide.txt   58
-rw-r--r--  docutils/parsers/rst/include/isomfrk.txt   11
-rw-r--r--  docutils/parsers/rst/include/isomopf-wide.txt   32
-rw-r--r--  docutils/parsers/rst/include/isomopf.txt   13
-rw-r--r--  docutils/parsers/rst/include/isomscr-wide.txt   58
-rw-r--r--  docutils/parsers/rst/include/isomscr.txt   17
-rw-r--r--  docutils/parsers/rst/include/isonum.txt   82
-rw-r--r--  docutils/parsers/rst/include/isopub.txt   90
-rw-r--r--  docutils/parsers/rst/include/isotech.txt   168
-rw-r--r--  docutils/parsers/rst/include/mmlalias.txt   554
-rw-r--r--  docutils/parsers/rst/include/mmlextra-wide.txt   113
-rw-r--r--  docutils/parsers/rst/include/mmlextra.txt   87
-rw-r--r--  docutils/parsers/rst/include/s5defs.txt   62
-rw-r--r--  docutils/parsers/rst/include/xhtml1-lat1.txt   102
-rw-r--r--  docutils/parsers/rst/include/xhtml1-special.txt   37
-rw-r--r--  docutils/parsers/rst/include/xhtml1-symbol.txt   130
-rw-r--r--  docutils/parsers/rst/languages/__init__.py   27
-rw-r--r--  docutils/parsers/rst/languages/af.py   104
-rw-r--r--  docutils/parsers/rst/languages/ca.py   123
-rw-r--r--  docutils/parsers/rst/languages/cs.py   106
-rw-r--r--  docutils/parsers/rst/languages/de.py   97
-rw-r--r--  docutils/parsers/rst/languages/en.py   106
-rw-r--r--  docutils/parsers/rst/languages/eo.py   116
-rw-r--r--  docutils/parsers/rst/languages/es.py   123
-rw-r--r--  docutils/parsers/rst/languages/fi.py   95
-rw-r--r--  docutils/parsers/rst/languages/fr.py   101
-rw-r--r--  docutils/parsers/rst/languages/it.py   94
-rw-r--r--  docutils/parsers/rst/languages/ja.py   117
-rw-r--r--  docutils/parsers/rst/languages/nl.py   110
-rw-r--r--  docutils/parsers/rst/languages/pt_br.py   106
-rw-r--r--  docutils/parsers/rst/languages/ru.py   105
-rw-r--r--  docutils/parsers/rst/languages/sk.py   93
-rw-r--r--  docutils/parsers/rst/languages/sv.py   92
-rw-r--r--  docutils/parsers/rst/languages/zh_cn.py   102
-rw-r--r--  docutils/parsers/rst/languages/zh_tw.py   107
-rw-r--r--  docutils/parsers/rst/roles.py   347
-rw-r--r--  docutils/parsers/rst/states.py   2948
-rw-r--r--  docutils/parsers/rst/tableparser.py   527
68 files changed, 10815 insertions, 0 deletions
diff --git a/docutils/parsers/__init__.py b/docutils/parsers/__init__.py
new file mode 100644
index 000000000..027c25a86
--- /dev/null
+++ b/docutils/parsers/__init__.py
@@ -0,0 +1,49 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This package contains Docutils parser modules.
+"""
+
+__docformat__ = 'reStructuredText'
+
+from docutils import Component
+
+
+class Parser(Component):
+
+ component_type = 'parser'
+ config_section = 'parsers'
+
+ def parse(self, inputstring, document):
+ """Override to parse `inputstring` into document tree `document`."""
+ raise NotImplementedError('subclass must override this method')
+
+ def setup_parse(self, inputstring, document):
+ """Initial parse setup. Call at start of `self.parse()`."""
+ self.inputstring = inputstring
+ self.document = document
+ document.reporter.attach_observer(document.note_parse_message)
+
+ def finish_parse(self):
+ """Finalize parse details. Call at end of `self.parse()`."""
+ self.document.reporter.detach_observer(
+ self.document.note_parse_message)
+
+
+_parser_aliases = {
+ 'restructuredtext': 'rst',
+ 'rest': 'rst',
+ 'restx': 'rst',
+ 'rtxt': 'rst',}
+
+def get_parser_class(parser_name):
+ """Return the Parser class from the `parser_name` module."""
+ parser_name = parser_name.lower()
+ if _parser_aliases.has_key(parser_name):
+ parser_name = _parser_aliases[parser_name]
+ module = __import__(parser_name, globals(), locals())
+ return module.Parser
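
A minimal usage sketch for the module above: `get_parser_class` resolves an alias such as "restructuredtext" to the ``rst`` module (imported relative to this package) and returns its `Parser` class. The follow-up call is illustrative only::

    from docutils import parsers

    ParserClass = parsers.get_parser_class('restructuredtext')  # alias -> 'rst'
    parser = ParserClass()
    # parser.parse(inputstring, document) fills an existing document tree;
    # an end-to-end sketch follows the rst parser module below.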
diff --git a/docutils/parsers/null.py b/docutils/parsers/null.py
new file mode 100644
index 000000000..61702dfaf
--- /dev/null
+++ b/docutils/parsers/null.py
@@ -0,0 +1,22 @@
+# Author: Martin Blais
+# Contact: blais@furius.ca
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""A do-nothing parser."""
+
+from docutils import parsers
+
+
+class Parser(parsers.Parser):
+
+ """A do-nothing parser."""
+
+ supported = ('null',)
+
+ config_section = 'null parser'
+ config_section_dependencies = ('parsers',)
+
+ def parse(self, inputstring, document):
+ pass
diff --git a/docutils/parsers/rst/__init__.py b/docutils/parsers/rst/__init__.py
new file mode 100644
index 000000000..ff1d7b4f8
--- /dev/null
+++ b/docutils/parsers/rst/__init__.py
@@ -0,0 +1,156 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is the ``docutils.parsers.rst`` package. It exports a single class, `Parser`,
+the reStructuredText parser.
+
+
+Usage
+=====
+
+1. Create a parser::
+
+ parser = docutils.parsers.rst.Parser()
+
+ Several optional arguments may be passed to modify the parser's behavior.
+ Please see `Customizing the Parser`_ below for details.
+
+2. Gather input (a multi-line string), by reading a file or the standard
+ input::
+
+ input = sys.stdin.read()
+
+3. Create a new empty `docutils.nodes.document` tree::
+
+ document = docutils.utils.new_document(source, settings)
+
+ See `docutils.utils.new_document()` for parameter details.
+
+4. Run the parser, populating the document tree::
+
+ parser.parse(input, document)
+
+
+Parser Overview
+===============
+
+The reStructuredText parser is implemented as a state machine, examining its
+input one line at a time. To understand how the parser works, please first
+become familiar with the `docutils.statemachine` module, then see the
+`states` module.
+
+
+Customizing the Parser
+----------------------
+
+Anything that isn't already customizable is that way simply because that type
+of customizability hasn't been implemented yet. Patches welcome!
+
+When instantiating an object of the `Parser` class, two parameters may be
+passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=1`` to enable an initial
+RFC-2822 style header block, parsed as a "field_list" element (with "class"
+attribute set to "rfc2822"). Currently this is the only body-level element
+which is customizable without subclassing. (Tip: subclass `Parser` and change
+its "state_classes" and "initial_state" attributes to refer to new classes.
+Contact the author if you need more details.)
+
+The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
+It handles inline markup recognition. A common extension is the addition of
+further implicit hyperlinks, like "RFC 2822". This can be done by subclassing
+`states.Inliner`, adding a new method for the implicit markup, and adding a
+``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
+subclass. See `states.Inliner.implicit_inline()` for details. Explicit
+inline markup can be customized in a `states.Inliner` subclass via the
+``patterns.initial`` and ``dispatch`` attributes (and new methods as
+appropriate).
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import docutils.parsers
+import docutils.statemachine
+from docutils.parsers.rst import states
+from docutils import frontend
+
+
+class Parser(docutils.parsers.Parser):
+
+ """The reStructuredText parser."""
+
+ supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
+ """Aliases this parser supports."""
+
+ settings_spec = (
+ 'reStructuredText Parser Options',
+ None,
+ (('Recognize and link to standalone PEP references (like "PEP 258").',
+ ['--pep-references'],
+ {'action': 'store_true', 'validator': frontend.validate_boolean}),
+ ('Base URL for PEP references '
+ '(default "http://www.python.org/peps/").',
+ ['--pep-base-url'],
+ {'metavar': '<URL>', 'default': 'http://www.python.org/peps/',
+ 'validator': frontend.validate_url_trailing_slash}),
+ ('Recognize and link to standalone RFC references (like "RFC 822").',
+ ['--rfc-references'],
+ {'action': 'store_true', 'validator': frontend.validate_boolean}),
+ ('Base URL for RFC references (default "http://www.faqs.org/rfcs/").',
+ ['--rfc-base-url'],
+ {'metavar': '<URL>', 'default': 'http://www.faqs.org/rfcs/',
+ 'validator': frontend.validate_url_trailing_slash}),
+ ('Set number of spaces for tab expansion (default 8).',
+ ['--tab-width'],
+ {'metavar': '<width>', 'type': 'int', 'default': 8,
+ 'validator': frontend.validate_nonnegative_int}),
+ ('Remove spaces before footnote references.',
+ ['--trim-footnote-reference-space'],
+ {'action': 'store_true', 'validator': frontend.validate_boolean}),
+ ('Leave spaces before footnote references.',
+ ['--leave-footnote-reference-space'],
+ {'action': 'store_false', 'dest': 'trim_footnote_reference_space',
+ 'validator': frontend.validate_boolean}),
+ ('Disable directives that insert the contents of an external file '
+ '("include" & "raw"); replaced with a "warning" system message.',
+ ['--no-file-insertion'],
+ {'action': 'store_false', 'default': 1,
+ 'dest': 'file_insertion_enabled'}),
+ ('Enable directives that insert the contents of an external file '
+ '("include" & "raw"). Enabled by default.',
+ ['--file-insertion-enabled'],
+ {'action': 'store_true', 'dest': 'file_insertion_enabled'}),
+ ('Disable the "raw" directive; replaced with a "warning" '
+ 'system message.',
+ ['--no-raw'],
+ {'action': 'store_false', 'default': 1, 'dest': 'raw_enabled'}),
+ ('Enable the "raw" directive. Enabled by default.',
+ ['--raw-enabled'],
+ {'action': 'store_true', 'dest': 'raw_enabled'}),))
+
+ config_section = 'restructuredtext parser'
+ config_section_dependencies = ('parsers',)
+
+ def __init__(self, rfc2822=None, inliner=None):
+ if rfc2822:
+ self.initial_state = 'RFC2822Body'
+ else:
+ self.initial_state = 'Body'
+ self.state_classes = states.state_classes
+ self.inliner = inliner
+
+ def parse(self, inputstring, document):
+ """Parse `inputstring` and populate `document`, a document tree."""
+ self.setup_parse(inputstring, document)
+ self.statemachine = states.RSTStateMachine(
+ state_classes=self.state_classes,
+ initial_state=self.initial_state,
+ debug=document.reporter.debug_flag)
+ inputlines = docutils.statemachine.string2lines(
+ inputstring, tab_width=document.settings.tab_width,
+ convert_whitespace=1)
+ self.statemachine.run(inputlines, document, inliner=self.inliner)
+ self.finish_parse()
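
Tying together the four usage steps from the docstring above, a sketch along these lines should work; it assumes the usual recipe of building default settings with `docutils.frontend.OptionParser`, and the '<string>' source name is illustrative::

    import docutils.frontend
    import docutils.parsers.rst
    import docutils.utils

    parser = docutils.parsers.rst.Parser()
    settings = docutils.frontend.OptionParser(
        components=(docutils.parsers.rst.Parser,)).get_default_values()
    document = docutils.utils.new_document('<string>', settings)
    parser.parse('Hello, *world*!\n', document)
    print document.pformat()    # dump the resulting document tree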
diff --git a/docutils/parsers/rst/directives/__init__.py b/docutils/parsers/rst/directives/__init__.py
new file mode 100644
index 000000000..998c391e3
--- /dev/null
+++ b/docutils/parsers/rst/directives/__init__.py
@@ -0,0 +1,449 @@
+# Author: David Goodger
+# Contact: goodger@python.org
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This package contains directive implementation modules.
+
+The interface for directive functions is as follows::
+
+ def directive_fn(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ code...
+
+ # Set function attributes:
+ directive_fn.arguments = ...
+ directive_fn.options = ...
+ directive_fn.content = ...
+
+Parameters:
+
+- ``name`` is the directive type or name (string).
+
+- ``arguments`` is a list of positional arguments (strings).
+
+- ``options`` is a dictionary mapping option names (strings) to values (type
+ depends on option conversion functions; see below).
+
+- ``content`` is a list of strings, the directive content.
+
+- ``lineno`` is the line number of the first line of the directive.
+
+- ``content_offset`` is the line offset of the first line of the content from
+ the beginning of the current input. Used when initiating a nested parse.
+
+- ``block_text`` is a string containing the entire directive. Include it as
+ the content of a literal block in a system message if there is a problem.
+
+- ``state`` is the state which called the directive function.
+
+- ``state_machine`` is the state machine which controls the state which called
+ the directive function.
+
+Function attributes, interpreted by the directive parser (which calls the
+directive function):
+
+- ``arguments``: A 3-tuple specifying the expected positional arguments, or
+ ``None`` if the directive has no arguments. The 3 items in the tuple are
+ ``(required, optional, whitespace OK in last argument)``:
+
+ 1. The number of required arguments.
+ 2. The number of optional arguments.
+ 3. A boolean, indicating if the final argument may contain whitespace.
+
+ Arguments are normally single whitespace-separated words. The final
+ argument may contain whitespace if the third item in the argument spec tuple
+ is 1/True. If the form of the arguments is more complex, specify only one
+ argument (either required or optional) and indicate that final whitespace is
+ OK; the client code must do any context-sensitive parsing.
+
+- ``options``: A dictionary, mapping known option names to conversion
+ functions such as `int` or `float`. ``None`` or an empty dict implies no
+ options to parse. Several directive option conversion functions are defined
+ in this module.
+
+ Option conversion functions take a single parameter, the option argument (a
+ string or ``None``), validate it and/or convert it to the appropriate form.
+ Conversion functions may raise ``ValueError`` and ``TypeError`` exceptions.
+
+- ``content``: A boolean; true if content is allowed. Client code must handle
+ the case where content is required but not supplied (an empty content list
+ will be supplied).
+
+Directive functions return a list of nodes which will be inserted into the
+document tree at the point where the directive was encountered (can be an
+empty list).
+
+See `Creating reStructuredText Directives`_ for more information.
+
+.. _Creating reStructuredText Directives:
+ http://docutils.sourceforge.net/docs/howto/rst-directives.html
+"""
+
+__docformat__ = 'reStructuredText'
+
+import re
+import codecs
+from docutils import nodes
+from docutils.parsers.rst.languages import en as _fallback_language_module
+
+
+_directive_registry = {
+ 'attention': ('admonitions', 'attention'),
+ 'caution': ('admonitions', 'caution'),
+ 'danger': ('admonitions', 'danger'),
+ 'error': ('admonitions', 'error'),
+ 'important': ('admonitions', 'important'),
+ 'note': ('admonitions', 'note'),
+ 'tip': ('admonitions', 'tip'),
+ 'hint': ('admonitions', 'hint'),
+ 'warning': ('admonitions', 'warning'),
+ 'admonition': ('admonitions', 'admonition'),
+ 'sidebar': ('body', 'sidebar'),
+ 'topic': ('body', 'topic'),
+ 'line-block': ('body', 'line_block'),
+ 'parsed-literal': ('body', 'parsed_literal'),
+ 'rubric': ('body', 'rubric'),
+ 'epigraph': ('body', 'epigraph'),
+ 'highlights': ('body', 'highlights'),
+ 'pull-quote': ('body', 'pull_quote'),
+ 'compound': ('body', 'compound'),
+ 'container': ('body', 'container'),
+ #'questions': ('body', 'question_list'),
+ 'table': ('tables', 'table'),
+ 'csv-table': ('tables', 'csv_table'),
+ 'list-table': ('tables', 'list_table'),
+ 'image': ('images', 'image'),
+ 'figure': ('images', 'figure'),
+ 'contents': ('parts', 'contents'),
+ 'sectnum': ('parts', 'sectnum'),
+ 'header': ('parts', 'header'),
+ 'footer': ('parts', 'footer'),
+ #'footnotes': ('parts', 'footnotes'),
+ #'citations': ('parts', 'citations'),
+ 'target-notes': ('references', 'target_notes'),
+ 'meta': ('html', 'meta'),
+ #'imagemap': ('html', 'imagemap'),
+ 'raw': ('misc', 'raw'),
+ 'include': ('misc', 'include'),
+ 'replace': ('misc', 'replace'),
+ 'unicode': ('misc', 'unicode_directive'),
+ 'class': ('misc', 'class_directive'),
+ 'role': ('misc', 'role'),
+ 'default-role': ('misc', 'default_role'),
+ 'title': ('misc', 'title'),
+ 'date': ('misc', 'date'),
+ 'restructuredtext-test-directive': ('misc', 'directive_test_function'),}
+"""Mapping of directive name to (module name, function name). The directive
+name is canonical & must be lowercase. Language-dependent names are defined
+in the ``languages`` subpackage."""
+
+_modules = {}
+"""Cache of imported directive modules."""
+
+_directives = {}
+"""Cache of imported directive functions."""
+
+def directive(directive_name, language_module, document):
+ """
+ Locate and return a directive function from its language-dependent name.
+ If not found in the current language, check English. Return None if the
+ named directive cannot be found.
+ """
+ normname = directive_name.lower()
+ messages = []
+ msg_text = []
+ if _directives.has_key(normname):
+ return _directives[normname], messages
+ canonicalname = None
+ try:
+ canonicalname = language_module.directives[normname]
+ except AttributeError, error:
+ msg_text.append('Problem retrieving directive entry from language '
+ 'module %r: %s.' % (language_module, error))
+ except KeyError:
+ msg_text.append('No directive entry for "%s" in module "%s".'
+ % (directive_name, language_module.__name__))
+ if not canonicalname:
+ try:
+ canonicalname = _fallback_language_module.directives[normname]
+ msg_text.append('Using English fallback for directive "%s".'
+ % directive_name)
+ except KeyError:
+ msg_text.append('Trying "%s" as canonical directive name.'
+ % directive_name)
+ # The canonical name should be an English name, but just in case:
+ canonicalname = normname
+ if msg_text:
+ message = document.reporter.info(
+ '\n'.join(msg_text), line=document.current_line)
+ messages.append(message)
+ try:
+ modulename, functionname = _directive_registry[canonicalname]
+ except KeyError:
+ # Error handling done by caller.
+ return None, messages
+ if _modules.has_key(modulename):
+ module = _modules[modulename]
+ else:
+ try:
+ module = __import__(modulename, globals(), locals())
+ except ImportError, detail:
+ messages.append(document.reporter.error(
+ 'Error importing directive module "%s" (directive "%s"):\n%s'
+ % (modulename, directive_name, detail),
+ line=document.current_line))
+ return None, messages
+ try:
+ function = getattr(module, functionname)
+ _directives[normname] = function
+ except AttributeError:
+ messages.append(document.reporter.error(
+ 'No function "%s" in module "%s" (directive "%s").'
+ % (functionname, modulename, directive_name),
+ line=document.current_line))
+ return None, messages
+ return function, messages
+
+def register_directive(name, directive_function):
+ """
+ Register a nonstandard application-defined directive function.
+ Language lookups are not needed for such functions.
+ """
+ _directives[name] = directive_function
+
+def flag(argument):
+ """
+ Check for a valid flag option (no argument) and return ``None``.
+ (Directive option conversion function.)
+
+ Raise ``ValueError`` if an argument is found.
+ """
+ if argument and argument.strip():
+ raise ValueError('no argument is allowed; "%s" supplied' % argument)
+ else:
+ return None
+
+def unchanged_required(argument):
+ """
+ Return the argument text, unchanged.
+ (Directive option conversion function.)
+
+ Raise ``ValueError`` if no argument is found.
+ """
+ if argument is None:
+ raise ValueError('argument required but none supplied')
+ else:
+ return argument # unchanged!
+
+def unchanged(argument):
+ """
+ Return the argument text, unchanged.
+ (Directive option conversion function.)
+
+ No argument implies empty string ("").
+ """
+ if argument is None:
+ return u''
+ else:
+ return argument # unchanged!
+
+def path(argument):
+ """
+ Return the path argument unwrapped (with newlines removed).
+ (Directive option conversion function.)
+
+ Raise ``ValueError`` if no argument is found.
+ """
+ if argument is None:
+ raise ValueError('argument required but none supplied')
+ else:
+ path = ''.join([s.strip() for s in argument.splitlines()])
+ return path
+
+def uri(argument):
+ """
+ Return the URI argument with whitespace removed.
+ (Directive option conversion function.)
+
+ Raise ``ValueError`` if no argument is found.
+ """
+ if argument is None:
+ raise ValueError('argument required but none supplied')
+ else:
+ uri = ''.join(argument.split())
+ return uri
+
+def nonnegative_int(argument):
+ """
+ Check for a nonnegative integer argument; raise ``ValueError`` if not.
+ (Directive option conversion function.)
+ """
+ value = int(argument)
+ if value < 0:
+ raise ValueError('negative value; must be positive or zero')
+ return value
+
+length_units = ['em', 'ex', 'px', 'in', 'cm', 'mm', 'pt', 'pc']
+
+def get_measure(argument, units):
+ """
+ Check for a positive argument of one of the units and return a
+ normalized string of the form "<value><unit>" (without space in
+ between).
+
+ To be called from directive option conversion functions.
+ """
+ match = re.match(r'^([0-9.]+) *(%s)$' % '|'.join(units), argument)
+ try:
+ assert match is not None
+ float(match.group(1))
+ except (AssertionError, ValueError):
+ raise ValueError(
+ 'not a positive measure of one of the following units:\n%s'
+ % ' '.join(['"%s"' % i for i in units]))
+ return match.group(1) + match.group(2)
+
+def length_or_unitless(argument):
+ return get_measure(argument, length_units + [''])
+
+def length_or_percentage_or_unitless(argument):
+ return get_measure(argument, length_units + ['%', ''])
+
+def class_option(argument):
+ """
+ Convert the argument into a list of ID-compatible strings and return it.
+ (Directive option conversion function.)
+
+ Raise ``ValueError`` if no argument is found.
+ """
+ if argument is None:
+ raise ValueError('argument required but none supplied')
+ names = argument.split()
+ class_names = []
+ for name in names:
+ class_name = nodes.make_id(name)
+ if not class_name:
+ raise ValueError('cannot make "%s" into a class name' % name)
+ class_names.append(class_name)
+ return class_names
+
+unicode_pattern = re.compile(
+ r'(?:0x|x|\\x|U\+?|\\u)([0-9a-f]+)$|&#x([0-9a-f]+);$', re.IGNORECASE)
+
+def unicode_code(code):
+ r"""
+ Convert a Unicode character code to a Unicode character.
+ (Directive option conversion function.)
+
+ Codes may be decimal numbers, hexadecimal numbers (prefixed by ``0x``,
+ ``x``, ``\x``, ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style
+ numeric character entities (e.g. ``&#x262E;``). Other text remains as-is.
+
+ Raise ValueError for illegal Unicode code values.
+ """
+ try:
+ if code.isdigit(): # decimal number
+ return unichr(int(code))
+ else:
+ match = unicode_pattern.match(code)
+ if match: # hex number
+ value = match.group(1) or match.group(2)
+ return unichr(int(value, 16))
+ else: # other text
+ return code
+ except OverflowError, detail:
+ raise ValueError('code too large (%s)' % detail)
+
+def single_char_or_unicode(argument):
+ """
+ A single character is returned as-is. Unicode character codes are
+ converted as in `unicode_code`. (Directive option conversion function.)
+ """
+ char = unicode_code(argument)
+ if len(char) > 1:
+ raise ValueError('%r invalid; must be a single character or '
+ 'a Unicode code' % char)
+ return char
+
+def single_char_or_whitespace_or_unicode(argument):
+ """
+ As with `single_char_or_unicode`, but "tab" and "space" are also supported.
+ (Directive option conversion function.)
+ """
+ if argument == 'tab':
+ char = '\t'
+ elif argument == 'space':
+ char = ' '
+ else:
+ char = single_char_or_unicode(argument)
+ return char
+
+def positive_int(argument):
+ """
+ Converts the argument into an integer. Raises ValueError for negative,
+ zero, or non-integer values. (Directive option conversion function.)
+ """
+ value = int(argument)
+ if value < 1:
+ raise ValueError('negative or zero value; must be positive')
+ return value
+
+def positive_int_list(argument):
+ """
+ Converts a space- or comma-separated list of values into a Python list
+ of integers.
+ (Directive option conversion function.)
+
+ Raises ValueError for non-positive-integer values.
+ """
+ if ',' in argument:
+ entries = argument.split(',')
+ else:
+ entries = argument.split()
+ return [positive_int(entry) for entry in entries]
+
+def encoding(argument):
+ """
+ Verifies the encoding argument by lookup.
+ (Directive option conversion function.)
+
+ Raises ValueError for unknown encodings.
+ """
+ try:
+ codecs.lookup(argument)
+ except LookupError:
+ raise ValueError('unknown encoding: "%s"' % argument)
+ return argument
+
+def choice(argument, values):
+ """
+ Directive option utility function, supplied to enable options whose
+ argument must be a member of a finite set of possible values (must be
+ lower case). A custom conversion function must be written to use it. For
+ example::
+
+ from docutils.parsers.rst import directives
+
+ def yesno(argument):
+ return directives.choice(argument, ('yes', 'no'))
+
+ Raise ``ValueError`` if no argument is found or if the argument's value is
+ not valid (not an entry in the supplied list).
+ """
+ try:
+ value = argument.lower().strip()
+ except AttributeError:
+ raise ValueError('must supply an argument; choose from %s'
+ % format_values(values))
+ if value in values:
+ return value
+ else:
+ raise ValueError('"%s" unknown; choose from %s'
+ % (argument, format_values(values)))
+
+def format_values(values):
+ return '%s, or "%s"' % (', '.join(['"%s"' % s for s in values[:-1]]),
+ values[-1])
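
To make the directive-function interface documented at the top of this module concrete, here is a hedged sketch of an application-defined directive registered with `register_directive`; the ``note-box`` name and its behaviour are hypothetical, not part of Docutils::

    from docutils import nodes
    from docutils.parsers.rst import directives

    def note_box(name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine):
        # Wrap the directive content in a container node carrying a class.
        if not content:
            error = state_machine.reporter.error(
                'The "%s" directive requires content.' % name,
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
        node = nodes.container('\n'.join(content))
        node['classes'] += options.get('class', []) or ['note-box']
        state.nested_parse(content, content_offset, node)
        return [node]

    # No .arguments attribute: the directive takes no arguments.
    note_box.options = {'class': directives.class_option}   # optional classes
    note_box.content = 1                                     # content allowed

    directives.register_directive('note-box', note_box)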
diff --git a/docutils/parsers/rst/directives/admonitions.py b/docutils/parsers/rst/directives/admonitions.py
new file mode 100644
index 000000000..73ca18161
--- /dev/null
+++ b/docutils/parsers/rst/directives/admonitions.py
@@ -0,0 +1,90 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Admonition directives.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+from docutils.parsers.rst import states, directives
+from docutils import nodes
+
+
+def make_admonition(node_class, name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ if not content:
+ error = state_machine.reporter.error(
+ 'The "%s" admonition is empty; content required.' % (name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ text = '\n'.join(content)
+ admonition_node = node_class(text)
+ if arguments:
+ title_text = arguments[0]
+ textnodes, messages = state.inline_text(title_text, lineno)
+ admonition_node += nodes.title(title_text, '', *textnodes)
+ admonition_node += messages
+ if options.has_key('class'):
+ classes = options['class']
+ else:
+ classes = ['admonition-' + nodes.make_id(title_text)]
+ admonition_node['classes'] += classes
+ state.nested_parse(content, content_offset, admonition_node)
+ return [admonition_node]
+
+def admonition(*args):
+ return make_admonition(nodes.admonition, *args)
+
+admonition.arguments = (1, 0, 1)
+admonition.options = {'class': directives.class_option}
+admonition.content = 1
+
+def attention(*args):
+ return make_admonition(nodes.attention, *args)
+
+attention.content = 1
+
+def caution(*args):
+ return make_admonition(nodes.caution, *args)
+
+caution.content = 1
+
+def danger(*args):
+ return make_admonition(nodes.danger, *args)
+
+danger.content = 1
+
+def error(*args):
+ return make_admonition(nodes.error, *args)
+
+error.content = 1
+
+def hint(*args):
+ return make_admonition(nodes.hint, *args)
+
+hint.content = 1
+
+def important(*args):
+ return make_admonition(nodes.important, *args)
+
+important.content = 1
+
+def note(*args):
+ return make_admonition(nodes.note, *args)
+
+note.content = 1
+
+def tip(*args):
+ return make_admonition(nodes.tip, *args)
+
+tip.content = 1
+
+def warning(*args):
+ return make_admonition(nodes.warning, *args)
+
+warning.content = 1
diff --git a/docutils/parsers/rst/directives/body.py b/docutils/parsers/rst/directives/body.py
new file mode 100644
index 000000000..2ff89e617
--- /dev/null
+++ b/docutils/parsers/rst/directives/body.py
@@ -0,0 +1,196 @@
+# Author: David Goodger
+# Contact: goodger@python.org
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Directives for additional body elements.
+
+See `docutils.parsers.rst.directives` for API details.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import sys
+from docutils import nodes
+from docutils.parsers.rst import directives
+from docutils.parsers.rst.roles import set_classes
+
+
+def topic(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine,
+ node_class=nodes.topic):
+ if not (state_machine.match_titles
+ or isinstance(state_machine.node, nodes.sidebar)):
+ error = state_machine.reporter.error(
+ 'The "%s" directive may not be used within topics '
+ 'or body elements.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ if not content:
+ warning = state_machine.reporter.warning(
+ 'Content block expected for the "%s" directive; none found.'
+ % name, nodes.literal_block(block_text, block_text),
+ line=lineno)
+ return [warning]
+ title_text = arguments[0]
+ textnodes, messages = state.inline_text(title_text, lineno)
+ titles = [nodes.title(title_text, '', *textnodes)]
+ # sidebar uses this code
+ if options.has_key('subtitle'):
+ textnodes, more_messages = state.inline_text(options['subtitle'],
+ lineno)
+ titles.append(nodes.subtitle(options['subtitle'], '', *textnodes))
+ messages.extend(more_messages)
+ text = '\n'.join(content)
+ node = node_class(text, *(titles + messages))
+ node['classes'] += options.get('class', [])
+ if text:
+ state.nested_parse(content, content_offset, node)
+ return [node]
+
+topic.arguments = (1, 0, 1)
+topic.options = {'class': directives.class_option}
+topic.content = 1
+
+def sidebar(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ if isinstance(state_machine.node, nodes.sidebar):
+ error = state_machine.reporter.error(
+ 'The "%s" directive may not be used within a sidebar element.'
+ % name, nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ return topic(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine,
+ node_class=nodes.sidebar)
+
+sidebar.arguments = (1, 0, 1)
+sidebar.options = {'subtitle': directives.unchanged_required,
+ 'class': directives.class_option}
+sidebar.content = 1
+
+def line_block(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ if not content:
+ warning = state_machine.reporter.warning(
+ 'Content block expected for the "%s" directive; none found.'
+ % name, nodes.literal_block(block_text, block_text), line=lineno)
+ return [warning]
+ block = nodes.line_block(classes=options.get('class', []))
+ node_list = [block]
+ for line_text in content:
+ text_nodes, messages = state.inline_text(line_text.strip(),
+ lineno + content_offset)
+ line = nodes.line(line_text, '', *text_nodes)
+ if line_text.strip():
+ line.indent = len(line_text) - len(line_text.lstrip())
+ block += line
+ node_list.extend(messages)
+ content_offset += 1
+ state.nest_line_block_lines(block)
+ return node_list
+
+line_block.options = {'class': directives.class_option}
+line_block.content = 1
+
+def parsed_literal(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ set_classes(options)
+ return block(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine,
+ node_class=nodes.literal_block)
+
+parsed_literal.options = {'class': directives.class_option}
+parsed_literal.content = 1
+
+def block(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine, node_class):
+ if not content:
+ warning = state_machine.reporter.warning(
+ 'Content block expected for the "%s" directive; none found.'
+ % name, nodes.literal_block(block_text, block_text), line=lineno)
+ return [warning]
+ text = '\n'.join(content)
+ text_nodes, messages = state.inline_text(text, lineno)
+ node = node_class(text, '', *text_nodes, **options)
+ node.line = content_offset + 1
+ return [node] + messages
+
+def rubric(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ rubric_text = arguments[0]
+ textnodes, messages = state.inline_text(rubric_text, lineno)
+ rubric = nodes.rubric(rubric_text, '', *textnodes, **options)
+ return [rubric] + messages
+
+rubric.arguments = (1, 0, 1)
+rubric.options = {'class': directives.class_option}
+
+def epigraph(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ block_quote, messages = state.block_quote(content, content_offset)
+ block_quote['classes'].append('epigraph')
+ return [block_quote] + messages
+
+epigraph.content = 1
+
+def highlights(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ block_quote, messages = state.block_quote(content, content_offset)
+ block_quote['classes'].append('highlights')
+ return [block_quote] + messages
+
+highlights.content = 1
+
+def pull_quote(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ block_quote, messages = state.block_quote(content, content_offset)
+ block_quote['classes'].append('pull-quote')
+ return [block_quote] + messages
+
+pull_quote.content = 1
+
+def compound(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ text = '\n'.join(content)
+ if not text:
+ error = state_machine.reporter.error(
+ 'The "%s" directive is empty; content required.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ node = nodes.compound(text)
+ node['classes'] += options.get('class', [])
+ state.nested_parse(content, content_offset, node)
+ return [node]
+
+compound.options = {'class': directives.class_option}
+compound.content = 1
+
+def container(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ text = '\n'.join(content)
+ if not text:
+ error = state_machine.reporter.error(
+ 'The "%s" directive is empty; content required.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ try:
+ if arguments:
+ classes = directives.class_option(arguments[0])
+ else:
+ classes = []
+ except ValueError:
+ error = state_machine.reporter.error(
+ 'Invalid class attribute value for "%s" directive: "%s".'
+ % (name, arguments[0]),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ node = nodes.container(text)
+ node['classes'].extend(classes)
+ state.nested_parse(content, content_offset, node)
+ return [node]
+
+container.arguments = (0, 1, 1)
+container.content = 1
diff --git a/docutils/parsers/rst/directives/html.py b/docutils/parsers/rst/directives/html.py
new file mode 100644
index 000000000..86e19dcfc
--- /dev/null
+++ b/docutils/parsers/rst/directives/html.py
@@ -0,0 +1,96 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Directives for typically HTML-specific constructs.
+"""
+
+__docformat__ = 'reStructuredText'
+
+import sys
+from docutils import nodes, utils
+from docutils.parsers.rst import states
+from docutils.transforms import components
+
+
+def meta(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ node = nodes.Element()
+ if content:
+ new_line_offset, blank_finish = state.nested_list_parse(
+ content, content_offset, node, initial_state='MetaBody',
+ blank_finish=1, state_machine_kwargs=metaSMkwargs)
+ if (new_line_offset - content_offset) != len(content):
+ # incomplete parse of block?
+ error = state_machine.reporter.error(
+ 'Invalid meta directive.',
+ nodes.literal_block(block_text, block_text), line=lineno)
+ node += error
+ else:
+ error = state_machine.reporter.error(
+ 'Empty meta directive.',
+ nodes.literal_block(block_text, block_text), line=lineno)
+ node += error
+ return node.children
+
+meta.content = 1
+
+def imagemap(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ return []
+
+
+class MetaBody(states.SpecializedBody):
+
+ class meta(nodes.Special, nodes.PreBibliographic, nodes.Element):
+ """HTML-specific "meta" element."""
+ pass
+
+ def field_marker(self, match, context, next_state):
+ """Meta element."""
+ node, blank_finish = self.parsemeta(match)
+ self.parent += node
+ return [], next_state, []
+
+ def parsemeta(self, match):
+ name = self.parse_field_marker(match)
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ node = self.meta()
+ pending = nodes.pending(components.Filter,
+ {'component': 'writer',
+ 'format': 'html',
+ 'nodes': [node]})
+ node['content'] = ' '.join(indented)
+ if not indented:
+ line = self.state_machine.line
+ msg = self.reporter.info(
+ 'No content for meta tag "%s".' % name,
+ nodes.literal_block(line, line),
+ line=self.state_machine.abs_line_number())
+ return msg, blank_finish
+ tokens = name.split()
+ try:
+ attname, val = utils.extract_name_value(tokens[0])[0]
+ node[attname.lower()] = val
+ except utils.NameValueError:
+ node['name'] = tokens[0]
+ for token in tokens[1:]:
+ try:
+ attname, val = utils.extract_name_value(token)[0]
+ node[attname.lower()] = val
+ except utils.NameValueError, detail:
+ line = self.state_machine.line
+ msg = self.reporter.error(
+ 'Error parsing meta tag attribute "%s": %s.'
+ % (token, detail), nodes.literal_block(line, line),
+ line=self.state_machine.abs_line_number())
+ return msg, blank_finish
+ self.document.note_pending(pending)
+ return pending, blank_finish
+
+
+metaSMkwargs = {'state_classes': (MetaBody,)}
diff --git a/docutils/parsers/rst/directives/images.py b/docutils/parsers/rst/directives/images.py
new file mode 100644
index 000000000..5aed4c01b
--- /dev/null
+++ b/docutils/parsers/rst/directives/images.py
@@ -0,0 +1,152 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Directives for figures and simple images.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import sys
+from docutils import nodes, utils
+from docutils.parsers.rst import directives, states
+from docutils.nodes import fully_normalize_name, whitespace_normalize_name
+from docutils.parsers.rst.roles import set_classes
+
+try:
+ import Image # PIL
+except ImportError:
+ Image = None
+
+align_h_values = ('left', 'center', 'right')
+align_v_values = ('top', 'middle', 'bottom')
+align_values = align_v_values + align_h_values
+
+def align(argument):
+ return directives.choice(argument, align_values)
+
+def image(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ if options.has_key('align'):
+ # check for align_v values only
+ if isinstance(state, states.SubstitutionDef):
+ if options['align'] not in align_v_values:
+ error = state_machine.reporter.error(
+ 'Error in "%s" directive: "%s" is not a valid value for '
+ 'the "align" option within a substitution definition. '
+ 'Valid values for "align" are: "%s".'
+ % (name, options['align'], '", "'.join(align_v_values)),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ elif options['align'] not in align_h_values:
+ error = state_machine.reporter.error(
+ 'Error in "%s" directive: "%s" is not a valid value for '
+ 'the "align" option. Valid values for "align" are: "%s".'
+ % (name, options['align'], '", "'.join(align_h_values)),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ messages = []
+ reference = directives.uri(arguments[0])
+ options['uri'] = reference
+ reference_node = None
+ if options.has_key('target'):
+ block = states.escape2null(options['target']).splitlines()
+ block = [line for line in block]
+ target_type, data = state.parse_target(block, block_text, lineno)
+ if target_type == 'refuri':
+ reference_node = nodes.reference(refuri=data)
+ elif target_type == 'refname':
+ reference_node = nodes.reference(
+ refname=fully_normalize_name(data),
+ name=whitespace_normalize_name(data))
+ reference_node.indirect_reference_name = data
+ state.document.note_refname(reference_node)
+ else: # malformed target
+ messages.append(data) # data is a system message
+ del options['target']
+ set_classes(options)
+ image_node = nodes.image(block_text, **options)
+ if reference_node:
+ reference_node += image_node
+ return messages + [reference_node]
+ else:
+ return messages + [image_node]
+
+image.arguments = (1, 0, 1)
+image.options = {'alt': directives.unchanged,
+ 'height': directives.length_or_unitless,
+ 'width': directives.length_or_percentage_or_unitless,
+ 'scale': directives.nonnegative_int,
+ 'align': align,
+ 'target': directives.unchanged_required,
+ 'class': directives.class_option}
+
+def figure_align(argument):
+ return directives.choice(argument, align_h_values)
+
+def figure(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ figwidth = options.get('figwidth')
+ if figwidth:
+ del options['figwidth']
+ figclasses = options.get('figclass')
+ if figclasses:
+ del options['figclass']
+ align = options.get('align')
+ if align:
+ del options['align']
+ (image_node,) = image(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine)
+ if isinstance(image_node, nodes.system_message):
+ return [image_node]
+ figure_node = nodes.figure('', image_node)
+ if figwidth == 'image':
+ if Image and state.document.settings.file_insertion_enabled:
+ # PIL doesn't like Unicode paths:
+ try:
+ i = Image.open(str(image_node['uri']))
+ except (IOError, UnicodeError):
+ pass
+ else:
+ state.document.settings.record_dependencies.add(image_node['uri'])
+ figure_node['width'] = i.size[0]
+ elif figwidth is not None:
+ figure_node['width'] = figwidth
+ if figclasses:
+ figure_node['classes'] += figclasses
+ if align:
+ figure_node['align'] = align
+ if content:
+ node = nodes.Element() # anonymous container for parsing
+ state.nested_parse(content, content_offset, node)
+ first_node = node[0]
+ if isinstance(first_node, nodes.paragraph):
+ caption = nodes.caption(first_node.rawsource, '',
+ *first_node.children)
+ figure_node += caption
+ elif not (isinstance(first_node, nodes.comment)
+ and len(first_node) == 0):
+ error = state_machine.reporter.error(
+ 'Figure caption must be a paragraph or empty comment.',
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [figure_node, error]
+ if len(node) > 1:
+ figure_node += nodes.legend('', *node[1:])
+ return [figure_node]
+
+def figwidth_value(argument):
+ if argument.lower() == 'image':
+ return 'image'
+ else:
+ return directives.nonnegative_int(argument)
+
+figure.arguments = (1, 0, 1)
+figure.options = {'figwidth': figwidth_value,
+ 'figclass': directives.class_option}
+figure.options.update(image.options)
+figure.options['align'] = figure_align
+figure.content = 1
diff --git a/docutils/parsers/rst/directives/misc.py b/docutils/parsers/rst/directives/misc.py
new file mode 100644
index 000000000..42f642fee
--- /dev/null
+++ b/docutils/parsers/rst/directives/misc.py
@@ -0,0 +1,408 @@
+# Authors: David Goodger, Dethe Elza
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""Miscellaneous directives."""
+
+__docformat__ = 'reStructuredText'
+
+import sys
+import os.path
+import re
+import time
+from docutils import io, nodes, statemachine, utils
+from docutils.parsers.rst import directives, roles, states
+from docutils.transforms import misc
+
+try:
+ import urllib2
+except ImportError:
+ urllib2 = None
+
+
+standard_include_path = os.path.join(os.path.dirname(states.__file__),
+ 'include')
+
+def include(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """Include a reST file as part of the content of this reST file."""
+ if not state.document.settings.file_insertion_enabled:
+ warning = state_machine.reporter.warning(
+ '"%s" directive disabled.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [warning]
+ source = state_machine.input_lines.source(
+ lineno - state_machine.input_offset - 1)
+ source_dir = os.path.dirname(os.path.abspath(source))
+ path = directives.path(arguments[0])
+ if path.startswith('<') and path.endswith('>'):
+ path = os.path.join(standard_include_path, path[1:-1])
+ path = os.path.normpath(os.path.join(source_dir, path))
+ path = utils.relative_path(None, path)
+ encoding = options.get('encoding', state.document.settings.input_encoding)
+ try:
+ state.document.settings.record_dependencies.add(path)
+ include_file = io.FileInput(
+ source_path=path, encoding=encoding,
+ error_handler=state.document.settings.input_encoding_error_handler,
+ handle_io_errors=None)
+ except IOError, error:
+ severe = state_machine.reporter.severe(
+ 'Problems with "%s" directive path:\n%s: %s.'
+ % (name, error.__class__.__name__, error),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [severe]
+ try:
+ include_text = include_file.read()
+ except UnicodeError, error:
+ severe = state_machine.reporter.severe(
+ 'Problem with "%s" directive:\n%s: %s'
+ % (name, error.__class__.__name__, error),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [severe]
+ if options.has_key('literal'):
+ literal_block = nodes.literal_block(include_text, include_text,
+ source=path)
+ literal_block.line = 1
+ return literal_block
+ else:
+ include_lines = statemachine.string2lines(include_text,
+ convert_whitespace=1)
+ state_machine.insert_input(include_lines, path)
+ return []
+
+include.arguments = (1, 0, 1)
+include.options = {'literal': directives.flag,
+ 'encoding': directives.encoding}
+
+def raw(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """
+ Pass through content unchanged.
+
+ Content is included in the output based on the type argument.
+
+ Content may be included inline (content section of the directive) or
+ imported from a file or URL.
+ """
+ if ( not state.document.settings.raw_enabled
+ or (not state.document.settings.file_insertion_enabled
+ and (options.has_key('file') or options.has_key('url'))) ):
+ warning = state_machine.reporter.warning(
+ '"%s" directive disabled.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [warning]
+ attributes = {'format': ' '.join(arguments[0].lower().split())}
+ encoding = options.get('encoding', state.document.settings.input_encoding)
+ if content:
+ if options.has_key('file') or options.has_key('url'):
+ error = state_machine.reporter.error(
+ '"%s" directive may not both specify an external file and '
+ 'have content.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ text = '\n'.join(content)
+ elif options.has_key('file'):
+ if options.has_key('url'):
+ error = state_machine.reporter.error(
+ 'The "file" and "url" options may not be simultaneously '
+ 'specified for the "%s" directive.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ source_dir = os.path.dirname(
+ os.path.abspath(state.document.current_source))
+ path = os.path.normpath(os.path.join(source_dir, options['file']))
+ path = utils.relative_path(None, path)
+ try:
+ state.document.settings.record_dependencies.add(path)
+ raw_file = io.FileInput(
+ source_path=path, encoding=encoding,
+ error_handler=state.document.settings.input_encoding_error_handler,
+ handle_io_errors=None)
+ except IOError, error:
+ severe = state_machine.reporter.severe(
+ 'Problems with "%s" directive path:\n%s.' % (name, error),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [severe]
+ try:
+ text = raw_file.read()
+ except UnicodeError, error:
+ severe = state_machine.reporter.severe(
+ 'Problem with "%s" directive:\n%s: %s'
+ % (name, error.__class__.__name__, error),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [severe]
+ attributes['source'] = path
+ elif options.has_key('url'):
+ if not urllib2:
+ severe = state_machine.reporter.severe(
+ 'Problems with the "%s" directive and its "url" option: '
+ 'unable to access the required functionality (from the '
+ '"urllib2" module).' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [severe]
+ source = options['url']
+ try:
+ raw_text = urllib2.urlopen(source).read()
+ except (urllib2.URLError, IOError, OSError), error:
+ severe = state_machine.reporter.severe(
+ 'Problems with "%s" directive URL "%s":\n%s.'
+ % (name, options['url'], error),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [severe]
+ raw_file = io.StringInput(
+ source=raw_text, source_path=source, encoding=encoding,
+ error_handler=state.document.settings.input_encoding_error_handler)
+ try:
+ text = raw_file.read()
+ except UnicodeError, error:
+ severe = state_machine.reporter.severe(
+ 'Problem with "%s" directive:\n%s: %s'
+ % (name, error.__class__.__name__, error),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [severe]
+ attributes['source'] = source
+ else:
+ error = state_machine.reporter.warning(
+ 'The "%s" directive requires content; none supplied.' % (name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ raw_node = nodes.raw('', text, **attributes)
+ return [raw_node]
+
+raw.arguments = (1, 0, 1)
+raw.options = {'file': directives.path,
+ 'url': directives.uri,
+ 'encoding': directives.encoding}
+raw.content = 1
+
+def replace(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ if not isinstance(state, states.SubstitutionDef):
+ error = state_machine.reporter.error(
+ 'Invalid context: the "%s" directive can only be used within a '
+ 'substitution definition.' % (name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ text = '\n'.join(content)
+ element = nodes.Element(text)
+ if text:
+ state.nested_parse(content, content_offset, element)
+ if len(element) != 1 or not isinstance(element[0], nodes.paragraph):
+ messages = []
+ for node in element:
+ if isinstance(node, nodes.system_message):
+ node['backrefs'] = []
+ messages.append(node)
+ error = state_machine.reporter.error(
+ 'Error in "%s" directive: may contain a single paragraph '
+ 'only.' % (name), line=lineno)
+ messages.append(error)
+ return messages
+ else:
+ return element[0].children
+ else:
+ error = state_machine.reporter.error(
+ 'The "%s" directive is empty; content required.' % (name),
+ line=lineno)
+ return [error]
+
+replace.content = 1
+
+def unicode_directive(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ r"""
+ Convert Unicode character codes (numbers) to characters. Codes may be
+ decimal numbers, hexadecimal numbers (prefixed by ``0x``, ``x``, ``\x``,
+ ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style numeric character
+ entities (e.g. ``&#x262E;``). Text following ".." is a comment and is
+ ignored. Spaces are ignored, and any other text remains as-is.
+ """
+ if not isinstance(state, states.SubstitutionDef):
+ error = state_machine.reporter.error(
+ 'Invalid context: the "%s" directive can only be used within a '
+ 'substitution definition.' % (name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ substitution_definition = state_machine.node
+ if options.has_key('trim'):
+ substitution_definition.attributes['ltrim'] = 1
+ substitution_definition.attributes['rtrim'] = 1
+ if options.has_key('ltrim'):
+ substitution_definition.attributes['ltrim'] = 1
+ if options.has_key('rtrim'):
+ substitution_definition.attributes['rtrim'] = 1
+ codes = unicode_comment_pattern.split(arguments[0])[0].split()
+ element = nodes.Element()
+ for code in codes:
+ try:
+ decoded = directives.unicode_code(code)
+ except ValueError, err:
+ error = state_machine.reporter.error(
+ 'Invalid character code: %s\n%s: %s'
+ % (code, err.__class__.__name__, err),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ element += nodes.Text(decoded)
+ return element.children
+
+unicode_directive.arguments = (1, 0, 1)
+unicode_directive.options = {'trim': directives.flag,
+ 'ltrim': directives.flag,
+ 'rtrim': directives.flag}
+unicode_comment_pattern = re.compile(r'( |\n|^)\.\. ')
+
+def class_directive(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """
+ Set a "class" attribute on the directive content or the next element.
+ When applied to the next element, a "pending" element is inserted, and a
+ transform does the work later.
+ """
+ try:
+ class_value = directives.class_option(arguments[0])
+ except ValueError:
+ error = state_machine.reporter.error(
+ 'Invalid class attribute value for "%s" directive: "%s".'
+ % (name, arguments[0]),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ node_list = []
+ if content:
+ container = nodes.Element()
+ state.nested_parse(content, content_offset, container)
+ for node in container:
+ node['classes'].extend(class_value)
+ node_list.extend(container.children)
+ else:
+ pending = nodes.pending(misc.ClassAttribute,
+ {'class': class_value, 'directive': name},
+ block_text)
+ state_machine.document.note_pending(pending)
+ node_list.append(pending)
+ return node_list
+
+class_directive.arguments = (1, 0, 1)
+class_directive.content = 1
+
+role_arg_pat = re.compile(r'(%s)\s*(\(\s*(%s)\s*\)\s*)?$'
+ % ((states.Inliner.simplename,) * 2))
+def role(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """Dynamically create and register a custom interpreted text role."""
+ if content_offset > lineno or not content:
+ error = state_machine.reporter.error(
+ '"%s" directive requires arguments on the first line.'
+ % name, nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ args = content[0]
+ match = role_arg_pat.match(args)
+ if not match:
+ error = state_machine.reporter.error(
+ '"%s" directive arguments not valid role names: "%s".'
+ % (name, args), nodes.literal_block(block_text, block_text),
+ line=lineno)
+ return [error]
+ new_role_name = match.group(1)
+ base_role_name = match.group(3)
+ messages = []
+ if base_role_name:
+ base_role, messages = roles.role(
+ base_role_name, state_machine.language, lineno, state.reporter)
+ if base_role is None:
+ error = state.reporter.error(
+ 'Unknown interpreted text role "%s".' % base_role_name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return messages + [error]
+ else:
+ base_role = roles.generic_custom_role
+ assert not hasattr(base_role, 'arguments'), (
+        'Supplemental directive arguments for "%s" directive not supported '
+        '(specified by "%r" role).' % (name, base_role))
+ try:
+ (arguments, options, content, content_offset) = (
+ state.parse_directive_block(content[1:], content_offset, base_role,
+ option_presets={}))
+ except states.MarkupError, detail:
+ error = state_machine.reporter.error(
+ 'Error in "%s" directive:\n%s.' % (name, detail),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return messages + [error]
+ if not options.has_key('class'):
+ try:
+ options['class'] = directives.class_option(new_role_name)
+ except ValueError, detail:
+ error = state_machine.reporter.error(
+ 'Invalid argument for "%s" directive:\n%s.'
+ % (name, detail),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return messages + [error]
+ role = roles.CustomRole(new_role_name, base_role, options, content)
+ roles.register_local_role(new_role_name, role)
+ return messages
+
+role.content = 1
+
+def default_role(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """Set the default interpreted text role."""
+ if not arguments:
+ if roles._roles.has_key(''):
+ # restore the "default" default role
+ del roles._roles['']
+ return []
+ role_name = arguments[0]
+ role, messages = roles.role(
+ role_name, state_machine.language, lineno, state.reporter)
+ if role is None:
+ error = state.reporter.error(
+ 'Unknown interpreted text role "%s".' % role_name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return messages + [error]
+ roles._roles[''] = role
+ # @@@ should this be local to the document, not the parser?
+ return messages
+
+default_role.arguments = (0, 1, 0)
+
+def title(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ state_machine.document['title'] = arguments[0]
+ return []
+
+title.arguments = (1, 0, 1)
+
+def date(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ if not isinstance(state, states.SubstitutionDef):
+ error = state_machine.reporter.error(
+ 'Invalid context: the "%s" directive can only be used within a '
+ 'substitution definition.' % (name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ format = '\n'.join(content) or '%Y-%m-%d'
+ text = time.strftime(format)
+ return [nodes.Text(text)]
+
+date.content = 1
+
+def directive_test_function(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """This directive is useful only for testing purposes."""
+ if content:
+ text = '\n'.join(content)
+ info = state_machine.reporter.info(
+ 'Directive processed. Type="%s", arguments=%r, options=%r, '
+ 'content:' % (name, arguments, options),
+ nodes.literal_block(text, text), line=lineno)
+ else:
+ info = state_machine.reporter.info(
+ 'Directive processed. Type="%s", arguments=%r, options=%r, '
+ 'content: None' % (name, arguments, options), line=lineno)
+ return [info]
+
+directive_test_function.arguments = (0, 1, 1)
+directive_test_function.options = {'option': directives.unchanged_required}
+directive_test_function.content = 1
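
An illustrative usage sketch for the miscellaneous directives added above. It
assumes only the public ``docutils.core.publish_string`` front end; the reST
input is invented for illustration::

    # Exercise the "unicode", "date", "role", "default-role" and "class"
    # directives defined in misc.py via the standard publishing front end.
    from docutils.core import publish_string

    source = """\
    .. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN
    .. |today| date:: %Y-%m-%d

    .. role:: custom(emphasis)
    .. default-role:: custom

    .. class:: special

    This paragraph carries the "special" class; `this text` uses the custom
    role, and |copy| |today| come from the substitution definitions above.
    """

    print publish_string(source, writer_name='pseudoxml')
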
diff --git a/docutils/parsers/rst/directives/parts.py b/docutils/parsers/rst/directives/parts.py
new file mode 100644
index 000000000..2a1a092a4
--- /dev/null
+++ b/docutils/parsers/rst/directives/parts.py
@@ -0,0 +1,126 @@
+# Author: David Goodger, Dmitry Jemerov
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Directives for document parts.
+"""
+
+__docformat__ = 'reStructuredText'
+
+from docutils import nodes, languages
+from docutils.transforms import parts
+from docutils.parsers.rst import directives
+
+
+backlinks_values = ('top', 'entry', 'none')
+
+def backlinks(arg):
+ value = directives.choice(arg, backlinks_values)
+ if value == 'none':
+ return None
+ else:
+ return value
+
+def contents(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """
+ Table of contents.
+
+ The table of contents is generated in two passes: initial parse and
+ transform. During the initial parse, a 'pending' element is generated
+ which acts as a placeholder, storing the TOC title and any options
+ internally. At a later stage in the processing, the 'pending' element is
+ replaced by a 'topic' element, a title and the table of contents proper.
+ """
+ if not (state_machine.match_titles
+ or isinstance(state_machine.node, nodes.sidebar)):
+ error = state_machine.reporter.error(
+ 'The "%s" directive may not be used within topics '
+ 'or body elements.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ document = state_machine.document
+ language = languages.get_language(document.settings.language_code)
+ if arguments:
+ title_text = arguments[0]
+ text_nodes, messages = state.inline_text(title_text, lineno)
+ title = nodes.title(title_text, '', *text_nodes)
+ else:
+ messages = []
+ if options.has_key('local'):
+ title = None
+ else:
+ title = nodes.title('', language.labels['contents'])
+ topic = nodes.topic(classes=['contents'])
+ topic['classes'] += options.get('class', [])
+ if options.has_key('local'):
+ topic['classes'].append('local')
+ if title:
+ name = title.astext()
+ topic += title
+ else:
+ name = language.labels['contents']
+ name = nodes.fully_normalize_name(name)
+ if not document.has_name(name):
+ topic['names'].append(name)
+ document.note_implicit_target(topic)
+ pending = nodes.pending(parts.Contents, rawsource=block_text)
+ pending.details.update(options)
+ document.note_pending(pending)
+ topic += pending
+ return [topic] + messages
+
+contents.arguments = (0, 1, 1)
+contents.options = {'depth': directives.nonnegative_int,
+ 'local': directives.flag,
+ 'backlinks': backlinks,
+ 'class': directives.class_option}
+
+def sectnum(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """Automatic section numbering."""
+ pending = nodes.pending(parts.SectNum)
+ pending.details.update(options)
+ state_machine.document.note_pending(pending)
+ return [pending]
+
+sectnum.options = {'depth': int,
+ 'start': int,
+ 'prefix': directives.unchanged_required,
+ 'suffix': directives.unchanged_required}
+
+def header_footer(node, name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """Contents of document header or footer."""
+ if not content:
+ warning = state_machine.reporter.warning(
+ 'Content block expected for the "%s" directive; none found.'
+ % name, nodes.literal_block(block_text, block_text),
+ line=lineno)
+ node.append(nodes.paragraph(
+ '', 'Problem with the "%s" directive: no content supplied.' % name))
+ return [warning]
+ text = '\n'.join(content)
+ state.nested_parse(content, content_offset, node)
+ return []
+
+def header(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ decoration = state_machine.document.get_decoration()
+ node = decoration.get_header()
+ return header_footer(node, name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine)
+
+header.content = 1
+
+def footer(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ decoration = state_machine.document.get_decoration()
+ node = decoration.get_footer()
+ return header_footer(node, name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine)
+
+footer.content = 1
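
A brief sketch of the document-parts directives defined above, again through
``docutils.core.publish_string``; the reST input is invented for
illustration::

    # "contents" and "sectnum" insert pending nodes that are resolved later
    # by transforms in docutils.transforms.parts; "header" and "footer"
    # parse their content straight into the document decoration.
    from docutils.core import publish_string

    source = """\
    .. header:: Draft -- not for distribution
    .. footer:: Generated with Docutils

    .. contents:: Overview
       :depth: 2
       :backlinks: entry

    .. sectnum::
       :depth: 2

    First Section
    =============

    Body text.
    """

    print publish_string(source, writer_name='pseudoxml')
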
diff --git a/docutils/parsers/rst/directives/references.py b/docutils/parsers/rst/directives/references.py
new file mode 100644
index 000000000..0406182b6
--- /dev/null
+++ b/docutils/parsers/rst/directives/references.py
@@ -0,0 +1,27 @@
+# Author: David Goodger, Dmitry Jemerov
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Directives for references and targets.
+"""
+
+__docformat__ = 'reStructuredText'
+
+from docutils import nodes
+from docutils.transforms import references
+from docutils.parsers.rst import directives
+
+
+def target_notes(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """Target footnote generation."""
+ pending = nodes.pending(references.TargetNotes)
+ pending.details.update(options)
+ state_machine.document.note_pending(pending)
+ nodelist = [pending]
+ return nodelist
+
+target_notes.options = {'class': directives.class_option}
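
A short sketch of the "target-notes" directive defined above: it adds a
pending node that the ``TargetNotes`` transform later expands into footnotes
for the document's external hyperlink targets. The reST input is invented::

    from docutils.core import publish_string

    source = """\
    See the `Docutils site`_ for more information.

    .. _Docutils site: http://docutils.sourceforge.net/

    .. target-notes::
    """

    print publish_string(source, writer_name='pseudoxml')
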
diff --git a/docutils/parsers/rst/directives/tables.py b/docutils/parsers/rst/directives/tables.py
new file mode 100644
index 000000000..70a0de5ab
--- /dev/null
+++ b/docutils/parsers/rst/directives/tables.py
@@ -0,0 +1,444 @@
+# Authors: David Goodger, David Priest
+# Contact: goodger@python.org
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Directives for table elements.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import sys
+import os.path
+from docutils import io, nodes, statemachine, utils
+from docutils.utils import SystemMessagePropagation
+from docutils.parsers.rst import directives
+
+try:
+ import csv # new in Python 2.3
+except ImportError:
+ csv = None
+
+try:
+ import urllib2
+except ImportError:
+ urllib2 = None
+
+try:
+ True
+except NameError: # Python 2.2 & 2.1 compatibility
+ True = not 0
+ False = not 1
+
+
+def table(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ if not content:
+ warning = state_machine.reporter.warning(
+ 'Content block expected for the "%s" directive; none found.'
+ % name, nodes.literal_block(block_text, block_text),
+ line=lineno)
+ return [warning]
+ title, messages = make_title(arguments, state, lineno)
+ node = nodes.Element() # anonymous container for parsing
+ state.nested_parse(content, content_offset, node)
+ if len(node) != 1 or not isinstance(node[0], nodes.table):
+ error = state_machine.reporter.error(
+ 'Error parsing content block for the "%s" directive: '
+ 'exactly one table expected.'
+ % name, nodes.literal_block(block_text, block_text),
+ line=lineno)
+ return [error]
+ table_node = node[0]
+ table_node['classes'] += options.get('class', [])
+ if title:
+ table_node.insert(0, title)
+ return [table_node] + messages
+
+table.arguments = (0, 1, 1)
+table.options = {'class': directives.class_option}
+table.content = 1
+
+def make_title(arguments, state, lineno):
+ if arguments:
+ title_text = arguments[0]
+ text_nodes, messages = state.inline_text(title_text, lineno)
+ title = nodes.title(title_text, '', *text_nodes)
+ else:
+ title = None
+ messages = []
+ return title, messages
+
+
+if csv:
+ class DocutilsDialect(csv.Dialect):
+
+ """CSV dialect for `csv_table` directive function."""
+
+ delimiter = ','
+ quotechar = '"'
+ doublequote = True
+ skipinitialspace = True
+ lineterminator = '\n'
+ quoting = csv.QUOTE_MINIMAL
+
+ def __init__(self, options):
+ if options.has_key('delim'):
+ self.delimiter = str(options['delim'])
+ if options.has_key('keepspace'):
+ self.skipinitialspace = False
+ if options.has_key('quote'):
+ self.quotechar = str(options['quote'])
+ if options.has_key('escape'):
+ self.doublequote = False
+ self.escapechar = str(options['escape'])
+ csv.Dialect.__init__(self)
+
+
+ class HeaderDialect(csv.Dialect):
+
+ """CSV dialect to use for the "header" option data."""
+
+ delimiter = ','
+ quotechar = '"'
+ escapechar = '\\'
+ doublequote = False
+ skipinitialspace = True
+ lineterminator = '\n'
+ quoting = csv.QUOTE_MINIMAL
+
+
+def csv_table(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ try:
+ if ( not state.document.settings.file_insertion_enabled
+ and (options.has_key('file') or options.has_key('url')) ):
+ warning = state_machine.reporter.warning(
+ 'File and URL access deactivated; ignoring "%s" directive.' %
+                name, nodes.literal_block(block_text, block_text), line=lineno)
+ return [warning]
+ check_requirements(name, lineno, block_text, state_machine)
+ title, messages = make_title(arguments, state, lineno)
+ csv_data, source = get_csv_data(
+ name, options, content, lineno, block_text, state, state_machine)
+ table_head, max_header_cols = process_header_option(
+ options, state_machine, lineno)
+ rows, max_cols = parse_csv_data_into_rows(
+ csv_data, DocutilsDialect(options), source, options)
+ max_cols = max(max_cols, max_header_cols)
+ header_rows = options.get('header-rows', 0) # default 0
+ stub_columns = options.get('stub-columns', 0) # default 0
+ check_table_dimensions(
+ rows, header_rows, stub_columns, name, lineno,
+ block_text, state_machine)
+ table_head.extend(rows[:header_rows])
+ table_body = rows[header_rows:]
+ col_widths = get_column_widths(
+ max_cols, name, options, lineno, block_text, state_machine)
+ extend_short_rows_with_empty_cells(max_cols, (table_head, table_body))
+ except SystemMessagePropagation, detail:
+ return [detail.args[0]]
+ except csv.Error, detail:
+ error = state_machine.reporter.error(
+ 'Error with CSV data in "%s" directive:\n%s' % (name, detail),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ table = (col_widths, table_head, table_body)
+ table_node = state.build_table(table, content_offset, stub_columns)
+ table_node['classes'] += options.get('class', [])
+ if title:
+ table_node.insert(0, title)
+ return [table_node] + messages
+
+csv_table.arguments = (0, 1, 1)
+csv_table.options = {'header-rows': directives.nonnegative_int,
+ 'stub-columns': directives.nonnegative_int,
+ 'header': directives.unchanged,
+ 'widths': directives.positive_int_list,
+ 'file': directives.path,
+ 'url': directives.uri,
+ 'encoding': directives.encoding,
+ 'class': directives.class_option,
+ # field delimiter char
+ 'delim': directives.single_char_or_whitespace_or_unicode,
+ # treat whitespace after delimiter as significant
+ 'keepspace': directives.flag,
+ # text field quote/unquote char:
+ 'quote': directives.single_char_or_unicode,
+ # char used to escape delim & quote as-needed:
+ 'escape': directives.single_char_or_unicode,}
+csv_table.content = 1
+
+def check_requirements(name, lineno, block_text, state_machine):
+ if not csv:
+ error = state_machine.reporter.error(
+ 'The "%s" directive is not compatible with this version of '
+ 'Python (%s). Requires the "csv" module, new in Python 2.3.'
+ % (name, sys.version.split()[0]),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+
+def get_csv_data(name, options, content, lineno, block_text,
+ state, state_machine):
+ """
+ CSV data can come from the directive content, from an external file, or
+ from a URL reference.
+ """
+ encoding = options.get('encoding', state.document.settings.input_encoding)
+ if content: # CSV data is from directive content
+ if options.has_key('file') or options.has_key('url'):
+ error = state_machine.reporter.error(
+ '"%s" directive may not both specify an external file and '
+ 'have content.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ source = content.source(0)
+ csv_data = content
+ elif options.has_key('file'): # CSV data is from an external file
+ if options.has_key('url'):
+ error = state_machine.reporter.error(
+ 'The "file" and "url" options may not be simultaneously '
+ 'specified for the "%s" directive.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ source_dir = os.path.dirname(
+ os.path.abspath(state.document.current_source))
+ source = os.path.normpath(os.path.join(source_dir, options['file']))
+ source = utils.relative_path(None, source)
+ try:
+ state.document.settings.record_dependencies.add(source)
+ csv_file = io.FileInput(
+ source_path=source, encoding=encoding,
+                error_handler=
+                    state.document.settings.input_encoding_error_handler,
+ handle_io_errors=None)
+ csv_data = csv_file.read().splitlines()
+ except IOError, error:
+ severe = state_machine.reporter.severe(
+ 'Problems with "%s" directive path:\n%s.' % (name, error),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(severe)
+ elif options.has_key('url'): # CSV data is from a URL
+ if not urllib2:
+ severe = state_machine.reporter.severe(
+ 'Problems with the "%s" directive and its "url" option: '
+ 'unable to access the required functionality (from the '
+ '"urllib2" module).' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(severe)
+ source = options['url']
+ try:
+ csv_text = urllib2.urlopen(source).read()
+ except (urllib2.URLError, IOError, OSError, ValueError), error:
+ severe = state_machine.reporter.severe(
+ 'Problems with "%s" directive URL "%s":\n%s.'
+ % (name, options['url'], error),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(severe)
+ csv_file = io.StringInput(
+ source=csv_text, source_path=source, encoding=encoding,
+ error_handler=state.document.settings.input_encoding_error_handler)
+ csv_data = csv_file.read().splitlines()
+ else:
+ error = state_machine.reporter.warning(
+ 'The "%s" directive requires content; none supplied.' % (name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ return csv_data, source
+
+def process_header_option(options, state_machine, lineno):
+ source = state_machine.get_source(lineno - 1)
+ table_head = []
+ max_header_cols = 0
+ if options.has_key('header'): # separate table header in option
+ rows, max_header_cols = parse_csv_data_into_rows(
+ options['header'].split('\n'), HeaderDialect(), source, options)
+ table_head.extend(rows)
+ return table_head, max_header_cols
+
+def parse_csv_data_into_rows(csv_data, dialect, source, options):
+ # csv.py doesn't do Unicode; encode temporarily as UTF-8
+ csv_reader = csv.reader([line.encode('utf-8') for line in csv_data],
+ dialect=dialect)
+ rows = []
+ max_cols = 0
+ for row in csv_reader:
+ row_data = []
+ for cell in row:
+ # decode UTF-8 back to Unicode
+ cell_text = unicode(cell, 'utf-8')
+ cell_data = (0, 0, 0, statemachine.StringList(
+ cell_text.splitlines(), source=source))
+ row_data.append(cell_data)
+ rows.append(row_data)
+ max_cols = max(max_cols, len(row))
+ return rows, max_cols
+
+def check_table_dimensions(rows, header_rows, stub_columns, name, lineno,
+ block_text, state_machine):
+ if len(rows) < header_rows:
+ error = state_machine.reporter.error(
+ '%s header row(s) specified but only %s row(s) of data supplied '
+ '("%s" directive).' % (header_rows, len(rows), name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ if len(rows) == header_rows > 0:
+ error = state_machine.reporter.error(
+ 'Insufficient data supplied (%s row(s)); no data remaining for '
+ 'table body, required by "%s" directive.' % (len(rows), name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ for row in rows:
+ if len(row) < stub_columns:
+ error = state_machine.reporter.error(
+                '%s stub column(s) specified but only %s column(s) of data '
+ 'supplied ("%s" directive).' % (stub_columns, len(row), name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ if len(row) == stub_columns > 0:
+ error = state_machine.reporter.error(
+                'Insufficient data supplied (%s column(s)); no data remaining '
+ 'for table body, required by "%s" directive.'
+ % (len(row), name),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+
+def get_column_widths(max_cols, name, options, lineno, block_text,
+ state_machine):
+ if options.has_key('widths'):
+ col_widths = options['widths']
+ if len(col_widths) != max_cols:
+ error = state_machine.reporter.error(
+ '"%s" widths do not match the number of columns in table (%s).'
+ % (name, max_cols),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ elif max_cols:
+ col_widths = [100 / max_cols] * max_cols
+ else:
+ error = state_machine.reporter.error(
+ 'No table data detected in CSV file.',
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ return col_widths
+
+def extend_short_rows_with_empty_cells(columns, parts):
+ for part in parts:
+ for row in part:
+ if len(row) < columns:
+ row.extend([(0, 0, 0, [])] * (columns - len(row)))
+
+def list_table(name, arguments, options, content, lineno,
+ content_offset, block_text, state, state_machine):
+ """
+ Implement tables whose data is encoded as a uniform two-level bullet list.
+ For further ideas, see
+ http://docutils.sf.net/docs/dev/rst/alternatives.html#list-driven-tables
+ """
+ if not content:
+ error = state_machine.reporter.error(
+ 'The "%s" directive is empty; content required.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error]
+ title, messages = make_title(arguments, state, lineno)
+ node = nodes.Element() # anonymous container for parsing
+ state.nested_parse(content, content_offset, node)
+ try:
+ num_cols, col_widths = check_list_content(
+ node, name, options, content, lineno, block_text, state_machine)
+ table_data = [[item.children for item in row_list[0]]
+ for row_list in node[0]]
+ header_rows = options.get('header-rows', 0) # default 0
+ stub_columns = options.get('stub-columns', 0) # default 0
+ check_table_dimensions(
+ table_data, header_rows, stub_columns, name, lineno,
+ block_text, state_machine)
+ except SystemMessagePropagation, detail:
+ return [detail.args[0]]
+ table_node = build_table_from_list(table_data, col_widths,
+ header_rows, stub_columns)
+ table_node['classes'] += options.get('class', [])
+ if title:
+ table_node.insert(0, title)
+ return [table_node] + messages
+
+list_table.arguments = (0, 1, 1)
+list_table.options = {'header-rows': directives.nonnegative_int,
+ 'stub-columns': directives.nonnegative_int,
+ 'widths': directives.positive_int_list,
+ 'class': directives.class_option}
+list_table.content = 1
+
+def check_list_content(node, name, options, content, lineno, block_text,
+ state_machine):
+ if len(node) != 1 or not isinstance(node[0], nodes.bullet_list):
+ error = state_machine.reporter.error(
+ 'Error parsing content block for the "%s" directive: '
+ 'exactly one bullet list expected.' % name,
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ list_node = node[0]
+ # Check for a uniform two-level bullet list:
+ for item_index in range(len(list_node)):
+ item = list_node[item_index]
+ if len(item) != 1 or not isinstance(item[0], nodes.bullet_list):
+ error = state_machine.reporter.error(
+ 'Error parsing content block for the "%s" directive: '
+ 'two-level bullet list expected, but row %s does not contain '
+ 'a second-level bullet list.' % (name, item_index + 1),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ elif item_index:
+ # ATTN pychecker users: num_cols is guaranteed to be set in the
+ # "else" clause below for item_index==0, before this branch is
+ # triggered.
+ if len(item[0]) != num_cols:
+ error = state_machine.reporter.error(
+ 'Error parsing content block for the "%s" directive: '
+ 'uniform two-level bullet list expected, but row %s does '
+ 'not contain the same number of items as row 1 (%s vs %s).'
+ % (name, item_index + 1, len(item[0]), num_cols),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ else:
+ num_cols = len(item[0])
+ col_widths = get_column_widths(
+ num_cols, name, options, lineno, block_text, state_machine)
+ if len(col_widths) != num_cols:
+ error = state_machine.reporter.error(
+ 'Error parsing "widths" option of the "%s" directive: '
+ 'number of columns does not match the table data (%s vs %s).'
+ % (name, len(col_widths), num_cols),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ raise SystemMessagePropagation(error)
+ return num_cols, col_widths
+
+def build_table_from_list(table_data, col_widths, header_rows, stub_columns):
+ table = nodes.table()
+ tgroup = nodes.tgroup(cols=len(col_widths))
+ table += tgroup
+ for col_width in col_widths:
+ colspec = nodes.colspec(colwidth=col_width)
+ if stub_columns:
+ colspec.attributes['stub'] = 1
+ stub_columns -= 1
+ tgroup += colspec
+ rows = []
+ for row in table_data:
+ row_node = nodes.row()
+ for cell in row:
+ entry = nodes.entry()
+ entry += cell
+ row_node += entry
+ rows.append(row_node)
+ if header_rows:
+ thead = nodes.thead()
+ thead.extend(rows[:header_rows])
+ tgroup += thead
+ tbody = nodes.tbody()
+ tbody.extend(rows[header_rows:])
+ tgroup += tbody
+ return table
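
An illustrative sketch of the table directives defined above; the data is
invented, and the option syntax follows ``csv_table.options`` and
``list_table.options``::

    from docutils.core import publish_string

    source = """\
    .. csv-table:: Sample CSV table
       :header: "Treat", "Quantity", "Description"
       :widths: 15, 10, 30

       "Albatross", 2.99, "On a stick!"
       "Gannet Ripple", 1.99, "Also on a stick."

    .. list-table:: The same data as a two-level bullet list
       :header-rows: 1
       :widths: 15 10 30

       * - Treat
         - Quantity
         - Description
       * - Albatross
         - 2.99
         - On a stick!
    """

    print publish_string(source, writer_name='pseudoxml')
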
diff --git a/docutils/parsers/rst/include/README.txt b/docutils/parsers/rst/include/README.txt
new file mode 100644
index 000000000..cd03135f9
--- /dev/null
+++ b/docutils/parsers/rst/include/README.txt
@@ -0,0 +1,17 @@
+============================================
+ ``docutils/parsers/rst/include`` Directory
+============================================
+
+This directory contains standard data files intended for inclusion in
+reStructuredText documents. To access these files, use the "include"
+directive with the special syntax for standard "include" data files,
+angle brackets around the file name::
+
+ .. include:: <isonum.txt>
+
+See the documentation for the `"include" directive`__ and
+`reStructuredText Standard Substitution Definition Sets`__ for
+details.
+
+__ http://docutils.sf.net/docs/ref/rst/directives.html#include
+__ http://docutils.sf.net/docs/ref/rst/substitutions.html
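
For completeness, a tiny sketch of how one of these standard definition sets
might be pulled in from Python, assuming the default settings (file insertion
enabled)::

    from docutils.core import publish_string

    source = """\
    .. include:: <isonum.txt>

    Copyright |copy| 2006, placed in the public domain.
    """

    print publish_string(source, writer_name='pseudoxml')
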
diff --git a/docutils/parsers/rst/include/isoamsa.txt b/docutils/parsers/rst/include/isoamsa.txt
new file mode 100644
index 000000000..e6f451800
--- /dev/null
+++ b/docutils/parsers/rst/include/isoamsa.txt
@@ -0,0 +1,162 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |angzarr| unicode:: U+0237C .. RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
+.. |cirmid| unicode:: U+02AEF .. VERTICAL LINE WITH CIRCLE ABOVE
+.. |cudarrl| unicode:: U+02938 .. RIGHT-SIDE ARC CLOCKWISE ARROW
+.. |cudarrr| unicode:: U+02935 .. ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS
+.. |cularr| unicode:: U+021B6 .. ANTICLOCKWISE TOP SEMICIRCLE ARROW
+.. |cularrp| unicode:: U+0293D .. TOP ARC ANTICLOCKWISE ARROW WITH PLUS
+.. |curarr| unicode:: U+021B7 .. CLOCKWISE TOP SEMICIRCLE ARROW
+.. |curarrm| unicode:: U+0293C .. TOP ARC CLOCKWISE ARROW WITH MINUS
+.. |Darr| unicode:: U+021A1 .. DOWNWARDS TWO HEADED ARROW
+.. |dArr| unicode:: U+021D3 .. DOWNWARDS DOUBLE ARROW
+.. |darr2| unicode:: U+021CA .. DOWNWARDS PAIRED ARROWS
+.. |ddarr| unicode:: U+021CA .. DOWNWARDS PAIRED ARROWS
+.. |DDotrahd| unicode:: U+02911 .. RIGHTWARDS ARROW WITH DOTTED STEM
+.. |dfisht| unicode:: U+0297F .. DOWN FISH TAIL
+.. |dHar| unicode:: U+02965 .. DOWNWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT
+.. |dharl| unicode:: U+021C3 .. DOWNWARDS HARPOON WITH BARB LEFTWARDS
+.. |dharr| unicode:: U+021C2 .. DOWNWARDS HARPOON WITH BARB RIGHTWARDS
+.. |dlarr| unicode:: U+02199 .. SOUTH WEST ARROW
+.. |drarr| unicode:: U+02198 .. SOUTH EAST ARROW
+.. |duarr| unicode:: U+021F5 .. DOWNWARDS ARROW LEFTWARDS OF UPWARDS ARROW
+.. |duhar| unicode:: U+0296F .. DOWNWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT
+.. |dzigrarr| unicode:: U+027FF .. LONG RIGHTWARDS SQUIGGLE ARROW
+.. |erarr| unicode:: U+02971 .. EQUALS SIGN ABOVE RIGHTWARDS ARROW
+.. |hArr| unicode:: U+021D4 .. LEFT RIGHT DOUBLE ARROW
+.. |harr| unicode:: U+02194 .. LEFT RIGHT ARROW
+.. |harrcir| unicode:: U+02948 .. LEFT RIGHT ARROW THROUGH SMALL CIRCLE
+.. |harrw| unicode:: U+021AD .. LEFT RIGHT WAVE ARROW
+.. |hoarr| unicode:: U+021FF .. LEFT RIGHT OPEN-HEADED ARROW
+.. |imof| unicode:: U+022B7 .. IMAGE OF
+.. |lAarr| unicode:: U+021DA .. LEFTWARDS TRIPLE ARROW
+.. |Larr| unicode:: U+0219E .. LEFTWARDS TWO HEADED ARROW
+.. |larr2| unicode:: U+021C7 .. LEFTWARDS PAIRED ARROWS
+.. |larrbfs| unicode:: U+0291F .. LEFTWARDS ARROW FROM BAR TO BLACK DIAMOND
+.. |larrfs| unicode:: U+0291D .. LEFTWARDS ARROW TO BLACK DIAMOND
+.. |larrhk| unicode:: U+021A9 .. LEFTWARDS ARROW WITH HOOK
+.. |larrlp| unicode:: U+021AB .. LEFTWARDS ARROW WITH LOOP
+.. |larrpl| unicode:: U+02939 .. LEFT-SIDE ARC ANTICLOCKWISE ARROW
+.. |larrsim| unicode:: U+02973 .. LEFTWARDS ARROW ABOVE TILDE OPERATOR
+.. |larrtl| unicode:: U+021A2 .. LEFTWARDS ARROW WITH TAIL
+.. |lAtail| unicode:: U+0291B .. LEFTWARDS DOUBLE ARROW-TAIL
+.. |latail| unicode:: U+02919 .. LEFTWARDS ARROW-TAIL
+.. |lBarr| unicode:: U+0290E .. LEFTWARDS TRIPLE DASH ARROW
+.. |lbarr| unicode:: U+0290C .. LEFTWARDS DOUBLE DASH ARROW
+.. |ldca| unicode:: U+02936 .. ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS
+.. |ldrdhar| unicode:: U+02967 .. LEFTWARDS HARPOON WITH BARB DOWN ABOVE RIGHTWARDS HARPOON WITH BARB DOWN
+.. |ldrushar| unicode:: U+0294B .. LEFT BARB DOWN RIGHT BARB UP HARPOON
+.. |ldsh| unicode:: U+021B2 .. DOWNWARDS ARROW WITH TIP LEFTWARDS
+.. |lfisht| unicode:: U+0297C .. LEFT FISH TAIL
+.. |lHar| unicode:: U+02962 .. LEFTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB DOWN
+.. |lhard| unicode:: U+021BD .. LEFTWARDS HARPOON WITH BARB DOWNWARDS
+.. |lharu| unicode:: U+021BC .. LEFTWARDS HARPOON WITH BARB UPWARDS
+.. |lharul| unicode:: U+0296A .. LEFTWARDS HARPOON WITH BARB UP ABOVE LONG DASH
+.. |llarr| unicode:: U+021C7 .. LEFTWARDS PAIRED ARROWS
+.. |llhard| unicode:: U+0296B .. LEFTWARDS HARPOON WITH BARB DOWN BELOW LONG DASH
+.. |loarr| unicode:: U+021FD .. LEFTWARDS OPEN-HEADED ARROW
+.. |lrarr| unicode:: U+021C6 .. LEFTWARDS ARROW OVER RIGHTWARDS ARROW
+.. |lrarr2| unicode:: U+021C6 .. LEFTWARDS ARROW OVER RIGHTWARDS ARROW
+.. |lrhar| unicode:: U+021CB .. LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
+.. |lrhar2| unicode:: U+021CB .. LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
+.. |lrhard| unicode:: U+0296D .. RIGHTWARDS HARPOON WITH BARB DOWN BELOW LONG DASH
+.. |lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS
+.. |lurdshar| unicode:: U+0294A .. LEFT BARB UP RIGHT BARB DOWN HARPOON
+.. |luruhar| unicode:: U+02966 .. LEFTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB UP
+.. |Map| unicode:: U+02905 .. RIGHTWARDS TWO-HEADED ARROW FROM BAR
+.. |map| unicode:: U+021A6 .. RIGHTWARDS ARROW FROM BAR
+.. |midcir| unicode:: U+02AF0 .. VERTICAL LINE WITH CIRCLE BELOW
+.. |mumap| unicode:: U+022B8 .. MULTIMAP
+.. |nearhk| unicode:: U+02924 .. NORTH EAST ARROW WITH HOOK
+.. |neArr| unicode:: U+021D7 .. NORTH EAST DOUBLE ARROW
+.. |nearr| unicode:: U+02197 .. NORTH EAST ARROW
+.. |nesear| unicode:: U+02928 .. NORTH EAST ARROW AND SOUTH EAST ARROW
+.. |nhArr| unicode:: U+021CE .. LEFT RIGHT DOUBLE ARROW WITH STROKE
+.. |nharr| unicode:: U+021AE .. LEFT RIGHT ARROW WITH STROKE
+.. |nlArr| unicode:: U+021CD .. LEFTWARDS DOUBLE ARROW WITH STROKE
+.. |nlarr| unicode:: U+0219A .. LEFTWARDS ARROW WITH STROKE
+.. |nrArr| unicode:: U+021CF .. RIGHTWARDS DOUBLE ARROW WITH STROKE
+.. |nrarr| unicode:: U+0219B .. RIGHTWARDS ARROW WITH STROKE
+.. |nrarrc| unicode:: U+02933 U+00338 .. WAVE ARROW POINTING DIRECTLY RIGHT with slash
+.. |nrarrw| unicode:: U+0219D U+00338 .. RIGHTWARDS WAVE ARROW with slash
+.. |nvHarr| unicode:: U+02904 .. LEFT RIGHT DOUBLE ARROW WITH VERTICAL STROKE
+.. |nvlArr| unicode:: U+02902 .. LEFTWARDS DOUBLE ARROW WITH VERTICAL STROKE
+.. |nvrArr| unicode:: U+02903 .. RIGHTWARDS DOUBLE ARROW WITH VERTICAL STROKE
+.. |nwarhk| unicode:: U+02923 .. NORTH WEST ARROW WITH HOOK
+.. |nwArr| unicode:: U+021D6 .. NORTH WEST DOUBLE ARROW
+.. |nwarr| unicode:: U+02196 .. NORTH WEST ARROW
+.. |nwnear| unicode:: U+02927 .. NORTH WEST ARROW AND NORTH EAST ARROW
+.. |olarr| unicode:: U+021BA .. ANTICLOCKWISE OPEN CIRCLE ARROW
+.. |orarr| unicode:: U+021BB .. CLOCKWISE OPEN CIRCLE ARROW
+.. |origof| unicode:: U+022B6 .. ORIGINAL OF
+.. |rAarr| unicode:: U+021DB .. RIGHTWARDS TRIPLE ARROW
+.. |Rarr| unicode:: U+021A0 .. RIGHTWARDS TWO HEADED ARROW
+.. |rarr2| unicode:: U+021C9 .. RIGHTWARDS PAIRED ARROWS
+.. |rarrap| unicode:: U+02975 .. RIGHTWARDS ARROW ABOVE ALMOST EQUAL TO
+.. |rarrbfs| unicode:: U+02920 .. RIGHTWARDS ARROW FROM BAR TO BLACK DIAMOND
+.. |rarrc| unicode:: U+02933 .. WAVE ARROW POINTING DIRECTLY RIGHT
+.. |rarrfs| unicode:: U+0291E .. RIGHTWARDS ARROW TO BLACK DIAMOND
+.. |rarrhk| unicode:: U+021AA .. RIGHTWARDS ARROW WITH HOOK
+.. |rarrlp| unicode:: U+021AC .. RIGHTWARDS ARROW WITH LOOP
+.. |rarrpl| unicode:: U+02945 .. RIGHTWARDS ARROW WITH PLUS BELOW
+.. |rarrsim| unicode:: U+02974 .. RIGHTWARDS ARROW ABOVE TILDE OPERATOR
+.. |Rarrtl| unicode:: U+02916 .. RIGHTWARDS TWO-HEADED ARROW WITH TAIL
+.. |rarrtl| unicode:: U+021A3 .. RIGHTWARDS ARROW WITH TAIL
+.. |rarrw| unicode:: U+0219D .. RIGHTWARDS WAVE ARROW
+.. |rAtail| unicode:: U+0291C .. RIGHTWARDS DOUBLE ARROW-TAIL
+.. |ratail| unicode:: U+0291A .. RIGHTWARDS ARROW-TAIL
+.. |RBarr| unicode:: U+02910 .. RIGHTWARDS TWO-HEADED TRIPLE DASH ARROW
+.. |rBarr| unicode:: U+0290F .. RIGHTWARDS TRIPLE DASH ARROW
+.. |rbarr| unicode:: U+0290D .. RIGHTWARDS DOUBLE DASH ARROW
+.. |rdca| unicode:: U+02937 .. ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS
+.. |rdldhar| unicode:: U+02969 .. RIGHTWARDS HARPOON WITH BARB DOWN ABOVE LEFTWARDS HARPOON WITH BARB DOWN
+.. |rdsh| unicode:: U+021B3 .. DOWNWARDS ARROW WITH TIP RIGHTWARDS
+.. |rfisht| unicode:: U+0297D .. RIGHT FISH TAIL
+.. |rHar| unicode:: U+02964 .. RIGHTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB DOWN
+.. |rhard| unicode:: U+021C1 .. RIGHTWARDS HARPOON WITH BARB DOWNWARDS
+.. |rharu| unicode:: U+021C0 .. RIGHTWARDS HARPOON WITH BARB UPWARDS
+.. |rharul| unicode:: U+0296C .. RIGHTWARDS HARPOON WITH BARB UP ABOVE LONG DASH
+.. |rlarr| unicode:: U+021C4 .. RIGHTWARDS ARROW OVER LEFTWARDS ARROW
+.. |rlarr2| unicode:: U+021C4 .. RIGHTWARDS ARROW OVER LEFTWARDS ARROW
+.. |rlhar| unicode:: U+021CC .. RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+.. |rlhar2| unicode:: U+021CC .. RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+.. |roarr| unicode:: U+021FE .. RIGHTWARDS OPEN-HEADED ARROW
+.. |rrarr| unicode:: U+021C9 .. RIGHTWARDS PAIRED ARROWS
+.. |rsh| unicode:: U+021B1 .. UPWARDS ARROW WITH TIP RIGHTWARDS
+.. |ruluhar| unicode:: U+02968 .. RIGHTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB UP
+.. |searhk| unicode:: U+02925 .. SOUTH EAST ARROW WITH HOOK
+.. |seArr| unicode:: U+021D8 .. SOUTH EAST DOUBLE ARROW
+.. |searr| unicode:: U+02198 .. SOUTH EAST ARROW
+.. |seswar| unicode:: U+02929 .. SOUTH EAST ARROW AND SOUTH WEST ARROW
+.. |simrarr| unicode:: U+02972 .. TILDE OPERATOR ABOVE RIGHTWARDS ARROW
+.. |slarr| unicode:: U+02190 .. LEFTWARDS ARROW
+.. |srarr| unicode:: U+02192 .. RIGHTWARDS ARROW
+.. |swarhk| unicode:: U+02926 .. SOUTH WEST ARROW WITH HOOK
+.. |swArr| unicode:: U+021D9 .. SOUTH WEST DOUBLE ARROW
+.. |swarr| unicode:: U+02199 .. SOUTH WEST ARROW
+.. |swnwar| unicode:: U+0292A .. SOUTH WEST ARROW AND NORTH WEST ARROW
+.. |Uarr| unicode:: U+0219F .. UPWARDS TWO HEADED ARROW
+.. |uArr| unicode:: U+021D1 .. UPWARDS DOUBLE ARROW
+.. |uarr2| unicode:: U+021C8 .. UPWARDS PAIRED ARROWS
+.. |Uarrocir| unicode:: U+02949 .. UPWARDS TWO-HEADED ARROW FROM SMALL CIRCLE
+.. |udarr| unicode:: U+021C5 .. UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW
+.. |udhar| unicode:: U+0296E .. UPWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT
+.. |ufisht| unicode:: U+0297E .. UP FISH TAIL
+.. |uHar| unicode:: U+02963 .. UPWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT
+.. |uharl| unicode:: U+021BF .. UPWARDS HARPOON WITH BARB LEFTWARDS
+.. |uharr| unicode:: U+021BE .. UPWARDS HARPOON WITH BARB RIGHTWARDS
+.. |uuarr| unicode:: U+021C8 .. UPWARDS PAIRED ARROWS
+.. |vArr| unicode:: U+021D5 .. UP DOWN DOUBLE ARROW
+.. |varr| unicode:: U+02195 .. UP DOWN ARROW
+.. |xhArr| unicode:: U+027FA .. LONG LEFT RIGHT DOUBLE ARROW
+.. |xharr| unicode:: U+027F7 .. LONG LEFT RIGHT ARROW
+.. |xlArr| unicode:: U+027F8 .. LONG LEFTWARDS DOUBLE ARROW
+.. |xlarr| unicode:: U+027F5 .. LONG LEFTWARDS ARROW
+.. |xmap| unicode:: U+027FC .. LONG RIGHTWARDS ARROW FROM BAR
+.. |xrArr| unicode:: U+027F9 .. LONG RIGHTWARDS DOUBLE ARROW
+.. |xrarr| unicode:: U+027F6 .. LONG RIGHTWARDS ARROW
+.. |zigrarr| unicode:: U+021DD .. RIGHTWARDS SQUIGGLE ARROW
diff --git a/docutils/parsers/rst/include/isoamsb.txt b/docutils/parsers/rst/include/isoamsb.txt
new file mode 100644
index 000000000..05e68d99d
--- /dev/null
+++ b/docutils/parsers/rst/include/isoamsb.txt
@@ -0,0 +1,126 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |ac| unicode:: U+0223E .. INVERTED LAZY S
+.. |acE| unicode:: U+0223E U+00333 .. INVERTED LAZY S with double underline
+.. |amalg| unicode:: U+02A3F .. AMALGAMATION OR COPRODUCT
+.. |barvee| unicode:: U+022BD .. NOR
+.. |Barwed| unicode:: U+02306 .. PERSPECTIVE
+.. |barwed| unicode:: U+02305 .. PROJECTIVE
+.. |bsolb| unicode:: U+029C5 .. SQUARED FALLING DIAGONAL SLASH
+.. |Cap| unicode:: U+022D2 .. DOUBLE INTERSECTION
+.. |capand| unicode:: U+02A44 .. INTERSECTION WITH LOGICAL AND
+.. |capbrcup| unicode:: U+02A49 .. INTERSECTION ABOVE BAR ABOVE UNION
+.. |capcap| unicode:: U+02A4B .. INTERSECTION BESIDE AND JOINED WITH INTERSECTION
+.. |capcup| unicode:: U+02A47 .. INTERSECTION ABOVE UNION
+.. |capdot| unicode:: U+02A40 .. INTERSECTION WITH DOT
+.. |caps| unicode:: U+02229 U+0FE00 .. INTERSECTION with serifs
+.. |ccaps| unicode:: U+02A4D .. CLOSED INTERSECTION WITH SERIFS
+.. |ccups| unicode:: U+02A4C .. CLOSED UNION WITH SERIFS
+.. |ccupssm| unicode:: U+02A50 .. CLOSED UNION WITH SERIFS AND SMASH PRODUCT
+.. |coprod| unicode:: U+02210 .. N-ARY COPRODUCT
+.. |Cup| unicode:: U+022D3 .. DOUBLE UNION
+.. |cupbrcap| unicode:: U+02A48 .. UNION ABOVE BAR ABOVE INTERSECTION
+.. |cupcap| unicode:: U+02A46 .. UNION ABOVE INTERSECTION
+.. |cupcup| unicode:: U+02A4A .. UNION BESIDE AND JOINED WITH UNION
+.. |cupdot| unicode:: U+0228D .. MULTISET MULTIPLICATION
+.. |cupor| unicode:: U+02A45 .. UNION WITH LOGICAL OR
+.. |cups| unicode:: U+0222A U+0FE00 .. UNION with serifs
+.. |cuvee| unicode:: U+022CE .. CURLY LOGICAL OR
+.. |cuwed| unicode:: U+022CF .. CURLY LOGICAL AND
+.. |Dagger| unicode:: U+02021 .. DOUBLE DAGGER
+.. |dagger| unicode:: U+02020 .. DAGGER
+.. |diam| unicode:: U+022C4 .. DIAMOND OPERATOR
+.. |divonx| unicode:: U+022C7 .. DIVISION TIMES
+.. |eplus| unicode:: U+02A71 .. EQUALS SIGN ABOVE PLUS SIGN
+.. |hercon| unicode:: U+022B9 .. HERMITIAN CONJUGATE MATRIX
+.. |intcal| unicode:: U+022BA .. INTERCALATE
+.. |iprod| unicode:: U+02A3C .. INTERIOR PRODUCT
+.. |loplus| unicode:: U+02A2D .. PLUS SIGN IN LEFT HALF CIRCLE
+.. |lotimes| unicode:: U+02A34 .. MULTIPLICATION SIGN IN LEFT HALF CIRCLE
+.. |lthree| unicode:: U+022CB .. LEFT SEMIDIRECT PRODUCT
+.. |ltimes| unicode:: U+022C9 .. LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+.. |midast| unicode:: U+0002A .. ASTERISK
+.. |minusb| unicode:: U+0229F .. SQUARED MINUS
+.. |minusd| unicode:: U+02238 .. DOT MINUS
+.. |minusdu| unicode:: U+02A2A .. MINUS SIGN WITH DOT BELOW
+.. |ncap| unicode:: U+02A43 .. INTERSECTION WITH OVERBAR
+.. |ncup| unicode:: U+02A42 .. UNION WITH OVERBAR
+.. |oast| unicode:: U+0229B .. CIRCLED ASTERISK OPERATOR
+.. |ocir| unicode:: U+0229A .. CIRCLED RING OPERATOR
+.. |odash| unicode:: U+0229D .. CIRCLED DASH
+.. |odiv| unicode:: U+02A38 .. CIRCLED DIVISION SIGN
+.. |odot| unicode:: U+02299 .. CIRCLED DOT OPERATOR
+.. |odsold| unicode:: U+029BC .. CIRCLED ANTICLOCKWISE-ROTATED DIVISION SIGN
+.. |ofcir| unicode:: U+029BF .. CIRCLED BULLET
+.. |ogt| unicode:: U+029C1 .. CIRCLED GREATER-THAN
+.. |ohbar| unicode:: U+029B5 .. CIRCLE WITH HORIZONTAL BAR
+.. |olcir| unicode:: U+029BE .. CIRCLED WHITE BULLET
+.. |olt| unicode:: U+029C0 .. CIRCLED LESS-THAN
+.. |omid| unicode:: U+029B6 .. CIRCLED VERTICAL BAR
+.. |ominus| unicode:: U+02296 .. CIRCLED MINUS
+.. |opar| unicode:: U+029B7 .. CIRCLED PARALLEL
+.. |operp| unicode:: U+029B9 .. CIRCLED PERPENDICULAR
+.. |oplus| unicode:: U+02295 .. CIRCLED PLUS
+.. |osol| unicode:: U+02298 .. CIRCLED DIVISION SLASH
+.. |Otimes| unicode:: U+02A37 .. MULTIPLICATION SIGN IN DOUBLE CIRCLE
+.. |otimes| unicode:: U+02297 .. CIRCLED TIMES
+.. |otimesas| unicode:: U+02A36 .. CIRCLED MULTIPLICATION SIGN WITH CIRCUMFLEX ACCENT
+.. |ovbar| unicode:: U+0233D .. APL FUNCTIONAL SYMBOL CIRCLE STILE
+.. |plusacir| unicode:: U+02A23 .. PLUS SIGN WITH CIRCUMFLEX ACCENT ABOVE
+.. |plusb| unicode:: U+0229E .. SQUARED PLUS
+.. |pluscir| unicode:: U+02A22 .. PLUS SIGN WITH SMALL CIRCLE ABOVE
+.. |plusdo| unicode:: U+02214 .. DOT PLUS
+.. |plusdu| unicode:: U+02A25 .. PLUS SIGN WITH DOT BELOW
+.. |pluse| unicode:: U+02A72 .. PLUS SIGN ABOVE EQUALS SIGN
+.. |plussim| unicode:: U+02A26 .. PLUS SIGN WITH TILDE BELOW
+.. |plustwo| unicode:: U+02A27 .. PLUS SIGN WITH SUBSCRIPT TWO
+.. |prod| unicode:: U+0220F .. N-ARY PRODUCT
+.. |race| unicode:: U+029DA .. LEFT DOUBLE WIGGLY FENCE
+.. |roplus| unicode:: U+02A2E .. PLUS SIGN IN RIGHT HALF CIRCLE
+.. |rotimes| unicode:: U+02A35 .. MULTIPLICATION SIGN IN RIGHT HALF CIRCLE
+.. |rthree| unicode:: U+022CC .. RIGHT SEMIDIRECT PRODUCT
+.. |rtimes| unicode:: U+022CA .. RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+.. |sdot| unicode:: U+022C5 .. DOT OPERATOR
+.. |sdotb| unicode:: U+022A1 .. SQUARED DOT OPERATOR
+.. |setmn| unicode:: U+02216 .. SET MINUS
+.. |simplus| unicode:: U+02A24 .. PLUS SIGN WITH TILDE ABOVE
+.. |smashp| unicode:: U+02A33 .. SMASH PRODUCT
+.. |solb| unicode:: U+029C4 .. SQUARED RISING DIAGONAL SLASH
+.. |sqcap| unicode:: U+02293 .. SQUARE CAP
+.. |sqcaps| unicode:: U+02293 U+0FE00 .. SQUARE CAP with serifs
+.. |sqcup| unicode:: U+02294 .. SQUARE CUP
+.. |sqcups| unicode:: U+02294 U+0FE00 .. SQUARE CUP with serifs
+.. |ssetmn| unicode:: U+02216 .. SET MINUS
+.. |sstarf| unicode:: U+022C6 .. STAR OPERATOR
+.. |subdot| unicode:: U+02ABD .. SUBSET WITH DOT
+.. |sum| unicode:: U+02211 .. N-ARY SUMMATION
+.. |supdot| unicode:: U+02ABE .. SUPERSET WITH DOT
+.. |timesb| unicode:: U+022A0 .. SQUARED TIMES
+.. |timesbar| unicode:: U+02A31 .. MULTIPLICATION SIGN WITH UNDERBAR
+.. |timesd| unicode:: U+02A30 .. MULTIPLICATION SIGN WITH DOT ABOVE
+.. |top| unicode:: U+022A4 .. DOWN TACK
+.. |tridot| unicode:: U+025EC .. WHITE UP-POINTING TRIANGLE WITH DOT
+.. |triminus| unicode:: U+02A3A .. MINUS SIGN IN TRIANGLE
+.. |triplus| unicode:: U+02A39 .. PLUS SIGN IN TRIANGLE
+.. |trisb| unicode:: U+029CD .. TRIANGLE WITH SERIFS AT BOTTOM
+.. |tritime| unicode:: U+02A3B .. MULTIPLICATION SIGN IN TRIANGLE
+.. |uplus| unicode:: U+0228E .. MULTISET UNION
+.. |veebar| unicode:: U+022BB .. XOR
+.. |wedbar| unicode:: U+02A5F .. LOGICAL AND WITH UNDERBAR
+.. |wreath| unicode:: U+02240 .. WREATH PRODUCT
+.. |xcap| unicode:: U+022C2 .. N-ARY INTERSECTION
+.. |xcirc| unicode:: U+025EF .. LARGE CIRCLE
+.. |xcup| unicode:: U+022C3 .. N-ARY UNION
+.. |xdtri| unicode:: U+025BD .. WHITE DOWN-POINTING TRIANGLE
+.. |xodot| unicode:: U+02A00 .. N-ARY CIRCLED DOT OPERATOR
+.. |xoplus| unicode:: U+02A01 .. N-ARY CIRCLED PLUS OPERATOR
+.. |xotime| unicode:: U+02A02 .. N-ARY CIRCLED TIMES OPERATOR
+.. |xsqcup| unicode:: U+02A06 .. N-ARY SQUARE UNION OPERATOR
+.. |xuplus| unicode:: U+02A04 .. N-ARY UNION OPERATOR WITH PLUS
+.. |xutri| unicode:: U+025B3 .. WHITE UP-POINTING TRIANGLE
+.. |xvee| unicode:: U+022C1 .. N-ARY LOGICAL OR
+.. |xwedge| unicode:: U+022C0 .. N-ARY LOGICAL AND
diff --git a/docutils/parsers/rst/include/isoamsc.txt b/docutils/parsers/rst/include/isoamsc.txt
new file mode 100644
index 000000000..343504d83
--- /dev/null
+++ b/docutils/parsers/rst/include/isoamsc.txt
@@ -0,0 +1,29 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |dlcorn| unicode:: U+0231E .. BOTTOM LEFT CORNER
+.. |drcorn| unicode:: U+0231F .. BOTTOM RIGHT CORNER
+.. |gtlPar| unicode:: U+02995 .. DOUBLE LEFT ARC GREATER-THAN BRACKET
+.. |langd| unicode:: U+02991 .. LEFT ANGLE BRACKET WITH DOT
+.. |lbrke| unicode:: U+0298B .. LEFT SQUARE BRACKET WITH UNDERBAR
+.. |lbrksld| unicode:: U+0298F .. LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+.. |lbrkslu| unicode:: U+0298D .. LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+.. |lceil| unicode:: U+02308 .. LEFT CEILING
+.. |lfloor| unicode:: U+0230A .. LEFT FLOOR
+.. |lmoust| unicode:: U+023B0 .. UPPER LEFT OR LOWER RIGHT CURLY BRACKET SECTION
+.. |lpargt| unicode:: U+029A0 .. SPHERICAL ANGLE OPENING LEFT
+.. |lparlt| unicode:: U+02993 .. LEFT ARC LESS-THAN BRACKET
+.. |ltrPar| unicode:: U+02996 .. DOUBLE RIGHT ARC LESS-THAN BRACKET
+.. |rangd| unicode:: U+02992 .. RIGHT ANGLE BRACKET WITH DOT
+.. |rbrke| unicode:: U+0298C .. RIGHT SQUARE BRACKET WITH UNDERBAR
+.. |rbrksld| unicode:: U+0298E .. RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+.. |rbrkslu| unicode:: U+02990 .. RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+.. |rceil| unicode:: U+02309 .. RIGHT CEILING
+.. |rfloor| unicode:: U+0230B .. RIGHT FLOOR
+.. |rmoust| unicode:: U+023B1 .. UPPER RIGHT OR LOWER LEFT CURLY BRACKET SECTION
+.. |rpargt| unicode:: U+02994 .. RIGHT ARC GREATER-THAN BRACKET
+.. |ulcorn| unicode:: U+0231C .. TOP LEFT CORNER
+.. |urcorn| unicode:: U+0231D .. TOP RIGHT CORNER
diff --git a/docutils/parsers/rst/include/isoamsn.txt b/docutils/parsers/rst/include/isoamsn.txt
new file mode 100644
index 000000000..5ff17291e
--- /dev/null
+++ b/docutils/parsers/rst/include/isoamsn.txt
@@ -0,0 +1,96 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |gnap| unicode:: U+02A8A .. GREATER-THAN AND NOT APPROXIMATE
+.. |gnE| unicode:: U+02269 .. GREATER-THAN BUT NOT EQUAL TO
+.. |gne| unicode:: U+02A88 .. GREATER-THAN AND SINGLE-LINE NOT EQUAL TO
+.. |gnsim| unicode:: U+022E7 .. GREATER-THAN BUT NOT EQUIVALENT TO
+.. |gvnE| unicode:: U+02269 U+0FE00 .. GREATER-THAN BUT NOT EQUAL TO - with vertical stroke
+.. |lnap| unicode:: U+02A89 .. LESS-THAN AND NOT APPROXIMATE
+.. |lnE| unicode:: U+02268 .. LESS-THAN BUT NOT EQUAL TO
+.. |lne| unicode:: U+02A87 .. LESS-THAN AND SINGLE-LINE NOT EQUAL TO
+.. |lnsim| unicode:: U+022E6 .. LESS-THAN BUT NOT EQUIVALENT TO
+.. |lvnE| unicode:: U+02268 U+0FE00 .. LESS-THAN BUT NOT EQUAL TO - with vertical stroke
+.. |nap| unicode:: U+02249 .. NOT ALMOST EQUAL TO
+.. |napE| unicode:: U+02A70 U+00338 .. APPROXIMATELY EQUAL OR EQUAL TO with slash
+.. |napid| unicode:: U+0224B U+00338 .. TRIPLE TILDE with slash
+.. |ncong| unicode:: U+02247 .. NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+.. |ncongdot| unicode:: U+02A6D U+00338 .. CONGRUENT WITH DOT ABOVE with slash
+.. |nequiv| unicode:: U+02262 .. NOT IDENTICAL TO
+.. |ngE| unicode:: U+02267 U+00338 .. GREATER-THAN OVER EQUAL TO with slash
+.. |nge| unicode:: U+02271 .. NEITHER GREATER-THAN NOR EQUAL TO
+.. |nges| unicode:: U+02A7E U+00338 .. GREATER-THAN OR SLANTED EQUAL TO with slash
+.. |nGg| unicode:: U+022D9 U+00338 .. VERY MUCH GREATER-THAN with slash
+.. |ngsim| unicode:: U+02275 .. NEITHER GREATER-THAN NOR EQUIVALENT TO
+.. |nGt| unicode:: U+0226B U+020D2 .. MUCH GREATER THAN with vertical line
+.. |ngt| unicode:: U+0226F .. NOT GREATER-THAN
+.. |nGtv| unicode:: U+0226B U+00338 .. MUCH GREATER THAN with slash
+.. |nlE| unicode:: U+02266 U+00338 .. LESS-THAN OVER EQUAL TO with slash
+.. |nle| unicode:: U+02270 .. NEITHER LESS-THAN NOR EQUAL TO
+.. |nles| unicode:: U+02A7D U+00338 .. LESS-THAN OR SLANTED EQUAL TO with slash
+.. |nLl| unicode:: U+022D8 U+00338 .. VERY MUCH LESS-THAN with slash
+.. |nlsim| unicode:: U+02274 .. NEITHER LESS-THAN NOR EQUIVALENT TO
+.. |nLt| unicode:: U+0226A U+020D2 .. MUCH LESS THAN with vertical line
+.. |nlt| unicode:: U+0226E .. NOT LESS-THAN
+.. |nltri| unicode:: U+022EA .. NOT NORMAL SUBGROUP OF
+.. |nltrie| unicode:: U+022EC .. NOT NORMAL SUBGROUP OF OR EQUAL TO
+.. |nLtv| unicode:: U+0226A U+00338 .. MUCH LESS THAN with slash
+.. |nmid| unicode:: U+02224 .. DOES NOT DIVIDE
+.. |npar| unicode:: U+02226 .. NOT PARALLEL TO
+.. |npr| unicode:: U+02280 .. DOES NOT PRECEDE
+.. |nprcue| unicode:: U+022E0 .. DOES NOT PRECEDE OR EQUAL
+.. |npre| unicode:: U+02AAF U+00338 .. PRECEDES ABOVE SINGLE-LINE EQUALS SIGN with slash
+.. |nrtri| unicode:: U+022EB .. DOES NOT CONTAIN AS NORMAL SUBGROUP
+.. |nrtrie| unicode:: U+022ED .. DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+.. |nsc| unicode:: U+02281 .. DOES NOT SUCCEED
+.. |nsccue| unicode:: U+022E1 .. DOES NOT SUCCEED OR EQUAL
+.. |nsce| unicode:: U+02AB0 U+00338 .. SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN with slash
+.. |nsim| unicode:: U+02241 .. NOT TILDE
+.. |nsime| unicode:: U+02244 .. NOT ASYMPTOTICALLY EQUAL TO
+.. |nsmid| unicode:: U+02224 .. DOES NOT DIVIDE
+.. |nspar| unicode:: U+02226 .. NOT PARALLEL TO
+.. |nsqsube| unicode:: U+022E2 .. NOT SQUARE IMAGE OF OR EQUAL TO
+.. |nsqsupe| unicode:: U+022E3 .. NOT SQUARE ORIGINAL OF OR EQUAL TO
+.. |nsub| unicode:: U+02284 .. NOT A SUBSET OF
+.. |nsubE| unicode:: U+02AC5 U+00338 .. SUBSET OF ABOVE EQUALS SIGN with slash
+.. |nsube| unicode:: U+02288 .. NEITHER A SUBSET OF NOR EQUAL TO
+.. |nsup| unicode:: U+02285 .. NOT A SUPERSET OF
+.. |nsupE| unicode:: U+02AC6 U+00338 .. SUPERSET OF ABOVE EQUALS SIGN with slash
+.. |nsupe| unicode:: U+02289 .. NEITHER A SUPERSET OF NOR EQUAL TO
+.. |ntgl| unicode:: U+02279 .. NEITHER GREATER-THAN NOR LESS-THAN
+.. |ntlg| unicode:: U+02278 .. NEITHER LESS-THAN NOR GREATER-THAN
+.. |nvap| unicode:: U+0224D U+020D2 .. EQUIVALENT TO with vertical line
+.. |nVDash| unicode:: U+022AF .. NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+.. |nVdash| unicode:: U+022AE .. DOES NOT FORCE
+.. |nvDash| unicode:: U+022AD .. NOT TRUE
+.. |nvdash| unicode:: U+022AC .. DOES NOT PROVE
+.. |nvge| unicode:: U+02265 U+020D2 .. GREATER-THAN OR EQUAL TO with vertical line
+.. |nvgt| unicode:: U+0003E U+020D2 .. GREATER-THAN SIGN with vertical line
+.. |nvle| unicode:: U+02264 U+020D2 .. LESS-THAN OR EQUAL TO with vertical line
+.. |nvlt| unicode:: U+0003C U+020D2 .. LESS-THAN SIGN with vertical line
+.. |nvltrie| unicode:: U+022B4 U+020D2 .. NORMAL SUBGROUP OF OR EQUAL TO with vertical line
+.. |nvrtrie| unicode:: U+022B5 U+020D2 .. CONTAINS AS NORMAL SUBGROUP OR EQUAL TO with vertical line
+.. |nvsim| unicode:: U+0223C U+020D2 .. TILDE OPERATOR with vertical line
+.. |parsim| unicode:: U+02AF3 .. PARALLEL WITH TILDE OPERATOR
+.. |prnap| unicode:: U+02AB9 .. PRECEDES ABOVE NOT ALMOST EQUAL TO
+.. |prnE| unicode:: U+02AB5 .. PRECEDES ABOVE NOT EQUAL TO
+.. |prnsim| unicode:: U+022E8 .. PRECEDES BUT NOT EQUIVALENT TO
+.. |rnmid| unicode:: U+02AEE .. DOES NOT DIVIDE WITH REVERSED NEGATION SLASH
+.. |scnap| unicode:: U+02ABA .. SUCCEEDS ABOVE NOT ALMOST EQUAL TO
+.. |scnE| unicode:: U+02AB6 .. SUCCEEDS ABOVE NOT EQUAL TO
+.. |scnsim| unicode:: U+022E9 .. SUCCEEDS BUT NOT EQUIVALENT TO
+.. |simne| unicode:: U+02246 .. APPROXIMATELY BUT NOT ACTUALLY EQUAL TO
+.. |solbar| unicode:: U+0233F .. APL FUNCTIONAL SYMBOL SLASH BAR
+.. |subnE| unicode:: U+02ACB .. SUBSET OF ABOVE NOT EQUAL TO
+.. |subne| unicode:: U+0228A .. SUBSET OF WITH NOT EQUAL TO
+.. |supnE| unicode:: U+02ACC .. SUPERSET OF ABOVE NOT EQUAL TO
+.. |supne| unicode:: U+0228B .. SUPERSET OF WITH NOT EQUAL TO
+.. |vnsub| unicode:: U+02282 U+020D2 .. SUBSET OF with vertical line
+.. |vnsup| unicode:: U+02283 U+020D2 .. SUPERSET OF with vertical line
+.. |vsubnE| unicode:: U+02ACB U+0FE00 .. SUBSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members
+.. |vsubne| unicode:: U+0228A U+0FE00 .. SUBSET OF WITH NOT EQUAL TO - variant with stroke through bottom members
+.. |vsupnE| unicode:: U+02ACC U+0FE00 .. SUPERSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members
+.. |vsupne| unicode:: U+0228B U+0FE00 .. SUPERSET OF WITH NOT EQUAL TO - variant with stroke through bottom members
diff --git a/docutils/parsers/rst/include/isoamso.txt b/docutils/parsers/rst/include/isoamso.txt
new file mode 100644
index 000000000..65cc17e99
--- /dev/null
+++ b/docutils/parsers/rst/include/isoamso.txt
@@ -0,0 +1,62 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |ang| unicode:: U+02220 .. ANGLE
+.. |ange| unicode:: U+029A4 .. ANGLE WITH UNDERBAR
+.. |angmsd| unicode:: U+02221 .. MEASURED ANGLE
+.. |angmsdaa| unicode:: U+029A8 .. MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND RIGHT
+.. |angmsdab| unicode:: U+029A9 .. MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND LEFT
+.. |angmsdac| unicode:: U+029AA .. MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND RIGHT
+.. |angmsdad| unicode:: U+029AB .. MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND LEFT
+.. |angmsdae| unicode:: U+029AC .. MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND UP
+.. |angmsdaf| unicode:: U+029AD .. MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND UP
+.. |angmsdag| unicode:: U+029AE .. MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND DOWN
+.. |angmsdah| unicode:: U+029AF .. MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND DOWN
+.. |angrtvb| unicode:: U+022BE .. RIGHT ANGLE WITH ARC
+.. |angrtvbd| unicode:: U+0299D .. MEASURED RIGHT ANGLE WITH DOT
+.. |bbrk| unicode:: U+023B5 .. BOTTOM SQUARE BRACKET
+.. |bbrktbrk| unicode:: U+023B6 .. BOTTOM SQUARE BRACKET OVER TOP SQUARE BRACKET
+.. |bemptyv| unicode:: U+029B0 .. REVERSED EMPTY SET
+.. |beth| unicode:: U+02136 .. BET SYMBOL
+.. |boxbox| unicode:: U+029C9 .. TWO JOINED SQUARES
+.. |bprime| unicode:: U+02035 .. REVERSED PRIME
+.. |bsemi| unicode:: U+0204F .. REVERSED SEMICOLON
+.. |cemptyv| unicode:: U+029B2 .. EMPTY SET WITH SMALL CIRCLE ABOVE
+.. |cirE| unicode:: U+029C3 .. CIRCLE WITH TWO HORIZONTAL STROKES TO THE RIGHT
+.. |cirscir| unicode:: U+029C2 .. CIRCLE WITH SMALL CIRCLE TO THE RIGHT
+.. |comp| unicode:: U+02201 .. COMPLEMENT
+.. |daleth| unicode:: U+02138 .. DALET SYMBOL
+.. |demptyv| unicode:: U+029B1 .. EMPTY SET WITH OVERBAR
+.. |ell| unicode:: U+02113 .. SCRIPT SMALL L
+.. |empty| unicode:: U+02205 .. EMPTY SET
+.. |emptyv| unicode:: U+02205 .. EMPTY SET
+.. |gimel| unicode:: U+02137 .. GIMEL SYMBOL
+.. |iiota| unicode:: U+02129 .. TURNED GREEK SMALL LETTER IOTA
+.. |image| unicode:: U+02111 .. BLACK-LETTER CAPITAL I
+.. |imath| unicode:: U+00131 .. LATIN SMALL LETTER DOTLESS I
+.. |inodot| unicode:: U+00131 .. LATIN SMALL LETTER DOTLESS I
+.. |jmath| unicode:: U+0006A .. LATIN SMALL LETTER J
+.. |jnodot| unicode:: U+0006A .. LATIN SMALL LETTER J
+.. |laemptyv| unicode:: U+029B4 .. EMPTY SET WITH LEFT ARROW ABOVE
+.. |lltri| unicode:: U+025FA .. LOWER LEFT TRIANGLE
+.. |lrtri| unicode:: U+022BF .. RIGHT TRIANGLE
+.. |mho| unicode:: U+02127 .. INVERTED OHM SIGN
+.. |nang| unicode:: U+02220 U+020D2 .. ANGLE with vertical line
+.. |nexist| unicode:: U+02204 .. THERE DOES NOT EXIST
+.. |oS| unicode:: U+024C8 .. CIRCLED LATIN CAPITAL LETTER S
+.. |planck| unicode:: U+0210F .. PLANCK CONSTANT OVER TWO PI
+.. |plankv| unicode:: U+0210F .. PLANCK CONSTANT OVER TWO PI
+.. |raemptyv| unicode:: U+029B3 .. EMPTY SET WITH RIGHT ARROW ABOVE
+.. |range| unicode:: U+029A5 .. REVERSED ANGLE WITH UNDERBAR
+.. |real| unicode:: U+0211C .. BLACK-LETTER CAPITAL R
+.. |sbsol| unicode:: U+0FE68 .. SMALL REVERSE SOLIDUS
+.. |tbrk| unicode:: U+023B4 .. TOP SQUARE BRACKET
+.. |trpezium| unicode:: U+0FFFD .. REPLACEMENT CHARACTER
+.. |ultri| unicode:: U+025F8 .. UPPER LEFT TRIANGLE
+.. |urtri| unicode:: U+025F9 .. UPPER RIGHT TRIANGLE
+.. |vprime| unicode:: U+02032 .. PRIME
+.. |vzigzag| unicode:: U+0299A .. VERTICAL ZIGZAG LINE
+.. |weierp| unicode:: U+02118 .. SCRIPT CAPITAL P
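
The definition files added in this patch are intended to be pulled into a reStructuredText document with the standard-include syntax (angle brackets name a file from this ``include/`` directory), after which the substitutions can be referenced inline. A minimal usage sketch, assuming the document is processed with a Docutils front end such as ``rst2html.py``::

    .. include:: <isoamso.txt>

    The empty set |empty| and the angle sign |ang| are now available
    as inline substitutions anywhere in this document.
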
diff --git a/docutils/parsers/rst/include/isoamsr.txt b/docutils/parsers/rst/include/isoamsr.txt
new file mode 100644
index 000000000..a3d03dab7
--- /dev/null
+++ b/docutils/parsers/rst/include/isoamsr.txt
@@ -0,0 +1,191 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |apE| unicode:: U+02A70 .. APPROXIMATELY EQUAL OR EQUAL TO
+.. |ape| unicode:: U+0224A .. ALMOST EQUAL OR EQUAL TO
+.. |apid| unicode:: U+0224B .. TRIPLE TILDE
+.. |asymp| unicode:: U+02248 .. ALMOST EQUAL TO
+.. |Barv| unicode:: U+02AE7 .. SHORT DOWN TACK WITH OVERBAR
+.. |bcong| unicode:: U+0224C .. ALL EQUAL TO
+.. |bepsi| unicode:: U+003F6 .. GREEK REVERSED LUNATE EPSILON SYMBOL
+.. |bowtie| unicode:: U+022C8 .. BOWTIE
+.. |bsim| unicode:: U+0223D .. REVERSED TILDE
+.. |bsime| unicode:: U+022CD .. REVERSED TILDE EQUALS
+.. |bsolhsub| unicode:: U+0005C U+02282 .. REVERSE SOLIDUS, SUBSET OF
+.. |bump| unicode:: U+0224E .. GEOMETRICALLY EQUIVALENT TO
+.. |bumpE| unicode:: U+02AAE .. EQUALS SIGN WITH BUMPY ABOVE
+.. |bumpe| unicode:: U+0224F .. DIFFERENCE BETWEEN
+.. |cire| unicode:: U+02257 .. RING EQUAL TO
+.. |Colon| unicode:: U+02237 .. PROPORTION
+.. |Colone| unicode:: U+02A74 .. DOUBLE COLON EQUAL
+.. |colone| unicode:: U+02254 .. COLON EQUALS
+.. |congdot| unicode:: U+02A6D .. CONGRUENT WITH DOT ABOVE
+.. |csub| unicode:: U+02ACF .. CLOSED SUBSET
+.. |csube| unicode:: U+02AD1 .. CLOSED SUBSET OR EQUAL TO
+.. |csup| unicode:: U+02AD0 .. CLOSED SUPERSET
+.. |csupe| unicode:: U+02AD2 .. CLOSED SUPERSET OR EQUAL TO
+.. |cuepr| unicode:: U+022DE .. EQUAL TO OR PRECEDES
+.. |cuesc| unicode:: U+022DF .. EQUAL TO OR SUCCEEDS
+.. |cupre| unicode:: U+0227C .. PRECEDES OR EQUAL TO
+.. |Dashv| unicode:: U+02AE4 .. VERTICAL BAR DOUBLE LEFT TURNSTILE
+.. |dashv| unicode:: U+022A3 .. LEFT TACK
+.. |easter| unicode:: U+02A6E .. EQUALS WITH ASTERISK
+.. |ecir| unicode:: U+02256 .. RING IN EQUAL TO
+.. |ecolon| unicode:: U+02255 .. EQUALS COLON
+.. |eDDot| unicode:: U+02A77 .. EQUALS SIGN WITH TWO DOTS ABOVE AND TWO DOTS BELOW
+.. |eDot| unicode:: U+02251 .. GEOMETRICALLY EQUAL TO
+.. |efDot| unicode:: U+02252 .. APPROXIMATELY EQUAL TO OR THE IMAGE OF
+.. |eg| unicode:: U+02A9A .. DOUBLE-LINE EQUAL TO OR GREATER-THAN
+.. |egs| unicode:: U+02A96 .. SLANTED EQUAL TO OR GREATER-THAN
+.. |egsdot| unicode:: U+02A98 .. SLANTED EQUAL TO OR GREATER-THAN WITH DOT INSIDE
+.. |el| unicode:: U+02A99 .. DOUBLE-LINE EQUAL TO OR LESS-THAN
+.. |els| unicode:: U+02A95 .. SLANTED EQUAL TO OR LESS-THAN
+.. |elsdot| unicode:: U+02A97 .. SLANTED EQUAL TO OR LESS-THAN WITH DOT INSIDE
+.. |equest| unicode:: U+0225F .. QUESTIONED EQUAL TO
+.. |equivDD| unicode:: U+02A78 .. EQUIVALENT WITH FOUR DOTS ABOVE
+.. |erDot| unicode:: U+02253 .. IMAGE OF OR APPROXIMATELY EQUAL TO
+.. |esdot| unicode:: U+02250 .. APPROACHES THE LIMIT
+.. |Esim| unicode:: U+02A73 .. EQUALS SIGN ABOVE TILDE OPERATOR
+.. |esim| unicode:: U+02242 .. MINUS TILDE
+.. |fork| unicode:: U+022D4 .. PITCHFORK
+.. |forkv| unicode:: U+02AD9 .. ELEMENT OF OPENING DOWNWARDS
+.. |frown| unicode:: U+02322 .. FROWN
+.. |gap| unicode:: U+02A86 .. GREATER-THAN OR APPROXIMATE
+.. |gE| unicode:: U+02267 .. GREATER-THAN OVER EQUAL TO
+.. |gEl| unicode:: U+02A8C .. GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN
+.. |gel| unicode:: U+022DB .. GREATER-THAN EQUAL TO OR LESS-THAN
+.. |ges| unicode:: U+02A7E .. GREATER-THAN OR SLANTED EQUAL TO
+.. |gescc| unicode:: U+02AA9 .. GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
+.. |gesdot| unicode:: U+02A80 .. GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
+.. |gesdoto| unicode:: U+02A82 .. GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
+.. |gesdotol| unicode:: U+02A84 .. GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT
+.. |gesl| unicode:: U+022DB U+0FE00 .. GREATER-THAN slanted EQUAL TO OR LESS-THAN
+.. |gesles| unicode:: U+02A94 .. GREATER-THAN ABOVE SLANTED EQUAL ABOVE LESS-THAN ABOVE SLANTED EQUAL
+.. |Gg| unicode:: U+022D9 .. VERY MUCH GREATER-THAN
+.. |gl| unicode:: U+02277 .. GREATER-THAN OR LESS-THAN
+.. |gla| unicode:: U+02AA5 .. GREATER-THAN BESIDE LESS-THAN
+.. |glE| unicode:: U+02A92 .. GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL
+.. |glj| unicode:: U+02AA4 .. GREATER-THAN OVERLAPPING LESS-THAN
+.. |gsdot| unicode:: U+022D7 .. GREATER-THAN WITH DOT
+.. |gsim| unicode:: U+02273 .. GREATER-THAN OR EQUIVALENT TO
+.. |gsime| unicode:: U+02A8E .. GREATER-THAN ABOVE SIMILAR OR EQUAL
+.. |gsiml| unicode:: U+02A90 .. GREATER-THAN ABOVE SIMILAR ABOVE LESS-THAN
+.. |Gt| unicode:: U+0226B .. MUCH GREATER-THAN
+.. |gtcc| unicode:: U+02AA7 .. GREATER-THAN CLOSED BY CURVE
+.. |gtcir| unicode:: U+02A7A .. GREATER-THAN WITH CIRCLE INSIDE
+.. |gtdot| unicode:: U+022D7 .. GREATER-THAN WITH DOT
+.. |gtquest| unicode:: U+02A7C .. GREATER-THAN WITH QUESTION MARK ABOVE
+.. |gtrarr| unicode:: U+02978 .. GREATER-THAN ABOVE RIGHTWARDS ARROW
+.. |homtht| unicode:: U+0223B .. HOMOTHETIC
+.. |lap| unicode:: U+02A85 .. LESS-THAN OR APPROXIMATE
+.. |lat| unicode:: U+02AAB .. LARGER THAN
+.. |late| unicode:: U+02AAD .. LARGER THAN OR EQUAL TO
+.. |lates| unicode:: U+02AAD U+0FE00 .. LARGER THAN OR slanted EQUAL
+.. |ldot| unicode:: U+022D6 .. LESS-THAN WITH DOT
+.. |lE| unicode:: U+02266 .. LESS-THAN OVER EQUAL TO
+.. |lEg| unicode:: U+02A8B .. LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN
+.. |leg| unicode:: U+022DA .. LESS-THAN EQUAL TO OR GREATER-THAN
+.. |les| unicode:: U+02A7D .. LESS-THAN OR SLANTED EQUAL TO
+.. |lescc| unicode:: U+02AA8 .. LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
+.. |lesdot| unicode:: U+02A7F .. LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
+.. |lesdoto| unicode:: U+02A81 .. LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
+.. |lesdotor| unicode:: U+02A83 .. LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT
+.. |lesg| unicode:: U+022DA U+0FE00 .. LESS-THAN slanted EQUAL TO OR GREATER-THAN
+.. |lesges| unicode:: U+02A93 .. LESS-THAN ABOVE SLANTED EQUAL ABOVE GREATER-THAN ABOVE SLANTED EQUAL
+.. |lg| unicode:: U+02276 .. LESS-THAN OR GREATER-THAN
+.. |lgE| unicode:: U+02A91 .. LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL
+.. |Ll| unicode:: U+022D8 .. VERY MUCH LESS-THAN
+.. |lsim| unicode:: U+02272 .. LESS-THAN OR EQUIVALENT TO
+.. |lsime| unicode:: U+02A8D .. LESS-THAN ABOVE SIMILAR OR EQUAL
+.. |lsimg| unicode:: U+02A8F .. LESS-THAN ABOVE SIMILAR ABOVE GREATER-THAN
+.. |Lt| unicode:: U+0226A .. MUCH LESS-THAN
+.. |ltcc| unicode:: U+02AA6 .. LESS-THAN CLOSED BY CURVE
+.. |ltcir| unicode:: U+02A79 .. LESS-THAN WITH CIRCLE INSIDE
+.. |ltdot| unicode:: U+022D6 .. LESS-THAN WITH DOT
+.. |ltlarr| unicode:: U+02976 .. LESS-THAN ABOVE LEFTWARDS ARROW
+.. |ltquest| unicode:: U+02A7B .. LESS-THAN WITH QUESTION MARK ABOVE
+.. |ltrie| unicode:: U+022B4 .. NORMAL SUBGROUP OF OR EQUAL TO
+.. |mcomma| unicode:: U+02A29 .. MINUS SIGN WITH COMMA ABOVE
+.. |mDDot| unicode:: U+0223A .. GEOMETRIC PROPORTION
+.. |mid| unicode:: U+02223 .. DIVIDES
+.. |mlcp| unicode:: U+02ADB .. TRANSVERSAL INTERSECTION
+.. |models| unicode:: U+022A7 .. MODELS
+.. |mstpos| unicode:: U+0223E .. INVERTED LAZY S
+.. |Pr| unicode:: U+02ABB .. DOUBLE PRECEDES
+.. |pr| unicode:: U+0227A .. PRECEDES
+.. |prap| unicode:: U+02AB7 .. PRECEDES ABOVE ALMOST EQUAL TO
+.. |prcue| unicode:: U+0227C .. PRECEDES OR EQUAL TO
+.. |prE| unicode:: U+02AB3 .. PRECEDES ABOVE EQUALS SIGN
+.. |pre| unicode:: U+02AAF .. PRECEDES ABOVE SINGLE-LINE EQUALS SIGN
+.. |prsim| unicode:: U+0227E .. PRECEDES OR EQUIVALENT TO
+.. |prurel| unicode:: U+022B0 .. PRECEDES UNDER RELATION
+.. |ratio| unicode:: U+02236 .. RATIO
+.. |rtrie| unicode:: U+022B5 .. CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+.. |rtriltri| unicode:: U+029CE .. RIGHT TRIANGLE ABOVE LEFT TRIANGLE
+.. |samalg| unicode:: U+02210 .. N-ARY COPRODUCT
+.. |Sc| unicode:: U+02ABC .. DOUBLE SUCCEEDS
+.. |sc| unicode:: U+0227B .. SUCCEEDS
+.. |scap| unicode:: U+02AB8 .. SUCCEEDS ABOVE ALMOST EQUAL TO
+.. |sccue| unicode:: U+0227D .. SUCCEEDS OR EQUAL TO
+.. |scE| unicode:: U+02AB4 .. SUCCEEDS ABOVE EQUALS SIGN
+.. |sce| unicode:: U+02AB0 .. SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN
+.. |scsim| unicode:: U+0227F .. SUCCEEDS OR EQUIVALENT TO
+.. |sdote| unicode:: U+02A66 .. EQUALS SIGN WITH DOT BELOW
+.. |sfrown| unicode:: U+02322 .. FROWN
+.. |simg| unicode:: U+02A9E .. SIMILAR OR GREATER-THAN
+.. |simgE| unicode:: U+02AA0 .. SIMILAR ABOVE GREATER-THAN ABOVE EQUALS SIGN
+.. |siml| unicode:: U+02A9D .. SIMILAR OR LESS-THAN
+.. |simlE| unicode:: U+02A9F .. SIMILAR ABOVE LESS-THAN ABOVE EQUALS SIGN
+.. |smid| unicode:: U+02223 .. DIVIDES
+.. |smile| unicode:: U+02323 .. SMILE
+.. |smt| unicode:: U+02AAA .. SMALLER THAN
+.. |smte| unicode:: U+02AAC .. SMALLER THAN OR EQUAL TO
+.. |smtes| unicode:: U+02AAC U+0FE00 .. SMALLER THAN OR slanted EQUAL
+.. |spar| unicode:: U+02225 .. PARALLEL TO
+.. |sqsub| unicode:: U+0228F .. SQUARE IMAGE OF
+.. |sqsube| unicode:: U+02291 .. SQUARE IMAGE OF OR EQUAL TO
+.. |sqsup| unicode:: U+02290 .. SQUARE ORIGINAL OF
+.. |sqsupe| unicode:: U+02292 .. SQUARE ORIGINAL OF OR EQUAL TO
+.. |ssmile| unicode:: U+02323 .. SMILE
+.. |Sub| unicode:: U+022D0 .. DOUBLE SUBSET
+.. |subE| unicode:: U+02AC5 .. SUBSET OF ABOVE EQUALS SIGN
+.. |subedot| unicode:: U+02AC3 .. SUBSET OF OR EQUAL TO WITH DOT ABOVE
+.. |submult| unicode:: U+02AC1 .. SUBSET WITH MULTIPLICATION SIGN BELOW
+.. |subplus| unicode:: U+02ABF .. SUBSET WITH PLUS SIGN BELOW
+.. |subrarr| unicode:: U+02979 .. SUBSET ABOVE RIGHTWARDS ARROW
+.. |subsim| unicode:: U+02AC7 .. SUBSET OF ABOVE TILDE OPERATOR
+.. |subsub| unicode:: U+02AD5 .. SUBSET ABOVE SUBSET
+.. |subsup| unicode:: U+02AD3 .. SUBSET ABOVE SUPERSET
+.. |Sup| unicode:: U+022D1 .. DOUBLE SUPERSET
+.. |supdsub| unicode:: U+02AD8 .. SUPERSET BESIDE AND JOINED BY DASH WITH SUBSET
+.. |supE| unicode:: U+02AC6 .. SUPERSET OF ABOVE EQUALS SIGN
+.. |supedot| unicode:: U+02AC4 .. SUPERSET OF OR EQUAL TO WITH DOT ABOVE
+.. |suphsol| unicode:: U+02283 U+0002F .. SUPERSET OF, SOLIDUS
+.. |suphsub| unicode:: U+02AD7 .. SUPERSET BESIDE SUBSET
+.. |suplarr| unicode:: U+0297B .. SUPERSET ABOVE LEFTWARDS ARROW
+.. |supmult| unicode:: U+02AC2 .. SUPERSET WITH MULTIPLICATION SIGN BELOW
+.. |supplus| unicode:: U+02AC0 .. SUPERSET WITH PLUS SIGN BELOW
+.. |supsim| unicode:: U+02AC8 .. SUPERSET OF ABOVE TILDE OPERATOR
+.. |supsub| unicode:: U+02AD4 .. SUPERSET ABOVE SUBSET
+.. |supsup| unicode:: U+02AD6 .. SUPERSET ABOVE SUPERSET
+.. |thkap| unicode:: U+02248 .. ALMOST EQUAL TO
+.. |thksim| unicode:: U+0223C .. TILDE OPERATOR
+.. |topfork| unicode:: U+02ADA .. PITCHFORK WITH TEE TOP
+.. |trie| unicode:: U+0225C .. DELTA EQUAL TO
+.. |twixt| unicode:: U+0226C .. BETWEEN
+.. |Vbar| unicode:: U+02AEB .. DOUBLE UP TACK
+.. |vBar| unicode:: U+02AE8 .. SHORT UP TACK WITH UNDERBAR
+.. |vBarv| unicode:: U+02AE9 .. SHORT UP TACK ABOVE SHORT DOWN TACK
+.. |VDash| unicode:: U+022AB .. DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+.. |Vdash| unicode:: U+022A9 .. FORCES
+.. |vDash| unicode:: U+022A8 .. TRUE
+.. |vdash| unicode:: U+022A2 .. RIGHT TACK
+.. |Vdashl| unicode:: U+02AE6 .. LONG DASH FROM LEFT MEMBER OF DOUBLE VERTICAL
+.. |veebar| unicode:: U+022BB .. XOR
+.. |vltri| unicode:: U+022B2 .. NORMAL SUBGROUP OF
+.. |vprop| unicode:: U+0221D .. PROPORTIONAL TO
+.. |vrtri| unicode:: U+022B3 .. CONTAINS AS NORMAL SUBGROUP
+.. |Vvdash| unicode:: U+022AA .. TRIPLE VERTICAL BAR RIGHT TURNSTILE
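
Substitution references are inline markup, so they normally need whitespace (or punctuation) on either side; backslash-escaped whitespace lets a symbol butt directly against adjacent text. A small sketch using two of the relation symbols defined above::

    .. include:: <isoamsr.txt>

    Spaced:   a |lE| b |Gt| c
    Adjacent: a\ |lE|\ b, via backslash-escaped whitespace.
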
diff --git a/docutils/parsers/rst/include/isobox.txt b/docutils/parsers/rst/include/isobox.txt
new file mode 100644
index 000000000..2304f8770
--- /dev/null
+++ b/docutils/parsers/rst/include/isobox.txt
@@ -0,0 +1,46 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |boxDL| unicode:: U+02557 .. BOX DRAWINGS DOUBLE DOWN AND LEFT
+.. |boxDl| unicode:: U+02556 .. BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+.. |boxdL| unicode:: U+02555 .. BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+.. |boxdl| unicode:: U+02510 .. BOX DRAWINGS LIGHT DOWN AND LEFT
+.. |boxDR| unicode:: U+02554 .. BOX DRAWINGS DOUBLE DOWN AND RIGHT
+.. |boxDr| unicode:: U+02553 .. BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+.. |boxdR| unicode:: U+02552 .. BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+.. |boxdr| unicode:: U+0250C .. BOX DRAWINGS LIGHT DOWN AND RIGHT
+.. |boxH| unicode:: U+02550 .. BOX DRAWINGS DOUBLE HORIZONTAL
+.. |boxh| unicode:: U+02500 .. BOX DRAWINGS LIGHT HORIZONTAL
+.. |boxHD| unicode:: U+02566 .. BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+.. |boxHd| unicode:: U+02564 .. BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+.. |boxhD| unicode:: U+02565 .. BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+.. |boxhd| unicode:: U+0252C .. BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+.. |boxHU| unicode:: U+02569 .. BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+.. |boxHu| unicode:: U+02567 .. BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+.. |boxhU| unicode:: U+02568 .. BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+.. |boxhu| unicode:: U+02534 .. BOX DRAWINGS LIGHT UP AND HORIZONTAL
+.. |boxUL| unicode:: U+0255D .. BOX DRAWINGS DOUBLE UP AND LEFT
+.. |boxUl| unicode:: U+0255C .. BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+.. |boxuL| unicode:: U+0255B .. BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+.. |boxul| unicode:: U+02518 .. BOX DRAWINGS LIGHT UP AND LEFT
+.. |boxUR| unicode:: U+0255A .. BOX DRAWINGS DOUBLE UP AND RIGHT
+.. |boxUr| unicode:: U+02559 .. BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+.. |boxuR| unicode:: U+02558 .. BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+.. |boxur| unicode:: U+02514 .. BOX DRAWINGS LIGHT UP AND RIGHT
+.. |boxV| unicode:: U+02551 .. BOX DRAWINGS DOUBLE VERTICAL
+.. |boxv| unicode:: U+02502 .. BOX DRAWINGS LIGHT VERTICAL
+.. |boxVH| unicode:: U+0256C .. BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+.. |boxVh| unicode:: U+0256B .. BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+.. |boxvH| unicode:: U+0256A .. BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+.. |boxvh| unicode:: U+0253C .. BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+.. |boxVL| unicode:: U+02563 .. BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+.. |boxVl| unicode:: U+02562 .. BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+.. |boxvL| unicode:: U+02561 .. BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+.. |boxvl| unicode:: U+02524 .. BOX DRAWINGS LIGHT VERTICAL AND LEFT
+.. |boxVR| unicode:: U+02560 .. BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+.. |boxVr| unicode:: U+0255F .. BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+.. |boxvR| unicode:: U+0255E .. BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+.. |boxvr| unicode:: U+0251C .. BOX DRAWINGS LIGHT VERTICAL AND RIGHT
diff --git a/docutils/parsers/rst/include/isocyr1.txt b/docutils/parsers/rst/include/isocyr1.txt
new file mode 100644
index 000000000..afee744cf
--- /dev/null
+++ b/docutils/parsers/rst/include/isocyr1.txt
@@ -0,0 +1,73 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Acy| unicode:: U+00410 .. CYRILLIC CAPITAL LETTER A
+.. |acy| unicode:: U+00430 .. CYRILLIC SMALL LETTER A
+.. |Bcy| unicode:: U+00411 .. CYRILLIC CAPITAL LETTER BE
+.. |bcy| unicode:: U+00431 .. CYRILLIC SMALL LETTER BE
+.. |CHcy| unicode:: U+00427 .. CYRILLIC CAPITAL LETTER CHE
+.. |chcy| unicode:: U+00447 .. CYRILLIC SMALL LETTER CHE
+.. |Dcy| unicode:: U+00414 .. CYRILLIC CAPITAL LETTER DE
+.. |dcy| unicode:: U+00434 .. CYRILLIC SMALL LETTER DE
+.. |Ecy| unicode:: U+0042D .. CYRILLIC CAPITAL LETTER E
+.. |ecy| unicode:: U+0044D .. CYRILLIC SMALL LETTER E
+.. |Fcy| unicode:: U+00424 .. CYRILLIC CAPITAL LETTER EF
+.. |fcy| unicode:: U+00444 .. CYRILLIC SMALL LETTER EF
+.. |Gcy| unicode:: U+00413 .. CYRILLIC CAPITAL LETTER GHE
+.. |gcy| unicode:: U+00433 .. CYRILLIC SMALL LETTER GHE
+.. |HARDcy| unicode:: U+0042A .. CYRILLIC CAPITAL LETTER HARD SIGN
+.. |hardcy| unicode:: U+0044A .. CYRILLIC SMALL LETTER HARD SIGN
+.. |Icy| unicode:: U+00418 .. CYRILLIC CAPITAL LETTER I
+.. |icy| unicode:: U+00438 .. CYRILLIC SMALL LETTER I
+.. |IEcy| unicode:: U+00415 .. CYRILLIC CAPITAL LETTER IE
+.. |iecy| unicode:: U+00435 .. CYRILLIC SMALL LETTER IE
+.. |IOcy| unicode:: U+00401 .. CYRILLIC CAPITAL LETTER IO
+.. |iocy| unicode:: U+00451 .. CYRILLIC SMALL LETTER IO
+.. |Jcy| unicode:: U+00419 .. CYRILLIC CAPITAL LETTER SHORT I
+.. |jcy| unicode:: U+00439 .. CYRILLIC SMALL LETTER SHORT I
+.. |Kcy| unicode:: U+0041A .. CYRILLIC CAPITAL LETTER KA
+.. |kcy| unicode:: U+0043A .. CYRILLIC SMALL LETTER KA
+.. |KHcy| unicode:: U+00425 .. CYRILLIC CAPITAL LETTER HA
+.. |khcy| unicode:: U+00445 .. CYRILLIC SMALL LETTER HA
+.. |Lcy| unicode:: U+0041B .. CYRILLIC CAPITAL LETTER EL
+.. |lcy| unicode:: U+0043B .. CYRILLIC SMALL LETTER EL
+.. |Mcy| unicode:: U+0041C .. CYRILLIC CAPITAL LETTER EM
+.. |mcy| unicode:: U+0043C .. CYRILLIC SMALL LETTER EM
+.. |Ncy| unicode:: U+0041D .. CYRILLIC CAPITAL LETTER EN
+.. |ncy| unicode:: U+0043D .. CYRILLIC SMALL LETTER EN
+.. |numero| unicode:: U+02116 .. NUMERO SIGN
+.. |Ocy| unicode:: U+0041E .. CYRILLIC CAPITAL LETTER O
+.. |ocy| unicode:: U+0043E .. CYRILLIC SMALL LETTER O
+.. |Pcy| unicode:: U+0041F .. CYRILLIC CAPITAL LETTER PE
+.. |pcy| unicode:: U+0043F .. CYRILLIC SMALL LETTER PE
+.. |Rcy| unicode:: U+00420 .. CYRILLIC CAPITAL LETTER ER
+.. |rcy| unicode:: U+00440 .. CYRILLIC SMALL LETTER ER
+.. |Scy| unicode:: U+00421 .. CYRILLIC CAPITAL LETTER ES
+.. |scy| unicode:: U+00441 .. CYRILLIC SMALL LETTER ES
+.. |SHCHcy| unicode:: U+00429 .. CYRILLIC CAPITAL LETTER SHCHA
+.. |shchcy| unicode:: U+00449 .. CYRILLIC SMALL LETTER SHCHA
+.. |SHcy| unicode:: U+00428 .. CYRILLIC CAPITAL LETTER SHA
+.. |shcy| unicode:: U+00448 .. CYRILLIC SMALL LETTER SHA
+.. |SOFTcy| unicode:: U+0042C .. CYRILLIC CAPITAL LETTER SOFT SIGN
+.. |softcy| unicode:: U+0044C .. CYRILLIC SMALL LETTER SOFT SIGN
+.. |Tcy| unicode:: U+00422 .. CYRILLIC CAPITAL LETTER TE
+.. |tcy| unicode:: U+00442 .. CYRILLIC SMALL LETTER TE
+.. |TScy| unicode:: U+00426 .. CYRILLIC CAPITAL LETTER TSE
+.. |tscy| unicode:: U+00446 .. CYRILLIC SMALL LETTER TSE
+.. |Ucy| unicode:: U+00423 .. CYRILLIC CAPITAL LETTER U
+.. |ucy| unicode:: U+00443 .. CYRILLIC SMALL LETTER U
+.. |Vcy| unicode:: U+00412 .. CYRILLIC CAPITAL LETTER VE
+.. |vcy| unicode:: U+00432 .. CYRILLIC SMALL LETTER VE
+.. |YAcy| unicode:: U+0042F .. CYRILLIC CAPITAL LETTER YA
+.. |yacy| unicode:: U+0044F .. CYRILLIC SMALL LETTER YA
+.. |Ycy| unicode:: U+0042B .. CYRILLIC CAPITAL LETTER YERU
+.. |ycy| unicode:: U+0044B .. CYRILLIC SMALL LETTER YERU
+.. |YUcy| unicode:: U+0042E .. CYRILLIC CAPITAL LETTER YU
+.. |yucy| unicode:: U+0044E .. CYRILLIC SMALL LETTER YU
+.. |Zcy| unicode:: U+00417 .. CYRILLIC CAPITAL LETTER ZE
+.. |zcy| unicode:: U+00437 .. CYRILLIC SMALL LETTER ZE
+.. |ZHcy| unicode:: U+00416 .. CYRILLIC CAPITAL LETTER ZHE
+.. |zhcy| unicode:: U+00436 .. CYRILLIC SMALL LETTER ZHE
diff --git a/docutils/parsers/rst/include/isocyr2.txt b/docutils/parsers/rst/include/isocyr2.txt
new file mode 100644
index 000000000..fe09c015b
--- /dev/null
+++ b/docutils/parsers/rst/include/isocyr2.txt
@@ -0,0 +1,32 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |DJcy| unicode:: U+00402 .. CYRILLIC CAPITAL LETTER DJE
+.. |djcy| unicode:: U+00452 .. CYRILLIC SMALL LETTER DJE
+.. |DScy| unicode:: U+00405 .. CYRILLIC CAPITAL LETTER DZE
+.. |dscy| unicode:: U+00455 .. CYRILLIC SMALL LETTER DZE
+.. |DZcy| unicode:: U+0040F .. CYRILLIC CAPITAL LETTER DZHE
+.. |dzcy| unicode:: U+0045F .. CYRILLIC SMALL LETTER DZHE
+.. |GJcy| unicode:: U+00403 .. CYRILLIC CAPITAL LETTER GJE
+.. |gjcy| unicode:: U+00453 .. CYRILLIC SMALL LETTER GJE
+.. |Iukcy| unicode:: U+00406 .. CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+.. |iukcy| unicode:: U+00456 .. CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+.. |Jsercy| unicode:: U+00408 .. CYRILLIC CAPITAL LETTER JE
+.. |jsercy| unicode:: U+00458 .. CYRILLIC SMALL LETTER JE
+.. |Jukcy| unicode:: U+00404 .. CYRILLIC CAPITAL LETTER UKRAINIAN IE
+.. |jukcy| unicode:: U+00454 .. CYRILLIC SMALL LETTER UKRAINIAN IE
+.. |KJcy| unicode:: U+0040C .. CYRILLIC CAPITAL LETTER KJE
+.. |kjcy| unicode:: U+0045C .. CYRILLIC SMALL LETTER KJE
+.. |LJcy| unicode:: U+00409 .. CYRILLIC CAPITAL LETTER LJE
+.. |ljcy| unicode:: U+00459 .. CYRILLIC SMALL LETTER LJE
+.. |NJcy| unicode:: U+0040A .. CYRILLIC CAPITAL LETTER NJE
+.. |njcy| unicode:: U+0045A .. CYRILLIC SMALL LETTER NJE
+.. |TSHcy| unicode:: U+0040B .. CYRILLIC CAPITAL LETTER TSHE
+.. |tshcy| unicode:: U+0045B .. CYRILLIC SMALL LETTER TSHE
+.. |Ubrcy| unicode:: U+0040E .. CYRILLIC CAPITAL LETTER SHORT U
+.. |ubrcy| unicode:: U+0045E .. CYRILLIC SMALL LETTER SHORT U
+.. |YIcy| unicode:: U+00407 .. CYRILLIC CAPITAL LETTER YI
+.. |yicy| unicode:: U+00457 .. CYRILLIC SMALL LETTER YI
diff --git a/docutils/parsers/rst/include/isodia.txt b/docutils/parsers/rst/include/isodia.txt
new file mode 100644
index 000000000..ede6d9946
--- /dev/null
+++ b/docutils/parsers/rst/include/isodia.txt
@@ -0,0 +1,20 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |acute| unicode:: U+000B4 .. ACUTE ACCENT
+.. |breve| unicode:: U+002D8 .. BREVE
+.. |caron| unicode:: U+002C7 .. CARON
+.. |cedil| unicode:: U+000B8 .. CEDILLA
+.. |circ| unicode:: U+002C6 .. MODIFIER LETTER CIRCUMFLEX ACCENT
+.. |dblac| unicode:: U+002DD .. DOUBLE ACUTE ACCENT
+.. |die| unicode:: U+000A8 .. DIAERESIS
+.. |dot| unicode:: U+002D9 .. DOT ABOVE
+.. |grave| unicode:: U+00060 .. GRAVE ACCENT
+.. |macr| unicode:: U+000AF .. MACRON
+.. |ogon| unicode:: U+002DB .. OGONEK
+.. |ring| unicode:: U+002DA .. RING ABOVE
+.. |tilde| unicode:: U+002DC .. SMALL TILDE
+.. |uml| unicode:: U+000A8 .. DIAERESIS
diff --git a/docutils/parsers/rst/include/isogrk1.txt b/docutils/parsers/rst/include/isogrk1.txt
new file mode 100644
index 000000000..434368a03
--- /dev/null
+++ b/docutils/parsers/rst/include/isogrk1.txt
@@ -0,0 +1,55 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Agr| unicode:: U+00391 .. GREEK CAPITAL LETTER ALPHA
+.. |agr| unicode:: U+003B1 .. GREEK SMALL LETTER ALPHA
+.. |Bgr| unicode:: U+00392 .. GREEK CAPITAL LETTER BETA
+.. |bgr| unicode:: U+003B2 .. GREEK SMALL LETTER BETA
+.. |Dgr| unicode:: U+00394 .. GREEK CAPITAL LETTER DELTA
+.. |dgr| unicode:: U+003B4 .. GREEK SMALL LETTER DELTA
+.. |EEgr| unicode:: U+00397 .. GREEK CAPITAL LETTER ETA
+.. |eegr| unicode:: U+003B7 .. GREEK SMALL LETTER ETA
+.. |Egr| unicode:: U+00395 .. GREEK CAPITAL LETTER EPSILON
+.. |egr| unicode:: U+003B5 .. GREEK SMALL LETTER EPSILON
+.. |Ggr| unicode:: U+00393 .. GREEK CAPITAL LETTER GAMMA
+.. |ggr| unicode:: U+003B3 .. GREEK SMALL LETTER GAMMA
+.. |Igr| unicode:: U+00399 .. GREEK CAPITAL LETTER IOTA
+.. |igr| unicode:: U+003B9 .. GREEK SMALL LETTER IOTA
+.. |Kgr| unicode:: U+0039A .. GREEK CAPITAL LETTER KAPPA
+.. |kgr| unicode:: U+003BA .. GREEK SMALL LETTER KAPPA
+.. |KHgr| unicode:: U+003A7 .. GREEK CAPITAL LETTER CHI
+.. |khgr| unicode:: U+003C7 .. GREEK SMALL LETTER CHI
+.. |Lgr| unicode:: U+0039B .. GREEK CAPITAL LETTER LAMDA
+.. |lgr| unicode:: U+003BB .. GREEK SMALL LETTER LAMDA
+.. |Mgr| unicode:: U+0039C .. GREEK CAPITAL LETTER MU
+.. |mgr| unicode:: U+003BC .. GREEK SMALL LETTER MU
+.. |Ngr| unicode:: U+0039D .. GREEK CAPITAL LETTER NU
+.. |ngr| unicode:: U+003BD .. GREEK SMALL LETTER NU
+.. |Ogr| unicode:: U+0039F .. GREEK CAPITAL LETTER OMICRON
+.. |ogr| unicode:: U+003BF .. GREEK SMALL LETTER OMICRON
+.. |OHgr| unicode:: U+003A9 .. GREEK CAPITAL LETTER OMEGA
+.. |ohgr| unicode:: U+003C9 .. GREEK SMALL LETTER OMEGA
+.. |Pgr| unicode:: U+003A0 .. GREEK CAPITAL LETTER PI
+.. |pgr| unicode:: U+003C0 .. GREEK SMALL LETTER PI
+.. |PHgr| unicode:: U+003A6 .. GREEK CAPITAL LETTER PHI
+.. |phgr| unicode:: U+003C6 .. GREEK SMALL LETTER PHI
+.. |PSgr| unicode:: U+003A8 .. GREEK CAPITAL LETTER PSI
+.. |psgr| unicode:: U+003C8 .. GREEK SMALL LETTER PSI
+.. |Rgr| unicode:: U+003A1 .. GREEK CAPITAL LETTER RHO
+.. |rgr| unicode:: U+003C1 .. GREEK SMALL LETTER RHO
+.. |sfgr| unicode:: U+003C2 .. GREEK SMALL LETTER FINAL SIGMA
+.. |Sgr| unicode:: U+003A3 .. GREEK CAPITAL LETTER SIGMA
+.. |sgr| unicode:: U+003C3 .. GREEK SMALL LETTER SIGMA
+.. |Tgr| unicode:: U+003A4 .. GREEK CAPITAL LETTER TAU
+.. |tgr| unicode:: U+003C4 .. GREEK SMALL LETTER TAU
+.. |THgr| unicode:: U+00398 .. GREEK CAPITAL LETTER THETA
+.. |thgr| unicode:: U+003B8 .. GREEK SMALL LETTER THETA
+.. |Ugr| unicode:: U+003A5 .. GREEK CAPITAL LETTER UPSILON
+.. |ugr| unicode:: U+003C5 .. GREEK SMALL LETTER UPSILON
+.. |Xgr| unicode:: U+0039E .. GREEK CAPITAL LETTER XI
+.. |xgr| unicode:: U+003BE .. GREEK SMALL LETTER XI
+.. |Zgr| unicode:: U+00396 .. GREEK CAPITAL LETTER ZETA
+.. |zgr| unicode:: U+003B6 .. GREEK SMALL LETTER ZETA
diff --git a/docutils/parsers/rst/include/isogrk2.txt b/docutils/parsers/rst/include/isogrk2.txt
new file mode 100644
index 000000000..fa59f968d
--- /dev/null
+++ b/docutils/parsers/rst/include/isogrk2.txt
@@ -0,0 +1,26 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Aacgr| unicode:: U+00386 .. GREEK CAPITAL LETTER ALPHA WITH TONOS
+.. |aacgr| unicode:: U+003AC .. GREEK SMALL LETTER ALPHA WITH TONOS
+.. |Eacgr| unicode:: U+00388 .. GREEK CAPITAL LETTER EPSILON WITH TONOS
+.. |eacgr| unicode:: U+003AD .. GREEK SMALL LETTER EPSILON WITH TONOS
+.. |EEacgr| unicode:: U+00389 .. GREEK CAPITAL LETTER ETA WITH TONOS
+.. |eeacgr| unicode:: U+003AE .. GREEK SMALL LETTER ETA WITH TONOS
+.. |Iacgr| unicode:: U+0038A .. GREEK CAPITAL LETTER IOTA WITH TONOS
+.. |iacgr| unicode:: U+003AF .. GREEK SMALL LETTER IOTA WITH TONOS
+.. |idiagr| unicode:: U+00390 .. GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+.. |Idigr| unicode:: U+003AA .. GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+.. |idigr| unicode:: U+003CA .. GREEK SMALL LETTER IOTA WITH DIALYTIKA
+.. |Oacgr| unicode:: U+0038C .. GREEK CAPITAL LETTER OMICRON WITH TONOS
+.. |oacgr| unicode:: U+003CC .. GREEK SMALL LETTER OMICRON WITH TONOS
+.. |OHacgr| unicode:: U+0038F .. GREEK CAPITAL LETTER OMEGA WITH TONOS
+.. |ohacgr| unicode:: U+003CE .. GREEK SMALL LETTER OMEGA WITH TONOS
+.. |Uacgr| unicode:: U+0038E .. GREEK CAPITAL LETTER UPSILON WITH TONOS
+.. |uacgr| unicode:: U+003CD .. GREEK SMALL LETTER UPSILON WITH TONOS
+.. |udiagr| unicode:: U+003B0 .. GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+.. |Udigr| unicode:: U+003AB .. GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+.. |udigr| unicode:: U+003CB .. GREEK SMALL LETTER UPSILON WITH DIALYTIKA
diff --git a/docutils/parsers/rst/include/isogrk3.txt b/docutils/parsers/rst/include/isogrk3.txt
new file mode 100644
index 000000000..efacd980b
--- /dev/null
+++ b/docutils/parsers/rst/include/isogrk3.txt
@@ -0,0 +1,52 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |alpha| unicode:: U+003B1 .. GREEK SMALL LETTER ALPHA
+.. |beta| unicode:: U+003B2 .. GREEK SMALL LETTER BETA
+.. |chi| unicode:: U+003C7 .. GREEK SMALL LETTER CHI
+.. |Delta| unicode:: U+00394 .. GREEK CAPITAL LETTER DELTA
+.. |delta| unicode:: U+003B4 .. GREEK SMALL LETTER DELTA
+.. |epsi| unicode:: U+003F5 .. GREEK LUNATE EPSILON SYMBOL
+.. |epsis| unicode:: U+003F5 .. GREEK LUNATE EPSILON SYMBOL
+.. |epsiv| unicode:: U+003B5 .. GREEK SMALL LETTER EPSILON
+.. |eta| unicode:: U+003B7 .. GREEK SMALL LETTER ETA
+.. |Gamma| unicode:: U+00393 .. GREEK CAPITAL LETTER GAMMA
+.. |gamma| unicode:: U+003B3 .. GREEK SMALL LETTER GAMMA
+.. |Gammad| unicode:: U+003DC .. GREEK LETTER DIGAMMA
+.. |gammad| unicode:: U+003DD .. GREEK SMALL LETTER DIGAMMA
+.. |iota| unicode:: U+003B9 .. GREEK SMALL LETTER IOTA
+.. |kappa| unicode:: U+003BA .. GREEK SMALL LETTER KAPPA
+.. |kappav| unicode:: U+003F0 .. GREEK KAPPA SYMBOL
+.. |Lambda| unicode:: U+0039B .. GREEK CAPITAL LETTER LAMDA
+.. |lambda| unicode:: U+003BB .. GREEK SMALL LETTER LAMDA
+.. |mu| unicode:: U+003BC .. GREEK SMALL LETTER MU
+.. |nu| unicode:: U+003BD .. GREEK SMALL LETTER NU
+.. |Omega| unicode:: U+003A9 .. GREEK CAPITAL LETTER OMEGA
+.. |omega| unicode:: U+003C9 .. GREEK SMALL LETTER OMEGA
+.. |Phi| unicode:: U+003A6 .. GREEK CAPITAL LETTER PHI
+.. |phi| unicode:: U+003D5 .. GREEK PHI SYMBOL
+.. |phis| unicode:: U+003D5 .. GREEK PHI SYMBOL
+.. |phiv| unicode:: U+003C6 .. GREEK SMALL LETTER PHI
+.. |Pi| unicode:: U+003A0 .. GREEK CAPITAL LETTER PI
+.. |pi| unicode:: U+003C0 .. GREEK SMALL LETTER PI
+.. |piv| unicode:: U+003D6 .. GREEK PI SYMBOL
+.. |Psi| unicode:: U+003A8 .. GREEK CAPITAL LETTER PSI
+.. |psi| unicode:: U+003C8 .. GREEK SMALL LETTER PSI
+.. |rho| unicode:: U+003C1 .. GREEK SMALL LETTER RHO
+.. |rhov| unicode:: U+003F1 .. GREEK RHO SYMBOL
+.. |Sigma| unicode:: U+003A3 .. GREEK CAPITAL LETTER SIGMA
+.. |sigma| unicode:: U+003C3 .. GREEK SMALL LETTER SIGMA
+.. |sigmav| unicode:: U+003C2 .. GREEK SMALL LETTER FINAL SIGMA
+.. |tau| unicode:: U+003C4 .. GREEK SMALL LETTER TAU
+.. |Theta| unicode:: U+00398 .. GREEK CAPITAL LETTER THETA
+.. |theta| unicode:: U+003B8 .. GREEK SMALL LETTER THETA
+.. |thetas| unicode:: U+003B8 .. GREEK SMALL LETTER THETA
+.. |thetav| unicode:: U+003D1 .. GREEK THETA SYMBOL
+.. |Upsi| unicode:: U+003D2 .. GREEK UPSILON WITH HOOK SYMBOL
+.. |upsi| unicode:: U+003C5 .. GREEK SMALL LETTER UPSILON
+.. |Xi| unicode:: U+0039E .. GREEK CAPITAL LETTER XI
+.. |xi| unicode:: U+003BE .. GREEK SMALL LETTER XI
+.. |zeta| unicode:: U+003B6 .. GREEK SMALL LETTER ZETA
diff --git a/docutils/parsers/rst/include/isogrk4-wide.txt b/docutils/parsers/rst/include/isogrk4-wide.txt
new file mode 100644
index 000000000..39a63075d
--- /dev/null
+++ b/docutils/parsers/rst/include/isogrk4-wide.txt
@@ -0,0 +1,49 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |b.alpha| unicode:: U+1D6C2 .. MATHEMATICAL BOLD SMALL ALPHA
+.. |b.beta| unicode:: U+1D6C3 .. MATHEMATICAL BOLD SMALL BETA
+.. |b.chi| unicode:: U+1D6D8 .. MATHEMATICAL BOLD SMALL CHI
+.. |b.Delta| unicode:: U+1D6AB .. MATHEMATICAL BOLD CAPITAL DELTA
+.. |b.delta| unicode:: U+1D6C5 .. MATHEMATICAL BOLD SMALL DELTA
+.. |b.epsi| unicode:: U+1D6C6 .. MATHEMATICAL BOLD SMALL EPSILON
+.. |b.epsiv| unicode:: U+1D6DC .. MATHEMATICAL BOLD EPSILON SYMBOL
+.. |b.eta| unicode:: U+1D6C8 .. MATHEMATICAL BOLD SMALL ETA
+.. |b.Gamma| unicode:: U+1D6AA .. MATHEMATICAL BOLD CAPITAL GAMMA
+.. |b.gamma| unicode:: U+1D6C4 .. MATHEMATICAL BOLD SMALL GAMMA
+.. |b.Gammad| unicode:: U+003DC .. GREEK LETTER DIGAMMA
+.. |b.gammad| unicode:: U+003DD .. GREEK SMALL LETTER DIGAMMA
+.. |b.iota| unicode:: U+1D6CA .. MATHEMATICAL BOLD SMALL IOTA
+.. |b.kappa| unicode:: U+1D6CB .. MATHEMATICAL BOLD SMALL KAPPA
+.. |b.kappav| unicode:: U+1D6DE .. MATHEMATICAL BOLD KAPPA SYMBOL
+.. |b.Lambda| unicode:: U+1D6B2 .. MATHEMATICAL BOLD CAPITAL LAMDA
+.. |b.lambda| unicode:: U+1D6CC .. MATHEMATICAL BOLD SMALL LAMDA
+.. |b.mu| unicode:: U+1D6CD .. MATHEMATICAL BOLD SMALL MU
+.. |b.nu| unicode:: U+1D6CE .. MATHEMATICAL BOLD SMALL NU
+.. |b.Omega| unicode:: U+1D6C0 .. MATHEMATICAL BOLD CAPITAL OMEGA
+.. |b.omega| unicode:: U+1D6DA .. MATHEMATICAL BOLD SMALL OMEGA
+.. |b.Phi| unicode:: U+1D6BD .. MATHEMATICAL BOLD CAPITAL PHI
+.. |b.phi| unicode:: U+1D6D7 .. MATHEMATICAL BOLD SMALL PHI
+.. |b.phiv| unicode:: U+1D6DF .. MATHEMATICAL BOLD PHI SYMBOL
+.. |b.Pi| unicode:: U+1D6B7 .. MATHEMATICAL BOLD CAPITAL PI
+.. |b.pi| unicode:: U+1D6D1 .. MATHEMATICAL BOLD SMALL PI
+.. |b.piv| unicode:: U+1D6E1 .. MATHEMATICAL BOLD PI SYMBOL
+.. |b.Psi| unicode:: U+1D6BF .. MATHEMATICAL BOLD CAPITAL PSI
+.. |b.psi| unicode:: U+1D6D9 .. MATHEMATICAL BOLD SMALL PSI
+.. |b.rho| unicode:: U+1D6D2 .. MATHEMATICAL BOLD SMALL RHO
+.. |b.rhov| unicode:: U+1D6E0 .. MATHEMATICAL BOLD RHO SYMBOL
+.. |b.Sigma| unicode:: U+1D6BA .. MATHEMATICAL BOLD CAPITAL SIGMA
+.. |b.sigma| unicode:: U+1D6D4 .. MATHEMATICAL BOLD SMALL SIGMA
+.. |b.sigmav| unicode:: U+1D6D3 .. MATHEMATICAL BOLD SMALL FINAL SIGMA
+.. |b.tau| unicode:: U+1D6D5 .. MATHEMATICAL BOLD SMALL TAU
+.. |b.Theta| unicode:: U+1D6AF .. MATHEMATICAL BOLD CAPITAL THETA
+.. |b.thetas| unicode:: U+1D6C9 .. MATHEMATICAL BOLD SMALL THETA
+.. |b.thetav| unicode:: U+1D6DD .. MATHEMATICAL BOLD THETA SYMBOL
+.. |b.Upsi| unicode:: U+1D6BC .. MATHEMATICAL BOLD CAPITAL UPSILON
+.. |b.upsi| unicode:: U+1D6D6 .. MATHEMATICAL BOLD SMALL UPSILON
+.. |b.Xi| unicode:: U+1D6B5 .. MATHEMATICAL BOLD CAPITAL XI
+.. |b.xi| unicode:: U+1D6CF .. MATHEMATICAL BOLD SMALL XI
+.. |b.zeta| unicode:: U+1D6C7 .. MATHEMATICAL BOLD SMALL ZETA
diff --git a/docutils/parsers/rst/include/isogrk4.txt b/docutils/parsers/rst/include/isogrk4.txt
new file mode 100644
index 000000000..5b9f4104f
--- /dev/null
+++ b/docutils/parsers/rst/include/isogrk4.txt
@@ -0,0 +1,8 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |b.Gammad| unicode:: U+003DC .. GREEK LETTER DIGAMMA
+.. |b.gammad| unicode:: U+003DD .. GREEK SMALL LETTER DIGAMMA
diff --git a/docutils/parsers/rst/include/isolat1.txt b/docutils/parsers/rst/include/isolat1.txt
new file mode 100644
index 000000000..3e9ad9df3
--- /dev/null
+++ b/docutils/parsers/rst/include/isolat1.txt
@@ -0,0 +1,68 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Aacute| unicode:: U+000C1 .. LATIN CAPITAL LETTER A WITH ACUTE
+.. |aacute| unicode:: U+000E1 .. LATIN SMALL LETTER A WITH ACUTE
+.. |Acirc| unicode:: U+000C2 .. LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+.. |acirc| unicode:: U+000E2 .. LATIN SMALL LETTER A WITH CIRCUMFLEX
+.. |AElig| unicode:: U+000C6 .. LATIN CAPITAL LETTER AE
+.. |aelig| unicode:: U+000E6 .. LATIN SMALL LETTER AE
+.. |Agrave| unicode:: U+000C0 .. LATIN CAPITAL LETTER A WITH GRAVE
+.. |agrave| unicode:: U+000E0 .. LATIN SMALL LETTER A WITH GRAVE
+.. |Aring| unicode:: U+000C5 .. LATIN CAPITAL LETTER A WITH RING ABOVE
+.. |aring| unicode:: U+000E5 .. LATIN SMALL LETTER A WITH RING ABOVE
+.. |Atilde| unicode:: U+000C3 .. LATIN CAPITAL LETTER A WITH TILDE
+.. |atilde| unicode:: U+000E3 .. LATIN SMALL LETTER A WITH TILDE
+.. |Auml| unicode:: U+000C4 .. LATIN CAPITAL LETTER A WITH DIAERESIS
+.. |auml| unicode:: U+000E4 .. LATIN SMALL LETTER A WITH DIAERESIS
+.. |Ccedil| unicode:: U+000C7 .. LATIN CAPITAL LETTER C WITH CEDILLA
+.. |ccedil| unicode:: U+000E7 .. LATIN SMALL LETTER C WITH CEDILLA
+.. |Eacute| unicode:: U+000C9 .. LATIN CAPITAL LETTER E WITH ACUTE
+.. |eacute| unicode:: U+000E9 .. LATIN SMALL LETTER E WITH ACUTE
+.. |Ecirc| unicode:: U+000CA .. LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+.. |ecirc| unicode:: U+000EA .. LATIN SMALL LETTER E WITH CIRCUMFLEX
+.. |Egrave| unicode:: U+000C8 .. LATIN CAPITAL LETTER E WITH GRAVE
+.. |egrave| unicode:: U+000E8 .. LATIN SMALL LETTER E WITH GRAVE
+.. |ETH| unicode:: U+000D0 .. LATIN CAPITAL LETTER ETH
+.. |eth| unicode:: U+000F0 .. LATIN SMALL LETTER ETH
+.. |Euml| unicode:: U+000CB .. LATIN CAPITAL LETTER E WITH DIAERESIS
+.. |euml| unicode:: U+000EB .. LATIN SMALL LETTER E WITH DIAERESIS
+.. |Iacute| unicode:: U+000CD .. LATIN CAPITAL LETTER I WITH ACUTE
+.. |iacute| unicode:: U+000ED .. LATIN SMALL LETTER I WITH ACUTE
+.. |Icirc| unicode:: U+000CE .. LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+.. |icirc| unicode:: U+000EE .. LATIN SMALL LETTER I WITH CIRCUMFLEX
+.. |Igrave| unicode:: U+000CC .. LATIN CAPITAL LETTER I WITH GRAVE
+.. |igrave| unicode:: U+000EC .. LATIN SMALL LETTER I WITH GRAVE
+.. |Iuml| unicode:: U+000CF .. LATIN CAPITAL LETTER I WITH DIAERESIS
+.. |iuml| unicode:: U+000EF .. LATIN SMALL LETTER I WITH DIAERESIS
+.. |Ntilde| unicode:: U+000D1 .. LATIN CAPITAL LETTER N WITH TILDE
+.. |ntilde| unicode:: U+000F1 .. LATIN SMALL LETTER N WITH TILDE
+.. |Oacute| unicode:: U+000D3 .. LATIN CAPITAL LETTER O WITH ACUTE
+.. |oacute| unicode:: U+000F3 .. LATIN SMALL LETTER O WITH ACUTE
+.. |Ocirc| unicode:: U+000D4 .. LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+.. |ocirc| unicode:: U+000F4 .. LATIN SMALL LETTER O WITH CIRCUMFLEX
+.. |Ograve| unicode:: U+000D2 .. LATIN CAPITAL LETTER O WITH GRAVE
+.. |ograve| unicode:: U+000F2 .. LATIN SMALL LETTER O WITH GRAVE
+.. |Oslash| unicode:: U+000D8 .. LATIN CAPITAL LETTER O WITH STROKE
+.. |oslash| unicode:: U+000F8 .. LATIN SMALL LETTER O WITH STROKE
+.. |Otilde| unicode:: U+000D5 .. LATIN CAPITAL LETTER O WITH TILDE
+.. |otilde| unicode:: U+000F5 .. LATIN SMALL LETTER O WITH TILDE
+.. |Ouml| unicode:: U+000D6 .. LATIN CAPITAL LETTER O WITH DIAERESIS
+.. |ouml| unicode:: U+000F6 .. LATIN SMALL LETTER O WITH DIAERESIS
+.. |szlig| unicode:: U+000DF .. LATIN SMALL LETTER SHARP S
+.. |THORN| unicode:: U+000DE .. LATIN CAPITAL LETTER THORN
+.. |thorn| unicode:: U+000FE .. LATIN SMALL LETTER THORN
+.. |Uacute| unicode:: U+000DA .. LATIN CAPITAL LETTER U WITH ACUTE
+.. |uacute| unicode:: U+000FA .. LATIN SMALL LETTER U WITH ACUTE
+.. |Ucirc| unicode:: U+000DB .. LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+.. |ucirc| unicode:: U+000FB .. LATIN SMALL LETTER U WITH CIRCUMFLEX
+.. |Ugrave| unicode:: U+000D9 .. LATIN CAPITAL LETTER U WITH GRAVE
+.. |ugrave| unicode:: U+000F9 .. LATIN SMALL LETTER U WITH GRAVE
+.. |Uuml| unicode:: U+000DC .. LATIN CAPITAL LETTER U WITH DIAERESIS
+.. |uuml| unicode:: U+000FC .. LATIN SMALL LETTER U WITH DIAERESIS
+.. |Yacute| unicode:: U+000DD .. LATIN CAPITAL LETTER Y WITH ACUTE
+.. |yacute| unicode:: U+000FD .. LATIN SMALL LETTER Y WITH ACUTE
+.. |yuml| unicode:: U+000FF .. LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/docutils/parsers/rst/include/isolat2.txt b/docutils/parsers/rst/include/isolat2.txt
new file mode 100644
index 000000000..20de84576
--- /dev/null
+++ b/docutils/parsers/rst/include/isolat2.txt
@@ -0,0 +1,128 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Abreve| unicode:: U+00102 .. LATIN CAPITAL LETTER A WITH BREVE
+.. |abreve| unicode:: U+00103 .. LATIN SMALL LETTER A WITH BREVE
+.. |Amacr| unicode:: U+00100 .. LATIN CAPITAL LETTER A WITH MACRON
+.. |amacr| unicode:: U+00101 .. LATIN SMALL LETTER A WITH MACRON
+.. |Aogon| unicode:: U+00104 .. LATIN CAPITAL LETTER A WITH OGONEK
+.. |aogon| unicode:: U+00105 .. LATIN SMALL LETTER A WITH OGONEK
+.. |Cacute| unicode:: U+00106 .. LATIN CAPITAL LETTER C WITH ACUTE
+.. |cacute| unicode:: U+00107 .. LATIN SMALL LETTER C WITH ACUTE
+.. |Ccaron| unicode:: U+0010C .. LATIN CAPITAL LETTER C WITH CARON
+.. |ccaron| unicode:: U+0010D .. LATIN SMALL LETTER C WITH CARON
+.. |Ccirc| unicode:: U+00108 .. LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+.. |ccirc| unicode:: U+00109 .. LATIN SMALL LETTER C WITH CIRCUMFLEX
+.. |Cdot| unicode:: U+0010A .. LATIN CAPITAL LETTER C WITH DOT ABOVE
+.. |cdot| unicode:: U+0010B .. LATIN SMALL LETTER C WITH DOT ABOVE
+.. |Dcaron| unicode:: U+0010E .. LATIN CAPITAL LETTER D WITH CARON
+.. |dcaron| unicode:: U+0010F .. LATIN SMALL LETTER D WITH CARON
+.. |Dstrok| unicode:: U+00110 .. LATIN CAPITAL LETTER D WITH STROKE
+.. |dstrok| unicode:: U+00111 .. LATIN SMALL LETTER D WITH STROKE
+.. |Ecaron| unicode:: U+0011A .. LATIN CAPITAL LETTER E WITH CARON
+.. |ecaron| unicode:: U+0011B .. LATIN SMALL LETTER E WITH CARON
+.. |Edot| unicode:: U+00116 .. LATIN CAPITAL LETTER E WITH DOT ABOVE
+.. |edot| unicode:: U+00117 .. LATIN SMALL LETTER E WITH DOT ABOVE
+.. |Emacr| unicode:: U+00112 .. LATIN CAPITAL LETTER E WITH MACRON
+.. |emacr| unicode:: U+00113 .. LATIN SMALL LETTER E WITH MACRON
+.. |ENG| unicode:: U+0014A .. LATIN CAPITAL LETTER ENG
+.. |eng| unicode:: U+0014B .. LATIN SMALL LETTER ENG
+.. |Eogon| unicode:: U+00118 .. LATIN CAPITAL LETTER E WITH OGONEK
+.. |eogon| unicode:: U+00119 .. LATIN SMALL LETTER E WITH OGONEK
+.. |gacute| unicode:: U+001F5 .. LATIN SMALL LETTER G WITH ACUTE
+.. |Gbreve| unicode:: U+0011E .. LATIN CAPITAL LETTER G WITH BREVE
+.. |gbreve| unicode:: U+0011F .. LATIN SMALL LETTER G WITH BREVE
+.. |Gcedil| unicode:: U+00122 .. LATIN CAPITAL LETTER G WITH CEDILLA
+.. |gcedil| unicode:: U+00123 .. LATIN SMALL LETTER G WITH CEDILLA
+.. |Gcirc| unicode:: U+0011C .. LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+.. |gcirc| unicode:: U+0011D .. LATIN SMALL LETTER G WITH CIRCUMFLEX
+.. |Gdot| unicode:: U+00120 .. LATIN CAPITAL LETTER G WITH DOT ABOVE
+.. |gdot| unicode:: U+00121 .. LATIN SMALL LETTER G WITH DOT ABOVE
+.. |Hcirc| unicode:: U+00124 .. LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+.. |hcirc| unicode:: U+00125 .. LATIN SMALL LETTER H WITH CIRCUMFLEX
+.. |Hstrok| unicode:: U+00126 .. LATIN CAPITAL LETTER H WITH STROKE
+.. |hstrok| unicode:: U+00127 .. LATIN SMALL LETTER H WITH STROKE
+.. |Idot| unicode:: U+00130 .. LATIN CAPITAL LETTER I WITH DOT ABOVE
+.. |IJlig| unicode:: U+00132 .. LATIN CAPITAL LIGATURE IJ
+.. |ijlig| unicode:: U+00133 .. LATIN SMALL LIGATURE IJ
+.. |Imacr| unicode:: U+0012A .. LATIN CAPITAL LETTER I WITH MACRON
+.. |imacr| unicode:: U+0012B .. LATIN SMALL LETTER I WITH MACRON
+.. |inodot| unicode:: U+00131 .. LATIN SMALL LETTER DOTLESS I
+.. |Iogon| unicode:: U+0012E .. LATIN CAPITAL LETTER I WITH OGONEK
+.. |iogon| unicode:: U+0012F .. LATIN SMALL LETTER I WITH OGONEK
+.. |Itilde| unicode:: U+00128 .. LATIN CAPITAL LETTER I WITH TILDE
+.. |itilde| unicode:: U+00129 .. LATIN SMALL LETTER I WITH TILDE
+.. |Jcirc| unicode:: U+00134 .. LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+.. |jcirc| unicode:: U+00135 .. LATIN SMALL LETTER J WITH CIRCUMFLEX
+.. |Kcedil| unicode:: U+00136 .. LATIN CAPITAL LETTER K WITH CEDILLA
+.. |kcedil| unicode:: U+00137 .. LATIN SMALL LETTER K WITH CEDILLA
+.. |kgreen| unicode:: U+00138 .. LATIN SMALL LETTER KRA
+.. |Lacute| unicode:: U+00139 .. LATIN CAPITAL LETTER L WITH ACUTE
+.. |lacute| unicode:: U+0013A .. LATIN SMALL LETTER L WITH ACUTE
+.. |Lcaron| unicode:: U+0013D .. LATIN CAPITAL LETTER L WITH CARON
+.. |lcaron| unicode:: U+0013E .. LATIN SMALL LETTER L WITH CARON
+.. |Lcedil| unicode:: U+0013B .. LATIN CAPITAL LETTER L WITH CEDILLA
+.. |lcedil| unicode:: U+0013C .. LATIN SMALL LETTER L WITH CEDILLA
+.. |Lmidot| unicode:: U+0013F .. LATIN CAPITAL LETTER L WITH MIDDLE DOT
+.. |lmidot| unicode:: U+00140 .. LATIN SMALL LETTER L WITH MIDDLE DOT
+.. |Lstrok| unicode:: U+00141 .. LATIN CAPITAL LETTER L WITH STROKE
+.. |lstrok| unicode:: U+00142 .. LATIN SMALL LETTER L WITH STROKE
+.. |Nacute| unicode:: U+00143 .. LATIN CAPITAL LETTER N WITH ACUTE
+.. |nacute| unicode:: U+00144 .. LATIN SMALL LETTER N WITH ACUTE
+.. |napos| unicode:: U+00149 .. LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+.. |Ncaron| unicode:: U+00147 .. LATIN CAPITAL LETTER N WITH CARON
+.. |ncaron| unicode:: U+00148 .. LATIN SMALL LETTER N WITH CARON
+.. |Ncedil| unicode:: U+00145 .. LATIN CAPITAL LETTER N WITH CEDILLA
+.. |ncedil| unicode:: U+00146 .. LATIN SMALL LETTER N WITH CEDILLA
+.. |Odblac| unicode:: U+00150 .. LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+.. |odblac| unicode:: U+00151 .. LATIN SMALL LETTER O WITH DOUBLE ACUTE
+.. |OElig| unicode:: U+00152 .. LATIN CAPITAL LIGATURE OE
+.. |oelig| unicode:: U+00153 .. LATIN SMALL LIGATURE OE
+.. |Omacr| unicode:: U+0014C .. LATIN CAPITAL LETTER O WITH MACRON
+.. |omacr| unicode:: U+0014D .. LATIN SMALL LETTER O WITH MACRON
+.. |Racute| unicode:: U+00154 .. LATIN CAPITAL LETTER R WITH ACUTE
+.. |racute| unicode:: U+00155 .. LATIN SMALL LETTER R WITH ACUTE
+.. |Rcaron| unicode:: U+00158 .. LATIN CAPITAL LETTER R WITH CARON
+.. |rcaron| unicode:: U+00159 .. LATIN SMALL LETTER R WITH CARON
+.. |Rcedil| unicode:: U+00156 .. LATIN CAPITAL LETTER R WITH CEDILLA
+.. |rcedil| unicode:: U+00157 .. LATIN SMALL LETTER R WITH CEDILLA
+.. |Sacute| unicode:: U+0015A .. LATIN CAPITAL LETTER S WITH ACUTE
+.. |sacute| unicode:: U+0015B .. LATIN SMALL LETTER S WITH ACUTE
+.. |Scaron| unicode:: U+00160 .. LATIN CAPITAL LETTER S WITH CARON
+.. |scaron| unicode:: U+00161 .. LATIN SMALL LETTER S WITH CARON
+.. |Scedil| unicode:: U+0015E .. LATIN CAPITAL LETTER S WITH CEDILLA
+.. |scedil| unicode:: U+0015F .. LATIN SMALL LETTER S WITH CEDILLA
+.. |Scirc| unicode:: U+0015C .. LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+.. |scirc| unicode:: U+0015D .. LATIN SMALL LETTER S WITH CIRCUMFLEX
+.. |Tcaron| unicode:: U+00164 .. LATIN CAPITAL LETTER T WITH CARON
+.. |tcaron| unicode:: U+00165 .. LATIN SMALL LETTER T WITH CARON
+.. |Tcedil| unicode:: U+00162 .. LATIN CAPITAL LETTER T WITH CEDILLA
+.. |tcedil| unicode:: U+00163 .. LATIN SMALL LETTER T WITH CEDILLA
+.. |Tstrok| unicode:: U+00166 .. LATIN CAPITAL LETTER T WITH STROKE
+.. |tstrok| unicode:: U+00167 .. LATIN SMALL LETTER T WITH STROKE
+.. |Ubreve| unicode:: U+0016C .. LATIN CAPITAL LETTER U WITH BREVE
+.. |ubreve| unicode:: U+0016D .. LATIN SMALL LETTER U WITH BREVE
+.. |Udblac| unicode:: U+00170 .. LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+.. |udblac| unicode:: U+00171 .. LATIN SMALL LETTER U WITH DOUBLE ACUTE
+.. |Umacr| unicode:: U+0016A .. LATIN CAPITAL LETTER U WITH MACRON
+.. |umacr| unicode:: U+0016B .. LATIN SMALL LETTER U WITH MACRON
+.. |Uogon| unicode:: U+00172 .. LATIN CAPITAL LETTER U WITH OGONEK
+.. |uogon| unicode:: U+00173 .. LATIN SMALL LETTER U WITH OGONEK
+.. |Uring| unicode:: U+0016E .. LATIN CAPITAL LETTER U WITH RING ABOVE
+.. |uring| unicode:: U+0016F .. LATIN SMALL LETTER U WITH RING ABOVE
+.. |Utilde| unicode:: U+00168 .. LATIN CAPITAL LETTER U WITH TILDE
+.. |utilde| unicode:: U+00169 .. LATIN SMALL LETTER U WITH TILDE
+.. |Wcirc| unicode:: U+00174 .. LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+.. |wcirc| unicode:: U+00175 .. LATIN SMALL LETTER W WITH CIRCUMFLEX
+.. |Ycirc| unicode:: U+00176 .. LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+.. |ycirc| unicode:: U+00177 .. LATIN SMALL LETTER Y WITH CIRCUMFLEX
+.. |Yuml| unicode:: U+00178 .. LATIN CAPITAL LETTER Y WITH DIAERESIS
+.. |Zacute| unicode:: U+00179 .. LATIN CAPITAL LETTER Z WITH ACUTE
+.. |zacute| unicode:: U+0017A .. LATIN SMALL LETTER Z WITH ACUTE
+.. |Zcaron| unicode:: U+0017D .. LATIN CAPITAL LETTER Z WITH CARON
+.. |zcaron| unicode:: U+0017E .. LATIN SMALL LETTER Z WITH CARON
+.. |Zdot| unicode:: U+0017B .. LATIN CAPITAL LETTER Z WITH DOT ABOVE
+.. |zdot| unicode:: U+0017C .. LATIN SMALL LETTER Z WITH DOT ABOVE
diff --git a/docutils/parsers/rst/include/isomfrk-wide.txt b/docutils/parsers/rst/include/isomfrk-wide.txt
new file mode 100644
index 000000000..75bba2575
--- /dev/null
+++ b/docutils/parsers/rst/include/isomfrk-wide.txt
@@ -0,0 +1,58 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Afr| unicode:: U+1D504 .. MATHEMATICAL FRAKTUR CAPITAL A
+.. |afr| unicode:: U+1D51E .. MATHEMATICAL FRAKTUR SMALL A
+.. |Bfr| unicode:: U+1D505 .. MATHEMATICAL FRAKTUR CAPITAL B
+.. |bfr| unicode:: U+1D51F .. MATHEMATICAL FRAKTUR SMALL B
+.. |Cfr| unicode:: U+0212D .. BLACK-LETTER CAPITAL C
+.. |cfr| unicode:: U+1D520 .. MATHEMATICAL FRAKTUR SMALL C
+.. |Dfr| unicode:: U+1D507 .. MATHEMATICAL FRAKTUR CAPITAL D
+.. |dfr| unicode:: U+1D521 .. MATHEMATICAL FRAKTUR SMALL D
+.. |Efr| unicode:: U+1D508 .. MATHEMATICAL FRAKTUR CAPITAL E
+.. |efr| unicode:: U+1D522 .. MATHEMATICAL FRAKTUR SMALL E
+.. |Ffr| unicode:: U+1D509 .. MATHEMATICAL FRAKTUR CAPITAL F
+.. |ffr| unicode:: U+1D523 .. MATHEMATICAL FRAKTUR SMALL F
+.. |Gfr| unicode:: U+1D50A .. MATHEMATICAL FRAKTUR CAPITAL G
+.. |gfr| unicode:: U+1D524 .. MATHEMATICAL FRAKTUR SMALL G
+.. |Hfr| unicode:: U+0210C .. BLACK-LETTER CAPITAL H
+.. |hfr| unicode:: U+1D525 .. MATHEMATICAL FRAKTUR SMALL H
+.. |Ifr| unicode:: U+02111 .. BLACK-LETTER CAPITAL I
+.. |ifr| unicode:: U+1D526 .. MATHEMATICAL FRAKTUR SMALL I
+.. |Jfr| unicode:: U+1D50D .. MATHEMATICAL FRAKTUR CAPITAL J
+.. |jfr| unicode:: U+1D527 .. MATHEMATICAL FRAKTUR SMALL J
+.. |Kfr| unicode:: U+1D50E .. MATHEMATICAL FRAKTUR CAPITAL K
+.. |kfr| unicode:: U+1D528 .. MATHEMATICAL FRAKTUR SMALL K
+.. |Lfr| unicode:: U+1D50F .. MATHEMATICAL FRAKTUR CAPITAL L
+.. |lfr| unicode:: U+1D529 .. MATHEMATICAL FRAKTUR SMALL L
+.. |Mfr| unicode:: U+1D510 .. MATHEMATICAL FRAKTUR CAPITAL M
+.. |mfr| unicode:: U+1D52A .. MATHEMATICAL FRAKTUR SMALL M
+.. |Nfr| unicode:: U+1D511 .. MATHEMATICAL FRAKTUR CAPITAL N
+.. |nfr| unicode:: U+1D52B .. MATHEMATICAL FRAKTUR SMALL N
+.. |Ofr| unicode:: U+1D512 .. MATHEMATICAL FRAKTUR CAPITAL O
+.. |ofr| unicode:: U+1D52C .. MATHEMATICAL FRAKTUR SMALL O
+.. |Pfr| unicode:: U+1D513 .. MATHEMATICAL FRAKTUR CAPITAL P
+.. |pfr| unicode:: U+1D52D .. MATHEMATICAL FRAKTUR SMALL P
+.. |Qfr| unicode:: U+1D514 .. MATHEMATICAL FRAKTUR CAPITAL Q
+.. |qfr| unicode:: U+1D52E .. MATHEMATICAL FRAKTUR SMALL Q
+.. |Rfr| unicode:: U+0211C .. BLACK-LETTER CAPITAL R
+.. |rfr| unicode:: U+1D52F .. MATHEMATICAL FRAKTUR SMALL R
+.. |Sfr| unicode:: U+1D516 .. MATHEMATICAL FRAKTUR CAPITAL S
+.. |sfr| unicode:: U+1D530 .. MATHEMATICAL FRAKTUR SMALL S
+.. |Tfr| unicode:: U+1D517 .. MATHEMATICAL FRAKTUR CAPITAL T
+.. |tfr| unicode:: U+1D531 .. MATHEMATICAL FRAKTUR SMALL T
+.. |Ufr| unicode:: U+1D518 .. MATHEMATICAL FRAKTUR CAPITAL U
+.. |ufr| unicode:: U+1D532 .. MATHEMATICAL FRAKTUR SMALL U
+.. |Vfr| unicode:: U+1D519 .. MATHEMATICAL FRAKTUR CAPITAL V
+.. |vfr| unicode:: U+1D533 .. MATHEMATICAL FRAKTUR SMALL V
+.. |Wfr| unicode:: U+1D51A .. MATHEMATICAL FRAKTUR CAPITAL W
+.. |wfr| unicode:: U+1D534 .. MATHEMATICAL FRAKTUR SMALL W
+.. |Xfr| unicode:: U+1D51B .. MATHEMATICAL FRAKTUR CAPITAL X
+.. |xfr| unicode:: U+1D535 .. MATHEMATICAL FRAKTUR SMALL X
+.. |Yfr| unicode:: U+1D51C .. MATHEMATICAL FRAKTUR CAPITAL Y
+.. |yfr| unicode:: U+1D536 .. MATHEMATICAL FRAKTUR SMALL Y
+.. |Zfr| unicode:: U+02128 .. BLACK-LETTER CAPITAL Z
+.. |zfr| unicode:: U+1D537 .. MATHEMATICAL FRAKTUR SMALL Z
diff --git a/docutils/parsers/rst/include/isomfrk.txt b/docutils/parsers/rst/include/isomfrk.txt
new file mode 100644
index 000000000..868b687a5
--- /dev/null
+++ b/docutils/parsers/rst/include/isomfrk.txt
@@ -0,0 +1,11 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Cfr| unicode:: U+0212D .. BLACK-LETTER CAPITAL C
+.. |Hfr| unicode:: U+0210C .. BLACK-LETTER CAPITAL H
+.. |Ifr| unicode:: U+02111 .. BLACK-LETTER CAPITAL I
+.. |Rfr| unicode:: U+0211C .. BLACK-LETTER CAPITAL R
+.. |Zfr| unicode:: U+02128 .. BLACK-LETTER CAPITAL Z
diff --git a/docutils/parsers/rst/include/isomopf-wide.txt b/docutils/parsers/rst/include/isomopf-wide.txt
new file mode 100644
index 000000000..a91ea43eb
--- /dev/null
+++ b/docutils/parsers/rst/include/isomopf-wide.txt
@@ -0,0 +1,32 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Aopf| unicode:: U+1D538 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL A
+.. |Bopf| unicode:: U+1D539 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL B
+.. |Copf| unicode:: U+02102 .. DOUBLE-STRUCK CAPITAL C
+.. |Dopf| unicode:: U+1D53B .. MATHEMATICAL DOUBLE-STRUCK CAPITAL D
+.. |Eopf| unicode:: U+1D53C .. MATHEMATICAL DOUBLE-STRUCK CAPITAL E
+.. |Fopf| unicode:: U+1D53D .. MATHEMATICAL DOUBLE-STRUCK CAPITAL F
+.. |Gopf| unicode:: U+1D53E .. MATHEMATICAL DOUBLE-STRUCK CAPITAL G
+.. |Hopf| unicode:: U+0210D .. DOUBLE-STRUCK CAPITAL H
+.. |Iopf| unicode:: U+1D540 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL I
+.. |Jopf| unicode:: U+1D541 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL J
+.. |Kopf| unicode:: U+1D542 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL K
+.. |Lopf| unicode:: U+1D543 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL L
+.. |Mopf| unicode:: U+1D544 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL M
+.. |Nopf| unicode:: U+02115 .. DOUBLE-STRUCK CAPITAL N
+.. |Oopf| unicode:: U+1D546 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL O
+.. |Popf| unicode:: U+02119 .. DOUBLE-STRUCK CAPITAL P
+.. |Qopf| unicode:: U+0211A .. DOUBLE-STRUCK CAPITAL Q
+.. |Ropf| unicode:: U+0211D .. DOUBLE-STRUCK CAPITAL R
+.. |Sopf| unicode:: U+1D54A .. MATHEMATICAL DOUBLE-STRUCK CAPITAL S
+.. |Topf| unicode:: U+1D54B .. MATHEMATICAL DOUBLE-STRUCK CAPITAL T
+.. |Uopf| unicode:: U+1D54C .. MATHEMATICAL DOUBLE-STRUCK CAPITAL U
+.. |Vopf| unicode:: U+1D54D .. MATHEMATICAL DOUBLE-STRUCK CAPITAL V
+.. |Wopf| unicode:: U+1D54E .. MATHEMATICAL DOUBLE-STRUCK CAPITAL W
+.. |Xopf| unicode:: U+1D54F .. MATHEMATICAL DOUBLE-STRUCK CAPITAL X
+.. |Yopf| unicode:: U+1D550 .. MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
+.. |Zopf| unicode:: U+02124 .. DOUBLE-STRUCK CAPITAL Z
diff --git a/docutils/parsers/rst/include/isomopf.txt b/docutils/parsers/rst/include/isomopf.txt
new file mode 100644
index 000000000..4350db61b
--- /dev/null
+++ b/docutils/parsers/rst/include/isomopf.txt
@@ -0,0 +1,13 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Copf| unicode:: U+02102 .. DOUBLE-STRUCK CAPITAL C
+.. |Hopf| unicode:: U+0210D .. DOUBLE-STRUCK CAPITAL H
+.. |Nopf| unicode:: U+02115 .. DOUBLE-STRUCK CAPITAL N
+.. |Popf| unicode:: U+02119 .. DOUBLE-STRUCK CAPITAL P
+.. |Qopf| unicode:: U+0211A .. DOUBLE-STRUCK CAPITAL Q
+.. |Ropf| unicode:: U+0211D .. DOUBLE-STRUCK CAPITAL R
+.. |Zopf| unicode:: U+02124 .. DOUBLE-STRUCK CAPITAL Z
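
Each line in these files follows the pattern ``.. |name| unicode:: codepoint(s) .. comment``: the text after the second ``..`` is an inline comment ignored by the ``unicode`` directive, and a value of several codepoints (as in ``|nang|`` or ``|gesl|`` earlier in this patch) expands to a character sequence. The same pattern works for local definitions; the ``|myohm|`` name below is made up purely for illustration::

    .. |myohm| unicode:: U+02126 .. OHM SIGN (name invented for this example)

    Resistance is measured in |myohm|.
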
diff --git a/docutils/parsers/rst/include/isomscr-wide.txt b/docutils/parsers/rst/include/isomscr-wide.txt
new file mode 100644
index 000000000..34b278b98
--- /dev/null
+++ b/docutils/parsers/rst/include/isomscr-wide.txt
@@ -0,0 +1,58 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Ascr| unicode:: U+1D49C .. MATHEMATICAL SCRIPT CAPITAL A
+.. |ascr| unicode:: U+1D4B6 .. MATHEMATICAL SCRIPT SMALL A
+.. |Bscr| unicode:: U+0212C .. SCRIPT CAPITAL B
+.. |bscr| unicode:: U+1D4B7 .. MATHEMATICAL SCRIPT SMALL B
+.. |Cscr| unicode:: U+1D49E .. MATHEMATICAL SCRIPT CAPITAL C
+.. |cscr| unicode:: U+1D4B8 .. MATHEMATICAL SCRIPT SMALL C
+.. |Dscr| unicode:: U+1D49F .. MATHEMATICAL SCRIPT CAPITAL D
+.. |dscr| unicode:: U+1D4B9 .. MATHEMATICAL SCRIPT SMALL D
+.. |Escr| unicode:: U+02130 .. SCRIPT CAPITAL E
+.. |escr| unicode:: U+0212F .. SCRIPT SMALL E
+.. |Fscr| unicode:: U+02131 .. SCRIPT CAPITAL F
+.. |fscr| unicode:: U+1D4BB .. MATHEMATICAL SCRIPT SMALL F
+.. |Gscr| unicode:: U+1D4A2 .. MATHEMATICAL SCRIPT CAPITAL G
+.. |gscr| unicode:: U+0210A .. SCRIPT SMALL G
+.. |Hscr| unicode:: U+0210B .. SCRIPT CAPITAL H
+.. |hscr| unicode:: U+1D4BD .. MATHEMATICAL SCRIPT SMALL H
+.. |Iscr| unicode:: U+02110 .. SCRIPT CAPITAL I
+.. |iscr| unicode:: U+1D4BE .. MATHEMATICAL SCRIPT SMALL I
+.. |Jscr| unicode:: U+1D4A5 .. MATHEMATICAL SCRIPT CAPITAL J
+.. |jscr| unicode:: U+1D4BF .. MATHEMATICAL SCRIPT SMALL J
+.. |Kscr| unicode:: U+1D4A6 .. MATHEMATICAL SCRIPT CAPITAL K
+.. |kscr| unicode:: U+1D4C0 .. MATHEMATICAL SCRIPT SMALL K
+.. |Lscr| unicode:: U+02112 .. SCRIPT CAPITAL L
+.. |lscr| unicode:: U+1D4C1 .. MATHEMATICAL SCRIPT SMALL L
+.. |Mscr| unicode:: U+02133 .. SCRIPT CAPITAL M
+.. |mscr| unicode:: U+1D4C2 .. MATHEMATICAL SCRIPT SMALL M
+.. |Nscr| unicode:: U+1D4A9 .. MATHEMATICAL SCRIPT CAPITAL N
+.. |nscr| unicode:: U+1D4C3 .. MATHEMATICAL SCRIPT SMALL N
+.. |Oscr| unicode:: U+1D4AA .. MATHEMATICAL SCRIPT CAPITAL O
+.. |oscr| unicode:: U+02134 .. SCRIPT SMALL O
+.. |Pscr| unicode:: U+1D4AB .. MATHEMATICAL SCRIPT CAPITAL P
+.. |pscr| unicode:: U+1D4C5 .. MATHEMATICAL SCRIPT SMALL P
+.. |Qscr| unicode:: U+1D4AC .. MATHEMATICAL SCRIPT CAPITAL Q
+.. |qscr| unicode:: U+1D4C6 .. MATHEMATICAL SCRIPT SMALL Q
+.. |Rscr| unicode:: U+0211B .. SCRIPT CAPITAL R
+.. |rscr| unicode:: U+1D4C7 .. MATHEMATICAL SCRIPT SMALL R
+.. |Sscr| unicode:: U+1D4AE .. MATHEMATICAL SCRIPT CAPITAL S
+.. |sscr| unicode:: U+1D4C8 .. MATHEMATICAL SCRIPT SMALL S
+.. |Tscr| unicode:: U+1D4AF .. MATHEMATICAL SCRIPT CAPITAL T
+.. |tscr| unicode:: U+1D4C9 .. MATHEMATICAL SCRIPT SMALL T
+.. |Uscr| unicode:: U+1D4B0 .. MATHEMATICAL SCRIPT CAPITAL U
+.. |uscr| unicode:: U+1D4CA .. MATHEMATICAL SCRIPT SMALL U
+.. |Vscr| unicode:: U+1D4B1 .. MATHEMATICAL SCRIPT CAPITAL V
+.. |vscr| unicode:: U+1D4CB .. MATHEMATICAL SCRIPT SMALL V
+.. |Wscr| unicode:: U+1D4B2 .. MATHEMATICAL SCRIPT CAPITAL W
+.. |wscr| unicode:: U+1D4CC .. MATHEMATICAL SCRIPT SMALL W
+.. |Xscr| unicode:: U+1D4B3 .. MATHEMATICAL SCRIPT CAPITAL X
+.. |xscr| unicode:: U+1D4CD .. MATHEMATICAL SCRIPT SMALL X
+.. |Yscr| unicode:: U+1D4B4 .. MATHEMATICAL SCRIPT CAPITAL Y
+.. |yscr| unicode:: U+1D4CE .. MATHEMATICAL SCRIPT SMALL Y
+.. |Zscr| unicode:: U+1D4B5 .. MATHEMATICAL SCRIPT CAPITAL Z
+.. |zscr| unicode:: U+1D4CF .. MATHEMATICAL SCRIPT SMALL Z
diff --git a/docutils/parsers/rst/include/isomscr.txt b/docutils/parsers/rst/include/isomscr.txt
new file mode 100644
index 000000000..a77890e97
--- /dev/null
+++ b/docutils/parsers/rst/include/isomscr.txt
@@ -0,0 +1,17 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Bscr| unicode:: U+0212C .. SCRIPT CAPITAL B
+.. |Escr| unicode:: U+02130 .. SCRIPT CAPITAL E
+.. |escr| unicode:: U+0212F .. SCRIPT SMALL E
+.. |Fscr| unicode:: U+02131 .. SCRIPT CAPITAL F
+.. |gscr| unicode:: U+0210A .. SCRIPT SMALL G
+.. |Hscr| unicode:: U+0210B .. SCRIPT CAPITAL H
+.. |Iscr| unicode:: U+02110 .. SCRIPT CAPITAL I
+.. |Lscr| unicode:: U+02112 .. SCRIPT CAPITAL L
+.. |Mscr| unicode:: U+02133 .. SCRIPT CAPITAL M
+.. |oscr| unicode:: U+02134 .. SCRIPT SMALL O
+.. |Rscr| unicode:: U+0211B .. SCRIPT CAPITAL R
diff --git a/docutils/parsers/rst/include/isonum.txt b/docutils/parsers/rst/include/isonum.txt
new file mode 100644
index 000000000..35793b365
--- /dev/null
+++ b/docutils/parsers/rst/include/isonum.txt
@@ -0,0 +1,82 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |amp| unicode:: U+00026 .. AMPERSAND
+.. |apos| unicode:: U+00027 .. APOSTROPHE
+.. |ast| unicode:: U+0002A .. ASTERISK
+.. |brvbar| unicode:: U+000A6 .. BROKEN BAR
+.. |bsol| unicode:: U+0005C .. REVERSE SOLIDUS
+.. |cent| unicode:: U+000A2 .. CENT SIGN
+.. |colon| unicode:: U+0003A .. COLON
+.. |comma| unicode:: U+0002C .. COMMA
+.. |commat| unicode:: U+00040 .. COMMERCIAL AT
+.. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN
+.. |curren| unicode:: U+000A4 .. CURRENCY SIGN
+.. |darr| unicode:: U+02193 .. DOWNWARDS ARROW
+.. |deg| unicode:: U+000B0 .. DEGREE SIGN
+.. |divide| unicode:: U+000F7 .. DIVISION SIGN
+.. |dollar| unicode:: U+00024 .. DOLLAR SIGN
+.. |equals| unicode:: U+0003D .. EQUALS SIGN
+.. |excl| unicode:: U+00021 .. EXCLAMATION MARK
+.. |frac12| unicode:: U+000BD .. VULGAR FRACTION ONE HALF
+.. |frac14| unicode:: U+000BC .. VULGAR FRACTION ONE QUARTER
+.. |frac18| unicode:: U+0215B .. VULGAR FRACTION ONE EIGHTH
+.. |frac34| unicode:: U+000BE .. VULGAR FRACTION THREE QUARTERS
+.. |frac38| unicode:: U+0215C .. VULGAR FRACTION THREE EIGHTHS
+.. |frac58| unicode:: U+0215D .. VULGAR FRACTION FIVE EIGHTHS
+.. |frac78| unicode:: U+0215E .. VULGAR FRACTION SEVEN EIGHTHS
+.. |gt| unicode:: U+0003E .. GREATER-THAN SIGN
+.. |half| unicode:: U+000BD .. VULGAR FRACTION ONE HALF
+.. |horbar| unicode:: U+02015 .. HORIZONTAL BAR
+.. |hyphen| unicode:: U+02010 .. HYPHEN
+.. |iexcl| unicode:: U+000A1 .. INVERTED EXCLAMATION MARK
+.. |iquest| unicode:: U+000BF .. INVERTED QUESTION MARK
+.. |laquo| unicode:: U+000AB .. LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+.. |larr| unicode:: U+02190 .. LEFTWARDS ARROW
+.. |lcub| unicode:: U+0007B .. LEFT CURLY BRACKET
+.. |ldquo| unicode:: U+0201C .. LEFT DOUBLE QUOTATION MARK
+.. |lowbar| unicode:: U+0005F .. LOW LINE
+.. |lpar| unicode:: U+00028 .. LEFT PARENTHESIS
+.. |lsqb| unicode:: U+0005B .. LEFT SQUARE BRACKET
+.. |lsquo| unicode:: U+02018 .. LEFT SINGLE QUOTATION MARK
+.. |lt| unicode:: U+0003C .. LESS-THAN SIGN
+.. |micro| unicode:: U+000B5 .. MICRO SIGN
+.. |middot| unicode:: U+000B7 .. MIDDLE DOT
+.. |nbsp| unicode:: U+000A0 .. NO-BREAK SPACE
+.. |not| unicode:: U+000AC .. NOT SIGN
+.. |num| unicode:: U+00023 .. NUMBER SIGN
+.. |ohm| unicode:: U+02126 .. OHM SIGN
+.. |ordf| unicode:: U+000AA .. FEMININE ORDINAL INDICATOR
+.. |ordm| unicode:: U+000BA .. MASCULINE ORDINAL INDICATOR
+.. |para| unicode:: U+000B6 .. PILCROW SIGN
+.. |percnt| unicode:: U+00025 .. PERCENT SIGN
+.. |period| unicode:: U+0002E .. FULL STOP
+.. |plus| unicode:: U+0002B .. PLUS SIGN
+.. |plusmn| unicode:: U+000B1 .. PLUS-MINUS SIGN
+.. |pound| unicode:: U+000A3 .. POUND SIGN
+.. |quest| unicode:: U+0003F .. QUESTION MARK
+.. |quot| unicode:: U+00022 .. QUOTATION MARK
+.. |raquo| unicode:: U+000BB .. RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+.. |rarr| unicode:: U+02192 .. RIGHTWARDS ARROW
+.. |rcub| unicode:: U+0007D .. RIGHT CURLY BRACKET
+.. |rdquo| unicode:: U+0201D .. RIGHT DOUBLE QUOTATION MARK
+.. |reg| unicode:: U+000AE .. REGISTERED SIGN
+.. |rpar| unicode:: U+00029 .. RIGHT PARENTHESIS
+.. |rsqb| unicode:: U+0005D .. RIGHT SQUARE BRACKET
+.. |rsquo| unicode:: U+02019 .. RIGHT SINGLE QUOTATION MARK
+.. |sect| unicode:: U+000A7 .. SECTION SIGN
+.. |semi| unicode:: U+0003B .. SEMICOLON
+.. |shy| unicode:: U+000AD .. SOFT HYPHEN
+.. |sol| unicode:: U+0002F .. SOLIDUS
+.. |sung| unicode:: U+0266A .. EIGHTH NOTE
+.. |sup1| unicode:: U+000B9 .. SUPERSCRIPT ONE
+.. |sup2| unicode:: U+000B2 .. SUPERSCRIPT TWO
+.. |sup3| unicode:: U+000B3 .. SUPERSCRIPT THREE
+.. |times| unicode:: U+000D7 .. MULTIPLICATION SIGN
+.. |trade| unicode:: U+02122 .. TRADE MARK SIGN
+.. |uarr| unicode:: U+02191 .. UPWARDS ARROW
+.. |verbar| unicode:: U+0007C .. VERTICAL LINE
+.. |yen| unicode:: U+000A5 .. YEN SIGN
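These data files are intended to be pulled into a document with the ``include`` directive rather than copied by hand. A usage sketch, assuming a Docutils installation that ships this include/ directory on the standard include path, where the angle-bracket form resolves against that path (the substitution names used below are the ones defined in isonum.txt above)::

    .. include:: <isonum.txt>

    Copyright |copy| 2006 |middot| one half is written |frac12|.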
diff --git a/docutils/parsers/rst/include/isopub.txt b/docutils/parsers/rst/include/isopub.txt
new file mode 100644
index 000000000..bc5b6d491
--- /dev/null
+++ b/docutils/parsers/rst/include/isopub.txt
@@ -0,0 +1,90 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |blank| unicode:: U+02423 .. OPEN BOX
+.. |blk12| unicode:: U+02592 .. MEDIUM SHADE
+.. |blk14| unicode:: U+02591 .. LIGHT SHADE
+.. |blk34| unicode:: U+02593 .. DARK SHADE
+.. |block| unicode:: U+02588 .. FULL BLOCK
+.. |bull| unicode:: U+02022 .. BULLET
+.. |caret| unicode:: U+02041 .. CARET INSERTION POINT
+.. |check| unicode:: U+02713 .. CHECK MARK
+.. |cir| unicode:: U+025CB .. WHITE CIRCLE
+.. |clubs| unicode:: U+02663 .. BLACK CLUB SUIT
+.. |copysr| unicode:: U+02117 .. SOUND RECORDING COPYRIGHT
+.. |cross| unicode:: U+02717 .. BALLOT X
+.. |Dagger| unicode:: U+02021 .. DOUBLE DAGGER
+.. |dagger| unicode:: U+02020 .. DAGGER
+.. |dash| unicode:: U+02010 .. HYPHEN
+.. |diams| unicode:: U+02666 .. BLACK DIAMOND SUIT
+.. |dlcrop| unicode:: U+0230D .. BOTTOM LEFT CROP
+.. |drcrop| unicode:: U+0230C .. BOTTOM RIGHT CROP
+.. |dtri| unicode:: U+025BF .. WHITE DOWN-POINTING SMALL TRIANGLE
+.. |dtrif| unicode:: U+025BE .. BLACK DOWN-POINTING SMALL TRIANGLE
+.. |emsp| unicode:: U+02003 .. EM SPACE
+.. |emsp13| unicode:: U+02004 .. THREE-PER-EM SPACE
+.. |emsp14| unicode:: U+02005 .. FOUR-PER-EM SPACE
+.. |ensp| unicode:: U+02002 .. EN SPACE
+.. |female| unicode:: U+02640 .. FEMALE SIGN
+.. |ffilig| unicode:: U+0FB03 .. LATIN SMALL LIGATURE FFI
+.. |fflig| unicode:: U+0FB00 .. LATIN SMALL LIGATURE FF
+.. |ffllig| unicode:: U+0FB04 .. LATIN SMALL LIGATURE FFL
+.. |filig| unicode:: U+0FB01 .. LATIN SMALL LIGATURE FI
+.. |flat| unicode:: U+0266D .. MUSIC FLAT SIGN
+.. |fllig| unicode:: U+0FB02 .. LATIN SMALL LIGATURE FL
+.. |frac13| unicode:: U+02153 .. VULGAR FRACTION ONE THIRD
+.. |frac15| unicode:: U+02155 .. VULGAR FRACTION ONE FIFTH
+.. |frac16| unicode:: U+02159 .. VULGAR FRACTION ONE SIXTH
+.. |frac23| unicode:: U+02154 .. VULGAR FRACTION TWO THIRDS
+.. |frac25| unicode:: U+02156 .. VULGAR FRACTION TWO FIFTHS
+.. |frac35| unicode:: U+02157 .. VULGAR FRACTION THREE FIFTHS
+.. |frac45| unicode:: U+02158 .. VULGAR FRACTION FOUR FIFTHS
+.. |frac56| unicode:: U+0215A .. VULGAR FRACTION FIVE SIXTHS
+.. |hairsp| unicode:: U+0200A .. HAIR SPACE
+.. |hearts| unicode:: U+02665 .. BLACK HEART SUIT
+.. |hellip| unicode:: U+02026 .. HORIZONTAL ELLIPSIS
+.. |hybull| unicode:: U+02043 .. HYPHEN BULLET
+.. |incare| unicode:: U+02105 .. CARE OF
+.. |ldquor| unicode:: U+0201E .. DOUBLE LOW-9 QUOTATION MARK
+.. |lhblk| unicode:: U+02584 .. LOWER HALF BLOCK
+.. |loz| unicode:: U+025CA .. LOZENGE
+.. |lozf| unicode:: U+029EB .. BLACK LOZENGE
+.. |lsquor| unicode:: U+0201A .. SINGLE LOW-9 QUOTATION MARK
+.. |ltri| unicode:: U+025C3 .. WHITE LEFT-POINTING SMALL TRIANGLE
+.. |ltrif| unicode:: U+025C2 .. BLACK LEFT-POINTING SMALL TRIANGLE
+.. |male| unicode:: U+02642 .. MALE SIGN
+.. |malt| unicode:: U+02720 .. MALTESE CROSS
+.. |marker| unicode:: U+025AE .. BLACK VERTICAL RECTANGLE
+.. |mdash| unicode:: U+02014 .. EM DASH
+.. |mldr| unicode:: U+02026 .. HORIZONTAL ELLIPSIS
+.. |natur| unicode:: U+0266E .. MUSIC NATURAL SIGN
+.. |ndash| unicode:: U+02013 .. EN DASH
+.. |nldr| unicode:: U+02025 .. TWO DOT LEADER
+.. |numsp| unicode:: U+02007 .. FIGURE SPACE
+.. |phone| unicode:: U+0260E .. BLACK TELEPHONE
+.. |puncsp| unicode:: U+02008 .. PUNCTUATION SPACE
+.. |rdquor| unicode:: U+0201D .. RIGHT DOUBLE QUOTATION MARK
+.. |rect| unicode:: U+025AD .. WHITE RECTANGLE
+.. |rsquor| unicode:: U+02019 .. RIGHT SINGLE QUOTATION MARK
+.. |rtri| unicode:: U+025B9 .. WHITE RIGHT-POINTING SMALL TRIANGLE
+.. |rtrif| unicode:: U+025B8 .. BLACK RIGHT-POINTING SMALL TRIANGLE
+.. |rx| unicode:: U+0211E .. PRESCRIPTION TAKE
+.. |sext| unicode:: U+02736 .. SIX POINTED BLACK STAR
+.. |sharp| unicode:: U+0266F .. MUSIC SHARP SIGN
+.. |spades| unicode:: U+02660 .. BLACK SPADE SUIT
+.. |squ| unicode:: U+025A1 .. WHITE SQUARE
+.. |squf| unicode:: U+025AA .. BLACK SMALL SQUARE
+.. |star| unicode:: U+02606 .. WHITE STAR
+.. |starf| unicode:: U+02605 .. BLACK STAR
+.. |target| unicode:: U+02316 .. POSITION INDICATOR
+.. |telrec| unicode:: U+02315 .. TELEPHONE RECORDER
+.. |thinsp| unicode:: U+02009 .. THIN SPACE
+.. |uhblk| unicode:: U+02580 .. UPPER HALF BLOCK
+.. |ulcrop| unicode:: U+0230F .. TOP LEFT CROP
+.. |urcrop| unicode:: U+0230E .. TOP RIGHT CROP
+.. |utri| unicode:: U+025B5 .. WHITE UP-POINTING SMALL TRIANGLE
+.. |utrif| unicode:: U+025B4 .. BLACK UP-POINTING SMALL TRIANGLE
+.. |vellip| unicode:: U+022EE .. VERTICAL ELLIPSIS
diff --git a/docutils/parsers/rst/include/isotech.txt b/docutils/parsers/rst/include/isotech.txt
new file mode 100644
index 000000000..01f7e346f
--- /dev/null
+++ b/docutils/parsers/rst/include/isotech.txt
@@ -0,0 +1,168 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |acd| unicode:: U+0223F .. SINE WAVE
+.. |aleph| unicode:: U+02135 .. ALEF SYMBOL
+.. |And| unicode:: U+02A53 .. DOUBLE LOGICAL AND
+.. |and| unicode:: U+02227 .. LOGICAL AND
+.. |andand| unicode:: U+02A55 .. TWO INTERSECTING LOGICAL AND
+.. |andd| unicode:: U+02A5C .. LOGICAL AND WITH HORIZONTAL DASH
+.. |andslope| unicode:: U+02A58 .. SLOPING LARGE AND
+.. |andv| unicode:: U+02A5A .. LOGICAL AND WITH MIDDLE STEM
+.. |ang90| unicode:: U+0221F .. RIGHT ANGLE
+.. |angrt| unicode:: U+0221F .. RIGHT ANGLE
+.. |angsph| unicode:: U+02222 .. SPHERICAL ANGLE
+.. |angst| unicode:: U+0212B .. ANGSTROM SIGN
+.. |ap| unicode:: U+02248 .. ALMOST EQUAL TO
+.. |apacir| unicode:: U+02A6F .. ALMOST EQUAL TO WITH CIRCUMFLEX ACCENT
+.. |awconint| unicode:: U+02233 .. ANTICLOCKWISE CONTOUR INTEGRAL
+.. |awint| unicode:: U+02A11 .. ANTICLOCKWISE INTEGRATION
+.. |becaus| unicode:: U+02235 .. BECAUSE
+.. |bernou| unicode:: U+0212C .. SCRIPT CAPITAL B
+.. |bne| unicode:: U+0003D U+020E5 .. EQUALS SIGN with reverse slash
+.. |bnequiv| unicode:: U+02261 U+020E5 .. IDENTICAL TO with reverse slash
+.. |bNot| unicode:: U+02AED .. REVERSED DOUBLE STROKE NOT SIGN
+.. |bnot| unicode:: U+02310 .. REVERSED NOT SIGN
+.. |bottom| unicode:: U+022A5 .. UP TACK
+.. |cap| unicode:: U+02229 .. INTERSECTION
+.. |Cconint| unicode:: U+02230 .. VOLUME INTEGRAL
+.. |cirfnint| unicode:: U+02A10 .. CIRCULATION FUNCTION
+.. |compfn| unicode:: U+02218 .. RING OPERATOR
+.. |cong| unicode:: U+02245 .. APPROXIMATELY EQUAL TO
+.. |Conint| unicode:: U+0222F .. SURFACE INTEGRAL
+.. |conint| unicode:: U+0222E .. CONTOUR INTEGRAL
+.. |ctdot| unicode:: U+022EF .. MIDLINE HORIZONTAL ELLIPSIS
+.. |cup| unicode:: U+0222A .. UNION
+.. |cwconint| unicode:: U+02232 .. CLOCKWISE CONTOUR INTEGRAL
+.. |cwint| unicode:: U+02231 .. CLOCKWISE INTEGRAL
+.. |cylcty| unicode:: U+0232D .. CYLINDRICITY
+.. |disin| unicode:: U+022F2 .. ELEMENT OF WITH LONG HORIZONTAL STROKE
+.. |Dot| unicode:: U+000A8 .. DIAERESIS
+.. |DotDot| unicode:: U+020DC .. COMBINING FOUR DOTS ABOVE
+.. |dsol| unicode:: U+029F6 .. SOLIDUS WITH OVERBAR
+.. |dtdot| unicode:: U+022F1 .. DOWN RIGHT DIAGONAL ELLIPSIS
+.. |dwangle| unicode:: U+029A6 .. OBLIQUE ANGLE OPENING UP
+.. |elinters| unicode:: U+0FFFD .. REPLACEMENT CHARACTER
+.. |epar| unicode:: U+022D5 .. EQUAL AND PARALLEL TO
+.. |eparsl| unicode:: U+029E3 .. EQUALS SIGN AND SLANTED PARALLEL
+.. |equiv| unicode:: U+02261 .. IDENTICAL TO
+.. |eqvparsl| unicode:: U+029E5 .. IDENTICAL TO AND SLANTED PARALLEL
+.. |exist| unicode:: U+02203 .. THERE EXISTS
+.. |fltns| unicode:: U+025B1 .. WHITE PARALLELOGRAM
+.. |fnof| unicode:: U+00192 .. LATIN SMALL LETTER F WITH HOOK
+.. |forall| unicode:: U+02200 .. FOR ALL
+.. |fpartint| unicode:: U+02A0D .. FINITE PART INTEGRAL
+.. |ge| unicode:: U+02265 .. GREATER-THAN OR EQUAL TO
+.. |hamilt| unicode:: U+0210B .. SCRIPT CAPITAL H
+.. |iff| unicode:: U+021D4 .. LEFT RIGHT DOUBLE ARROW
+.. |iinfin| unicode:: U+029DC .. INCOMPLETE INFINITY
+.. |imped| unicode:: U+001B5 .. LATIN CAPITAL LETTER Z WITH STROKE
+.. |infin| unicode:: U+0221E .. INFINITY
+.. |infintie| unicode:: U+029DD .. TIE OVER INFINITY
+.. |Int| unicode:: U+0222C .. DOUBLE INTEGRAL
+.. |int| unicode:: U+0222B .. INTEGRAL
+.. |intlarhk| unicode:: U+02A17 .. INTEGRAL WITH LEFTWARDS ARROW WITH HOOK
+.. |isin| unicode:: U+02208 .. ELEMENT OF
+.. |isindot| unicode:: U+022F5 .. ELEMENT OF WITH DOT ABOVE
+.. |isinE| unicode:: U+022F9 .. ELEMENT OF WITH TWO HORIZONTAL STROKES
+.. |isins| unicode:: U+022F4 .. SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+.. |isinsv| unicode:: U+022F3 .. ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+.. |isinv| unicode:: U+02208 .. ELEMENT OF
+.. |lagran| unicode:: U+02112 .. SCRIPT CAPITAL L
+.. |Lang| unicode:: U+0300A .. LEFT DOUBLE ANGLE BRACKET
+.. |lang| unicode:: U+02329 .. LEFT-POINTING ANGLE BRACKET
+.. |lArr| unicode:: U+021D0 .. LEFTWARDS DOUBLE ARROW
+.. |lbbrk| unicode:: U+03014 .. LEFT TORTOISE SHELL BRACKET
+.. |le| unicode:: U+02264 .. LESS-THAN OR EQUAL TO
+.. |loang| unicode:: U+03018 .. LEFT WHITE TORTOISE SHELL BRACKET
+.. |lobrk| unicode:: U+0301A .. LEFT WHITE SQUARE BRACKET
+.. |lopar| unicode:: U+02985 .. LEFT WHITE PARENTHESIS
+.. |lowast| unicode:: U+02217 .. ASTERISK OPERATOR
+.. |minus| unicode:: U+02212 .. MINUS SIGN
+.. |mnplus| unicode:: U+02213 .. MINUS-OR-PLUS SIGN
+.. |nabla| unicode:: U+02207 .. NABLA
+.. |ne| unicode:: U+02260 .. NOT EQUAL TO
+.. |nedot| unicode:: U+02250 U+00338 .. APPROACHES THE LIMIT with slash
+.. |nhpar| unicode:: U+02AF2 .. PARALLEL WITH HORIZONTAL STROKE
+.. |ni| unicode:: U+0220B .. CONTAINS AS MEMBER
+.. |nis| unicode:: U+022FC .. SMALL CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+.. |nisd| unicode:: U+022FA .. CONTAINS WITH LONG HORIZONTAL STROKE
+.. |niv| unicode:: U+0220B .. CONTAINS AS MEMBER
+.. |Not| unicode:: U+02AEC .. DOUBLE STROKE NOT SIGN
+.. |notin| unicode:: U+02209 .. NOT AN ELEMENT OF
+.. |notindot| unicode:: U+022F5 U+00338 .. ELEMENT OF WITH DOT ABOVE with slash
+.. |notinE| unicode:: U+022F9 U+00338 .. ELEMENT OF WITH TWO HORIZONTAL STROKES with slash
+.. |notinva| unicode:: U+02209 .. NOT AN ELEMENT OF
+.. |notinvb| unicode:: U+022F7 .. SMALL ELEMENT OF WITH OVERBAR
+.. |notinvc| unicode:: U+022F6 .. ELEMENT OF WITH OVERBAR
+.. |notni| unicode:: U+0220C .. DOES NOT CONTAIN AS MEMBER
+.. |notniva| unicode:: U+0220C .. DOES NOT CONTAIN AS MEMBER
+.. |notnivb| unicode:: U+022FE .. SMALL CONTAINS WITH OVERBAR
+.. |notnivc| unicode:: U+022FD .. CONTAINS WITH OVERBAR
+.. |nparsl| unicode:: U+02AFD U+020E5 .. DOUBLE SOLIDUS OPERATOR with reverse slash
+.. |npart| unicode:: U+02202 U+00338 .. PARTIAL DIFFERENTIAL with slash
+.. |npolint| unicode:: U+02A14 .. LINE INTEGRATION NOT INCLUDING THE POLE
+.. |nvinfin| unicode:: U+029DE .. INFINITY NEGATED WITH VERTICAL BAR
+.. |olcross| unicode:: U+029BB .. CIRCLE WITH SUPERIMPOSED X
+.. |Or| unicode:: U+02A54 .. DOUBLE LOGICAL OR
+.. |or| unicode:: U+02228 .. LOGICAL OR
+.. |ord| unicode:: U+02A5D .. LOGICAL OR WITH HORIZONTAL DASH
+.. |order| unicode:: U+02134 .. SCRIPT SMALL O
+.. |oror| unicode:: U+02A56 .. TWO INTERSECTING LOGICAL OR
+.. |orslope| unicode:: U+02A57 .. SLOPING LARGE OR
+.. |orv| unicode:: U+02A5B .. LOGICAL OR WITH MIDDLE STEM
+.. |par| unicode:: U+02225 .. PARALLEL TO
+.. |parsl| unicode:: U+02AFD .. DOUBLE SOLIDUS OPERATOR
+.. |part| unicode:: U+02202 .. PARTIAL DIFFERENTIAL
+.. |permil| unicode:: U+02030 .. PER MILLE SIGN
+.. |perp| unicode:: U+022A5 .. UP TACK
+.. |pertenk| unicode:: U+02031 .. PER TEN THOUSAND SIGN
+.. |phmmat| unicode:: U+02133 .. SCRIPT CAPITAL M
+.. |pointint| unicode:: U+02A15 .. INTEGRAL AROUND A POINT OPERATOR
+.. |Prime| unicode:: U+02033 .. DOUBLE PRIME
+.. |prime| unicode:: U+02032 .. PRIME
+.. |profalar| unicode:: U+0232E .. ALL AROUND-PROFILE
+.. |profline| unicode:: U+02312 .. ARC
+.. |profsurf| unicode:: U+02313 .. SEGMENT
+.. |prop| unicode:: U+0221D .. PROPORTIONAL TO
+.. |qint| unicode:: U+02A0C .. QUADRUPLE INTEGRAL OPERATOR
+.. |qprime| unicode:: U+02057 .. QUADRUPLE PRIME
+.. |quatint| unicode:: U+02A16 .. QUATERNION INTEGRAL OPERATOR
+.. |radic| unicode:: U+0221A .. SQUARE ROOT
+.. |Rang| unicode:: U+0300B .. RIGHT DOUBLE ANGLE BRACKET
+.. |rang| unicode:: U+0232A .. RIGHT-POINTING ANGLE BRACKET
+.. |rArr| unicode:: U+021D2 .. RIGHTWARDS DOUBLE ARROW
+.. |rbbrk| unicode:: U+03015 .. RIGHT TORTOISE SHELL BRACKET
+.. |roang| unicode:: U+03019 .. RIGHT WHITE TORTOISE SHELL BRACKET
+.. |robrk| unicode:: U+0301B .. RIGHT WHITE SQUARE BRACKET
+.. |ropar| unicode:: U+02986 .. RIGHT WHITE PARENTHESIS
+.. |rppolint| unicode:: U+02A12 .. LINE INTEGRATION WITH RECTANGULAR PATH AROUND POLE
+.. |scpolint| unicode:: U+02A13 .. LINE INTEGRATION WITH SEMICIRCULAR PATH AROUND POLE
+.. |sim| unicode:: U+0223C .. TILDE OPERATOR
+.. |simdot| unicode:: U+02A6A .. TILDE OPERATOR WITH DOT ABOVE
+.. |sime| unicode:: U+02243 .. ASYMPTOTICALLY EQUAL TO
+.. |smeparsl| unicode:: U+029E4 .. EQUALS SIGN AND SLANTED PARALLEL WITH TILDE ABOVE
+.. |square| unicode:: U+025A1 .. WHITE SQUARE
+.. |squarf| unicode:: U+025AA .. BLACK SMALL SQUARE
+.. |strns| unicode:: U+000AF .. MACRON
+.. |sub| unicode:: U+02282 .. SUBSET OF
+.. |sube| unicode:: U+02286 .. SUBSET OF OR EQUAL TO
+.. |sup| unicode:: U+02283 .. SUPERSET OF
+.. |supe| unicode:: U+02287 .. SUPERSET OF OR EQUAL TO
+.. |tdot| unicode:: U+020DB .. COMBINING THREE DOTS ABOVE
+.. |there4| unicode:: U+02234 .. THEREFORE
+.. |tint| unicode:: U+0222D .. TRIPLE INTEGRAL
+.. |top| unicode:: U+022A4 .. DOWN TACK
+.. |topbot| unicode:: U+02336 .. APL FUNCTIONAL SYMBOL I-BEAM
+.. |topcir| unicode:: U+02AF1 .. DOWN TACK WITH CIRCLE BELOW
+.. |tprime| unicode:: U+02034 .. TRIPLE PRIME
+.. |utdot| unicode:: U+022F0 .. UP RIGHT DIAGONAL ELLIPSIS
+.. |uwangle| unicode:: U+029A7 .. OBLIQUE ANGLE OPENING DOWN
+.. |vangrt| unicode:: U+0299C .. RIGHT ANGLE VARIANT WITH SQUARE
+.. |veeeq| unicode:: U+0225A .. EQUIANGULAR TO
+.. |Verbar| unicode:: U+02016 .. DOUBLE VERTICAL LINE
+.. |wedgeq| unicode:: U+02259 .. ESTIMATES
+.. |xnis| unicode:: U+022FB .. CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
diff --git a/docutils/parsers/rst/include/mmlalias.txt b/docutils/parsers/rst/include/mmlalias.txt
new file mode 100644
index 000000000..cabc54ac4
--- /dev/null
+++ b/docutils/parsers/rst/include/mmlalias.txt
@@ -0,0 +1,554 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |angle| unicode:: U+02220 .. ANGLE
+.. |ApplyFunction| unicode:: U+02061 .. FUNCTION APPLICATION
+.. |approx| unicode:: U+02248 .. ALMOST EQUAL TO
+.. |approxeq| unicode:: U+0224A .. ALMOST EQUAL OR EQUAL TO
+.. |Assign| unicode:: U+02254 .. COLON EQUALS
+.. |backcong| unicode:: U+0224C .. ALL EQUAL TO
+.. |backepsilon| unicode:: U+003F6 .. GREEK REVERSED LUNATE EPSILON SYMBOL
+.. |backprime| unicode:: U+02035 .. REVERSED PRIME
+.. |backsim| unicode:: U+0223D .. REVERSED TILDE
+.. |backsimeq| unicode:: U+022CD .. REVERSED TILDE EQUALS
+.. |Backslash| unicode:: U+02216 .. SET MINUS
+.. |barwedge| unicode:: U+02305 .. PROJECTIVE
+.. |Because| unicode:: U+02235 .. BECAUSE
+.. |because| unicode:: U+02235 .. BECAUSE
+.. |Bernoullis| unicode:: U+0212C .. SCRIPT CAPITAL B
+.. |between| unicode:: U+0226C .. BETWEEN
+.. |bigcap| unicode:: U+022C2 .. N-ARY INTERSECTION
+.. |bigcirc| unicode:: U+025EF .. LARGE CIRCLE
+.. |bigcup| unicode:: U+022C3 .. N-ARY UNION
+.. |bigodot| unicode:: U+02A00 .. N-ARY CIRCLED DOT OPERATOR
+.. |bigoplus| unicode:: U+02A01 .. N-ARY CIRCLED PLUS OPERATOR
+.. |bigotimes| unicode:: U+02A02 .. N-ARY CIRCLED TIMES OPERATOR
+.. |bigsqcup| unicode:: U+02A06 .. N-ARY SQUARE UNION OPERATOR
+.. |bigstar| unicode:: U+02605 .. BLACK STAR
+.. |bigtriangledown| unicode:: U+025BD .. WHITE DOWN-POINTING TRIANGLE
+.. |bigtriangleup| unicode:: U+025B3 .. WHITE UP-POINTING TRIANGLE
+.. |biguplus| unicode:: U+02A04 .. N-ARY UNION OPERATOR WITH PLUS
+.. |bigvee| unicode:: U+022C1 .. N-ARY LOGICAL OR
+.. |bigwedge| unicode:: U+022C0 .. N-ARY LOGICAL AND
+.. |bkarow| unicode:: U+0290D .. RIGHTWARDS DOUBLE DASH ARROW
+.. |blacklozenge| unicode:: U+029EB .. BLACK LOZENGE
+.. |blacksquare| unicode:: U+025AA .. BLACK SMALL SQUARE
+.. |blacktriangle| unicode:: U+025B4 .. BLACK UP-POINTING SMALL TRIANGLE
+.. |blacktriangledown| unicode:: U+025BE .. BLACK DOWN-POINTING SMALL TRIANGLE
+.. |blacktriangleleft| unicode:: U+025C2 .. BLACK LEFT-POINTING SMALL TRIANGLE
+.. |blacktriangleright| unicode:: U+025B8 .. BLACK RIGHT-POINTING SMALL TRIANGLE
+.. |bot| unicode:: U+022A5 .. UP TACK
+.. |boxminus| unicode:: U+0229F .. SQUARED MINUS
+.. |boxplus| unicode:: U+0229E .. SQUARED PLUS
+.. |boxtimes| unicode:: U+022A0 .. SQUARED TIMES
+.. |Breve| unicode:: U+002D8 .. BREVE
+.. |bullet| unicode:: U+02022 .. BULLET
+.. |Bumpeq| unicode:: U+0224E .. GEOMETRICALLY EQUIVALENT TO
+.. |bumpeq| unicode:: U+0224F .. DIFFERENCE BETWEEN
+.. |CapitalDifferentialD| unicode:: U+02145 .. DOUBLE-STRUCK ITALIC CAPITAL D
+.. |Cayleys| unicode:: U+0212D .. BLACK-LETTER CAPITAL C
+.. |Cedilla| unicode:: U+000B8 .. CEDILLA
+.. |CenterDot| unicode:: U+000B7 .. MIDDLE DOT
+.. |centerdot| unicode:: U+000B7 .. MIDDLE DOT
+.. |checkmark| unicode:: U+02713 .. CHECK MARK
+.. |circeq| unicode:: U+02257 .. RING EQUAL TO
+.. |circlearrowleft| unicode:: U+021BA .. ANTICLOCKWISE OPEN CIRCLE ARROW
+.. |circlearrowright| unicode:: U+021BB .. CLOCKWISE OPEN CIRCLE ARROW
+.. |circledast| unicode:: U+0229B .. CIRCLED ASTERISK OPERATOR
+.. |circledcirc| unicode:: U+0229A .. CIRCLED RING OPERATOR
+.. |circleddash| unicode:: U+0229D .. CIRCLED DASH
+.. |CircleDot| unicode:: U+02299 .. CIRCLED DOT OPERATOR
+.. |circledR| unicode:: U+000AE .. REGISTERED SIGN
+.. |circledS| unicode:: U+024C8 .. CIRCLED LATIN CAPITAL LETTER S
+.. |CircleMinus| unicode:: U+02296 .. CIRCLED MINUS
+.. |CirclePlus| unicode:: U+02295 .. CIRCLED PLUS
+.. |CircleTimes| unicode:: U+02297 .. CIRCLED TIMES
+.. |ClockwiseContourIntegral| unicode:: U+02232 .. CLOCKWISE CONTOUR INTEGRAL
+.. |CloseCurlyDoubleQuote| unicode:: U+0201D .. RIGHT DOUBLE QUOTATION MARK
+.. |CloseCurlyQuote| unicode:: U+02019 .. RIGHT SINGLE QUOTATION MARK
+.. |clubsuit| unicode:: U+02663 .. BLACK CLUB SUIT
+.. |coloneq| unicode:: U+02254 .. COLON EQUALS
+.. |complement| unicode:: U+02201 .. COMPLEMENT
+.. |complexes| unicode:: U+02102 .. DOUBLE-STRUCK CAPITAL C
+.. |Congruent| unicode:: U+02261 .. IDENTICAL TO
+.. |ContourIntegral| unicode:: U+0222E .. CONTOUR INTEGRAL
+.. |Coproduct| unicode:: U+02210 .. N-ARY COPRODUCT
+.. |CounterClockwiseContourIntegral| unicode:: U+02233 .. ANTICLOCKWISE CONTOUR INTEGRAL
+.. |CupCap| unicode:: U+0224D .. EQUIVALENT TO
+.. |curlyeqprec| unicode:: U+022DE .. EQUAL TO OR PRECEDES
+.. |curlyeqsucc| unicode:: U+022DF .. EQUAL TO OR SUCCEEDS
+.. |curlyvee| unicode:: U+022CE .. CURLY LOGICAL OR
+.. |curlywedge| unicode:: U+022CF .. CURLY LOGICAL AND
+.. |curvearrowleft| unicode:: U+021B6 .. ANTICLOCKWISE TOP SEMICIRCLE ARROW
+.. |curvearrowright| unicode:: U+021B7 .. CLOCKWISE TOP SEMICIRCLE ARROW
+.. |dbkarow| unicode:: U+0290F .. RIGHTWARDS TRIPLE DASH ARROW
+.. |ddagger| unicode:: U+02021 .. DOUBLE DAGGER
+.. |ddotseq| unicode:: U+02A77 .. EQUALS SIGN WITH TWO DOTS ABOVE AND TWO DOTS BELOW
+.. |Del| unicode:: U+02207 .. NABLA
+.. |DiacriticalAcute| unicode:: U+000B4 .. ACUTE ACCENT
+.. |DiacriticalDot| unicode:: U+002D9 .. DOT ABOVE
+.. |DiacriticalDoubleAcute| unicode:: U+002DD .. DOUBLE ACUTE ACCENT
+.. |DiacriticalGrave| unicode:: U+00060 .. GRAVE ACCENT
+.. |DiacriticalTilde| unicode:: U+002DC .. SMALL TILDE
+.. |Diamond| unicode:: U+022C4 .. DIAMOND OPERATOR
+.. |diamond| unicode:: U+022C4 .. DIAMOND OPERATOR
+.. |diamondsuit| unicode:: U+02666 .. BLACK DIAMOND SUIT
+.. |DifferentialD| unicode:: U+02146 .. DOUBLE-STRUCK ITALIC SMALL D
+.. |digamma| unicode:: U+003DD .. GREEK SMALL LETTER DIGAMMA
+.. |div| unicode:: U+000F7 .. DIVISION SIGN
+.. |divideontimes| unicode:: U+022C7 .. DIVISION TIMES
+.. |doteq| unicode:: U+02250 .. APPROACHES THE LIMIT
+.. |doteqdot| unicode:: U+02251 .. GEOMETRICALLY EQUAL TO
+.. |DotEqual| unicode:: U+02250 .. APPROACHES THE LIMIT
+.. |dotminus| unicode:: U+02238 .. DOT MINUS
+.. |dotplus| unicode:: U+02214 .. DOT PLUS
+.. |dotsquare| unicode:: U+022A1 .. SQUARED DOT OPERATOR
+.. |doublebarwedge| unicode:: U+02306 .. PERSPECTIVE
+.. |DoubleContourIntegral| unicode:: U+0222F .. SURFACE INTEGRAL
+.. |DoubleDot| unicode:: U+000A8 .. DIAERESIS
+.. |DoubleDownArrow| unicode:: U+021D3 .. DOWNWARDS DOUBLE ARROW
+.. |DoubleLeftArrow| unicode:: U+021D0 .. LEFTWARDS DOUBLE ARROW
+.. |DoubleLeftRightArrow| unicode:: U+021D4 .. LEFT RIGHT DOUBLE ARROW
+.. |DoubleLeftTee| unicode:: U+02AE4 .. VERTICAL BAR DOUBLE LEFT TURNSTILE
+.. |DoubleLongLeftArrow| unicode:: U+027F8 .. LONG LEFTWARDS DOUBLE ARROW
+.. |DoubleLongLeftRightArrow| unicode:: U+027FA .. LONG LEFT RIGHT DOUBLE ARROW
+.. |DoubleLongRightArrow| unicode:: U+027F9 .. LONG RIGHTWARDS DOUBLE ARROW
+.. |DoubleRightArrow| unicode:: U+021D2 .. RIGHTWARDS DOUBLE ARROW
+.. |DoubleRightTee| unicode:: U+022A8 .. TRUE
+.. |DoubleUpArrow| unicode:: U+021D1 .. UPWARDS DOUBLE ARROW
+.. |DoubleUpDownArrow| unicode:: U+021D5 .. UP DOWN DOUBLE ARROW
+.. |DoubleVerticalBar| unicode:: U+02225 .. PARALLEL TO
+.. |DownArrow| unicode:: U+02193 .. DOWNWARDS ARROW
+.. |Downarrow| unicode:: U+021D3 .. DOWNWARDS DOUBLE ARROW
+.. |downarrow| unicode:: U+02193 .. DOWNWARDS ARROW
+.. |DownArrowUpArrow| unicode:: U+021F5 .. DOWNWARDS ARROW LEFTWARDS OF UPWARDS ARROW
+.. |downdownarrows| unicode:: U+021CA .. DOWNWARDS PAIRED ARROWS
+.. |downharpoonleft| unicode:: U+021C3 .. DOWNWARDS HARPOON WITH BARB LEFTWARDS
+.. |downharpoonright| unicode:: U+021C2 .. DOWNWARDS HARPOON WITH BARB RIGHTWARDS
+.. |DownLeftVector| unicode:: U+021BD .. LEFTWARDS HARPOON WITH BARB DOWNWARDS
+.. |DownRightVector| unicode:: U+021C1 .. RIGHTWARDS HARPOON WITH BARB DOWNWARDS
+.. |DownTee| unicode:: U+022A4 .. DOWN TACK
+.. |DownTeeArrow| unicode:: U+021A7 .. DOWNWARDS ARROW FROM BAR
+.. |drbkarow| unicode:: U+02910 .. RIGHTWARDS TWO-HEADED TRIPLE DASH ARROW
+.. |Element| unicode:: U+02208 .. ELEMENT OF
+.. |emptyset| unicode:: U+02205 .. EMPTY SET
+.. |eqcirc| unicode:: U+02256 .. RING IN EQUAL TO
+.. |eqcolon| unicode:: U+02255 .. EQUALS COLON
+.. |eqsim| unicode:: U+02242 .. MINUS TILDE
+.. |eqslantgtr| unicode:: U+02A96 .. SLANTED EQUAL TO OR GREATER-THAN
+.. |eqslantless| unicode:: U+02A95 .. SLANTED EQUAL TO OR LESS-THAN
+.. |EqualTilde| unicode:: U+02242 .. MINUS TILDE
+.. |Equilibrium| unicode:: U+021CC .. RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+.. |Exists| unicode:: U+02203 .. THERE EXISTS
+.. |expectation| unicode:: U+02130 .. SCRIPT CAPITAL E
+.. |ExponentialE| unicode:: U+02147 .. DOUBLE-STRUCK ITALIC SMALL E
+.. |exponentiale| unicode:: U+02147 .. DOUBLE-STRUCK ITALIC SMALL E
+.. |fallingdotseq| unicode:: U+02252 .. APPROXIMATELY EQUAL TO OR THE IMAGE OF
+.. |ForAll| unicode:: U+02200 .. FOR ALL
+.. |Fouriertrf| unicode:: U+02131 .. SCRIPT CAPITAL F
+.. |geq| unicode:: U+02265 .. GREATER-THAN OR EQUAL TO
+.. |geqq| unicode:: U+02267 .. GREATER-THAN OVER EQUAL TO
+.. |geqslant| unicode:: U+02A7E .. GREATER-THAN OR SLANTED EQUAL TO
+.. |gg| unicode:: U+0226B .. MUCH GREATER-THAN
+.. |ggg| unicode:: U+022D9 .. VERY MUCH GREATER-THAN
+.. |gnapprox| unicode:: U+02A8A .. GREATER-THAN AND NOT APPROXIMATE
+.. |gneq| unicode:: U+02A88 .. GREATER-THAN AND SINGLE-LINE NOT EQUAL TO
+.. |gneqq| unicode:: U+02269 .. GREATER-THAN BUT NOT EQUAL TO
+.. |GreaterEqual| unicode:: U+02265 .. GREATER-THAN OR EQUAL TO
+.. |GreaterEqualLess| unicode:: U+022DB .. GREATER-THAN EQUAL TO OR LESS-THAN
+.. |GreaterFullEqual| unicode:: U+02267 .. GREATER-THAN OVER EQUAL TO
+.. |GreaterLess| unicode:: U+02277 .. GREATER-THAN OR LESS-THAN
+.. |GreaterSlantEqual| unicode:: U+02A7E .. GREATER-THAN OR SLANTED EQUAL TO
+.. |GreaterTilde| unicode:: U+02273 .. GREATER-THAN OR EQUIVALENT TO
+.. |gtrapprox| unicode:: U+02A86 .. GREATER-THAN OR APPROXIMATE
+.. |gtrdot| unicode:: U+022D7 .. GREATER-THAN WITH DOT
+.. |gtreqless| unicode:: U+022DB .. GREATER-THAN EQUAL TO OR LESS-THAN
+.. |gtreqqless| unicode:: U+02A8C .. GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN
+.. |gtrless| unicode:: U+02277 .. GREATER-THAN OR LESS-THAN
+.. |gtrsim| unicode:: U+02273 .. GREATER-THAN OR EQUIVALENT TO
+.. |gvertneqq| unicode:: U+02269 U+0FE00 .. GREATER-THAN BUT NOT EQUAL TO - with vertical stroke
+.. |Hacek| unicode:: U+002C7 .. CARON
+.. |hbar| unicode:: U+0210F .. PLANCK CONSTANT OVER TWO PI
+.. |heartsuit| unicode:: U+02665 .. BLACK HEART SUIT
+.. |HilbertSpace| unicode:: U+0210B .. SCRIPT CAPITAL H
+.. |hksearow| unicode:: U+02925 .. SOUTH EAST ARROW WITH HOOK
+.. |hkswarow| unicode:: U+02926 .. SOUTH WEST ARROW WITH HOOK
+.. |hookleftarrow| unicode:: U+021A9 .. LEFTWARDS ARROW WITH HOOK
+.. |hookrightarrow| unicode:: U+021AA .. RIGHTWARDS ARROW WITH HOOK
+.. |hslash| unicode:: U+0210F .. PLANCK CONSTANT OVER TWO PI
+.. |HumpDownHump| unicode:: U+0224E .. GEOMETRICALLY EQUIVALENT TO
+.. |HumpEqual| unicode:: U+0224F .. DIFFERENCE BETWEEN
+.. |iiiint| unicode:: U+02A0C .. QUADRUPLE INTEGRAL OPERATOR
+.. |iiint| unicode:: U+0222D .. TRIPLE INTEGRAL
+.. |Im| unicode:: U+02111 .. BLACK-LETTER CAPITAL I
+.. |ImaginaryI| unicode:: U+02148 .. DOUBLE-STRUCK ITALIC SMALL I
+.. |imagline| unicode:: U+02110 .. SCRIPT CAPITAL I
+.. |imagpart| unicode:: U+02111 .. BLACK-LETTER CAPITAL I
+.. |Implies| unicode:: U+021D2 .. RIGHTWARDS DOUBLE ARROW
+.. |in| unicode:: U+02208 .. ELEMENT OF
+.. |integers| unicode:: U+02124 .. DOUBLE-STRUCK CAPITAL Z
+.. |Integral| unicode:: U+0222B .. INTEGRAL
+.. |intercal| unicode:: U+022BA .. INTERCALATE
+.. |Intersection| unicode:: U+022C2 .. N-ARY INTERSECTION
+.. |intprod| unicode:: U+02A3C .. INTERIOR PRODUCT
+.. |InvisibleComma| unicode:: U+02063 .. INVISIBLE SEPARATOR
+.. |InvisibleTimes| unicode:: U+02062 .. INVISIBLE TIMES
+.. |langle| unicode:: U+02329 .. LEFT-POINTING ANGLE BRACKET
+.. |Laplacetrf| unicode:: U+02112 .. SCRIPT CAPITAL L
+.. |lbrace| unicode:: U+0007B .. LEFT CURLY BRACKET
+.. |lbrack| unicode:: U+0005B .. LEFT SQUARE BRACKET
+.. |LeftAngleBracket| unicode:: U+02329 .. LEFT-POINTING ANGLE BRACKET
+.. |LeftArrow| unicode:: U+02190 .. LEFTWARDS ARROW
+.. |Leftarrow| unicode:: U+021D0 .. LEFTWARDS DOUBLE ARROW
+.. |leftarrow| unicode:: U+02190 .. LEFTWARDS ARROW
+.. |LeftArrowBar| unicode:: U+021E4 .. LEFTWARDS ARROW TO BAR
+.. |LeftArrowRightArrow| unicode:: U+021C6 .. LEFTWARDS ARROW OVER RIGHTWARDS ARROW
+.. |leftarrowtail| unicode:: U+021A2 .. LEFTWARDS ARROW WITH TAIL
+.. |LeftCeiling| unicode:: U+02308 .. LEFT CEILING
+.. |LeftDoubleBracket| unicode:: U+0301A .. LEFT WHITE SQUARE BRACKET
+.. |LeftDownVector| unicode:: U+021C3 .. DOWNWARDS HARPOON WITH BARB LEFTWARDS
+.. |LeftFloor| unicode:: U+0230A .. LEFT FLOOR
+.. |leftharpoondown| unicode:: U+021BD .. LEFTWARDS HARPOON WITH BARB DOWNWARDS
+.. |leftharpoonup| unicode:: U+021BC .. LEFTWARDS HARPOON WITH BARB UPWARDS
+.. |leftleftarrows| unicode:: U+021C7 .. LEFTWARDS PAIRED ARROWS
+.. |LeftRightArrow| unicode:: U+02194 .. LEFT RIGHT ARROW
+.. |Leftrightarrow| unicode:: U+021D4 .. LEFT RIGHT DOUBLE ARROW
+.. |leftrightarrow| unicode:: U+02194 .. LEFT RIGHT ARROW
+.. |leftrightarrows| unicode:: U+021C6 .. LEFTWARDS ARROW OVER RIGHTWARDS ARROW
+.. |leftrightharpoons| unicode:: U+021CB .. LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
+.. |leftrightsquigarrow| unicode:: U+021AD .. LEFT RIGHT WAVE ARROW
+.. |LeftTee| unicode:: U+022A3 .. LEFT TACK
+.. |LeftTeeArrow| unicode:: U+021A4 .. LEFTWARDS ARROW FROM BAR
+.. |leftthreetimes| unicode:: U+022CB .. LEFT SEMIDIRECT PRODUCT
+.. |LeftTriangle| unicode:: U+022B2 .. NORMAL SUBGROUP OF
+.. |LeftTriangleEqual| unicode:: U+022B4 .. NORMAL SUBGROUP OF OR EQUAL TO
+.. |LeftUpVector| unicode:: U+021BF .. UPWARDS HARPOON WITH BARB LEFTWARDS
+.. |LeftVector| unicode:: U+021BC .. LEFTWARDS HARPOON WITH BARB UPWARDS
+.. |leq| unicode:: U+02264 .. LESS-THAN OR EQUAL TO
+.. |leqq| unicode:: U+02266 .. LESS-THAN OVER EQUAL TO
+.. |leqslant| unicode:: U+02A7D .. LESS-THAN OR SLANTED EQUAL TO
+.. |lessapprox| unicode:: U+02A85 .. LESS-THAN OR APPROXIMATE
+.. |lessdot| unicode:: U+022D6 .. LESS-THAN WITH DOT
+.. |lesseqgtr| unicode:: U+022DA .. LESS-THAN EQUAL TO OR GREATER-THAN
+.. |lesseqqgtr| unicode:: U+02A8B .. LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN
+.. |LessEqualGreater| unicode:: U+022DA .. LESS-THAN EQUAL TO OR GREATER-THAN
+.. |LessFullEqual| unicode:: U+02266 .. LESS-THAN OVER EQUAL TO
+.. |LessGreater| unicode:: U+02276 .. LESS-THAN OR GREATER-THAN
+.. |lessgtr| unicode:: U+02276 .. LESS-THAN OR GREATER-THAN
+.. |lesssim| unicode:: U+02272 .. LESS-THAN OR EQUIVALENT TO
+.. |LessSlantEqual| unicode:: U+02A7D .. LESS-THAN OR SLANTED EQUAL TO
+.. |LessTilde| unicode:: U+02272 .. LESS-THAN OR EQUIVALENT TO
+.. |ll| unicode:: U+0226A .. MUCH LESS-THAN
+.. |llcorner| unicode:: U+0231E .. BOTTOM LEFT CORNER
+.. |Lleftarrow| unicode:: U+021DA .. LEFTWARDS TRIPLE ARROW
+.. |lmoustache| unicode:: U+023B0 .. UPPER LEFT OR LOWER RIGHT CURLY BRACKET SECTION
+.. |lnapprox| unicode:: U+02A89 .. LESS-THAN AND NOT APPROXIMATE
+.. |lneq| unicode:: U+02A87 .. LESS-THAN AND SINGLE-LINE NOT EQUAL TO
+.. |lneqq| unicode:: U+02268 .. LESS-THAN BUT NOT EQUAL TO
+.. |LongLeftArrow| unicode:: U+027F5 .. LONG LEFTWARDS ARROW
+.. |Longleftarrow| unicode:: U+027F8 .. LONG LEFTWARDS DOUBLE ARROW
+.. |longleftarrow| unicode:: U+027F5 .. LONG LEFTWARDS ARROW
+.. |LongLeftRightArrow| unicode:: U+027F7 .. LONG LEFT RIGHT ARROW
+.. |Longleftrightarrow| unicode:: U+027FA .. LONG LEFT RIGHT DOUBLE ARROW
+.. |longleftrightarrow| unicode:: U+027F7 .. LONG LEFT RIGHT ARROW
+.. |longmapsto| unicode:: U+027FC .. LONG RIGHTWARDS ARROW FROM BAR
+.. |LongRightArrow| unicode:: U+027F6 .. LONG RIGHTWARDS ARROW
+.. |Longrightarrow| unicode:: U+027F9 .. LONG RIGHTWARDS DOUBLE ARROW
+.. |longrightarrow| unicode:: U+027F6 .. LONG RIGHTWARDS ARROW
+.. |looparrowleft| unicode:: U+021AB .. LEFTWARDS ARROW WITH LOOP
+.. |looparrowright| unicode:: U+021AC .. RIGHTWARDS ARROW WITH LOOP
+.. |LowerLeftArrow| unicode:: U+02199 .. SOUTH WEST ARROW
+.. |LowerRightArrow| unicode:: U+02198 .. SOUTH EAST ARROW
+.. |lozenge| unicode:: U+025CA .. LOZENGE
+.. |lrcorner| unicode:: U+0231F .. BOTTOM RIGHT CORNER
+.. |Lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS
+.. |lvertneqq| unicode:: U+02268 U+0FE00 .. LESS-THAN BUT NOT EQUAL TO - with vertical stroke
+.. |maltese| unicode:: U+02720 .. MALTESE CROSS
+.. |mapsto| unicode:: U+021A6 .. RIGHTWARDS ARROW FROM BAR
+.. |measuredangle| unicode:: U+02221 .. MEASURED ANGLE
+.. |Mellintrf| unicode:: U+02133 .. SCRIPT CAPITAL M
+.. |MinusPlus| unicode:: U+02213 .. MINUS-OR-PLUS SIGN
+.. |mp| unicode:: U+02213 .. MINUS-OR-PLUS SIGN
+.. |multimap| unicode:: U+022B8 .. MULTIMAP
+.. |napprox| unicode:: U+02249 .. NOT ALMOST EQUAL TO
+.. |natural| unicode:: U+0266E .. MUSIC NATURAL SIGN
+.. |naturals| unicode:: U+02115 .. DOUBLE-STRUCK CAPITAL N
+.. |nearrow| unicode:: U+02197 .. NORTH EAST ARROW
+.. |NegativeMediumSpace| unicode:: U+0200B .. ZERO WIDTH SPACE
+.. |NegativeThickSpace| unicode:: U+0200B .. ZERO WIDTH SPACE
+.. |NegativeThinSpace| unicode:: U+0200B .. ZERO WIDTH SPACE
+.. |NegativeVeryThinSpace| unicode:: U+0200B .. ZERO WIDTH SPACE
+.. |NestedGreaterGreater| unicode:: U+0226B .. MUCH GREATER-THAN
+.. |NestedLessLess| unicode:: U+0226A .. MUCH LESS-THAN
+.. |nexists| unicode:: U+02204 .. THERE DOES NOT EXIST
+.. |ngeq| unicode:: U+02271 .. NEITHER GREATER-THAN NOR EQUAL TO
+.. |ngeqq| unicode:: U+02267 U+00338 .. GREATER-THAN OVER EQUAL TO with slash
+.. |ngeqslant| unicode:: U+02A7E U+00338 .. GREATER-THAN OR SLANTED EQUAL TO with slash
+.. |ngtr| unicode:: U+0226F .. NOT GREATER-THAN
+.. |nLeftarrow| unicode:: U+021CD .. LEFTWARDS DOUBLE ARROW WITH STROKE
+.. |nleftarrow| unicode:: U+0219A .. LEFTWARDS ARROW WITH STROKE
+.. |nLeftrightarrow| unicode:: U+021CE .. LEFT RIGHT DOUBLE ARROW WITH STROKE
+.. |nleftrightarrow| unicode:: U+021AE .. LEFT RIGHT ARROW WITH STROKE
+.. |nleq| unicode:: U+02270 .. NEITHER LESS-THAN NOR EQUAL TO
+.. |nleqq| unicode:: U+02266 U+00338 .. LESS-THAN OVER EQUAL TO with slash
+.. |nleqslant| unicode:: U+02A7D U+00338 .. LESS-THAN OR SLANTED EQUAL TO with slash
+.. |nless| unicode:: U+0226E .. NOT LESS-THAN
+.. |NonBreakingSpace| unicode:: U+000A0 .. NO-BREAK SPACE
+.. |NotCongruent| unicode:: U+02262 .. NOT IDENTICAL TO
+.. |NotDoubleVerticalBar| unicode:: U+02226 .. NOT PARALLEL TO
+.. |NotElement| unicode:: U+02209 .. NOT AN ELEMENT OF
+.. |NotEqual| unicode:: U+02260 .. NOT EQUAL TO
+.. |NotEqualTilde| unicode:: U+02242 U+00338 .. MINUS TILDE with slash
+.. |NotExists| unicode:: U+02204 .. THERE DOES NOT EXIST
+.. |NotGreater| unicode:: U+0226F .. NOT GREATER-THAN
+.. |NotGreaterEqual| unicode:: U+02271 .. NEITHER GREATER-THAN NOR EQUAL TO
+.. |NotGreaterFullEqual| unicode:: U+02266 U+00338 .. LESS-THAN OVER EQUAL TO with slash
+.. |NotGreaterGreater| unicode:: U+0226B U+00338 .. MUCH GREATER THAN with slash
+.. |NotGreaterLess| unicode:: U+02279 .. NEITHER GREATER-THAN NOR LESS-THAN
+.. |NotGreaterSlantEqual| unicode:: U+02A7E U+00338 .. GREATER-THAN OR SLANTED EQUAL TO with slash
+.. |NotGreaterTilde| unicode:: U+02275 .. NEITHER GREATER-THAN NOR EQUIVALENT TO
+.. |NotHumpDownHump| unicode:: U+0224E U+00338 .. GEOMETRICALLY EQUIVALENT TO with slash
+.. |NotLeftTriangle| unicode:: U+022EA .. NOT NORMAL SUBGROUP OF
+.. |NotLeftTriangleEqual| unicode:: U+022EC .. NOT NORMAL SUBGROUP OF OR EQUAL TO
+.. |NotLess| unicode:: U+0226E .. NOT LESS-THAN
+.. |NotLessEqual| unicode:: U+02270 .. NEITHER LESS-THAN NOR EQUAL TO
+.. |NotLessGreater| unicode:: U+02278 .. NEITHER LESS-THAN NOR GREATER-THAN
+.. |NotLessLess| unicode:: U+0226A U+00338 .. MUCH LESS THAN with slash
+.. |NotLessSlantEqual| unicode:: U+02A7D U+00338 .. LESS-THAN OR SLANTED EQUAL TO with slash
+.. |NotLessTilde| unicode:: U+02274 .. NEITHER LESS-THAN NOR EQUIVALENT TO
+.. |NotPrecedes| unicode:: U+02280 .. DOES NOT PRECEDE
+.. |NotPrecedesEqual| unicode:: U+02AAF U+00338 .. PRECEDES ABOVE SINGLE-LINE EQUALS SIGN with slash
+.. |NotPrecedesSlantEqual| unicode:: U+022E0 .. DOES NOT PRECEDE OR EQUAL
+.. |NotReverseElement| unicode:: U+0220C .. DOES NOT CONTAIN AS MEMBER
+.. |NotRightTriangle| unicode:: U+022EB .. DOES NOT CONTAIN AS NORMAL SUBGROUP
+.. |NotRightTriangleEqual| unicode:: U+022ED .. DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+.. |NotSquareSubsetEqual| unicode:: U+022E2 .. NOT SQUARE IMAGE OF OR EQUAL TO
+.. |NotSquareSupersetEqual| unicode:: U+022E3 .. NOT SQUARE ORIGINAL OF OR EQUAL TO
+.. |NotSubset| unicode:: U+02282 U+020D2 .. SUBSET OF with vertical line
+.. |NotSubsetEqual| unicode:: U+02288 .. NEITHER A SUBSET OF NOR EQUAL TO
+.. |NotSucceeds| unicode:: U+02281 .. DOES NOT SUCCEED
+.. |NotSucceedsEqual| unicode:: U+02AB0 U+00338 .. SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN with slash
+.. |NotSucceedsSlantEqual| unicode:: U+022E1 .. DOES NOT SUCCEED OR EQUAL
+.. |NotSuperset| unicode:: U+02283 U+020D2 .. SUPERSET OF with vertical line
+.. |NotSupersetEqual| unicode:: U+02289 .. NEITHER A SUPERSET OF NOR EQUAL TO
+.. |NotTilde| unicode:: U+02241 .. NOT TILDE
+.. |NotTildeEqual| unicode:: U+02244 .. NOT ASYMPTOTICALLY EQUAL TO
+.. |NotTildeFullEqual| unicode:: U+02247 .. NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+.. |NotTildeTilde| unicode:: U+02249 .. NOT ALMOST EQUAL TO
+.. |NotVerticalBar| unicode:: U+02224 .. DOES NOT DIVIDE
+.. |nparallel| unicode:: U+02226 .. NOT PARALLEL TO
+.. |nprec| unicode:: U+02280 .. DOES NOT PRECEDE
+.. |npreceq| unicode:: U+02AAF U+00338 .. PRECEDES ABOVE SINGLE-LINE EQUALS SIGN with slash
+.. |nRightarrow| unicode:: U+021CF .. RIGHTWARDS DOUBLE ARROW WITH STROKE
+.. |nrightarrow| unicode:: U+0219B .. RIGHTWARDS ARROW WITH STROKE
+.. |nshortmid| unicode:: U+02224 .. DOES NOT DIVIDE
+.. |nshortparallel| unicode:: U+02226 .. NOT PARALLEL TO
+.. |nsimeq| unicode:: U+02244 .. NOT ASYMPTOTICALLY EQUAL TO
+.. |nsubset| unicode:: U+02282 U+020D2 .. SUBSET OF with vertical line
+.. |nsubseteq| unicode:: U+02288 .. NEITHER A SUBSET OF NOR EQUAL TO
+.. |nsubseteqq| unicode:: U+02AC5 U+00338 .. SUBSET OF ABOVE EQUALS SIGN with slash
+.. |nsucc| unicode:: U+02281 .. DOES NOT SUCCEED
+.. |nsucceq| unicode:: U+02AB0 U+00338 .. SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN with slash
+.. |nsupset| unicode:: U+02283 U+020D2 .. SUPERSET OF with vertical line
+.. |nsupseteq| unicode:: U+02289 .. NEITHER A SUPERSET OF NOR EQUAL TO
+.. |nsupseteqq| unicode:: U+02AC6 U+00338 .. SUPERSET OF ABOVE EQUALS SIGN with slash
+.. |ntriangleleft| unicode:: U+022EA .. NOT NORMAL SUBGROUP OF
+.. |ntrianglelefteq| unicode:: U+022EC .. NOT NORMAL SUBGROUP OF OR EQUAL TO
+.. |ntriangleright| unicode:: U+022EB .. DOES NOT CONTAIN AS NORMAL SUBGROUP
+.. |ntrianglerighteq| unicode:: U+022ED .. DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+.. |nwarrow| unicode:: U+02196 .. NORTH WEST ARROW
+.. |oint| unicode:: U+0222E .. CONTOUR INTEGRAL
+.. |OpenCurlyDoubleQuote| unicode:: U+0201C .. LEFT DOUBLE QUOTATION MARK
+.. |OpenCurlyQuote| unicode:: U+02018 .. LEFT SINGLE QUOTATION MARK
+.. |orderof| unicode:: U+02134 .. SCRIPT SMALL O
+.. |parallel| unicode:: U+02225 .. PARALLEL TO
+.. |PartialD| unicode:: U+02202 .. PARTIAL DIFFERENTIAL
+.. |pitchfork| unicode:: U+022D4 .. PITCHFORK
+.. |PlusMinus| unicode:: U+000B1 .. PLUS-MINUS SIGN
+.. |pm| unicode:: U+000B1 .. PLUS-MINUS SIGN
+.. |Poincareplane| unicode:: U+0210C .. BLACK-LETTER CAPITAL H
+.. |prec| unicode:: U+0227A .. PRECEDES
+.. |precapprox| unicode:: U+02AB7 .. PRECEDES ABOVE ALMOST EQUAL TO
+.. |preccurlyeq| unicode:: U+0227C .. PRECEDES OR EQUAL TO
+.. |Precedes| unicode:: U+0227A .. PRECEDES
+.. |PrecedesEqual| unicode:: U+02AAF .. PRECEDES ABOVE SINGLE-LINE EQUALS SIGN
+.. |PrecedesSlantEqual| unicode:: U+0227C .. PRECEDES OR EQUAL TO
+.. |PrecedesTilde| unicode:: U+0227E .. PRECEDES OR EQUIVALENT TO
+.. |preceq| unicode:: U+02AAF .. PRECEDES ABOVE SINGLE-LINE EQUALS SIGN
+.. |precnapprox| unicode:: U+02AB9 .. PRECEDES ABOVE NOT ALMOST EQUAL TO
+.. |precneqq| unicode:: U+02AB5 .. PRECEDES ABOVE NOT EQUAL TO
+.. |precnsim| unicode:: U+022E8 .. PRECEDES BUT NOT EQUIVALENT TO
+.. |precsim| unicode:: U+0227E .. PRECEDES OR EQUIVALENT TO
+.. |primes| unicode:: U+02119 .. DOUBLE-STRUCK CAPITAL P
+.. |Proportion| unicode:: U+02237 .. PROPORTION
+.. |Proportional| unicode:: U+0221D .. PROPORTIONAL TO
+.. |propto| unicode:: U+0221D .. PROPORTIONAL TO
+.. |quaternions| unicode:: U+0210D .. DOUBLE-STRUCK CAPITAL H
+.. |questeq| unicode:: U+0225F .. QUESTIONED EQUAL TO
+.. |rangle| unicode:: U+0232A .. RIGHT-POINTING ANGLE BRACKET
+.. |rationals| unicode:: U+0211A .. DOUBLE-STRUCK CAPITAL Q
+.. |rbrace| unicode:: U+0007D .. RIGHT CURLY BRACKET
+.. |rbrack| unicode:: U+0005D .. RIGHT SQUARE BRACKET
+.. |Re| unicode:: U+0211C .. BLACK-LETTER CAPITAL R
+.. |realine| unicode:: U+0211B .. SCRIPT CAPITAL R
+.. |realpart| unicode:: U+0211C .. BLACK-LETTER CAPITAL R
+.. |reals| unicode:: U+0211D .. DOUBLE-STRUCK CAPITAL R
+.. |ReverseElement| unicode:: U+0220B .. CONTAINS AS MEMBER
+.. |ReverseEquilibrium| unicode:: U+021CB .. LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
+.. |ReverseUpEquilibrium| unicode:: U+0296F .. DOWNWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT
+.. |RightAngleBracket| unicode:: U+0232A .. RIGHT-POINTING ANGLE BRACKET
+.. |RightArrow| unicode:: U+02192 .. RIGHTWARDS ARROW
+.. |Rightarrow| unicode:: U+021D2 .. RIGHTWARDS DOUBLE ARROW
+.. |rightarrow| unicode:: U+02192 .. RIGHTWARDS ARROW
+.. |RightArrowBar| unicode:: U+021E5 .. RIGHTWARDS ARROW TO BAR
+.. |RightArrowLeftArrow| unicode:: U+021C4 .. RIGHTWARDS ARROW OVER LEFTWARDS ARROW
+.. |rightarrowtail| unicode:: U+021A3 .. RIGHTWARDS ARROW WITH TAIL
+.. |RightCeiling| unicode:: U+02309 .. RIGHT CEILING
+.. |RightDoubleBracket| unicode:: U+0301B .. RIGHT WHITE SQUARE BRACKET
+.. |RightDownVector| unicode:: U+021C2 .. DOWNWARDS HARPOON WITH BARB RIGHTWARDS
+.. |RightFloor| unicode:: U+0230B .. RIGHT FLOOR
+.. |rightharpoondown| unicode:: U+021C1 .. RIGHTWARDS HARPOON WITH BARB DOWNWARDS
+.. |rightharpoonup| unicode:: U+021C0 .. RIGHTWARDS HARPOON WITH BARB UPWARDS
+.. |rightleftarrows| unicode:: U+021C4 .. RIGHTWARDS ARROW OVER LEFTWARDS ARROW
+.. |rightleftharpoons| unicode:: U+021CC .. RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+.. |rightrightarrows| unicode:: U+021C9 .. RIGHTWARDS PAIRED ARROWS
+.. |rightsquigarrow| unicode:: U+0219D .. RIGHTWARDS WAVE ARROW
+.. |RightTee| unicode:: U+022A2 .. RIGHT TACK
+.. |RightTeeArrow| unicode:: U+021A6 .. RIGHTWARDS ARROW FROM BAR
+.. |rightthreetimes| unicode:: U+022CC .. RIGHT SEMIDIRECT PRODUCT
+.. |RightTriangle| unicode:: U+022B3 .. CONTAINS AS NORMAL SUBGROUP
+.. |RightTriangleEqual| unicode:: U+022B5 .. CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+.. |RightUpVector| unicode:: U+021BE .. UPWARDS HARPOON WITH BARB RIGHTWARDS
+.. |RightVector| unicode:: U+021C0 .. RIGHTWARDS HARPOON WITH BARB UPWARDS
+.. |risingdotseq| unicode:: U+02253 .. IMAGE OF OR APPROXIMATELY EQUAL TO
+.. |rmoustache| unicode:: U+023B1 .. UPPER RIGHT OR LOWER LEFT CURLY BRACKET SECTION
+.. |Rrightarrow| unicode:: U+021DB .. RIGHTWARDS TRIPLE ARROW
+.. |Rsh| unicode:: U+021B1 .. UPWARDS ARROW WITH TIP RIGHTWARDS
+.. |searrow| unicode:: U+02198 .. SOUTH EAST ARROW
+.. |setminus| unicode:: U+02216 .. SET MINUS
+.. |ShortDownArrow| unicode:: U+02193 .. DOWNWARDS ARROW
+.. |ShortLeftArrow| unicode:: U+02190 .. LEFTWARDS ARROW
+.. |shortmid| unicode:: U+02223 .. DIVIDES
+.. |shortparallel| unicode:: U+02225 .. PARALLEL TO
+.. |ShortRightArrow| unicode:: U+02192 .. RIGHTWARDS ARROW
+.. |ShortUpArrow| unicode:: U+02191 .. UPWARDS ARROW
+.. |simeq| unicode:: U+02243 .. ASYMPTOTICALLY EQUAL TO
+.. |SmallCircle| unicode:: U+02218 .. RING OPERATOR
+.. |smallsetminus| unicode:: U+02216 .. SET MINUS
+.. |spadesuit| unicode:: U+02660 .. BLACK SPADE SUIT
+.. |Sqrt| unicode:: U+0221A .. SQUARE ROOT
+.. |sqsubset| unicode:: U+0228F .. SQUARE IMAGE OF
+.. |sqsubseteq| unicode:: U+02291 .. SQUARE IMAGE OF OR EQUAL TO
+.. |sqsupset| unicode:: U+02290 .. SQUARE ORIGINAL OF
+.. |sqsupseteq| unicode:: U+02292 .. SQUARE ORIGINAL OF OR EQUAL TO
+.. |Square| unicode:: U+025A1 .. WHITE SQUARE
+.. |SquareIntersection| unicode:: U+02293 .. SQUARE CAP
+.. |SquareSubset| unicode:: U+0228F .. SQUARE IMAGE OF
+.. |SquareSubsetEqual| unicode:: U+02291 .. SQUARE IMAGE OF OR EQUAL TO
+.. |SquareSuperset| unicode:: U+02290 .. SQUARE ORIGINAL OF
+.. |SquareSupersetEqual| unicode:: U+02292 .. SQUARE ORIGINAL OF OR EQUAL TO
+.. |SquareUnion| unicode:: U+02294 .. SQUARE CUP
+.. |Star| unicode:: U+022C6 .. STAR OPERATOR
+.. |straightepsilon| unicode:: U+003F5 .. GREEK LUNATE EPSILON SYMBOL
+.. |straightphi| unicode:: U+003D5 .. GREEK PHI SYMBOL
+.. |Subset| unicode:: U+022D0 .. DOUBLE SUBSET
+.. |subset| unicode:: U+02282 .. SUBSET OF
+.. |subseteq| unicode:: U+02286 .. SUBSET OF OR EQUAL TO
+.. |subseteqq| unicode:: U+02AC5 .. SUBSET OF ABOVE EQUALS SIGN
+.. |SubsetEqual| unicode:: U+02286 .. SUBSET OF OR EQUAL TO
+.. |subsetneq| unicode:: U+0228A .. SUBSET OF WITH NOT EQUAL TO
+.. |subsetneqq| unicode:: U+02ACB .. SUBSET OF ABOVE NOT EQUAL TO
+.. |succ| unicode:: U+0227B .. SUCCEEDS
+.. |succapprox| unicode:: U+02AB8 .. SUCCEEDS ABOVE ALMOST EQUAL TO
+.. |succcurlyeq| unicode:: U+0227D .. SUCCEEDS OR EQUAL TO
+.. |Succeeds| unicode:: U+0227B .. SUCCEEDS
+.. |SucceedsEqual| unicode:: U+02AB0 .. SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN
+.. |SucceedsSlantEqual| unicode:: U+0227D .. SUCCEEDS OR EQUAL TO
+.. |SucceedsTilde| unicode:: U+0227F .. SUCCEEDS OR EQUIVALENT TO
+.. |succeq| unicode:: U+02AB0 .. SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN
+.. |succnapprox| unicode:: U+02ABA .. SUCCEEDS ABOVE NOT ALMOST EQUAL TO
+.. |succneqq| unicode:: U+02AB6 .. SUCCEEDS ABOVE NOT EQUAL TO
+.. |succnsim| unicode:: U+022E9 .. SUCCEEDS BUT NOT EQUIVALENT TO
+.. |succsim| unicode:: U+0227F .. SUCCEEDS OR EQUIVALENT TO
+.. |SuchThat| unicode:: U+0220B .. CONTAINS AS MEMBER
+.. |Sum| unicode:: U+02211 .. N-ARY SUMMATION
+.. |Superset| unicode:: U+02283 .. SUPERSET OF
+.. |SupersetEqual| unicode:: U+02287 .. SUPERSET OF OR EQUAL TO
+.. |Supset| unicode:: U+022D1 .. DOUBLE SUPERSET
+.. |supset| unicode:: U+02283 .. SUPERSET OF
+.. |supseteq| unicode:: U+02287 .. SUPERSET OF OR EQUAL TO
+.. |supseteqq| unicode:: U+02AC6 .. SUPERSET OF ABOVE EQUALS SIGN
+.. |supsetneq| unicode:: U+0228B .. SUPERSET OF WITH NOT EQUAL TO
+.. |supsetneqq| unicode:: U+02ACC .. SUPERSET OF ABOVE NOT EQUAL TO
+.. |swarrow| unicode:: U+02199 .. SOUTH WEST ARROW
+.. |Therefore| unicode:: U+02234 .. THEREFORE
+.. |therefore| unicode:: U+02234 .. THEREFORE
+.. |thickapprox| unicode:: U+02248 .. ALMOST EQUAL TO
+.. |thicksim| unicode:: U+0223C .. TILDE OPERATOR
+.. |ThinSpace| unicode:: U+02009 .. THIN SPACE
+.. |Tilde| unicode:: U+0223C .. TILDE OPERATOR
+.. |TildeEqual| unicode:: U+02243 .. ASYMPTOTICALLY EQUAL TO
+.. |TildeFullEqual| unicode:: U+02245 .. APPROXIMATELY EQUAL TO
+.. |TildeTilde| unicode:: U+02248 .. ALMOST EQUAL TO
+.. |toea| unicode:: U+02928 .. NORTH EAST ARROW AND SOUTH EAST ARROW
+.. |tosa| unicode:: U+02929 .. SOUTH EAST ARROW AND SOUTH WEST ARROW
+.. |triangle| unicode:: U+025B5 .. WHITE UP-POINTING SMALL TRIANGLE
+.. |triangledown| unicode:: U+025BF .. WHITE DOWN-POINTING SMALL TRIANGLE
+.. |triangleleft| unicode:: U+025C3 .. WHITE LEFT-POINTING SMALL TRIANGLE
+.. |trianglelefteq| unicode:: U+022B4 .. NORMAL SUBGROUP OF OR EQUAL TO
+.. |triangleq| unicode:: U+0225C .. DELTA EQUAL TO
+.. |triangleright| unicode:: U+025B9 .. WHITE RIGHT-POINTING SMALL TRIANGLE
+.. |trianglerighteq| unicode:: U+022B5 .. CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+.. |TripleDot| unicode:: U+020DB .. COMBINING THREE DOTS ABOVE
+.. |twoheadleftarrow| unicode:: U+0219E .. LEFTWARDS TWO HEADED ARROW
+.. |twoheadrightarrow| unicode:: U+021A0 .. RIGHTWARDS TWO HEADED ARROW
+.. |ulcorner| unicode:: U+0231C .. TOP LEFT CORNER
+.. |Union| unicode:: U+022C3 .. N-ARY UNION
+.. |UnionPlus| unicode:: U+0228E .. MULTISET UNION
+.. |UpArrow| unicode:: U+02191 .. UPWARDS ARROW
+.. |Uparrow| unicode:: U+021D1 .. UPWARDS DOUBLE ARROW
+.. |uparrow| unicode:: U+02191 .. UPWARDS ARROW
+.. |UpArrowDownArrow| unicode:: U+021C5 .. UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW
+.. |UpDownArrow| unicode:: U+02195 .. UP DOWN ARROW
+.. |Updownarrow| unicode:: U+021D5 .. UP DOWN DOUBLE ARROW
+.. |updownarrow| unicode:: U+02195 .. UP DOWN ARROW
+.. |UpEquilibrium| unicode:: U+0296E .. UPWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT
+.. |upharpoonleft| unicode:: U+021BF .. UPWARDS HARPOON WITH BARB LEFTWARDS
+.. |upharpoonright| unicode:: U+021BE .. UPWARDS HARPOON WITH BARB RIGHTWARDS
+.. |UpperLeftArrow| unicode:: U+02196 .. NORTH WEST ARROW
+.. |UpperRightArrow| unicode:: U+02197 .. NORTH EAST ARROW
+.. |upsilon| unicode:: U+003C5 .. GREEK SMALL LETTER UPSILON
+.. |UpTee| unicode:: U+022A5 .. UP TACK
+.. |UpTeeArrow| unicode:: U+021A5 .. UPWARDS ARROW FROM BAR
+.. |upuparrows| unicode:: U+021C8 .. UPWARDS PAIRED ARROWS
+.. |urcorner| unicode:: U+0231D .. TOP RIGHT CORNER
+.. |varepsilon| unicode:: U+003B5 .. GREEK SMALL LETTER EPSILON
+.. |varkappa| unicode:: U+003F0 .. GREEK KAPPA SYMBOL
+.. |varnothing| unicode:: U+02205 .. EMPTY SET
+.. |varphi| unicode:: U+003C6 .. GREEK SMALL LETTER PHI
+.. |varpi| unicode:: U+003D6 .. GREEK PI SYMBOL
+.. |varpropto| unicode:: U+0221D .. PROPORTIONAL TO
+.. |varrho| unicode:: U+003F1 .. GREEK RHO SYMBOL
+.. |varsigma| unicode:: U+003C2 .. GREEK SMALL LETTER FINAL SIGMA
+.. |varsubsetneq| unicode:: U+0228A U+0FE00 .. SUBSET OF WITH NOT EQUAL TO - variant with stroke through bottom members
+.. |varsubsetneqq| unicode:: U+02ACB U+0FE00 .. SUBSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members
+.. |varsupsetneq| unicode:: U+0228B U+0FE00 .. SUPERSET OF WITH NOT EQUAL TO - variant with stroke through bottom members
+.. |varsupsetneqq| unicode:: U+02ACC U+0FE00 .. SUPERSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members
+.. |vartheta| unicode:: U+003D1 .. GREEK THETA SYMBOL
+.. |vartriangleleft| unicode:: U+022B2 .. NORMAL SUBGROUP OF
+.. |vartriangleright| unicode:: U+022B3 .. CONTAINS AS NORMAL SUBGROUP
+.. |Vee| unicode:: U+022C1 .. N-ARY LOGICAL OR
+.. |vee| unicode:: U+02228 .. LOGICAL OR
+.. |Vert| unicode:: U+02016 .. DOUBLE VERTICAL LINE
+.. |vert| unicode:: U+0007C .. VERTICAL LINE
+.. |VerticalBar| unicode:: U+02223 .. DIVIDES
+.. |VerticalTilde| unicode:: U+02240 .. WREATH PRODUCT
+.. |VeryThinSpace| unicode:: U+0200A .. HAIR SPACE
+.. |Wedge| unicode:: U+022C0 .. N-ARY LOGICAL AND
+.. |wedge| unicode:: U+02227 .. LOGICAL AND
+.. |wp| unicode:: U+02118 .. SCRIPT CAPITAL P
+.. |wr| unicode:: U+02240 .. WREATH PRODUCT
+.. |zeetrf| unicode:: U+02128 .. BLACK-LETTER CAPITAL Z
diff --git a/docutils/parsers/rst/include/mmlextra-wide.txt b/docutils/parsers/rst/include/mmlextra-wide.txt
new file mode 100644
index 000000000..0177ccc09
--- /dev/null
+++ b/docutils/parsers/rst/include/mmlextra-wide.txt
@@ -0,0 +1,113 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |af| unicode:: U+02061 .. FUNCTION APPLICATION
+.. |aopf| unicode:: U+1D552 .. MATHEMATICAL DOUBLE-STRUCK SMALL A
+.. |asympeq| unicode:: U+0224D .. EQUIVALENT TO
+.. |bopf| unicode:: U+1D553 .. MATHEMATICAL DOUBLE-STRUCK SMALL B
+.. |copf| unicode:: U+1D554 .. MATHEMATICAL DOUBLE-STRUCK SMALL C
+.. |Cross| unicode:: U+02A2F .. VECTOR OR CROSS PRODUCT
+.. |DD| unicode:: U+02145 .. DOUBLE-STRUCK ITALIC CAPITAL D
+.. |dd| unicode:: U+02146 .. DOUBLE-STRUCK ITALIC SMALL D
+.. |dopf| unicode:: U+1D555 .. MATHEMATICAL DOUBLE-STRUCK SMALL D
+.. |DownArrowBar| unicode:: U+02913 .. DOWNWARDS ARROW TO BAR
+.. |DownBreve| unicode:: U+00311 .. COMBINING INVERTED BREVE
+.. |DownLeftRightVector| unicode:: U+02950 .. LEFT BARB DOWN RIGHT BARB DOWN HARPOON
+.. |DownLeftTeeVector| unicode:: U+0295E .. LEFTWARDS HARPOON WITH BARB DOWN FROM BAR
+.. |DownLeftVectorBar| unicode:: U+02956 .. LEFTWARDS HARPOON WITH BARB DOWN TO BAR
+.. |DownRightTeeVector| unicode:: U+0295F .. RIGHTWARDS HARPOON WITH BARB DOWN FROM BAR
+.. |DownRightVectorBar| unicode:: U+02957 .. RIGHTWARDS HARPOON WITH BARB DOWN TO BAR
+.. |ee| unicode:: U+02147 .. DOUBLE-STRUCK ITALIC SMALL E
+.. |EmptySmallSquare| unicode:: U+025FB .. WHITE MEDIUM SQUARE
+.. |EmptyVerySmallSquare| unicode:: U+025AB .. WHITE SMALL SQUARE
+.. |eopf| unicode:: U+1D556 .. MATHEMATICAL DOUBLE-STRUCK SMALL E
+.. |Equal| unicode:: U+02A75 .. TWO CONSECUTIVE EQUALS SIGNS
+.. |FilledSmallSquare| unicode:: U+025FC .. BLACK MEDIUM SQUARE
+.. |FilledVerySmallSquare| unicode:: U+025AA .. BLACK SMALL SQUARE
+.. |fopf| unicode:: U+1D557 .. MATHEMATICAL DOUBLE-STRUCK SMALL F
+.. |gopf| unicode:: U+1D558 .. MATHEMATICAL DOUBLE-STRUCK SMALL G
+.. |GreaterGreater| unicode:: U+02AA2 .. DOUBLE NESTED GREATER-THAN
+.. |Hat| unicode:: U+0005E .. CIRCUMFLEX ACCENT
+.. |hopf| unicode:: U+1D559 .. MATHEMATICAL DOUBLE-STRUCK SMALL H
+.. |HorizontalLine| unicode:: U+02500 .. BOX DRAWINGS LIGHT HORIZONTAL
+.. |ic| unicode:: U+02063 .. INVISIBLE SEPARATOR
+.. |ii| unicode:: U+02148 .. DOUBLE-STRUCK ITALIC SMALL I
+.. |iopf| unicode:: U+1D55A .. MATHEMATICAL DOUBLE-STRUCK SMALL I
+.. |it| unicode:: U+02062 .. INVISIBLE TIMES
+.. |jopf| unicode:: U+1D55B .. MATHEMATICAL DOUBLE-STRUCK SMALL J
+.. |kopf| unicode:: U+1D55C .. MATHEMATICAL DOUBLE-STRUCK SMALL K
+.. |larrb| unicode:: U+021E4 .. LEFTWARDS ARROW TO BAR
+.. |LeftDownTeeVector| unicode:: U+02961 .. DOWNWARDS HARPOON WITH BARB LEFT FROM BAR
+.. |LeftDownVectorBar| unicode:: U+02959 .. DOWNWARDS HARPOON WITH BARB LEFT TO BAR
+.. |LeftRightVector| unicode:: U+0294E .. LEFT BARB UP RIGHT BARB UP HARPOON
+.. |LeftTeeVector| unicode:: U+0295A .. LEFTWARDS HARPOON WITH BARB UP FROM BAR
+.. |LeftTriangleBar| unicode:: U+029CF .. LEFT TRIANGLE BESIDE VERTICAL BAR
+.. |LeftUpDownVector| unicode:: U+02951 .. UP BARB LEFT DOWN BARB LEFT HARPOON
+.. |LeftUpTeeVector| unicode:: U+02960 .. UPWARDS HARPOON WITH BARB LEFT FROM BAR
+.. |LeftUpVectorBar| unicode:: U+02958 .. UPWARDS HARPOON WITH BARB LEFT TO BAR
+.. |LeftVectorBar| unicode:: U+02952 .. LEFTWARDS HARPOON WITH BARB UP TO BAR
+.. |LessLess| unicode:: U+02AA1 .. DOUBLE NESTED LESS-THAN
+.. |lopf| unicode:: U+1D55D .. MATHEMATICAL DOUBLE-STRUCK SMALL L
+.. |mapstodown| unicode:: U+021A7 .. DOWNWARDS ARROW FROM BAR
+.. |mapstoleft| unicode:: U+021A4 .. LEFTWARDS ARROW FROM BAR
+.. |mapstoup| unicode:: U+021A5 .. UPWARDS ARROW FROM BAR
+.. |MediumSpace| unicode:: U+0205F .. MEDIUM MATHEMATICAL SPACE
+.. |mopf| unicode:: U+1D55E .. MATHEMATICAL DOUBLE-STRUCK SMALL M
+.. |nbump| unicode:: U+0224E U+00338 .. GEOMETRICALLY EQUIVALENT TO with slash
+.. |nbumpe| unicode:: U+0224F U+00338 .. DIFFERENCE BETWEEN with slash
+.. |nesim| unicode:: U+02242 U+00338 .. MINUS TILDE with slash
+.. |NewLine| unicode:: U+0000A .. LINE FEED (LF)
+.. |NoBreak| unicode:: U+02060 .. WORD JOINER
+.. |nopf| unicode:: U+1D55F .. MATHEMATICAL DOUBLE-STRUCK SMALL N
+.. |NotCupCap| unicode:: U+0226D .. NOT EQUIVALENT TO
+.. |NotHumpEqual| unicode:: U+0224F U+00338 .. DIFFERENCE BETWEEN with slash
+.. |NotLeftTriangleBar| unicode:: U+029CF U+00338 .. LEFT TRIANGLE BESIDE VERTICAL BAR with slash
+.. |NotNestedGreaterGreater| unicode:: U+02AA2 U+00338 .. DOUBLE NESTED GREATER-THAN with slash
+.. |NotNestedLessLess| unicode:: U+02AA1 U+00338 .. DOUBLE NESTED LESS-THAN with slash
+.. |NotRightTriangleBar| unicode:: U+029D0 U+00338 .. VERTICAL BAR BESIDE RIGHT TRIANGLE with slash
+.. |NotSquareSubset| unicode:: U+0228F U+00338 .. SQUARE IMAGE OF with slash
+.. |NotSquareSuperset| unicode:: U+02290 U+00338 .. SQUARE ORIGINAL OF with slash
+.. |NotSucceedsTilde| unicode:: U+0227F U+00338 .. SUCCEEDS OR EQUIVALENT TO with slash
+.. |oopf| unicode:: U+1D560 .. MATHEMATICAL DOUBLE-STRUCK SMALL O
+.. |OverBar| unicode:: U+000AF .. MACRON
+.. |OverBrace| unicode:: U+0FE37 .. PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
+.. |OverBracket| unicode:: U+023B4 .. TOP SQUARE BRACKET
+.. |OverParenthesis| unicode:: U+0FE35 .. PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
+.. |planckh| unicode:: U+0210E .. PLANCK CONSTANT
+.. |popf| unicode:: U+1D561 .. MATHEMATICAL DOUBLE-STRUCK SMALL P
+.. |Product| unicode:: U+0220F .. N-ARY PRODUCT
+.. |qopf| unicode:: U+1D562 .. MATHEMATICAL DOUBLE-STRUCK SMALL Q
+.. |rarrb| unicode:: U+021E5 .. RIGHTWARDS ARROW TO BAR
+.. |RightDownTeeVector| unicode:: U+0295D .. DOWNWARDS HARPOON WITH BARB RIGHT FROM BAR
+.. |RightDownVectorBar| unicode:: U+02955 .. DOWNWARDS HARPOON WITH BARB RIGHT TO BAR
+.. |RightTeeVector| unicode:: U+0295B .. RIGHTWARDS HARPOON WITH BARB UP FROM BAR
+.. |RightTriangleBar| unicode:: U+029D0 .. VERTICAL BAR BESIDE RIGHT TRIANGLE
+.. |RightUpDownVector| unicode:: U+0294F .. UP BARB RIGHT DOWN BARB RIGHT HARPOON
+.. |RightUpTeeVector| unicode:: U+0295C .. UPWARDS HARPOON WITH BARB RIGHT FROM BAR
+.. |RightUpVectorBar| unicode:: U+02954 .. UPWARDS HARPOON WITH BARB RIGHT TO BAR
+.. |RightVectorBar| unicode:: U+02953 .. RIGHTWARDS HARPOON WITH BARB UP TO BAR
+.. |ropf| unicode:: U+1D563 .. MATHEMATICAL DOUBLE-STRUCK SMALL R
+.. |RoundImplies| unicode:: U+02970 .. RIGHT DOUBLE ARROW WITH ROUNDED HEAD
+.. |RuleDelayed| unicode:: U+029F4 .. RULE-DELAYED
+.. |sopf| unicode:: U+1D564 .. MATHEMATICAL DOUBLE-STRUCK SMALL S
+.. |Tab| unicode:: U+00009 .. CHARACTER TABULATION
+.. |ThickSpace| unicode:: U+02009 U+0200A U+0200A .. space of width 5/18 em
+.. |topf| unicode:: U+1D565 .. MATHEMATICAL DOUBLE-STRUCK SMALL T
+.. |UnderBar| unicode:: U+00332 .. COMBINING LOW LINE
+.. |UnderBrace| unicode:: U+0FE38 .. PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
+.. |UnderBracket| unicode:: U+023B5 .. BOTTOM SQUARE BRACKET
+.. |UnderParenthesis| unicode:: U+0FE36 .. PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
+.. |uopf| unicode:: U+1D566 .. MATHEMATICAL DOUBLE-STRUCK SMALL U
+.. |UpArrowBar| unicode:: U+02912 .. UPWARDS ARROW TO BAR
+.. |Upsilon| unicode:: U+003A5 .. GREEK CAPITAL LETTER UPSILON
+.. |VerticalLine| unicode:: U+0007C .. VERTICAL LINE
+.. |VerticalSeparator| unicode:: U+02758 .. LIGHT VERTICAL BAR
+.. |vopf| unicode:: U+1D567 .. MATHEMATICAL DOUBLE-STRUCK SMALL V
+.. |wopf| unicode:: U+1D568 .. MATHEMATICAL DOUBLE-STRUCK SMALL W
+.. |xopf| unicode:: U+1D569 .. MATHEMATICAL DOUBLE-STRUCK SMALL X
+.. |yopf| unicode:: U+1D56A .. MATHEMATICAL DOUBLE-STRUCK SMALL Y
+.. |ZeroWidthSpace| unicode:: U+0200B .. ZERO WIDTH SPACE
+.. |zopf| unicode:: U+1D56B .. MATHEMATICAL DOUBLE-STRUCK SMALL Z
diff --git a/docutils/parsers/rst/include/mmlextra.txt b/docutils/parsers/rst/include/mmlextra.txt
new file mode 100644
index 000000000..790a9775a
--- /dev/null
+++ b/docutils/parsers/rst/include/mmlextra.txt
@@ -0,0 +1,87 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |af| unicode:: U+02061 .. FUNCTION APPLICATION
+.. |asympeq| unicode:: U+0224D .. EQUIVALENT TO
+.. |Cross| unicode:: U+02A2F .. VECTOR OR CROSS PRODUCT
+.. |DD| unicode:: U+02145 .. DOUBLE-STRUCK ITALIC CAPITAL D
+.. |dd| unicode:: U+02146 .. DOUBLE-STRUCK ITALIC SMALL D
+.. |DownArrowBar| unicode:: U+02913 .. DOWNWARDS ARROW TO BAR
+.. |DownBreve| unicode:: U+00311 .. COMBINING INVERTED BREVE
+.. |DownLeftRightVector| unicode:: U+02950 .. LEFT BARB DOWN RIGHT BARB DOWN HARPOON
+.. |DownLeftTeeVector| unicode:: U+0295E .. LEFTWARDS HARPOON WITH BARB DOWN FROM BAR
+.. |DownLeftVectorBar| unicode:: U+02956 .. LEFTWARDS HARPOON WITH BARB DOWN TO BAR
+.. |DownRightTeeVector| unicode:: U+0295F .. RIGHTWARDS HARPOON WITH BARB DOWN FROM BAR
+.. |DownRightVectorBar| unicode:: U+02957 .. RIGHTWARDS HARPOON WITH BARB DOWN TO BAR
+.. |ee| unicode:: U+02147 .. DOUBLE-STRUCK ITALIC SMALL E
+.. |EmptySmallSquare| unicode:: U+025FB .. WHITE MEDIUM SQUARE
+.. |EmptyVerySmallSquare| unicode:: U+025AB .. WHITE SMALL SQUARE
+.. |Equal| unicode:: U+02A75 .. TWO CONSECUTIVE EQUALS SIGNS
+.. |FilledSmallSquare| unicode:: U+025FC .. BLACK MEDIUM SQUARE
+.. |FilledVerySmallSquare| unicode:: U+025AA .. BLACK SMALL SQUARE
+.. |GreaterGreater| unicode:: U+02AA2 .. DOUBLE NESTED GREATER-THAN
+.. |Hat| unicode:: U+0005E .. CIRCUMFLEX ACCENT
+.. |HorizontalLine| unicode:: U+02500 .. BOX DRAWINGS LIGHT HORIZONTAL
+.. |ic| unicode:: U+02063 .. INVISIBLE SEPARATOR
+.. |ii| unicode:: U+02148 .. DOUBLE-STRUCK ITALIC SMALL I
+.. |it| unicode:: U+02062 .. INVISIBLE TIMES
+.. |larrb| unicode:: U+021E4 .. LEFTWARDS ARROW TO BAR
+.. |LeftDownTeeVector| unicode:: U+02961 .. DOWNWARDS HARPOON WITH BARB LEFT FROM BAR
+.. |LeftDownVectorBar| unicode:: U+02959 .. DOWNWARDS HARPOON WITH BARB LEFT TO BAR
+.. |LeftRightVector| unicode:: U+0294E .. LEFT BARB UP RIGHT BARB UP HARPOON
+.. |LeftTeeVector| unicode:: U+0295A .. LEFTWARDS HARPOON WITH BARB UP FROM BAR
+.. |LeftTriangleBar| unicode:: U+029CF .. LEFT TRIANGLE BESIDE VERTICAL BAR
+.. |LeftUpDownVector| unicode:: U+02951 .. UP BARB LEFT DOWN BARB LEFT HARPOON
+.. |LeftUpTeeVector| unicode:: U+02960 .. UPWARDS HARPOON WITH BARB LEFT FROM BAR
+.. |LeftUpVectorBar| unicode:: U+02958 .. UPWARDS HARPOON WITH BARB LEFT TO BAR
+.. |LeftVectorBar| unicode:: U+02952 .. LEFTWARDS HARPOON WITH BARB UP TO BAR
+.. |LessLess| unicode:: U+02AA1 .. DOUBLE NESTED LESS-THAN
+.. |mapstodown| unicode:: U+021A7 .. DOWNWARDS ARROW FROM BAR
+.. |mapstoleft| unicode:: U+021A4 .. LEFTWARDS ARROW FROM BAR
+.. |mapstoup| unicode:: U+021A5 .. UPWARDS ARROW FROM BAR
+.. |MediumSpace| unicode:: U+0205F .. MEDIUM MATHEMATICAL SPACE
+.. |nbump| unicode:: U+0224E U+00338 .. GEOMETRICALLY EQUIVALENT TO with slash
+.. |nbumpe| unicode:: U+0224F U+00338 .. DIFFERENCE BETWEEN with slash
+.. |nesim| unicode:: U+02242 U+00338 .. MINUS TILDE with slash
+.. |NewLine| unicode:: U+0000A .. LINE FEED (LF)
+.. |NoBreak| unicode:: U+02060 .. WORD JOINER
+.. |NotCupCap| unicode:: U+0226D .. NOT EQUIVALENT TO
+.. |NotHumpEqual| unicode:: U+0224F U+00338 .. DIFFERENCE BETWEEN with slash
+.. |NotLeftTriangleBar| unicode:: U+029CF U+00338 .. LEFT TRIANGLE BESIDE VERTICAL BAR with slash
+.. |NotNestedGreaterGreater| unicode:: U+02AA2 U+00338 .. DOUBLE NESTED GREATER-THAN with slash
+.. |NotNestedLessLess| unicode:: U+02AA1 U+00338 .. DOUBLE NESTED LESS-THAN with slash
+.. |NotRightTriangleBar| unicode:: U+029D0 U+00338 .. VERTICAL BAR BESIDE RIGHT TRIANGLE with slash
+.. |NotSquareSubset| unicode:: U+0228F U+00338 .. SQUARE IMAGE OF with slash
+.. |NotSquareSuperset| unicode:: U+02290 U+00338 .. SQUARE ORIGINAL OF with slash
+.. |NotSucceedsTilde| unicode:: U+0227F U+00338 .. SUCCEEDS OR EQUIVALENT TO with slash
+.. |OverBar| unicode:: U+000AF .. MACRON
+.. |OverBrace| unicode:: U+0FE37 .. PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
+.. |OverBracket| unicode:: U+023B4 .. TOP SQUARE BRACKET
+.. |OverParenthesis| unicode:: U+0FE35 .. PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
+.. |planckh| unicode:: U+0210E .. PLANCK CONSTANT
+.. |Product| unicode:: U+0220F .. N-ARY PRODUCT
+.. |rarrb| unicode:: U+021E5 .. RIGHTWARDS ARROW TO BAR
+.. |RightDownTeeVector| unicode:: U+0295D .. DOWNWARDS HARPOON WITH BARB RIGHT FROM BAR
+.. |RightDownVectorBar| unicode:: U+02955 .. DOWNWARDS HARPOON WITH BARB RIGHT TO BAR
+.. |RightTeeVector| unicode:: U+0295B .. RIGHTWARDS HARPOON WITH BARB UP FROM BAR
+.. |RightTriangleBar| unicode:: U+029D0 .. VERTICAL BAR BESIDE RIGHT TRIANGLE
+.. |RightUpDownVector| unicode:: U+0294F .. UP BARB RIGHT DOWN BARB RIGHT HARPOON
+.. |RightUpTeeVector| unicode:: U+0295C .. UPWARDS HARPOON WITH BARB RIGHT FROM BAR
+.. |RightUpVectorBar| unicode:: U+02954 .. UPWARDS HARPOON WITH BARB RIGHT TO BAR
+.. |RightVectorBar| unicode:: U+02953 .. RIGHTWARDS HARPOON WITH BARB UP TO BAR
+.. |RoundImplies| unicode:: U+02970 .. RIGHT DOUBLE ARROW WITH ROUNDED HEAD
+.. |RuleDelayed| unicode:: U+029F4 .. RULE-DELAYED
+.. |Tab| unicode:: U+00009 .. CHARACTER TABULATION
+.. |ThickSpace| unicode:: U+02009 U+0200A U+0200A .. space of width 5/18 em
+.. |UnderBar| unicode:: U+00332 .. COMBINING LOW LINE
+.. |UnderBrace| unicode:: U+0FE38 .. PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
+.. |UnderBracket| unicode:: U+023B5 .. BOTTOM SQUARE BRACKET
+.. |UnderParenthesis| unicode:: U+0FE36 .. PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
+.. |UpArrowBar| unicode:: U+02912 .. UPWARDS ARROW TO BAR
+.. |Upsilon| unicode:: U+003A5 .. GREEK CAPITAL LETTER UPSILON
+.. |VerticalLine| unicode:: U+0007C .. VERTICAL LINE
+.. |VerticalSeparator| unicode:: U+02758 .. LIGHT VERTICAL BAR
+.. |ZeroWidthSpace| unicode:: U+0200B .. ZERO WIDTH SPACE
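
None of these data files is read automatically; they are meant to be pulled
into a document with the "include" directive, where angle brackets tell
Docutils to search its own rst/include/ data directory added by this changeset.
The "-wide" variants additionally cover characters outside the Basic
Multilingual Plane (for example the U+1D55x double-struck letters present in
mmlextra-wide.txt but absent from mmlextra.txt). A minimal usage sketch,
assuming a normal Docutils installation::

  # Pull one of the standard definition files into a source string and
  # render it; the angle brackets make the include directive search the
  # rst/include/ directory rather than the current working directory.
  from docutils.core import publish_string

  source = u"""\
  .. include:: <mmlextra.txt>

  The n-ary product sign is written |Product|; |rarrb| is an arrow to a bar.
  """

  print(publish_string(source, writer_name='pseudoxml'))
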
diff --git a/docutils/parsers/rst/include/s5defs.txt b/docutils/parsers/rst/include/s5defs.txt
new file mode 100644
index 000000000..dca77ca43
--- /dev/null
+++ b/docutils/parsers/rst/include/s5defs.txt
@@ -0,0 +1,62 @@
+.. Definitions of interpreted text roles (classes) for S5/HTML data.
+.. This data file has been placed in the public domain.
+
+.. Colours
+ =======
+
+.. role:: black
+.. role:: gray
+.. role:: silver
+.. role:: white
+
+.. role:: maroon
+.. role:: red
+.. role:: magenta
+.. role:: fuchsia
+.. role:: pink
+.. role:: orange
+.. role:: yellow
+.. role:: lime
+.. role:: green
+.. role:: olive
+.. role:: teal
+.. role:: cyan
+.. role:: aqua
+.. role:: blue
+.. role:: navy
+.. role:: purple
+
+
+.. Text Sizes
+ ==========
+
+.. role:: huge
+.. role:: big
+.. role:: small
+.. role:: tiny
+
+
+.. Display in Slides (Presentation Mode) Only
+ ==========================================
+
+.. role:: slide
+ :class: slide-display
+
+
+.. Display in Outline Mode Only
+ ============================
+
+.. role:: outline
+
+
+.. Display in Print Only
+ =====================
+
+.. role:: print
+
+
+.. Incremental Display
+ ===================
+
+.. role:: incremental
+.. default-role:: incremental
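
Unlike the character-entity files, s5defs.txt defines interpreted-text roles
for S5/HTML slide output: colour and size classes, roles that limit text to the
slide, outline, or print renditions, and an "incremental" default role. The
roles produce plain class-bearing inline elements, so their effect can be
inspected with any HTML-producing writer; only the S5 theme CSS gives the class
names a visual meaning. A sketch under those assumptions::

  # Sketch: include the S5 role definitions and mark up some inline text.
  # The plain HTML writer is used here only to look at the generated class
  # attributes; styling comes from the S5 themes, not from this snippet.
  from docutils.core import publish_string

  source = u"""\
  .. include:: <s5defs.txt>

  :huge:`Big`, :tiny:`small`, and :red:`red` text,
  plus :slide:`text intended for presentation mode only`.
  """

  print(publish_string(source, writer_name='html'))
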
diff --git a/docutils/parsers/rst/include/xhtml1-lat1.txt b/docutils/parsers/rst/include/xhtml1-lat1.txt
new file mode 100644
index 000000000..824dc61c0
--- /dev/null
+++ b/docutils/parsers/rst/include/xhtml1-lat1.txt
@@ -0,0 +1,102 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |Aacute| unicode:: U+000C1 .. LATIN CAPITAL LETTER A WITH ACUTE
+.. |aacute| unicode:: U+000E1 .. LATIN SMALL LETTER A WITH ACUTE
+.. |Acirc| unicode:: U+000C2 .. LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+.. |acirc| unicode:: U+000E2 .. LATIN SMALL LETTER A WITH CIRCUMFLEX
+.. |acute| unicode:: U+000B4 .. ACUTE ACCENT
+.. |AElig| unicode:: U+000C6 .. LATIN CAPITAL LETTER AE
+.. |aelig| unicode:: U+000E6 .. LATIN SMALL LETTER AE
+.. |Agrave| unicode:: U+000C0 .. LATIN CAPITAL LETTER A WITH GRAVE
+.. |agrave| unicode:: U+000E0 .. LATIN SMALL LETTER A WITH GRAVE
+.. |Aring| unicode:: U+000C5 .. LATIN CAPITAL LETTER A WITH RING ABOVE
+.. |aring| unicode:: U+000E5 .. LATIN SMALL LETTER A WITH RING ABOVE
+.. |Atilde| unicode:: U+000C3 .. LATIN CAPITAL LETTER A WITH TILDE
+.. |atilde| unicode:: U+000E3 .. LATIN SMALL LETTER A WITH TILDE
+.. |Auml| unicode:: U+000C4 .. LATIN CAPITAL LETTER A WITH DIAERESIS
+.. |auml| unicode:: U+000E4 .. LATIN SMALL LETTER A WITH DIAERESIS
+.. |brvbar| unicode:: U+000A6 .. BROKEN BAR
+.. |Ccedil| unicode:: U+000C7 .. LATIN CAPITAL LETTER C WITH CEDILLA
+.. |ccedil| unicode:: U+000E7 .. LATIN SMALL LETTER C WITH CEDILLA
+.. |cedil| unicode:: U+000B8 .. CEDILLA
+.. |cent| unicode:: U+000A2 .. CENT SIGN
+.. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN
+.. |curren| unicode:: U+000A4 .. CURRENCY SIGN
+.. |deg| unicode:: U+000B0 .. DEGREE SIGN
+.. |divide| unicode:: U+000F7 .. DIVISION SIGN
+.. |Eacute| unicode:: U+000C9 .. LATIN CAPITAL LETTER E WITH ACUTE
+.. |eacute| unicode:: U+000E9 .. LATIN SMALL LETTER E WITH ACUTE
+.. |Ecirc| unicode:: U+000CA .. LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+.. |ecirc| unicode:: U+000EA .. LATIN SMALL LETTER E WITH CIRCUMFLEX
+.. |Egrave| unicode:: U+000C8 .. LATIN CAPITAL LETTER E WITH GRAVE
+.. |egrave| unicode:: U+000E8 .. LATIN SMALL LETTER E WITH GRAVE
+.. |ETH| unicode:: U+000D0 .. LATIN CAPITAL LETTER ETH
+.. |eth| unicode:: U+000F0 .. LATIN SMALL LETTER ETH
+.. |Euml| unicode:: U+000CB .. LATIN CAPITAL LETTER E WITH DIAERESIS
+.. |euml| unicode:: U+000EB .. LATIN SMALL LETTER E WITH DIAERESIS
+.. |frac12| unicode:: U+000BD .. VULGAR FRACTION ONE HALF
+.. |frac14| unicode:: U+000BC .. VULGAR FRACTION ONE QUARTER
+.. |frac34| unicode:: U+000BE .. VULGAR FRACTION THREE QUARTERS
+.. |Iacute| unicode:: U+000CD .. LATIN CAPITAL LETTER I WITH ACUTE
+.. |iacute| unicode:: U+000ED .. LATIN SMALL LETTER I WITH ACUTE
+.. |Icirc| unicode:: U+000CE .. LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+.. |icirc| unicode:: U+000EE .. LATIN SMALL LETTER I WITH CIRCUMFLEX
+.. |iexcl| unicode:: U+000A1 .. INVERTED EXCLAMATION MARK
+.. |Igrave| unicode:: U+000CC .. LATIN CAPITAL LETTER I WITH GRAVE
+.. |igrave| unicode:: U+000EC .. LATIN SMALL LETTER I WITH GRAVE
+.. |iquest| unicode:: U+000BF .. INVERTED QUESTION MARK
+.. |Iuml| unicode:: U+000CF .. LATIN CAPITAL LETTER I WITH DIAERESIS
+.. |iuml| unicode:: U+000EF .. LATIN SMALL LETTER I WITH DIAERESIS
+.. |laquo| unicode:: U+000AB .. LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+.. |macr| unicode:: U+000AF .. MACRON
+.. |micro| unicode:: U+000B5 .. MICRO SIGN
+.. |middot| unicode:: U+000B7 .. MIDDLE DOT
+.. |nbsp| unicode:: U+000A0 .. NO-BREAK SPACE
+.. |not| unicode:: U+000AC .. NOT SIGN
+.. |Ntilde| unicode:: U+000D1 .. LATIN CAPITAL LETTER N WITH TILDE
+.. |ntilde| unicode:: U+000F1 .. LATIN SMALL LETTER N WITH TILDE
+.. |Oacute| unicode:: U+000D3 .. LATIN CAPITAL LETTER O WITH ACUTE
+.. |oacute| unicode:: U+000F3 .. LATIN SMALL LETTER O WITH ACUTE
+.. |Ocirc| unicode:: U+000D4 .. LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+.. |ocirc| unicode:: U+000F4 .. LATIN SMALL LETTER O WITH CIRCUMFLEX
+.. |Ograve| unicode:: U+000D2 .. LATIN CAPITAL LETTER O WITH GRAVE
+.. |ograve| unicode:: U+000F2 .. LATIN SMALL LETTER O WITH GRAVE
+.. |ordf| unicode:: U+000AA .. FEMININE ORDINAL INDICATOR
+.. |ordm| unicode:: U+000BA .. MASCULINE ORDINAL INDICATOR
+.. |Oslash| unicode:: U+000D8 .. LATIN CAPITAL LETTER O WITH STROKE
+.. |oslash| unicode:: U+000F8 .. LATIN SMALL LETTER O WITH STROKE
+.. |Otilde| unicode:: U+000D5 .. LATIN CAPITAL LETTER O WITH TILDE
+.. |otilde| unicode:: U+000F5 .. LATIN SMALL LETTER O WITH TILDE
+.. |Ouml| unicode:: U+000D6 .. LATIN CAPITAL LETTER O WITH DIAERESIS
+.. |ouml| unicode:: U+000F6 .. LATIN SMALL LETTER O WITH DIAERESIS
+.. |para| unicode:: U+000B6 .. PILCROW SIGN
+.. |plusmn| unicode:: U+000B1 .. PLUS-MINUS SIGN
+.. |pound| unicode:: U+000A3 .. POUND SIGN
+.. |raquo| unicode:: U+000BB .. RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+.. |reg| unicode:: U+000AE .. REGISTERED SIGN
+.. |sect| unicode:: U+000A7 .. SECTION SIGN
+.. |shy| unicode:: U+000AD .. SOFT HYPHEN
+.. |sup1| unicode:: U+000B9 .. SUPERSCRIPT ONE
+.. |sup2| unicode:: U+000B2 .. SUPERSCRIPT TWO
+.. |sup3| unicode:: U+000B3 .. SUPERSCRIPT THREE
+.. |szlig| unicode:: U+000DF .. LATIN SMALL LETTER SHARP S
+.. |THORN| unicode:: U+000DE .. LATIN CAPITAL LETTER THORN
+.. |thorn| unicode:: U+000FE .. LATIN SMALL LETTER THORN
+.. |times| unicode:: U+000D7 .. MULTIPLICATION SIGN
+.. |Uacute| unicode:: U+000DA .. LATIN CAPITAL LETTER U WITH ACUTE
+.. |uacute| unicode:: U+000FA .. LATIN SMALL LETTER U WITH ACUTE
+.. |Ucirc| unicode:: U+000DB .. LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+.. |ucirc| unicode:: U+000FB .. LATIN SMALL LETTER U WITH CIRCUMFLEX
+.. |Ugrave| unicode:: U+000D9 .. LATIN CAPITAL LETTER U WITH GRAVE
+.. |ugrave| unicode:: U+000F9 .. LATIN SMALL LETTER U WITH GRAVE
+.. |uml| unicode:: U+000A8 .. DIAERESIS
+.. |Uuml| unicode:: U+000DC .. LATIN CAPITAL LETTER U WITH DIAERESIS
+.. |uuml| unicode:: U+000FC .. LATIN SMALL LETTER U WITH DIAERESIS
+.. |Yacute| unicode:: U+000DD .. LATIN CAPITAL LETTER Y WITH ACUTE
+.. |yacute| unicode:: U+000FD .. LATIN SMALL LETTER Y WITH ACUTE
+.. |yen| unicode:: U+000A5 .. YEN SIGN
+.. |yuml| unicode:: U+000FF .. LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/docutils/parsers/rst/include/xhtml1-special.txt b/docutils/parsers/rst/include/xhtml1-special.txt
new file mode 100644
index 000000000..dc6f5753c
--- /dev/null
+++ b/docutils/parsers/rst/include/xhtml1-special.txt
@@ -0,0 +1,37 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |bdquo| unicode:: U+0201E .. DOUBLE LOW-9 QUOTATION MARK
+.. |circ| unicode:: U+002C6 .. MODIFIER LETTER CIRCUMFLEX ACCENT
+.. |Dagger| unicode:: U+02021 .. DOUBLE DAGGER
+.. |dagger| unicode:: U+02020 .. DAGGER
+.. |emsp| unicode:: U+02003 .. EM SPACE
+.. |ensp| unicode:: U+02002 .. EN SPACE
+.. |euro| unicode:: U+020AC .. EURO SIGN
+.. |gt| unicode:: U+0003E .. GREATER-THAN SIGN
+.. |ldquo| unicode:: U+0201C .. LEFT DOUBLE QUOTATION MARK
+.. |lrm| unicode:: U+0200E .. LEFT-TO-RIGHT MARK
+.. |lsaquo| unicode:: U+02039 .. SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+.. |lsquo| unicode:: U+02018 .. LEFT SINGLE QUOTATION MARK
+.. |lt| unicode:: U+0003C .. LESS-THAN SIGN
+.. |mdash| unicode:: U+02014 .. EM DASH
+.. |ndash| unicode:: U+02013 .. EN DASH
+.. |OElig| unicode:: U+00152 .. LATIN CAPITAL LIGATURE OE
+.. |oelig| unicode:: U+00153 .. LATIN SMALL LIGATURE OE
+.. |permil| unicode:: U+02030 .. PER MILLE SIGN
+.. |quot| unicode:: U+00022 .. QUOTATION MARK
+.. |rdquo| unicode:: U+0201D .. RIGHT DOUBLE QUOTATION MARK
+.. |rlm| unicode:: U+0200F .. RIGHT-TO-LEFT MARK
+.. |rsaquo| unicode:: U+0203A .. SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+.. |rsquo| unicode:: U+02019 .. RIGHT SINGLE QUOTATION MARK
+.. |sbquo| unicode:: U+0201A .. SINGLE LOW-9 QUOTATION MARK
+.. |Scaron| unicode:: U+00160 .. LATIN CAPITAL LETTER S WITH CARON
+.. |scaron| unicode:: U+00161 .. LATIN SMALL LETTER S WITH CARON
+.. |thinsp| unicode:: U+02009 .. THIN SPACE
+.. |tilde| unicode:: U+002DC .. SMALL TILDE
+.. |Yuml| unicode:: U+00178 .. LATIN CAPITAL LETTER Y WITH DIAERESIS
+.. |zwj| unicode:: U+0200D .. ZERO WIDTH JOINER
+.. |zwnj| unicode:: U+0200C .. ZERO WIDTH NON-JOINER
diff --git a/docutils/parsers/rst/include/xhtml1-symbol.txt b/docutils/parsers/rst/include/xhtml1-symbol.txt
new file mode 100644
index 000000000..8fe97f808
--- /dev/null
+++ b/docutils/parsers/rst/include/xhtml1-symbol.txt
@@ -0,0 +1,130 @@
+.. This data file has been placed in the public domain.
+.. Derived from the Unicode character mappings available from
+ <http://www.w3.org/2003/entities/xml/>.
+ Processed by unicode2rstsubs.py, part of Docutils:
+ <http://docutils.sourceforge.net>.
+
+.. |alefsym| unicode:: U+02135 .. ALEF SYMBOL
+.. |Alpha| unicode:: U+00391 .. GREEK CAPITAL LETTER ALPHA
+.. |alpha| unicode:: U+003B1 .. GREEK SMALL LETTER ALPHA
+.. |and| unicode:: U+02227 .. LOGICAL AND
+.. |ang| unicode:: U+02220 .. ANGLE
+.. |asymp| unicode:: U+02248 .. ALMOST EQUAL TO
+.. |Beta| unicode:: U+00392 .. GREEK CAPITAL LETTER BETA
+.. |beta| unicode:: U+003B2 .. GREEK SMALL LETTER BETA
+.. |bull| unicode:: U+02022 .. BULLET
+.. |cap| unicode:: U+02229 .. INTERSECTION
+.. |Chi| unicode:: U+003A7 .. GREEK CAPITAL LETTER CHI
+.. |chi| unicode:: U+003C7 .. GREEK SMALL LETTER CHI
+.. |clubs| unicode:: U+02663 .. BLACK CLUB SUIT
+.. |cong| unicode:: U+02245 .. APPROXIMATELY EQUAL TO
+.. |crarr| unicode:: U+021B5 .. DOWNWARDS ARROW WITH CORNER LEFTWARDS
+.. |cup| unicode:: U+0222A .. UNION
+.. |dArr| unicode:: U+021D3 .. DOWNWARDS DOUBLE ARROW
+.. |darr| unicode:: U+02193 .. DOWNWARDS ARROW
+.. |Delta| unicode:: U+00394 .. GREEK CAPITAL LETTER DELTA
+.. |delta| unicode:: U+003B4 .. GREEK SMALL LETTER DELTA
+.. |diams| unicode:: U+02666 .. BLACK DIAMOND SUIT
+.. |empty| unicode:: U+02205 .. EMPTY SET
+.. |Epsilon| unicode:: U+00395 .. GREEK CAPITAL LETTER EPSILON
+.. |epsilon| unicode:: U+003B5 .. GREEK SMALL LETTER EPSILON
+.. |equiv| unicode:: U+02261 .. IDENTICAL TO
+.. |Eta| unicode:: U+00397 .. GREEK CAPITAL LETTER ETA
+.. |eta| unicode:: U+003B7 .. GREEK SMALL LETTER ETA
+.. |exist| unicode:: U+02203 .. THERE EXISTS
+.. |fnof| unicode:: U+00192 .. LATIN SMALL LETTER F WITH HOOK
+.. |forall| unicode:: U+02200 .. FOR ALL
+.. |frasl| unicode:: U+02044 .. FRACTION SLASH
+.. |Gamma| unicode:: U+00393 .. GREEK CAPITAL LETTER GAMMA
+.. |gamma| unicode:: U+003B3 .. GREEK SMALL LETTER GAMMA
+.. |ge| unicode:: U+02265 .. GREATER-THAN OR EQUAL TO
+.. |hArr| unicode:: U+021D4 .. LEFT RIGHT DOUBLE ARROW
+.. |harr| unicode:: U+02194 .. LEFT RIGHT ARROW
+.. |hearts| unicode:: U+02665 .. BLACK HEART SUIT
+.. |hellip| unicode:: U+02026 .. HORIZONTAL ELLIPSIS
+.. |image| unicode:: U+02111 .. BLACK-LETTER CAPITAL I
+.. |infin| unicode:: U+0221E .. INFINITY
+.. |int| unicode:: U+0222B .. INTEGRAL
+.. |Iota| unicode:: U+00399 .. GREEK CAPITAL LETTER IOTA
+.. |iota| unicode:: U+003B9 .. GREEK SMALL LETTER IOTA
+.. |isin| unicode:: U+02208 .. ELEMENT OF
+.. |Kappa| unicode:: U+0039A .. GREEK CAPITAL LETTER KAPPA
+.. |kappa| unicode:: U+003BA .. GREEK SMALL LETTER KAPPA
+.. |Lambda| unicode:: U+0039B .. GREEK CAPITAL LETTER LAMDA
+.. |lambda| unicode:: U+003BB .. GREEK SMALL LETTER LAMDA
+.. |lang| unicode:: U+02329 .. LEFT-POINTING ANGLE BRACKET
+.. |lArr| unicode:: U+021D0 .. LEFTWARDS DOUBLE ARROW
+.. |larr| unicode:: U+02190 .. LEFTWARDS ARROW
+.. |lceil| unicode:: U+02308 .. LEFT CEILING
+.. |le| unicode:: U+02264 .. LESS-THAN OR EQUAL TO
+.. |lfloor| unicode:: U+0230A .. LEFT FLOOR
+.. |lowast| unicode:: U+02217 .. ASTERISK OPERATOR
+.. |loz| unicode:: U+025CA .. LOZENGE
+.. |minus| unicode:: U+02212 .. MINUS SIGN
+.. |Mu| unicode:: U+0039C .. GREEK CAPITAL LETTER MU
+.. |mu| unicode:: U+003BC .. GREEK SMALL LETTER MU
+.. |nabla| unicode:: U+02207 .. NABLA
+.. |ne| unicode:: U+02260 .. NOT EQUAL TO
+.. |ni| unicode:: U+0220B .. CONTAINS AS MEMBER
+.. |notin| unicode:: U+02209 .. NOT AN ELEMENT OF
+.. |nsub| unicode:: U+02284 .. NOT A SUBSET OF
+.. |Nu| unicode:: U+0039D .. GREEK CAPITAL LETTER NU
+.. |nu| unicode:: U+003BD .. GREEK SMALL LETTER NU
+.. |oline| unicode:: U+0203E .. OVERLINE
+.. |Omega| unicode:: U+003A9 .. GREEK CAPITAL LETTER OMEGA
+.. |omega| unicode:: U+003C9 .. GREEK SMALL LETTER OMEGA
+.. |Omicron| unicode:: U+0039F .. GREEK CAPITAL LETTER OMICRON
+.. |omicron| unicode:: U+003BF .. GREEK SMALL LETTER OMICRON
+.. |oplus| unicode:: U+02295 .. CIRCLED PLUS
+.. |or| unicode:: U+02228 .. LOGICAL OR
+.. |otimes| unicode:: U+02297 .. CIRCLED TIMES
+.. |part| unicode:: U+02202 .. PARTIAL DIFFERENTIAL
+.. |perp| unicode:: U+022A5 .. UP TACK
+.. |Phi| unicode:: U+003A6 .. GREEK CAPITAL LETTER PHI
+.. |phi| unicode:: U+003D5 .. GREEK PHI SYMBOL
+.. |Pi| unicode:: U+003A0 .. GREEK CAPITAL LETTER PI
+.. |pi| unicode:: U+003C0 .. GREEK SMALL LETTER PI
+.. |piv| unicode:: U+003D6 .. GREEK PI SYMBOL
+.. |Prime| unicode:: U+02033 .. DOUBLE PRIME
+.. |prime| unicode:: U+02032 .. PRIME
+.. |prod| unicode:: U+0220F .. N-ARY PRODUCT
+.. |prop| unicode:: U+0221D .. PROPORTIONAL TO
+.. |Psi| unicode:: U+003A8 .. GREEK CAPITAL LETTER PSI
+.. |psi| unicode:: U+003C8 .. GREEK SMALL LETTER PSI
+.. |radic| unicode:: U+0221A .. SQUARE ROOT
+.. |rang| unicode:: U+0232A .. RIGHT-POINTING ANGLE BRACKET
+.. |rArr| unicode:: U+021D2 .. RIGHTWARDS DOUBLE ARROW
+.. |rarr| unicode:: U+02192 .. RIGHTWARDS ARROW
+.. |rceil| unicode:: U+02309 .. RIGHT CEILING
+.. |real| unicode:: U+0211C .. BLACK-LETTER CAPITAL R
+.. |rfloor| unicode:: U+0230B .. RIGHT FLOOR
+.. |Rho| unicode:: U+003A1 .. GREEK CAPITAL LETTER RHO
+.. |rho| unicode:: U+003C1 .. GREEK SMALL LETTER RHO
+.. |sdot| unicode:: U+022C5 .. DOT OPERATOR
+.. |Sigma| unicode:: U+003A3 .. GREEK CAPITAL LETTER SIGMA
+.. |sigma| unicode:: U+003C3 .. GREEK SMALL LETTER SIGMA
+.. |sigmaf| unicode:: U+003C2 .. GREEK SMALL LETTER FINAL SIGMA
+.. |sim| unicode:: U+0223C .. TILDE OPERATOR
+.. |spades| unicode:: U+02660 .. BLACK SPADE SUIT
+.. |sub| unicode:: U+02282 .. SUBSET OF
+.. |sube| unicode:: U+02286 .. SUBSET OF OR EQUAL TO
+.. |sum| unicode:: U+02211 .. N-ARY SUMMATION
+.. |sup| unicode:: U+02283 .. SUPERSET OF
+.. |supe| unicode:: U+02287 .. SUPERSET OF OR EQUAL TO
+.. |Tau| unicode:: U+003A4 .. GREEK CAPITAL LETTER TAU
+.. |tau| unicode:: U+003C4 .. GREEK SMALL LETTER TAU
+.. |there4| unicode:: U+02234 .. THEREFORE
+.. |Theta| unicode:: U+00398 .. GREEK CAPITAL LETTER THETA
+.. |theta| unicode:: U+003B8 .. GREEK SMALL LETTER THETA
+.. |thetasym| unicode:: U+003D1 .. GREEK THETA SYMBOL
+.. |trade| unicode:: U+02122 .. TRADE MARK SIGN
+.. |uArr| unicode:: U+021D1 .. UPWARDS DOUBLE ARROW
+.. |uarr| unicode:: U+02191 .. UPWARDS ARROW
+.. |upsih| unicode:: U+003D2 .. GREEK UPSILON WITH HOOK SYMBOL
+.. |Upsilon| unicode:: U+003A5 .. GREEK CAPITAL LETTER UPSILON
+.. |upsilon| unicode:: U+003C5 .. GREEK SMALL LETTER UPSILON
+.. |weierp| unicode:: U+02118 .. SCRIPT CAPITAL P
+.. |Xi| unicode:: U+0039E .. GREEK CAPITAL LETTER XI
+.. |xi| unicode:: U+003BE .. GREEK SMALL LETTER XI
+.. |Zeta| unicode:: U+00396 .. GREEK CAPITAL LETTER ZETA
+.. |zeta| unicode:: U+003B6 .. GREEK SMALL LETTER ZETA
diff --git a/docutils/parsers/rst/languages/__init__.py b/docutils/parsers/rst/languages/__init__.py
new file mode 100644
index 000000000..be6feb644
--- /dev/null
+++ b/docutils/parsers/rst/languages/__init__.py
@@ -0,0 +1,27 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# Internationalization details are documented in
+# <http://docutils.sf.net/docs/howto/i18n.html>.
+
+"""
+This package contains modules for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+_languages = {}
+
+def get_language(language_code):
+    if _languages.has_key(language_code):
+        return _languages[language_code]
+    try:
+        module = __import__(language_code, globals(), locals())
+    except ImportError:
+        return None
+    _languages[language_code] = module
+    return module
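
get_language() caches each successfully imported module in _languages and
quietly returns None for unknown codes (the has_key call and the implicit
relative import reflect the Python 2 era of this code). Every language module
that follows exports two dictionaries, "directives" and "roles", mapping
localized names to the canonical English names. A small usage sketch against
this revision::

  # Usage sketch (behaviour as of this revision, under Python 2):
  from docutils.parsers.rst import languages

  af = languages.get_language('af')     # the Afrikaans module defined below
  if af is not None:
      print(af.directives['nota'])      # -> 'note'
      print(af.roles['sterk'])          # -> 'strong'

  print(languages.get_language('xx'))   # -> None: no such language module
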
diff --git a/docutils/parsers/rst/languages/af.py b/docutils/parsers/rst/languages/af.py
new file mode 100644
index 000000000..c2d8fd4c7
--- /dev/null
+++ b/docutils/parsers/rst/languages/af.py
@@ -0,0 +1,104 @@
+# Author: Jannie Hofmeyr
+# Contact: jhsh@sun.ac.za
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Afrikaans-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ 'aandag': 'attention',
+ 'versigtig': 'caution',
+ 'gevaar': 'danger',
+ 'fout': 'error',
+ 'wenk': 'hint',
+ 'belangrik': 'important',
+ 'nota': 'note',
+ 'tip': 'tip', # hint and tip both have the same translation: wenk
+ 'waarskuwing': 'warning',
+ 'vermaning': 'admonition',
+ 'kantstreep': 'sidebar',
+ 'onderwerp': 'topic',
+ 'lynblok': 'line-block',
+ 'parsed-literal (translation required)': 'parsed-literal',
+ 'rubriek': 'rubric',
+ 'epigraaf': 'epigraph',
+ 'hoogtepunte': 'highlights',
+ 'pull-quote (translation required)': 'pull-quote',
+ u'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ #'vrae': 'questions',
+ #'qa': 'questions',
+ #'faq': 'questions',
+ 'table (translation required)': 'table',
+ 'csv-table (translation required)': 'csv-table',
+ 'list-table (translation required)': 'list-table',
+ 'meta': 'meta',
+ #'beeldkaart': 'imagemap',
+ 'beeld': 'image',
+ 'figuur': 'figure',
+ 'insluiting': 'include',
+ 'rou': 'raw',
+ 'vervang': 'replace',
+ 'unicode': 'unicode', # should this be translated? unikode
+ 'datum': 'date',
+ 'klas': 'class',
+ 'role (translation required)': 'role',
+ 'default-role (translation required)': 'default-role',
+ 'title (translation required)': 'title',
+ 'inhoud': 'contents',
+ 'sectnum': 'sectnum',
+ 'section-numbering': 'sectnum',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #'voetnote': 'footnotes',
+ #'aanhalings': 'citations',
+ 'teikennotas': 'target-notes',
+ 'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""Afrikaans name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ 'afkorting': 'abbreviation',
+ 'ab': 'abbreviation',
+ 'akroniem': 'acronym',
+ 'ac': 'acronym',
+ 'indeks': 'index',
+ 'i': 'index',
+ 'voetskrif': 'subscript',
+ 'sub': 'subscript',
+ 'boskrif': 'superscript',
+ 'sup': 'superscript',
+ 'titelverwysing': 'title-reference',
+ 'titel': 'title-reference',
+ 't': 'title-reference',
+ 'pep-verwysing': 'pep-reference',
+ 'pep': 'pep-reference',
+ 'rfc-verwysing': 'rfc-reference',
+ 'rfc': 'rfc-reference',
+ 'nadruk': 'emphasis',
+ 'sterk': 'strong',
+ 'literal (translation required)': 'literal',
+ 'benoemde verwysing': 'named-reference',
+ 'anonieme verwysing': 'anonymous-reference',
+ 'voetnootverwysing': 'footnote-reference',
+ 'aanhalingverwysing': 'citation-reference',
+ 'vervangingsverwysing': 'substitution-reference',
+ 'teiken': 'target',
+ 'uri-verwysing': 'uri-reference',
+ 'uri': 'uri-reference',
+ 'url': 'uri-reference',
+ 'rou': 'raw',}
+"""Mapping of Afrikaans role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/ca.py b/docutils/parsers/rst/languages/ca.py
new file mode 100644
index 000000000..ed181668d
--- /dev/null
+++ b/docutils/parsers/rst/languages/ca.py
@@ -0,0 +1,123 @@
+# Author: Ivan Vilata i Balaguer
+# Contact: ivan@selidor.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Catalan-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ u'atenci\u00F3': 'attention',
+ u'compte': 'caution',
+ u'perill': 'danger',
+ u'error': 'error',
+ u'suggeriment': 'hint',
+ u'important': 'important',
+ u'nota': 'note',
+ u'consell': 'tip',
+ u'av\u00EDs': 'warning',
+ u'advertiment': 'admonition',
+ u'nota-al-marge': 'sidebar',
+ u'nota-marge': 'sidebar',
+ u'tema': 'topic',
+ u'bloc-de-l\u00EDnies': 'line-block',
+ u'bloc-l\u00EDnies': 'line-block',
+ u'literal-analitzat': 'parsed-literal',
+ u'r\u00FAbrica': 'rubric',
+ u'ep\u00EDgraf': 'epigraph',
+ u'sumari': 'highlights',
+ u'cita-destacada': 'pull-quote',
+ u'compost': 'compound',
+ u'container (translation required)': 'container',
+ #'questions': 'questions',
+ u'taula': 'table',
+ u'taula-csv': 'csv-table',
+ u'taula-llista': 'list-table',
+ #'qa': 'questions',
+ #'faq': 'questions',
+ u'meta': 'meta',
+ #'imagemap': 'imagemap',
+ u'imatge': 'image',
+ u'figura': 'figure',
+ u'inclou': 'include',
+ u'incloure': 'include',
+ u'cru': 'raw',
+ u'reempla\u00E7a': 'replace',
+ u'reempla\u00E7ar': 'replace',
+ u'unicode': 'unicode',
+ u'data': 'date',
+ u'classe': 'class',
+ u'rol': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ u'contingut': 'contents',
+ u'numsec': 'sectnum',
+ u'numeraci\u00F3-de-seccions': 'sectnum',
+ u'numeraci\u00F3-seccions': 'sectnum',
+ u'cap\u00E7alera': 'header',
+ u'peu-de-p\u00E0gina': 'footer',
+ u'peu-p\u00E0gina': 'footer',
+ #'footnotes': 'footnotes',
+ #'citations': 'citations',
+ u'notes-amb-destinacions': 'target-notes',
+ u'notes-destinacions': 'target-notes',
+ u'directiva-de-prova-de-restructuredtext': 'restructuredtext-test-directive'}
+"""Catalan name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ # language-dependent: fixed
+ u'abreviatura': 'abbreviation',
+ u'abreviaci\u00F3': 'abbreviation',
+ u'abrev': 'abbreviation',
+ u'ab': 'abbreviation',
+ u'acr\u00F2nim': 'acronym',
+ u'ac': 'acronym',
+ u'\u00EDndex': 'index',
+ u'i': 'index',
+ u'sub\u00EDndex': 'subscript',
+ u'sub': 'subscript',
+ u'super\u00EDndex': 'superscript',
+ u'sup': 'superscript',
+ u'refer\u00E8ncia-a-t\u00EDtol': 'title-reference',
+ u'refer\u00E8ncia-t\u00EDtol': 'title-reference',
+ u't\u00EDtol': 'title-reference',
+ u't': 'title-reference',
+ u'refer\u00E8ncia-a-pep': 'pep-reference',
+ u'refer\u00E8ncia-pep': 'pep-reference',
+ u'pep': 'pep-reference',
+ u'refer\u00E8ncia-a-rfc': 'rfc-reference',
+ u'refer\u00E8ncia-rfc': 'rfc-reference',
+ u'rfc': 'rfc-reference',
+ u'\u00E8mfasi': 'emphasis',
+ u'destacat': 'strong',
+ u'literal': 'literal',
+ u'refer\u00E8ncia-amb-nom': 'named-reference',
+ u'refer\u00E8ncia-nom': 'named-reference',
+ u'refer\u00E8ncia-an\u00F2nima': 'anonymous-reference',
+ u'refer\u00E8ncia-a-nota-al-peu': 'footnote-reference',
+ u'refer\u00E8ncia-nota-al-peu': 'footnote-reference',
+ u'refer\u00E8ncia-a-cita': 'citation-reference',
+ u'refer\u00E8ncia-cita': 'citation-reference',
+ u'refer\u00E8ncia-a-substituci\u00F3': 'substitution-reference',
+ u'refer\u00E8ncia-substituci\u00F3': 'substitution-reference',
+ u'destinaci\u00F3': 'target',
+ u'refer\u00E8ncia-a-uri': 'uri-reference',
+ u'refer\u00E8ncia-uri': 'uri-reference',
+ u'uri': 'uri-reference',
+ u'url': 'uri-reference',
+ u'cru': 'raw',}
+"""Mapping of Catalan role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/cs.py b/docutils/parsers/rst/languages/cs.py
new file mode 100644
index 000000000..169d2c4f7
--- /dev/null
+++ b/docutils/parsers/rst/languages/cs.py
@@ -0,0 +1,106 @@
+# Author: Marek Blaha
+# Contact: mb@dat.cz
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Czech-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ u'pozor': 'attention',
+ u'caution (translation required)': 'caution', # how to tell caution and warning apart?
+ u'nebezpe\u010D\u00ED': 'danger',
+ u'chyba': 'error',
+ u'rada': 'hint',
+ u'd\u016Fle\u017Eit\u00E9': 'important',
+ u'pozn\u00E1mka': 'note',
+ u'tip (translation required)': 'tip',
+ u'varov\u00E1n\u00ED': 'warning',
+ u'admonition (translation required)': 'admonition',
+ u'sidebar (translation required)': 'sidebar',
+ u't\u00E9ma': 'topic',
+ u'line-block (translation required)': 'line-block',
+ u'parsed-literal (translation required)': 'parsed-literal',
+ u'odd\u00EDl': 'rubric',
+ u'moto': 'epigraph',
+ u'highlights (translation required)': 'highlights',
+ u'pull-quote (translation required)': 'pull-quote',
+ u'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ #'questions': 'questions',
+ #'qa': 'questions',
+ #'faq': 'questions',
+ u'table (translation required)': 'table',
+ u'csv-table (translation required)': 'csv-table',
+ u'list-table (translation required)': 'list-table',
+ u'meta (translation required)': 'meta',
+ #'imagemap': 'imagemap',
+ u'image (translation required)': 'image', # obrazek
+ u'figure (translation required)': 'figure', # and here?
+ u'include (translation required)': 'include',
+ u'raw (translation required)': 'raw',
+ u'replace (translation required)': 'replace',
+ u'unicode (translation required)': 'unicode',
+ u'datum': 'date',
+ u't\u0159\u00EDda': 'class',
+ u'role (translation required)': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ u'obsah': 'contents',
+ u'sectnum (translation required)': 'sectnum',
+ u'section-numbering (translation required)': 'sectnum',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #'footnotes': 'footnotes',
+ #'citations': 'citations',
+ u'target-notes (translation required)': 'target-notes',
+ u'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""Czech name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ # language-dependent: fixed
+ u'abbreviation (translation required)': 'abbreviation',
+ u'ab (translation required)': 'abbreviation',
+ u'acronym (translation required)': 'acronym',
+ u'ac (translation required)': 'acronym',
+ u'index (translation required)': 'index',
+ u'i (translation required)': 'index',
+ u'subscript (translation required)': 'subscript',
+ u'sub (translation required)': 'subscript',
+ u'superscript (translation required)': 'superscript',
+ u'sup (translation required)': 'superscript',
+ u'title-reference (translation required)': 'title-reference',
+ u'title (translation required)': 'title-reference',
+ u't (translation required)': 'title-reference',
+ u'pep-reference (translation required)': 'pep-reference',
+ u'pep (translation required)': 'pep-reference',
+ u'rfc-reference (translation required)': 'rfc-reference',
+ u'rfc (translation required)': 'rfc-reference',
+ u'emphasis (translation required)': 'emphasis',
+ u'strong (translation required)': 'strong',
+ u'literal (translation required)': 'literal',
+ u'named-reference (translation required)': 'named-reference',
+ u'anonymous-reference (translation required)': 'anonymous-reference',
+ u'footnote-reference (translation required)': 'footnote-reference',
+ u'citation-reference (translation required)': 'citation-reference',
+ u'substitution-reference (translation required)': 'substitution-reference',
+ u'target (translation required)': 'target',
+ u'uri-reference (translation required)': 'uri-reference',
+ u'uri (translation required)': 'uri-reference',
+ u'url (translation required)': 'uri-reference',
+ u'raw (translation required)': 'raw',}
+"""Mapping of Czech role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/de.py b/docutils/parsers/rst/languages/de.py
new file mode 100644
index 000000000..da528a7be
--- /dev/null
+++ b/docutils/parsers/rst/languages/de.py
@@ -0,0 +1,97 @@
+# Authors: Engelbert Gruber; Felix Wiemann
+# Contact: grubert@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+German-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ 'achtung': 'attention',
+ 'vorsicht': 'caution',
+ 'gefahr': 'danger',
+ 'fehler': 'error',
+ 'hinweis': 'hint',
+ 'wichtig': 'important',
+ 'notiz': 'note',
+ 'tipp': 'tip',
+ 'warnung': 'warning',
+ 'ermahnung': 'admonition',
+ 'kasten': 'sidebar',
+ 'seitenkasten': 'sidebar',
+ 'thema': 'topic',
+ 'zeilen-block': 'line-block',
+ 'parsed-literal (translation required)': 'parsed-literal',
+ 'rubrik': 'rubric',
+ 'epigraph': 'epigraph',
+ 'highlights (translation required)': 'highlights',
+ 'pull-quote (translation required)': 'pull-quote', # kasten too ?
+ 'zusammengesetzt': 'compound',
+ 'verbund': 'compound',
+ u'container (translation required)': 'container',
+ #'fragen': 'questions',
+ 'tabelle': 'table',
+ 'csv-tabelle': 'csv-table',
+ 'list-table (translation required)': 'list-table',
+ 'meta': 'meta',
+ #'imagemap': 'imagemap',
+ 'bild': 'image',
+ 'abbildung': 'figure',
+ u'unver\xe4ndert': 'raw',
+ u'roh': 'raw',
+ u'einf\xfcgen': 'include',
+ 'ersetzung': 'replace',
+ 'ersetzen': 'replace',
+ 'ersetze': 'replace',
+ 'unicode': 'unicode',
+ 'datum': 'date',
+ 'klasse': 'class',
+ 'rolle': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ 'inhalt': 'contents',
+ 'kapitel-nummerierung': 'sectnum',
+ 'abschnitts-nummerierung': 'sectnum',
+ u'linkziel-fu\xdfnoten': 'target-notes',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #u'fu\xdfnoten': 'footnotes',
+ #'zitate': 'citations',
+ }
+"""German name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ u'abk\xfcrzung': 'abbreviation',
+ 'akronym': 'acronym',
+ 'index': 'index',
+ 'tiefgestellt': 'subscript',
+ 'hochgestellt': 'superscript',
+ 'titel-referenz': 'title-reference',
+ 'pep-referenz': 'pep-reference',
+ 'rfc-referenz': 'rfc-reference',
+ 'betonung': 'emphasis',
+ 'fett': 'strong',
+ u'w\xf6rtlich': 'literal',
+ 'benannte-referenz': 'named-reference',
+ 'unbenannte-referenz': 'anonymous-reference',
+ u'fu\xdfnoten-referenz': 'footnote-reference',
+ 'zitat-referenz': 'citation-reference',
+ 'ersetzungs-referenz': 'substitution-reference',
+ 'ziel': 'target',
+ 'uri-referenz': 'uri-reference',
+ u'unver\xe4ndert': 'raw',
+ u'roh': 'raw',}
+"""Mapping of German role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/en.py b/docutils/parsers/rst/languages/en.py
new file mode 100644
index 000000000..1c0d7edba
--- /dev/null
+++ b/docutils/parsers/rst/languages/en.py
@@ -0,0 +1,106 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+English-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ 'attention': 'attention',
+ 'caution': 'caution',
+ 'danger': 'danger',
+ 'error': 'error',
+ 'hint': 'hint',
+ 'important': 'important',
+ 'note': 'note',
+ 'tip': 'tip',
+ 'warning': 'warning',
+ 'admonition': 'admonition',
+ 'sidebar': 'sidebar',
+ 'topic': 'topic',
+ 'line-block': 'line-block',
+ 'parsed-literal': 'parsed-literal',
+ 'rubric': 'rubric',
+ 'epigraph': 'epigraph',
+ 'highlights': 'highlights',
+ 'pull-quote': 'pull-quote',
+ 'compound': 'compound',
+ 'container': 'container',
+ #'questions': 'questions',
+ 'table': 'table',
+ 'csv-table': 'csv-table',
+ 'list-table': 'list-table',
+ #'qa': 'questions',
+ #'faq': 'questions',
+ 'meta': 'meta',
+ #'imagemap': 'imagemap',
+ 'image': 'image',
+ 'figure': 'figure',
+ 'include': 'include',
+ 'raw': 'raw',
+ 'replace': 'replace',
+ 'unicode': 'unicode',
+ 'date': 'date',
+ 'class': 'class',
+ 'role': 'role',
+ 'default-role': 'default-role',
+ 'title': 'title',
+ 'contents': 'contents',
+ 'sectnum': 'sectnum',
+ 'section-numbering': 'sectnum',
+ 'header': 'header',
+ 'footer': 'footer',
+ #'footnotes': 'footnotes',
+ #'citations': 'citations',
+ 'target-notes': 'target-notes',
+ 'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""English name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ # language-dependent: fixed
+ 'abbreviation': 'abbreviation',
+ 'ab': 'abbreviation',
+ 'acronym': 'acronym',
+ 'ac': 'acronym',
+ 'index': 'index',
+ 'i': 'index',
+ 'subscript': 'subscript',
+ 'sub': 'subscript',
+ 'superscript': 'superscript',
+ 'sup': 'superscript',
+ 'title-reference': 'title-reference',
+ 'title': 'title-reference',
+ 't': 'title-reference',
+ 'pep-reference': 'pep-reference',
+ 'pep': 'pep-reference',
+ 'rfc-reference': 'rfc-reference',
+ 'rfc': 'rfc-reference',
+ 'emphasis': 'emphasis',
+ 'strong': 'strong',
+ 'literal': 'literal',
+ 'named-reference': 'named-reference',
+ 'anonymous-reference': 'anonymous-reference',
+ 'footnote-reference': 'footnote-reference',
+ 'citation-reference': 'citation-reference',
+ 'substitution-reference': 'substitution-reference',
+ 'target': 'target',
+ 'uri-reference': 'uri-reference',
+ 'uri': 'uri-reference',
+ 'url': 'uri-reference',
+ 'raw': 'raw',}
+"""Mapping of English role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/eo.py b/docutils/parsers/rst/languages/eo.py
new file mode 100644
index 000000000..8565d6c8d
--- /dev/null
+++ b/docutils/parsers/rst/languages/eo.py
@@ -0,0 +1,116 @@
+# Author: Marcelo Huerta San Martin
+# Contact: richieadler@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Esperanto-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ u'atentu': 'attention',
+ u'zorgu': 'caution',
+ u'dangxero': 'danger',
+ u'dan\u011dero': 'danger',
+ u'eraro': 'error',
+ u'spuro': 'hint',
+ u'grava': 'important',
+ u'noto': 'note',
+ u'helpeto': 'tip',
+ u'averto': 'warning',
+ u'admono': 'admonition',
+ u'flankteksto': 'sidebar',
+ u'temo': 'topic',
+ u'linea-bloko': 'line-block',
+ u'analizota-literalo': 'parsed-literal',
+ u'rubriko': 'rubric',
+ u'epigrafo': 'epigraph',
+ u'elstarajxoj': 'highlights',
+ u'elstara\u0135oj': 'highlights',
+ u'ekstera-citajxo': 'pull-quote',
+ u'ekstera-cita\u0135o': 'pull-quote',
+ u'kombinajxo': 'compound',
+ u'kombina\u0135o': 'compound',
+ u'tekstingo': 'container',
+ u'enhavilo': 'container',
+ #'questions': 'questions',
+ #'qa': 'questions',
+ #'faq': 'questions',
+ u'tabelo': 'table',
+ u'tabelo-vdk': 'csv-table', # "valoroj disigitaj per komoj" (comma-separated values)
+ u'tabelo-csv': 'csv-table',
+ u'tabelo-lista': 'list-table',
+ u'meta': 'meta',
+ #'imagemap': 'imagemap',
+ u'bildo': 'image',
+ u'figuro': 'figure',
+ u'inkludi': 'include',
+ u'senanaliza': 'raw',
+ u'anstatauxi': 'replace',
+ u'anstata\u016di': 'replace',
+ u'unicode': 'unicode',
+ u'dato': 'date',
+ u'klaso': 'class',
+ u'rolo': 'role',
+ u'preterlasita-rolo': 'default-role',
+ u'titolo': 'title',
+ u'enhavo': 'contents',
+ u'seknum': 'sectnum',
+ u'sekcia-numerado': 'sectnum',
+ u'kapsekcio': 'header',
+ u'piedsekcio': 'footer',
+ #'footnotes': 'footnotes',
+ #'citations': 'citations',
+ u'celaj-notoj': 'target-notes',
+ u'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""Esperanto name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ # language-dependent: fixed
+ u'mallongigo': 'abbreviation',
+ u'mall': 'abbreviation',
+ u'komenclitero': 'acronym',
+ u'kl': 'acronym',
+ u'indekso': 'index',
+ u'i': 'index',
+ u'subskribo': 'subscript',
+ u'sub': 'subscript',
+ u'supraskribo': 'superscript',
+ u'sup': 'superscript',
+ u'titola-referenco': 'title-reference',
+ u'titolo': 'title-reference',
+ u't': 'title-reference',
+ u'pep-referenco': 'pep-reference',
+ u'pep': 'pep-reference',
+ u'rfc-referenco': 'rfc-reference',
+ u'rfc': 'rfc-reference',
+ u'emfazo': 'emphasis',
+ u'forta': 'strong',
+ u'litera': 'literal',
+ u'nomita-referenco': 'named-reference',
+ u'nenomita-referenco': 'anonymous-reference',
+ u'piednota-referenco': 'footnote-reference',
+ u'citajxo-referenco': 'citation-reference',
+ u'cita\u0135o-referenco': 'citation-reference',
+ u'anstatauxa-referenco': 'substitution-reference',
+ u'anstata\u016da-referenco': 'substitution-reference',
+ u'celo': 'target',
+ u'uri-referenco': 'uri-reference',
+ u'uri': 'uri-reference',
+ u'url': 'uri-reference',
+ u'senanaliza': 'raw',
+}
+"""Mapping of Esperanto role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/es.py b/docutils/parsers/rst/languages/es.py
new file mode 100644
index 000000000..8e86bae37
--- /dev/null
+++ b/docutils/parsers/rst/languages/es.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+# Author: Marcelo Huerta San Martín
+# Contact: richieadler@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Spanish-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ u'atenci\u00f3n': 'attention',
+ u'atencion': 'attention',
+ u'precauci\u00f3n': 'caution',
+ u'precaucion': 'caution',
+ u'peligro': 'danger',
+ u'error': 'error',
+ u'sugerencia': 'hint',
+ u'importante': 'important',
+ u'nota': 'note',
+ u'consejo': 'tip',
+ u'advertencia': 'warning',
+ u'exhortacion': 'admonition',
+ u'exhortaci\u00f3n': 'admonition',
+ u'nota-al-margen': 'sidebar',
+ u'tema': 'topic',
+ u'bloque-de-lineas': 'line-block',
+ u'bloque-de-l\u00edneas': 'line-block',
+ u'literal-evaluado': 'parsed-literal',
+ u'firma': 'rubric',
+ u'ep\u00edgrafe': 'epigraph',
+ u'epigrafe': 'epigraph',
+ u'destacado': 'highlights',
+ u'cita-destacada': 'pull-quote',
+ u'combinacion': 'compound',
+ u'combinaci\u00f3n': 'compound',
+ u'contenedor': 'container',
+ #'questions': 'questions',
+ #'qa': 'questions',
+ #'faq': 'questions',
+ u'tabla': 'table',
+ u'tabla-vsc': 'csv-table',
+ u'tabla-csv': 'csv-table',
+ u'tabla-lista': 'list-table',
+ u'meta': 'meta',
+ #'imagemap': 'imagemap',
+ u'imagen': 'image',
+ u'figura': 'figure',
+ u'incluir': 'include',
+ u'sin-analisis': 'raw',
+ u'sin-an\u00e1lisis': 'raw',
+ u'reemplazar': 'replace',
+ u'unicode': 'unicode',
+ u'fecha': 'date',
+ u'clase': 'class',
+ u'rol': 'role',
+ u'rol-por-omision': 'default-role',
+ u'rol-por-omisi\u00f3n': 'default-role',
+ u'titulo': 'title',
+ u't\u00edtulo': 'title',
+ u'contenido': 'contents',
+ u'numseccion': 'sectnum',
+ u'numsecci\u00f3n': 'sectnum',
+ u'numeracion-seccion': 'sectnum',
+ u'numeraci\u00f3n-secci\u00f3n': 'sectnum',
+ u'notas-destino': 'target-notes',
+ u'cabecera': 'header',
+ u'pie': 'footer',
+ #'footnotes': 'footnotes',
+ #'citations': 'citations',
+ u'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""Spanish name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ u'abreviatura': 'abbreviation',
+ u'ab': 'abbreviation',
+ u'acronimo': 'acronym',
+ u'acr\u00f3nimo': 'acronym',
+ u'ac': 'acronym',
+ u'indice': 'index',
+ u'i': 'index',
+ u'subindice': 'subscript',
+ u'sub\u00edndice': 'subscript',
+ u'superindice': 'superscript',
+ u'super\u00edndice': 'superscript',
+ u'referencia-titulo': 'title-reference',
+ u'titulo': 'title-reference',
+ u't': 'title-reference',
+ u'referencia-pep': 'pep-reference',
+ u'pep': 'pep-reference',
+ u'referencia-rfc': 'rfc-reference',
+ u'rfc': 'rfc-reference',
+ u'enfasis': 'emphasis',
+ u'\u00e9nfasis': 'emphasis',
+ u'destacado': 'strong',
+ u'literal': 'literal', # "literal" is also a word in Spanish :-)
+ u'referencia-con-nombre': 'named-reference',
+ u'referencia-anonima': 'anonymous-reference',
+ u'referencia-an\u00f3nima': 'anonymous-reference',
+ u'referencia-nota-al-pie': 'footnote-reference',
+ u'referencia-cita': 'citation-reference',
+ u'referencia-sustitucion': 'substitution-reference',
+ u'referencia-sustituci\u00f3n': 'substitution-reference',
+ u'destino': 'target',
+ u'referencia-uri': 'uri-reference',
+ u'uri': 'uri-reference',
+ u'url': 'uri-reference',
+ u'sin-analisis': 'raw',
+ u'sin-an\u00e1lisis': 'raw',
+}
+"""Mapping of Spanish role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/fi.py b/docutils/parsers/rst/languages/fi.py
new file mode 100644
index 000000000..8f92a67ad
--- /dev/null
+++ b/docutils/parsers/rst/languages/fi.py
@@ -0,0 +1,95 @@
+# Author: Asko Soukka
+# Contact: asko.soukka@iki.fi
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Finnish-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ u'huomio': u'attention',
+ u'varo': u'caution',
+ u'vaara': u'danger',
+ u'virhe': u'error',
+ u'vihje': u'hint',
+ u't\u00e4rke\u00e4\u00e4': u'important',
+ u'huomautus': u'note',
+ u'neuvo': u'tip',
+ u'varoitus': u'warning',
+ u'kehotus': u'admonition',
+ u'sivupalkki': u'sidebar',
+ u'aihe': u'topic',
+ u'rivi': u'line-block',
+ u'tasalevyinen': u'parsed-literal',
+ u'ohje': u'rubric',
+ u'epigraafi': u'epigraph',
+ u'kohokohdat': u'highlights',
+ u'lainaus': u'pull-quote',
+ u'taulukko': u'table',
+ u'csv-taulukko': u'csv-table',
+ u'list-table (translation required)': 'list-table',
+ u'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ #u'kysymykset': u'questions',
+ u'meta': u'meta',
+ #u'kuvakartta': u'imagemap',
+ u'kuva': u'image',
+ u'kaavio': u'figure',
+ u'sis\u00e4llyt\u00e4': u'include',
+ u'raaka': u'raw',
+ u'korvaa': u'replace',
+ u'unicode': u'unicode',
+ u'p\u00e4iv\u00e4ys': u'date',
+ u'luokka': u'class',
+ u'rooli': u'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ u'sis\u00e4llys': u'contents',
+ u'kappale': u'sectnum',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #u'alaviitteet': u'footnotes',
+ #u'viitaukset': u'citations',
+ u'target-notes (translation required)': u'target-notes'}
+"""Finnish name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ # language-dependent: fixed
+ u'lyhennys': u'abbreviation',
+ u'akronyymi': u'acronym',
+ u'kirjainsana': u'acronym',
+ u'hakemisto': u'index',
+ u'luettelo': u'index',
+ u'alaindeksi': u'subscript',
+ u'indeksi': u'subscript',
+ u'yl\u00e4indeksi': u'superscript',
+ u'title-reference (translation required)': u'title-reference',
+ u'title (translation required)': u'title-reference',
+ u'pep-reference (translation required)': u'pep-reference',
+ u'rfc-reference (translation required)': u'rfc-reference',
+ u'korostus': u'emphasis',
+ u'vahvistus': u'strong',
+ u'tasalevyinen': u'literal',
+ u'named-reference (translation required)': u'named-reference',
+ u'anonymous-reference (translation required)': u'anonymous-reference',
+ u'footnote-reference (translation required)': u'footnote-reference',
+ u'citation-reference (translation required)': u'citation-reference',
+ u'substitution-reference (translation required)': u'substitution-reference',
+ u'kohde': u'target',
+ u'uri-reference (translation required)': u'uri-reference',
+ u'raw (translation required)': 'raw',}
+"""Mapping of Finnish role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/fr.py b/docutils/parsers/rst/languages/fr.py
new file mode 100644
index 000000000..7520f743b
--- /dev/null
+++ b/docutils/parsers/rst/languages/fr.py
@@ -0,0 +1,101 @@
+# Authors: David Goodger; William Dode
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+French-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ u'attention': 'attention',
+ u'pr\u00E9caution': 'caution',
+ u'danger': 'danger',
+ u'erreur': 'error',
+ u'conseil': 'hint',
+ u'important': 'important',
+ u'note': 'note',
+ u'astuce': 'tip',
+ u'avertissement': 'warning',
+ u'admonition': 'admonition',
+ u'encadr\u00E9': 'sidebar',
+ u'sujet': 'topic',
+ u'bloc-textuel': 'line-block',
+ u'bloc-interpr\u00E9t\u00E9': 'parsed-literal',
+ u'code-interpr\u00E9t\u00E9': 'parsed-literal',
+ u'intertitre': 'rubric',
+ u'exergue': 'epigraph',
+ u'\u00E9pigraphe': 'epigraph',
+ u'chapeau': 'highlights',
+ u'accroche': 'pull-quote',
+ u'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ #u'questions': 'questions',
+ #u'qr': 'questions',
+ #u'faq': 'questions',
+ u'tableau': 'table',
+ u'csv-table (translation required)': 'csv-table',
+ u'list-table (translation required)': 'list-table',
+ u'm\u00E9ta': 'meta',
+ #u'imagemap (translation required)': 'imagemap',
+ u'image': 'image',
+ u'figure': 'figure',
+ u'inclure': 'include',
+ u'brut': 'raw',
+ u'remplacer': 'replace',
+ u'remplace': 'replace',
+ u'unicode': 'unicode',
+ u'date': 'date',
+ u'classe': 'class',
+ u'role (translation required)': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'titre (translation required)': 'title',
+ u'sommaire': 'contents',
+ u'table-des-mati\u00E8res': 'contents',
+ u'sectnum': 'sectnum',
+ u'section-num\u00E9rot\u00E9e': 'sectnum',
+ u'liens': 'target-notes',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #u'footnotes (translation required)': 'footnotes',
+ #u'citations (translation required)': 'citations',
+ }
+"""French name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ u'abr\u00E9viation': 'abbreviation',
+ u'acronyme': 'acronym',
+ u'sigle': 'acronym',
+ u'index': 'index',
+ u'indice': 'subscript',
+ u'ind': 'subscript',
+ u'exposant': 'superscript',
+ u'exp': 'superscript',
+ u'titre-r\u00E9f\u00E9rence': 'title-reference',
+ u'titre': 'title-reference',
+ u'pep-r\u00E9f\u00E9rence': 'pep-reference',
+ u'rfc-r\u00E9f\u00E9rence': 'rfc-reference',
+ u'emphase': 'emphasis',
+ u'fort': 'strong',
+ u'litt\u00E9ral': 'literal',
+ u'nomm\u00E9e-r\u00E9f\u00E9rence': 'named-reference',
+ u'anonyme-r\u00E9f\u00E9rence': 'anonymous-reference',
+ u'note-r\u00E9f\u00E9rence': 'footnote-reference',
+ u'citation-r\u00E9f\u00E9rence': 'citation-reference',
+ u'substitution-r\u00E9f\u00E9rence': 'substitution-reference',
+ u'lien': 'target',
+ u'uri-r\u00E9f\u00E9rence': 'uri-reference',
+ u'brut': 'raw',}
+"""Mapping of French role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/it.py b/docutils/parsers/rst/languages/it.py
new file mode 100644
index 000000000..cc1701941
--- /dev/null
+++ b/docutils/parsers/rst/languages/it.py
@@ -0,0 +1,94 @@
+# Authors: Nicola Larosa, Lele Gaifax
+# Contact: docutils@tekNico.net, lele@seldati.it
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# Beware: the Italian translation of the reStructuredText documentation
+# at http://docit.bice.dyndns.org/static/ReST, in particular
+# http://docit.bice.dyndns.org/static/ReST/ref/rst/directives.html, needs
+# to be synced with the content of this file.
+
+"""
+Italian-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ 'attenzione': 'attention',
+ 'cautela': 'caution',
+ 'pericolo': 'danger',
+ 'errore': 'error',
+ 'suggerimento': 'hint',
+ 'importante': 'important',
+ 'nota': 'note',
+ 'consiglio': 'tip',
+ 'avvertenza': 'warning',
+ 'ammonizione': 'admonition',
+ 'riquadro': 'sidebar',
+ 'argomento': 'topic',
+ 'blocco-di-righe': 'line-block',
+ 'blocco-interpretato': 'parsed-literal',
+ 'rubrica': 'rubric',
+ 'epigrafe': 'epigraph',
+ 'punti-salienti': 'highlights',
+ 'estratto-evidenziato': 'pull-quote',
+ 'composito': 'compound',
+ u'container (translation required)': 'container',
+ #'questions': 'questions',
+ #'qa': 'questions',
+ #'faq': 'questions',
+ 'tabella': 'table',
+ 'tabella-csv': 'csv-table',
+ 'tabella-elenco': 'list-table',
+ 'meta': 'meta',
+ #'imagemap': 'imagemap',
+ 'immagine': 'image',
+ 'figura': 'figure',
+ 'includi': 'include',
+ 'grezzo': 'raw',
+ 'sostituisci': 'replace',
+ 'unicode': 'unicode',
+ 'data': 'date',
+ 'classe': 'class',
+ 'ruolo': 'role',
+ 'ruolo-predefinito': 'default-role',
+ 'titolo': 'title',
+ 'indice': 'contents',
+ 'contenuti': 'contents',
+ 'seznum': 'sectnum',
+ 'sezioni-autonumerate': 'sectnum',
+ 'annota-riferimenti-esterni': 'target-notes',
+ 'intestazione': 'header',
+ 'piede-pagina': 'footer',
+ #'footnotes': 'footnotes',
+ #'citations': 'citations',
+ 'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""Italian name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ 'abbreviazione': 'abbreviation',
+ 'acronimo': 'acronym',
+ 'indice': 'index',
+ 'deponente': 'subscript',
+ 'esponente': 'superscript',
+ 'riferimento-titolo': 'title-reference',
+ 'riferimento-pep': 'pep-reference',
+ 'riferimento-rfc': 'rfc-reference',
+ 'enfasi': 'emphasis',
+ 'forte': 'strong',
+ 'letterale': 'literal',
+ 'riferimento-con-nome': 'named-reference',
+ 'riferimento-anonimo': 'anonymous-reference',
+ 'riferimento-nota': 'footnote-reference',
+ 'riferimento-citazione': 'citation-reference',
+ 'riferimento-sostituzione': 'substitution-reference',
+ 'destinazione': 'target',
+ 'riferimento-uri': 'uri-reference',
+ 'grezzo': 'raw',}
+"""Mapping of Italian role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/ja.py b/docutils/parsers/rst/languages/ja.py
new file mode 100644
index 000000000..0bb701d85
--- /dev/null
+++ b/docutils/parsers/rst/languages/ja.py
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+# Author: David Goodger
+# Contact: goodger@python.org
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Japanese-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+# Corrections to these translations are welcome!
+# 間違いがあれば、どうぞ正しい翻訳を教えて下さい。
+
+directives = {
+ # language-dependent: fixed
+ u'注目': 'attention',
+ u'注意': 'caution',
+ u'危険': 'danger',
+ u'エラー': 'error',
+ u'ヒント': 'hint',
+ u'重要': 'important',
+ u'備考': 'note',
+ u'通報': 'tip',
+ u'警告': 'warning',
+ u'戒告': 'admonition',
+ u'サイドバー': 'sidebar',
+ u'トピック': 'topic',
+ u'ラインブロック': 'line-block',
+ u'パーズドリテラル': 'parsed-literal',
+ u'ルブリック': 'rubric',
+ u'エピグラフ': 'epigraph',
+ u'題言': 'epigraph',
+ u'ハイライト': 'highlights',
+ u'見所': 'highlights',
+ u'プルクオート': 'pull-quote',
+ u'合成': 'compound',
+ u'コンテナー': 'container',
+ u'容器': 'container',
+ u'表': 'table',
+ u'csv表': 'csv-table',
+ u'リスト表': 'list-table',
+ #u'質問': 'questions',
+ #u'問答': 'questions',
+ #u'faq': 'questions',
+ u'メタ': 'meta',
+ #u'イメージマプ': 'imagemap',
+ u'イメージ': 'image',
+ u'画像': 'image',
+ u'フィグア': 'figure',
+ u'図版': 'figure',
+ u'インクルード': 'include',
+ u'含む': 'include',
+ u'組み込み': 'include',
+ u'生': 'raw',
+ u'原': 'raw',
+ u'換える': 'replace',
+ u'取り換える': 'replace',
+ u'掛け替える': 'replace',
+ u'ユニコード': 'unicode',
+ u'日付': 'date',
+ u'クラス': 'class',
+ u'ロール': 'role',
+ u'役': 'role',
+ u'ディフォルトロール': 'default-role',
+ u'既定役': 'default-role',
+ u'タイトル': 'title',
+ u'題': 'title', # 題名 件名
+ u'目次': 'contents',
+ u'節数': 'sectnum',
+ u'ヘッダ': 'header',
+ u'フッタ': 'footer',
+ #u'脚注': 'footnotes', # 脚註?
+ #u'サイテーション': 'citations',   # 出典 引証 引用
+ u'ターゲットノート': 'target-notes', # 的注 的脚注
+ }
+"""Japanese name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ # language-dependent: fixed
+ u'略': 'abbreviation',
+ u'頭字語': 'acronym',
+ u'インデックス': 'index',
+ u'索引': 'index',
+ u'添字': 'subscript',
+ u'下付': 'subscript',
+ u'下': 'subscript',
+ u'上付': 'superscript',
+ u'上': 'superscript',
+ u'題参照': 'title-reference',
+ u'pep参照': 'pep-reference',
+ u'rfc参照': 'rfc-reference',
+ u'強調': 'emphasis',
+ u'強い': 'strong',
+ u'リテラル': 'literal',
+ u'整形済み': 'literal',
+ u'名付参照': 'named-reference',
+ u'無名参照': 'anonymous-reference',
+ u'脚注参照': 'footnote-reference',
+ u'出典参照': 'citation-reference',
+ u'代入参照': 'substitution-reference',
+ u'的': 'target',
+ u'uri参照': 'uri-reference',
+ u'uri': 'uri-reference',
+ u'url': 'uri-reference',
+ u'生': 'raw',}
+"""Mapping of Japanese role names to canonical role names for interpreted
+text."""
diff --git a/docutils/parsers/rst/languages/nl.py b/docutils/parsers/rst/languages/nl.py
new file mode 100644
index 000000000..baaf2ae70
--- /dev/null
+++ b/docutils/parsers/rst/languages/nl.py
@@ -0,0 +1,110 @@
+# Author: Martijn Pieters
+# Contact: mjpieters@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Dutch-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ 'attentie': 'attention',
+ 'let-op': 'caution',
+ 'gevaar': 'danger',
+ 'fout': 'error',
+ 'hint': 'hint',
+ 'belangrijk': 'important',
+ 'opmerking': 'note',
+ 'tip': 'tip',
+ 'waarschuwing': 'warning',
+ 'aanmaning': 'admonition',
+ 'katern': 'sidebar',
+ 'onderwerp': 'topic',
+ 'lijn-blok': 'line-block',
+ 'letterlijk-ontleed': 'parsed-literal',
+ 'rubriek': 'rubric',
+ 'opschrift': 'epigraph',
+ 'hoogtepunten': 'highlights',
+ 'pull-quote': 'pull-quote', # Dutch printers use the english term
+ 'samenstelling': 'compound',
+ 'verbinding': 'compound',
+ u'container (translation required)': 'container',
+ #'vragen': 'questions',
+ 'tabel': 'table',
+ 'csv-tabel': 'csv-table',
+ 'lijst-tabel': 'list-table',
+ #'veelgestelde-vragen': 'questions',
+ 'meta': 'meta',
+ #'imagemap': 'imagemap',
+ 'beeld': 'image',
+ 'figuur': 'figure',
+ 'opnemen': 'include',
+ 'onbewerkt': 'raw',
+ 'vervang': 'replace',
+ 'vervanging': 'replace',
+ 'unicode': 'unicode',
+ 'datum': 'date',
+ 'klasse': 'class',
+ 'rol': 'role',
+ u'default-role (translation required)': 'default-role',
+ 'title (translation required)': 'title',
+ 'inhoud': 'contents',
+ 'sectnum': 'sectnum',
+ 'sectie-nummering': 'sectnum',
+ 'hoofdstuk-nummering': 'sectnum',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #'voetnoten': 'footnotes',
+ #'citaten': 'citations',
+ 'verwijzing-voetnoten': 'target-notes',
+ 'restructuredtext-test-instructie': 'restructuredtext-test-directive'}
+"""Dutch name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ # language-dependent: fixed
+ 'afkorting': 'abbreviation',
+ # 'ab': 'abbreviation',
+ 'acroniem': 'acronym',
+ 'ac': 'acronym',
+ 'index': 'index',
+ 'i': 'index',
+ 'inferieur': 'subscript',
+ 'inf': 'subscript',
+ 'superieur': 'superscript',
+ 'sup': 'superscript',
+ 'titel-referentie': 'title-reference',
+ 'titel': 'title-reference',
+ 't': 'title-reference',
+ 'pep-referentie': 'pep-reference',
+ 'pep': 'pep-reference',
+ 'rfc-referentie': 'rfc-reference',
+ 'rfc': 'rfc-reference',
+ 'nadruk': 'emphasis',
+ 'extra': 'strong',
+ 'extra-nadruk': 'strong',
+ 'vet': 'strong',
+ 'letterlijk': 'literal',
+ 'benoemde-referentie': 'named-reference',
+ 'anonieme-referentie': 'anonymous-reference',
+ 'voetnoot-referentie': 'footnote-reference',
+ 'citaat-referentie': 'citation-reference',
+ 'substitutie-referentie': 'substitution-reference',
+ 'verwijzing': 'target',
+ 'uri-referentie': 'uri-reference',
+ 'uri': 'uri-reference',
+ 'url': 'uri-reference',
+ 'onbewerkt': 'raw',}
+"""Mapping of Dutch role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/pt_br.py b/docutils/parsers/rst/languages/pt_br.py
new file mode 100644
index 000000000..43ebfaa3b
--- /dev/null
+++ b/docutils/parsers/rst/languages/pt_br.py
@@ -0,0 +1,106 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Brazilian Portuguese-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ u'aten\u00E7\u00E3o': 'attention',
+ 'cuidado': 'caution',
+ 'perigo': 'danger',
+ 'erro': 'error',
+ u'sugest\u00E3o': 'hint',
+ 'importante': 'important',
+ 'nota': 'note',
+ 'dica': 'tip',
+ 'aviso': 'warning',
+ u'exorta\u00E7\u00E3o': 'admonition',
+ 'barra-lateral': 'sidebar',
+ u't\u00F3pico': 'topic',
+ 'bloco-de-linhas': 'line-block',
+ 'literal-interpretado': 'parsed-literal',
+ 'rubrica': 'rubric',
+ u'ep\u00EDgrafo': 'epigraph',
+ 'destaques': 'highlights',
+ u'cita\u00E7\u00E3o-destacada': 'pull-quote',
+ u'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ #'perguntas': 'questions',
+ #'qa': 'questions',
+ #'faq': 'questions',
+ u'table (translation required)': 'table',
+ u'csv-table (translation required)': 'csv-table',
+ u'list-table (translation required)': 'list-table',
+ 'meta': 'meta',
+ #'imagemap': 'imagemap',
+ 'imagem': 'image',
+ 'figura': 'figure',
+ u'inclus\u00E3o': 'include',
+ 'cru': 'raw',
+ u'substitui\u00E7\u00E3o': 'replace',
+ 'unicode': 'unicode',
+ 'data': 'date',
+ 'classe': 'class',
+ 'role (translation required)': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ u'\u00EDndice': 'contents',
+ 'numsec': 'sectnum',
+ u'numera\u00E7\u00E3o-de-se\u00E7\u00F5es': 'sectnum',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #u'notas-de-rodap\u00E9': 'footnotes',
+ #u'cita\u00E7\u00F5es': 'citations',
+ u'links-no-rodap\u00E9': 'target-notes',
+ 'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""Brazilian Portuguese name to registered (in directives/__init__.py)
+directive name mapping."""
+
+roles = {
+ # language-dependent: fixed
+ u'abbrevia\u00E7\u00E3o': 'abbreviation',
+ 'ab': 'abbreviation',
+ u'acr\u00F4nimo': 'acronym',
+ 'ac': 'acronym',
+ u'\u00EDndice-remissivo': 'index',
+ 'i': 'index',
+ 'subscrito': 'subscript',
+ 'sub': 'subscript',
+ 'sobrescrito': 'superscript',
+ 'sob': 'superscript',
+ u'refer\u00EAncia-a-t\u00EDtulo': 'title-reference',
+ u't\u00EDtulo': 'title-reference',
+ 't': 'title-reference',
+ u'refer\u00EAncia-a-pep': 'pep-reference',
+ 'pep': 'pep-reference',
+ u'refer\u00EAncia-a-rfc': 'rfc-reference',
+ 'rfc': 'rfc-reference',
+ u'\u00EAnfase': 'emphasis',
+ 'forte': 'strong',
+ 'literal': 'literal', # translation required?
+ u'refer\u00EAncia-por-nome': 'named-reference',
+ u'refer\u00EAncia-an\u00F4nima': 'anonymous-reference',
+ u'refer\u00EAncia-a-nota-de-rodap\u00E9': 'footnote-reference',
+ u'refer\u00EAncia-a-cita\u00E7\u00E3o': 'citation-reference',
+ u'refer\u00EAncia-a-substitui\u00E7\u00E3o': 'substitution-reference',
+ 'alvo': 'target',
+ u'refer\u00EAncia-a-uri': 'uri-reference',
+ 'uri': 'uri-reference',
+ 'url': 'uri-reference',
+ 'cru': 'raw',}
+"""Mapping of Brazilian Portuguese role names to canonical role names
+for interpreted text."""
diff --git a/docutils/parsers/rst/languages/ru.py b/docutils/parsers/rst/languages/ru.py
new file mode 100644
index 000000000..18354f738
--- /dev/null
+++ b/docutils/parsers/rst/languages/ru.py
@@ -0,0 +1,105 @@
+# Author: Roman Suzi
+# Contact: rnd@onego.ru
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Russian-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+directives = {
+ u'\u0431\u043b\u043e\u043a-\u0441\u0442\u0440\u043e\u043a': u'line-block',
+ u'meta': u'meta',
+ u'\u043e\u0431\u0440\u0430\u0431\u043e\u0442\u0430\u043d\u043d\u044b\u0439-\u043b\u0438\u0442\u0435\u0440\u0430\u043b':
+ u'parsed-literal',
+ u'\u0432\u044b\u0434\u0435\u043b\u0435\u043d\u043d\u0430\u044f-\u0446\u0438\u0442\u0430\u0442\u0430':
+ u'pull-quote',
+ u'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ u'table (translation required)': 'table',
+ u'csv-table (translation required)': 'csv-table',
+ u'list-table (translation required)': 'list-table',
+ u'\u0441\u044b\u0440\u043e\u0439': u'raw',
+ u'\u0437\u0430\u043c\u0435\u043d\u0430': u'replace',
+ u'\u0442\u0435\u0441\u0442\u043e\u0432\u0430\u044f-\u0434\u0438\u0440\u0435\u043a\u0442\u0438\u0432\u0430-restructuredtext':
+ u'restructuredtext-test-directive',
+ u'\u0446\u0435\u043b\u0435\u0432\u044b\u0435-\u0441\u043d\u043e\u0441\u043a\u0438':
+ u'target-notes',
+ u'unicode': u'unicode',
+ u'\u0434\u0430\u0442\u0430': u'date',
+ u'\u0431\u043e\u043a\u043e\u0432\u0430\u044f-\u043f\u043e\u043b\u043e\u0441\u0430':
+ u'sidebar',
+ u'\u0432\u0430\u0436\u043d\u043e': u'important',
+ u'\u0432\u043a\u043b\u044e\u0447\u0430\u0442\u044c': u'include',
+ u'\u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435': u'attention',
+ u'\u0432\u044b\u0434\u0435\u043b\u0435\u043d\u0438\u0435': u'highlights',
+ u'\u0437\u0430\u043c\u0435\u0447\u0430\u043d\u0438\u0435': u'admonition',
+ u'\u0438\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u0435':
+ u'image',
+ u'\u043a\u043b\u0430\u0441\u0441': u'class',
+ u'role (translation required)': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ u'\u043d\u043e\u043c\u0435\u0440-\u0440\u0430\u0437\u0434\u0435\u043b\u0430':
+ u'sectnum',
+ u'\u043d\u0443\u043c\u0435\u0440\u0430\u0446\u0438\u044f-\u0440\u0430\u0437'
+ u'\u0434\u0435\u043b\u043e\u0432': u'sectnum',
+ u'\u043e\u043f\u0430\u0441\u043d\u043e': u'danger',
+ u'\u043e\u0441\u0442\u043e\u0440\u043e\u0436\u043d\u043e': u'caution',
+ u'\u043e\u0448\u0438\u0431\u043a\u0430': u'error',
+ u'\u043f\u043e\u0434\u0441\u043a\u0430\u0437\u043a\u0430': u'tip',
+ u'\u043f\u0440\u0435\u0434\u0443\u043f\u0440\u0435\u0436\u0434\u0435\u043d'
+ u'\u0438\u0435': u'warning',
+ u'\u043f\u0440\u0438\u043c\u0435\u0447\u0430\u043d\u0438\u0435': u'note',
+ u'\u0440\u0438\u0441\u0443\u043d\u043e\u043a': u'figure',
+ u'\u0440\u0443\u0431\u0440\u0438\u043a\u0430': u'rubric',
+ u'\u0441\u043e\u0432\u0435\u0442': u'hint',
+ u'\u0441\u043e\u0434\u0435\u0440\u0436\u0430\u043d\u0438\u0435': u'contents',
+ u'\u0442\u0435\u043c\u0430': u'topic',
+ u'\u044d\u043f\u0438\u0433\u0440\u0430\u0444': u'epigraph',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',}
+"""Russian name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ u'\u0430\u043a\u0440\u043e\u043d\u0438\u043c': 'acronym',
+ u'\u0430\u043d\u043e\u043d\u0438\u043c\u043d\u0430\u044f-\u0441\u0441\u044b\u043b\u043a\u0430':
+ 'anonymous-reference',
+ u'\u0431\u0443\u043a\u0432\u0430\u043b\u044c\u043d\u043e': 'literal',
+ u'\u0432\u0435\u0440\u0445\u043d\u0438\u0439-\u0438\u043d\u0434\u0435\u043a\u0441':
+ 'superscript',
+ u'\u0432\u044b\u0434\u0435\u043b\u0435\u043d\u0438\u0435': 'emphasis',
+ u'\u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u043d\u0430\u044f-\u0441\u0441\u044b\u043b\u043a\u0430':
+ 'named-reference',
+ u'\u0438\u043d\u0434\u0435\u043a\u0441': 'index',
+ u'\u043d\u0438\u0436\u043d\u0438\u0439-\u0438\u043d\u0434\u0435\u043a\u0441':
+ 'subscript',
+ u'\u0441\u0438\u043b\u044c\u043d\u043e\u0435-\u0432\u044b\u0434\u0435\u043b\u0435\u043d\u0438\u0435':
+ 'strong',
+ u'\u0441\u043e\u043a\u0440\u0430\u0449\u0435\u043d\u0438\u0435':
+ 'abbreviation',
+ u'\u0441\u0441\u044b\u043b\u043a\u0430-\u0437\u0430\u043c\u0435\u043d\u0430':
+ 'substitution-reference',
+ u'\u0441\u0441\u044b\u043b\u043a\u0430-\u043d\u0430-pep': 'pep-reference',
+ u'\u0441\u0441\u044b\u043b\u043a\u0430-\u043d\u0430-rfc': 'rfc-reference',
+ u'\u0441\u0441\u044b\u043b\u043a\u0430-\u043d\u0430-uri': 'uri-reference',
+ u'\u0441\u0441\u044b\u043b\u043a\u0430-\u043d\u0430-\u0437\u0430\u0433\u043b\u0430\u0432\u0438\u0435':
+ 'title-reference',
+ u'\u0441\u0441\u044b\u043b\u043a\u0430-\u043d\u0430-\u0441\u043d\u043e\u0441\u043a\u0443':
+ 'footnote-reference',
+ u'\u0446\u0438\u0442\u0430\u0442\u043d\u0430\u044f-\u0441\u0441\u044b\u043b\u043a\u0430':
+ 'citation-reference',
+ u'\u0446\u0435\u043b\u044c': 'target',
+ u'raw (translation required)': 'raw',}
+"""Mapping of Russian role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/sk.py b/docutils/parsers/rst/languages/sk.py
new file mode 100644
index 000000000..513c432f4
--- /dev/null
+++ b/docutils/parsers/rst/languages/sk.py
@@ -0,0 +1,93 @@
+# Author: Miroslav Vasko
+# Contact: zemiak@zoznam.sk
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Slovak-language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ u'pozor': 'attention',
+ u'opatrne': 'caution',
+ u'nebezpe\u010denstvo': 'danger',
+ u'chyba': 'error',
+ u'rada': 'hint',
+ u'd\xf4le\u017eit\xe9': 'important',
+ u'pozn\xe1mka': 'note',
+ u'tip (translation required)': 'tip',
+ u'varovanie': 'warning',
+ u'admonition (translation required)': 'admonition',
+ u'sidebar (translation required)': 'sidebar',
+ u't\xe9ma': 'topic',
+ u'blok-riadkov': 'line-block',
+ u'parsed-literal': 'parsed-literal',
+ u'rubric (translation required)': 'rubric',
+ u'epigraph (translation required)': 'epigraph',
+ u'highlights (translation required)': 'highlights',
+ u'pull-quote (translation required)': 'pull-quote',
+ u'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ #u'questions': 'questions',
+ #u'qa': 'questions',
+ #u'faq': 'questions',
+ u'table (translation required)': 'table',
+ u'csv-table (translation required)': 'csv-table',
+ u'list-table (translation required)': 'list-table',
+ u'meta': 'meta',
+ #u'imagemap': 'imagemap',
+ u'obr\xe1zok': 'image',
+ u'tvar': 'figure',
+ u'vlo\u017ei\u0165': 'include',
+ u'raw (translation required)': 'raw',
+ u'nahradi\u0165': 'replace',
+ u'unicode': 'unicode',
+ u'd\u00E1tum': 'date',
+ u'class (translation required)': 'class',
+ u'role (translation required)': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ u'obsah': 'contents',
+ u'\u010das\u0165': 'sectnum',
+ u'\u010das\u0165-\u010d\xedslovanie': 'sectnum',
+ u'cie\u013eov\xe9-pozn\xe1mky': 'target-notes',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #u'footnotes': 'footnotes',
+ #u'citations': 'citations',
+ }
+"""Slovak name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ u'abbreviation (translation required)': 'abbreviation',
+ u'acronym (translation required)': 'acronym',
+ u'index (translation required)': 'index',
+ u'subscript (translation required)': 'subscript',
+ u'superscript (translation required)': 'superscript',
+ u'title-reference (translation required)': 'title-reference',
+ u'pep-reference (translation required)': 'pep-reference',
+ u'rfc-reference (translation required)': 'rfc-reference',
+ u'emphasis (translation required)': 'emphasis',
+ u'strong (translation required)': 'strong',
+ u'literal (translation required)': 'literal',
+ u'named-reference (translation required)': 'named-reference',
+ u'anonymous-reference (translation required)': 'anonymous-reference',
+ u'footnote-reference (translation required)': 'footnote-reference',
+ u'citation-reference (translation required)': 'citation-reference',
+ u'substitution-reference (translation required)': 'substitution-reference',
+ u'target (translation required)': 'target',
+ u'uri-reference (translation required)': 'uri-reference',
+ u'raw (translation required)': 'raw',}
+"""Mapping of Slovak role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/sv.py b/docutils/parsers/rst/languages/sv.py
new file mode 100644
index 000000000..8ffe76e50
--- /dev/null
+++ b/docutils/parsers/rst/languages/sv.py
@@ -0,0 +1,92 @@
+# Author: Adam Chodorowski
+# Contact: chodorowski@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Swedish language mappings for language-dependent features of reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ u'observera': 'attention',
+ u'caution (translation required)': 'caution',
+ u'fara': 'danger',
+ u'fel': 'error',
+ u'v\u00e4gledning': 'hint',
+ u'viktigt': 'important',
+ u'notera': 'note',
+ u'tips': 'tip',
+ u'varning': 'warning',
+ u'admonition (translation required)': 'admonition',
+ u'sidebar (translation required)': 'sidebar',
+ u'\u00e4mne': 'topic',
+ u'line-block (translation required)': 'line-block',
+ u'parsed-literal (translation required)': 'parsed-literal',
+ u'mellanrubrik': 'rubric',
+ u'epigraph (translation required)': 'epigraph',
+ u'highlights (translation required)': 'highlights',
+ u'pull-quote (translation required)': 'pull-quote',
+ u'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ # u'fr\u00e5gor': 'questions',
+ # NOTE: A bit long, but recommended by http://www.nada.kth.se/dataterm/:
+ # u'fr\u00e5gor-och-svar': 'questions',
+ # u'vanliga-fr\u00e5gor': 'questions',
+ u'table (translation required)': 'table',
+ u'csv-table (translation required)': 'csv-table',
+ u'list-table (translation required)': 'list-table',
+ u'meta': 'meta',
+ # u'bildkarta': 'imagemap', # FIXME: Translation might be too literal.
+ u'bild': 'image',
+ u'figur': 'figure',
+ u'inkludera': 'include',
+ u'r\u00e5': 'raw', # FIXME: Translation might be too literal.
+ u'ers\u00e4tt': 'replace',
+ u'unicode': 'unicode',
+ u'datum': 'date',
+ u'class (translation required)': 'class',
+ u'role (translation required)': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ u'inneh\u00e5ll': 'contents',
+ u'sektionsnumrering': 'sectnum',
+ u'target-notes (translation required)': 'target-notes',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ # u'fotnoter': 'footnotes',
+ # u'citeringar': 'citations',
+ }
+"""Swedish name to registered (in directives/__init__.py) directive name
+mapping."""
+
+roles = {
+ u'abbreviation (translation required)': 'abbreviation',
+ u'acronym (translation required)': 'acronym',
+ u'index (translation required)': 'index',
+ u'subscript (translation required)': 'subscript',
+ u'superscript (translation required)': 'superscript',
+ u'title-reference (translation required)': 'title-reference',
+ u'pep-reference (translation required)': 'pep-reference',
+ u'rfc-reference (translation required)': 'rfc-reference',
+ u'emphasis (translation required)': 'emphasis',
+ u'strong (translation required)': 'strong',
+ u'literal (translation required)': 'literal',
+ u'named-reference (translation required)': 'named-reference',
+ u'anonymous-reference (translation required)': 'anonymous-reference',
+ u'footnote-reference (translation required)': 'footnote-reference',
+ u'citation-reference (translation required)': 'citation-reference',
+ u'substitution-reference (translation required)': 'substitution-reference',
+ u'target (translation required)': 'target',
+ u'uri-reference (translation required)': 'uri-reference',
+ u'r\u00e5': 'raw',}
+"""Mapping of Swedish role names to canonical role names for interpreted text.
+"""
diff --git a/docutils/parsers/rst/languages/zh_cn.py b/docutils/parsers/rst/languages/zh_cn.py
new file mode 100644
index 000000000..dee301ca6
--- /dev/null
+++ b/docutils/parsers/rst/languages/zh_cn.py
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+# Author: Panjunyong
+# Contact: panjy@zopechina.com
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Simplified Chinese language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ u'注意': 'attention',
+ u'小心': 'caution',
+ u'危险': 'danger',
+ u'错误': 'error',
+ u'提示': 'hint',
+ u'重要': 'important',
+ u'注解': 'note',
+ u'技巧': 'tip',
+ u'警告': 'warning',
+ u'忠告': 'admonition',
+ u'侧框': 'sidebar',
+ u'主题': 'topic',
+ u'line-block (translation required)': 'line-block',
+ u'parsed-literal (translation required)': 'parsed-literal',
+ u'醒目': 'rubric',
+ u'铭文': 'epigraph',
+ u'要点': 'highlights',
+ u'pull-quote (translation required)': 'pull-quote',
+ u'复合': 'compound',
+ u'容器': 'container',
+ #u'questions (translation required)': 'questions',
+ u'表格': 'table',
+ u'csv表格': 'csv-table',
+ u'列表表格': 'list-table',
+ #u'qa (translation required)': 'questions',
+ #u'faq (translation required)': 'questions',
+ u'元数据': 'meta',
+ #u'imagemap (translation required)': 'imagemap',
+ u'图片': 'image',
+ u'图例': 'figure',
+ u'包含': 'include',
+ u'原文': 'raw',
+ u'代替': 'replace',
+ u'统一码': 'unicode',
+ u'日期': 'date',
+ u'类型': 'class',
+ u'角色': 'role',
+ u'默认角色': 'default-role',
+ u'标题': 'title',
+ u'目录': 'contents',
+ u'章节序号': 'sectnum',
+ u'题头': 'header',
+ u'页脚': 'footer',
+ #u'footnotes (translation required)': 'footnotes',
+ #u'citations (translation required)': 'citations',
+ u'target-notes (translation required)': 'target-notes',
+ u'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""Simplified Chinese name to registered (in directives/__init__.py)
+directive name mapping."""
+
+roles = {
+ # language-dependent: fixed
+ u'缩写': 'abbreviation',
+ u'简称': 'acronym',
+ u'index (translation required)': 'index',
+ u'i (translation required)': 'index',
+ u'下标': 'subscript',
+ u'上标': 'superscript',
+ u'title-reference (translation required)': 'title-reference',
+ u'title (translation required)': 'title-reference',
+ u't (translation required)': 'title-reference',
+ u'pep-reference (translation required)': 'pep-reference',
+ u'pep (translation required)': 'pep-reference',
+ u'rfc-reference (translation required)': 'rfc-reference',
+ u'rfc (translation required)': 'rfc-reference',
+ u'强调': 'emphasis',
+ u'加粗': 'strong',
+ u'字面': 'literal',
+ u'named-reference (translation required)': 'named-reference',
+ u'anonymous-reference (translation required)': 'anonymous-reference',
+ u'footnote-reference (translation required)': 'footnote-reference',
+ u'citation-reference (translation required)': 'citation-reference',
+ u'substitution-reference (translation required)': 'substitution-reference',
+ u'target (translation required)': 'target',
+ u'uri-reference (translation required)': 'uri-reference',
+ u'uri (translation required)': 'uri-reference',
+ u'url (translation required)': 'uri-reference',
+ u'raw (translation required)': 'raw',}
+"""Mapping of Simplified Chinese role names to canonical role names
+for interpreted text."""
diff --git a/docutils/parsers/rst/languages/zh_tw.py b/docutils/parsers/rst/languages/zh_tw.py
new file mode 100644
index 000000000..77574b591
--- /dev/null
+++ b/docutils/parsers/rst/languages/zh_tw.py
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+# New language mappings are welcome. Before doing a new translation, please
+# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be
+# translated for each language: one in docutils/languages, the other in
+# docutils/parsers/rst/languages.
+
+"""
+Traditional Chinese language mappings for language-dependent features of
+reStructuredText.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+directives = {
+ # language-dependent: fixed
+ 'attention (translation required)': 'attention',
+ 'caution (translation required)': 'caution',
+ 'danger (translation required)': 'danger',
+ 'error (translation required)': 'error',
+ 'hint (translation required)': 'hint',
+ 'important (translation required)': 'important',
+ 'note (translation required)': 'note',
+ 'tip (translation required)': 'tip',
+ 'warning (translation required)': 'warning',
+ 'admonition (translation required)': 'admonition',
+ 'sidebar (translation required)': 'sidebar',
+ 'topic (translation required)': 'topic',
+ 'line-block (translation required)': 'line-block',
+ 'parsed-literal (translation required)': 'parsed-literal',
+ 'rubric (translation required)': 'rubric',
+ 'epigraph (translation required)': 'epigraph',
+ 'highlights (translation required)': 'highlights',
+ 'pull-quote (translation required)': 'pull-quote',
+ 'compound (translation required)': 'compound',
+ u'container (translation required)': 'container',
+ #'questions (translation required)': 'questions',
+ 'table (translation required)': 'table',
+ 'csv-table (translation required)': 'csv-table',
+ 'list-table (translation required)': 'list-table',
+ #'qa (translation required)': 'questions',
+ #'faq (translation required)': 'questions',
+ 'meta (translation required)': 'meta',
+ #'imagemap (translation required)': 'imagemap',
+ 'image (translation required)': 'image',
+ 'figure (translation required)': 'figure',
+ 'include (translation required)': 'include',
+ 'raw (translation required)': 'raw',
+ 'replace (translation required)': 'replace',
+ 'unicode (translation required)': 'unicode',
+ u'日期': 'date',
+ 'class (translation required)': 'class',
+ 'role (translation required)': 'role',
+ u'default-role (translation required)': 'default-role',
+ u'title (translation required)': 'title',
+ 'contents (translation required)': 'contents',
+ 'sectnum (translation required)': 'sectnum',
+ 'section-numbering (translation required)': 'sectnum',
+ u'header (translation required)': 'header',
+ u'footer (translation required)': 'footer',
+ #'footnotes (translation required)': 'footnotes',
+ #'citations (translation required)': 'citations',
+ 'target-notes (translation required)': 'target-notes',
+ 'restructuredtext-test-directive': 'restructuredtext-test-directive'}
+"""Traditional Chinese name to registered (in directives/__init__.py)
+directive name mapping."""
+
+roles = {
+ # language-dependent: fixed
+ 'abbreviation (translation required)': 'abbreviation',
+ 'ab (translation required)': 'abbreviation',
+ 'acronym (translation required)': 'acronym',
+ 'ac (translation required)': 'acronym',
+ 'index (translation required)': 'index',
+ 'i (translation required)': 'index',
+ 'subscript (translation required)': 'subscript',
+ 'sub (translation required)': 'subscript',
+ 'superscript (translation required)': 'superscript',
+ 'sup (translation required)': 'superscript',
+ 'title-reference (translation required)': 'title-reference',
+ 'title (translation required)': 'title-reference',
+ 't (translation required)': 'title-reference',
+ 'pep-reference (translation required)': 'pep-reference',
+ 'pep (translation required)': 'pep-reference',
+ 'rfc-reference (translation required)': 'rfc-reference',
+ 'rfc (translation required)': 'rfc-reference',
+ 'emphasis (translation required)': 'emphasis',
+ 'strong (translation required)': 'strong',
+ 'literal (translation required)': 'literal',
+ 'named-reference (translation required)': 'named-reference',
+ 'anonymous-reference (translation required)': 'anonymous-reference',
+ 'footnote-reference (translation required)': 'footnote-reference',
+ 'citation-reference (translation required)': 'citation-reference',
+ 'substitution-reference (translation required)': 'substitution-reference',
+ 'target (translation required)': 'target',
+ 'uri-reference (translation required)': 'uri-reference',
+ 'uri (translation required)': 'uri-reference',
+ 'url (translation required)': 'uri-reference',
+ 'raw (translation required)': 'raw',}
+"""Mapping of Traditional Chinese role names to canonical role names for
+interpreted text."""
diff --git a/docutils/parsers/rst/roles.py b/docutils/parsers/rst/roles.py
new file mode 100644
index 000000000..554e1f441
--- /dev/null
+++ b/docutils/parsers/rst/roles.py
@@ -0,0 +1,347 @@
+# Author: Edward Loper
+# Contact: edloper@gradient.cis.upenn.edu
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This module defines standard interpreted text role functions, a registry for
+interpreted text roles, and an API for adding to and retrieving from the
+registry.
+
+The interface for interpreted role functions is as follows::
+
+ def role_fn(name, rawtext, text, lineno, inliner,
+ options={}, content=[]):
+ code...
+
+ # Set function attributes for customization:
+ role_fn.options = ...
+ role_fn.content = ...
+
+Parameters:
+
+- ``name`` is the local name of the interpreted text role, the role name
+ actually used in the document.
+
+- ``rawtext`` is a string containing the entire interpreted text construct.
+ Return it as a ``problematic`` node linked to a system message if there is a
+ problem.
+
+- ``text`` is the interpreted text content, with backslash escapes converted
+ to nulls (``\x00``).
+
+- ``lineno`` is the line number where the interpreted text begins.
+
+- ``inliner`` is the Inliner object that called the role function.
+ It defines the following useful attributes: ``reporter``,
+ ``problematic``, ``memo``, ``parent``, ``document``.
+
+- ``options``: A dictionary of directive options for customization, to be
+ interpreted by the role function. Used for additional attributes for the
+ generated elements and other functionality.
+
+- ``content``: A list of strings, the directive content for customization
+ ("role" directive). To be interpreted by the role function.
+
+Function attributes for customization, interpreted by the "role" directive:
+
+- ``options``: A dictionary, mapping known option names to conversion
+ functions such as `int` or `float`. ``None`` or an empty dict implies no
+ options to parse. Several directive option conversion functions are defined
+ in the `directives` module.
+
+ All role functions implicitly support the "class" option, unless disabled
+ with an explicit ``{'class': None}``.
+
+- ``content``: A boolean; true if content is allowed. Client code must handle
+ the case where content is required but not supplied (an empty content list
+ will be supplied).
+
+Note that unlike directives, the "arguments" function attribute is not
+supported for role customization. Directive arguments are handled by the
+"role" directive itself.
+
+Interpreted role functions return a tuple of two values:
+
+- A list of nodes which will be inserted into the document tree at the
+ point where the interpreted role was encountered (can be an empty
+ list).
+
+- A list of system messages, which will be inserted into the document tree
+ immediately after the end of the current inline block (can also be empty).
+"""
+
+__docformat__ = 'reStructuredText'
+
+from docutils import nodes, utils
+from docutils.parsers.rst import directives
+from docutils.parsers.rst.languages import en as _fallback_language_module
+
+DEFAULT_INTERPRETED_ROLE = 'title-reference'
+"""
+The canonical name of the default interpreted role. This role is used
+when no role is specified for a piece of interpreted text.
+"""
+
+_role_registry = {}
+"""Mapping of canonical role names to role functions. Language-dependent role
+names are defined in the ``language`` subpackage."""
+
+_roles = {}
+"""Mapping of local or language-dependent interpreted text role names to role
+functions."""
+
+def role(role_name, language_module, lineno, reporter):
+ """
+ Locate and return a role function from its language-dependent name, along
+ with a list of system messages. If the role is not found in the current
+ language, check English. Return a 2-tuple: role function (``None`` if the
+ named role cannot be found) and a list of system messages.
+ """
+ normname = role_name.lower()
+ messages = []
+ msg_text = []
+
+ if _roles.has_key(normname):
+ return _roles[normname], messages
+
+ if role_name:
+ canonicalname = None
+ try:
+ canonicalname = language_module.roles[normname]
+ except AttributeError, error:
+ msg_text.append('Problem retrieving role entry from language '
+ 'module %r: %s.' % (language_module, error))
+ except KeyError:
+ msg_text.append('No role entry for "%s" in module "%s".'
+ % (role_name, language_module.__name__))
+ else:
+ canonicalname = DEFAULT_INTERPRETED_ROLE
+
+ # If we didn't find it, try English as a fallback.
+ if not canonicalname:
+ try:
+ canonicalname = _fallback_language_module.roles[normname]
+ msg_text.append('Using English fallback for role "%s".'
+ % role_name)
+ except KeyError:
+ msg_text.append('Trying "%s" as canonical role name.'
+ % role_name)
+ # The canonical name should be an English name, but just in case:
+ canonicalname = normname
+
+ # Collect any messages that we generated.
+ if msg_text:
+ message = reporter.info('\n'.join(msg_text), line=lineno)
+ messages.append(message)
+
+ # Look the role up in the registry, and return it.
+ if _role_registry.has_key(canonicalname):
+ role_fn = _role_registry[canonicalname]
+ register_local_role(normname, role_fn)
+ return role_fn, messages
+ else:
+ return None, messages # Error message will be generated by caller.
+
+def register_canonical_role(name, role_fn):
+ """
+ Register an interpreted text role by its canonical name.
+
+ :Parameters:
+ - `name`: The canonical name of the interpreted role.
+ - `role_fn`: The role function. See the module docstring.
+ """
+ set_implicit_options(role_fn)
+ _role_registry[name] = role_fn
+
+def register_local_role(name, role_fn):
+ """
+ Register an interpreted text role by its local or language-dependent name.
+
+ :Parameters:
+ - `name`: The local or language-dependent name of the interpreted role.
+ - `role_fn`: The role function. See the module docstring.
+ """
+ set_implicit_options(role_fn)
+ _roles[name] = role_fn
+
+def set_implicit_options(role_fn):
+ """
+ Add customization options to role functions, unless explicitly set or
+ disabled.
+ """
+ if not hasattr(role_fn, 'options') or role_fn.options is None:
+ role_fn.options = {'class': directives.class_option}
+ elif not role_fn.options.has_key('class'):
+ role_fn.options['class'] = directives.class_option
+
+def register_generic_role(canonical_name, node_class):
+ """For roles which simply wrap a given `node_class` around the text."""
+ role = GenericRole(canonical_name, node_class)
+ register_canonical_role(canonical_name, role)
+
+
+class GenericRole:
+
+ """
+ Generic interpreted text role, where the interpreted text is simply
+ wrapped with the provided node class.
+ """
+
+ def __init__(self, role_name, node_class):
+ self.name = role_name
+ self.node_class = node_class
+
+ def __call__(self, role, rawtext, text, lineno, inliner,
+ options={}, content=[]):
+ set_classes(options)
+ return [self.node_class(rawtext, utils.unescape(text), **options)], []
+
+
+class CustomRole:
+
+ """
+ Wrapper for custom interpreted text roles.
+ """
+
+ def __init__(self, role_name, base_role, options={}, content=[]):
+ self.name = role_name
+ self.base_role = base_role
+ self.options = None
+ if hasattr(base_role, 'options'):
+ self.options = base_role.options
+ self.content = None
+ if hasattr(base_role, 'content'):
+ self.content = base_role.content
+ self.supplied_options = options
+ self.supplied_content = content
+
+ def __call__(self, role, rawtext, text, lineno, inliner,
+ options={}, content=[]):
+ opts = self.supplied_options.copy()
+ opts.update(options)
+ cont = list(self.supplied_content)
+ if cont and content:
+ cont += '\n'
+ cont.extend(content)
+ return self.base_role(role, rawtext, text, lineno, inliner,
+ options=opts, content=cont)
+
+
+def generic_custom_role(role, rawtext, text, lineno, inliner,
+ options={}, content=[]):
+ """"""
+ # Once nested inline markup is implemented, this and other methods should
+ # recursively call inliner.nested_parse().
+ set_classes(options)
+ return [nodes.inline(rawtext, utils.unescape(text), **options)], []
+
+generic_custom_role.options = {'class': directives.class_option}
+
+
+######################################################################
+# Define and register the standard roles:
+######################################################################
+
+register_generic_role('abbreviation', nodes.abbreviation)
+register_generic_role('acronym', nodes.acronym)
+register_generic_role('emphasis', nodes.emphasis)
+register_generic_role('literal', nodes.literal)
+register_generic_role('strong', nodes.strong)
+register_generic_role('subscript', nodes.subscript)
+register_generic_role('superscript', nodes.superscript)
+register_generic_role('title-reference', nodes.title_reference)
+
+def pep_reference_role(role, rawtext, text, lineno, inliner,
+ options={}, content=[]):
+ try:
+ pepnum = int(text)
+ if pepnum < 0 or pepnum > 9999:
+ raise ValueError
+ except ValueError:
+ msg = inliner.reporter.error(
+ 'PEP number must be a number from 0 to 9999; "%s" is invalid.'
+ % text, line=lineno)
+ prb = inliner.problematic(rawtext, rawtext, msg)
+ return [prb], [msg]
+ # Base URL mainly used by inliner.pep_reference; so this is correct:
+ ref = inliner.document.settings.pep_base_url + inliner.pep_url % pepnum
+ set_classes(options)
+ return [nodes.reference(rawtext, 'PEP ' + utils.unescape(text), refuri=ref,
+ **options)], []
+
+register_canonical_role('pep-reference', pep_reference_role)
+
+def rfc_reference_role(role, rawtext, text, lineno, inliner,
+ options={}, content=[]):
+ try:
+ rfcnum = int(text)
+ if rfcnum <= 0:
+ raise ValueError
+ except ValueError:
+ msg = inliner.reporter.error(
+ 'RFC number must be a number greater than or equal to 1; '
+ '"%s" is invalid.' % text, line=lineno)
+ prb = inliner.problematic(rawtext, rawtext, msg)
+ return [prb], [msg]
+ # Base URL mainly used by inliner.rfc_reference, so this is correct:
+ ref = inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum
+ set_classes(options)
+ node = nodes.reference(rawtext, 'RFC ' + utils.unescape(text), refuri=ref,
+ **options)
+ return [node], []
+
+register_canonical_role('rfc-reference', rfc_reference_role)
+
+def raw_role(role, rawtext, text, lineno, inliner, options={}, content=[]):
+ if not options.has_key('format'):
+ msg = inliner.reporter.error(
+ 'No format (Writer name) is associated with this role: "%s".\n'
+ 'The "raw" role cannot be used directly.\n'
+ 'Instead, use the "role" directive to create a new role with '
+ 'an associated format.' % role, line=lineno)
+ prb = inliner.problematic(rawtext, rawtext, msg)
+ return [prb], [msg]
+ set_classes(options)
+ node = nodes.raw(rawtext, utils.unescape(text, 1), **options)
+ return [node], []
+
+raw_role.options = {'format': directives.unchanged}
+
+register_canonical_role('raw', raw_role)
+
+
+######################################################################
+# Register roles that are currently unimplemented.
+######################################################################
+
+def unimplemented_role(role, rawtext, text, lineno, inliner, attributes={}):
+ msg = inliner.reporter.error(
+ 'Interpreted text role "%s" not implemented.' % role, line=lineno)
+ prb = inliner.problematic(rawtext, rawtext, msg)
+ return [prb], [msg]
+
+register_canonical_role('index', unimplemented_role)
+register_canonical_role('named-reference', unimplemented_role)
+register_canonical_role('anonymous-reference', unimplemented_role)
+register_canonical_role('uri-reference', unimplemented_role)
+register_canonical_role('footnote-reference', unimplemented_role)
+register_canonical_role('citation-reference', unimplemented_role)
+register_canonical_role('substitution-reference', unimplemented_role)
+register_canonical_role('target', unimplemented_role)
+
+# This should remain unimplemented, for testing purposes:
+register_canonical_role('restructuredtext-unimplemented-role',
+ unimplemented_role)
+
+
+def set_classes(options):
+ """
+ Auxiliary function to set options['classes'] and delete
+ options['class'].
+ """
+ if options.has_key('class'):
+ assert not options.has_key('classes')
+ options['classes'] = options['class']
+ del options['class']
diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py
new file mode 100644
index 000000000..363ef8fdd
--- /dev/null
+++ b/docutils/parsers/rst/states.py
@@ -0,0 +1,2948 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is the ``docutils.parsers.rst.states`` module, the core of
+the reStructuredText parser. It defines the following:
+
+:Classes:
+ - `RSTStateMachine`: reStructuredText parser's entry point.
+ - `NestedStateMachine`: recursive StateMachine.
+ - `RSTState`: reStructuredText State superclass.
+ - `Inliner`: For parsing inline markup.
+ - `Body`: Generic classifier of the first line of a block.
+ - `SpecializedBody`: Superclass for compound element members.
+ - `BulletList`: Second and subsequent bullet_list list_items
+ - `DefinitionList`: Second+ definition_list_items.
+ - `EnumeratedList`: Second+ enumerated_list list_items.
+ - `FieldList`: Second+ fields.
+ - `OptionList`: Second+ option_list_items.
+ - `RFC2822List`: Second+ RFC2822-style fields.
+ - `ExtensionOptions`: Parses directive option fields.
+ - `Explicit`: Second+ explicit markup constructs.
+ - `SubstitutionDef`: For embedded directives in substitution definitions.
+ - `Text`: Classifier of second line of a text block.
+ - `SpecializedText`: Superclass for continuation lines of Text-variants.
+ - `Definition`: Second line of potential definition_list_item.
+ - `Line`: Second line of overlined section title or transition marker.
+ - `Struct`: An auxiliary collection class.
+
+:Exception classes:
+ - `MarkupError`
+ - `ParserError`
+ - `MarkupMismatch`
+
+:Functions:
+ - `escape2null()`: Return a string, escape-backslashes converted to nulls.
+ - `unescape()`: Return a string, nulls removed or restored to backslashes.
+
+:Attributes:
+ - `state_classes`: set of State classes used with `RSTStateMachine`.
+
+Parser Overview
+===============
+
+The reStructuredText parser is implemented as a recursive state machine,
+examining its input one line at a time. To understand how the parser works,
+please first become familiar with the `docutils.statemachine` module. In the
+description below, references are made to classes defined in this module;
+please see the individual classes for details.
+
+Parsing proceeds as follows:
+
+1. The state machine examines each line of input, checking each of the
+ transition patterns of the state `Body`, in order, looking for a match.
+ The implicit transitions (blank lines and indentation) are checked before
+ any others. The 'text' transition is a catch-all (matches anything).
+
+2. The method associated with the matched transition pattern is called.
+
+ A. Some transition methods are self-contained, appending elements to the
+ document tree (`Body.doctest` parses a doctest block). The parser's
+ current line index is advanced to the end of the element, and parsing
+ continues with step 1.
+
+ B. Other transition methods trigger the creation of a nested state machine,
+ whose job is to parse a compound construct ('indent' does a block quote,
+ 'bullet' does a bullet list, 'overline' does a section [first checking
+ for a valid section header], etc.).
+
+ - In the case of lists and explicit markup, a one-off state machine is
+ created and run to parse contents of the first item.
+
+ - A new state machine is created and its initial state is set to the
+ appropriate specialized state (`BulletList` in the case of the
+ 'bullet' transition; see `SpecializedBody` for more detail). This
+ state machine is run to parse the compound element (or series of
+ explicit markup elements), and returns as soon as a non-member element
+ is encountered. For example, the `BulletList` state machine ends as
+ soon as it encounters an element which is not a list item of that
+ bullet list. The optional omission of inter-element blank lines is
+ enabled by this nested state machine.
+
+ - The current line index is advanced to the end of the elements parsed,
+ and parsing continues with step 1.
+
+ C. The result of the 'text' transition depends on the next line of text.
+ The current state is changed to `Text`, under which the second line is
+ examined. If the second line is:
+
+ - Indented: The element is a definition list item, and parsing proceeds
+ similarly to step 2.B, using the `DefinitionList` state.
+
+ - A line of uniform punctuation characters: The element is a section
+ header; again, parsing proceeds as in step 2.B, and `Body` is still
+ used.
+
+ - Anything else: The element is a paragraph, which is examined for
+ inline markup and appended to the parent element. Processing
+ continues with step 1.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import sys
+import re
+import roman
+from types import TupleType
+from docutils import nodes, statemachine, utils, urischemes
+from docutils import ApplicationError, DataError
+from docutils.statemachine import StateMachineWS, StateWS
+from docutils.nodes import fully_normalize_name as normalize_name
+from docutils.nodes import whitespace_normalize_name
+from docutils.utils import escape2null, unescape, column_width
+from docutils.parsers.rst import directives, languages, tableparser, roles
+from docutils.parsers.rst.languages import en as _fallback_language_module
+
+
+class MarkupError(DataError): pass
+class UnknownInterpretedRoleError(DataError): pass
+class InterpretedRoleNotImplementedError(DataError): pass
+class ParserError(ApplicationError): pass
+class MarkupMismatch(Exception): pass
+
+
+class Struct:
+
+ """Stores data attributes for dotted-attribute access."""
+
+ def __init__(self, **keywordargs):
+ self.__dict__.update(keywordargs)
+
+
+class RSTStateMachine(StateMachineWS):
+
+ """
+ reStructuredText's master StateMachine.
+
+ The entry point to reStructuredText parsing is the `run()` method.
+ """
+
+ def run(self, input_lines, document, input_offset=0, match_titles=1,
+ inliner=None):
+ """
+ Parse `input_lines` and modify the `document` node in place.
+
+ Extend `StateMachineWS.run()`: set up parse-global data and
+ run the StateMachine.
+ """
+ self.language = languages.get_language(
+ document.settings.language_code)
+ self.match_titles = match_titles
+ if inliner is None:
+ inliner = Inliner()
+ inliner.init_customizations(document.settings)
+ self.memo = Struct(document=document,
+ reporter=document.reporter,
+ language=self.language,
+ title_styles=[],
+ section_level=0,
+ section_bubble_up_kludge=0,
+ inliner=inliner)
+ self.document = document
+ self.attach_observer(document.note_source)
+ self.reporter = self.memo.reporter
+ self.node = document
+ results = StateMachineWS.run(self, input_lines, input_offset,
+ input_source=document['source'])
+ assert results == [], 'RSTStateMachine.run() results should be empty!'
+ self.node = self.memo = None # remove unneeded references
+
+
+class NestedStateMachine(StateMachineWS):
+
+ """
+ StateMachine run from within other StateMachine runs, to parse nested
+ document structures.
+ """
+
+ def run(self, input_lines, input_offset, memo, node, match_titles=1):
+ """
+ Parse `input_lines` and populate a `docutils.nodes.document` instance.
+
+ Extend `StateMachineWS.run()`: set up document-wide data.
+ """
+ self.match_titles = match_titles
+ self.memo = memo
+ self.document = memo.document
+ self.attach_observer(self.document.note_source)
+ self.reporter = memo.reporter
+ self.language = memo.language
+ self.node = node
+ results = StateMachineWS.run(self, input_lines, input_offset)
+ assert results == [], ('NestedStateMachine.run() results should be '
+ 'empty!')
+ return results
+
+
+class RSTState(StateWS):
+
+ """
+ reStructuredText State superclass.
+
+ Contains methods used by all State subclasses.
+ """
+
+ nested_sm = NestedStateMachine
+
+ def __init__(self, state_machine, debug=0):
+ self.nested_sm_kwargs = {'state_classes': state_classes,
+ 'initial_state': 'Body'}
+ StateWS.__init__(self, state_machine, debug)
+
+ def runtime_init(self):
+ StateWS.runtime_init(self)
+ memo = self.state_machine.memo
+ self.memo = memo
+ self.reporter = memo.reporter
+ self.inliner = memo.inliner
+ self.document = memo.document
+ self.parent = self.state_machine.node
+
+ def goto_line(self, abs_line_offset):
+ """
+ Jump to input line `abs_line_offset`, ignoring jumps past the end.
+ """
+ try:
+ self.state_machine.goto_line(abs_line_offset)
+ except EOFError:
+ pass
+
+ def no_match(self, context, transitions):
+ """
+ Override `StateWS.no_match` to generate a system message.
+
+ This code should never be run.
+ """
+ self.reporter.severe(
+ 'Internal error: no transition pattern match. State: "%s"; '
+ 'transitions: %s; context: %s; current line: %r.'
+ % (self.__class__.__name__, transitions, context,
+ self.state_machine.line),
+ line=self.state_machine.abs_line_number())
+ return context, None, []
+
+ def bof(self, context):
+ """Called at beginning of file."""
+ return [], []
+
+ def nested_parse(self, block, input_offset, node, match_titles=0,
+ state_machine_class=None, state_machine_kwargs=None):
+ """
+ Create a new StateMachine rooted at `node` and run it over the input
+ `block`.
+ """
+ if state_machine_class is None:
+ state_machine_class = self.nested_sm
+ if state_machine_kwargs is None:
+ state_machine_kwargs = self.nested_sm_kwargs
+ block_length = len(block)
+ state_machine = state_machine_class(debug=self.debug,
+ **state_machine_kwargs)
+ state_machine.run(block, input_offset, memo=self.memo,
+ node=node, match_titles=match_titles)
+ state_machine.unlink()
+ new_offset = state_machine.abs_line_offset()
+ # No `block.parent` implies disconnected -- lines aren't in sync:
+ if block.parent and (len(block) - block_length) != 0:
+ # Adjustment for block if modified in nested parse:
+ self.state_machine.next_line(len(block) - block_length)
+ return new_offset
+
+ def nested_list_parse(self, block, input_offset, node, initial_state,
+ blank_finish,
+ blank_finish_state=None,
+ extra_settings={},
+ match_titles=0,
+ state_machine_class=None,
+ state_machine_kwargs=None):
+ """
+ Create a new StateMachine rooted at `node` and run it over the input
+ `block`. Also keep track of optional intermediate blank lines and the
+ required final one.
+ """
+ if state_machine_class is None:
+ state_machine_class = self.nested_sm
+ if state_machine_kwargs is None:
+ state_machine_kwargs = self.nested_sm_kwargs.copy()
+ state_machine_kwargs['initial_state'] = initial_state
+ state_machine = state_machine_class(debug=self.debug,
+ **state_machine_kwargs)
+ if blank_finish_state is None:
+ blank_finish_state = initial_state
+ state_machine.states[blank_finish_state].blank_finish = blank_finish
+ for key, value in extra_settings.items():
+ setattr(state_machine.states[initial_state], key, value)
+ state_machine.run(block, input_offset, memo=self.memo,
+ node=node, match_titles=match_titles)
+ blank_finish = state_machine.states[blank_finish_state].blank_finish
+ state_machine.unlink()
+ return state_machine.abs_line_offset(), blank_finish
+
+ def section(self, title, source, style, lineno, messages):
+ """Check for a valid subsection and create one if it checks out."""
+ if self.check_subsection(source, style, lineno):
+ self.new_subsection(title, lineno, messages)
+
+ def check_subsection(self, source, style, lineno):
+ """
+ Check for a valid subsection header. Return 1 (true) or None (false).
+
+ When a new section is reached that isn't a subsection of the current
+ section, back up the line count (use ``previous_line(-x)``), then
+ ``raise EOFError``. The current StateMachine will finish, then the
+ calling StateMachine can re-examine the title. This will work its way
+        back up the calling chain until the correct section level is reached.
+
+ @@@ Alternative: Evaluate the title, store the title info & level, and
+ back up the chain until that level is reached. Store in memo? Or
+ return in results?
+
+ :Exception: `EOFError` when a sibling or supersection encountered.
+ """
+ memo = self.memo
+ title_styles = memo.title_styles
+ mylevel = memo.section_level
+ try: # check for existing title style
+ level = title_styles.index(style) + 1
+ except ValueError: # new title style
+ if len(title_styles) == memo.section_level: # new subsection
+ title_styles.append(style)
+ return 1
+ else: # not at lowest level
+ self.parent += self.title_inconsistent(source, lineno)
+ return None
+ if level <= mylevel: # sibling or supersection
+ memo.section_level = level # bubble up to parent section
+ if len(style) == 2:
+ memo.section_bubble_up_kludge = 1
+ # back up 2 lines for underline title, 3 for overline title
+ self.state_machine.previous_line(len(style) + 1)
+ raise EOFError # let parent section re-evaluate
+ if level == mylevel + 1: # immediate subsection
+ return 1
+ else: # invalid subsection
+ self.parent += self.title_inconsistent(source, lineno)
+ return None
+
+ def title_inconsistent(self, sourcetext, lineno):
+ error = self.reporter.severe(
+ 'Title level inconsistent:', nodes.literal_block('', sourcetext),
+ line=lineno)
+ return error
+
+ def new_subsection(self, title, lineno, messages):
+ """Append new subsection to document tree. On return, check level."""
+ memo = self.memo
+ mylevel = memo.section_level
+ memo.section_level += 1
+ section_node = nodes.section()
+ self.parent += section_node
+ textnodes, title_messages = self.inline_text(title, lineno)
+ titlenode = nodes.title(title, '', *textnodes)
+ name = normalize_name(titlenode.astext())
+ section_node['names'].append(name)
+ section_node += titlenode
+ section_node += messages
+ section_node += title_messages
+ self.document.note_implicit_target(section_node, section_node)
+ offset = self.state_machine.line_offset + 1
+ absoffset = self.state_machine.abs_line_offset() + 1
+ newabsoffset = self.nested_parse(
+ self.state_machine.input_lines[offset:], input_offset=absoffset,
+ node=section_node, match_titles=1)
+ self.goto_line(newabsoffset)
+ if memo.section_level <= mylevel: # can't handle next section?
+ raise EOFError # bubble up to supersection
+ # reset section_level; next pass will detect it properly
+ memo.section_level = mylevel
+
+ def paragraph(self, lines, lineno):
+ """
+ Return a list (paragraph & messages) & a boolean: literal_block next?
+ """
+ data = '\n'.join(lines).rstrip()
+ if re.search(r'(?<!\\)(\\\\)*::$', data):
+ if len(data) == 2:
+ return [], 1
+ elif data[-3] in ' \n':
+ text = data[:-3].rstrip()
+ else:
+ text = data[:-1]
+ literalnext = 1
+ else:
+ text = data
+ literalnext = 0
+ textnodes, messages = self.inline_text(text, lineno)
+ p = nodes.paragraph(data, '', *textnodes)
+ p.line = lineno
+ return [p] + messages, literalnext
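+    # Trailing "::" handling above, e.g. (input lines -> paragraph text,
+    # literal_block-next flag):
+    #     ['Example::']  -> 'Example:', 1
+    #     ['Example ::'] -> 'Example',  1
+    #     ['::']         -> no paragraph, 1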
+
+ def inline_text(self, text, lineno):
+ """
+ Return 2 lists: nodes (text and inline elements), and system_messages.
+ """
+ return self.inliner.parse(text, lineno, self.memo, self.parent)
+
+ def unindent_warning(self, node_name):
+ return self.reporter.warning(
+ '%s ends without a blank line; unexpected unindent.' % node_name,
+ line=(self.state_machine.abs_line_number() + 1))
+
+
+def build_regexp(definition, compile=1):
+ """
+ Build, compile and return a regular expression based on `definition`.
+
+ :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
+ where "parts" is a list of regular expressions and/or regular
+ expression definitions to be joined into an or-group.
+ """
+ name, prefix, suffix, parts = definition
+ part_strings = []
+ for part in parts:
+ if type(part) is TupleType:
+ part_strings.append(build_regexp(part, None))
+ else:
+ part_strings.append(part)
+ or_group = '|'.join(part_strings)
+ regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
+ if compile:
+ return re.compile(regexp, re.UNICODE)
+ else:
+ return regexp
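+# A small sketch (hypothetical definition; uncompiled form shown):
+#     build_regexp(('abc', '<', '>', ['foo', ('xy', '', '', ['x', 'y'])]),
+#                  compile=0) == '<(?P<abc>foo|(?P<xy>x|y))>'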
+
+
+class Inliner:
+
+ """
+ Parse inline markup; call the `parse()` method.
+ """
+
+ def __init__(self):
+ self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
+ """List of (pattern, bound method) tuples, used by
+ `self.implicit_inline`."""
+
+ def init_customizations(self, settings):
+ """Setting-based customizations; run when parsing begins."""
+ if settings.pep_references:
+ self.implicit_dispatch.append((self.patterns.pep,
+ self.pep_reference))
+ if settings.rfc_references:
+ self.implicit_dispatch.append((self.patterns.rfc,
+ self.rfc_reference))
+
+ def parse(self, text, lineno, memo, parent):
+ # Needs to be refactored for nested inline markup.
+ # Add nested_parse() method?
+ """
+ Return 2 lists: nodes (text and inline elements), and system_messages.
+
+ Using `self.patterns.initial`, a pattern which matches start-strings
+ (emphasis, strong, interpreted, phrase reference, literal,
+ substitution reference, and inline target) and complete constructs
+ (simple reference, footnote reference), search for a candidate. When
+ one is found, check for validity (e.g., not a quoted '*' character).
+ If valid, search for the corresponding end string if applicable, and
+ check it for validity. If not found or invalid, generate a warning
+ and ignore the start-string. Implicit inline markup (e.g. standalone
+ URIs) is found last.
+ """
+ self.reporter = memo.reporter
+ self.document = memo.document
+ self.language = memo.language
+ self.parent = parent
+ pattern_search = self.patterns.initial.search
+ dispatch = self.dispatch
+ remaining = escape2null(text)
+ processed = []
+ unprocessed = []
+ messages = []
+ while remaining:
+ match = pattern_search(remaining)
+ if match:
+ groups = match.groupdict()
+ method = dispatch[groups['start'] or groups['backquote']
+ or groups['refend'] or groups['fnend']]
+ before, inlines, remaining, sysmessages = method(self, match,
+ lineno)
+ unprocessed.append(before)
+ messages += sysmessages
+ if inlines:
+ processed += self.implicit_inline(''.join(unprocessed),
+ lineno)
+ processed += inlines
+ unprocessed = []
+ else:
+ break
+ remaining = ''.join(unprocessed) + remaining
+ if remaining:
+ processed += self.implicit_inline(remaining, lineno)
+ return processed, messages
+
+ openers = '\'"([{<'
+ closers = '\'")]}>'
+ start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers))
+ end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))'
+ % re.escape(closers))
+ non_whitespace_before = r'(?<![ \n])'
+ non_whitespace_escape_before = r'(?<![ \n\x00])'
+ non_whitespace_after = r'(?![ \n])'
+ # Alphanumerics with isolated internal [-._] chars (i.e. not 2 together):
+ simplename = r'(?:(?!_)\w)+(?:[-._](?:(?!_)\w)+)*'
+ # Valid URI characters (see RFC 2396 & RFC 2732);
+ # final \x00 allows backslash escapes in URIs:
+ uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
+ # Delimiter indicating the end of a URI (not part of the URI):
+ uri_end_delim = r"""[>]"""
+ # Last URI character; same as uric but no punctuation:
+ urilast = r"""[_~*/=+a-zA-Z0-9]"""
+ # End of a URI (either 'urilast' or 'uric followed by a
+ # uri_end_delim'):
+ uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
+ emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
+ email_pattern = r"""
+ %(emailc)s+(?:\.%(emailc)s+)* # name
+ (?<!\x00)@ # at
+ %(emailc)s+(?:\.%(emailc)s*)* # host
+ %(uri_end)s # final URI char
+ """
+ parts = ('initial_inline', start_string_prefix, '',
+ [('start', '', non_whitespace_after, # simple start-strings
+ [r'\*\*', # strong
+ r'\*(?!\*)', # emphasis but not strong
+ r'``', # literal
+ r'_`', # inline internal target
+ r'\|(?!\|)'] # substitution reference
+ ),
+ ('whole', '', end_string_suffix, # whole constructs
+ [# reference name & end-string
+ r'(?P<refname>%s)(?P<refend>__?)' % simplename,
+ ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
+ [r'[0-9]+', # manually numbered
+ r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
+ r'\*', # auto-symbol
+ r'(?P<citationlabel>%s)' % simplename] # citation reference
+ )
+ ]
+ ),
+ ('backquote', # interpreted text or phrase reference
+ '(?P<role>(:%s:)?)' % simplename, # optional role
+ non_whitespace_after,
+ ['`(?!`)'] # but not literal
+ )
+ ]
+ )
+ patterns = Struct(
+ initial=build_regexp(parts),
+ emphasis=re.compile(non_whitespace_escape_before
+ + r'(\*)' + end_string_suffix),
+ strong=re.compile(non_whitespace_escape_before
+ + r'(\*\*)' + end_string_suffix),
+ interpreted_or_phrase_ref=re.compile(
+ r"""
+ %(non_whitespace_escape_before)s
+ (
+ `
+ (?P<suffix>
+ (?P<role>:%(simplename)s:)?
+ (?P<refend>__?)?
+ )
+ )
+ %(end_string_suffix)s
+ """ % locals(), re.VERBOSE | re.UNICODE),
+ embedded_uri=re.compile(
+ r"""
+ (
+ (?:[ \n]+|^) # spaces or beginning of line/string
+ < # open bracket
+ %(non_whitespace_after)s
+ ([^<>\x00]+) # anything but angle brackets & nulls
+ %(non_whitespace_before)s
+ > # close bracket w/o whitespace before
+ )
+ $ # end of string
+ """ % locals(), re.VERBOSE),
+ literal=re.compile(non_whitespace_before + '(``)'
+ + end_string_suffix),
+ target=re.compile(non_whitespace_escape_before
+ + r'(`)' + end_string_suffix),
+ substitution_ref=re.compile(non_whitespace_escape_before
+ + r'(\|_{0,2})'
+ + end_string_suffix),
+ email=re.compile(email_pattern % locals() + '$', re.VERBOSE),
+ uri=re.compile(
+ (r"""
+ %(start_string_prefix)s
+ (?P<whole>
+ (?P<absolute> # absolute URI
+ (?P<scheme> # scheme (http, ftp, mailto)
+ [a-zA-Z][a-zA-Z0-9.+-]*
+ )
+ :
+ (
+ ( # either:
+ (//?)? # hierarchical URI
+ %(uric)s* # URI characters
+ %(uri_end)s # final URI char
+ )
+ ( # optional query
+ \?%(uric)s*
+ %(uri_end)s
+ )?
+ ( # optional fragment
+ \#%(uric)s*
+ %(uri_end)s
+ )?
+ )
+ )
+ | # *OR*
+ (?P<email> # email address
+ """ + email_pattern + r"""
+ )
+ )
+ %(end_string_suffix)s
+ """) % locals(), re.VERBOSE),
+ pep=re.compile(
+ r"""
+ %(start_string_prefix)s
+ (
+ (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
+ |
+ (PEP\s+(?P<pepnum2>\d+)) # reference by name
+ )
+ %(end_string_suffix)s""" % locals(), re.VERBOSE),
+ rfc=re.compile(
+ r"""
+ %(start_string_prefix)s
+ (RFC(-|\s+)?(?P<rfcnum>\d+))
+ %(end_string_suffix)s""" % locals(), re.VERBOSE))
+
+ def quoted_start(self, match):
+ """Return 1 if inline markup start-string is 'quoted', 0 if not."""
+ string = match.string
+ start = match.start()
+ end = match.end()
+ if start == 0: # start-string at beginning of text
+ return 0
+ prestart = string[start - 1]
+ try:
+ poststart = string[end]
+ if self.openers.index(prestart) \
+ == self.closers.index(poststart): # quoted
+ return 1
+ except IndexError: # start-string at end of text
+ return 1
+ except ValueError: # not quoted
+ pass
+ return 0
+
+ def inline_obj(self, match, lineno, end_pattern, nodeclass,
+ restore_backslashes=0):
+ string = match.string
+ matchstart = match.start('start')
+ matchend = match.end('start')
+ if self.quoted_start(match):
+ return (string[:matchend], [], string[matchend:], [], '')
+ endmatch = end_pattern.search(string[matchend:])
+ if endmatch and endmatch.start(1): # 1 or more chars
+ text = unescape(endmatch.string[:endmatch.start(1)],
+ restore_backslashes)
+ textend = matchend + endmatch.end(1)
+ rawsource = unescape(string[matchstart:textend], 1)
+ return (string[:matchstart], [nodeclass(rawsource, text)],
+ string[textend:], [], endmatch.group(1))
+ msg = self.reporter.warning(
+ 'Inline %s start-string without end-string.'
+ % nodeclass.__name__, line=lineno)
+ text = unescape(string[matchstart:matchend], 1)
+ rawsource = unescape(string[matchstart:matchend], 1)
+ prb = self.problematic(text, rawsource, msg)
+ return string[:matchstart], [prb], string[matchend:], [msg], ''
+
+ def problematic(self, text, rawsource, message):
+ msgid = self.document.set_id(message, self.parent)
+ problematic = nodes.problematic(rawsource, text, refid=msgid)
+ prbid = self.document.set_id(problematic)
+ message.add_backref(prbid)
+ return problematic
+
+ def emphasis(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.emphasis, nodes.emphasis)
+ return before, inlines, remaining, sysmessages
+
+ def strong(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.strong, nodes.strong)
+ return before, inlines, remaining, sysmessages
+
+ def interpreted_or_phrase_ref(self, match, lineno):
+ end_pattern = self.patterns.interpreted_or_phrase_ref
+ string = match.string
+ matchstart = match.start('backquote')
+ matchend = match.end('backquote')
+ rolestart = match.start('role')
+ role = match.group('role')
+ position = ''
+ if role:
+ role = role[1:-1]
+ position = 'prefix'
+ elif self.quoted_start(match):
+ return (string[:matchend], [], string[matchend:], [])
+ endmatch = end_pattern.search(string[matchend:])
+ if endmatch and endmatch.start(1): # 1 or more chars
+ textend = matchend + endmatch.end()
+ if endmatch.group('role'):
+ if role:
+ msg = self.reporter.warning(
+ 'Multiple roles in interpreted text (both '
+ 'prefix and suffix present; only one allowed).',
+ line=lineno)
+ text = unescape(string[rolestart:textend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:rolestart], [prb], string[textend:], [msg]
+ role = endmatch.group('suffix')[1:-1]
+ position = 'suffix'
+ escaped = endmatch.string[:endmatch.start(1)]
+ rawsource = unescape(string[matchstart:textend], 1)
+ if rawsource[-1:] == '_':
+ if role:
+ msg = self.reporter.warning(
+ 'Mismatch: both interpreted text role %s and '
+ 'reference suffix.' % position, line=lineno)
+ text = unescape(string[rolestart:textend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:rolestart], [prb], string[textend:], [msg]
+ return self.phrase_ref(string[:matchstart], string[textend:],
+ rawsource, escaped, unescape(escaped))
+ else:
+ rawsource = unescape(string[rolestart:textend], 1)
+ nodelist, messages = self.interpreted(rawsource, escaped, role,
+ lineno)
+ return (string[:rolestart], nodelist,
+ string[textend:], messages)
+ msg = self.reporter.warning(
+ 'Inline interpreted text or phrase reference start-string '
+ 'without end-string.', line=lineno)
+ text = unescape(string[matchstart:matchend], 1)
+ prb = self.problematic(text, text, msg)
+ return string[:matchstart], [prb], string[matchend:], [msg]
+
+ def phrase_ref(self, before, after, rawsource, escaped, text):
+ match = self.patterns.embedded_uri.search(escaped)
+ if match:
+ text = unescape(escaped[:match.start(0)])
+ uri_text = match.group(2)
+ uri = ''.join(uri_text.split())
+ uri = self.adjust_uri(uri)
+ if uri:
+ target = nodes.target(match.group(1), refuri=uri)
+ else:
+ raise ApplicationError('problem with URI: %r' % uri_text)
+ if not text:
+ text = uri
+ else:
+ target = None
+ refname = normalize_name(text)
+ reference = nodes.reference(rawsource, text,
+ name=whitespace_normalize_name(text))
+ node_list = [reference]
+ if rawsource[-2:] == '__':
+ if target:
+ reference['refuri'] = uri
+ else:
+ reference['anonymous'] = 1
+ else:
+ if target:
+ reference['refuri'] = uri
+ target['names'].append(refname)
+ self.document.note_explicit_target(target, self.parent)
+ node_list.append(target)
+ else:
+ reference['refname'] = refname
+ self.document.note_refname(reference)
+ return before, node_list, after, []
+
+ def adjust_uri(self, uri):
+ match = self.patterns.email.match(uri)
+ if match:
+ return 'mailto:' + uri
+ else:
+ return uri
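+    # e.g. adjust_uri('someone@example.org') returns
+    # 'mailto:someone@example.org', while 'http://example.org/' is returned
+    # unchanged (it does not match the email pattern).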
+
+ def interpreted(self, rawsource, text, role, lineno):
+ role_fn, messages = roles.role(role, self.language, lineno,
+ self.reporter)
+ if role_fn:
+ nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
+ return nodes, messages + messages2
+ else:
+ msg = self.reporter.error(
+ 'Unknown interpreted text role "%s".' % role,
+ line=lineno)
+ return ([self.problematic(rawsource, rawsource, msg)],
+ messages + [msg])
+
+ def literal(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.literal, nodes.literal,
+ restore_backslashes=1)
+ return before, inlines, remaining, sysmessages
+
+ def inline_internal_target(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.target, nodes.target)
+ if inlines and isinstance(inlines[0], nodes.target):
+ assert len(inlines) == 1
+ target = inlines[0]
+ name = normalize_name(target.astext())
+ target['names'].append(name)
+ self.document.note_explicit_target(target, self.parent)
+ return before, inlines, remaining, sysmessages
+
+ def substitution_reference(self, match, lineno):
+ before, inlines, remaining, sysmessages, endstring = self.inline_obj(
+ match, lineno, self.patterns.substitution_ref,
+ nodes.substitution_reference)
+ if len(inlines) == 1:
+ subref_node = inlines[0]
+ if isinstance(subref_node, nodes.substitution_reference):
+ subref_text = subref_node.astext()
+ self.document.note_substitution_ref(subref_node, subref_text)
+ if endstring[-1:] == '_':
+ reference_node = nodes.reference(
+ '|%s%s' % (subref_text, endstring), '')
+ if endstring[-2:] == '__':
+ reference_node['anonymous'] = 1
+ else:
+ reference_node['refname'] = normalize_name(subref_text)
+ self.document.note_refname(reference_node)
+ reference_node += subref_node
+ inlines = [reference_node]
+ return before, inlines, remaining, sysmessages
+
+ def footnote_reference(self, match, lineno):
+ """
+ Handles `nodes.footnote_reference` and `nodes.citation_reference`
+ elements.
+ """
+ label = match.group('footnotelabel')
+ refname = normalize_name(label)
+ string = match.string
+ before = string[:match.start('whole')]
+ remaining = string[match.end('whole'):]
+ if match.group('citationlabel'):
+ refnode = nodes.citation_reference('[%s]_' % label,
+ refname=refname)
+ refnode += nodes.Text(label)
+ self.document.note_citation_ref(refnode)
+ else:
+ refnode = nodes.footnote_reference('[%s]_' % label)
+ if refname[0] == '#':
+ refname = refname[1:]
+ refnode['auto'] = 1
+ self.document.note_autofootnote_ref(refnode)
+ elif refname == '*':
+ refname = ''
+ refnode['auto'] = '*'
+ self.document.note_symbol_footnote_ref(
+ refnode)
+ else:
+ refnode += nodes.Text(label)
+ if refname:
+ refnode['refname'] = refname
+ self.document.note_footnote_ref(refnode)
+ if utils.get_trim_footnote_ref_space(self.document.settings):
+ before = before.rstrip()
+ return (before, [refnode], remaining, [])
+
+ def reference(self, match, lineno, anonymous=None):
+ referencename = match.group('refname')
+ refname = normalize_name(referencename)
+ referencenode = nodes.reference(
+ referencename + match.group('refend'), referencename,
+ name=whitespace_normalize_name(referencename))
+ if anonymous:
+ referencenode['anonymous'] = 1
+ else:
+ referencenode['refname'] = refname
+ self.document.note_refname(referencenode)
+ string = match.string
+ matchstart = match.start('whole')
+ matchend = match.end('whole')
+ return (string[:matchstart], [referencenode], string[matchend:], [])
+
+ def anonymous_reference(self, match, lineno):
+ return self.reference(match, lineno, anonymous=1)
+
+ def standalone_uri(self, match, lineno):
+ if not match.group('scheme') or urischemes.schemes.has_key(
+ match.group('scheme').lower()):
+ if match.group('email'):
+ addscheme = 'mailto:'
+ else:
+ addscheme = ''
+ text = match.group('whole')
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped,
+ refuri=addscheme + unescaped)]
+ else: # not a valid scheme
+ raise MarkupMismatch
+
+ pep_url = 'pep-%04d.html'
+
+ def pep_reference(self, match, lineno):
+ text = match.group(0)
+ if text.startswith('pep-'):
+ pepnum = int(match.group('pepnum1'))
+ elif text.startswith('PEP'):
+ pepnum = int(match.group('pepnum2'))
+ else:
+ raise MarkupMismatch
+ ref = self.document.settings.pep_base_url + self.pep_url % pepnum
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
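+    # e.g. the standalone text "PEP 287" becomes a reference whose refuri is
+    # settings.pep_base_url + 'pep-0287.html'.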
+
+ rfc_url = 'rfc%d.html'
+
+ def rfc_reference(self, match, lineno):
+ text = match.group(0)
+ if text.startswith('RFC'):
+ rfcnum = int(match.group('rfcnum'))
+ ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
+ else:
+ raise MarkupMismatch
+ unescaped = unescape(text, 0)
+ return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
+
+ def implicit_inline(self, text, lineno):
+ """
+ Check each of the patterns in `self.implicit_dispatch` for a match,
+ and dispatch to the stored method for the pattern. Recursively check
+ the text before and after the match. Return a list of `nodes.Text`
+ and inline element nodes.
+ """
+ if not text:
+ return []
+ for pattern, method in self.implicit_dispatch:
+ match = pattern.search(text)
+ if match:
+ try:
+ # Must recurse on strings before *and* after the match;
+ # there may be multiple patterns.
+ return (self.implicit_inline(text[:match.start()], lineno)
+ + method(match, lineno) +
+ self.implicit_inline(text[match.end():], lineno))
+ except MarkupMismatch:
+ pass
+ return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
+
+ dispatch = {'*': emphasis,
+ '**': strong,
+ '`': interpreted_or_phrase_ref,
+ '``': literal,
+ '_`': inline_internal_target,
+ ']_': footnote_reference,
+ '|': substitution_reference,
+ '_': reference,
+ '__': anonymous_reference}
+
+
+def _loweralpha_to_int(s, _zero=(ord('a')-1)):
+ return ord(s) - _zero
+
+def _upperalpha_to_int(s, _zero=(ord('A')-1)):
+ return ord(s) - _zero
+
+def _lowerroman_to_int(s):
+ return roman.fromRoman(s.upper())
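+# e.g. _loweralpha_to_int('c') == 3, _upperalpha_to_int('C') == 3, and
+# _lowerroman_to_int('iv') == 4.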
+
+
+class Body(RSTState):
+
+ """
+ Generic classifier of the first line of a block.
+ """
+
+ double_width_pad_char = tableparser.TableParser.double_width_pad_char
+ """Padding character for East Asian double-width text."""
+
+ enum = Struct()
+ """Enumerated list parsing information."""
+
+ enum.formatinfo = {
+ 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
+ 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
+ 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
+ enum.formats = enum.formatinfo.keys()
+ enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
+ 'lowerroman', 'upperroman'] # ORDERED!
+ enum.sequencepats = {'arabic': '[0-9]+',
+ 'loweralpha': '[a-z]',
+ 'upperalpha': '[A-Z]',
+ 'lowerroman': '[ivxlcdm]+',
+ 'upperroman': '[IVXLCDM]+',}
+ enum.converters = {'arabic': int,
+ 'loweralpha': _loweralpha_to_int,
+ 'upperalpha': _upperalpha_to_int,
+ 'lowerroman': _lowerroman_to_int,
+ 'upperroman': roman.fromRoman}
+
+ enum.sequenceregexps = {}
+ for sequence in enum.sequences:
+ enum.sequenceregexps[sequence] = re.compile(
+ enum.sequencepats[sequence] + '$')
+
+ grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
+    """Matches the top (& bottom) of a full table."""
+
+ simple_table_top_pat = re.compile('=+( +=+)+ *$')
+ """Matches the top of a simple table."""
+
+ simple_table_border_pat = re.compile('=+[ =]*$')
+ """Matches the bottom & header bottom of a simple table."""
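+    # e.g. grid_table_top_pat matches a border like "+------+--------+", and
+    # simple_table_top_pat matches a header line like "======  ========".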
+
+ pats = {}
+ """Fragments of patterns used by transitions."""
+
+ pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
+ pats['alpha'] = '[a-zA-Z]'
+ pats['alphanum'] = '[a-zA-Z0-9]'
+ pats['alphanumplus'] = '[a-zA-Z0-9_-]'
+ pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
+ '|%(upperroman)s|#)' % enum.sequencepats)
+ pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
+ # @@@ Loosen up the pattern? Allow Unicode?
+ pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
+ pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
+ pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
+ pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
+
+ for format in enum.formats:
+ pats[format] = '(?P<%s>%s%s%s)' % (
+ format, re.escape(enum.formatinfo[format].prefix),
+ pats['enum'], re.escape(enum.formatinfo[format].suffix))
+
+ patterns = {
+ 'bullet': r'[-+*]( +|$)',
+ 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
+ 'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
+ 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
+ 'doctest': r'>>>( +|$)',
+ 'line_block': r'\|( +|$)',
+ 'grid_table_top': grid_table_top_pat,
+ 'simple_table_top': simple_table_top_pat,
+ 'explicit_markup': r'\.\.( +|$)',
+ 'anonymous': r'__( +|$)',
+ 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
+ 'text': r''}
+ initial_transitions = (
+ 'bullet',
+ 'enumerator',
+ 'field_marker',
+ 'option_marker',
+ 'doctest',
+ 'line_block',
+ 'grid_table_top',
+ 'simple_table_top',
+ 'explicit_markup',
+ 'anonymous',
+ 'line',
+ 'text')
+
+ def indent(self, match, context, next_state):
+ """Block quote."""
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_indented()
+ blockquote, messages = self.block_quote(indented, line_offset)
+ self.parent += blockquote
+ self.parent += messages
+ if not blank_finish:
+ self.parent += self.unindent_warning('Block quote')
+ return context, next_state, []
+
+ def block_quote(self, indented, line_offset):
+ blockquote_lines, attribution_lines, attribution_offset = \
+ self.check_attribution(indented, line_offset)
+ blockquote = nodes.block_quote()
+ self.nested_parse(blockquote_lines, line_offset, blockquote)
+ messages = []
+ if attribution_lines:
+ attribution, messages = self.parse_attribution(attribution_lines,
+ attribution_offset)
+ blockquote += attribution
+ return blockquote, messages
+
+ # u'\u2014' is an em-dash:
+ attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')
+
+ def check_attribution(self, indented, line_offset):
+ """
+ Check for an attribution in the last contiguous block of `indented`.
+
+ * First line after last blank line must begin with "--" (etc.).
+ * Every line after that must have consistent indentation.
+
+ Return a 3-tuple: (block quote lines, attribution lines,
+ attribution offset).
+ """
+ #import pdb ; pdb.set_trace()
+ blank = None
+ nonblank_seen = None
+ indent = 0
+ for i in range(len(indented) - 1, 0, -1): # don't check first line
+ this_line_blank = not indented[i].strip()
+ if nonblank_seen and this_line_blank:
+ match = self.attribution_pattern.match(indented[i + 1])
+ if match:
+ blank = i
+ break
+ elif not this_line_blank:
+ nonblank_seen = 1
+ if blank and len(indented) - blank > 2: # multi-line attribution
+ indent = (len(indented[blank + 2])
+ - len(indented[blank + 2].lstrip()))
+ for j in range(blank + 3, len(indented)):
+ if ( indented[j] # may be blank last line
+ and indent != (len(indented[j])
+ - len(indented[j].lstrip()))):
+ # bad shape
+ blank = None
+ break
+ if blank:
+ a_lines = indented[blank + 1:]
+ a_lines.trim_left(match.end(), end=1)
+ a_lines.trim_left(indent, start=1)
+ return (indented[:blank], a_lines, line_offset + blank + 1)
+ else:
+ return (indented, None, None)
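+    # e.g. for an indented block whose last contiguous chunk is a blank line
+    # followed by "-- An Attribution", the lines before the blank line become
+    # the block quote and "An Attribution" becomes the attribution text.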
+
+ def parse_attribution(self, indented, line_offset):
+ text = '\n'.join(indented).rstrip()
+ lineno = self.state_machine.abs_line_number() + line_offset
+ textnodes, messages = self.inline_text(text, lineno)
+ node = nodes.attribution(text, '', *textnodes)
+ node.line = lineno
+ return node, messages
+
+ def bullet(self, match, context, next_state):
+ """Bullet list item."""
+ bulletlist = nodes.bullet_list()
+ self.parent += bulletlist
+ bulletlist['bullet'] = match.string[0]
+ i, blank_finish = self.list_item(match.end())
+ bulletlist += i
+ offset = self.state_machine.line_offset + 1 # next line
+ new_line_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=bulletlist, initial_state='BulletList',
+ blank_finish=blank_finish)
+ self.goto_line(new_line_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Bullet list')
+ return [], next_state, []
+
+ def list_item(self, indent):
+ if self.state_machine.line[indent:]:
+ indented, line_offset, blank_finish = (
+ self.state_machine.get_known_indented(indent))
+ else:
+ indented, indent, line_offset, blank_finish = (
+ self.state_machine.get_first_known_indented(indent))
+ listitem = nodes.list_item('\n'.join(indented))
+ if indented:
+ self.nested_parse(indented, input_offset=line_offset,
+ node=listitem)
+ return listitem, blank_finish
+
+ def enumerator(self, match, context, next_state):
+ """Enumerated List Item"""
+ format, sequence, text, ordinal = self.parse_enumerator(match)
+ if not self.is_enumerated_list_item(ordinal, sequence, format):
+ raise statemachine.TransitionCorrection('text')
+ enumlist = nodes.enumerated_list()
+ self.parent += enumlist
+ if sequence == '#':
+ enumlist['enumtype'] = 'arabic'
+ else:
+ enumlist['enumtype'] = sequence
+ enumlist['prefix'] = self.enum.formatinfo[format].prefix
+ enumlist['suffix'] = self.enum.formatinfo[format].suffix
+ if ordinal != 1:
+ enumlist['start'] = ordinal
+ msg = self.reporter.info(
+ 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
+ % (text, ordinal), line=self.state_machine.abs_line_number())
+ self.parent += msg
+ listitem, blank_finish = self.list_item(match.end())
+ enumlist += listitem
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=enumlist, initial_state='EnumeratedList',
+ blank_finish=blank_finish,
+ extra_settings={'lastordinal': ordinal,
+ 'format': format,
+ 'auto': sequence == '#'})
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Enumerated list')
+ return [], next_state, []
+
+ def parse_enumerator(self, match, expected_sequence=None):
+ """
+ Analyze an enumerator and return the results.
+
+ :Return:
+ - the enumerator format ('period', 'parens', or 'rparen'),
+ - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
+ - the text of the enumerator, stripped of formatting, and
+ - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
+ ``None`` is returned for invalid enumerator text).
+
+ The enumerator format has already been determined by the regular
+ expression match. If `expected_sequence` is given, that sequence is
+ tried first. If not, we check for Roman numeral 1. This way,
+ single-character Roman numerals (which are also alphabetical) can be
+ matched. If no sequence has been matched, all sequences are checked in
+ order.
+ """
+ groupdict = match.groupdict()
+ sequence = ''
+ for format in self.enum.formats:
+ if groupdict[format]: # was this the format matched?
+ break # yes; keep `format`
+ else: # shouldn't happen
+ raise ParserError('enumerator format not matched')
+ text = groupdict[format][self.enum.formatinfo[format].start
+ :self.enum.formatinfo[format].end]
+ if text == '#':
+ sequence = '#'
+ elif expected_sequence:
+ try:
+ if self.enum.sequenceregexps[expected_sequence].match(text):
+ sequence = expected_sequence
+ except KeyError: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: %s'
+ % sequence)
+ elif text == 'i':
+ sequence = 'lowerroman'
+ elif text == 'I':
+ sequence = 'upperroman'
+ if not sequence:
+ for sequence in self.enum.sequences:
+ if self.enum.sequenceregexps[sequence].match(text):
+ break
+ else: # shouldn't happen
+ raise ParserError('enumerator sequence not matched')
+ if sequence == '#':
+ ordinal = 1
+ else:
+ try:
+ ordinal = self.enum.converters[sequence](text)
+ except roman.InvalidRomanNumeralError:
+ ordinal = None
+ return format, sequence, text, ordinal
+
+ def is_enumerated_list_item(self, ordinal, sequence, format):
+ """
+ Check validity based on the ordinal value and the second line.
+
+ Return true iff the ordinal is valid and the second line is blank,
+ indented, or starts with the next enumerator or an auto-enumerator.
+ """
+ if ordinal is None:
+ return None
+ try:
+ next_line = self.state_machine.next_line()
+ except EOFError: # end of input lines
+ self.state_machine.previous_line()
+ return 1
+ else:
+ self.state_machine.previous_line()
+ if not next_line[:1].strip(): # blank or indented
+ return 1
+ result = self.make_enumerator(ordinal + 1, sequence, format)
+ if result:
+ next_enumerator, auto_enumerator = result
+ try:
+ if ( next_line.startswith(next_enumerator) or
+ next_line.startswith(auto_enumerator) ):
+ return 1
+ except TypeError:
+ pass
+ return None
+
+ def make_enumerator(self, ordinal, sequence, format):
+ """
+ Construct and return the next enumerated list item marker, and an
+ auto-enumerator ("#" instead of the regular enumerator).
+
+ Return ``None`` for invalid (out of range) ordinals.
+ """ #"
+ if sequence == '#':
+ enumerator = '#'
+ elif sequence == 'arabic':
+ enumerator = str(ordinal)
+ else:
+ if sequence.endswith('alpha'):
+ if ordinal > 26:
+ return None
+ enumerator = chr(ordinal + ord('a') - 1)
+ elif sequence.endswith('roman'):
+ try:
+ enumerator = roman.toRoman(ordinal)
+ except roman.RomanError:
+ return None
+ else: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: "%s"'
+ % sequence)
+ if sequence.startswith('lower'):
+ enumerator = enumerator.lower()
+ elif sequence.startswith('upper'):
+ enumerator = enumerator.upper()
+ else: # shouldn't happen
+ raise ParserError('unknown enumerator sequence: "%s"'
+ % sequence)
+ formatinfo = self.enum.formatinfo[format]
+ next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
+ + ' ')
+ auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
+ return next_enumerator, auto_enumerator
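+    # e.g. make_enumerator(2, 'lowerroman', 'parens') == ('(ii) ', '(#) '),
+    # while make_enumerator(27, 'loweralpha', 'period') returns None.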
+
+ def field_marker(self, match, context, next_state):
+ """Field list item."""
+ field_list = nodes.field_list()
+ self.parent += field_list
+ field, blank_finish = self.field(match)
+ field_list += field
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=field_list, initial_state='FieldList',
+ blank_finish=blank_finish)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Field list')
+ return [], next_state, []
+
+ def field(self, match):
+ name = self.parse_field_marker(match)
+ lineno = self.state_machine.abs_line_number()
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ field_node = nodes.field()
+ field_node.line = lineno
+ name_nodes, name_messages = self.inline_text(name, lineno)
+ field_node += nodes.field_name(name, '', *name_nodes)
+ field_body = nodes.field_body('\n'.join(indented), *name_messages)
+ field_node += field_body
+ if indented:
+ self.parse_field_body(indented, line_offset, field_body)
+ return field_node, blank_finish
+
+ def parse_field_marker(self, match):
+ """Extract & return field name from a field marker match."""
+ field = match.group()[1:] # strip off leading ':'
+ field = field[:field.rfind(':')] # strip off trailing ':' etc.
+ return field
+
+ def parse_field_body(self, indented, offset, node):
+ self.nested_parse(indented, input_offset=offset, node=node)
+
+ def option_marker(self, match, context, next_state):
+ """Option list item."""
+ optionlist = nodes.option_list()
+ try:
+ listitem, blank_finish = self.option_list_item(match)
+ except MarkupError, (message, lineno):
+ # This shouldn't happen; pattern won't match.
+ msg = self.reporter.error(
+ 'Invalid option list marker: %s' % message, line=lineno)
+ self.parent += msg
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ blockquote, messages = self.block_quote(indented, line_offset)
+ self.parent += blockquote
+ self.parent += messages
+ if not blank_finish:
+ self.parent += self.unindent_warning('Option list')
+ return [], next_state, []
+ self.parent += optionlist
+ optionlist += listitem
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=optionlist, initial_state='OptionList',
+ blank_finish=blank_finish)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Option list')
+ return [], next_state, []
+
+ def option_list_item(self, match):
+ offset = self.state_machine.abs_line_offset()
+ options = self.parse_option_marker(match)
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ if not indented: # not an option list item
+ self.goto_line(offset)
+ raise statemachine.TransitionCorrection('text')
+ option_group = nodes.option_group('', *options)
+ description = nodes.description('\n'.join(indented))
+ option_list_item = nodes.option_list_item('', option_group,
+ description)
+ if indented:
+ self.nested_parse(indented, input_offset=line_offset,
+ node=description)
+ return option_list_item, blank_finish
+
+ def parse_option_marker(self, match):
+ """
+ Return a list of `node.option` and `node.option_argument` objects,
+ parsed from an option marker match.
+
+ :Exception: `MarkupError` for invalid option markers.
+ """
+ optlist = []
+ optionstrings = match.group().rstrip().split(', ')
+ for optionstring in optionstrings:
+ tokens = optionstring.split()
+ delimiter = ' '
+ firstopt = tokens[0].split('=')
+ if len(firstopt) > 1:
+ # "--opt=value" form
+ tokens[:1] = firstopt
+ delimiter = '='
+ elif (len(tokens[0]) > 2
+ and ((tokens[0].startswith('-')
+ and not tokens[0].startswith('--'))
+ or tokens[0].startswith('+'))):
+ # "-ovalue" form
+ tokens[:1] = [tokens[0][:2], tokens[0][2:]]
+ delimiter = ''
+ if len(tokens) > 1 and (tokens[1].startswith('<')
+ and tokens[-1].endswith('>')):
+ # "-o <value1 value2>" form; join all values into one token
+ tokens[1:] = [' '.join(tokens[1:])]
+ if 0 < len(tokens) <= 2:
+ option = nodes.option(optionstring)
+ option += nodes.option_string(tokens[0], tokens[0])
+ if len(tokens) > 1:
+ option += nodes.option_argument(tokens[1], tokens[1],
+ delimiter=delimiter)
+ optlist.append(option)
+ else:
+ raise MarkupError(
+ 'wrong number of option tokens (=%s), should be 1 or 2: '
+ '"%s"' % (len(tokens), optionstring),
+ self.state_machine.abs_line_number() + 1)
+ return optlist
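+    # e.g. the marker "-o FILE, --output=FILE" yields two option nodes: one
+    # with option_string "-o" and option_argument "FILE" (delimiter " "), and
+    # one with option_string "--output" and option_argument "FILE"
+    # (delimiter "=").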
+
+ def doctest(self, match, context, next_state):
+ data = '\n'.join(self.state_machine.get_text_block())
+ self.parent += nodes.doctest_block(data, data)
+ return [], next_state, []
+
+ def line_block(self, match, context, next_state):
+ """First line of a line block."""
+ block = nodes.line_block()
+ self.parent += block
+ lineno = self.state_machine.abs_line_number()
+ line, messages, blank_finish = self.line_block_line(match, lineno)
+ block += line
+ self.parent += messages
+ if not blank_finish:
+ offset = self.state_machine.line_offset + 1 # next line
+ new_line_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=block, initial_state='LineBlock',
+ blank_finish=0)
+ self.goto_line(new_line_offset)
+ if not blank_finish:
+ self.parent += self.reporter.warning(
+ 'Line block ends without a blank line.',
+ line=(self.state_machine.abs_line_number() + 1))
+ if len(block):
+ if block[0].indent is None:
+ block[0].indent = 0
+ self.nest_line_block_lines(block)
+ return [], next_state, []
+
+ def line_block_line(self, match, lineno):
+ """Return one line element of a line_block."""
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end(),
+ until_blank=1)
+ text = u'\n'.join(indented)
+ text_nodes, messages = self.inline_text(text, lineno)
+ line = nodes.line(text, '', *text_nodes)
+ if match.string.rstrip() != '|': # not empty
+ line.indent = len(match.group(1)) - 1
+ return line, messages, blank_finish
+
+ def nest_line_block_lines(self, block):
+ for index in range(1, len(block)):
+ if block[index].indent is None:
+ block[index].indent = block[index - 1].indent
+ self.nest_line_block_segment(block)
+
+ def nest_line_block_segment(self, block):
+ indents = [item.indent for item in block]
+ least = min(indents)
+ new_items = []
+ new_block = nodes.line_block()
+ for item in block:
+ if item.indent > least:
+ new_block.append(item)
+ else:
+ if len(new_block):
+ self.nest_line_block_segment(new_block)
+ new_items.append(new_block)
+ new_block = nodes.line_block()
+ new_items.append(item)
+ if len(new_block):
+ self.nest_line_block_segment(new_block)
+ new_items.append(new_block)
+ block[:] = new_items
+
+ def grid_table_top(self, match, context, next_state):
+ """Top border of a full table."""
+ return self.table_top(match, context, next_state,
+ self.isolate_grid_table,
+ tableparser.GridTableParser)
+
+ def simple_table_top(self, match, context, next_state):
+ """Top border of a simple table."""
+ return self.table_top(match, context, next_state,
+ self.isolate_simple_table,
+ tableparser.SimpleTableParser)
+
+ def table_top(self, match, context, next_state,
+ isolate_function, parser_class):
+ """Top border of a generic table."""
+ nodelist, blank_finish = self.table(isolate_function, parser_class)
+ self.parent += nodelist
+ if not blank_finish:
+ msg = self.reporter.warning(
+ 'Blank line required after table.',
+ line=self.state_machine.abs_line_number() + 1)
+ self.parent += msg
+ return [], next_state, []
+
+ def table(self, isolate_function, parser_class):
+ """Parse a table."""
+ block, messages, blank_finish = isolate_function()
+ if block:
+ try:
+ parser = parser_class()
+ tabledata = parser.parse(block)
+ tableline = (self.state_machine.abs_line_number() - len(block)
+ + 1)
+ table = self.build_table(tabledata, tableline)
+ nodelist = [table] + messages
+ except tableparser.TableMarkupError, detail:
+ nodelist = self.malformed_table(
+ block, ' '.join(detail.args)) + messages
+ else:
+ nodelist = messages
+ return nodelist, blank_finish
+
+ def isolate_grid_table(self):
+ messages = []
+ blank_finish = 1
+ try:
+ block = self.state_machine.get_text_block(flush_left=1)
+ except statemachine.UnexpectedIndentationError, instance:
+ block, source, lineno = instance.args
+ messages.append(self.reporter.error('Unexpected indentation.',
+ source=source, line=lineno))
+ blank_finish = 0
+ block.disconnect()
+ # for East Asian chars:
+ block.pad_double_width(self.double_width_pad_char)
+ width = len(block[0].strip())
+ for i in range(len(block)):
+ block[i] = block[i].strip()
+ if block[i][0] not in '+|': # check left edge
+ blank_finish = 0
+ self.state_machine.previous_line(len(block) - i)
+ del block[i:]
+ break
+ if not self.grid_table_top_pat.match(block[-1]): # find bottom
+ blank_finish = 0
+ # from second-last to third line of table:
+ for i in range(len(block) - 2, 1, -1):
+ if self.grid_table_top_pat.match(block[i]):
+ self.state_machine.previous_line(len(block) - i + 1)
+ del block[i+1:]
+ break
+ else:
+ messages.extend(self.malformed_table(block))
+ return [], messages, blank_finish
+ for i in range(len(block)): # check right edge
+ if len(block[i]) != width or block[i][-1] not in '+|':
+ messages.extend(self.malformed_table(block))
+ return [], messages, blank_finish
+ return block, messages, blank_finish
+
+ def isolate_simple_table(self):
+ start = self.state_machine.line_offset
+ lines = self.state_machine.input_lines
+ limit = len(lines) - 1
+ toplen = len(lines[start].strip())
+ pattern_match = self.simple_table_border_pat.match
+ found = 0
+ found_at = None
+ i = start + 1
+ while i <= limit:
+ line = lines[i]
+ match = pattern_match(line)
+ if match:
+ if len(line.strip()) != toplen:
+ self.state_machine.next_line(i - start)
+ messages = self.malformed_table(
+ lines[start:i+1], 'Bottom/header table border does '
+ 'not match top border.')
+ return [], messages, i == limit or not lines[i+1].strip()
+ found += 1
+ found_at = i
+ if found == 2 or i == limit or not lines[i+1].strip():
+ end = i
+ break
+ i += 1
+ else: # reached end of input_lines
+ if found:
+ extra = ' or no blank line after table bottom'
+ self.state_machine.next_line(found_at - start)
+ block = lines[start:found_at+1]
+ else:
+ extra = ''
+ self.state_machine.next_line(i - start - 1)
+ block = lines[start:]
+ messages = self.malformed_table(
+ block, 'No bottom table border found%s.' % extra)
+ return [], messages, not extra
+ self.state_machine.next_line(end - start)
+ block = lines[start:end+1]
+ # for East Asian chars:
+ block.pad_double_width(self.double_width_pad_char)
+ return block, [], end == limit or not lines[end+1].strip()
+
+ def malformed_table(self, block, detail=''):
+ block.replace(self.double_width_pad_char, '')
+ data = '\n'.join(block)
+ message = 'Malformed table.'
+ lineno = self.state_machine.abs_line_number() - len(block) + 1
+ if detail:
+ message += '\n' + detail
+ error = self.reporter.error(message, nodes.literal_block(data, data),
+ line=lineno)
+ return [error]
+
+ def build_table(self, tabledata, tableline, stub_columns=0):
+ colwidths, headrows, bodyrows = tabledata
+ table = nodes.table()
+ tgroup = nodes.tgroup(cols=len(colwidths))
+ table += tgroup
+ for colwidth in colwidths:
+ colspec = nodes.colspec(colwidth=colwidth)
+ if stub_columns:
+ colspec.attributes['stub'] = 1
+ stub_columns -= 1
+ tgroup += colspec
+ if headrows:
+ thead = nodes.thead()
+ tgroup += thead
+ for row in headrows:
+ thead += self.build_table_row(row, tableline)
+ tbody = nodes.tbody()
+ tgroup += tbody
+ for row in bodyrows:
+ tbody += self.build_table_row(row, tableline)
+ return table
+
+ def build_table_row(self, rowdata, tableline):
+ row = nodes.row()
+ for cell in rowdata:
+ if cell is None:
+ continue
+ morerows, morecols, offset, cellblock = cell
+ attributes = {}
+ if morerows:
+ attributes['morerows'] = morerows
+ if morecols:
+ attributes['morecols'] = morecols
+ entry = nodes.entry(**attributes)
+ row += entry
+ if ''.join(cellblock):
+ self.nested_parse(cellblock, input_offset=tableline+offset,
+ node=entry)
+ return row
+
+
+ explicit = Struct()
+ """Patterns and constants used for explicit markup recognition."""
+
+ explicit.patterns = Struct(
+ target=re.compile(r"""
+ (
+ _ # anonymous target
+ | # *OR*
+ (?P<quote>`?) # optional open quote
+ (?![ `]) # first char. not space or
+ # backquote
+ (?P<name> # reference name
+ .+?
+ )
+ %(non_whitespace_escape_before)s
+ (?P=quote) # close quote if open quote used
+ )
+ (?<!(?<!\x00):) # no unescaped colon at end
+ %(non_whitespace_escape_before)s
+ [ ]? # optional space
+ : # end of reference name
+ ([ ]+|$) # followed by whitespace
+ """ % vars(Inliner), re.VERBOSE),
+ reference=re.compile(r"""
+ (
+ (?P<simple>%(simplename)s)_
+ | # *OR*
+ ` # open backquote
+ (?![ ]) # not space
+ (?P<phrase>.+?) # hyperlink phrase
+ %(non_whitespace_escape_before)s
+ `_ # close backquote,
+ # reference mark
+ )
+ $ # end of string
+ """ % vars(Inliner), re.VERBOSE | re.UNICODE),
+ substitution=re.compile(r"""
+ (
+ (?![ ]) # first char. not space
+ (?P<name>.+?) # substitution text
+ %(non_whitespace_escape_before)s
+ \| # close delimiter
+ )
+ ([ ]+|$) # followed by whitespace
+ """ % vars(Inliner), re.VERBOSE),)
+
+ def footnote(self, match):
+ lineno = self.state_machine.abs_line_number()
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ label = match.group(1)
+ name = normalize_name(label)
+ footnote = nodes.footnote('\n'.join(indented))
+ footnote.line = lineno
+ if name[0] == '#': # auto-numbered
+ name = name[1:] # autonumber label
+ footnote['auto'] = 1
+ if name:
+ footnote['names'].append(name)
+ self.document.note_autofootnote(footnote)
+ elif name == '*': # auto-symbol
+ name = ''
+ footnote['auto'] = '*'
+ self.document.note_symbol_footnote(footnote)
+ else: # manually numbered
+ footnote += nodes.label('', label)
+ footnote['names'].append(name)
+ self.document.note_footnote(footnote)
+ if name:
+ self.document.note_explicit_target(footnote, footnote)
+ else:
+ self.document.set_id(footnote, footnote)
+ if indented:
+ self.nested_parse(indented, input_offset=offset, node=footnote)
+ return [footnote], blank_finish
+
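A small sketch of how the standard footnote label forms map onto the branches above (the labels follow the footnote pattern defined later in `explicit.constructs`)::

    def classify_footnote_label(label):
        """'1' -> manual, '#' -> anonymous auto, '#note' -> labeled auto,
        '*' -> auto-symbol."""
        if label == '*':
            return 'auto-symbol footnote'
        if label.startswith('#'):
            if label[1:]:
                return 'labeled auto-numbered footnote'
            return 'anonymous auto-numbered footnote'
        return 'manually numbered footnote'
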
+ def citation(self, match):
+ lineno = self.state_machine.abs_line_number()
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ label = match.group(1)
+ name = normalize_name(label)
+ citation = nodes.citation('\n'.join(indented))
+ citation.line = lineno
+ citation += nodes.label('', label)
+ citation['names'].append(name)
+ self.document.note_citation(citation)
+ self.document.note_explicit_target(citation, citation)
+ if indented:
+ self.nested_parse(indented, input_offset=offset, node=citation)
+ return [citation], blank_finish
+
+ def hyperlink_target(self, match):
+ pattern = self.explicit.patterns.target
+ lineno = self.state_machine.abs_line_number()
+ block, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(
+ match.end(), until_blank=1, strip_indent=0)
+ blocktext = match.string[:match.end()] + '\n'.join(block)
+ block = [escape2null(line) for line in block]
+ escaped = block[0]
+ blockindex = 0
+ while 1:
+ targetmatch = pattern.match(escaped)
+ if targetmatch:
+ break
+ blockindex += 1
+ try:
+ escaped += block[blockindex]
+ except IndexError:
+ raise MarkupError('malformed hyperlink target.', lineno)
+ del block[:blockindex]
+ block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
+ target = self.make_target(block, blocktext, lineno,
+ targetmatch.group('name'))
+ return [target], blank_finish
+
+ def make_target(self, block, block_text, lineno, target_name):
+ target_type, data = self.parse_target(block, block_text, lineno)
+ if target_type == 'refname':
+ target = nodes.target(block_text, '', refname=normalize_name(data))
+ target.indirect_reference_name = data
+ self.add_target(target_name, '', target, lineno)
+ self.document.note_indirect_target(target)
+ return target
+ elif target_type == 'refuri':
+ target = nodes.target(block_text, '')
+ self.add_target(target_name, data, target, lineno)
+ return target
+ else:
+ return data
+
+ def parse_target(self, block, block_text, lineno):
+ """
+ Determine the type of reference of a target.
+
+ :Return: A 2-tuple, one of:
+
+ - 'refname' and the indirect reference name
+ - 'refuri' and the URI
+ - 'malformed' and a system_message node
+ """
+ if block and block[-1].strip()[-1:] == '_': # possible indirect target
+ reference = ' '.join([line.strip() for line in block])
+ refname = self.is_reference(reference)
+ if refname:
+ return 'refname', refname
+ reference = ''.join([''.join(line.split()) for line in block])
+ return 'refuri', unescape(reference)
+
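A sketch of the outcomes for typical target blocks (standard reStructuredText; the ".. _name:" prefix has already been consumed by the time `parse_target` sees the block)::

    # (block contents, expected result)
    examples = [
        (['http://example.org/'], ('refuri', 'http://example.org/')),
        (['other-target_'],       ('refname', 'other-target')),
        ([''],                    ('refuri', '')),   # internal target (no URI)
        ]
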
+ def is_reference(self, reference):
+ match = self.explicit.patterns.reference.match(
+ whitespace_normalize_name(reference))
+ if not match:
+ return None
+ return unescape(match.group('simple') or match.group('phrase'))
+
+ def add_target(self, targetname, refuri, target, lineno):
+ target.line = lineno
+ if targetname:
+ name = normalize_name(unescape(targetname))
+ target['names'].append(name)
+ if refuri:
+ uri = self.inliner.adjust_uri(refuri)
+ if uri:
+ target['refuri'] = uri
+ else:
+ raise ApplicationError('problem with URI: %r' % refuri)
+ self.document.note_explicit_target(target, self.parent)
+ else: # anonymous target
+ if refuri:
+ target['refuri'] = refuri
+ target['anonymous'] = 1
+ self.document.note_anonymous_target(target)
+
+ def substitution_def(self, match):
+ pattern = self.explicit.patterns.substitution
+ lineno = self.state_machine.abs_line_number()
+ block, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end(),
+ strip_indent=0)
+ blocktext = (match.string[:match.end()] + '\n'.join(block))
+ block.disconnect()
+ escaped = escape2null(block[0].rstrip())
+ blockindex = 0
+ while 1:
+ subdefmatch = pattern.match(escaped)
+ if subdefmatch:
+ break
+ blockindex += 1
+ try:
+ escaped = escaped + ' ' + escape2null(block[blockindex].strip())
+ except IndexError:
+ raise MarkupError('malformed substitution definition.',
+ lineno)
+ del block[:blockindex] # strip out the substitution marker
+ block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
+ if not block[0]:
+ del block[0]
+ offset += 1
+ while block and not block[-1].strip():
+ block.pop()
+ subname = subdefmatch.group('name')
+ substitution_node = nodes.substitution_definition(blocktext)
+ substitution_node.line = lineno
+ if not block:
+ msg = self.reporter.warning(
+ 'Substitution definition "%s" missing contents.' % subname,
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ return [msg], blank_finish
+ block[0] = block[0].strip()
+ substitution_node['names'].append(
+ nodes.whitespace_normalize_name(subname))
+ new_abs_offset, blank_finish = self.nested_list_parse(
+ block, input_offset=offset, node=substitution_node,
+ initial_state='SubstitutionDef', blank_finish=blank_finish)
+ i = 0
+ for node in substitution_node[:]:
+ if not (isinstance(node, nodes.Inline) or
+ isinstance(node, nodes.Text)):
+ self.parent += substitution_node[i]
+ del substitution_node[i]
+ else:
+ i += 1
+ for node in substitution_node.traverse(nodes.Element):
+ if self.disallowed_inside_substitution_definitions(node):
+ pformat = nodes.literal_block('', node.pformat().rstrip())
+ msg = self.reporter.error(
+ 'Substitution definition contains illegal element:',
+ pformat, nodes.literal_block(blocktext, blocktext),
+ line=lineno)
+ return [msg], blank_finish
+ if len(substitution_node) == 0:
+ msg = self.reporter.warning(
+ 'Substitution definition "%s" empty or invalid.'
+ % subname,
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ return [msg], blank_finish
+ self.document.note_substitution_def(
+ substitution_node, subname, self.parent)
+ return [substitution_node], blank_finish
+
+ def disallowed_inside_substitution_definitions(self, node):
+ if (node['ids'] or
+ isinstance(node, nodes.reference) and node.get('anonymous') or
+ isinstance(node, nodes.footnote_reference) and node.get('auto')):
+ return 1
+ else:
+ return 0
+
+ def directive(self, match, **option_presets):
+ """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
+ type_name = match.group(1)
+ directive_function, messages = directives.directive(
+ type_name, self.memo.language, self.document)
+ self.parent += messages
+ if directive_function:
+ return self.run_directive(
+ directive_function, match, type_name, option_presets)
+ else:
+ return self.unknown_directive(type_name)
+
+ def run_directive(self, directive_fn, match, type_name, option_presets):
+ """
+ Parse a directive then run its directive function.
+
+ Parameters:
+
+ - `directive_fn`: The function implementing the directive. Uses
+ function attributes ``arguments``, ``options``, and/or ``content``
+ if present.
+
+ - `match`: A regular expression match object which matched the first
+ line of the directive.
+
+ - `type_name`: The directive name, as used in the source text.
+
+ - `option_presets`: A dictionary of preset options, defaults for the
+ directive options. Currently, only an "alt" option is passed by
+ substitution definitions (value: the substitution name), which may
+ be used by an embedded image directive.
+
+ Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
+ """
+ lineno = self.state_machine.abs_line_number()
+ initial_line_offset = self.state_machine.line_offset
+ indented, indent, line_offset, blank_finish \
+ = self.state_machine.get_first_known_indented(match.end(),
+ strip_top=0)
+ block_text = '\n'.join(self.state_machine.input_lines[
+ initial_line_offset : self.state_machine.line_offset + 1])
+ try:
+ arguments, options, content, content_offset = (
+ self.parse_directive_block(indented, line_offset,
+ directive_fn, option_presets))
+ except MarkupError, detail:
+ error = self.reporter.error(
+ 'Error in "%s" directive:\n%s.' % (type_name,
+ ' '.join(detail.args)),
+ nodes.literal_block(block_text, block_text), line=lineno)
+ return [error], blank_finish
+ result = directive_fn(type_name, arguments, options, content, lineno,
+ content_offset, block_text, self,
+ self.state_machine)
+ return (result,
+ blank_finish or self.state_machine.is_next_line_blank())
+
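A minimal sketch of a function-style directive compatible with the call made above; the name `example` and its behaviour are invented, while the `arguments`/`options`/`content` attributes mirror those consulted by `parse_directive_block` below::

    from docutils import nodes

    def example(name, arguments, options, content, lineno,
                content_offset, block_text, state, state_machine):
        """Render the directive content as a literal block."""
        text = '\n'.join(content)
        return [nodes.literal_block(text, text)]
    example.arguments = (0, 0, 0)  # no arguments: (required, optional,
                                   #                last may contain spaces)
    example.options = None         # no options
    example.content = 1            # content allowed
    # Registration would go through
    # directives.register_directive('example', example).
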
+ def parse_directive_block(self, indented, line_offset, directive_fn,
+ option_presets):
+ arguments = []
+ options = {}
+ argument_spec = getattr(directive_fn, 'arguments', None)
+ if argument_spec and argument_spec[:2] == (0, 0):
+ argument_spec = None
+ option_spec = getattr(directive_fn, 'options', None)
+ content_spec = getattr(directive_fn, 'content', None)
+ if indented and not indented[0].strip():
+ indented.trim_start()
+ line_offset += 1
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ if indented and (argument_spec or option_spec):
+ for i in range(len(indented)):
+ if not indented[i].strip():
+ break
+ else:
+ i += 1
+ arg_block = indented[:i]
+ content = indented[i+1:]
+ content_offset = line_offset + i + 1
+ else:
+ content = indented
+ content_offset = line_offset
+ arg_block = []
+ while content and not content[0].strip():
+ content.trim_start()
+ content_offset += 1
+ if option_spec:
+ options, arg_block = self.parse_directive_options(
+ option_presets, option_spec, arg_block)
+ if arg_block and not argument_spec:
+ raise MarkupError('no arguments permitted; blank line '
+ 'required before content block')
+ if argument_spec:
+ arguments = self.parse_directive_arguments(
+ argument_spec, arg_block)
+ if content and not content_spec:
+ raise MarkupError('no content permitted')
+ return (arguments, options, content, content_offset)
+
+ def parse_directive_options(self, option_presets, option_spec, arg_block):
+ options = option_presets.copy()
+ for i in range(len(arg_block)):
+ if arg_block[i][:1] == ':':
+ opt_block = arg_block[i:]
+ arg_block = arg_block[:i]
+ break
+ else:
+ opt_block = []
+ if opt_block:
+ success, data = self.parse_extension_options(option_spec,
+ opt_block)
+ if success: # data is a dict of options
+ options.update(data)
+ else: # data is an error string
+ raise MarkupError(data)
+ return options, arg_block
+
+ def parse_directive_arguments(self, argument_spec, arg_block):
+ required, optional, last_whitespace = argument_spec
+ arg_text = '\n'.join(arg_block)
+ arguments = arg_text.split()
+ if len(arguments) < required:
+ raise MarkupError('%s argument(s) required, %s supplied'
+ % (required, len(arguments)))
+ elif len(arguments) > required + optional:
+ if last_whitespace:
+ arguments = arg_text.split(None, required + optional - 1)
+ else:
+ raise MarkupError(
+ 'maximum %s argument(s) allowed, %s supplied'
+ % (required + optional, len(arguments)))
+ return arguments
+
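A worked example of the splitting rule above, for an illustrative `argument_spec` of (1, 1, 1): one required argument, one optional, and the final argument may contain whitespace::

    arg_text = 'first second third'
    assert arg_text.split() == ['first', 'second', 'third']      # too many
    assert arg_text.split(None, 1) == ['first', 'second third']  # re-split
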
+ def parse_extension_options(self, option_spec, datalines):
+ """
+ Parse `datalines` for a field list containing extension options
+ matching `option_spec`.
+
+ :Parameters:
+ - `option_spec`: a mapping of option name to conversion
+ function, which should raise an exception on bad input.
+ - `datalines`: a list of input strings.
+
+ :Return:
+ - Success value, 1 or 0.
+ - An option dictionary on success, an error string on failure.
+ """
+ node = nodes.field_list()
+ newline_offset, blank_finish = self.nested_list_parse(
+ datalines, 0, node, initial_state='ExtensionOptions',
+ blank_finish=1)
+ if newline_offset != len(datalines): # incomplete parse of block
+ return 0, 'invalid option block'
+ try:
+ options = utils.extract_extension_options(node, option_spec)
+ except KeyError, detail:
+ return 0, ('unknown option: "%s"' % detail.args[0])
+ except (ValueError, TypeError), detail:
+ return 0, ('invalid option value: %s' % ' '.join(detail.args))
+ except utils.ExtensionOptionError, detail:
+ return 0, ('invalid option data: %s' % ' '.join(detail.args))
+ if blank_finish:
+ return 1, options
+ else:
+ return 0, 'option data incompletely parsed'
+
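A sketch of the `option_spec` mapping this method expects. The converters are the standard ones from `docutils.parsers.rst.directives`; the option names themselves are invented::

    from docutils.parsers.rst import directives

    example_option_spec = {
        'depth': directives.nonnegative_int,  # rejects negative values
        'local': directives.flag,             # rejects any supplied value
        }
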
+ def unknown_directive(self, type_name):
+ lineno = self.state_machine.abs_line_number()
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(0, strip_indent=0)
+ text = '\n'.join(indented)
+ error = self.reporter.error(
+ 'Unknown directive type "%s".' % type_name,
+ nodes.literal_block(text, text), line=lineno)
+ return [error], blank_finish
+
+ def comment(self, match):
+ if not match.string[match.end():].strip() \
+ and self.state_machine.is_next_line_blank(): # an empty comment?
+ return [nodes.comment()], 1 # "A tiny but practical wart."
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end())
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ text = '\n'.join(indented)
+ return [nodes.comment(text, text)], blank_finish
+
+ explicit.constructs = [
+ (footnote,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \[
+ ( # footnote label:
+ [0-9]+ # manually numbered footnote
+ | # *OR*
+ \# # anonymous auto-numbered footnote
+ | # *OR*
+ \#%s # auto-numbered footnote with a label
+ | # *OR*
+ \* # auto-symbol footnote
+ )
+ \]
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
+ (citation,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \[(%s)\] # citation label
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
+ (hyperlink_target,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ _ # target indicator
+ (?![ ]|$) # first char. not space or EOL
+ """, re.VERBOSE)),
+ (substitution_def,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ \| # substitution indicator
+ (?![ ]|$) # first char. not space or EOL
+ """, re.VERBOSE)),
+ (directive,
+ re.compile(r"""
+ \.\.[ ]+ # explicit markup start
+ (%s) # directive name
+ [ ]? # optional space
+ :: # directive delimiter
+ ([ ]+|$) # whitespace or end of line
+ """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
+
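For reference, one source line of the kind each construct above is designed to match (standard reStructuredText; names and the URI are placeholders)::

    examples = [
        '.. [1] A footnote.',                    # footnote
        '.. [CIT2002] A citation.',              # citation
        '.. _target: http://example.org/',       # hyperlink_target
        '.. |name| replace:: replacement text',  # substitution_def
        '.. note:: A directive.',                # directive
        ]
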
+ def explicit_markup(self, match, context, next_state):
+ """Footnotes, hyperlink targets, directives, comments."""
+ nodelist, blank_finish = self.explicit_construct(match)
+ self.parent += nodelist
+ self.explicit_list(blank_finish)
+ return [], next_state, []
+
+ def explicit_construct(self, match):
+ """Determine which explicit construct this is, parse & return it."""
+ errors = []
+ for method, pattern in self.explicit.constructs:
+ expmatch = pattern.match(match.string)
+ if expmatch:
+ try:
+ return method(self, expmatch)
+ except MarkupError, (message, lineno): # never reached?
+ errors.append(self.reporter.warning(message, line=lineno))
+ break
+ nodelist, blank_finish = self.comment(match)
+ return nodelist + errors, blank_finish
+
+ def explicit_list(self, blank_finish):
+ """
+ Create a nested state machine for a series of explicit markup
+ constructs (including anonymous hyperlink targets).
+ """
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=self.parent, initial_state='Explicit',
+ blank_finish=blank_finish,
+ match_titles=self.state_machine.match_titles)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Explicit markup')
+
+ def anonymous(self, match, context, next_state):
+ """Anonymous hyperlink targets."""
+ nodelist, blank_finish = self.anonymous_target(match)
+ self.parent += nodelist
+ self.explicit_list(blank_finish)
+ return [], next_state, []
+
+ def anonymous_target(self, match):
+ lineno = self.state_machine.abs_line_number()
+ block, indent, offset, blank_finish \
+ = self.state_machine.get_first_known_indented(match.end(),
+ until_blank=1)
+ blocktext = match.string[:match.end()] + '\n'.join(block)
+ block = [escape2null(line) for line in block]
+ target = self.make_target(block, blocktext, lineno, '')
+ return [target], blank_finish
+
+ def line(self, match, context, next_state):
+ """Section title overline or transition marker."""
+ if self.state_machine.match_titles:
+ return [match.string], 'Line', []
+ elif match.string.strip() == '::':
+ raise statemachine.TransitionCorrection('text')
+ elif len(match.string.strip()) < 4:
+ msg = self.reporter.info(
+ 'Unexpected possible title overline or transition.\n'
+ "Treating it as ordinary text because it's so short.",
+ line=self.state_machine.abs_line_number())
+ self.parent += msg
+ raise statemachine.TransitionCorrection('text')
+ else:
+ blocktext = self.state_machine.line
+ msg = self.reporter.severe(
+ 'Unexpected section title or transition.',
+ nodes.literal_block(blocktext, blocktext),
+ line=self.state_machine.abs_line_number())
+ self.parent += msg
+ return [], next_state, []
+
+ def text(self, match, context, next_state):
+ """Titles, definition lists, paragraphs."""
+ return [match.string], 'Text', []
+
+
+class RFC2822Body(Body):
+
+ """
+ RFC2822 headers are only valid as the first constructs in documents. As
+ soon as anything else appears, the `Body` state should take over.
+ """
+
+ patterns = Body.patterns.copy() # can't modify the original
+ patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
+ initial_transitions = [(name, 'Body')
+ for name in Body.initial_transitions]
+ initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
+
+ def rfc2822(self, match, context, next_state):
+ """RFC2822-style field list item."""
+ fieldlist = nodes.field_list(classes=['rfc2822'])
+ self.parent += fieldlist
+ field, blank_finish = self.rfc2822_field(match)
+ fieldlist += field
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=fieldlist, initial_state='RFC2822List',
+ blank_finish=blank_finish)
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning(
+ 'RFC2822-style field list')
+ return [], next_state, []
+
+ def rfc2822_field(self, match):
+ name = match.string[:match.string.find(':')]
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_first_known_indented(match.end(),
+ until_blank=1)
+ fieldnode = nodes.field()
+ fieldnode += nodes.field_name(name, name)
+ fieldbody = nodes.field_body('\n'.join(indented))
+ fieldnode += fieldbody
+ if indented:
+ self.nested_parse(indented, input_offset=line_offset,
+ node=fieldbody)
+ return fieldnode, blank_finish
+
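A sketch of the kind of header block the `rfc2822` transition recognizes; per the class docstring it is only honoured at the very start of a document, and the field names and values here are invented::

    rfc2822_example = '\n'.join(['Author: A. Writer',
                                 'Contact: a.writer@example.org',
                                 'Status: Draft'])
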
+
+class SpecializedBody(Body):
+
+ """
+ Superclass for second and subsequent compound element members. Compound
+ elements are lists and list-like constructs.
+
+ All transition methods are disabled (redefined as `invalid_input`).
+ Override individual methods in subclasses to re-enable.
+
+ For example, once an initial bullet list item, say, is recognized, the
+ `BulletList` subclass takes over, with a "bullet_list" node as its
+ container. Upon encountering the initial bullet list item, `Body.bullet`
+ calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
+ starts up a nested parsing session with `BulletList` as the initial state.
+ Only the ``bullet`` transition method is enabled in `BulletList`; as long
+ as only bullet list items are encountered, they are parsed and inserted
+ into the container. The first construct which is *not* a bullet list item
+ triggers the `invalid_input` method, which ends the nested parse and
+ closes the container. `BulletList` needs to recognize input that is
+ invalid in the context of a bullet list, which means everything *other
+ than* bullet list items, so it inherits the transition list created in
+ `Body`.
+ """
+
+ def invalid_input(self, match=None, context=None, next_state=None):
+ """Not a compound element member. Abort this state machine."""
+ self.state_machine.previous_line() # back up so parent SM can reassess
+ raise EOFError
+
+ indent = invalid_input
+ bullet = invalid_input
+ enumerator = invalid_input
+ field_marker = invalid_input
+ option_marker = invalid_input
+ doctest = invalid_input
+ line_block = invalid_input
+ grid_table_top = invalid_input
+ simple_table_top = invalid_input
+ explicit_markup = invalid_input
+ anonymous = invalid_input
+ line = invalid_input
+ text = invalid_input
+
+
+class BulletList(SpecializedBody):
+
+ """Second and subsequent bullet_list list_items."""
+
+ def bullet(self, match, context, next_state):
+ """Bullet list item."""
+ if match.string[0] != self.parent['bullet']:
+ # different bullet: new list
+ self.invalid_input()
+ listitem, blank_finish = self.list_item(match.end())
+ self.parent += listitem
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class DefinitionList(SpecializedBody):
+
+ """Second and subsequent definition_list_items."""
+
+ def text(self, match, context, next_state):
+ """Definition lists."""
+ return [match.string], 'Definition', []
+
+
+class EnumeratedList(SpecializedBody):
+
+ """Second and subsequent enumerated_list list_items."""
+
+ def enumerator(self, match, context, next_state):
+ """Enumerated list item."""
+ format, sequence, text, ordinal = self.parse_enumerator(
+ match, self.parent['enumtype'])
+ if ( format != self.format
+ or (sequence != '#' and (sequence != self.parent['enumtype']
+ or self.auto
+ or ordinal != (self.lastordinal + 1)))
+ or not self.is_enumerated_list_item(ordinal, sequence, format)):
+ # different enumeration: new list
+ self.invalid_input()
+ if sequence == '#':
+ self.auto = 1
+ listitem, blank_finish = self.list_item(match.end())
+ self.parent += listitem
+ self.blank_finish = blank_finish
+ self.lastordinal = ordinal
+ return [], next_state, []
+
+
+class FieldList(SpecializedBody):
+
+ """Second and subsequent field_list fields."""
+
+ def field_marker(self, match, context, next_state):
+ """Field list field."""
+ field, blank_finish = self.field(match)
+ self.parent += field
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class OptionList(SpecializedBody):
+
+ """Second and subsequent option_list option_list_items."""
+
+ def option_marker(self, match, context, next_state):
+ """Option list item."""
+ try:
+ option_list_item, blank_finish = self.option_list_item(match)
+ except MarkupError, (message, lineno):
+ self.invalid_input()
+ self.parent += option_list_item
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class RFC2822List(SpecializedBody, RFC2822Body):
+
+ """Second and subsequent RFC2822-style field_list fields."""
+
+ patterns = RFC2822Body.patterns
+ initial_transitions = RFC2822Body.initial_transitions
+
+ def rfc2822(self, match, context, next_state):
+ """RFC2822-style field list item."""
+ field, blank_finish = self.rfc2822_field(match)
+ self.parent += field
+ self.blank_finish = blank_finish
+ return [], 'RFC2822List', []
+
+ blank = SpecializedBody.invalid_input
+
+
+class ExtensionOptions(FieldList):
+
+ """
+ Parse field_list fields for extension options.
+
+ No nested parsing is done (including inline markup parsing).
+ """
+
+ def parse_field_body(self, indented, offset, node):
+ """Override `Body.parse_field_body` for simpler parsing."""
+ lines = []
+ for line in list(indented) + ['']:
+ if line.strip():
+ lines.append(line)
+ elif lines:
+ text = '\n'.join(lines)
+ node += nodes.paragraph(text, text)
+ lines = []
+
+
+class LineBlock(SpecializedBody):
+
+ """Second and subsequent lines of a line_block."""
+
+ blank = SpecializedBody.invalid_input
+
+ def line_block(self, match, context, next_state):
+ """New line of line block."""
+ lineno = self.state_machine.abs_line_number()
+ line, messages, blank_finish = self.line_block_line(match, lineno)
+ self.parent += line
+ self.parent.parent += messages
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+
+class Explicit(SpecializedBody):
+
+ """Second and subsequent explicit markup construct."""
+
+ def explicit_markup(self, match, context, next_state):
+ """Footnotes, hyperlink targets, directives, comments."""
+ nodelist, blank_finish = self.explicit_construct(match)
+ self.parent += nodelist
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+ def anonymous(self, match, context, next_state):
+ """Anonymous hyperlink targets."""
+ nodelist, blank_finish = self.anonymous_target(match)
+ self.parent += nodelist
+ self.blank_finish = blank_finish
+ return [], next_state, []
+
+ blank = SpecializedBody.invalid_input
+
+
+class SubstitutionDef(Body):
+
+ """
+ Parser for the contents of a substitution_definition element.
+ """
+
+ patterns = {
+ 'embedded_directive': re.compile(r'(%s)::( +|$)'
+ % Inliner.simplename, re.UNICODE),
+ 'text': r''}
+ initial_transitions = ['embedded_directive', 'text']
+
+ def embedded_directive(self, match, context, next_state):
+ nodelist, blank_finish = self.directive(match,
+ alt=self.parent['names'][0])
+ self.parent += nodelist
+ if not self.state_machine.at_eof():
+ self.blank_finish = blank_finish
+ raise EOFError
+
+ def text(self, match, context, next_state):
+ if not self.state_machine.at_eof():
+ self.blank_finish = self.state_machine.is_next_line_blank()
+ raise EOFError
+
+
+class Text(RSTState):
+
+ """
+ Classifier of second line of a text block.
+
+ Could be a paragraph, a definition list item, or a title.
+ """
+
+ patterns = {'underline': Body.patterns['line'],
+ 'text': r''}
+ initial_transitions = [('underline', 'Body'), ('text', 'Body')]
+
+ def blank(self, match, context, next_state):
+ """End of paragraph."""
+ paragraph, literalnext = self.paragraph(
+ context, self.state_machine.abs_line_number() - 1)
+ self.parent += paragraph
+ if literalnext:
+ self.parent += self.literal_block()
+ return [], 'Body', []
+
+ def eof(self, context):
+ if context:
+ self.blank(None, context, None)
+ return []
+
+ def indent(self, match, context, next_state):
+ """Definition list item."""
+ definitionlist = nodes.definition_list()
+ definitionlistitem, blank_finish = self.definition_list_item(context)
+ definitionlist += definitionlistitem
+ self.parent += definitionlist
+ offset = self.state_machine.line_offset + 1 # next line
+ newline_offset, blank_finish = self.nested_list_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=self.state_machine.abs_line_offset() + 1,
+ node=definitionlist, initial_state='DefinitionList',
+ blank_finish=blank_finish, blank_finish_state='Definition')
+ self.goto_line(newline_offset)
+ if not blank_finish:
+ self.parent += self.unindent_warning('Definition list')
+ return [], 'Body', []
+
+ def underline(self, match, context, next_state):
+ """Section title."""
+ lineno = self.state_machine.abs_line_number()
+ title = context[0].rstrip()
+ underline = match.string.rstrip()
+ source = title + '\n' + underline
+ messages = []
+ if column_width(title) > len(underline):
+ if len(underline) < 4:
+ if self.state_machine.match_titles:
+ msg = self.reporter.info(
+ 'Possible title underline, too short for the title.\n'
+ "Treating it as ordinary text because it's so short.",
+ line=lineno)
+ self.parent += msg
+ raise statemachine.TransitionCorrection('text')
+ else:
+ blocktext = context[0] + '\n' + self.state_machine.line
+ msg = self.reporter.warning(
+ 'Title underline too short.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ messages.append(msg)
+ if not self.state_machine.match_titles:
+ blocktext = context[0] + '\n' + self.state_machine.line
+ msg = self.reporter.severe(
+ 'Unexpected section title.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += messages
+ self.parent += msg
+ return [], next_state, []
+ style = underline[0]
+ context[:] = []
+ self.section(title, source, style, lineno - 1, messages)
+ return [], next_state, []
+
+ def text(self, match, context, next_state):
+ """Paragraph."""
+ startline = self.state_machine.abs_line_number() - 1
+ msg = None
+ try:
+ block = self.state_machine.get_text_block(flush_left=1)
+ except statemachine.UnexpectedIndentationError, instance:
+ block, source, lineno = instance.args
+ msg = self.reporter.error('Unexpected indentation.',
+ source=source, line=lineno)
+ lines = context + list(block)
+ paragraph, literalnext = self.paragraph(lines, startline)
+ self.parent += paragraph
+ self.parent += msg
+ if literalnext:
+ try:
+ self.state_machine.next_line()
+ except EOFError:
+ pass
+ self.parent += self.literal_block()
+ return [], next_state, []
+
+ def literal_block(self):
+ """Return a list of nodes."""
+ indented, indent, offset, blank_finish = \
+ self.state_machine.get_indented()
+ while indented and not indented[-1].strip():
+ indented.trim_end()
+ if not indented:
+ return self.quoted_literal_block()
+ data = '\n'.join(indented)
+ literal_block = nodes.literal_block(data, data)
+ literal_block.line = offset + 1
+ nodelist = [literal_block]
+ if not blank_finish:
+ nodelist.append(self.unindent_warning('Literal block'))
+ return nodelist
+
+ def quoted_literal_block(self):
+ abs_line_offset = self.state_machine.abs_line_offset()
+ offset = self.state_machine.line_offset
+ parent_node = nodes.Element()
+ new_abs_offset = self.nested_parse(
+ self.state_machine.input_lines[offset:],
+ input_offset=abs_line_offset, node=parent_node, match_titles=0,
+ state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
+ 'initial_state': 'QuotedLiteralBlock'})
+ self.goto_line(new_abs_offset)
+ return parent_node.children
+
+ def definition_list_item(self, termline):
+ indented, indent, line_offset, blank_finish = \
+ self.state_machine.get_indented()
+ definitionlistitem = nodes.definition_list_item(
+ '\n'.join(termline + list(indented)))
+ lineno = self.state_machine.abs_line_number() - 1
+ definitionlistitem.line = lineno
+ termlist, messages = self.term(termline, lineno)
+ definitionlistitem += termlist
+ definition = nodes.definition('', *messages)
+ definitionlistitem += definition
+ if termline[0][-2:] == '::':
+ definition += self.reporter.info(
+ 'Blank line missing before literal block (after the "::")? '
+ 'Interpreted as a definition list item.', line=line_offset+1)
+ self.nested_parse(indented, input_offset=line_offset, node=definition)
+ return definitionlistitem, blank_finish
+
+ classifier_delimiter = re.compile(' +: +')
+
+ def term(self, lines, lineno):
+ """Return a definition_list's term and optional classifiers."""
+ assert len(lines) == 1
+ text_nodes, messages = self.inline_text(lines[0], lineno)
+ term_node = nodes.term()
+ node_list = [term_node]
+ for i in range(len(text_nodes)):
+ node = text_nodes[i]
+ if isinstance(node, nodes.Text):
+ parts = self.classifier_delimiter.split(node.rawsource)
+ if len(parts) == 1:
+ node_list[-1] += node
+ else:
+ node_list[-1] += nodes.Text(parts[0].rstrip())
+ for part in parts[1:]:
+ classifier_node = nodes.classifier('', part)
+ node_list.append(classifier_node)
+ else:
+ node_list[-1] += node
+ return node_list, messages
+
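A sketch of how `classifier_delimiter` splits a definition-list term line into the term and its classifiers (standard reStructuredText uses " : " as the separator)::

    import re

    classifier_delimiter = re.compile(' +: +')
    parts = classifier_delimiter.split('term : classifier one : classifier two')
    assert parts == ['term', 'classifier one', 'classifier two']
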
+
+class SpecializedText(Text):
+
+ """
+ Superclass for second and subsequent lines of Text-variants.
+
+ All transition methods are disabled. Override individual methods in
+ subclasses to re-enable.
+ """
+
+ def eof(self, context):
+ """Incomplete construct."""
+ return []
+
+ def invalid_input(self, match=None, context=None, next_state=None):
+ """Not a compound element member. Abort this state machine."""
+ raise EOFError
+
+ blank = invalid_input
+ indent = invalid_input
+ underline = invalid_input
+ text = invalid_input
+
+
+class Definition(SpecializedText):
+
+ """Second line of potential definition_list_item."""
+
+ def eof(self, context):
+ """Not a definition."""
+ self.state_machine.previous_line(2) # so parent SM can reassess
+ return []
+
+ def indent(self, match, context, next_state):
+ """Definition list item."""
+ definitionlistitem, blank_finish = self.definition_list_item(context)
+ self.parent += definitionlistitem
+ self.blank_finish = blank_finish
+ return [], 'DefinitionList', []
+
+
+class Line(SpecializedText):
+
+ """
+ Second line of over- & underlined section title or transition marker.
+ """
+
+ eofcheck = 1 # @@@ ???
+ """Set to 0 while parsing sections, so that we don't catch the EOF."""
+
+ def eof(self, context):
+ """Transition marker at end of section or document."""
+ marker = context[0].strip()
+ if self.memo.section_bubble_up_kludge:
+ self.memo.section_bubble_up_kludge = 0
+ elif len(marker) < 4:
+ self.state_correction(context)
+ if self.eofcheck: # ignore EOFError with sections
+ lineno = self.state_machine.abs_line_number() - 1
+ transition = nodes.transition(rawsource=context[0])
+ transition.line = lineno
+ self.parent += transition
+ self.eofcheck = 1
+ return []
+
+ def blank(self, match, context, next_state):
+ """Transition marker."""
+ lineno = self.state_machine.abs_line_number() - 1
+ marker = context[0].strip()
+ if len(marker) < 4:
+ self.state_correction(context)
+ transition = nodes.transition(rawsource=marker)
+ transition.line = lineno
+ self.parent += transition
+ return [], 'Body', []
+
+ def text(self, match, context, next_state):
+ """Potential over- & underlined title."""
+ lineno = self.state_machine.abs_line_number() - 1
+ overline = context[0]
+ title = match.string
+ underline = ''
+ try:
+ underline = self.state_machine.next_line()
+ except EOFError:
+ blocktext = overline + '\n' + title
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Incomplete section title.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ source = '%s\n%s\n%s' % (overline, title, underline)
+ overline = overline.rstrip()
+ underline = underline.rstrip()
+ if not self.transitions['underline'][0].match(underline):
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Missing matching underline for section title overline.',
+ nodes.literal_block(source, source), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ elif overline != underline:
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.severe(
+ 'Title overline & underline mismatch.',
+ nodes.literal_block(source, source), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+ title = title.rstrip()
+ messages = []
+ if column_width(title) > len(overline):
+ blocktext = overline + '\n' + title + '\n' + underline
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 2)
+ else:
+ msg = self.reporter.warning(
+ 'Title overline too short.',
+ nodes.literal_block(source, source), line=lineno)
+ messages.append(msg)
+ style = (overline[0], underline[0])
+ self.eofcheck = 0 # @@@ not sure this is correct
+ self.section(title.lstrip(), source, style, lineno + 1, messages)
+ self.eofcheck = 1
+ return [], 'Body', []
+
+ indent = text # indented title
+
+ def underline(self, match, context, next_state):
+ overline = context[0]
+ blocktext = overline + '\n' + self.state_machine.line
+ lineno = self.state_machine.abs_line_number() - 1
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 1)
+ msg = self.reporter.error(
+ 'Invalid section title or transition marker.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+
+ def short_overline(self, context, blocktext, lineno, lines=1):
+ msg = self.reporter.info(
+ 'Possible incomplete section title.\nTreating the overline as '
+ "ordinary text because it's so short.", line=lineno)
+ self.parent += msg
+ self.state_correction(context, lines)
+
+ def state_correction(self, context, lines=1):
+ self.state_machine.previous_line(lines)
+ context[:] = []
+ raise statemachine.StateCorrection('Body', 'text')
+
+
+class QuotedLiteralBlock(RSTState):
+
+ """
+ Nested parse handler for quoted (unindented) literal blocks.
+
+ Special-purpose. Not for inclusion in `state_classes`.
+ """
+
+ patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
+ 'text': r''}
+ initial_transitions = ('initial_quoted', 'text')
+
+ def __init__(self, state_machine, debug=0):
+ RSTState.__init__(self, state_machine, debug)
+ self.messages = []
+ self.initial_lineno = None
+
+ def blank(self, match, context, next_state):
+ if context:
+ raise EOFError
+ else:
+ return context, next_state, []
+
+ def eof(self, context):
+ if context:
+ text = '\n'.join(context)
+ literal_block = nodes.literal_block(text, text)
+ literal_block.line = self.initial_lineno
+ self.parent += literal_block
+ else:
+ self.parent += self.reporter.warning(
+ 'Literal block expected; none found.',
+ line=self.state_machine.abs_line_number())
+ self.state_machine.previous_line()
+ self.parent += self.messages
+ return []
+
+ def indent(self, match, context, next_state):
+ assert context, ('QuotedLiteralBlock.indent: context should not '
+ 'be empty!')
+ self.messages.append(
+ self.reporter.error('Unexpected indentation.',
+ line=self.state_machine.abs_line_number()))
+ self.state_machine.previous_line()
+ raise EOFError
+
+ def initial_quoted(self, match, context, next_state):
+ """Match arbitrary quote character on the first line only."""
+ self.remove_transition('initial_quoted')
+ quote = match.string[0]
+ pattern = re.compile(re.escape(quote))
+ # New transition matches consistent quotes only:
+ self.add_transition('quoted',
+ (pattern, self.quoted, self.__class__.__name__))
+ self.initial_lineno = self.state_machine.abs_line_number()
+ return [match.string], next_state, []
+
+ def quoted(self, match, context, next_state):
+ """Match consistent quotes on subsequent lines."""
+ context.append(match.string)
+ return context, next_state, []
+
+ def text(self, match, context, next_state):
+ if context:
+ self.messages.append(
+ self.reporter.error('Inconsistent literal block quoting.',
+ line=self.state_machine.abs_line_number()))
+ self.state_machine.previous_line()
+ raise EOFError
+
+
+state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
+ OptionList, LineBlock, ExtensionOptions, Explicit, Text,
+ Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
+"""Standard set of State classes used to start `RSTStateMachine`."""
diff --git a/docutils/parsers/rst/tableparser.py b/docutils/parsers/rst/tableparser.py
new file mode 100644
index 000000000..1d5dc9dfd
--- /dev/null
+++ b/docutils/parsers/rst/tableparser.py
@@ -0,0 +1,527 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This module defines table parser classes, which parse plaintext-graphic tables
+and produce a well-formed data structure suitable for building a CALS table.
+
+:Classes:
+ - `GridTableParser`: Parse fully-formed tables represented with a grid.
+ - `SimpleTableParser`: Parse simple tables, delimited by top & bottom
+ borders.
+
+:Exception class: `TableMarkupError`
+
+:Function:
+ `update_dict_of_lists()`: Merge two dictionaries containing list values.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import re
+import sys
+from docutils import DataError
+
+
+class TableMarkupError(DataError): pass
+
+
+class TableParser:
+
+ """
+ Abstract superclass for the common parts of the syntax-specific parsers.
+ """
+
+ head_body_separator_pat = None
+ """Matches the row separator between head rows and body rows."""
+
+ double_width_pad_char = '\x00'
+ """Padding character for East Asian double-width text."""
+
+ def parse(self, block):
+ """
+ Analyze the text `block` and return a table data structure.
+
+ Given a plaintext-graphic table in `block` (list of lines of text; no
+ whitespace padding), parse the table, construct and return the data
+ necessary to construct a CALS table or equivalent.
+
+ Raise `TableMarkupError` if there is any problem with the markup.
+ """
+ self.setup(block)
+ self.find_head_body_sep()
+ self.parse_table()
+ structure = self.structure_from_cells()
+ return structure
+
+ def find_head_body_sep(self):
+ """Look for a head/body row separator line; store the line index."""
+ for i in range(len(self.block)):
+ line = self.block[i]
+ if self.head_body_separator_pat.match(line):
+ if self.head_body_sep:
+ raise TableMarkupError(
+ 'Multiple head/body row separators in table (at line '
+ 'offset %s and %s); only one allowed.'
+ % (self.head_body_sep, i))
+ else:
+ self.head_body_sep = i
+ self.block[i] = line.replace('=', '-')
+ if self.head_body_sep == 0 or self.head_body_sep == (len(self.block)
+ - 1):
+ raise TableMarkupError('The head/body row separator may not be '
+ 'the first or last line of the table.')
+
+
+class GridTableParser(TableParser):
+
+ """
+ Parse a grid table using `parse()`.
+
+ Here's an example of a grid table::
+
+ +------------------------+------------+----------+----------+
+ | Header row, column 1 | Header 2 | Header 3 | Header 4 |
+ +========================+============+==========+==========+
+ | body row 1, column 1 | column 2 | column 3 | column 4 |
+ +------------------------+------------+----------+----------+
+ | body row 2 | Cells may span columns. |
+ +------------------------+------------+---------------------+
+ | body row 3 | Cells may | - Table cells |
+ +------------------------+ span rows. | - contain |
+ | body row 4 | | - body elements. |
+ +------------------------+------------+---------------------+
+
+ Intersections use '+', row separators use '-' (except for one optional
+ head/body row separator, which uses '='), and column separators use '|'.
+
+ Passing the above table to the `parse()` method will result in the
+ following data structure::
+
+ ([24, 12, 10, 10],
+ [[(0, 0, 1, ['Header row, column 1']),
+ (0, 0, 1, ['Header 2']),
+ (0, 0, 1, ['Header 3']),
+ (0, 0, 1, ['Header 4'])]],
+ [[(0, 0, 3, ['body row 1, column 1']),
+ (0, 0, 3, ['column 2']),
+ (0, 0, 3, ['column 3']),
+ (0, 0, 3, ['column 4'])],
+ [(0, 0, 5, ['body row 2']),
+ (0, 2, 5, ['Cells may span columns.']),
+ None,
+ None],
+ [(0, 0, 7, ['body row 3']),
+ (1, 0, 7, ['Cells may', 'span rows.', '']),
+ (1, 1, 7, ['- Table cells', '- contain', '- body elements.']),
+ None],
+ [(0, 0, 9, ['body row 4']), None, None, None]])
+
+ The first item is a list containing column widths (colspecs). The second
+ item is a list of head rows, and the third is a list of body rows. Each
+ row contains a list of cells. Each cell is either None (for a cell unused
+ because of another cell's span), or a tuple. A cell tuple contains four
+ items: the number of extra rows used by the cell in a vertical span
+ (morerows); the number of extra columns used by the cell in a horizontal
+ span (morecols); the line offset of the first line of the cell contents;
+ and the cell contents, a list of lines of text.
+ """
+
+ head_body_separator_pat = re.compile(r'\+=[=+]+=\+ *$')
+
+ def setup(self, block):
+ self.block = block[:] # make a copy; it may be modified
+ self.block.disconnect() # don't propagate changes to parent
+ self.bottom = len(block) - 1
+ self.right = len(block[0]) - 1
+ self.head_body_sep = None
+ self.done = [-1] * len(block[0])
+ self.cells = []
+ self.rowseps = {0: [0]}
+ self.colseps = {0: [0]}
+
+ def parse_table(self):
+ """
+ Start with a queue of upper-left corners, containing the upper-left
+ corner of the table itself. Trace out one rectangular cell, remember
+ it, and add its upper-right and lower-left corners to the queue of
+ potential upper-left corners of further cells. Process the queue in
+ top-to-bottom order, keeping track of how much of each text column has
+ been seen.
+
+ We'll end up knowing all the row and column boundaries, cell positions
+ and their dimensions.
+ """
+ corners = [(0, 0)]
+ while corners:
+ top, left = corners.pop(0)
+ if top == self.bottom or left == self.right \
+ or top <= self.done[left]:
+ continue
+ result = self.scan_cell(top, left)
+ if not result:
+ continue
+ bottom, right, rowseps, colseps = result
+ update_dict_of_lists(self.rowseps, rowseps)
+ update_dict_of_lists(self.colseps, colseps)
+ self.mark_done(top, left, bottom, right)
+ cellblock = self.block.get_2D_block(top + 1, left + 1,
+ bottom, right)
+ cellblock.disconnect() # lines in cell can't sync with parent
+ cellblock.replace(self.double_width_pad_char, '')
+ self.cells.append((top, left, bottom, right, cellblock))
+ corners.extend([(top, right), (bottom, left)])
+ corners.sort()
+ if not self.check_parse_complete():
+ raise TableMarkupError('Malformed table; parse incomplete.')
+
+ def mark_done(self, top, left, bottom, right):
+ """For keeping track of how much of each text column has been seen."""
+ before = top - 1
+ after = bottom - 1
+ for col in range(left, right):
+ assert self.done[col] == before
+ self.done[col] = after
+
+ def check_parse_complete(self):
+ """Each text column should have been completely seen."""
+ last = self.bottom - 1
+ for col in range(self.right):
+ if self.done[col] != last:
+ return None
+ return 1
+
+ def scan_cell(self, top, left):
+ """Starting at the top-left corner, start tracing out a cell."""
+ assert self.block[top][left] == '+'
+ result = self.scan_right(top, left)
+ return result
+
+ def scan_right(self, top, left):
+ """
+ Look for the top-right corner of the cell, and make note of all column
+ boundaries ('+').
+ """
+ colseps = {}
+ line = self.block[top]
+ for i in range(left + 1, self.right + 1):
+ if line[i] == '+':
+ colseps[i] = [top]
+ result = self.scan_down(top, left, i)
+ if result:
+ bottom, rowseps, newcolseps = result
+ update_dict_of_lists(colseps, newcolseps)
+ return bottom, i, rowseps, colseps
+ elif line[i] != '-':
+ return None
+ return None
+
+ def scan_down(self, top, left, right):
+ """
+ Look for the bottom-right corner of the cell, making note of all row
+ boundaries.
+ """
+ rowseps = {}
+ for i in range(top + 1, self.bottom + 1):
+ if self.block[i][right] == '+':
+ rowseps[i] = [right]
+ result = self.scan_left(top, left, i, right)
+ if result:
+ newrowseps, colseps = result
+ update_dict_of_lists(rowseps, newrowseps)
+ return i, rowseps, colseps
+ elif self.block[i][right] != '|':
+ return None
+ return None
+
+ def scan_left(self, top, left, bottom, right):
+ """
+ Noting column boundaries, look for the bottom-left corner of the cell.
+ It must line up with the starting point.
+ """
+ colseps = {}
+ line = self.block[bottom]
+ for i in range(right - 1, left, -1):
+ if line[i] == '+':
+ colseps[i] = [bottom]
+ elif line[i] != '-':
+ return None
+ if line[left] != '+':
+ return None
+ result = self.scan_up(top, left, bottom, right)
+ if result is not None:
+ rowseps = result
+ return rowseps, colseps
+ return None
+
+ def scan_up(self, top, left, bottom, right):
+ """
+ Noting row boundaries, see if we can return to the starting point.
+ """
+ rowseps = {}
+ for i in range(bottom - 1, top, -1):
+ if self.block[i][left] == '+':
+ rowseps[i] = [left]
+ elif self.block[i][left] != '|':
+ return None
+ return rowseps
+
+ def structure_from_cells(self):
+ """
+ From the data collected by `scan_cell()`, convert to the final data
+ structure.
+ """
+ rowseps = self.rowseps.keys() # list of row boundaries
+ rowseps.sort()
+ rowindex = {}
+ for i in range(len(rowseps)):
+ rowindex[rowseps[i]] = i # row boundary -> row number mapping
+ colseps = self.colseps.keys() # list of column boundaries
+ colseps.sort()
+ colindex = {}
+ for i in range(len(colseps)):
+ colindex[colseps[i]] = i # column boundary -> col number map
+ colspecs = [(colseps[i] - colseps[i - 1] - 1)
+ for i in range(1, len(colseps))] # list of column widths
+ # prepare an empty table with the correct number of rows & columns
+ onerow = [None for i in range(len(colseps) - 1)]
+ rows = [onerow[:] for i in range(len(rowseps) - 1)]
+ # keep track of # of cells remaining; should reduce to zero
+ remaining = (len(rowseps) - 1) * (len(colseps) - 1)
+ for top, left, bottom, right, block in self.cells:
+ rownum = rowindex[top]
+ colnum = colindex[left]
+ assert rows[rownum][colnum] is None, (
+ 'Cell (row %s, column %s) already used.'
+ % (rownum + 1, colnum + 1))
+ morerows = rowindex[bottom] - rownum - 1
+ morecols = colindex[right] - colnum - 1
+ remaining -= (morerows + 1) * (morecols + 1)
+ # write the cell into the table
+ rows[rownum][colnum] = (morerows, morecols, top + 1, block)
+ assert remaining == 0, 'Unused cells remaining.'
+ if self.head_body_sep: # separate head rows from body rows
+ numheadrows = rowindex[self.head_body_sep]
+ headrows = rows[:numheadrows]
+ bodyrows = rows[numheadrows:]
+ else:
+ headrows = []
+ bodyrows = rows
+ return (colspecs, headrows, bodyrows)
+
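A usage sketch for the parser above, assuming the block is supplied as a `docutils.statemachine.StringList` (whose `disconnect()`, `replace()`, and `get_2D_block()` methods the parser relies on) and the Python 2 era semantics used throughout this file::

    from docutils.statemachine import StringList
    from docutils.parsers.rst.tableparser import GridTableParser

    lines = ['+-------+---------+',
             '| col 1 | col 2   |',
             '+-------+---------+']
    colspecs, headrows, bodyrows = GridTableParser().parse(StringList(lines))
    # colspecs == [7, 9]; headrows == [] (no '=' separator row); bodyrows
    # holds one row of two cells, each (0, 0, 1, <cell contents>).
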
+
+class SimpleTableParser(TableParser):
+
+ """
+ Parse a simple table using `parse()`.
+
+ Here's an example of a simple table::
+
+ ===== =====
+ col 1 col 2
+ ===== =====
+ 1 Second column of row 1.
+ 2 Second column of row 2.
+ Second line of paragraph.
+ 3 - Second column of row 3.
+
+ - Second item in bullet
+ list (row 3, column 2).
+ 4 is a span
+ ------------
+ 5
+ ===== =====
+
+ Top and bottom borders use '=', column span underlines use '-', column
+ separation is indicated with spaces.
+
+ Passing the above table to the `parse()` method will result in the
+ following data structure, whose interpretation is the same as for
+ `GridTableParser`::
+
+ ([5, 25],
+ [[(0, 0, 1, ['col 1']),
+ (0, 0, 1, ['col 2'])]],
+ [[(0, 0, 3, ['1']),
+ (0, 0, 3, ['Second column of row 1.'])],
+ [(0, 0, 4, ['2']),
+ (0, 0, 4, ['Second column of row 2.',
+ 'Second line of paragraph.'])],
+ [(0, 0, 6, ['3']),
+ (0, 0, 6, ['- Second column of row 3.',
+ '',
+ '- Second item in bullet',
+ ' list (row 3, column 2).'])],
+ [(0, 1, 10, ['4 is a span'])],
+ [(0, 0, 12, ['5']),
+ (0, 0, 12, [''])]])
+ """
+
+ head_body_separator_pat = re.compile('=[ =]*$')
+ span_pat = re.compile('-[ -]*$')
+
+ def setup(self, block):
+ self.block = block[:] # make a copy; it will be modified
+ self.block.disconnect() # don't propagate changes to parent
+ # Convert top & bottom borders to column span underlines:
+ self.block[0] = self.block[0].replace('=', '-')
+ self.block[-1] = self.block[-1].replace('=', '-')
+ self.head_body_sep = None
+ self.columns = []
+ self.border_end = None
+ self.table = []
+ self.done = [-1] * len(block[0])
+ self.rowseps = {0: [0]}
+ self.colseps = {0: [0]}
+
+ def parse_table(self):
+ """
+ First determine the column boundaries from the top border, then
+ process rows. Each row may consist of multiple lines; accumulate
+ lines until a row is complete. Call `self.parse_row` to finish the
+ job.
+ """
+ # Top border must fully describe all table columns.
+ self.columns = self.parse_columns(self.block[0], 0)
+ self.border_end = self.columns[-1][1]
+ firststart, firstend = self.columns[0]
+ offset = 1 # skip top border
+ start = 1
+ text_found = None
+ while offset < len(self.block):
+ line = self.block[offset]
+ if self.span_pat.match(line):
+ # Column span underline or border; row is complete.
+ self.parse_row(self.block[start:offset], start,
+ (line.rstrip(), offset))
+ start = offset + 1
+ text_found = None
+ elif line[firststart:firstend].strip():
+ # First column not blank, therefore it's a new row.
+ if text_found and offset != start:
+ self.parse_row(self.block[start:offset], start)
+ start = offset
+ text_found = 1
+ elif not text_found:
+ start = offset + 1
+ offset += 1
+
+ def parse_columns(self, line, offset):
+ """
+ Given a column span underline, return a list of (begin, end) pairs.
+ """
+ cols = []
+ end = 0
+ while 1:
+ begin = line.find('-', end)
+ end = line.find(' ', begin)
+ if begin < 0:
+ break
+ if end < 0:
+ end = len(line)
+ cols.append((begin, end))
+ if self.columns:
+ if cols[-1][1] != self.border_end:
+ raise TableMarkupError('Column span incomplete at line '
+ 'offset %s.' % offset)
+ # Allow for an unbounded rightmost column:
+ cols[-1] = (cols[-1][0], self.columns[-1][1])
+ return cols
+
+ def init_row(self, colspec, offset):
+ i = 0
+ cells = []
+ for start, end in colspec:
+ morecols = 0
+ try:
+ assert start == self.columns[i][0]
+ while end != self.columns[i][1]:
+ i += 1
+ morecols += 1
+ except (AssertionError, IndexError):
+ raise TableMarkupError('Column span alignment problem at '
+ 'line offset %s.' % (offset + 1))
+ cells.append([0, morecols, offset, []])
+ i += 1
+ return cells
+
+ def parse_row(self, lines, start, spanline=None):
+ """
+ Given the text `lines` of a row, parse it and append to `self.table`.
+
+ The row is parsed according to the current column spec (either
+ `spanline` if provided or `self.columns`). For each column, extract
+ text from each line, and check for text in column margins. Finally,
+ adjust for insignificant whitespace.
+ """
+ if not (lines or spanline):
+ # No new row, just blank lines.
+ return
+ if spanline:
+ columns = self.parse_columns(*spanline)
+ span_offset = spanline[1]
+ else:
+ columns = self.columns[:]
+ span_offset = start
+ self.check_columns(lines, start, columns)
+ row = self.init_row(columns, start)
+ for i in range(len(columns)):
+ start, end = columns[i]
+ cellblock = lines.get_2D_block(0, start, len(lines), end)
+ cellblock.disconnect() # lines in cell can't sync with parent
+ cellblock.replace(self.double_width_pad_char, '')
+ row[i][3] = cellblock
+ self.table.append(row)
+
+ def check_columns(self, lines, first_line, columns):
+ """
+ Check for text in column margins and text overflow in the last column.
+ Raise TableMarkupError if anything but whitespace is in column margins.
+ Adjust the end value for the last column if there is text overflow.
+ """
+ # "Infinite" value for a dummy last column's beginning, used to
+ # check for text overflow:
+ columns.append((sys.maxint, None))
+ lastcol = len(columns) - 2
+ for i in range(len(columns) - 1):
+ start, end = columns[i]
+ nextstart = columns[i+1][0]
+ offset = 0
+ for line in lines:
+ if i == lastcol and line[end:].strip():
+ text = line[start:].rstrip()
+ new_end = start + len(text)
+ columns[i] = (start, new_end)
+ main_start, main_end = self.columns[-1]
+ if new_end > main_end:
+ self.columns[-1] = (main_start, new_end)
+ elif line[end:nextstart].strip():
+ raise TableMarkupError('Text in column margin at line '
+ 'offset %s.' % (first_line + offset))
+ offset += 1
+ columns.pop()
+
+ def structure_from_cells(self):
+ colspecs = [end - start for start, end in self.columns]
+ first_body_row = 0
+ if self.head_body_sep:
+ for i in range(len(self.table)):
+ if self.table[i][0][2] > self.head_body_sep:
+ first_body_row = i
+ break
+ return (colspecs, self.table[:first_body_row],
+ self.table[first_body_row:])
+
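The same kind of usage sketch for `SimpleTableParser`, under the same assumptions as the grid-table sketch above::

    from docutils.statemachine import StringList
    from docutils.parsers.rst.tableparser import SimpleTableParser

    lines = ['=====  =====',
             'col 1  col 2',
             '=====  =====',
             '1      row 1',
             '=====  =====']
    colspecs, headrows, bodyrows = SimpleTableParser().parse(StringList(lines))
    # colspecs == [5, 5]; one head row and one body row, each of two cells.
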
+
+def update_dict_of_lists(master, newdata):
+ """
+ Extend the list values of `master` with those from `newdata`.
+
+ Both parameters must be dictionaries containing list values.
+ """
+ for key, values in newdata.items():
+ master.setdefault(key, []).extend(values)
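A tiny worked example of the merge behaviour (keys and values are arbitrary)::

    master = {0: [0]}
    update_dict_of_lists(master, {0: [6], 2: [6]})
    assert master == {0: [0, 6], 2: [6]}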