Clean up parser code.

Simplify conditional loading and mock definition for recommonmark.
Do not repeat generic settings in rst parser.

git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@8670 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2021-04-07 12:04:36 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2021-04-07 12:04:36 +0000
commit: 881989f11b0c2e1498c7ebbbb5476c24b21bc69e (patch)
tree: c8f46e6d720c9299e94dede85fd46c3eeee743fe /docutils
parent: 388e9f71675a377332095acce43be7c50400a476 (diff)
download: docutils-881989f11b0c2e1498c7ebbbb5476c24b21bc69e.tar.gz
5 files changed, 205 insertions, 125 deletions
diff --git a/docutils/docutils/parsers/__init__.py b/docutils/docutils/parsers/__init__.py
index 3cfa86891..bf614ef55 100644
--- a/docutils/docutils/parsers/__init__.py
+++ b/docutils/docutils/parsers/__init__.py
@@ -15,16 +15,25 @@ from docutils import Component, frontend
 
 
 class Parser(Component):
-
     settings_spec = (
         'Generic Parser Options',
         None,
-        (('Disable the "raw" directives; replaced with a "warning" '
-          'system message.',
+        (('Disable directives that insert the contents of an external file; '
+          'replaced with a "warning" system message.',
+          ['--no-file-insertion'],
+          {'action': 'store_false', 'default': 1,
+           'dest': 'file_insertion_enabled',
+           'validator': frontend.validate_boolean}),
+         ('Enable directives that insert the contents '
+          'of an external file. (default)',
+          ['--file-insertion-enabled'],
+          {'action': 'store_true'}),
+         ('Disable the "raw" directive; '
+          'replaced with a "warning" system message.',
           ['--no-raw'],
           {'action': 'store_false', 'default': 1, 'dest': 'raw_enabled',
            'validator': frontend.validate_boolean}),
-         ('Enable the "raw" directive.  Enabled by default.',
+         ('Enable the "raw" directive. (default)',
           ['--raw-enabled'],
           {'action': 'store_true'}),
          ('Maximal number of characters in an input line. Default 10 000.',
diff --git a/docutils/docutils/parsers/recommonmark_wrapper.py b/docutils/docutils/parsers/recommonmark_wrapper.py
index b7e5f9b71..0a7fef5e1 100644
--- a/docutils/docutils/parsers/recommonmark_wrapper.py
+++ b/docutils/docutils/parsers/recommonmark_wrapper.py
@@ -19,16 +19,14 @@ __ https://pypi.org/project/recommonmark/
 """
 
 import docutils.parsers
-from docutils import nodes
+from docutils import nodes, Component
 
 try:
-    from recommonmark.parser import CommonMarkParser as _recommonmarkParser
-    _recommonmarkParser.supported = ('recommonmark', 'commonmark',
-                                     'markdown', 'md')
-    with_recommonmark = True
+    from recommonmark.parser import CommonMarkParser
+    from recommonmark.transform import AutoStructify
 except ImportError as err:
-    with_recommonmark = False
-    class _recommonmarkParser(docutils.parsers.Parser):
+    CommonMarkParser = None
+    class Parser(docutils.parsers.Parser):
         def parse(self, inputstring, document):
             error = document.reporter.warning(
                 'Missing dependency: MarkDown input is processed by a 3rd '
@@ -36,89 +34,101 @@ except ImportError as err:
                 '"recommonmark" (https://pypi.org/project/recommonmark/).')
             document.append(error)
 
-class Parser(_recommonmarkParser):
-    config_section = 'recommonmark parser'
-    config_section_dependencies = ('parsers',)
-
-    def parse(self, inputstring, document):
-        """Use the upstream parser and clean up afterwards.
-        """
-        # check for exorbitantly long lines
-        for i, line in enumerate(inputstring.split('\n')):
-            if len(line) > document.settings.line_length_limit:
-                error = document.reporter.error(
-                    'Line %d exceeds the line-length-limit.'%(i+1))
+
+if CommonMarkParser:
+    class Parser(CommonMarkParser):
+        """MarkDown parser based on recommonmark."""
+        # TODO: settings for AutoStructify
+        # settings_spec = docutils.parsers.Parser.settings_spec + (
+        # see https://recommonmark.readthedocs.io/en/latest/#autostructify
+
+        supported = ('recommonmark', 'commonmark',
+                     'markdown', 'md')
+        config_section = 'recommonmark parser'
+        config_section_dependencies = ('parsers',)
+
+        # def get_transforms(self):
+        #     return Component.get_transforms(self) + [AutoStructify]
+
+        def parse(self, inputstring, document):
+            """Use the upstream parser and clean up afterwards.
+            """
+            # check for exorbitantly long lines
+            for i, line in enumerate(inputstring.split('\n')):
+                if len(line) > document.settings.line_length_limit:
+                    error = document.reporter.error(
+                        'Line %d exceeds the line-length-limit.'%(i+1))
+                    document.append(error)
+                    return
+
+            # pass to upstream parser
+            try:
+                CommonMarkParser.parse(self, inputstring, document)
+            except Exception as err:
+                error = document.reporter.error('Parsing with "recommonmark" '
+                                                'returned the error:\n%s'%err)
                 document.append(error)
-                return
-
-        # pass to upstream parser
-        try:
-            _recommonmarkParser.parse(self, inputstring, document)
-        except Exception as err:
-            error = document.reporter.error('Parsing with "recommonmark" '
-                                            'returned the error:\n%s'%err)
-            document.append(error)
 
-        # Post-Processing
-        # ---------------
-
-        # merge adjoining Text nodes:
-        for node in document.traverse(nodes.TextElement):
-            children = node.children
-            i = 0
-            while i+1 < len(children):
-                if (isinstance(children[i], nodes.Text)
-                    and isinstance(children[i+1], nodes.Text)):
-                    children[i] = nodes.Text(children[i]+children.pop(i+1))
-                    children[i].parent = node
-                else:
-                    i += 1
-
-        # add "code" class argument to inline literal (code spans)
-        for node in document.traverse(lambda n: isinstance(n,
-                                (nodes.literal, nodes.literal_block))):
-            node['classes'].append('code')
-        # move "language" argument to classes
-        for node in document.traverse(nodes.literal_block):
-            if 'language' in node.attributes:
-                node['classes'].append(node['language'])
-                del node['language']
-
-        # remove empty target nodes
-        for node in document.traverse(nodes.target):
-            # remove empty name
-            node['names'] = [v for v in node['names'] if v]
-            if node.children or [v for v in node.attributes.values() if v]:
-                continue
-            node.parent.remove(node)
-
-        # replace raw nodes if raw is not allowed
-        if not document.settings.raw_enabled:
-            for node in document.traverse(nodes.raw):
-                warning = document.reporter.warning('Raw content disabled.')
-                node.parent.replace(node, warning)
-
-        # fix section nodes
-        for node in document.traverse(nodes.section):
-            # remove spurious IDs (first may be from duplicate name)
-            if len(node['ids']) > 1:
-                node['ids'].pop()
-            # fix section levels
-            section_level = self.get_section_level(node)
-            if node['level'] != section_level:
-                warning = document.reporter.warning(
-                    'Title level inconsistent. Changing from %d to %d.'
-                    %(node['level'], section_level),
-                    nodes.literal_block('', node[0].astext()))
-                node.insert(1, warning)
-            # remove non-standard attribute "level"
-            del node['level'] # TODO: store the original md level somewhere
-
-    def get_section_level(self, node):
-        level = 1
-        while True:
-            node = node.parent
-            if isinstance(node, nodes.document):
-                return level
-            if isinstance(node, nodes.section):
-                level += 1
+            # Post-Processing
+            # ---------------
+
+            # merge adjoining Text nodes:
+            for node in document.traverse(nodes.TextElement):
+                children = node.children
+                i = 0
+                while i+1 < len(children):
+                    if (isinstance(children[i], nodes.Text)
+                        and isinstance(children[i+1], nodes.Text)):
+                        children[i] = nodes.Text(children[i]+children.pop(i+1))
+                        children[i].parent = node
+                    else:
+                        i += 1
+
+            # add "code" class argument to inline literal (code spans)
+            for node in document.traverse(lambda n: isinstance(n,
+                                    (nodes.literal, nodes.literal_block))):
+                node['classes'].append('code')
+            # move "language" argument to classes
+            for node in document.traverse(nodes.literal_block):
+                if 'language' in node.attributes:
+                    node['classes'].append(node['language'])
+                    del node['language']
+
+            # remove empty target nodes
+            for node in document.traverse(nodes.target):
+                # remove empty name
+                node['names'] = [v for v in node['names'] if v]
+                if node.children or [v for v in node.attributes.values() if v]:
+                    continue
+                node.parent.remove(node)
+
+            # replace raw nodes if raw is not allowed
+            if not document.settings.raw_enabled:
+                for node in document.traverse(nodes.raw):
+                    warning = document.reporter.warning('Raw content disabled.')
+                    node.parent.replace(node, warning)
+
+            # fix section nodes
+            for node in document.traverse(nodes.section):
+                # remove spurious IDs (first may be from duplicate name)
+                if len(node['ids']) > 1:
+                    node['ids'].pop()
+                # fix section levels
+                section_level = self.get_section_level(node)
+                if node['level'] != section_level:
+                    warning = document.reporter.warning(
+                        'Title level inconsistent. Changing from %d to %d.'
+                        %(node['level'], section_level),
+                        nodes.literal_block('', node[0].astext()))
+                    node.insert(1, warning)
+                # remove non-standard attribute "level"
+                del node['level'] # TODO: store the original md level somewhere
+
+        def get_section_level(self, node):
+            level = 1
+            while True:
+                node = node.parent
+                if isinstance(node, nodes.document):
+                    return level
+                if isinstance(node, nodes.section):
+                    level += 1
diff --git a/docutils/docutils/parsers/rst/__init__.py b/docutils/docutils/parsers/rst/__init__.py
index 98c2f9f57..921977e25 100644
--- a/docutils/docutils/parsers/rst/__init__.py
+++ b/docutils/docutils/parsers/rst/__init__.py
@@ -84,7 +84,7 @@ class Parser(docutils.parsers.Parser):
     supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
     """Aliases this parser supports."""
 
-    settings_spec = (
+    settings_spec = docutils.parsers.Parser.settings_spec + (
         'reStructuredText Parser Options',
         None,
         (('Recognize and link to standalone PEP references (like "PEP 258").',
@@ -115,28 +115,6 @@ class Parser(docutils.parsers.Parser):
          ('Leave spaces before footnote references.',
           ['--leave-footnote-reference-space'],
           {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
-         ('Disable directives that insert the contents of external file '
-          '("include" & "raw"); replaced with a "warning" system message.',
-          ['--no-file-insertion'],
-          {'action': 'store_false', 'default': 1,
-           'dest': 'file_insertion_enabled',
-           'validator': frontend.validate_boolean}),
-         ('Enable directives that insert the contents of external file '
-          '("include" & "raw").  Enabled by default.',
-          ['--file-insertion-enabled'],
-          {'action': 'store_true'}),
-         ('Disable the "raw" directives; replaced with a "warning" '
-          'system message.',
-          ['--no-raw'],
-          {'action': 'store_false', 'default': 1, 'dest': 'raw_enabled',
-           'validator': frontend.validate_boolean}),
-         ('Enable the "raw" directive.  Enabled by default.',
-          ['--raw-enabled'],
-          {'action': 'store_true'}),
-         ('Maximal number of characters in an input line. Default 10 000.',
-          ['--line-length-limit'],
-          {'metavar': '<length>', 'type': 'int', 'default': 10000,
-           'validator': frontend.validate_nonnegative_int}),
          ('Token name set for parsing code with Pygments: one of '
           '"long", "short", or "none" (no parsing). Default is "long".',
           ['--syntax-highlight'],
diff --git a/docutils/test/test_parsers/test_recommonmark/test_misc.py b/docutils/test/test_parsers/test_recommonmark/test_misc.py
index 6672bc7ca..c47f546e8 100644
--- a/docutils/test/test_parsers/test_recommonmark/test_misc.py
+++ b/docutils/test/test_parsers/test_recommonmark/test_misc.py
@@ -50,7 +50,7 @@ skip_msg = 'optional module "recommonmark" not found'
 
 class reCommonMarkParserTests(unittest.TestCase):
 
-    @unittest.skipUnless(recommonmark_wrapper.with_recommonmark, skip_msg)
+    @unittest.skipUnless(recommonmark_wrapper.CommonMarkParser, skip_msg)
     def test_parsing_error(self):
         output = publish_string(sample1, parser_name='recommonmark',
                                 settings_overrides={'warning_stream': ''})
@@ -58,7 +58,7 @@ class reCommonMarkParserTests(unittest.TestCase):
         self.assertIn(b'Parsing with "recommonmark" returned the error:',
                       output)
 
-    @unittest.skipUnless(recommonmark_wrapper.with_recommonmark, skip_msg)
+    @unittest.skipUnless(recommonmark_wrapper.CommonMarkParser, skip_msg)
     def test_raw_disabled(self):
         output = publish_string(sample_with_html, parser_name='recommonmark',
                                 settings_overrides={'warning_stream': '',
@@ -67,7 +67,7 @@ class reCommonMarkParserTests(unittest.TestCase):
         self.assertIn(b'<system_message', output)
         self.assertIn(b'Raw content disabled.', output)
 
-    @unittest.skipUnless(recommonmark_wrapper.with_recommonmark, skip_msg)
+    @unittest.skipUnless(recommonmark_wrapper.CommonMarkParser, skip_msg)
     def test_raw_disabled_inline(self):
         output = publish_string('foo <a href="uri">', parser_name='recommonmark',
                                 settings_overrides={'warning_stream': '',
@@ -78,7 +78,7 @@ class reCommonMarkParserTests(unittest.TestCase):
         self.assertIn(b'Raw content disabled.', output)
 
 
-    @unittest.skipUnless(recommonmark_wrapper.with_recommonmark, skip_msg)
+    @unittest.skipUnless(recommonmark_wrapper.CommonMarkParser, skip_msg)
     def test_raw_disabled(self):
         output = publish_string(sample_with_html, parser_name='recommonmark',
                                 settings_overrides={'warning_stream': '',
@@ -88,7 +88,7 @@ class reCommonMarkParserTests(unittest.TestCase):
         self.assertNotIn(b'<raw>', output)
         self.assertNotIn(b'<system_message', output)
 
-    @unittest.skipIf(recommonmark_wrapper.with_recommonmark,
+    @unittest.skipIf(recommonmark_wrapper.CommonMarkParser,
                      'recommonmark_wrapper: parser found, fallback not used')
     def test_fallback_parser(self):
         output = publish_string(sample1, parser_name='recommonmark',
diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_include.py b/docutils/test/test_parsers/test_rst/test_directives/test_include.py
index 47e6c3f45..da673347f 100755
--- a/docutils/test/test_parsers/test_rst/test_directives/test_include.py
+++ b/docutils/test/test_parsers/test_rst/test_directives/test_include.py
@@ -15,7 +15,7 @@ if __name__ == '__main__':
     import __init__
 from test_parsers import DocutilsTestSupport
 from docutils.parsers.rst import states
-from docutils.parsers.recommonmark_wrapper import with_recommonmark
+from docutils.parsers import recommonmark_wrapper
 from docutils.utils.code_analyzer import with_pygments
 
 if sys.version_info >= (3, 0):
@@ -76,7 +76,7 @@ InputError: [Errno 2] No such file or directory: '\u043c\u0438\u0440.txt'.\
 
 # Parsing with Markdown (recommonmark) is an optional feature depending
 # on 3rd-party modules:
-if with_recommonmark:
+if recommonmark_wrapper.CommonMarkParser:
     markdown_parsing_result = """\
     <section ids="title-1" names="title\\ 1">
         <title>
@@ -1048,6 +1048,89 @@ Included code
         .
 """ % reldir(include1)],
 ["""\
+TAB expansion with included code:
+
+.. include:: %s
+   :code: rst
+""" % include_literal,
+"""\
+<document source="test data">
+    <paragraph>
+        TAB expansion with included code:
+    <literal_block classes="code rst" source="%s" xml:space="preserve">
+        Literal included this should \n\
+        <inline classes="generic strong">
+            **not**
+         be \n\
+        <inline classes="generic emph">
+            *marked*
+         \n\
+        <inline classes="name variable">
+            `up`
+        .
+                <- leading raw tab.
+        \n\
+        Newlines
+        are
+        normalized.
+""" % include_literal],
+["""\
+Custom TAB expansion with included code:
+
+.. include:: %s
+   :code: rst
+   :tab-width: 2
+""" % include_literal,
+"""\
+<document source="test data">
+    <paragraph>
+        Custom TAB expansion with included code:
+    <literal_block classes="code rst" source="%s" xml:space="preserve">
+        Literal included this should \n\
+        <inline classes="generic strong">
+            **not**
+         be \n\
+        <inline classes="generic emph">
+            *marked*
+         \n\
+        <inline classes="name variable">
+            `up`
+        .
+          <- leading raw tab.
+        \n\
+        Newlines
+        are
+        normalized.
+""" % include_literal],
+["""\
+Custom TAB expansion with included code:
+
+.. include:: %s
+   :code: rst
+   :tab-width: -1
+""" % include_literal,
+"""\
+<document source="test data">
+    <paragraph>
+        Custom TAB expansion with included code:
+    <literal_block classes="code rst" source="%s" xml:space="preserve">
+        Literal included this should \n\
+        <inline classes="generic strong">
+            **not**
+         be \n\
+        <inline classes="generic emph">
+            *marked*
+         \n\
+        <inline classes="name variable">
+            `up`
+        .
+        \t<- leading raw tab.
+        \n\
+        Newlines
+        are
+        normalized.
+""" % include_literal],
+["""\
 Including includes/include14.txt
 
 .. include:: %s
author	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2021-04-07 12:04:36 +0000
committer	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2021-04-07 12:04:36 +0000
commit	881989f11b0c2e1498c7ebbbb5476c24b21bc69e (patch)
tree	c8f46e6d720c9299e94dede85fd46c3eeee743fe /docutils
parent	388e9f71675a377332095acce43be7c50400a476 (diff)
download	docutils-881989f11b0c2e1498c7ebbbb5476c24b21bc69e.tar.gz