summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWaylan Limberg <waylan.limberg@icloud.com>2020-10-14 13:48:47 -0400
committerWaylan Limberg <waylan.limberg@icloud.com>2020-10-14 15:21:44 -0400
commit607a091918004c4ca10fb621e53a09a00b8d299b (patch)
treed6adc30ad8159e69060518d462f37dbaa017a33f
parentb4a399ca1a39244a84a2dda0ee3fa2d8f17916fa (diff)
downloadpython-markdown-607a091918004c4ca10fb621e53a09a00b8d299b.tar.gz
Account for Etree Elements in HTML Stash
By calling str on all stash elements we ensure they don't raise an error. Worst case, something like `<Element 'div' at 0x000001B2DAE94900>` gets inserted into the output. However, with the override in the md_in_html extension, we actually serialize and reinsert the original HTML. Worst case, an HTML block which should be parsed as Markdown gets skipped by the extension (`<div markdown="block"></div>` gets inserted into the output). The tricky part is testing as there should be no known cases where this ever occurs. Therefore, we forcefully pass an etree Element directly to the method in the test. That said, as #1040 is unresolved at this point, I have tested locally with a real existing case and it works well. Related to #1040.
-rw-r--r--markdown/extensions/md_in_html.py12
-rw-r--r--markdown/postprocessors.py6
-rw-r--r--tests/test_syntax/extensions/test_md_in_html.py17
3 files changed, 33 insertions, 2 deletions
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
index 3518d05..174224a 100644
--- a/markdown/extensions/md_in_html.py
+++ b/markdown/extensions/md_in_html.py
@@ -17,6 +17,7 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
from . import Extension
from ..blockprocessors import BlockProcessor
from ..preprocessors import Preprocessor
+from ..postprocessors import RawHtmlPostprocessor
from .. import util
from ..htmlparser import HTMLExtractor
import xml.etree.ElementTree as etree
@@ -263,6 +264,15 @@ class MarkdownInHtmlProcessor(BlockProcessor):
return False
+class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor):
+ def stash_to_string(self, text):
+ """ Override default to handle any etree elements still in the stash. """
+ if isinstance(text, etree.Element):
+ return self.md.serializer(text)
+ else:
+ return str(text)
+
+
class MarkdownInHtmlExtension(Extension):
"""Add Markdown parsing in HTML to Markdown class."""
@@ -275,6 +285,8 @@ class MarkdownInHtmlExtension(Extension):
md.parser.blockprocessors.register(
MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105
)
+ # Replace raw HTML postprocessor
+ md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30)
def makeExtension(**kwargs): # pragma: no cover
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
index cd32687..2e68cd9 100644
--- a/markdown/postprocessors.py
+++ b/markdown/postprocessors.py
@@ -69,7 +69,7 @@ class RawHtmlPostprocessor(Postprocessor):
""" Iterate over html stash and restore html. """
replacements = OrderedDict()
for i in range(self.md.htmlStash.html_counter):
- html = self.md.htmlStash.rawHtmlBlocks[i]
+ html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i])
if self.isblocklevel(html):
replacements["<p>{}</p>".format(
self.md.htmlStash.get_placeholder(i))] = html
@@ -95,6 +95,10 @@ class RawHtmlPostprocessor(Postprocessor):
return self.md.is_block_level(m.group(1))
return False
+ def stash_to_string(self, text):
+ """ Convert a stashed object to a string. """
+ return str(text)
+
class AndSubstitutePostprocessor(Postprocessor):
""" Restore valid entities """
diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py
index b68412c..433cdd5 100644
--- a/tests/test_syntax/extensions/test_md_in_html.py
+++ b/tests/test_syntax/extensions/test_md_in_html.py
@@ -23,6 +23,21 @@ License: BSD (see LICENSE.md for details).
from unittest import TestSuite
from markdown.test_tools import TestCase
from ..blocks.test_html_blocks import TestHTMLBlocks
+from markdown import Markdown
+from xml.etree.ElementTree import Element
+
+
+class TestMarkdownInHTMLPostProcessor(TestCase):
+ """ Ensure any remaining elements in HTML stash are properly serialized. """
+
+ def test_stash_to_string(self):
+ # There should be no known cases where this actually happens so we need to
+ # forcefully pass an etree Element to the method to ensure proper behavior.
+ element = Element('div')
+ element.text = 'Foo bar.'
+ md = Markdown(extensions=['md_in_html'])
+ result = md.postprocessors['raw_html'].stash_to_string(element)
+ self.assertEqual(result, '<div>Foo bar.</div>')
class TestDefaultwMdInHTML(TestHTMLBlocks):
@@ -758,7 +773,7 @@ class TestMdInHTML(TestCase):
def load_tests(loader, tests, pattern):
''' Ensure TestHTMLBlocks doesn't get run twice by excluding it here. '''
suite = TestSuite()
- for test_class in [TestDefaultwMdInHTML, TestMdInHTML]:
+ for test_class in [TestDefaultwMdInHTML, TestMdInHTML, TestMarkdownInHTMLPostProcessor]:
tests = loader.loadTestsFromTestCase(test_class)
suite.addTests(tests)
return suite