diff options
author | Isaac Muse <faceless.shop@gmail.com> | 2020-10-19 12:07:45 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-19 14:07:45 -0400 |
commit | 2766698ac88ae9218d41b3ca1d9fbd4b4bd105e5 (patch) | |
tree | 0b3c2bd1648f5d26266618ccde5d64c5ea81c36b | |
parent | 607a091918004c4ca10fb621e53a09a00b8d299b (diff) | |
download | python-markdown-2766698ac88ae9218d41b3ca1d9fbd4b4bd105e5.tar.gz |
Properly parse inline HTML in md_in_html
Fixes #1040 and fixes #1045.
-rw-r--r-- | markdown/extensions/md_in_html.py | 36 | ||||
-rw-r--r-- | tests/test_syntax/extensions/test_md_in_html.py | 160 |
2 files changed, 191 insertions, 5 deletions
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 174224a..f635563 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -86,6 +86,14 @@ class HTMLExtractorExtra(HTMLExtractor): else: # pragma: no cover return None + def at_line_start(self): + """At line start.""" + + value = super().at_line_start() + if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'): + value = True + return value + def handle_starttag(self, tag, attrs): if tag in block_level_tags: # Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`. @@ -93,7 +101,7 @@ class HTMLExtractorExtra(HTMLExtractor): attrs = {key: value if value is not None else key for key, value in attrs} state = self.get_state(tag, attrs) - if self.inraw or (state in [None, 'off'] and not self.mdstack): + if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start(): # fall back to default behavior attrs.pop('markdown', None) super().handle_starttag(tag, attrs) @@ -111,7 +119,10 @@ class HTMLExtractorExtra(HTMLExtractor): super().handle_starttag(tag, attrs) else: text = self.get_starttag_text() - self.handle_data(text) + if self.mdstate and self.mdstate[-1] == "off": + self.handle_data(self.md.htmlStash.store(text)) + else: + self.handle_data(text) def handle_endtag(self, tag): if tag in block_level_tags: @@ -128,20 +139,32 @@ class HTMLExtractorExtra(HTMLExtractor): if not self.mdstack: # Last item in stack is closed. Stash it element = self.get_element() + # Get last entry to see if it ends in newlines + # If it is an element, assume there is no newlines + item = self.cleandoc[-1] if self.cleandoc else '' + # If we only have one newline before block element, add another + if not item.endswith('\n\n') and item.endswith('\n'): + self.cleandoc.append('\n') self.cleandoc.append(self.md.htmlStash.store(element)) self.cleandoc.append('\n\n') self.state = [] else: # Treat orphan closing tag as a span level tag. text = self.get_endtag_text(tag) - self.handle_data(text) + if self.mdstate and self.mdstate[-1] == "off": + self.handle_data(self.md.htmlStash.store(text)) + else: + self.handle_data(text) else: # Span level tag if self.inraw: super().handle_endtag(tag) else: text = self.get_endtag_text(tag) - self.handle_data(text) + if self.mdstate and self.mdstate[-1] == "off": + self.handle_data(self.md.htmlStash.store(text)) + else: + self.handle_data(text) def handle_data(self, data): if self.inraw or not self.mdstack: @@ -156,7 +179,10 @@ class HTMLExtractorExtra(HTMLExtractor): if self.at_line_start() and is_block: self.handle_data('\n' + self.md.htmlStash.store(data) + '\n\n') else: - self.handle_data(data) + if self.mdstate and self.mdstate[-1] == "off": + self.handle_data(self.md.htmlStash.store(data)) + else: + self.handle_data(data) class HtmlBlockPreprocessor(Preprocessor): diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py index 433cdd5..946e922 100644 --- a/tests/test_syntax/extensions/test_md_in_html.py +++ b/tests/test_syntax/extensions/test_md_in_html.py @@ -390,6 +390,166 @@ class TestMdInHTML(TestCase): ) ) + def test_orphan_end_tag_in_raw_html(self): + self.assertMarkdownRenders( + self.dedent( + """ + <div markdown="1"> + <div> + Test + + </pre> + + Test + </div> + </div> + """ + ), + self.dedent( + """ + <div> + <div> + Test + + </pre> + + Test + </div> + </div> + """ + ) + ) + + def test_complex_nested_case(self): + self.assertMarkdownRenders( + self.dedent( + """ + <div markdown="1"> + **test** + <div> + **test** + <img src=""/> + <code>Test</code> + <span>**test**</span> + <p>Test 2</p> + </div> + </div> + """ + ), + self.dedent( + """ + <div> + <p><strong>test</strong></p> + <div> + **test** + <img src=""/> + <code>Test</code> + <span>**test**</span> + <p>Test 2</p> + </div> + </div> + """ + ) + ) + + def test_complex_nested_case_whitespace(self): + self.assertMarkdownRenders( + self.dedent( + """ + Text with space\t + <div markdown="1">\t + \t + <div> + **test** + <img src=""/> + <code>Test</code> + <span>**test**</span> + <div>With whitespace</div> + <p>Test 2</p> + </div> + **test** + </div> + """ + ), + self.dedent( + """ + <p>Text with space </p> + <div> + <div> + **test** + <img src=""/> + <code>Test</code> + <span>**test**</span> + <div>With whitespace</div> + <p>Test 2</p> + </div> + <p><strong>test</strong></p> + </div> + """ + ) + ) + + def test_md1_intail_md1(self): + self.assertMarkdownRenders( + '<div markdown="1">*foo*</div><div markdown="1">*bar*</div>', + self.dedent( + """ + <div> + <p><em>foo</em></p> + </div> + <div> + <p><em>bar</em></p> + </div> + """ + ) + ) + + def test_md1_no_blank_line_before(self): + self.assertMarkdownRenders( + self.dedent( + """ + A _Markdown_ paragraph with no blank line after. + <div markdown="1"> + A _Markdown_ paragraph in an HTML block with no blank line before. + </div> + """ + ), + self.dedent( + """ + <p>A <em>Markdown</em> paragraph with no blank line after.</p> + <div> + <p>A <em>Markdown</em> paragraph in an HTML block with no blank line before.</p> + </div> + """ + ) + ) + + def test_md1_no_line_break(self): + # The div here is parsed as a span-level element. Bad input equals bad output! + self.assertMarkdownRenders( + 'A _Markdown_ paragraph with <div markdown="1">no _line break_.</div>', + '<p>A <em>Markdown</em> paragraph with <div markdown="1">no <em>line break</em>.</div></p>' + ) + + def test_md1_in_tail(self): + self.assertMarkdownRenders( + self.dedent( + """ + <div></div><div markdown="1"> + A _Markdown_ paragraph in an HTML block in tail of previous element. + </div> + """ + ), + self.dedent( + """ + <div></div> + <div> + <p>A <em>Markdown</em> paragraph in an HTML block in tail of previous element.</p> + </div> + """ + ) + ) + def test_md_span_paragraph(self): self.assertMarkdownRenders( '<p markdown="span">*foo*</p>', |