diff options
author | Isaac Muse <faceless.shop@gmail.com> | 2020-04-06 09:40:56 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-06 11:40:56 -0400 |
commit | ada40c6619fd0be740af646b043937e560716c95 (patch) | |
tree | 23acd9542f62070ad5bf78e48ffc9c0113973592 | |
parent | 7c595e28491d3cca28bc9901bb099bca8bcf4626 (diff) | |
download | python-markdown-ada40c6619fd0be740af646b043937e560716c95.tar.gz |
TOC fix for AtomicString handling (#934)
Fixes #931.
-rw-r--r-- | docs/change_log/index.md | 6 | ||||
-rw-r--r-- | markdown/extensions/toc.py | 17 | ||||
-rw-r--r-- | tests/test_syntax/extensions/test_toc.py | 22 |
3 files changed, 42 insertions, 3 deletions
diff --git a/docs/change_log/index.md b/docs/change_log/index.md index 62fe4b2..3370c4f 100644 --- a/docs/change_log/index.md +++ b/docs/change_log/index.md @@ -3,7 +3,11 @@ title: Change Log Python-Markdown Change Log ========================= -Feb 12, 2020: Released version 3.2.1 (a bug-fix release). +Under development: version 3.2.2 (a bug-fix release). + +* Fixed issue where double escaped entities could end up in TOC. + +Feb 12, 2020: Released version 3.2.1 (a bug-fix release). * The `name` property in `toc_tokens` from the TOC extension now escapes HTML special characters (`<`, `>`, and `&`). diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index 8f2b13f..b6cdc73 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -15,9 +15,10 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php) from . import Extension from ..treeprocessors import Treeprocessor -from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE +from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString from ..postprocessors import UnescapePostprocessor import re +import html import unicodedata import xml.etree.ElementTree as etree @@ -44,6 +45,18 @@ def unique(id, ids): return id +def get_name(el): + """Get title name.""" + + text = [] + for c in el.itertext(): + if isinstance(c, AtomicString): + text.append(html.unescape(c)) + else: + text.append(c) + return ''.join(text).strip() + + def stashedHTML2text(text, md, strip_entities=True): """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. 
""" def _html_sub(m): @@ -253,7 +266,7 @@ class TocTreeprocessor(Treeprocessor): self.set_level(el) if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom: continue - text = ''.join(el.itertext()).strip() + text = get_name(el) # Do not override pre-existing ids if "id" not in el.attrib: diff --git a/tests/test_syntax/extensions/test_toc.py b/tests/test_syntax/extensions/test_toc.py index 5b9ad92..3fc9780 100644 --- a/tests/test_syntax/extensions/test_toc.py +++ b/tests/test_syntax/extensions/test_toc.py @@ -27,6 +27,28 @@ class TestTOC(TestCase): # TODO: Move the rest of the TOC tests here. + def test_escaped_code(self): + self.assertMarkdownRenders( + self.dedent( + ''' + [TOC] + + # `<test>` + ''' + ), + self.dedent( + ''' + <div class="toc"> + <ul> + <li><a href="#test">&lt;test&gt;</a></li> + </ul> + </div> + <h1 id="test"><code>&lt;test&gt;</code></h1> + ''' + ), + extensions=['toc'] + ) + def test_escaped_char_in_id(self): self.assertMarkdownRenders( r'# escaped\_character', |