TOC fix for AtomicString handling (#934)

Fixes #931.
author: Isaac Muse <faceless.shop@gmail.com> 2020-04-06 09:40:56 -0600
committer: GitHub <noreply@github.com> 2020-04-06 11:40:56 -0400
commit: ada40c6619fd0be740af646b043937e560716c95 (patch)
tree: 23acd9542f62070ad5bf78e48ffc9c0113973592
parent: 7c595e28491d3cca28bc9901bb099bca8bcf4626 (diff)
download: python-markdown-ada40c6619fd0be740af646b043937e560716c95.tar.gz
3 files changed, 42 insertions, 3 deletions
diff --git a/docs/change_log/index.md b/docs/change_log/index.md
index 62fe4b2..3370c4f 100644
--- a/docs/change_log/index.md
+++ b/docs/change_log/index.md
@@ -3,7 +3,11 @@ title: Change Log
 Python-Markdown Change Log
 =========================
 
-Feb 12, 2020: Released version 3.2.1 (a bug-fix release). 
+Under development: version 3.2.2 (a bug-fix release).
+
+* Fixed issue where double escaped entities could end up in TOC.
+
+Feb 12, 2020: Released version 3.2.1 (a bug-fix release).
 
 * The `name` property in `toc_tokens` from the TOC extension now
   escapes HTML special characters (`<`, `>`, and `&`).
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
index 8f2b13f..b6cdc73 100644
--- a/markdown/extensions/toc.py
+++ b/markdown/extensions/toc.py
@@ -15,9 +15,10 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
 
 from . import Extension
 from ..treeprocessors import Treeprocessor
-from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
+from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
 from ..postprocessors import UnescapePostprocessor
 import re
+import html
 import unicodedata
 import xml.etree.ElementTree as etree
 
@@ -44,6 +45,18 @@ def unique(id, ids):
     return id
 
 
+def get_name(el):
+    """Get title name."""
+
+    text = []
+    for c in el.itertext():
+        if isinstance(c, AtomicString):
+            text.append(html.unescape(c))
+        else:
+            text.append(c)
+    return ''.join(text).strip()
+
+
 def stashedHTML2text(text, md, strip_entities=True):
     """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
     def _html_sub(m):
@@ -253,7 +266,7 @@ class TocTreeprocessor(Treeprocessor):
                 self.set_level(el)
                 if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom:
                     continue
-                text = ''.join(el.itertext()).strip()
+                text = get_name(el)
 
                 # Do not override pre-existing ids
                 if "id" not in el.attrib:
diff --git a/tests/test_syntax/extensions/test_toc.py b/tests/test_syntax/extensions/test_toc.py
index 5b9ad92..3fc9780 100644
--- a/tests/test_syntax/extensions/test_toc.py
+++ b/tests/test_syntax/extensions/test_toc.py
@@ -27,6 +27,28 @@ class TestTOC(TestCase):
 
     # TODO: Move the rest of the TOC tests here.
 
+    def test_escaped_code(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                '''
+                [TOC]
+
+                # `<test>`
+                '''
+            ),
+            self.dedent(
+                '''
+                <div class="toc">
+                <ul>
+                <li><a href="#test">&lt;test&gt;</a></li>
+                </ul>
+                </div>
+                <h1 id="test"><code>&lt;test&gt;</code></h1>
+                '''
+            ),
+            extensions=['toc']
+        )
+
     def test_escaped_char_in_id(self):
         self.assertMarkdownRenders(
             r'# escaped\_character',
author	Isaac Muse <faceless.shop@gmail.com>	2020-04-06 09:40:56 -0600
committer	GitHub <noreply@github.com>	2020-04-06 11:40:56 -0400
commit	ada40c6619fd0be740af646b043937e560716c95 (patch)
tree	23acd9542f62070ad5bf78e48ffc9c0113973592
parent	7c595e28491d3cca28bc9901bb099bca8bcf4626 (diff)
download	python-markdown-ada40c6619fd0be740af646b043937e560716c95.tar.gz