summary refs log tree commit diff
diff options
context:
space:
mode:
author Isaac Muse <faceless.shop@gmail.com> 2020-04-06 09:40:56 -0600
committer GitHub <noreply@github.com> 2020-04-06 11:40:56 -0400
commit ada40c6619fd0be740af646b043937e560716c95 (patch)
tree 23acd9542f62070ad5bf78e48ffc9c0113973592
parent 7c595e28491d3cca28bc9901bb099bca8bcf4626 (diff)
download python-markdown-ada40c6619fd0be740af646b043937e560716c95.tar.gz
TOC fix for AtomicString handling (#934)
Fixes #931.
-rw-r--r-- docs/change_log/index.md 6
-rw-r--r-- markdown/extensions/toc.py 17
-rw-r--r-- tests/test_syntax/extensions/test_toc.py 22
3 files changed, 42 insertions, 3 deletions
diff --git a/docs/change_log/index.md b/docs/change_log/index.md
index 62fe4b2..3370c4f 100644
--- a/docs/change_log/index.md
+++ b/docs/change_log/index.md
@@ -3,7 +3,11 @@ title: Change Log
Python-Markdown Change Log
=========================
-Feb 12, 2020: Released version 3.2.1 (a bug-fix release).
+Under development: version 3.2.2 (a bug-fix release).
+
+* Fixed issue where double escaped entities could end up in TOC.
+
+Feb 12, 2020: Released version 3.2.1 (a bug-fix release).
* The `name` property in `toc_tokens` from the TOC extension now
escapes HTML special characters (`<`, `>`, and `&`).
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
index 8f2b13f..b6cdc73 100644
--- a/markdown/extensions/toc.py
+++ b/markdown/extensions/toc.py
@@ -15,9 +15,10 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
from . import Extension
from ..treeprocessors import Treeprocessor
-from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
+from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
from ..postprocessors import UnescapePostprocessor
import re
+import html
import unicodedata
import xml.etree.ElementTree as etree
@@ -44,6 +45,18 @@ def unique(id, ids):
return id
+def get_name(el):
+ """Get title name."""
+
+ text = []
+ for c in el.itertext():
+ if isinstance(c, AtomicString):
+ text.append(html.unescape(c))
+ else:
+ text.append(c)
+ return ''.join(text).strip()
+
+
def stashedHTML2text(text, md, strip_entities=True):
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
def _html_sub(m):
@@ -253,7 +266,7 @@ class TocTreeprocessor(Treeprocessor):
self.set_level(el)
if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom:
continue
- text = ''.join(el.itertext()).strip()
+ text = get_name(el)
# Do not override pre-existing ids
if "id" not in el.attrib:
diff --git a/tests/test_syntax/extensions/test_toc.py b/tests/test_syntax/extensions/test_toc.py
index 5b9ad92..3fc9780 100644
--- a/tests/test_syntax/extensions/test_toc.py
+++ b/tests/test_syntax/extensions/test_toc.py
@@ -27,6 +27,28 @@ class TestTOC(TestCase):
# TODO: Move the rest of the TOC tests here.
+ def test_escaped_code(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ '''
+ [TOC]
+
+ # `<test>`
+ '''
+ ),
+ self.dedent(
+ '''
+ <div class="toc">
+ <ul>
+ <li><a href="#test">&lt;test&gt;</a></li>
+ </ul>
+ </div>
+ <h1 id="test"><code>&lt;test&gt;</code></h1>
+ '''
+ ),
+ extensions=['toc']
+ )
+
def test_escaped_char_in_id(self):
self.assertMarkdownRenders(
r'# escaped\_character',