summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Waylan Limberg <waylan.limberg@icloud.com> 2022-07-15 08:38:34 -0400
committer GitHub <noreply@github.com> 2022-07-15 08:38:34 -0400
commit c0f6e5a31ea8e7fe98910a0523144c2a96fa9bf1 (patch)
tree 5686120668ce35073d08e08a366d6d3371ab2b17
parent 77fb7f1b51076becff488a9b42ef2883153262a0 (diff)
download python-markdown-c0f6e5a31ea8e7fe98910a0523144c2a96fa9bf1.tar.gz
Move backslash unescaping to treeprocessor
By unescaping backslash escapes in a treeprocessor, the text is properly escaped during serialization. Fixes #1131. Because various third-party extensions may be calling the old class at `postprocessors.UnescapePostprocessor`, the old class remains in the codebase, but it has been deprecated and will be removed in a future release. The new class `treeprocessors.UnescapeTreeprocessor` should be used instead.
-rw-r--r-- docs/change_log/release-3.4.md 10
-rw-r--r-- markdown/extensions/toc.py 10
-rw-r--r-- markdown/postprocessors.py 5
-rw-r--r-- markdown/treeprocessors.py 27
-rw-r--r-- tests/basic/backlash-escapes.html 2
-rw-r--r-- tests/test_syntax/extensions/test_smarty.py 36
6 files changed, 82 insertions, 8 deletions
diff --git a/docs/change_log/release-3.4.md b/docs/change_log/release-3.4.md
index 9d1cd17..9db6707 100644
--- a/docs/change_log/release-3.4.md
+++ b/docs/change_log/release-3.4.md
@@ -30,10 +30,18 @@ markdown.markdown(src, extensions=[TableExtension(use_align_attribute=True)])
In addition, tests were moved to the modern test environment.
+### Backslash unescaping moved to Treeprocessor (#1131).
+
+Unescaping backslash escapes has been moved to a Treeprocessor. However, it is
+recognized that various third-party extensions may be calling the old class at
+`postprocessors.UnescapePostprocessor`. Therefore, the old class remains in the
+code base, but has been deprecated and will be removed in a future release. The
+new class `treeprocessors.UnescapeTreeprocessor` should be used instead.
+
### Previously deprecated objects have been removed
Various objects were deprecated in version 3.0 and began raising deprecation
-warnings (see the [version 3.0 release notes] for details). Any of those object
+warnings (see the [version 3.0 release notes] for details). Any of those objects
which remained in version 3.3 have been removed from the code base in version 3.4
and will now raise errors. A summary of the objects are provided below.
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
index 80138b3..1ded18d 100644
--- a/markdown/extensions/toc.py
+++ b/markdown/extensions/toc.py
@@ -16,7 +16,7 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
-from ..postprocessors import UnescapePostprocessor
+from ..treeprocessors import UnescapeTreeprocessor
import re
import html
import unicodedata
@@ -84,8 +84,8 @@ def stashedHTML2text(text, md, strip_entities=True):
def unescape(text):
""" Unescape escaped text. """
- c = UnescapePostprocessor()
- return c.run(text)
+ c = UnescapeTreeprocessor()
+ return c.unescape(text)
def nest_toc_tokens(toc_list):
@@ -289,10 +289,10 @@ class TocTreeprocessor(Treeprocessor):
toc_tokens.append({
'level': int(el.tag[-1]),
'id': el.attrib["id"],
- 'name': unescape(stashedHTML2text(
+ 'name': stashedHTML2text(
code_escape(el.attrib.get('data-toc-label', text)),
self.md, strip_entities=False
- ))
+ )
})
# Remove the data-toc-label attribute as it is no longer needed
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
index f4fb924..498f7e8 100644
--- a/markdown/postprocessors.py
+++ b/markdown/postprocessors.py
@@ -37,7 +37,6 @@ def build_postprocessors(md, **kwargs):
postprocessors = util.Registry()
postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30)
postprocessors.register(AndSubstitutePostprocessor(), 'amp_substitute', 20)
- postprocessors.register(UnescapePostprocessor(), 'unescape', 10)
return postprocessors
@@ -122,6 +121,10 @@ class AndSubstitutePostprocessor(Postprocessor):
return text
+@util.deprecated(
+ "This class will be removed in the future; "
+ "use 'treeprocessors.UnescapeTreeprocessor' instead."
+)
class UnescapePostprocessor(Postprocessor):
""" Restore escaped chars """
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index e02a505..e9f48ca 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -19,6 +19,7 @@ Copyright 2004 Manfred Stienstra (the original version)
License: BSD (see LICENSE.md for details).
"""
+import re
import xml.etree.ElementTree as etree
from . import util
from . import inlinepatterns
@@ -29,6 +30,7 @@ def build_treeprocessors(md, **kwargs):
treeprocessors = util.Registry()
treeprocessors.register(InlineProcessor(md), 'inline', 20)
treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10)
+ treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0)
return treeprocessors
@@ -429,3 +431,28 @@ class PrettifyTreeprocessor(Treeprocessor):
# Only prettify code containing text only
if not len(code) and code.text is not None:
code.text = util.AtomicString(code.text.rstrip() + '\n')
+
+
+class UnescapeTreeprocessor(Treeprocessor):
+ """ Restore escaped chars """
+
+ RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))
+
+ def _unescape(self, m):
+ return chr(int(m.group(1)))
+
+ def unescape(self, text):
+ return self.RE.sub(self._unescape, text)
+
+ def run(self, root):
+ """ Loop over all elements and unescape all text. """
+ for elem in root.iter():
+ # Unescape text content
+ if elem.text and not elem.tag == 'code':
+ elem.text = self.unescape(elem.text)
+ # Unescape tail content
+ if elem.tail:
+ elem.tail = self.unescape(elem.tail)
+ # Unescape attribute values
+ for key, value in elem.items():
+ elem.set(key, self.unescape(value))
diff --git a/tests/basic/backlash-escapes.html b/tests/basic/backlash-escapes.html
index ef7c4b5..876775f 100644
--- a/tests/basic/backlash-escapes.html
+++ b/tests/basic/backlash-escapes.html
@@ -9,7 +9,7 @@
<p>Right bracket: ]</p>
<p>Left paren: (</p>
<p>Right paren: )</p>
-<p>Greater-than: ></p>
+<p>Greater-than: &gt;</p>
<p>Hash: #</p>
<p>Period: .</p>
<p>Bang: !</p>
diff --git a/tests/test_syntax/extensions/test_smarty.py b/tests/test_syntax/extensions/test_smarty.py
new file mode 100644
index 0000000..fc635ad
--- /dev/null
+++ b/tests/test_syntax/extensions/test_smarty.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+"""
+Python Markdown
+
+A Python implementation of John Gruber's Markdown.
+
+Documentation: https://python-markdown.github.io/
+GitHub: https://github.com/Python-Markdown/markdown/
+PyPI: https://pypi.org/project/Markdown/
+
+Started by Manfred Stienstra (http://www.dwerg.net/).
+Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+Currently maintained by Waylan Limberg (https://github.com/waylan),
+Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+
+Copyright 2007-2022 The Python Markdown Project (v. 1.7 and later)
+Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+Copyright 2004 Manfred Stienstra (the original version)
+
+License: BSD (see LICENSE.md for details).
+"""
+
+from markdown.test_tools import TestCase
+
+
+class TestSmarty(TestCase):
+
+ default_kwargs = {'extensions': ['smarty']}
+
+ def test_escaped_attr(self):
+ self.assertMarkdownRenders(
+ '![x\"x](x)',
+ '<p><img alt="x&quot;x" src="x" /></p>'
+ )
+
+ # TODO: Move rest of smarty tests here.