summary refs log tree commit diff
diff options
context:
space:
mode:
author Waylan Limberg <waylan.limberg@icloud.com> 2020-10-24 17:22:57 -0400
committer Waylan Limberg <waylan.limberg@icloud.com> 2020-10-24 17:36:53 -0400
commit 18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8 (patch)
tree 60dbad108871892387bdd60dde3daf874e150ab7
parent 897c8541998cdbd0499b04942244f7fdf1191a6c (diff)
download python-markdown-18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8.tar.gz
Avoid catastrophic backtracking in `hr` regex
Fixes #1055.
-rw-r--r-- docs/change_log/index.md 1
-rw-r--r-- markdown/blockprocessors.py 9
-rw-r--r-- tests/test_syntax/blocks/test_hr.py 23
3 files changed, 28 insertions, 5 deletions
diff --git a/docs/change_log/index.md b/docs/change_log/index.md
index fffa216..a47afec 100644
--- a/docs/change_log/index.md
+++ b/docs/change_log/index.md
@@ -7,6 +7,7 @@ Under development: version 3.3.3 (a bug-fix release).
* Unify all block-level tags (#1047).
* Fix issue where some empty elements would have text rendered as `None` when using `md_in_html` (#1049).
+* Avoid catastrophic backtracking in `hr` regex (#1055).
Oct 19, 2020: version 3.3.2 (a bug-fix release).
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 742f174..7d31a7f 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -496,16 +496,15 @@ class SetextHeaderProcessor(BlockProcessor):
class HRProcessor(BlockProcessor):
""" Process Horizontal Rules. """
- RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*$'
+ # Python's re module doesn't officially support atomic grouping. However you can fake it.
+ # See https://stackoverflow.com/a/13577411/866026
+ RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
# Detect hr on any line of a block.
SEARCH_RE = re.compile(RE, re.MULTILINE)
def test(self, parent, block):
m = self.SEARCH_RE.search(block)
- # No atomic grouping in python so we simulate it here for performance.
- # The regex only matches what would be in the atomic group - the HR.
- # Then check if we are at end of block or if next char is a newline.
- if m and (m.end() == len(block) or block[m.end()] == '\n'):
+ if m:
# Save match object on class instance so we can use it later.
self.match = m
return True
diff --git a/tests/test_syntax/blocks/test_hr.py b/tests/test_syntax/blocks/test_hr.py
index 009a39d..85a51b3 100644
--- a/tests/test_syntax/blocks/test_hr.py
+++ b/tests/test_syntax/blocks/test_hr.py
@@ -377,3 +377,26 @@ class TestHorizontalRules(TestCase):
'<p>_ _</p>'
)
+
+ def test_2_consecutive_hr(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+ - - -
+ - - -
+ """
+ ),
+ self.dedent(
+ """
+ <hr />
+ <hr />
+ """
+ )
+ )
+
+ def test_not_hr_end_in_char(self):
+ self.assertMarkdownRenders(
+ '--------------------------------------c',
+
+ '<p>--------------------------------------c</p>'
+ )