Avoid catastrophic backtracking in `hr` regex

Fixes #1055.
author: Waylan Limberg <waylan.limberg@icloud.com> 2020-10-24 17:22:57 -0400
committer: Waylan Limberg <waylan.limberg@icloud.com> 2020-10-24 17:36:53 -0400
commit: 18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8 (patch)
tree: 60dbad108871892387bdd60dde3daf874e150ab7
parent: 897c8541998cdbd0499b04942244f7fdf1191a6c (diff)
download: python-markdown-18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8.tar.gz
3 files changed, 28 insertions, 5 deletions
diff --git a/docs/change_log/index.md b/docs/change_log/index.md
index fffa216..a47afec 100644
--- a/docs/change_log/index.md
+++ b/docs/change_log/index.md
@@ -7,6 +7,7 @@ Under development: version 3.3.3 (a bug-fix release).
 
 * Unify all block-level tags (#1047).
 * Fix issue where some empty elements would have text rendered as `None` when using `md_in_html` (#1049).
+* Avoid catastrophic backtracking in `hr` regex (#1055).
 
 Oct 19, 2020: version 3.3.2 (a bug-fix release).
 
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 742f174..7d31a7f 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -496,16 +496,15 @@ class SetextHeaderProcessor(BlockProcessor):
 class HRProcessor(BlockProcessor):
     """ Process Horizontal Rules. """
 
-    RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*$'
+    # Python's re module doesn't officially support atomic grouping. However, it can be emulated with a lookahead and a named backreference.
+    # See https://stackoverflow.com/a/13577411/866026
+    RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
     # Detect hr on any line of a block.
     SEARCH_RE = re.compile(RE, re.MULTILINE)
 
     def test(self, parent, block):
         m = self.SEARCH_RE.search(block)
-        # No atomic grouping in python so we simulate it here for performance.
-        # The regex only matches what would be in the atomic group - the HR.
-        # Then check if we are at end of block or if next char is a newline.
-        if m and (m.end() == len(block) or block[m.end()] == '\n'):
+        if m:
             # Save match object on class instance so we can use it later.
             self.match = m
             return True
diff --git a/tests/test_syntax/blocks/test_hr.py b/tests/test_syntax/blocks/test_hr.py
index 009a39d..85a51b3 100644
--- a/tests/test_syntax/blocks/test_hr.py
+++ b/tests/test_syntax/blocks/test_hr.py
@@ -377,3 +377,26 @@ class TestHorizontalRules(TestCase):
 
             '<p>_ _</p>'
         )
+
+    def test_2_consecutive_hr(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                - - -
+                - - -
+                """
+            ),
+            self.dedent(
+                """
+                <hr />
+                <hr />
+                """
+            )
+        )
+
+    def test_not_hr_end_in_char(self):
+        self.assertMarkdownRenders(
+            '--------------------------------------c',
+
+            '<p>--------------------------------------c</p>'
+        )
author	Waylan Limberg <waylan.limberg@icloud.com>	2020-10-24 17:22:57 -0400
committer	Waylan Limberg <waylan.limberg@icloud.com>	2020-10-24 17:36:53 -0400
commit	18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8 (patch)
tree	60dbad108871892387bdd60dde3daf874e150ab7
parent	897c8541998cdbd0499b04942244f7fdf1191a6c (diff)
download	python-markdown-18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8.tar.gz