diff options
author | Waylan Limberg <waylan.limberg@icloud.com> | 2020-10-24 17:22:57 -0400 |
---|---|---|
committer | Waylan Limberg <waylan.limberg@icloud.com> | 2020-10-24 17:36:53 -0400 |
commit | 18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8 (patch) | |
tree | 60dbad108871892387bdd60dde3daf874e150ab7 | |
parent | 897c8541998cdbd0499b04942244f7fdf1191a6c (diff) | |
download | python-markdown-18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8.tar.gz |
Avoid catastrophic backtracking in `hr` regex
Fixes #1055.
-rw-r--r-- | docs/change_log/index.md | 1 | ||||
-rw-r--r-- | markdown/blockprocessors.py | 9 | ||||
-rw-r--r-- | tests/test_syntax/blocks/test_hr.py | 23 |
3 files changed, 28 insertions, 5 deletions
diff --git a/docs/change_log/index.md b/docs/change_log/index.md
index fffa216..a47afec 100644
--- a/docs/change_log/index.md
+++ b/docs/change_log/index.md
@@ -7,6 +7,7 @@ Under development: version 3.3.3 (a bug-fix release).
 
 * Unify all block-level tags (#1047).
 * Fix issue where some empty elements would have text rendered as `None` when using `md_in_html` (#1049).
+* Avoid catastrophic backtracking in `hr` regex (#1055).
 
 Oct 19, 2020: version 3.3.2 (a bug-fix release).
 
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 742f174..7d31a7f 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -496,16 +496,15 @@ class SetextHeaderProcessor(BlockProcessor):
 class HRProcessor(BlockProcessor):
     """ Process Horizontal Rules. """
 
-    RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*$'
+    # Python's re module doesn't officially support atomic grouping. However you can fake it.
+    # See https://stackoverflow.com/a/13577411/866026
+    RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
     # Detect hr on any line of a block.
     SEARCH_RE = re.compile(RE, re.MULTILINE)
 
     def test(self, parent, block):
         m = self.SEARCH_RE.search(block)
-        # No atomic grouping in python so we simulate it here for performance.
-        # The regex only matches what would be in the atomic group - the HR.
-        # Then check if we are at end of block or if next char is a newline.
-        if m and (m.end() == len(block) or block[m.end()] == '\n'):
+        if m:
             # Save match object on class instance so we can use it later.
             self.match = m
             return True
diff --git a/tests/test_syntax/blocks/test_hr.py b/tests/test_syntax/blocks/test_hr.py
index 009a39d..85a51b3 100644
--- a/tests/test_syntax/blocks/test_hr.py
+++ b/tests/test_syntax/blocks/test_hr.py
@@ -377,3 +377,26 @@ class TestHorizontalRules(TestCase):
 
             '<p>_ _</p>'
         )
+
+    def test_2_consecutive_hr(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                - - -
+                - - -
+                """
+            ),
+            self.dedent(
+                """
+                <hr />
+                <hr />
+                """
+            )
+        )
+
+    def test_not_hr_end_in_char(self):
+        self.assertMarkdownRenders(
+            '--------------------------------------c',
+
+            '<p>--------------------------------------c</p>'
+        )