summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorB. Caller <73827525+b-c-ds@users.noreply.github.com>2021-05-07 15:34:39 +0100
committerWaylan Limberg <waylan.limberg@icloud.com>2021-05-07 13:09:20 -0400
commiteacff473a2600902c200af8c88446af6c183203f (patch)
tree5df2bffcb907545eb373fea64a75d58481b68a2e
parent4acb949256adc535d6e6cd84c4fb47db8dda2f46 (diff)
downloadpython-markdown-eacff473a2600902c200af8c88446af6c183203f.tar.gz
Fix cubic ReDoS in fenced code and references
Two regular expressions were vulnerable to Regular Expression Denial of Service (ReDoS). Crafted strings containing a long sequence of spaces could cause Denial of Service by making markdown take a long time to process. This represents a vulnerability when untrusted user input is processed with the markdown package. ReferenceProcessor: https://github.com/Python-Markdown/markdown/blob/4acb949256adc535d6e6cd8/markdown/blockprocessors.py#L559-L563 e.g.: ```python import markdown markdown.markdown('[]:0' + ' ' * 4321 + '0') ``` FencedBlockPreprocessor (requires fenced_code extension): https://github.com/Python-Markdown/markdown/blob/a11431539d08e14b0bd821c/markdown/extensions/fenced_code.py#L43-L54 e.g.: ```python import markdown markdown.markdown('```' + ' ' * 4321, extensions=['fenced_code']) ``` Both regular expressions had cubic worst-case complexity, so doubling the number of spaces made processing take 8 times as long. The cubic behaviour can be seen as follows: ``` $ time python -c "import markdown; markdown.markdown('[]:0' + ' ' * 1000 + '0')" python -c "import markdown; markdown.markdown('[]:0' + ' ' * 1000 + '0')" 1.25s user 0.02s system 99% cpu 1.271 total $ time python -c "import markdown; markdown.markdown('[]:0' + ' ' * 2000 + '0')" python -c "import markdown; markdown.markdown('[]:0' + ' ' * 2000 + '0')" 9.01s user 0.02s system 99% cpu 9.040 total $ time python -c "import markdown; markdown.markdown('[]:0' + ' ' * 4000 + '0')" python -c "import markdown; markdown.markdown('[]:0' + ' ' * 4000 + '0')" 74.86s user 0.27s system 99% cpu 1:15.38 total ``` Both regexes had three `[ ]*` groups separated by optional groups, in effect making the regex `[ ]*[ ]*[ ]*`. Discovered using [regexploit](https://github.com/doyensec/regexploit).
-rw-r--r--markdown/blockprocessors.py2
-rw-r--r--markdown/extensions/fenced_code.py14
2 files changed, 8 insertions, 8 deletions
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 8518e50..dac3f08 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -559,7 +559,7 @@ class EmptyBlockProcessor(BlockProcessor):
class ReferenceProcessor(BlockProcessor):
""" Process link references. """
RE = re.compile(
- r'^[ ]{0,3}\[([^\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*\n?[ ]*((["\'])(.*)\4|\((.*)\))?[ ]*$', re.MULTILINE
+ r'^[ ]{0,3}\[([^\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
)
def test(self, parent, block):
diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
index 716b467..04c249e 100644
--- a/markdown/extensions/fenced_code.py
+++ b/markdown/extensions/fenced_code.py
@@ -42,13 +42,13 @@ class FencedCodeExtension(Extension):
class FencedBlockPreprocessor(Preprocessor):
FENCED_BLOCK_RE = re.compile(
dedent(r'''
- (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
- ((\{(?P<attrs>[^\}\n]*)\})?| # (optional {attrs} or
- (\.?(?P<lang>[\w#.+-]*))?[ ]* # optional (.)lang
- (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?) # optional hl_lines)
- [ ]*\n # newline (end of opening fence)
- (?P<code>.*?)(?<=\n) # the code block
- (?P=fence)[ ]*$ # closing fence
+ (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
+ ((\{(?P<attrs>[^\}\n]*)\})| # (optional {attrs} or
+ (\.?(?P<lang>[\w#.+-]*)[ ]*)? # optional (.)lang
+ (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
+ \n # newline (end of opening fence)
+ (?P<code>.*?)(?<=\n) # the code block
+ (?P=fence)[ ]*$ # closing fence
'''),
re.MULTILINE | re.DOTALL | re.VERBOSE
)