summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: pgjones <philip.graham.jones@googlemail.com> 2023-05-01 13:53:20 +0100
committer: David Lord <davidism@gmail.com> 2023-05-01 07:16:23 -0700
commit: f0a1733f52a7241f51cc519593233e8be6aeaa0e (patch)
tree: 185ce78f2cbdbcaa25733b7e17a1df39f9969372
parent: 53f571cf3a6aafc1f46a18d7f893652997dc9cb9 (diff)
download: werkzeug-f0a1733f52a7241f51cc519593233e8be6aeaa0e.tar.gz
Fix the parsing of large multipart bodies
There were two issues to fix. Firstly, if a boundary couldn't be found, the parser should have parsed up to the end of the buffer or the last newline (whichever is earlier). However, since 082e0e5b9c01fa3178ac0153413f082616f10914 the last newline would be the first character, because in the DATA_START state the buffer starts with a newline. This was fixed by changing the last_newline method to take the data to search as an argument. Secondly, the parsing was slow because the shortcut search for the boundary had been removed, resulting in a full regex match on each iteration. Restoring the shortcut restores the previous performance.
-rw-r--r-- CHANGES.rst 2
-rw-r--r-- src/werkzeug/sansio/multipart.py 38
2 files changed, 25 insertions, 15 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 075ca2bc..091aa553 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -5,6 +5,8 @@ Version 2.3.3
Unreleased
+- Fix parsing of large multipart bodies. Remove invalid leading newline, and restore
+ parsing speed. :issue:`2658, 2675`
- The cookie ``Path`` attribute is set to ``/`` by default again, to prevent clients
from falling back to RFC 6265's ``default-path`` behavior. :issue:`2672, 2679`
diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py
index ae633b81..11e65ed0 100644
--- a/src/werkzeug/sansio/multipart.py
+++ b/src/werkzeug/sansio/multipart.py
@@ -121,15 +121,15 @@ class MultipartDecoder:
self._search_position = 0
self._parts_decoded = 0
- def last_newline(self) -> int:
+ def last_newline(self, data: bytes) -> int:
try:
- last_nl = self.buffer.rindex(b"\n")
+ last_nl = data.rindex(b"\n")
except ValueError:
- last_nl = len(self.buffer)
+ last_nl = len(data)
try:
- last_cr = self.buffer.rindex(b"\r")
+ last_cr = data.rindex(b"\r")
except ValueError:
- last_cr = len(self.buffer)
+ last_cr = len(data)
return min(last_nl, last_cr)
@@ -251,17 +251,25 @@ class MultipartDecoder:
else:
data_start = 0
- match = self.boundary_re.search(data)
- if match is not None:
- if match.group(1).startswith(b"--"):
- self.state = State.EPILOGUE
- else:
- self.state = State.PART
- data_end = match.start()
- del_index = match.end()
+ if self.buffer.find(b"--" + self.boundary) == -1:
+ # No complete boundary in the buffer, but there may be
+ # a partial boundary at the end. As the boundary
+ # starts with either a nl or cr find the earliest and
+ # return up to that as data.
+ data_end = del_index = self.last_newline(data[data_start:])
+ more_data = True
else:
- data_end = del_index = self.last_newline()
- more_data = match is None
+ match = self.boundary_re.search(data)
+ if match is not None:
+ if match.group(1).startswith(b"--"):
+ self.state = State.EPILOGUE
+ else:
+ self.state = State.PART
+ data_end = match.start()
+ del_index = match.end()
+ else:
+ data_end = del_index = self.last_newline(data[data_start:])
+ more_data = match is None
return bytes(data[data_start:data_end]), del_index, more_data