summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Lord <davidism@gmail.com>2023-05-01 07:24:02 -0700
committerGitHub <noreply@github.com>2023-05-01 07:24:02 -0700
commit07c27a803b0902ec114bf809bee26824ffd82ec5 (patch)
tree185ce78f2cbdbcaa25733b7e17a1df39f9969372
parent53f571cf3a6aafc1f46a18d7f893652997dc9cb9 (diff)
parentf0a1733f52a7241f51cc519593233e8be6aeaa0e (diff)
downloadwerkzeug-07c27a803b0902ec114bf809bee26824ffd82ec5.tar.gz
Fix the parsing of large multipart bodies (#2678)
-rw-r--r--CHANGES.rst2
-rw-r--r--src/werkzeug/sansio/multipart.py38
2 files changed, 25 insertions, 15 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 075ca2bc..091aa553 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -5,6 +5,8 @@ Version 2.3.3
Unreleased
+- Fix parsing of large multipart bodies. Remove invalid leading newline, and restore
+ parsing speed. :issue:`2658, 2675`
- The cookie ``Path`` attribute is set to ``/`` by default again, to prevent clients
from falling back to RFC 6265's ``default-path`` behavior. :issue:`2672, 2679`
diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py
index ae633b81..11e65ed0 100644
--- a/src/werkzeug/sansio/multipart.py
+++ b/src/werkzeug/sansio/multipart.py
@@ -121,15 +121,15 @@ class MultipartDecoder:
self._search_position = 0
self._parts_decoded = 0
- def last_newline(self) -> int:
+ def last_newline(self, data: bytes) -> int:
try:
- last_nl = self.buffer.rindex(b"\n")
+ last_nl = data.rindex(b"\n")
except ValueError:
- last_nl = len(self.buffer)
+ last_nl = len(data)
try:
- last_cr = self.buffer.rindex(b"\r")
+ last_cr = data.rindex(b"\r")
except ValueError:
- last_cr = len(self.buffer)
+ last_cr = len(data)
return min(last_nl, last_cr)
@@ -251,17 +251,25 @@ class MultipartDecoder:
else:
data_start = 0
- match = self.boundary_re.search(data)
- if match is not None:
- if match.group(1).startswith(b"--"):
- self.state = State.EPILOGUE
- else:
- self.state = State.PART
- data_end = match.start()
- del_index = match.end()
+ if self.buffer.find(b"--" + self.boundary) == -1:
+ # No complete boundary in the buffer, but there may be
+ # a partial boundary at the end. As the boundary
+ # starts with either a nl or cr find the earliest and
+ # return up to that as data.
+ data_end = del_index = self.last_newline(data[data_start:])
+ more_data = True
else:
- data_end = del_index = self.last_newline()
- more_data = match is None
+ match = self.boundary_re.search(data)
+ if match is not None:
+ if match.group(1).startswith(b"--"):
+ self.state = State.EPILOGUE
+ else:
+ self.state = State.PART
+ data_end = match.start()
+ del_index = match.end()
+ else:
+ data_end = del_index = self.last_newline(data[data_start:])
+ more_data = match is None
return bytes(data[data_start:data_end]), del_index, more_data