Fix the parsing of large multipart bodies (#2678)

author: David Lord <davidism@gmail.com> 2023-05-01 07:24:02 -0700
committer: GitHub <noreply@github.com> 2023-05-01 07:24:02 -0700
commit: 07c27a803b0902ec114bf809bee26824ffd82ec5 (patch)
tree: 185ce78f2cbdbcaa25733b7e17a1df39f9969372
parent: 53f571cf3a6aafc1f46a18d7f893652997dc9cb9 (diff)
parent: f0a1733f52a7241f51cc519593233e8be6aeaa0e (diff)
download: werkzeug-07c27a803b0902ec114bf809bee26824ffd82ec5.tar.gz
2 files changed, 25 insertions, 15 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 075ca2bc..091aa553 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -5,6 +5,8 @@ Version 2.3.3
 
 Unreleased
 
+-   Fix parsing of large multipart bodies. Remove invalid leading newline, and restore
+    parsing speed. :issue:`2658, 2675`
 -   The cookie ``Path`` attribute is set to ``/`` by default again, to prevent clients
     from falling back to RFC 6265's ``default-path`` behavior. :issue:`2672, 2679`
 
diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py
index ae633b81..11e65ed0 100644
--- a/src/werkzeug/sansio/multipart.py
+++ b/src/werkzeug/sansio/multipart.py
@@ -121,15 +121,15 @@ class MultipartDecoder:
         self._search_position = 0
         self._parts_decoded = 0
 
-    def last_newline(self) -> int:
+    def last_newline(self, data: bytes) -> int:
         try:
-            last_nl = self.buffer.rindex(b"\n")
+            last_nl = data.rindex(b"\n")
         except ValueError:
-            last_nl = len(self.buffer)
+            last_nl = len(data)
         try:
-            last_cr = self.buffer.rindex(b"\r")
+            last_cr = data.rindex(b"\r")
         except ValueError:
-            last_cr = len(self.buffer)
+            last_cr = len(data)
 
         return min(last_nl, last_cr)
 
@@ -251,17 +251,25 @@ class MultipartDecoder:
         else:
             data_start = 0
 
-        match = self.boundary_re.search(data)
-        if match is not None:
-            if match.group(1).startswith(b"--"):
-                self.state = State.EPILOGUE
-            else:
-                self.state = State.PART
-            data_end = match.start()
-            del_index = match.end()
+        if self.buffer.find(b"--" + self.boundary) == -1:
+            # No complete boundary in the buffer, but there may be
+            # a partial boundary at the end. Since a boundary starts
+            # with either a nl or cr, find the earlier of the last nl
+            # and the last cr, and return the data before it.
+            data_end = del_index = self.last_newline(data[data_start:])
+            more_data = True
         else:
-            data_end = del_index = self.last_newline()
-        more_data = match is None
+            match = self.boundary_re.search(data)
+            if match is not None:
+                if match.group(1).startswith(b"--"):
+                    self.state = State.EPILOGUE
+                else:
+                    self.state = State.PART
+                data_end = match.start()
+                del_index = match.end()
+            else:
+                data_end = del_index = self.last_newline(data[data_start:])
+            more_data = match is None
 
         return bytes(data[data_start:data_end]), del_index, more_data
author	David Lord <davidism@gmail.com>	2023-05-01 07:24:02 -0700
committer	GitHub <noreply@github.com>	2023-05-01 07:24:02 -0700
commit	07c27a803b0902ec114bf809bee26824ffd82ec5 (patch)
tree	185ce78f2cbdbcaa25733b7e17a1df39f9969372
parent	53f571cf3a6aafc1f46a18d7f893652997dc9cb9 (diff)
parent	f0a1733f52a7241f51cc519593233e8be6aeaa0e (diff)
download	werkzeug-07c27a803b0902ec114bf809bee26824ffd82ec5.tar.gz