diff options
author | David Lord <davidism@gmail.com> | 2023-05-01 07:24:02 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-01 07:24:02 -0700 |
commit | 07c27a803b0902ec114bf809bee26824ffd82ec5 (patch) | |
tree | 185ce78f2cbdbcaa25733b7e17a1df39f9969372 | |
parent | 53f571cf3a6aafc1f46a18d7f893652997dc9cb9 (diff) | |
parent | f0a1733f52a7241f51cc519593233e8be6aeaa0e (diff) | |
download | werkzeug-07c27a803b0902ec114bf809bee26824ffd82ec5.tar.gz |
Fix the parsing of large multipart bodies (#2678)
-rw-r--r-- | CHANGES.rst | 2 | ||||
-rw-r--r-- | src/werkzeug/sansio/multipart.py | 38 |
2 files changed, 25 insertions, 15 deletions
```diff
diff --git a/CHANGES.rst b/CHANGES.rst
index 075ca2bc..091aa553 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -5,6 +5,8 @@ Version 2.3.3
 
 Unreleased
 
+- Fix parsing of large multipart bodies. Remove invalid leading newline, and restore
+  parsing speed. :issue:`2658, 2675`
 - The cookie ``Path`` attribute is set to ``/`` by default again, to prevent clients
   from falling back to RFC 6265's ``default-path`` behavior. :issue:`2672, 2679`
 
diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py
index ae633b81..11e65ed0 100644
--- a/src/werkzeug/sansio/multipart.py
+++ b/src/werkzeug/sansio/multipart.py
@@ -121,15 +121,15 @@ class MultipartDecoder:
         self._search_position = 0
         self._parts_decoded = 0
 
-    def last_newline(self) -> int:
+    def last_newline(self, data: bytes) -> int:
         try:
-            last_nl = self.buffer.rindex(b"\n")
+            last_nl = data.rindex(b"\n")
         except ValueError:
-            last_nl = len(self.buffer)
+            last_nl = len(data)
         try:
-            last_cr = self.buffer.rindex(b"\r")
+            last_cr = data.rindex(b"\r")
         except ValueError:
-            last_cr = len(self.buffer)
+            last_cr = len(data)
 
         return min(last_nl, last_cr)
 
@@ -251,17 +251,25 @@ class MultipartDecoder:
         else:
             data_start = 0
 
-        match = self.boundary_re.search(data)
-        if match is not None:
-            if match.group(1).startswith(b"--"):
-                self.state = State.EPILOGUE
-            else:
-                self.state = State.PART
-            data_end = match.start()
-            del_index = match.end()
+        if self.buffer.find(b"--" + self.boundary) == -1:
+            # No complete boundary in the buffer, but there may be
+            # a partial boundary at the end. As the boundary
+            # starts with either a nl or cr find the earliest and
+            # return up to that as data.
+            data_end = del_index = self.last_newline(data[data_start:])
+            more_data = True
         else:
-            data_end = del_index = self.last_newline()
-        more_data = match is None
+            match = self.boundary_re.search(data)
+            if match is not None:
+                if match.group(1).startswith(b"--"):
+                    self.state = State.EPILOGUE
+                else:
+                    self.state = State.PART
+                data_end = match.start()
+                del_index = match.end()
+            else:
+                data_end = del_index = self.last_newline(data[data_start:])
+            more_data = match is None
 
         return bytes(data[data_start:data_end]), del_index, more_data
 
```