summaryrefslogtreecommitdiff
path: root/Lib/pickle.py
diff options
context:
space:
mode:
author: Olivier Grisel <olivier.grisel@ensta.org> 2018-01-06 16:18:54 +0100
committer: Serhiy Storchaka <storchaka@gmail.com> 2018-01-06 17:18:54 +0200
commit: 3cd7c6e6eb43dbd7d7180503265772a67953e682 (patch)
tree: 7f09aaed6d17611ef6904591525d74de89f854e9 /Lib/pickle.py
parent: 85ac726a40707ae68a23d568c322868e353217ce (diff)
download: cpython-git-3cd7c6e6eb43dbd7d7180503265772a67953e682.tar.gz
bpo-31993: Do not allocate large temporary buffers in pickle dump. (#4353)
The picklers no longer allocate temporary memory when dumping large bytes and str objects into a file object. Instead, the data is streamed directly into the underlying file object. Previously, the C implementation would buffer all content and issue a single call to file.write() at the end of the dump. With protocol 4, this behavior has changed to issue one call to file.write() per frame. The Python pickler with protocol 4 now dumps each frame's content as a memoryview of an io.BytesIO instance that is never reused, and the memoryview is no longer released after the call to write. This makes it possible for the file object to delay access to the memoryview of previous frames without forcing any additional memory copy, as was already possible with the C pickler.
Diffstat (limited to 'Lib/pickle.py')
-rw-r--r--Lib/pickle.py50
1 files changed, 40 insertions, 10 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 350d4a46c0..301e8cf558 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -201,14 +201,24 @@ class _Framer:
if self.current_frame:
f = self.current_frame
if f.tell() >= self._FRAME_SIZE_TARGET or force:
- with f.getbuffer() as data:
- n = len(data)
- write = self.file_write
- write(FRAME)
- write(pack("<Q", n))
- write(data)
- f.seek(0)
- f.truncate()
+ data = f.getbuffer()
+ write = self.file_write
+ # Issue a single call to the write method of the underlying
+ # file object for the frame opcode with the size of the
+ # frame. The concatenation is expected to be less expensive
+ # than issuing an additional call to write.
+ write(FRAME + pack("<Q", len(data)))
+
+ # Issue a separate call to write to append the frame
+ # contents without concatenation to the above to avoid a
+ # memory copy.
+ write(data)
+
+ # Start the new frame with a new io.BytesIO instance so that
+ # the file object can have delayed access to the previous frame
+ # contents via an unreleased memoryview of the previous
+ # io.BytesIO instance.
+ self.current_frame = io.BytesIO()
def write(self, data):
if self.current_frame:
@@ -216,6 +226,21 @@ class _Framer:
else:
return self.file_write(data)
+ def write_large_bytes(self, header, payload):
+ write = self.file_write
+ if self.current_frame:
+ # Terminate the current frame and flush it to the file.
+ self.commit_frame(force=True)
+
+ # Perform direct write of the header and payload of the large binary
+ # object. Be careful not to concatenate the header and the payload
+ # prior to calling 'write' as we do not want to allocate a large
+ # temporary bytes object.
+ # We intentionally do not insert a protocol 4 frame opcode to make
+ # it possible to optimize file.read calls in the loader.
+ write(header)
+ write(payload)
+
class _Unframer:
@@ -379,6 +404,7 @@ class _Pickler:
raise TypeError("file must have a 'write' attribute")
self.framer = _Framer(self._file_write)
self.write = self.framer.write
+ self._write_large_bytes = self.framer.write_large_bytes
self.memo = {}
self.proto = int(protocol)
self.bin = protocol >= 1
@@ -699,7 +725,9 @@ class _Pickler:
if n <= 0xff:
self.write(SHORT_BINBYTES + pack("<B", n) + obj)
elif n > 0xffffffff and self.proto >= 4:
- self.write(BINBYTES8 + pack("<Q", n) + obj)
+ self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
+ elif n >= self.framer._FRAME_SIZE_TARGET:
+ self._write_large_bytes(BINBYTES + pack("<I", n), obj)
else:
self.write(BINBYTES + pack("<I", n) + obj)
self.memoize(obj)
@@ -712,7 +740,9 @@ class _Pickler:
if n <= 0xff and self.proto >= 4:
self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
elif n > 0xffffffff and self.proto >= 4:
- self.write(BINUNICODE8 + pack("<Q", n) + encoded)
+ self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
+ elif n >= self.framer._FRAME_SIZE_TARGET:
+ self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
else:
self.write(BINUNICODE + pack("<I", n) + encoded)
else: