From 0a2da50e1867831251fad75377d0f10713eb6301 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 11 Jan 2018 13:03:20 +0200 Subject: bpo-31993: Do not create frames for large bytes and str objects (#5114) when serialize into memory buffer with C pickle implementations. This optimization already is performed when serialize into memory with Python pickle implementations or into a file with both implementations. --- Modules/_pickle.c | 180 ++++++++++++++++++++++++++---------------------------- 1 file changed, 86 insertions(+), 94 deletions(-) (limited to 'Modules/_pickle.c') diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 5cb1fba6cc..5b3de4d96d 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2142,47 +2142,74 @@ done: return 0; } -/* No-copy code-path to write large contiguous data directly into the - underlying file object, bypassing the output_buffer of the Pickler. */ -static int -_Pickler_write_large_bytes( - PicklerObject *self, const char *header, Py_ssize_t header_size, - PyObject *payload) -{ - assert(self->output_buffer != NULL); - assert(self->write != NULL); - PyObject *result; +/* Perform direct write of the header and payload of the binary object. - /* Commit the previous frame. */ - if (_Pickler_CommitFrame(self)) { - return -1; + The large contiguous data is written directly into the underlying file + object, bypassing the output_buffer of the Pickler. We intentionally + do not insert a protocol 4 frame opcode to make it possible to optimize + file.read calls in the loader. + */ +static int +_Pickler_write_bytes(PicklerObject *self, + const char *header, Py_ssize_t header_size, + const char *data, Py_ssize_t data_size, + PyObject *payload) +{ + int bypass_buffer = (data_size >= FRAME_SIZE_TARGET); + int framing = self->framing; + + if (bypass_buffer) { + assert(self->output_buffer != NULL); + /* Commit the previous frame. */ + if (_Pickler_CommitFrame(self)) { + return -1; + } + /* Disable framing temporarily */ + self->framing = 0; } - /* Disable frameing temporarily */ - self->framing = 0; if (_Pickler_Write(self, header, header_size) < 0) { return -1; } - /* Dump the output buffer to the file. */ - if (_Pickler_FlushToFile(self) < 0) { - return -1; - } - /* Stream write the payload into the file without going through the - output buffer. */ - result = PyObject_CallFunctionObjArgs(self->write, payload, NULL); - if (result == NULL) { - return -1; - } - Py_DECREF(result); + if (bypass_buffer && self->write != NULL) { + /* Bypass the in-memory buffer to directly stream large data + into the underlying file object. */ + PyObject *result, *mem = NULL; + /* Dump the output buffer to the file. */ + if (_Pickler_FlushToFile(self) < 0) { + return -1; + } - /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */ - if (_Pickler_ClearBuffer(self) < 0) { - return -1; + /* Stream write the payload into the file without going through the + output buffer. */ + if (payload == NULL) { + payload = mem = PyMemoryView_FromMemory((char *) data, data_size, + PyBUF_READ); + if (payload == NULL) { + return -1; + } + } + result = PyObject_CallFunctionObjArgs(self->write, payload, NULL); + Py_XDECREF(mem); + if (result == NULL) { + return -1; + } + Py_DECREF(result); + + /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */ + if (_Pickler_ClearBuffer(self) < 0) { + return -1; + } + } + else { + if (_Pickler_Write(self, data, data_size) < 0) { + return -1; + } } /* Re-enable framing for subsequent calls to _Pickler_Write. */ - self->framing = 1; + self->framing = framing; return 0; } @@ -2265,20 +2292,10 @@ save_bytes(PicklerObject *self, PyObject *obj) return -1; /* string too large */ } - if (size < FRAME_SIZE_TARGET || self->write == NULL) { - if (_Pickler_Write(self, header, len) < 0) { - return -1; - } - if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0) { - return -1; - } - } - else { - /* Bypass the in-memory buffer to directly stream large data - into the underlying file object. */ - if (_Pickler_write_large_bytes(self, header, len, obj) < 0) { - return -1; - } + if (_Pickler_write_bytes(self, header, len, + PyBytes_AS_STRING(obj), size, obj) < 0) + { + return -1; } if (memo_put(self, obj) < 0) @@ -2360,11 +2377,29 @@ error: } static int -write_utf8(PicklerObject *self, const char *data, Py_ssize_t size) +write_unicode_binary(PicklerObject *self, PyObject *obj) { char header[9]; Py_ssize_t len; - PyObject *mem; + PyObject *encoded = NULL; + Py_ssize_t size; + const char *data; + + if (PyUnicode_READY(obj)) + return -1; + + data = PyUnicode_AsUTF8AndSize(obj, &size); + if (data == NULL) { + /* Issue #8383: for strings with lone surrogates, fallback on the + "surrogatepass" error handler. */ + PyErr_Clear(); + encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); + if (encoded == NULL) + return -1; + + data = PyBytes_AS_STRING(encoded); + size = PyBytes_GET_SIZE(encoded); + } assert(size >= 0); if (size <= 0xff && self->proto >= 4) { @@ -2388,61 +2423,18 @@ write_utf8(PicklerObject *self, const char *data, Py_ssize_t size) else { PyErr_SetString(PyExc_OverflowError, "cannot serialize a string larger than 4GiB"); + Py_XDECREF(encoded); return -1; } - if (size < FRAME_SIZE_TARGET || self->write == NULL) { - if (_Pickler_Write(self, header, len) < 0) { - return -1; - } - if (_Pickler_Write(self, data, size) < 0) { - return -1; - } - } - else { - /* Bypass the in-memory buffer to directly stream large data - into the underlying file object. */ - mem = PyMemoryView_FromMemory((char *) data, size, PyBUF_READ); - if (mem == NULL) { - return -1; - } - if (_Pickler_write_large_bytes(self, header, len, mem) < 0) { - Py_DECREF(mem); - return -1; - } - Py_DECREF(mem); + if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) { + Py_XDECREF(encoded); + return -1; } + Py_XDECREF(encoded); return 0; } -static int -write_unicode_binary(PicklerObject *self, PyObject *obj) -{ - PyObject *encoded = NULL; - Py_ssize_t size; - const char *data; - int r; - - if (PyUnicode_READY(obj)) - return -1; - - data = PyUnicode_AsUTF8AndSize(obj, &size); - if (data != NULL) - return write_utf8(self, data, size); - - /* Issue #8383: for strings with lone surrogates, fallback on the - "surrogatepass" error handler. */ - PyErr_Clear(); - encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); - if (encoded == NULL) - return -1; - - r = write_utf8(self, PyBytes_AS_STRING(encoded), - PyBytes_GET_SIZE(encoded)); - Py_DECREF(encoded); - return r; -} - static int save_unicode(PicklerObject *self, PyObject *obj) { -- cgit v1.2.1