diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2015-10-14 09:56:53 +0200 |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2015-10-14 09:56:53 +0200 |
commit | 772b2b09f279fdcb01bbd703735d35bd02dd8ec1 (patch) | |
tree | 06c585be7db2c586bb8e2a09c2e74a8b2188e841 | |
parent | 661aaccf9def380540cc1d440761159a414094d1 (diff) | |
download | cpython-git-772b2b09f279fdcb01bbd703735d35bd02dd8ec1.tar.gz |
Optimize bytearray % args
Issue #25399: Don't create temporary bytes objects: modify _PyBytes_Format() to
work directly on bytearray objects.
* Rename _PyBytes_Format() to _PyBytes_FormatEx(), just in case something
outside CPython uses it
* _PyBytes_FormatEx() now uses (char*, Py_ssize_t) for the input string, so
bytearray_format() doesn't need to create a temporary input bytes object
* Add use_bytearray parameter to _PyBytes_FormatEx() which is passed to
_PyBytesWriter, to create a bytearray buffer instead of a bytes buffer
Most formatting operations are now between 2.5 and 5 times faster.
-rw-r--r-- | Include/bytesobject.h | 6 | ||||
-rw-r--r-- | Objects/bytearrayobject.c | 22 | ||||
-rw-r--r-- | Objects/bytesobject.c | 41 |
3 files changed, 33 insertions, 36 deletions
diff --git a/Include/bytesobject.h b/Include/bytesobject.h index fbb63226f6..b5b37efd25 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -62,7 +62,11 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *); PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); -PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *); +PyAPI_FUNC(PyObject*) _PyBytes_FormatEx( + const char *format, + Py_ssize_t format_len, + PyObject *args, + int use_bytearray); #endif PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 5647b57a52..e535bce8d7 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -282,26 +282,14 @@ PyByteArray_Concat(PyObject *a, PyObject *b) static PyObject * bytearray_format(PyByteArrayObject *self, PyObject *args) { - PyObject *bytes_in, *bytes_out, *res; - char *bytestring; - - if (self == NULL || !PyByteArray_Check(self) || args == NULL) { + if (self == NULL || !PyByteArray_Check(self)) { PyErr_BadInternalCall(); return NULL; } - bytestring = PyByteArray_AS_STRING(self); - bytes_in = PyBytes_FromString(bytestring); - if (bytes_in == NULL) - return NULL; - bytes_out = _PyBytes_Format(bytes_in, args); - Py_DECREF(bytes_in); - if (bytes_out == NULL) - return NULL; - res = PyByteArray_FromObject(bytes_out); - Py_DECREF(bytes_out); - if (res == NULL) - return NULL; - return res; + + return _PyBytes_FormatEx(PyByteArray_AS_STRING(self), + PyByteArray_GET_SIZE(self), + args, 1); } /* Functions stuffed into the type object */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a1f2958fb2..20b11fb375 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -568,28 +568,32 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen) /* fmt%(v1,v2,...) 
is roughly equivalent to sprintf(fmt, v1, v2, ...) */ PyObject * -_PyBytes_Format(PyObject *format, PyObject *args) +_PyBytes_FormatEx(const char *format, Py_ssize_t format_len, + PyObject *args, int use_bytearray) { - char *fmt, *res; + const char *fmt; + char *res; Py_ssize_t arglen, argidx; Py_ssize_t fmtcnt; int args_owned = 0; PyObject *dict = NULL; _PyBytesWriter writer; - if (format == NULL || !PyBytes_Check(format) || args == NULL) { + if (args == NULL) { PyErr_BadInternalCall(); return NULL; } - fmt = PyBytes_AS_STRING(format); - fmtcnt = PyBytes_GET_SIZE(format); + fmt = format; + fmtcnt = format_len; _PyBytesWriter_Init(&writer); + writer.use_bytearray = use_bytearray; res = _PyBytesWriter_Alloc(&writer, fmtcnt); if (res == NULL) return NULL; - writer.overallocate = 1; + if (!use_bytearray) + writer.overallocate = 1; if (PyTuple_Check(args)) { arglen = PyTuple_GET_SIZE(args); @@ -613,10 +617,8 @@ _PyBytes_Format(PyObject *format, PyObject *args) pos = strchr(fmt + 1, '%'); if (pos != NULL) len = pos - fmt; - else { - len = PyBytes_GET_SIZE(format); - len -= (fmt - PyBytes_AS_STRING(format)); - } + else + len = format_len - (fmt - format); assert(len != 0); Py_MEMCPY(res, fmt, len); @@ -644,7 +646,7 @@ _PyBytes_Format(PyObject *format, PyObject *args) fmt++; if (*fmt == '(') { - char *keystart; + const char *keystart; Py_ssize_t keylen; PyObject *key; int pcount = 1; @@ -924,8 +926,7 @@ _PyBytes_Format(PyObject *format, PyObject *args) "unsupported format character '%c' (0x%x) " "at index %zd", c, c, - (Py_ssize_t)(fmt - 1 - - PyBytes_AsString(format))); + (Py_ssize_t)(fmt - 1 - format)); goto error; } @@ -1028,7 +1029,7 @@ _PyBytes_Format(PyObject *format, PyObject *args) /* If overallocation was disabled, ensure that it was the last write. 
Otherwise, we missed an optimization */ - assert(writer.overallocate || fmtcnt < 0); + assert(writer.overallocate || fmtcnt < 0 || use_bytearray); } /* until end */ if (argidx < arglen && !dict) { @@ -3233,11 +3234,15 @@ bytes_methods[] = { }; static PyObject * -bytes_mod(PyObject *v, PyObject *w) +bytes_mod(PyObject *self, PyObject *args) { - if (!PyBytes_Check(v)) - Py_RETURN_NOTIMPLEMENTED; - return _PyBytes_Format(v, w); + if (self == NULL || !PyBytes_Check(self)) { + PyErr_BadInternalCall(); + return NULL; + } + + return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), + args, 0); } static PyNumberMethods bytes_as_number = { |