diff options
Diffstat (limited to 'Objects/accu.c')
-rw-r--r-- | Objects/accu.c | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/Objects/accu.c b/Objects/accu.c new file mode 100644 index 0000000000..88e8f08f59 --- /dev/null +++ b/Objects/accu.c @@ -0,0 +1,114 @@ +/* Accumulator struct implementation */ + +#include "Python.h" + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return ''.join(lst) */ + PyObject *sep, *ret; + sep = PyUnicode_FromStringAndSize("", 0); + ret = PyUnicode_Join(sep, lst); + Py_DECREF(sep); + return ret; +} + +int +_PyAccu_Init(_PyAccu *acc) +{ + /* Lazily allocated */ + acc->large = NULL; + acc->small = PyList_New(0); + if (acc->small == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(_PyAccu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large == NULL) { + acc->large = PyList_New(0); + if (acc->large == NULL) + return -1; + } + joined = join_list_unicode(acc->small); + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +int +_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; + assert(PyUnicode_Check(unicode)); + + if (PyList_Append(acc->small, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. + */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +PyObject * +_PyAccu_FinishAsList(_PyAccu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small); + if (ret) { + Py_CLEAR(acc->large); + return NULL; + } + res = acc->large; + acc->large = NULL; + return res; +} + +PyObject * +_PyAccu_Finish(_PyAccu *acc) +{ + PyObject *list, *res; + if (acc->large == NULL) { + list = acc->small; + acc->small = NULL; + } + else { + list = _PyAccu_FinishAsList(acc); + if (!list) + return NULL; + } + res = join_list_unicode(list); + Py_DECREF(list); + return res; +} + +void +_PyAccu_Destroy(_PyAccu *acc) +{ + Py_CLEAR(acc->small); + Py_CLEAR(acc->large); +} |