diff options
Diffstat (limited to 'Objects')
33 files changed, 1229 insertions, 672 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c index 562549876b..44ed5b3932 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -2669,6 +2669,32 @@ PyIter_Next(PyObject *iter) return result; } +PySendResult +PyIter_Send(PyObject *iter, PyObject *arg, PyObject **result) +{ + _Py_IDENTIFIER(send); + assert(arg != NULL); + assert(result != NULL); + if (PyType_HasFeature(Py_TYPE(iter), Py_TPFLAGS_HAVE_AM_SEND)) { + assert (Py_TYPE(iter)->tp_as_async != NULL); + assert (Py_TYPE(iter)->tp_as_async->am_send != NULL); + return Py_TYPE(iter)->tp_as_async->am_send(iter, arg, result); + } + if (arg == Py_None && PyIter_Check(iter)) { + *result = Py_TYPE(iter)->tp_iternext(iter); + } + else { + *result = _PyObject_CallMethodIdOneArg(iter, &PyId_send, arg); + } + if (*result != NULL) { + return PYGEN_NEXT; + } + if (_PyGen_FetchStopIterationValue(result) == 0) { + return PYGEN_RETURN; + } + return PYGEN_ERROR; +} + /* * Flatten a sequence of bytes() objects into a C array of * NULL terminated string pointers with a NULL char* terminating the array. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 8b57fb679d..7cb2b1478c 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -13,10 +13,9 @@ class bytearray "PyByteArrayObject *" "&PyByteArray_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=5535b77c37a119e0]*/ +/* For PyByteArray_AS_STRING(). */ char _PyByteArray_empty_string[] = ""; -/* end nullbytes support */ - /* Helpers */ static int @@ -266,7 +265,7 @@ PyByteArray_Concat(PyObject *a, PyObject *b) result = (PyByteArrayObject *) \ PyByteArray_FromStringAndSize(NULL, va.len + vb.len); - // result->ob_bytes is NULL if result is an empty string: + // result->ob_bytes is NULL if result is an empty bytearray: // if va.len + vb.len equals zero. 
if (result != NULL && result->ob_bytes != NULL) { memcpy(result->ob_bytes, va.buf, va.len); @@ -1005,26 +1004,20 @@ bytearray_richcompare(PyObject *self, PyObject *other, int op) { Py_ssize_t self_size, other_size; Py_buffer self_bytes, other_bytes; - int cmp, rc; - - /* Bytes can be compared to anything that supports the (binary) - buffer API. Except that a comparison with Unicode is always an - error, even if the comparison is for equality. */ - rc = PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type); - if (!rc) - rc = PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type); - if (rc < 0) - return NULL; - if (rc) { - if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) { - if (PyErr_WarnEx(PyExc_BytesWarning, - "Comparison between bytearray and string", 1)) - return NULL; + int cmp; + + if (!PyObject_CheckBuffer(self) || !PyObject_CheckBuffer(other)) { + if (PyUnicode_Check(self) || PyUnicode_Check(other)) { + if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) { + if (PyErr_WarnEx(PyExc_BytesWarning, + "Comparison between bytearray and string", 1)) + return NULL; + } } - Py_RETURN_NOTIMPLEMENTED; } + /* Bytearrays can be compared to anything that supports the buffer API. */ if (PyObject_GetBuffer(self, &self_bytes, PyBUF_SIMPLE) != 0) { PyErr_Clear(); Py_RETURN_NOTIMPLEMENTED; @@ -1332,7 +1325,7 @@ bytearray_translate_impl(PyByteArrayObject *self, PyObject *table, if (trans_table[c] != -1) *output++ = (char)trans_table[c]; } - /* Fix the size of the resulting string */ + /* Fix the size of the resulting bytearray */ if (inlen > 0) if (PyByteArray_Resize(result, output - output_start) < 0) { Py_CLEAR(result); @@ -2087,7 +2080,7 @@ bytearray.hex How many bytes between separators. Positive values count from the right, negative values count from the left. -Create a str of hexadecimal numbers from a bytearray object. +Create a string of hexadecimal numbers from a bytearray object. 
Example: >>> value = bytearray([0xb9, 0x01, 0xef]) @@ -2103,7 +2096,7 @@ Example: static PyObject * bytearray_hex_impl(PyByteArrayObject *self, PyObject *sep, int bytes_per_sep) -/*[clinic end generated code: output=29c4e5ef72c565a0 input=814c15830ac8c4b5]*/ +/*[clinic end generated code: output=29c4e5ef72c565a0 input=808667e49bcccb54]*/ { char* argbuf = PyByteArray_AS_STRING(self); Py_ssize_t arglen = PyByteArray_GET_SIZE(self); @@ -2362,7 +2355,7 @@ PyTypeObject PyByteArray_Type = { PyObject_Del, /* tp_free */ }; -/*********************** Bytes Iterator ****************************/ +/*********************** Bytearray Iterator ****************************/ typedef struct { PyObject_HEAD diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 990730cd8c..ccabbdca1d 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -5,6 +5,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_bytes_methods.h" // _Py_bytes_startswith() +#include "pycore_format.h" // F_LJUST #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_object.h" // _PyObject_GC_TRACK #include "pycore_pymem.h" // PYMEM_CLEANBYTE @@ -21,11 +22,11 @@ class bytes "PyBytesObject *" "&PyBytes_Type" _Py_IDENTIFIER(__bytes__); -/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation - for a string of length n should request PyBytesObject_SIZE + n bytes. +/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation + for a bytes object of length n should request PyBytesObject_SIZE + n bytes. Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves - 3 bytes per string allocation on a typical system. + 3 or 7 bytes per bytes object allocation on a typical system. 
*/ #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) @@ -198,7 +199,7 @@ PyBytes_FromString(const char *str) } /* Inline PyObject_NewVar */ - op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size); + op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size); if (op == NULL) { return PyErr_NoMemory(); } @@ -439,19 +440,6 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) return NULL; } -/* Format codes - * F_LJUST '-' - * F_SIGN '+' - * F_BLANK ' ' - * F_ALT '#' - * F_ZERO '0' - */ -#define F_LJUST (1<<0) -#define F_SIGN (1<<1) -#define F_BLANK (1<<2) -#define F_ALT (1<<3) -#define F_ZERO (1<<4) - /* Returns a new reference to a PyBytes object, or NULL on failure. */ static char* @@ -1475,7 +1463,7 @@ bytes_repeat(PyBytesObject *a, Py_ssize_t n) "repeated bytes are too long"); return NULL; } - op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes); + op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes); if (op == NULL) { return PyErr_NoMemory(); } @@ -1538,36 +1526,19 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) int c; Py_ssize_t len_a, len_b; Py_ssize_t min_len; - int rc; /* Make sure both arguments are strings. 
*/ if (!(PyBytes_Check(a) && PyBytes_Check(b))) { if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) { - rc = PyObject_IsInstance((PyObject*)a, - (PyObject*)&PyUnicode_Type); - if (!rc) - rc = PyObject_IsInstance((PyObject*)b, - (PyObject*)&PyUnicode_Type); - if (rc < 0) - return NULL; - if (rc) { + if (PyUnicode_Check(a) || PyUnicode_Check(b)) { if (PyErr_WarnEx(PyExc_BytesWarning, "Comparison between bytes and string", 1)) return NULL; } - else { - rc = PyObject_IsInstance((PyObject*)a, - (PyObject*)&PyLong_Type); - if (!rc) - rc = PyObject_IsInstance((PyObject*)b, - (PyObject*)&PyLong_Type); - if (rc < 0) + if (PyLong_Check(a) || PyLong_Check(b)) { + if (PyErr_WarnEx(PyExc_BytesWarning, + "Comparison between bytes and int", 1)) return NULL; - if (rc) { - if (PyErr_WarnEx(PyExc_BytesWarning, - "Comparison between bytes and int", 1)) - return NULL; - } } } Py_RETURN_NOTIMPLEMENTED; @@ -1577,7 +1548,7 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) case Py_EQ: case Py_LE: case Py_GE: - /* a string is equal to itself */ + /* a byte string is equal to itself */ Py_RETURN_TRUE; case Py_NE: case Py_LT: @@ -2166,7 +2137,7 @@ bytes_translate_impl(PyBytesObject *self, PyObject *table, Py_INCREF(input_obj); return input_obj; } - /* Fix the size of the resulting string */ + /* Fix the size of the resulting byte string */ if (inlen > 0) _PyBytes_Resize(&result, output - output_start); return result; @@ -2470,7 +2441,7 @@ bytes.hex How many bytes between separators. Positive values count from the right, negative values count from the left. -Create a str of hexadecimal numbers from a bytes object. +Create a string of hexadecimal numbers from a bytes object. 
Example: >>> value = b'\xb9\x01\xef' @@ -2486,7 +2457,7 @@ Example: static PyObject * bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep) -/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/ +/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/ { const char *argbuf = PyBytes_AS_STRING(self); Py_ssize_t arglen = PyBytes_GET_SIZE(self); @@ -2788,7 +2759,7 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) Py_ssize_t i, size; _PyBytesWriter writer; - /* For iterator version, create a string object and resize as needed */ + /* For iterator version, create a bytes object and resize as needed */ size = PyObject_LengthHint(x, 64); if (size == -1 && PyErr_Occurred()) return NULL; @@ -3071,9 +3042,9 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) _Py_ForgetReference(v); #endif *pv = (PyObject *) - PyObject_REALLOC(v, PyBytesObject_SIZE + newsize); + PyObject_Realloc(v, PyBytesObject_SIZE + newsize); if (*pv == NULL) { - PyObject_Del(v); + PyObject_Free(v); PyErr_NoMemory(); return -1; } diff --git a/Objects/capsule.c b/Objects/capsule.c index ed24cc1d6a..800a6c4b25 100644 --- a/Objects/capsule.c +++ b/Objects/capsule.c @@ -198,7 +198,7 @@ PyCapsule_Import(const char *name, int no_block) void *return_value = NULL; char *trace; size_t name_length = (strlen(name) + 1) * sizeof(char); - char *name_dup = (char *)PyMem_MALLOC(name_length); + char *name_dup = (char *)PyMem_Malloc(name_length); if (!name_dup) { return PyErr_NoMemory(); @@ -247,7 +247,7 @@ PyCapsule_Import(const char *name, int no_block) EXIT: Py_XDECREF(object); if (name_dup) { - PyMem_FREE(name_dup); + PyMem_Free(name_dup); } return return_value; } @@ -260,7 +260,7 @@ capsule_dealloc(PyObject *o) if (capsule->destructor) { capsule->destructor(o); } - PyObject_DEL(o); + PyObject_Free(o); } diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index 3452b24174..1e3f197561 100644 --- 
a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -990,7 +990,7 @@ PyDoc_STRVAR(bytearray_hex__doc__, "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n" "--\n" "\n" -"Create a str of hexadecimal numbers from a bytearray object.\n" +"Create a string of hexadecimal numbers from a bytearray object.\n" "\n" " sep\n" " An optional single character or byte to separate hex bytes.\n" @@ -1120,4 +1120,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=47cd9ad3fdc3ac0c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a82659f581e55629 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index 27ac6b1067..9e365ce1a0 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -750,7 +750,7 @@ PyDoc_STRVAR(bytes_hex__doc__, "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n" "--\n" "\n" -"Create a str of hexadecimal numbers from a bytes object.\n" +"Create a string of hexadecimal numbers from a bytes object.\n" "\n" " sep\n" " An optional single character or byte to separate hex bytes.\n" @@ -878,4 +878,4 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=6101b417d6a6a717 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b3f0ec2753246b9c input=a9049054013a1b77]*/ diff --git a/Objects/clinic/codeobject.c.h b/Objects/clinic/codeobject.c.h index c7395375e6..bae2ab0764 100644 --- a/Objects/clinic/codeobject.c.h +++ b/Objects/clinic/codeobject.c.h @@ -5,7 +5,7 @@ preserve PyDoc_STRVAR(code_new__doc__, "code(argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize,\n" " flags, codestring, constants, names, varnames, filename, name,\n" -" firstlineno, lnotab, freevars=(), cellvars=(), /)\n" +" firstlineno, linetable, freevars=(), cellvars=(), /)\n" "--\n" "\n" "Create a code object. 
Not for the faint of heart."); @@ -15,7 +15,7 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, int kwonlyargcount, int nlocals, int stacksize, int flags, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *filename, PyObject *name, - int firstlineno, PyObject *lnotab, PyObject *freevars, + int firstlineno, PyObject *linetable, PyObject *freevars, PyObject *cellvars); static PyObject * @@ -35,7 +35,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *filename; PyObject *name; int firstlineno; - PyObject *lnotab; + PyObject *linetable; PyObject *freevars = NULL; PyObject *cellvars = NULL; @@ -114,7 +114,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("code", "argument 14", "bytes", PyTuple_GET_ITEM(args, 13)); goto exit; } - lnotab = PyTuple_GET_ITEM(args, 13); + linetable = PyTuple_GET_ITEM(args, 13); if (PyTuple_GET_SIZE(args) < 15) { goto skip_optional; } @@ -132,7 +132,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } cellvars = PyTuple_GET_ITEM(args, 15); skip_optional: - return_value = code_new_impl(type, argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, filename, name, firstlineno, lnotab, freevars, cellvars); + return_value = code_new_impl(type, argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, filename, name, firstlineno, linetable, freevars, cellvars); exit: return return_value; @@ -144,7 +144,7 @@ PyDoc_STRVAR(code_replace__doc__, " co_flags=-1, co_firstlineno=-1, co_code=None, co_consts=None,\n" " co_names=None, co_varnames=None, co_freevars=None,\n" " co_cellvars=None, co_filename=None, co_name=None,\n" -" co_lnotab=None)\n" +" co_linetable=None)\n" "--\n" "\n" "Return a copy of the code object with new values for the specified fields."); @@ -160,13 +160,13 @@ code_replace_impl(PyCodeObject *self, int co_argcount, 
PyObject *co_consts, PyObject *co_names, PyObject *co_varnames, PyObject *co_freevars, PyObject *co_cellvars, PyObject *co_filename, - PyObject *co_name, PyBytesObject *co_lnotab); + PyObject *co_name, PyBytesObject *co_linetable); static PyObject * code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; - static const char * const _keywords[] = {"co_argcount", "co_posonlyargcount", "co_kwonlyargcount", "co_nlocals", "co_stacksize", "co_flags", "co_firstlineno", "co_code", "co_consts", "co_names", "co_varnames", "co_freevars", "co_cellvars", "co_filename", "co_name", "co_lnotab", NULL}; + static const char * const _keywords[] = {"co_argcount", "co_posonlyargcount", "co_kwonlyargcount", "co_nlocals", "co_stacksize", "co_flags", "co_firstlineno", "co_code", "co_consts", "co_names", "co_varnames", "co_freevars", "co_cellvars", "co_filename", "co_name", "co_linetable", NULL}; static _PyArg_Parser _parser = {NULL, _keywords, "replace", 0}; PyObject *argsbuf[16]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; @@ -185,7 +185,7 @@ code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje PyObject *co_cellvars = self->co_cellvars; PyObject *co_filename = self->co_filename; PyObject *co_name = self->co_name; - PyBytesObject *co_lnotab = (PyBytesObject *)self->co_lnotab; + PyBytesObject *co_linetable = (PyBytesObject *)self->co_linetable; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf); if (!args) { @@ -344,14 +344,14 @@ code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje } } if (!PyBytes_Check(args[15])) { - _PyArg_BadArgument("replace", "argument 'co_lnotab'", "bytes", args[15]); + _PyArg_BadArgument("replace", "argument 'co_linetable'", "bytes", args[15]); goto exit; } - co_lnotab = (PyBytesObject *)args[15]; + co_linetable = (PyBytesObject *)args[15]; skip_optional_kwonly: - return_value = code_replace_impl(self, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, co_firstlineno, co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, co_lnotab); + return_value = code_replace_impl(self, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, co_firstlineno, co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, co_linetable); exit: return return_value; } -/*[clinic end generated code: output=18c31941ec09e9ca input=a9049054013a1b77]*/ +/*[clinic end generated code: output=e3091c7baaaaa420 input=a9049054013a1b77]*/ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index c86d0e1f4a..0b0b8f98ae 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -119,7 +119,7 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, int 
firstlineno, - PyObject *lnotab) + PyObject *linetable) { PyCodeObject *co; Py_ssize_t *cell2arg = NULL; @@ -137,7 +137,7 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, cellvars == NULL || !PyTuple_Check(cellvars) || name == NULL || !PyUnicode_Check(name) || filename == NULL || !PyUnicode_Check(filename) || - lnotab == NULL || !PyBytes_Check(lnotab)) { + linetable == NULL || !PyBytes_Check(linetable)) { PyErr_BadInternalCall(); return NULL; } @@ -213,7 +213,7 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, PyObject *arg = PyTuple_GET_ITEM(varnames, j); int cmp = PyUnicode_Compare(cell, arg); if (cmp == -1 && PyErr_Occurred()) { - PyMem_FREE(cell2arg); + PyMem_Free(cell2arg); return NULL; } if (cmp == 0) { @@ -224,14 +224,14 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, } } if (!used_cell2arg) { - PyMem_FREE(cell2arg); + PyMem_Free(cell2arg); cell2arg = NULL; } } co = PyObject_New(PyCodeObject, &PyCode_Type); if (co == NULL) { if (cell2arg) - PyMem_FREE(cell2arg); + PyMem_Free(cell2arg); return NULL; } co->co_argcount = argcount; @@ -258,8 +258,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, Py_INCREF(name); co->co_name = name; co->co_firstlineno = firstlineno; - Py_INCREF(lnotab); - co->co_lnotab = lnotab; + Py_INCREF(linetable); + co->co_linetable = linetable; co->co_zombieframe = NULL; co->co_weakreflist = NULL; co->co_extra = NULL; @@ -277,12 +277,12 @@ PyCode_New(int argcount, int kwonlyargcount, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, int firstlineno, - PyObject *lnotab) + PyObject *linetable) { return PyCode_NewWithPosOnlyArgs(argcount, 0, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, freevars, cellvars, filename, - name, firstlineno, lnotab); + name, firstlineno, linetable); } 
int @@ -314,12 +314,12 @@ _PyCode_InitOpcache(PyCodeObject *co) if (opts) { co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache)); if (co->co_opcache == NULL) { - PyMem_FREE(co->co_opcache_map); + PyMem_Free(co->co_opcache_map); return -1; } } else { - PyMem_FREE(co->co_opcache_map); + PyMem_Free(co->co_opcache_map); co->co_opcache_map = NULL; co->co_opcache = NULL; } @@ -369,7 +369,7 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) filename_ob, /* filename */ funcname_ob, /* name */ firstlineno, /* firstlineno */ - emptystring /* lnotab */ + emptystring /* linetable */ ); failed: @@ -395,11 +395,89 @@ static PyMemberDef code_memberlist[] = { {"co_cellvars", T_OBJECT, OFF(co_cellvars), READONLY}, {"co_filename", T_OBJECT, OFF(co_filename), READONLY}, {"co_name", T_OBJECT, OFF(co_name), READONLY}, - {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY}, - {"co_lnotab", T_OBJECT, OFF(co_lnotab), READONLY}, + {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY}, + {"co_linetable", T_OBJECT, OFF(co_linetable), READONLY}, {NULL} /* Sentinel */ }; +static int +emit_pair(PyObject **bytes, int *offset, int a, int b) +{ + Py_ssize_t len = PyBytes_GET_SIZE(*bytes); + if (*offset + 2 >= len) { + if (_PyBytes_Resize(bytes, len * 2) < 0) + return 0; + } + unsigned char *lnotab = (unsigned char *) + PyBytes_AS_STRING(*bytes) + *offset; + *lnotab++ = a; + *lnotab++ = b; + *offset += 2; + return 1; +} + +static int +emit_delta(PyObject **bytes, int bdelta, int ldelta, int *offset) +{ + while (bdelta > 255) { + if (!emit_pair(bytes, offset, 255, 0)) { + return 0; + } + bdelta -= 255; + } + while (ldelta > 127) { + if (!emit_pair(bytes, offset, bdelta, 127)) { + return 0; + } + bdelta = 0; + ldelta -= 127; + } + while (ldelta < -128) { + if (!emit_pair(bytes, offset, bdelta, -128)) { + return 0; + } + bdelta = 0; + ldelta += 128; + } + return emit_pair(bytes, offset, bdelta, ldelta); +} + +static PyObject * 
+code_getlnotab(PyCodeObject *code, void *closure) +{ + PyCodeAddressRange bounds; + PyObject *bytes; + int table_offset = 0; + int code_offset = 0; + int line = code->co_firstlineno; + bytes = PyBytes_FromStringAndSize(NULL, 64); + if (bytes == NULL) { + return NULL; + } + _PyCode_InitAddressRange(code, &bounds); + while (PyLineTable_NextAddressRange(&bounds)) { + if (bounds.ar_computed_line != line) { + int bdelta = bounds.ar_start - code_offset; + int ldelta = bounds.ar_computed_line - line; + if (!emit_delta(&bytes, bdelta, ldelta, &table_offset)) { + Py_DECREF(bytes); + return NULL; + } + code_offset = bounds.ar_start; + line = bounds.ar_computed_line; + } + } + _PyBytes_Resize(&bytes, table_offset); + return bytes; +} + + +static PyGetSetDef code_getsetlist[] = { + {"co_lnotab", (getter)code_getlnotab, NULL, NULL}, + {0} +}; + + /* Helper for code_new: return a shallow copy of a tuple that is guaranteed to contain exact strings, by converting string subclasses to exact strings and complaining if a non-string is found. 
*/ @@ -459,7 +537,7 @@ code.__new__ as code_new filename: unicode name: unicode firstlineno: int - lnotab: object(subclass_of="&PyBytes_Type") + linetable: object(subclass_of="&PyBytes_Type") freevars: object(subclass_of="&PyTuple_Type", c_default="NULL") = () cellvars: object(subclass_of="&PyTuple_Type", c_default="NULL") = () / @@ -472,9 +550,9 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, int kwonlyargcount, int nlocals, int stacksize, int flags, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *filename, PyObject *name, - int firstlineno, PyObject *lnotab, PyObject *freevars, + int firstlineno, PyObject *linetable, PyObject *freevars, PyObject *cellvars) -/*[clinic end generated code: output=612aac5395830184 input=85e678ea4178f234]*/ +/*[clinic end generated code: output=42c1839b082ba293 input=0ec80da632b99f57]*/ { PyObject *co = NULL; PyObject *ournames = NULL; @@ -540,7 +618,7 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, code, consts, ournames, ourvarnames, ourfreevars, ourcellvars, filename, - name, firstlineno, lnotab); + name, firstlineno, linetable); cleanup: Py_XDECREF(ournames); Py_XDECREF(ourvarnames); @@ -553,10 +631,10 @@ static void code_dealloc(PyCodeObject *co) { if (co->co_opcache != NULL) { - PyMem_FREE(co->co_opcache); + PyMem_Free(co->co_opcache); } if (co->co_opcache_map != NULL) { - PyMem_FREE(co->co_opcache_map); + PyMem_Free(co->co_opcache_map); } co->co_opcache_flag = 0; co->co_opcache_size = 0; @@ -584,14 +662,14 @@ code_dealloc(PyCodeObject *co) Py_XDECREF(co->co_cellvars); Py_XDECREF(co->co_filename); Py_XDECREF(co->co_name); - Py_XDECREF(co->co_lnotab); + Py_XDECREF(co->co_linetable); if (co->co_cell2arg != NULL) - PyMem_FREE(co->co_cell2arg); + PyMem_Free(co->co_cell2arg); if (co->co_zombieframe != NULL) PyObject_GC_Del(co->co_zombieframe); if (co->co_weakreflist != NULL) PyObject_ClearWeakRefs((PyObject*)co); - PyObject_DEL(co); + PyObject_Free(co); 
} static PyObject * @@ -636,7 +714,7 @@ code.replace co_cellvars: object(subclass_of="&PyTuple_Type", c_default="self->co_cellvars") = None co_filename: unicode(c_default="self->co_filename") = None co_name: unicode(c_default="self->co_name") = None - co_lnotab: PyBytesObject(c_default="(PyBytesObject *)self->co_lnotab") = None + co_linetable: PyBytesObject(c_default="(PyBytesObject *)self->co_linetable") = None Return a copy of the code object with new values for the specified fields. [clinic start generated code]*/ @@ -649,8 +727,8 @@ code_replace_impl(PyCodeObject *self, int co_argcount, PyObject *co_consts, PyObject *co_names, PyObject *co_varnames, PyObject *co_freevars, PyObject *co_cellvars, PyObject *co_filename, - PyObject *co_name, PyBytesObject *co_lnotab) -/*[clinic end generated code: output=25c8e303913bcace input=d9051bc8f24e6b28]*/ + PyObject *co_name, PyBytesObject *co_linetable) +/*[clinic end generated code: output=50d77e668d3b449b input=a5f997b173d7f636]*/ { #define CHECK_INT_ARG(ARG) \ if (ARG < 0) { \ @@ -680,7 +758,7 @@ code_replace_impl(PyCodeObject *self, int co_argcount, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, (PyObject*)co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, - co_firstlineno, (PyObject*)co_lnotab); + co_firstlineno, (PyObject*)co_linetable); } static PyObject * @@ -933,10 +1011,189 @@ code_hash(PyCodeObject *co) return h; } +typedef struct { + PyObject_HEAD + PyCodeObject *li_code; + PyCodeAddressRange li_line; + char *li_end; +} lineiterator; + + +static void +lineiter_dealloc(lineiterator *li) +{ + Py_DECREF(li->li_code); + Py_TYPE(li)->tp_free(li); +} + +static PyObject * +lineiter_next(lineiterator *li) +{ + PyCodeAddressRange *bounds = &li->li_line; + if (!PyLineTable_NextAddressRange(bounds)) { + return NULL; + } + PyObject *start = NULL; + PyObject *end = NULL; + PyObject *line = NULL; + PyObject *result = PyTuple_New(3); + start = 
PyLong_FromLong(bounds->ar_start); + end = PyLong_FromLong(bounds->ar_end); + if (bounds->ar_line < 0) { + Py_INCREF(Py_None); + line = Py_None; + } + else { + line = PyLong_FromLong(bounds->ar_line); + } + if (result == NULL || start == NULL || end == NULL || line == NULL) { + goto error; + } + PyTuple_SET_ITEM(result, 0, start); + PyTuple_SET_ITEM(result, 1, end); + PyTuple_SET_ITEM(result, 2, line); + return result; +error: + Py_XDECREF(start); + Py_XDECREF(end); + Py_XDECREF(line); + Py_XDECREF(result); + return result; +} + +static PyTypeObject LineIterator = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "line_iterator", /* tp_name */ + sizeof(lineiterator), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)lineiter_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)lineiter_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + PyObject_Del, /* tp_free */ +}; + +static PyObject * +code_linesiterator(PyCodeObject *code, PyObject *Py_UNUSED(args)) +{ + lineiterator *li = (lineiterator *)PyType_GenericAlloc(&LineIterator, 0); + if (li == NULL) { + return NULL; + } + Py_INCREF(code); + li->li_code = code; + _PyCode_InitAddressRange(code, &li->li_line); + return (PyObject *)li; +} + +static void +retreat(PyCodeAddressRange *bounds) 
+{ + int ldelta = ((signed char *)bounds->lo_next)[-1]; + if (ldelta == -128) { + ldelta = 0; + } + bounds->ar_computed_line -= ldelta; + bounds->lo_next -= 2; + bounds->ar_end = bounds->ar_start; + bounds->ar_start -= ((unsigned char *)bounds->lo_next)[-2]; + ldelta = ((signed char *)bounds->lo_next)[-1]; + if (ldelta == -128) { + bounds->ar_line = -1; + } + else { + bounds->ar_line = bounds->ar_computed_line; + } +} + +static void +advance(PyCodeAddressRange *bounds) +{ + bounds->ar_start = bounds->ar_end; + int delta = ((unsigned char *)bounds->lo_next)[0]; + assert (delta < 255); + bounds->ar_end += delta; + int ldelta = ((signed char *)bounds->lo_next)[1]; + bounds->lo_next += 2; + if (ldelta == -128) { + bounds->ar_line = -1; + } + else { + bounds->ar_computed_line += ldelta; + bounds->ar_line = bounds->ar_computed_line; + } +} + +static inline int +at_end(PyCodeAddressRange *bounds) { + return ((unsigned char *)bounds->lo_next)[0] == 255; +} + +int +PyLineTable_PreviousAddressRange(PyCodeAddressRange *range) +{ + if (range->ar_start <= 0) { + return 0; + } + retreat(range); + while (range->ar_start == range->ar_end) { + assert(range->ar_start > 0); + retreat(range); + } + return 1; +} + +int +PyLineTable_NextAddressRange(PyCodeAddressRange *range) +{ + if (at_end(range)) { + return 0; + } + advance(range); + while (range->ar_start == range->ar_end) { + assert(!at_end(range)); + advance(range); + } + return 1; +} + + /* XXX code objects need to participate in GC? 
*/ static struct PyMethodDef code_methods[] = { {"__sizeof__", (PyCFunction)code_sizeof, METH_NOARGS}, + {"co_lines", (PyCFunction)code_linesiterator, METH_NOARGS}, CODE_REPLACE_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -971,7 +1228,7 @@ PyTypeObject PyCode_Type = { 0, /* tp_iternext */ code_methods, /* tp_methods */ code_memberlist, /* tp_members */ - 0, /* tp_getset */ + code_getsetlist, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ @@ -982,78 +1239,55 @@ PyTypeObject PyCode_Type = { code_new, /* tp_new */ }; -/* Use co_lnotab to compute the line number from a bytecode index, addrq. See +/* Use co_linetable to compute the line number from a bytecode index, addrq. See lnotab_notes.txt for the details of the lnotab representation. */ int PyCode_Addr2Line(PyCodeObject *co, int addrq) { - Py_ssize_t size = PyBytes_Size(co->co_lnotab) / 2; - unsigned char *p = (unsigned char*)PyBytes_AsString(co->co_lnotab); - int line = co->co_firstlineno; - int addr = 0; - while (--size >= 0) { - addr += *p++; - if (addr > addrq) - break; - line += (signed char)*p; - p++; - } - return line; + if (addrq == -1) { + return co->co_firstlineno; + } + assert(addrq >= 0 && addrq < PyBytes_GET_SIZE(co->co_code)); + PyCodeAddressRange bounds; + _PyCode_InitAddressRange(co, &bounds); + return _PyCode_CheckLineNumber(addrq, &bounds); +} + +void +PyLineTable_InitAddressRange(char *linetable, int firstlineno, PyCodeAddressRange *range) +{ + range->lo_next = linetable; + range->ar_start = -1; + range->ar_end = 0; + range->ar_computed_line = range->ar_line = firstlineno; +} + +int +_PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds) +{ + char *linetable = PyBytes_AS_STRING(co->co_linetable); + PyLineTable_InitAddressRange(linetable, co->co_firstlineno, bounds); + return bounds->ar_line; } /* Update *bounds to describe the first and one-past-the-last instructions in - the same line as lasti. Return the number of that line. */ + the same line as lasti. 
Return the number of that line, or -1 if lasti is out of bounds. */ int -_PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds) +_PyCode_CheckLineNumber(int lasti, PyCodeAddressRange *bounds) { - Py_ssize_t size; - int addr, line; - unsigned char* p; - - p = (unsigned char*)PyBytes_AS_STRING(co->co_lnotab); - size = PyBytes_GET_SIZE(co->co_lnotab) / 2; - - addr = 0; - line = co->co_firstlineno; - assert(line > 0); - - /* possible optimization: if f->f_lasti == instr_ub - (likely to be a common case) then we already know - instr_lb -- if we stored the matching value of p - somewhere we could skip the first while loop. */ - - /* See lnotab_notes.txt for the description of - co_lnotab. A point to remember: increments to p - come in (addr, line) pairs. */ - - bounds->ap_lower = 0; - while (size > 0) { - if (addr + *p > lasti) - break; - addr += *p++; - if ((signed char)*p) - bounds->ap_lower = addr; - line += (signed char)*p; - p++; - --size; - } - - if (size > 0) { - while (--size >= 0) { - addr += *p++; - if ((signed char)*p) - break; - p++; + while (bounds->ar_end <= lasti) { + if (!PyLineTable_NextAddressRange(bounds)) { + return -1; } - bounds->ap_upper = addr; } - else { - bounds->ap_upper = INT_MAX; + while (bounds->ar_start > lasti) { + if (!PyLineTable_PreviousAddressRange(bounds)) { + return -1; + } } - - return line; + return bounds->ar_line; } diff --git a/Objects/complexobject.c b/Objects/complexobject.c index a481d9ad8b..a65ebdfa6c 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -233,7 +233,7 @@ PyObject * PyComplex_FromCComplex(Py_complex cval) { /* Inline PyObject_New */ - PyComplexObject *op = PyObject_MALLOC(sizeof(PyComplexObject)); + PyComplexObject *op = PyObject_Malloc(sizeof(PyComplexObject)); if (op == NULL) { return PyErr_NoMemory(); } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index faa8696153..35e881fe27 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -269,7 +269,7 @@ 
_PyDict_ClearFreeList(PyThreadState *tstate) PyObject_GC_Del(op); } while (state->keys_numfree) { - PyObject_FREE(state->keys_free_list[--state->keys_numfree]); + PyObject_Free(state->keys_free_list[--state->keys_numfree]); } } @@ -597,7 +597,7 @@ new_keys_object(Py_ssize_t size) } else { - dk = PyObject_MALLOC(sizeof(PyDictKeysObject) + dk = PyObject_Malloc(sizeof(PyDictKeysObject) + es * size + sizeof(PyDictKeyEntry) * usable); if (dk == NULL) { @@ -636,11 +636,11 @@ free_keys_object(PyDictKeysObject *keys) state->keys_free_list[state->keys_numfree++] = keys; return; } - PyObject_FREE(keys); + PyObject_Free(keys); } #define new_values(size) PyMem_NEW(PyObject *, size) -#define free_values(values) PyMem_FREE(values) +#define free_values(values) PyMem_Free(values) /* Consumes a reference to the keys object */ static PyObject * @@ -1303,7 +1303,7 @@ dictresize(PyDictObject *mp, Py_ssize_t newsize) state->keys_free_list[state->keys_numfree++] = oldkeys; } else { - PyObject_FREE(oldkeys); + PyObject_Free(oldkeys); } } @@ -3989,6 +3989,11 @@ dictiter_iternextitem(dictiterobject *di) Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); @@ -4104,6 +4109,11 @@ dictreviter_iternext(dictiterobject *di) Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. 
Since + // we're recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); diff --git a/Objects/enumobject.c b/Objects/enumobject.c index 8b5e7d3a3c..98ece3f13f 100644 --- a/Objects/enumobject.c +++ b/Objects/enumobject.c @@ -2,6 +2,7 @@ #include "Python.h" #include "pycore_long.h" // _PyLong_GetOne() +#include "pycore_object.h" // _PyObject_GC_TRACK() #include "clinic/enumobject.c.h" @@ -131,6 +132,11 @@ enum_next_long(enumobject *en, PyObject* next_item) PyTuple_SET_ITEM(result, 1, next_item); Py_DECREF(old_index); Py_DECREF(old_item); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } return result; } result = PyTuple_New(2); @@ -176,6 +182,11 @@ enum_next(enumobject *en) PyTuple_SET_ITEM(result, 1, next_item); Py_DECREF(old_index); Py_DECREF(old_item); + // bpo-42536: The GC may have untracked this result tuple. 
Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } return result; } result = PyTuple_New(2); diff --git a/Objects/exceptions.c b/Objects/exceptions.c index b14da20db0..d4824938a0 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2547,8 +2547,10 @@ _PyExc_Init(PyThreadState *tstate) do { \ PyObject *_code = PyLong_FromLong(CODE); \ assert(_PyObject_RealIsSubclass(PyExc_ ## TYPE, PyExc_OSError)); \ - if (!_code || PyDict_SetItem(state->errnomap, _code, PyExc_ ## TYPE)) \ + if (!_code || PyDict_SetItem(state->errnomap, _code, PyExc_ ## TYPE)) { \ + Py_XDECREF(_code); \ return _PyStatus_ERR("errmap insertion problem."); \ + } \ Py_DECREF(_code); \ } while (0) diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 1550b2eedc..34fb57a946 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -237,7 +237,7 @@ float_dealloc(PyFloatObject *op) assert(state->numfree != -1); #endif if (state->numfree >= PyFloat_MAXFREELIST) { - PyObject_FREE(op); + PyObject_Free(op); return; } state->numfree++; @@ -2032,7 +2032,7 @@ _PyFloat_ClearFreeList(PyThreadState *tstate) PyFloatObject *f = state->free_list; while (f != NULL) { PyFloatObject *next = (PyFloatObject*) Py_TYPE(f); - PyObject_FREE(f); + PyObject_Free(f); f = next; } state->free_list = NULL; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 8838b80746..787cd8b272 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -249,36 +249,22 @@ explain_incompatible_block_stack(int64_t to_stack) static int * marklines(PyCodeObject *code, int len) { + PyCodeAddressRange bounds; + _PyCode_InitAddressRange(code, &bounds); + assert (bounds.ar_end == 0); + int *linestarts = PyMem_New(int, len); if (linestarts == NULL) { return NULL; } - Py_ssize_t size = PyBytes_GET_SIZE(code->co_lnotab) / 2; - unsigned char *p = (unsigned char*)PyBytes_AS_STRING(code->co_lnotab); - int line = 
code->co_firstlineno; - int addr = 0; - int index = 0; - while (--size >= 0) { - addr += *p++; - if (index*2 < addr) { - linestarts[index++] = line; - } - while (index*2 < addr) { - linestarts[index++] = -1; - if (index >= len) { - break; - } - } - line += (signed char)*p; - p++; - } - if (index < len) { - linestarts[index++] = line; + for (int i = 0; i < len; i++) { + linestarts[i] = -1; } - while (index < len) { - linestarts[index++] = -1; + + while (PyLineTable_NextAddressRange(&bounds)) { + assert(bounds.ar_start/2 < len); + linestarts[bounds.ar_start/2] = bounds.ar_line; } - assert(index == len); return linestarts; } @@ -925,7 +911,7 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyCodeObject *code, } f->f_lasti = -1; - f->f_lineno = code->co_firstlineno; + f->f_lineno = 0; f->f_iblock = 0; f->f_state = FRAME_CREATED; f->f_gen = NULL; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 9b4302a13c..e7961b3e6e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -424,6 +424,25 @@ func_get_annotations(PyFunctionObject *op, void *Py_UNUSED(ignored)) if (op->func_annotations == NULL) return NULL; } + if (PyTuple_CheckExact(op->func_annotations)) { + PyObject *ann_tuple = op->func_annotations; + PyObject *ann_dict = PyDict_New(); + if (ann_dict == NULL) { + return NULL; + } + + assert(PyTuple_GET_SIZE(ann_tuple) % 2 == 0); + + for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(ann_tuple); i += 2) { + int err = PyDict_SetItem(ann_dict, + PyTuple_GET_ITEM(ann_tuple, i), + PyTuple_GET_ITEM(ann_tuple, i + 1)); + + if (err < 0) + return NULL; + } + Py_SETREF(op->func_annotations, ann_dict); + } Py_INCREF(op->func_annotations); return op->func_annotations; } diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index 6508c69cbf..4cc82ffcdf 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -2,6 +2,7 @@ #include "Python.h" #include "pycore_object.h" +#include "pycore_unionobject.h" // _Py_union_as_number 
#include "structmember.h" // PyMemberDef typedef struct { @@ -9,6 +10,7 @@ typedef struct { PyObject *origin; PyObject *args; PyObject *parameters; + PyObject* weakreflist; } gaobject; static void @@ -17,6 +19,9 @@ ga_dealloc(PyObject *self) gaobject *alias = (gaobject *)self; _PyObject_GC_UNTRACK(self); + if (alias->weakreflist != NULL) { + PyObject_ClearWeakRefs((PyObject *)alias); + } Py_XDECREF(alias->origin); Py_XDECREF(alias->args); Py_XDECREF(alias->parameters); @@ -151,13 +156,24 @@ error: return NULL; } -// isinstance(obj, TypeVar) without importing typing.py. -// Returns -1 for errors. -static int -is_typevar(PyObject *obj) +/* Checks if a variable number of names are from typing.py. +* If any one of the names are found, return 1, else 0. +**/ +static inline int +is_typing_name(PyObject *obj, int num, ...) { + va_list names; + va_start(names, num); + PyTypeObject *type = Py_TYPE(obj); - if (strcmp(type->tp_name, "TypeVar") != 0) { + int hit = 0; + for (int i = 0; i < num; ++i) { + if (!strcmp(type->tp_name, va_arg(names, const char *))) { + hit = 1; + break; + } + } + if (!hit) { return 0; } PyObject *module = PyObject_GetAttrString((PyObject *)type, "__module__"); @@ -167,9 +183,25 @@ is_typevar(PyObject *obj) int res = PyUnicode_Check(module) && _PyUnicode_EqualToASCIIString(module, "typing"); Py_DECREF(module); + + va_end(names); return res; } +// isinstance(obj, (TypeVar, ParamSpec)) without importing typing.py. +// Returns -1 for errors. 
+static inline int +is_typevarlike(PyObject *obj) +{ + return is_typing_name(obj, 2, "TypeVar", "ParamSpec"); +} + +static inline int +is_paramspec(PyObject *obj) +{ + return is_typing_name(obj, 1, "ParamSpec"); +} + // Index of item in self[:len], or -1 if not found (self is a tuple) static Py_ssize_t tuple_index(PyObject *self, Py_ssize_t len, PyObject *item) @@ -204,7 +236,7 @@ make_parameters(PyObject *args) Py_ssize_t iparam = 0; for (Py_ssize_t iarg = 0; iarg < nargs; iarg++) { PyObject *t = PyTuple_GET_ITEM(args, iarg); - int typevar = is_typevar(t); + int typevar = is_typevarlike(t); if (typevar < 0) { Py_DECREF(parameters); return NULL; @@ -274,7 +306,14 @@ subs_tvars(PyObject *obj, PyObject *params, PyObject **argitems) if (iparam >= 0) { arg = argitems[iparam]; } - Py_INCREF(arg); + // convert all the lists inside args to tuples to help + // with caching in other libaries + if (PyList_CheckExact(arg)) { + arg = PyList_AsTuple(arg); + } + else { + Py_INCREF(arg); + } PyTuple_SET_ITEM(subargs, i, arg); } @@ -309,11 +348,19 @@ ga_getitem(PyObject *self, PyObject *item) int is_tuple = PyTuple_Check(item); Py_ssize_t nitems = is_tuple ? PyTuple_GET_SIZE(item) : 1; PyObject **argitems = is_tuple ? &PyTuple_GET_ITEM(item, 0) : &item; - if (nitems != nparams) { - return PyErr_Format(PyExc_TypeError, - "Too %s arguments for %R", - nitems > nparams ? "many" : "few", - self); + // A special case in PEP 612 where if X = Callable[P, int], + // then X[int, str] == X[[int, str]]. + if (nparams == 1 && nitems > 1 && is_tuple && + is_paramspec(PyTuple_GET_ITEM(alias->parameters, 0))) { + argitems = &item; + } + else { + if (nitems != nparams) { + return PyErr_Format(PyExc_TypeError, + "Too %s arguments for %R", + nitems > nparams ? "many" : "few", + self); + } } /* Replace all type variables (specified by alias->parameters) with corresponding values specified by argitems. 
@@ -328,7 +375,7 @@ ga_getitem(PyObject *self, PyObject *item) } for (Py_ssize_t iarg = 0; iarg < nargs; iarg++) { PyObject *arg = PyTuple_GET_ITEM(alias->args, iarg); - int typevar = is_typevar(arg); + int typevar = is_typevarlike(arg); if (typevar < 0) { Py_DECREF(newargs); return NULL; @@ -337,7 +384,13 @@ ga_getitem(PyObject *self, PyObject *item) Py_ssize_t iparam = tuple_index(alias->parameters, nparams, arg); assert(iparam >= 0); arg = argitems[iparam]; - Py_INCREF(arg); + // convert lists to tuples to help with caching in other libaries. + if (PyList_CheckExact(arg)) { + arg = PyList_AsTuple(arg); + } + else { + Py_INCREF(arg); + } } else { arg = subs_tvars(arg, alias->parameters, argitems); @@ -424,8 +477,8 @@ ga_getattro(PyObject *self, PyObject *name) static PyObject * ga_richcompare(PyObject *a, PyObject *b, int op) { - if (!Py_IS_TYPE(a, &Py_GenericAliasType) || - !Py_IS_TYPE(b, &Py_GenericAliasType) || + if (!PyObject_TypeCheck(a, &Py_GenericAliasType) || + !PyObject_TypeCheck(b, &Py_GenericAliasType) || (op != Py_EQ && op != Py_NE)) { Py_RETURN_NOTIMPLEMENTED; @@ -559,10 +612,33 @@ static PyGetSetDef ga_properties[] = { {0} }; +/* A helper function to create GenericAlias' args tuple and set its attributes. + * Returns 1 on success, 0 on failure. 
+ */ +static inline int +setup_ga(gaobject *alias, PyObject *origin, PyObject *args) { + if (!PyTuple_Check(args)) { + args = PyTuple_Pack(1, args); + if (args == NULL) { + return 0; + } + } + else { + Py_INCREF(args); + } + + Py_INCREF(origin); + alias->origin = origin; + alias->args = args; + alias->parameters = NULL; + alias->weakreflist = NULL; + return 1; +} + static PyObject * ga_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - if (!_PyArg_NoKwnames("GenericAlias", kwds)) { + if (!_PyArg_NoKeywords("GenericAlias", kwds)) { return NULL; } if (!_PyArg_CheckPositional("GenericAlias", PyTuple_GET_SIZE(args), 2, 2)) { @@ -570,9 +646,21 @@ ga_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } PyObject *origin = PyTuple_GET_ITEM(args, 0); PyObject *arguments = PyTuple_GET_ITEM(args, 1); - return Py_GenericAlias(origin, arguments); + gaobject *self = (gaobject *)type->tp_alloc(type, 0); + if (self == NULL) { + return NULL; + } + if (!setup_ga(self, origin, arguments)) { + type->tp_free((PyObject *)self); + return NULL; + } + return (PyObject *)self; } +static PyNumberMethods ga_as_number = { + .nb_or = (binaryfunc)_Py_union_type_or, // Add __or__ function +}; + // TODO: // - argument clinic? // - __doc__? 
@@ -586,13 +674,15 @@ PyTypeObject Py_GenericAliasType = { .tp_basicsize = sizeof(gaobject), .tp_dealloc = ga_dealloc, .tp_repr = ga_repr, + .tp_as_number = &ga_as_number, // allow X | Y of GenericAlias objs .tp_as_mapping = &ga_as_mapping, .tp_hash = ga_hash, .tp_call = ga_call, .tp_getattro = ga_getattro, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, .tp_traverse = ga_traverse, .tp_richcompare = ga_richcompare, + .tp_weaklistoffset = offsetof(gaobject, weakreflist), .tp_methods = ga_methods, .tp_members = ga_members, .tp_alloc = PyType_GenericAlloc, @@ -604,26 +694,14 @@ PyTypeObject Py_GenericAliasType = { PyObject * Py_GenericAlias(PyObject *origin, PyObject *args) { - if (!PyTuple_Check(args)) { - args = PyTuple_Pack(1, args); - if (args == NULL) { - return NULL; - } - } - else { - Py_INCREF(args); - } - gaobject *alias = PyObject_GC_New(gaobject, &Py_GenericAliasType); if (alias == NULL) { - Py_DECREF(args); return NULL; } - - Py_INCREF(origin); - alias->origin = origin; - alias->args = args; - alias->parameters = NULL; + if (!setup_ga(alias, origin, args)) { + PyObject_GC_Del((PyObject *)alias); + return NULL; + } _PyObject_GC_TRACK(alias); return (PyObject *)alias; } diff --git a/Objects/genobject.c b/Objects/genobject.c index c1b26e9da3..bde92b462d 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -268,30 +268,10 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult, return result ? 
PYGEN_RETURN : PYGEN_ERROR; } -PySendResult -PyIter_Send(PyObject *iter, PyObject *arg, PyObject **result) +static PySendResult +PyGen_am_send(PyGenObject *gen, PyObject *arg, PyObject **result) { - _Py_IDENTIFIER(send); - assert(arg != NULL); - assert(result != NULL); - - if (PyGen_CheckExact(iter) || PyCoro_CheckExact(iter)) { - return gen_send_ex2((PyGenObject *)iter, arg, result, 0, 0); - } - - if (arg == Py_None && PyIter_Check(iter)) { - *result = Py_TYPE(iter)->tp_iternext(iter); - } - else { - *result = _PyObject_CallMethodIdOneArg(iter, &PyId_send, arg); - } - if (*result != NULL) { - return PYGEN_NEXT; - } - if (_PyGen_FetchStopIterationValue(result) == 0) { - return PYGEN_RETURN; - } - return PYGEN_ERROR; + return gen_send_ex2(gen, arg, result, 0, 0); } static PyObject * @@ -788,6 +768,14 @@ static PyMethodDef gen_methods[] = { {NULL, NULL} /* Sentinel */ }; +static PyAsyncMethods gen_as_async = { + 0, /* am_await */ + 0, /* am_aiter */ + 0, /* am_anext */ + (sendfunc)PyGen_am_send, /* am_send */ +}; + + PyTypeObject PyGen_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "generator", /* tp_name */ @@ -798,7 +786,7 @@ PyTypeObject PyGen_Type = { 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ - 0, /* tp_as_async */ + &gen_as_async, /* tp_as_async */ (reprfunc)gen_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ @@ -809,7 +797,8 @@ PyTypeObject PyGen_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_HAVE_AM_SEND, /* tp_flags */ 0, /* tp_doc */ (traverseproc)gen_traverse, /* tp_traverse */ 0, /* tp_clear */ @@ -1031,7 +1020,8 @@ static PyMethodDef coro_methods[] = { static PyAsyncMethods coro_as_async = { (unaryfunc)coro_await, /* am_await */ 0, /* am_aiter */ - 0 /* am_anext */ + 0, /* am_anext */ + (sendfunc)PyGen_am_send, /* am_send */ }; PyTypeObject 
PyCoro_Type = { @@ -1055,7 +1045,8 @@ PyTypeObject PyCoro_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_HAVE_AM_SEND, /* tp_flags */ 0, /* tp_doc */ (traverseproc)gen_traverse, /* tp_traverse */ 0, /* tp_clear */ @@ -1413,7 +1404,8 @@ static PyMethodDef async_gen_methods[] = { static PyAsyncMethods async_gen_as_async = { 0, /* am_await */ PyObject_SelfIter, /* am_aiter */ - (unaryfunc)async_gen_anext /* am_anext */ + (unaryfunc)async_gen_anext, /* am_anext */ + (sendfunc)PyGen_am_send, /* am_send */ }; @@ -1438,7 +1430,8 @@ PyTypeObject PyAsyncGen_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_HAVE_AM_SEND, /* tp_flags */ 0, /* tp_doc */ (traverseproc)async_gen_traverse, /* tp_traverse */ 0, /* tp_clear */ @@ -1676,7 +1669,8 @@ static PyMethodDef async_gen_asend_methods[] = { static PyAsyncMethods async_gen_asend_as_async = { PyObject_SelfIter, /* am_await */ 0, /* am_aiter */ - 0 /* am_anext */ + 0, /* am_anext */ + 0, /* am_send */ }; @@ -2084,7 +2078,8 @@ static PyMethodDef async_gen_athrow_methods[] = { static PyAsyncMethods async_gen_athrow_as_async = { PyObject_SelfIter, /* am_await */ 0, /* am_aiter */ - 0 /* am_anext */ + 0, /* am_anext */ + 0, /* am_send */ }; diff --git a/Objects/listobject.c b/Objects/listobject.c index aac87ea1b6..ca9df599a0 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -341,7 +341,7 @@ list_dealloc(PyListObject *op) while (--i >= 0) { Py_XDECREF(op->ob_item[i]); } - PyMem_FREE(op->ob_item); + PyMem_Free(op->ob_item); } struct _Py_list_state *state = get_list_state(); #ifdef Py_DEBUG @@ -592,7 +592,7 @@ _list_clear(PyListObject *a) while (--i >= 0) { Py_XDECREF(item[i]); } - PyMem_FREE(item); + 
PyMem_Free(item); } /* Never fails; the return value can be ignored. Note that there is no guarantee that the list is actually empty @@ -668,7 +668,7 @@ list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) /* If norig == 0, item might be NULL, in which case we may not memcpy from it. */ if (s) { if (s > sizeof(recycle_on_stack)) { - recycle = (PyObject **)PyMem_MALLOC(s); + recycle = (PyObject **)PyMem_Malloc(s); if (recycle == NULL) { PyErr_NoMemory(); goto Error; @@ -706,7 +706,7 @@ list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) result = 0; Error: if (recycle != recycle_on_stack) - PyMem_FREE(recycle); + PyMem_Free(recycle); Py_XDECREF(v_as_SF); return result; #undef b @@ -2230,7 +2230,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* Leverage stack space we allocated but won't otherwise use */ keys = &ms.temparray[saved_ob_size+1]; else { - keys = PyMem_MALLOC(sizeof(PyObject *) * saved_ob_size); + keys = PyMem_Malloc(sizeof(PyObject *) * saved_ob_size); if (keys == NULL) { PyErr_NoMemory(); goto keyfunc_fail; @@ -2243,7 +2243,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) for (i=i-1 ; i>=0 ; i--) Py_DECREF(keys[i]); if (saved_ob_size >= MERGESTATE_TEMP_SIZE/2) - PyMem_FREE(keys); + PyMem_Free(keys); goto keyfunc_fail; } } @@ -2414,7 +2414,7 @@ fail: for (i = 0; i < saved_ob_size; i++) Py_DECREF(keys[i]); if (saved_ob_size >= MERGESTATE_TEMP_SIZE/2) - PyMem_FREE(keys); + PyMem_Free(keys); } if (self->allocated != -1 && result != NULL) { @@ -2442,7 +2442,7 @@ keyfunc_fail: while (--i >= 0) { Py_XDECREF(final_ob_item[i]); } - PyMem_FREE(final_ob_item); + PyMem_Free(final_ob_item); } Py_XINCREF(result); return result; @@ -2908,7 +2908,7 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) } garbage = (PyObject**) - PyMem_MALLOC(slicelength*sizeof(PyObject*)); + PyMem_Malloc(slicelength*sizeof(PyObject*)); if (!garbage) { 
PyErr_NoMemory(); return -1; @@ -2949,7 +2949,7 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) for (i = 0; i < slicelength; i++) { Py_DECREF(garbage[i]); } - PyMem_FREE(garbage); + PyMem_Free(garbage); return res; } @@ -2990,7 +2990,7 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) } garbage = (PyObject**) - PyMem_MALLOC(slicelength*sizeof(PyObject*)); + PyMem_Malloc(slicelength*sizeof(PyObject*)); if (!garbage) { Py_DECREF(seq); PyErr_NoMemory(); @@ -3011,7 +3011,7 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) Py_DECREF(garbage[i]); } - PyMem_FREE(garbage); + PyMem_Free(garbage); Py_DECREF(seq); return 0; diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt index 71a2979718..046f753ed3 100644 --- a/Objects/lnotab_notes.txt +++ b/Objects/lnotab_notes.txt @@ -1,11 +1,103 @@ -All about co_lnotab, the line number table. - -Code objects store a field named co_lnotab. This is an array of unsigned bytes -disguised as a Python bytes object. It is used to map bytecode offsets to -source code line #s for tracebacks and to identify line number boundaries for -line tracing. Because of internals of the peephole optimizer, it's possible -for lnotab to contain bytecode offsets that are no longer valid (for example -if the optimizer removed the last line in a function). +Description of the internal format of the line number table + +Conceptually, the line number table consists of a sequence of triples: + start-offset (inclusive), end-offset (exclusive), line-number. + +Note that not all bytecodes have a line number, so we need to handle `None` for the line-number. + +However, storing the above sequence directly would be very inefficient as we would need 12 bytes per entry. + +First of all, we can note that the end of one entry is the same as the start of the next, so we can overlap entries.
+Secondly, we also note that we don't really need arbitrary access to the sequence, so we can store deltas. + +We just need to store (end - start, line delta) pairs. The start offset of the first entry is always zero. + +Thirdly, most deltas are small, so we can use a single byte for each value, as long as we allow several entries for the same line. + +Consider the following table + Start End Line + 0 6 1 + 6 50 2 + 50 350 7 + 350 360 No line number + 360 376 8 + 376 380 208 + +Stripping the redundant ends gives: + + End-Start Line-delta + 6 +1 + 44 +1 + 300 +5 + 10 No line number + 16 +1 + 4 +200 + + +Note that the end - start value is always positive. + +Finally, in order to fit into a single byte, we need to convert start deltas to the range 0 <= delta <= 254, +and line deltas to the range -127 <= delta <= 127. +A line delta of -128 is used to indicate no line number. +A start delta of 255 is used as a sentinel to mark the end of the table. +Also note that a delta of zero indicates that there are no bytecodes in the given range, +which means we can use an invalid line number for that range. + +Final form: + + Start delta Line delta + 6 +1 + 44 +1 + 254 +5 + 46 0 + 10 -128 (No line number, treated as a delta of zero) + 16 +1 + 0 +127 (line 135, but the range is empty as no bytecodes are at line 135) + 4 +73 + 255 (end mark) --- + +Iterating over the table. +------------------------- + +For the `co_lines` attribute we want to emit the full form, omitting the (350, 360, No line number) and empty entries. + +The code is as follows: + +def co_lines(code): + line = code.co_firstlineno + end = 0 + table_iter = iter(code.internal_line_table) + for sdelta, ldelta in table_iter: + if sdelta == 255: + break + if ldelta == 0: # No change to line number, just accumulate changes to end + end += sdelta + continue + start = end + end = start + sdelta + if ldelta == -128: # No valid line number -- skip entry + continue + line += ldelta + if end == start: # Empty range, omit.
+ continue + yield start, end, line + + + + +The historical co_lnotab format +------------------------------- + +prior to 3.10 code objects stored a field named co_lnotab. +This was an array of unsigned bytes disguised as a Python bytes object. + +The old co_lnotab did not account for the presence of bytecodes without a line number, +nor was it well suited to tracing as a number of workarounds were required. + +The old format can still be accessed via `code.co_lnotab`, which is lazily computed from the new format. + +Below is the description of the old co_lnotab format: + The array is conceptually a compressed list of (bytecode offset increment, line number increment) diff --git a/Objects/longobject.c b/Objects/longobject.c index e0d6410fe6..240e92a41e 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -131,7 +131,7 @@ _PyLong_New(Py_ssize_t size) "too many digits in integer"); return NULL; } - result = PyObject_MALLOC(offsetof(PyLongObject, ob_digit) + + result = PyObject_Malloc(offsetof(PyLongObject, ob_digit) + size*sizeof(digit)); if (!result) { PyErr_NoMemory(); diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index c3ceb788e8..6590387dac 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -211,7 +211,7 @@ _PyModule_CreateInitialized(struct PyModuleDef* module, int module_api_version) return NULL; if (module->m_size > 0) { - m->md_state = PyMem_MALLOC(module->m_size); + m->md_state = PyMem_Malloc(module->m_size); if (!m->md_state) { PyErr_NoMemory(); Py_DECREF(m); @@ -377,7 +377,7 @@ PyModule_ExecDef(PyObject *module, PyModuleDef *def) if (md->md_state == NULL) { /* Always set a state pointer; this serves as a marker to skip * multiple initialization (importlib.reload() is no-op) */ - md->md_state = PyMem_MALLOC(def->m_size); + md->md_state = PyMem_Malloc(def->m_size); if (!md->md_state) { PyErr_NoMemory(); return -1; @@ -681,7 +681,7 @@ module_dealloc(PyModuleObject *m) Py_XDECREF(m->md_dict); Py_XDECREF(m->md_name); 
if (m->md_state != NULL) - PyMem_FREE(m->md_state); + PyMem_Free(m->md_state); Py_TYPE(m)->tp_free((PyObject *)m); } diff --git a/Objects/object.c b/Objects/object.c index 7bc3e48d40..0a8621b350 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -161,7 +161,7 @@ PyObject_InitVar(PyVarObject *op, PyTypeObject *tp, Py_ssize_t size) PyObject * _PyObject_New(PyTypeObject *tp) { - PyObject *op = (PyObject *) PyObject_MALLOC(_PyObject_SIZE(tp)); + PyObject *op = (PyObject *) PyObject_Malloc(_PyObject_SIZE(tp)); if (op == NULL) { return PyErr_NoMemory(); } @@ -174,7 +174,7 @@ _PyObject_NewVar(PyTypeObject *tp, Py_ssize_t nitems) { PyVarObject *op; const size_t size = _PyObject_VAR_SIZE(tp, nitems); - op = (PyVarObject *) PyObject_MALLOC(size); + op = (PyVarObject *) PyObject_Malloc(size); if (op == NULL) { return (PyVarObject *)PyErr_NoMemory(); } @@ -2134,6 +2134,15 @@ _PyTrash_end(PyThreadState *tstate) } +/* bpo-40170: It's only be used in Py_TRASHCAN_BEGIN macro to hide + implementation details. */ +int +_PyTrash_cond(PyObject *op, destructor dealloc) +{ + return Py_TYPE(op)->tp_dealloc == dealloc; +} + + void _Py_NO_RETURN _PyObject_AssertFailed(PyObject *obj, const char *expr, const char *msg, const char *file, int line, const char *function) @@ -2208,6 +2217,22 @@ PyObject_GET_WEAKREFS_LISTPTR(PyObject *op) } +#undef Py_NewRef +#undef Py_XNewRef + +// Export Py_NewRef() and Py_XNewRef() as regular functions for the stable ABI. +PyObject* +Py_NewRef(PyObject *obj) +{ + return _Py_NewRef(obj); +} + +PyObject* +Py_XNewRef(PyObject *obj) +{ + return _Py_XNewRef(obj); +} + #ifdef __cplusplus } #endif diff --git a/Objects/odictobject.c b/Objects/odictobject.c index b4ac560d23..6c7f1175cd 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -459,7 +459,7 @@ later: - implement a fuller MutableMapping API in C? - move the MutableMapping implementation to abstract.c? 
- optimize mutablemapping_update -- use PyObject_MALLOC (small object allocator) for odict nodes? +- use PyObject_Malloc (small object allocator) for odict nodes? - support subclasses better (e.g. in odict_richcompare) */ @@ -567,14 +567,14 @@ _odict_resize(PyODictObject *od) i = _odict_get_index_raw(od, _odictnode_KEY(node), _odictnode_HASH(node)); if (i < 0) { - PyMem_FREE(fast_nodes); + PyMem_Free(fast_nodes); return -1; } fast_nodes[i] = node; } /* Replace the old fast nodes table. */ - PyMem_FREE(od->od_fast_nodes); + PyMem_Free(od->od_fast_nodes); od->od_fast_nodes = fast_nodes; od->od_fast_nodes_size = size; od->od_resize_sentinel = ((PyDictObject *)od)->ma_keys; @@ -683,7 +683,7 @@ _odict_add_new_node(PyODictObject *od, PyObject *key, Py_hash_t hash) } /* must not be added yet */ - node = (_ODictNode *)PyMem_MALLOC(sizeof(_ODictNode)); + node = (_ODictNode *)PyMem_Malloc(sizeof(_ODictNode)); if (node == NULL) { Py_DECREF(key); PyErr_NoMemory(); @@ -701,7 +701,7 @@ _odict_add_new_node(PyODictObject *od, PyObject *key, Py_hash_t hash) #define _odictnode_DEALLOC(node) \ do { \ Py_DECREF(_odictnode_KEY(node)); \ - PyMem_FREE((void *)node); \ + PyMem_Free((void *)node); \ } while (0) /* Repeated calls on the same node are no-ops. */ @@ -776,7 +776,7 @@ _odict_clear_nodes(PyODictObject *od) { _ODictNode *node, *next; - PyMem_FREE(od->od_fast_nodes); + PyMem_Free(od->od_fast_nodes); od->od_fast_nodes = NULL; od->od_fast_nodes_size = 0; od->od_resize_sentinel = NULL; @@ -1814,6 +1814,11 @@ odictiter_iternext(odictiterobject *di) Py_INCREF(result); Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */ Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */ + // bpo-42536: The GC may have untracked this result tuple. 
Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 787d113800..530426c8ac 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -171,7 +171,7 @@ range_dealloc(rangeobject *r) Py_DECREF(r->stop); Py_DECREF(r->step); Py_DECREF(r->length); - PyObject_Del(r); + PyObject_Free(r); } /* Return number of items in range (lo, hi, step) as a PyLong object, @@ -1021,7 +1021,7 @@ longrangeiter_dealloc(longrangeiterobject *r) Py_XDECREF(r->start); Py_XDECREF(r->step); Py_XDECREF(r->len); - PyObject_Del(r); + PyObject_Free(r); } static PyObject * diff --git a/Objects/setobject.c b/Objects/setobject.c index af8ee03d83..79e8451192 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -289,7 +289,7 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) } if (is_oldtable_malloced) - PyMem_DEL(oldtable); + PyMem_Free(oldtable); return 0; } @@ -424,7 +424,7 @@ set_clear_internal(PySetObject *so) } if (table_is_malloced) - PyMem_DEL(table); + PyMem_Free(table); return 0; } @@ -484,7 +484,7 @@ set_dealloc(PySetObject *so) } } if (so->table != so->smalltable) - PyMem_DEL(so->table); + PyMem_Free(so->table); Py_TYPE(so)->tp_free(so); Py_TRASHCAN_END } diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h index 53bcbdea7a..62e4c98de7 100644 --- a/Objects/stringlib/join.h +++ b/Objects/stringlib/join.h @@ -155,7 +155,7 @@ done: for (i = 0; i < nbufs; i++) PyBuffer_Release(&buffers[i]); if (buffers != static_buffers) - PyMem_FREE(buffers); + PyMem_Free(buffers); return res; } diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index b526ad21b8..7152ec6ebe 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -983,7 +983,7 @@ static void formatteriter_dealloc(formatteriterobject *it) { Py_XDECREF(it->str); - 
PyObject_FREE(it); + PyObject_Free(it); } /* returns a tuple: @@ -1147,7 +1147,7 @@ static void fieldnameiter_dealloc(fieldnameiterobject *it) { Py_XDECREF(it->str); - PyObject_FREE(it); + PyObject_Free(it); } /* returns a tuple: diff --git a/Objects/structseq.c b/Objects/structseq.c index 5caa3bd52e..5d71fcff34 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -467,14 +467,14 @@ PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc) type->tp_members = members; if (PyType_Ready(type) < 0) { - PyMem_FREE(members); + PyMem_Free(members); return -1; } Py_INCREF(type); if (initialize_structseq_dict( desc, type->tp_dict, n_members, n_unnamed_members) < 0) { - PyMem_FREE(members); + PyMem_Free(members); Py_DECREF(type); return -1; } @@ -492,7 +492,6 @@ PyTypeObject * PyStructSequence_NewType(PyStructSequence_Desc *desc) { PyMemberDef *members; - PyObject *bases; PyTypeObject *type; PyType_Slot slots[8]; PyType_Spec spec; @@ -526,14 +525,8 @@ PyStructSequence_NewType(PyStructSequence_Desc *desc) spec.flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC; spec.slots = slots; - bases = PyTuple_Pack(1, &PyTuple_Type); - if (bases == NULL) { - PyMem_FREE(members); - return NULL; - } - type = (PyTypeObject *)PyType_FromSpecWithBases(&spec, bases); - Py_DECREF(bases); - PyMem_FREE(members); + type = (PyTypeObject *)PyType_FromSpecWithBases(&spec, (PyObject *)&PyTuple_Type); + PyMem_Free(members); if (type == NULL) { return NULL; } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index bd1587ace8..3498f0d484 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6,7 +6,7 @@ #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_unionobject.h" // _Py_Union() +#include "pycore_unionobject.h" // _Py_Union(), _Py_union_type_or #include "frameobject.h" #include "structmember.h" // PyMemberDef @@ -20,20 +20,13 @@ class object "PyObject *" "&PyBaseObject_Type" #include 
"clinic/typeobject.c.h" -/* bpo-40521: Type method cache is shared by all subinterpreters */ -#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS -# define MCACHE -#endif - -#ifdef MCACHE -/* Support type attribute cache */ +/* Support type attribute lookup cache */ /* The cache can keep references to the names alive for longer than they normally would. This is why the maximum size is limited to MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large strings are used as attribute names. */ #define MCACHE_MAX_ATTR_SIZE 100 -#define MCACHE_SIZE_EXP 12 #define MCACHE_HASH(version, name_hash) \ (((unsigned int)(version) ^ (unsigned int)(name_hash)) \ & ((1 << MCACHE_SIZE_EXP) - 1)) @@ -44,30 +37,16 @@ class object "PyObject *" "&PyBaseObject_Type" #define MCACHE_CACHEABLE_NAME(name) \ PyUnicode_CheckExact(name) && \ PyUnicode_IS_READY(name) && \ - PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE - -struct method_cache_entry { - unsigned int version; - PyObject *name; /* reference to exactly a str or None */ - PyObject *value; /* borrowed */ -}; + (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE) -static struct method_cache_entry method_cache[1 << MCACHE_SIZE_EXP]; +// Used to set PyTypeObject.tp_version_tag static unsigned int next_version_tag = 0; -#endif -#define MCACHE_STATS 0 +typedef struct PySlot_Offset { + short subslot_offset; + short slot_offset; +} PySlot_Offset; -#if MCACHE_STATS -static size_t method_cache_hits = 0; -static size_t method_cache_misses = 0; -static size_t method_cache_collisions = 0; -#endif - -/* bpo-40521: Interned strings are shared by all subinterpreters */ -#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS -# define INTERN_NAME_STRINGS -#endif /* alphabetical order */ _Py_IDENTIFIER(__abstractmethods__); @@ -224,46 +203,95 @@ _PyType_GetTextSignatureFromInternalDoc(const char *name, const char *internal_d return PyUnicode_FromStringAndSize(start, end - start); } -unsigned int -PyType_ClearCache(void) + +static struct type_cache* 
+get_type_cache(void) { -#ifdef MCACHE - Py_ssize_t i; - unsigned int cur_version_tag = next_version_tag - 1; + PyInterpreterState *interp = _PyInterpreterState_GET(); + return &interp->type_cache; +} + + +static void +type_cache_clear(struct type_cache *cache, int use_none) +{ + for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { + struct type_cache_entry *entry = &cache->hashtable[i]; + entry->version = 0; + if (use_none) { + // Set to None so _PyType_Lookup() can use Py_SETREF(), + // rather than using slower Py_XSETREF(). + Py_XSETREF(entry->name, Py_NewRef(Py_None)); + } + else { + Py_CLEAR(entry->name); + } + entry->value = NULL; + } + + // Mark all version tags as invalid + PyType_Modified(&PyBaseObject_Type); +} + + +void +_PyType_InitCache(PyInterpreterState *interp) +{ + struct type_cache *cache = &interp->type_cache; + for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { + struct type_cache_entry *entry = &cache->hashtable[i]; + assert(entry->name == NULL); + entry->version = 0; + // Set to None so _PyType_Lookup() can use Py_SETREF(), + // rather than using slower Py_XSETREF(). 
+ entry->name = Py_NewRef(Py_None); + entry->value = NULL; + } +} + + +static unsigned int +_PyType_ClearCache(struct type_cache *cache) +{ #if MCACHE_STATS - size_t total = method_cache_hits + method_cache_collisions + method_cache_misses; + size_t total = cache->hits + cache->collisions + cache->misses; fprintf(stderr, "-- Method cache hits = %zd (%d%%)\n", - method_cache_hits, (int) (100.0 * method_cache_hits / total)); + cache->hits, (int) (100.0 * cache->hits / total)); fprintf(stderr, "-- Method cache true misses = %zd (%d%%)\n", - method_cache_misses, (int) (100.0 * method_cache_misses / total)); + cache->misses, (int) (100.0 * cache->misses / total)); fprintf(stderr, "-- Method cache collisions = %zd (%d%%)\n", - method_cache_collisions, (int) (100.0 * method_cache_collisions / total)); + cache->collisions, (int) (100.0 * cache->collisions / total)); fprintf(stderr, "-- Method cache size = %zd KiB\n", - sizeof(method_cache) / 1024); + sizeof(cache->hashtable) / 1024); #endif - for (i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { - method_cache[i].version = 0; - Py_CLEAR(method_cache[i].name); - method_cache[i].value = NULL; - } + unsigned int cur_version_tag = next_version_tag - 1; next_version_tag = 0; - /* mark all version tags as invalid */ - PyType_Modified(&PyBaseObject_Type); + type_cache_clear(cache, 0); + return cur_version_tag; -#else - return 0; -#endif } + +unsigned int +PyType_ClearCache(void) +{ + struct type_cache *cache = get_type_cache(); + return _PyType_ClearCache(cache); +} + + void -_PyType_Fini(void) +_PyType_Fini(PyThreadState *tstate) { - PyType_ClearCache(); - clear_slotdefs(); + _PyType_ClearCache(&tstate->interp->type_cache); + if (_Py_IsMainInterpreter(tstate)) { + clear_slotdefs(); + } } + void PyType_Modified(PyTypeObject *type) { @@ -365,9 +393,8 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { Py_TPFLAGS_VALID_VERSION_TAG); } -#ifdef MCACHE static int -assign_version_tag(PyTypeObject *type) +assign_version_tag(struct 
type_cache *cache, PyTypeObject *type) { /* Ensure that the tp_version_tag is valid and set Py_TPFLAGS_VALID_VERSION_TAG. To respect the invariant, this @@ -388,31 +415,22 @@ assign_version_tag(PyTypeObject *type) /* for stress-testing: next_version_tag &= 0xFF; */ if (type->tp_version_tag == 0) { - /* wrap-around or just starting Python - clear the whole - cache by filling names with references to Py_None. - Values are also set to NULL for added protection, as they - are borrowed reference */ - for (i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { - method_cache[i].value = NULL; - Py_INCREF(Py_None); - Py_XSETREF(method_cache[i].name, Py_None); - } - /* mark all version tags as invalid */ - PyType_Modified(&PyBaseObject_Type); + // Wrap-around or just starting Python - clear the whole cache + type_cache_clear(cache, 1); return 1; } + bases = type->tp_bases; n = PyTuple_GET_SIZE(bases); for (i = 0; i < n; i++) { PyObject *b = PyTuple_GET_ITEM(bases, i); assert(PyType_Check(b)); - if (!assign_version_tag((PyTypeObject *)b)) + if (!assign_version_tag(cache, (PyTypeObject *)b)) return 0; } type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; return 1; } -#endif static PyMemberDef type_members[] = { @@ -1054,7 +1072,7 @@ PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) obj = _PyObject_GC_Malloc(size); } else { - obj = (PyObject *)PyObject_MALLOC(size); + obj = (PyObject *)PyObject_Malloc(size); } if (obj == NULL) { @@ -1774,7 +1792,7 @@ pmerge(PyObject *acc, PyObject **to_merge, Py_ssize_t to_merge_size) } out: - PyMem_Del(remain); + PyMem_Free(remain); return res; } @@ -1854,7 +1872,7 @@ mro_implementation(PyTypeObject *type) result = PyList_New(1); if (result == NULL) { - PyMem_Del(to_merge); + PyMem_Free(to_merge); return NULL; } @@ -1864,7 +1882,7 @@ mro_implementation(PyTypeObject *type) Py_CLEAR(result); } - PyMem_Del(to_merge); + PyMem_Free(to_merge); return result; } @@ -2702,7 +2720,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) goto 
error; /* Silently truncate the docstring if it contains null bytes. */ len = strlen(doc_str); - tp_doc = (char *)PyObject_MALLOC(len + 1); + tp_doc = (char *)PyObject_Malloc(len + 1); if (tp_doc == NULL) { PyErr_NoMemory(); goto error; @@ -2870,8 +2888,18 @@ error: return NULL; } -static const short slotoffsets[] = { - -1, /* invalid slot */ +/* An array of type slot offsets corresponding to Py_tp_* constants, + * for use in e.g. PyType_Spec and PyType_GetSlot. + * Each entry has two offsets: "slot_offset" and "subslot_offset". + * If is subslot_offset is -1, slot_offset is an offset within the + * PyTypeObject struct. + * Otherwise slot_offset is an offset to a pointer to a sub-slots struct + * (such as "tp_as_number"), and subslot_offset is the offset within + * that struct. + * The actual table is generated by a script. + */ +static const PySlot_Offset pyslot_offsets[] = { + {0, 0}, #include "typeslots.inc" }; @@ -2892,6 +2920,7 @@ PyType_FromModuleAndSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) const PyType_Slot *slot; Py_ssize_t nmembers, weaklistoffset, dictoffset, vectorcalloffset; char *res_start; + short slot_offset, subslot_offset; nmembers = weaklistoffset = dictoffset = vectorcalloffset = 0; for (slot = spec->slots; slot->slot; slot++) { @@ -2961,26 +2990,41 @@ PyType_FromModuleAndSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) base = slot->pfunc; else if (slot->slot == Py_tp_bases) { bases = slot->pfunc; - Py_INCREF(bases); } } - if (!bases) + if (!bases) { bases = PyTuple_Pack(1, base); + if (!bases) + goto fail; + } + else if (!PyTuple_Check(bases)) { + PyErr_SetString(PyExc_SystemError, "Py_tp_bases is not a tuple"); + goto fail; + } + else { + Py_INCREF(bases); + } + } + else if (!PyTuple_Check(bases)) { + bases = PyTuple_Pack(1, bases); if (!bases) goto fail; } - else + else { Py_INCREF(bases); + } /* Calculate best base, and check that all bases are type objects */ base = best_base(bases); if (base == NULL) { + 
Py_DECREF(bases); goto fail; } if (!_PyType_HasFeature(base, Py_TPFLAGS_BASETYPE)) { PyErr_Format(PyExc_TypeError, "type '%.100s' is not an acceptable base type", base->tp_name); + Py_DECREF(bases); goto fail; } @@ -2992,7 +3036,6 @@ PyType_FromModuleAndSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) type->tp_as_buffer = &res->as_buffer; /* Set tp_base and tp_bases */ type->tp_bases = bases; - bases = NULL; Py_INCREF(base); type->tp_base = base; @@ -3001,7 +3044,7 @@ PyType_FromModuleAndSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) for (slot = spec->slots; slot->slot; slot++) { if (slot->slot < 0 - || (size_t)slot->slot >= Py_ARRAY_LENGTH(slotoffsets)) { + || (size_t)slot->slot >= Py_ARRAY_LENGTH(pyslot_offsets)) { PyErr_SetString(PyExc_RuntimeError, "invalid slot offset"); goto fail; } @@ -3012,8 +3055,12 @@ PyType_FromModuleAndSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) else if (slot->slot == Py_tp_doc) { /* For the docstring slot, which usually points to a static string literal, we need to make a copy */ + if (slot->pfunc == NULL) { + type->tp_doc = NULL; + continue; + } size_t len = strlen(slot->pfunc)+1; - char *tp_doc = PyObject_MALLOC(len); + char *tp_doc = PyObject_Malloc(len); if (tp_doc == NULL) { type->tp_doc = NULL; PyErr_NoMemory(); @@ -3030,7 +3077,15 @@ PyType_FromModuleAndSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) } else { /* Copy other slots directly */ - *(void**)(res_start + slotoffsets[slot->slot]) = slot->pfunc; + PySlot_Offset slotoffsets = pyslot_offsets[slot->slot]; + slot_offset = slotoffsets.slot_offset; + if (slotoffsets.subslot_offset == -1) { + *(void**)((char*)res_start + slot_offset) = slot->pfunc; + } else { + void *parent_slot = *(void**)((char*)res_start + slot_offset); + subslot_offset = slotoffsets.subslot_offset; + *(void**)((char*)parent_slot + subslot_offset) = slot->pfunc; + } } } if (type->tp_dealloc == NULL) { @@ -3113,15 +3168,23 @@ PyType_FromSpec(PyType_Spec *spec) 
void * PyType_GetSlot(PyTypeObject *type, int slot) { - if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE) || slot < 0) { + void *parent_slot; + int slots_len = Py_ARRAY_LENGTH(pyslot_offsets); + + if (slot <= 0 || slot >= slots_len) { PyErr_BadInternalCall(); return NULL; } - if ((size_t)slot >= Py_ARRAY_LENGTH(slotoffsets)) { - /* Extension module requesting slot from a future version */ + + parent_slot = *(void**)((char*)type + pyslot_offsets[slot].slot_offset); + if (parent_slot == NULL) { return NULL; } - return *(void**)(((char*)type) + slotoffsets[slot]); + /* Return slot directly if we have no sub slot. */ + if (pyslot_offsets[slot].subslot_offset == -1) { + return parent_slot; + } + return *(void**)((char*)parent_slot + pyslot_offsets[slot].subslot_offset); } PyObject * @@ -3158,6 +3221,44 @@ PyType_GetModuleState(PyTypeObject *type) return PyModule_GetState(m); } + +/* Get the module of the first superclass where the module has the + * given PyModuleDef. + * Implemented by walking the MRO, is relatively slow. + * + * This is internal API for experimentation within stdlib. Discussion: + * https://mail.python.org/archives/list/capi-sig@python.org/thread/T3P2QNLNLBRFHWSKYSTPMVEIL2EEKFJU/ + */ +PyObject * +_PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) +{ + assert(PyType_Check(type)); + assert(type->tp_mro); + int i; + for (i = 0; i < PyTuple_GET_SIZE(type->tp_mro); i++) { + PyObject *super = PyTuple_GET_ITEM(type->tp_mro, i); + if (!PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) { + /* Currently, there's no way for static types to inherit + * from heap types, but to allow that possibility, + * we `continue` rather than `break`. + * We'll just potentially loop a few more times before throwing + * the error. 
+ */ + continue; + } + PyHeapTypeObject *ht = (PyHeapTypeObject*)super; + if (ht->ht_module && PyModule_GetDef(ht->ht_module) == def) { + return ht->ht_module; + } + } + PyErr_Format( + PyExc_TypeError, + "_PyType_GetModuleByDef: No superclass of '%s' has the given module", + type->tp_name); + return NULL; +} + + /* Internal API to look for a name through the MRO, bypassing the method cache. This returns a borrowed reference, and might set an exception. 'error' is set to: -1: error with exception; 1: error without exception; 0: ok */ @@ -3228,20 +3329,19 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) PyObject *res; int error; -#ifdef MCACHE if (MCACHE_CACHEABLE_NAME(name) && _PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) { /* fast path */ unsigned int h = MCACHE_HASH_METHOD(type, name); - if (method_cache[h].version == type->tp_version_tag && - method_cache[h].name == name) { + struct type_cache *cache = get_type_cache(); + struct type_cache_entry *entry = &cache->hashtable[h]; + if (entry->version == type->tp_version_tag && entry->name == name) { #if MCACHE_STATS - method_cache_hits++; + cache->hits++; #endif - return method_cache[h].value; + return entry->value; } } -#endif /* We may end up clearing live exceptions below, so make sure it's ours. 
*/ assert(!PyErr_Occurred()); @@ -3263,22 +3363,25 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) return NULL; } -#ifdef MCACHE - if (MCACHE_CACHEABLE_NAME(name) && assign_version_tag(type)) { - unsigned int h = MCACHE_HASH_METHOD(type, name); - method_cache[h].version = type->tp_version_tag; - method_cache[h].value = res; /* borrowed */ - Py_INCREF(name); - assert(((PyASCIIObject *)(name))->hash != -1); + if (MCACHE_CACHEABLE_NAME(name)) { + struct type_cache *cache = get_type_cache(); + if (assign_version_tag(cache, type)) { + unsigned int h = MCACHE_HASH_METHOD(type, name); + struct type_cache_entry *entry = &cache->hashtable[h]; + entry->version = type->tp_version_tag; + entry->value = res; /* borrowed */ + assert(((PyASCIIObject *)(name))->hash != -1); #if MCACHE_STATS - if (method_cache[h].name != Py_None && method_cache[h].name != name) - method_cache_collisions++; - else - method_cache_misses++; + if (entry->name != Py_None && entry->name != name) { + cache->collisions++; + } + else { + cache->misses++; + } #endif - Py_SETREF(method_cache[h].name, name); + Py_SETREF(entry->name, Py_NewRef(name)); + } } -#endif return res; } @@ -3421,7 +3524,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) if (name == NULL) return -1; } -#ifdef INTERN_NAME_STRINGS if (!PyUnicode_CHECK_INTERNED(name)) { PyUnicode_InternInPlace(&name); if (!PyUnicode_CHECK_INTERNED(name)) { @@ -3431,7 +3533,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) return -1; } } -#endif } else { /* Will fail in _PyObject_GenericSetAttrWithDict. 
*/ @@ -3747,19 +3848,9 @@ type_is_gc(PyTypeObject *type) return type->tp_flags & Py_TPFLAGS_HEAPTYPE; } -static PyObject * -type_or(PyTypeObject* self, PyObject* param) { - PyObject *tuple = PyTuple_Pack(2, self, param); - if (tuple == NULL) { - return NULL; - } - PyObject *new_union = _Py_Union(tuple); - Py_DECREF(tuple); - return new_union; -} static PyNumberMethods type_as_number = { - .nb_or = (binaryfunc)type_or, // Add __or__ function + .nb_or = _Py_union_type_or, // Add __or__ function }; PyTypeObject PyType_Type = { @@ -5395,6 +5486,13 @@ PyType_Ready(PyTypeObject *type) _PyObject_ASSERT((PyObject *)type, type->tp_vectorcall_offset > 0); _PyObject_ASSERT((PyObject *)type, type->tp_call != NULL); } + /* Consistency check for Py_TPFLAGS_HAVE_AM_SEND - flag requires + * type->tp_as_async->am_send to be present. + */ + if (type->tp_flags & Py_TPFLAGS_HAVE_AM_SEND) { + _PyObject_ASSERT((PyObject *)type, type->tp_as_async != NULL); + _PyObject_ASSERT((PyObject *)type, type->tp_as_async->am_send != NULL); + } type->tp_flags |= Py_TPFLAGS_READYING; @@ -5416,8 +5514,13 @@ PyType_Ready(PyTypeObject *type) /* Initialize tp_base (defaults to BaseObject unless that's us) */ base = type->tp_base; if (base == NULL && type != &PyBaseObject_Type) { - base = type->tp_base = &PyBaseObject_Type; - Py_INCREF(base); + base = &PyBaseObject_Type; + if (type->tp_flags & Py_TPFLAGS_HEAPTYPE) { + type->tp_base = (PyTypeObject*)Py_NewRef((PyObject*)base); + } + else { + type->tp_base = base; + } } /* Now the only way base can still be NULL is if type is @@ -7575,17 +7678,10 @@ _PyTypes_InitSlotDefs(void) for (slotdef *p = slotdefs; p->name; p++) { /* Slots must be ordered by their offset in the PyHeapTypeObject. 
*/ assert(!p[1].name || p->offset <= p[1].offset); -#ifdef INTERN_NAME_STRINGS p->name_strobj = PyUnicode_InternFromString(p->name); if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) { return _PyStatus_NO_MEMORY(); } -#else - p->name_strobj = PyUnicode_FromString(p->name); - if (!p->name_strobj) { - return _PyStatus_NO_MEMORY(); - } -#endif } slotdefs_initialized = 1; return _PyStatus_OK(); @@ -7610,24 +7706,16 @@ update_slot(PyTypeObject *type, PyObject *name) int offset; assert(PyUnicode_CheckExact(name)); -#ifdef INTERN_NAME_STRINGS assert(PyUnicode_CHECK_INTERNED(name)); -#endif assert(slotdefs_initialized); pp = ptrs; for (p = slotdefs; p->name; p++) { assert(PyUnicode_CheckExact(p->name_strobj)); assert(PyUnicode_CheckExact(name)); -#ifdef INTERN_NAME_STRINGS if (p->name_strobj == name) { *pp++ = p; } -#else - if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) { - *pp++ = p; - } -#endif } *pp = NULL; for (pp = ptrs; *pp; pp++) { diff --git a/Objects/typeslots.inc b/Objects/typeslots.inc index ffc9bb2e1c..896daa7d80 100644 --- a/Objects/typeslots.inc +++ b/Objects/typeslots.inc @@ -1,81 +1,82 @@ /* Generated by typeslots.py */ -offsetof(PyHeapTypeObject, as_buffer.bf_getbuffer), -offsetof(PyHeapTypeObject, as_buffer.bf_releasebuffer), -offsetof(PyHeapTypeObject, as_mapping.mp_ass_subscript), -offsetof(PyHeapTypeObject, as_mapping.mp_length), -offsetof(PyHeapTypeObject, as_mapping.mp_subscript), -offsetof(PyHeapTypeObject, as_number.nb_absolute), -offsetof(PyHeapTypeObject, as_number.nb_add), -offsetof(PyHeapTypeObject, as_number.nb_and), -offsetof(PyHeapTypeObject, as_number.nb_bool), -offsetof(PyHeapTypeObject, as_number.nb_divmod), -offsetof(PyHeapTypeObject, as_number.nb_float), -offsetof(PyHeapTypeObject, as_number.nb_floor_divide), -offsetof(PyHeapTypeObject, as_number.nb_index), -offsetof(PyHeapTypeObject, as_number.nb_inplace_add), -offsetof(PyHeapTypeObject, as_number.nb_inplace_and), -offsetof(PyHeapTypeObject, 
as_number.nb_inplace_floor_divide), -offsetof(PyHeapTypeObject, as_number.nb_inplace_lshift), -offsetof(PyHeapTypeObject, as_number.nb_inplace_multiply), -offsetof(PyHeapTypeObject, as_number.nb_inplace_or), -offsetof(PyHeapTypeObject, as_number.nb_inplace_power), -offsetof(PyHeapTypeObject, as_number.nb_inplace_remainder), -offsetof(PyHeapTypeObject, as_number.nb_inplace_rshift), -offsetof(PyHeapTypeObject, as_number.nb_inplace_subtract), -offsetof(PyHeapTypeObject, as_number.nb_inplace_true_divide), -offsetof(PyHeapTypeObject, as_number.nb_inplace_xor), -offsetof(PyHeapTypeObject, as_number.nb_int), -offsetof(PyHeapTypeObject, as_number.nb_invert), -offsetof(PyHeapTypeObject, as_number.nb_lshift), -offsetof(PyHeapTypeObject, as_number.nb_multiply), -offsetof(PyHeapTypeObject, as_number.nb_negative), -offsetof(PyHeapTypeObject, as_number.nb_or), -offsetof(PyHeapTypeObject, as_number.nb_positive), -offsetof(PyHeapTypeObject, as_number.nb_power), -offsetof(PyHeapTypeObject, as_number.nb_remainder), -offsetof(PyHeapTypeObject, as_number.nb_rshift), -offsetof(PyHeapTypeObject, as_number.nb_subtract), -offsetof(PyHeapTypeObject, as_number.nb_true_divide), -offsetof(PyHeapTypeObject, as_number.nb_xor), -offsetof(PyHeapTypeObject, as_sequence.sq_ass_item), -offsetof(PyHeapTypeObject, as_sequence.sq_concat), -offsetof(PyHeapTypeObject, as_sequence.sq_contains), -offsetof(PyHeapTypeObject, as_sequence.sq_inplace_concat), -offsetof(PyHeapTypeObject, as_sequence.sq_inplace_repeat), -offsetof(PyHeapTypeObject, as_sequence.sq_item), -offsetof(PyHeapTypeObject, as_sequence.sq_length), -offsetof(PyHeapTypeObject, as_sequence.sq_repeat), -offsetof(PyHeapTypeObject, ht_type.tp_alloc), -offsetof(PyHeapTypeObject, ht_type.tp_base), -offsetof(PyHeapTypeObject, ht_type.tp_bases), -offsetof(PyHeapTypeObject, ht_type.tp_call), -offsetof(PyHeapTypeObject, ht_type.tp_clear), -offsetof(PyHeapTypeObject, ht_type.tp_dealloc), -offsetof(PyHeapTypeObject, ht_type.tp_del), 
-offsetof(PyHeapTypeObject, ht_type.tp_descr_get), -offsetof(PyHeapTypeObject, ht_type.tp_descr_set), -offsetof(PyHeapTypeObject, ht_type.tp_doc), -offsetof(PyHeapTypeObject, ht_type.tp_getattr), -offsetof(PyHeapTypeObject, ht_type.tp_getattro), -offsetof(PyHeapTypeObject, ht_type.tp_hash), -offsetof(PyHeapTypeObject, ht_type.tp_init), -offsetof(PyHeapTypeObject, ht_type.tp_is_gc), -offsetof(PyHeapTypeObject, ht_type.tp_iter), -offsetof(PyHeapTypeObject, ht_type.tp_iternext), -offsetof(PyHeapTypeObject, ht_type.tp_methods), -offsetof(PyHeapTypeObject, ht_type.tp_new), -offsetof(PyHeapTypeObject, ht_type.tp_repr), -offsetof(PyHeapTypeObject, ht_type.tp_richcompare), -offsetof(PyHeapTypeObject, ht_type.tp_setattr), -offsetof(PyHeapTypeObject, ht_type.tp_setattro), -offsetof(PyHeapTypeObject, ht_type.tp_str), -offsetof(PyHeapTypeObject, ht_type.tp_traverse), -offsetof(PyHeapTypeObject, ht_type.tp_members), -offsetof(PyHeapTypeObject, ht_type.tp_getset), -offsetof(PyHeapTypeObject, ht_type.tp_free), -offsetof(PyHeapTypeObject, as_number.nb_matrix_multiply), -offsetof(PyHeapTypeObject, as_number.nb_inplace_matrix_multiply), -offsetof(PyHeapTypeObject, as_async.am_await), -offsetof(PyHeapTypeObject, as_async.am_aiter), -offsetof(PyHeapTypeObject, as_async.am_anext), -offsetof(PyHeapTypeObject, ht_type.tp_finalize), +{offsetof(PyBufferProcs, bf_getbuffer), offsetof(PyTypeObject, tp_as_buffer)}, +{offsetof(PyBufferProcs, bf_releasebuffer), offsetof(PyTypeObject, tp_as_buffer)}, +{offsetof(PyMappingMethods, mp_ass_subscript), offsetof(PyTypeObject, tp_as_mapping)}, +{offsetof(PyMappingMethods, mp_length), offsetof(PyTypeObject, tp_as_mapping)}, +{offsetof(PyMappingMethods, mp_subscript), offsetof(PyTypeObject, tp_as_mapping)}, +{offsetof(PyNumberMethods, nb_absolute), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_add), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_and), offsetof(PyTypeObject, tp_as_number)}, 
+{offsetof(PyNumberMethods, nb_bool), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_divmod), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_float), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_floor_divide), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_index), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_add), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_and), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_floor_divide), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_lshift), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_multiply), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_or), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_power), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_remainder), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_rshift), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_subtract), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_true_divide), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_xor), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_int), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_invert), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_lshift), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_multiply), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_negative), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_or), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_positive), offsetof(PyTypeObject, tp_as_number)}, 
+{offsetof(PyNumberMethods, nb_power), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_remainder), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_rshift), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_subtract), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_true_divide), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_xor), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PySequenceMethods, sq_ass_item), offsetof(PyTypeObject, tp_as_sequence)}, +{offsetof(PySequenceMethods, sq_concat), offsetof(PyTypeObject, tp_as_sequence)}, +{offsetof(PySequenceMethods, sq_contains), offsetof(PyTypeObject, tp_as_sequence)}, +{offsetof(PySequenceMethods, sq_inplace_concat), offsetof(PyTypeObject, tp_as_sequence)}, +{offsetof(PySequenceMethods, sq_inplace_repeat), offsetof(PyTypeObject, tp_as_sequence)}, +{offsetof(PySequenceMethods, sq_item), offsetof(PyTypeObject, tp_as_sequence)}, +{offsetof(PySequenceMethods, sq_length), offsetof(PyTypeObject, tp_as_sequence)}, +{offsetof(PySequenceMethods, sq_repeat), offsetof(PyTypeObject, tp_as_sequence)}, +{-1, offsetof(PyTypeObject, tp_alloc)}, +{-1, offsetof(PyTypeObject, tp_base)}, +{-1, offsetof(PyTypeObject, tp_bases)}, +{-1, offsetof(PyTypeObject, tp_call)}, +{-1, offsetof(PyTypeObject, tp_clear)}, +{-1, offsetof(PyTypeObject, tp_dealloc)}, +{-1, offsetof(PyTypeObject, tp_del)}, +{-1, offsetof(PyTypeObject, tp_descr_get)}, +{-1, offsetof(PyTypeObject, tp_descr_set)}, +{-1, offsetof(PyTypeObject, tp_doc)}, +{-1, offsetof(PyTypeObject, tp_getattr)}, +{-1, offsetof(PyTypeObject, tp_getattro)}, +{-1, offsetof(PyTypeObject, tp_hash)}, +{-1, offsetof(PyTypeObject, tp_init)}, +{-1, offsetof(PyTypeObject, tp_is_gc)}, +{-1, offsetof(PyTypeObject, tp_iter)}, +{-1, offsetof(PyTypeObject, tp_iternext)}, +{-1, offsetof(PyTypeObject, tp_methods)}, +{-1, offsetof(PyTypeObject, tp_new)}, +{-1, offsetof(PyTypeObject, tp_repr)}, +{-1, 
offsetof(PyTypeObject, tp_richcompare)}, +{-1, offsetof(PyTypeObject, tp_setattr)}, +{-1, offsetof(PyTypeObject, tp_setattro)}, +{-1, offsetof(PyTypeObject, tp_str)}, +{-1, offsetof(PyTypeObject, tp_traverse)}, +{-1, offsetof(PyTypeObject, tp_members)}, +{-1, offsetof(PyTypeObject, tp_getset)}, +{-1, offsetof(PyTypeObject, tp_free)}, +{offsetof(PyNumberMethods, nb_matrix_multiply), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyNumberMethods, nb_inplace_matrix_multiply), offsetof(PyTypeObject, tp_as_number)}, +{offsetof(PyAsyncMethods, am_await), offsetof(PyTypeObject, tp_as_async)}, +{offsetof(PyAsyncMethods, am_aiter), offsetof(PyTypeObject, tp_as_async)}, +{offsetof(PyAsyncMethods, am_anext), offsetof(PyTypeObject, tp_as_async)}, +{-1, offsetof(PyTypeObject, tp_finalize)}, +{offsetof(PyAsyncMethods, am_send), offsetof(PyTypeObject, tp_as_async)}, diff --git a/Objects/typeslots.py b/Objects/typeslots.py index 9b6d4adbc7..8ab05f91be 100755 --- a/Objects/typeslots.py +++ b/Objects/typeslots.py @@ -3,6 +3,7 @@ import sys, re + def generate_typeslots(out=sys.stdout): out.write("/* Generated by typeslots.py */\n") res = {} @@ -10,27 +11,34 @@ def generate_typeslots(out=sys.stdout): m = re.match("#define Py_([a-z_]+) ([0-9]+)", line) if not m: continue + member = m.group(1) if member.startswith("tp_"): - member = "ht_type."+member + member = f'{{-1, offsetof(PyTypeObject, {member})}}' elif member.startswith("am_"): - member = "as_async."+member + member = (f'{{offsetof(PyAsyncMethods, {member}),'+ + ' offsetof(PyTypeObject, tp_as_async)}') elif member.startswith("nb_"): - member = "as_number."+member + member = (f'{{offsetof(PyNumberMethods, {member}),'+ + ' offsetof(PyTypeObject, tp_as_number)}') elif member.startswith("mp_"): - member = "as_mapping."+member + member = (f'{{offsetof(PyMappingMethods, {member}),'+ + ' offsetof(PyTypeObject, tp_as_mapping)}') elif member.startswith("sq_"): - member = "as_sequence."+member + member = (f'{{offsetof(PySequenceMethods, 
{member}),'+ + ' offsetof(PyTypeObject, tp_as_sequence)}') elif member.startswith("bf_"): - member = "as_buffer."+member + member = (f'{{offsetof(PyBufferProcs, {member}),'+ + ' offsetof(PyTypeObject, tp_as_buffer)}') res[int(m.group(2))] = member M = max(res.keys())+1 for i in range(1,M): if i in res: - out.write("offsetof(PyHeapTypeObject, %s),\n" % res[i]) + out.write("%s,\n" % res[i]) else: - out.write("0,\n") + out.write("{0, 0},\n") + def main(): if len(sys.argv) == 2: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9058018201..ad32a062d4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -41,7 +41,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_atomic_funcs.h" // _Py_atomic_size_get() #include "pycore_bytes_methods.h" // _Py_bytes_lower() +#include "pycore_format.h" // F_LJUST #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_object.h" // _PyObject_GC_TRACK() @@ -204,22 +206,6 @@ extern "C" { # define OVERALLOCATE_FACTOR 4 #endif -/* bpo-40521: Interned strings are shared by all interpreters. */ -#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS -# define INTERNED_STRINGS -#endif - -/* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. - - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 
2 : 0) -*/ -#ifdef INTERNED_STRINGS -static PyObject *interned = NULL; -#endif static struct _Py_unicode_state* get_unicode_state(void) @@ -301,9 +287,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, _Py_error_handler error_handler, const char *errors, Py_ssize_t *consumed); -/* List of static strings. */ -static _Py_Identifier *static_strings = NULL; - /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -839,7 +822,11 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, /* generate replacement */ for (i = collstart; i < collend; ++i) { - str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); + size = sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); + if (size < 0) { + return NULL; + } + str += size; } return str; } @@ -1057,7 +1044,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) new_size = (struct_size + (length + 1) * char_size); if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { - PyObject_DEL(_PyUnicode_UTF8(unicode)); + PyObject_Free(_PyUnicode_UTF8(unicode)); _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } @@ -1068,7 +1055,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) _Py_ForgetReference(unicode); #endif - new_unicode = (PyObject *)PyObject_REALLOC(unicode, new_size); + new_unicode = (PyObject *)PyObject_Realloc(unicode, new_size); if (new_unicode == NULL) { _Py_NewReference(unicode); PyErr_NoMemory(); @@ -1084,7 +1071,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) _PyUnicode_WSTR_LENGTH(unicode) = length; } else if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { - PyObject_DEL(_PyUnicode_WSTR(unicode)); + PyObject_Free(_PyUnicode_WSTR(unicode)); _PyUnicode_WSTR(unicode) = NULL; if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = 0; @@ -1127,12 +1114,12 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) { - 
PyObject_DEL(_PyUnicode_UTF8(unicode)); + PyObject_Free(_PyUnicode_UTF8(unicode)); _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } - data = (PyObject *)PyObject_REALLOC(data, new_size); + data = (PyObject *)PyObject_Realloc(data, new_size); if (data == NULL) { PyErr_NoMemory(); return -1; @@ -1165,7 +1152,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) } new_size = sizeof(wchar_t) * (length + 1); wstr = _PyUnicode_WSTR(unicode); - wstr = PyObject_REALLOC(wstr, new_size); + wstr = PyObject_Realloc(wstr, new_size); if (!wstr) { PyErr_NoMemory(); return -1; @@ -1255,7 +1242,7 @@ _PyUnicode_New(Py_ssize_t length) _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; - _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size); + _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_Malloc(new_size); if (!_PyUnicode_WSTR(unicode)) { Py_DECREF(unicode); PyErr_NoMemory(); @@ -1452,7 +1439,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) * PyObject_New() so we are able to allocate space for the object and * it's data buffer. 
*/ - obj = (PyObject *) PyObject_MALLOC(struct_size + (size + 1) * char_size); + obj = (PyObject *) PyObject_Malloc(struct_size + (size + 1) * char_size); if (obj == NULL) { return PyErr_NoMemory(); } @@ -1834,7 +1821,7 @@ _PyUnicode_Ready(PyObject *unicode) return -1; if (maxchar < 256) { - _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1); + _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc(_PyUnicode_WSTR_LENGTH(unicode) + 1); if (!_PyUnicode_DATA_ANY(unicode)) { PyErr_NoMemory(); return -1; @@ -1855,7 +1842,7 @@ _PyUnicode_Ready(PyObject *unicode) _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } - PyObject_FREE(_PyUnicode_WSTR(unicode)); + PyObject_Free(_PyUnicode_WSTR(unicode)); _PyUnicode_WSTR(unicode) = NULL; _PyUnicode_WSTR_LENGTH(unicode) = 0; } @@ -1875,7 +1862,7 @@ _PyUnicode_Ready(PyObject *unicode) _PyUnicode_UTF8_LENGTH(unicode) = 0; #else /* sizeof(wchar_t) == 4 */ - _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC( + _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc( 2 * (_PyUnicode_WSTR_LENGTH(unicode) + 1)); if (!_PyUnicode_DATA_ANY(unicode)) { PyErr_NoMemory(); @@ -1889,12 +1876,12 @@ _PyUnicode_Ready(PyObject *unicode) _PyUnicode_STATE(unicode).kind = PyUnicode_2BYTE_KIND; _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; - PyObject_FREE(_PyUnicode_WSTR(unicode)); + PyObject_Free(_PyUnicode_WSTR(unicode)); _PyUnicode_WSTR(unicode) = NULL; _PyUnicode_WSTR_LENGTH(unicode) = 0; #endif } - /* maxchar exeeds 16 bit, wee need 4 bytes for unicode characters */ + /* maxchar exceeds 16 bit, wee need 4 bytes for unicode characters */ else { #if SIZEOF_WCHAR_T == 2 /* in case the native representation is 2-bytes, we need to allocate a @@ -1904,7 +1891,7 @@ _PyUnicode_Ready(PyObject *unicode) PyErr_NoMemory(); return -1; } - _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC(4 * (length_wo_surrogates + 1)); + _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc(4 * (length_wo_surrogates + 1)); if 
(!_PyUnicode_DATA_ANY(unicode)) { PyErr_NoMemory(); return -1; @@ -1916,7 +1903,7 @@ _PyUnicode_Ready(PyObject *unicode) /* unicode_convert_wchar_to_ucs4() requires a ready string */ _PyUnicode_STATE(unicode).ready = 1; unicode_convert_wchar_to_ucs4(_PyUnicode_WSTR(unicode), end, unicode); - PyObject_FREE(_PyUnicode_WSTR(unicode)); + PyObject_Free(_PyUnicode_WSTR(unicode)); _PyUnicode_WSTR(unicode) = NULL; _PyUnicode_WSTR_LENGTH(unicode) = 0; #else @@ -1943,7 +1930,8 @@ unicode_dealloc(PyObject *unicode) break; case SSTATE_INTERNED_MORTAL: -#ifdef INTERNED_STRINGS + { + struct _Py_unicode_state *state = get_unicode_state(); /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 @@ -1951,14 +1939,14 @@ unicode_dealloc(PyObject *unicode) PyDict_DelItem(). */ assert(Py_REFCNT(unicode) == 0); Py_SET_REFCNT(unicode, 3); - if (PyDict_DelItem(interned, unicode) != 0) { + if (PyDict_DelItem(state->interned, unicode) != 0) { _PyErr_WriteUnraisableMsg("deletion of interned string failed", NULL); } assert(Py_REFCNT(unicode) == 1); Py_SET_REFCNT(unicode, 0); -#endif break; + } case SSTATE_INTERNED_IMMORTAL: _PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died"); @@ -1969,13 +1957,13 @@ unicode_dealloc(PyObject *unicode) } if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { - PyObject_DEL(_PyUnicode_WSTR(unicode)); + PyObject_Free(_PyUnicode_WSTR(unicode)); } if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { - PyObject_DEL(_PyUnicode_UTF8(unicode)); + PyObject_Free(_PyUnicode_UTF8(unicode)); } if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) { - PyObject_DEL(_PyUnicode_DATA_ANY(unicode)); + PyObject_Free(_PyUnicode_DATA_ANY(unicode)); } Py_TYPE(unicode)->tp_free(unicode); @@ -2307,42 +2295,84 @@ PyUnicode_FromString(const char *u) return PyUnicode_DecodeUTF8Stateful(u, (Py_ssize_t)size, NULL, NULL); } + PyObject * 
_PyUnicode_FromId(_Py_Identifier *id) { - if (id->object) { - return id->object; + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_unicode_ids *ids = &interp->unicode.ids; + + int index = _Py_atomic_size_get(&id->index); + if (index < 0) { + struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids; + + PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK); + // Check again to detect concurrent access. Another thread can have + // initialized the index while this thread waited for the lock. + index = _Py_atomic_size_get(&id->index); + if (index < 0) { + assert(rt_ids->next_index < PY_SSIZE_T_MAX); + index = rt_ids->next_index; + rt_ids->next_index++; + _Py_atomic_size_set(&id->index, index); + } + PyThread_release_lock(rt_ids->lock); } + assert(index >= 0); PyObject *obj; - obj = PyUnicode_DecodeUTF8Stateful(id->string, - strlen(id->string), + if (index < ids->size) { + obj = ids->array[index]; + if (obj) { + // Return a borrowed reference + return obj; + } + } + + obj = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string), NULL, NULL); if (!obj) { return NULL; } PyUnicode_InternInPlace(&obj); - assert(!id->next); - id->object = obj; - id->next = static_strings; - static_strings = id; - return id->object; + if (index >= ids->size) { + // Overallocate to reduce the number of realloc + Py_ssize_t new_size = Py_MAX(index * 2, 16); + Py_ssize_t item_size = sizeof(ids->array[0]); + PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size); + if (new_array == NULL) { + PyErr_NoMemory(); + return NULL; + } + memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size); + ids->array = new_array; + ids->size = new_size; + } + + // The array stores a strong reference + ids->array[index] = obj; + + // Return a borrowed reference + return obj; } + static void -unicode_clear_static_strings(void) +unicode_clear_identifiers(struct _Py_unicode_state *state) { - _Py_Identifier *tmp, *s = static_strings; - while (s) { - 
Py_CLEAR(s->object); - tmp = s->next; - s->next = NULL; - s = tmp; + struct _Py_unicode_ids *ids = &state->ids; + for (Py_ssize_t i=0; i < ids->size; i++) { + Py_XDECREF(ids->array[i]); } - static_strings = NULL; + ids->size = 0; + PyMem_Free(ids->array); + ids->array = NULL; + // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid + // after Py_Finalize(). } + /* Internal function, doesn't check maximum character */ PyObject* @@ -3294,7 +3324,7 @@ PyUnicode_AsWideCharString(PyObject *unicode, *size = buflen; } else if (wcslen(buffer) != (size_t)buflen) { - PyMem_FREE(buffer); + PyMem_Free(buffer); PyErr_SetString(PyExc_ValueError, "embedded null character"); return NULL; @@ -4195,7 +4225,7 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) PyErr_NoMemory(); return NULL; } - w = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) * (wlen + 1)); + w = (wchar_t *) PyObject_Malloc(sizeof(wchar_t) * (wlen + 1)); if (w == NULL) { PyErr_NoMemory(); return NULL; @@ -5623,7 +5653,7 @@ unicode_fill_utf8(PyObject *unicode) PyBytes_AS_STRING(writer.buffer); Py_ssize_t len = end - start; - char *cache = PyObject_MALLOC(len + 1); + char *cache = PyObject_Malloc(len + 1); if (cache == NULL) { _PyBytesWriter_Dealloc(&writer); PyErr_NoMemory(); @@ -8540,7 +8570,7 @@ PyUnicode_BuildEncodingMap(PyObject* string) } /* Create a three-level trie */ - result = PyObject_MALLOC(sizeof(struct encoding_map) + + result = PyObject_Malloc(sizeof(struct encoding_map) + 16*count2 + 128*count3 - 1); if (!result) { return PyErr_NoMemory(); @@ -10207,7 +10237,7 @@ case_operation(PyObject *self, PyErr_SetString(PyExc_OverflowError, "string is too long"); return NULL; } - tmp = PyMem_MALLOC(sizeof(Py_UCS4) * 3 * length); + tmp = PyMem_Malloc(sizeof(Py_UCS4) * 3 * length); if (tmp == NULL) return PyErr_NoMemory(); newlength = perform(kind, data, length, tmp, &maxchar); @@ -10231,7 +10261,7 @@ case_operation(PyObject *self, Py_UNREACHABLE(); } leave: - PyMem_FREE(tmp); + 
PyMem_Free(tmp); return res; } @@ -11046,11 +11076,11 @@ replace(PyObject *self, PyObject *str1, assert(release1 == (buf1 != PyUnicode_DATA(str1))); assert(release2 == (buf2 != PyUnicode_DATA(str2))); if (srelease) - PyMem_FREE((void *)sbuf); + PyMem_Free((void *)sbuf); if (release1) - PyMem_FREE((void *)buf1); + PyMem_Free((void *)buf1); if (release2) - PyMem_FREE((void *)buf2); + PyMem_Free((void *)buf2); assert(_PyUnicode_CheckConsistency(u, 1)); return u; @@ -11060,11 +11090,11 @@ replace(PyObject *self, PyObject *str1, assert(release1 == (buf1 != PyUnicode_DATA(str1))); assert(release2 == (buf2 != PyUnicode_DATA(str2))); if (srelease) - PyMem_FREE((void *)sbuf); + PyMem_Free((void *)sbuf); if (release1) - PyMem_FREE((void *)buf1); + PyMem_Free((void *)buf1); if (release2) - PyMem_FREE((void *)buf2); + PyMem_Free((void *)buf2); return unicode_result_unchanged(self); error: @@ -11072,11 +11102,11 @@ replace(PyObject *self, PyObject *str1, assert(release1 == (buf1 != PyUnicode_DATA(str1))); assert(release2 == (buf2 != PyUnicode_DATA(str2))); if (srelease) - PyMem_FREE((void *)sbuf); + PyMem_Free((void *)sbuf); if (release1) - PyMem_FREE((void *)buf1); + PyMem_Free((void *)buf1); if (release2) - PyMem_FREE((void *)buf2); + PyMem_Free((void *)buf2); return NULL; } @@ -11490,12 +11520,11 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) if (PyUnicode_CHECK_INTERNED(left)) return 0; -#ifdef INTERNED_STRINGS assert(_PyUnicode_HASH(right_uni) != -1); Py_hash_t hash = _PyUnicode_HASH(left); - if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) + if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) { return 0; -#endif + } return unicode_compare_eq(left, right_uni); } @@ -15563,7 +15592,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) PyErr_NoMemory(); goto onError; } - data = PyObject_MALLOC((length + 1) * char_size); + data = PyObject_Malloc((length + 1) * char_size); if (data == NULL) { PyErr_NoMemory(); goto onError; @@ -15719,23 
+15748,21 @@ PyUnicode_InternInPlace(PyObject **p) return; } -#ifdef INTERNED_STRINGS if (PyUnicode_READY(s) == -1) { PyErr_Clear(); return; } - if (interned == NULL) { - interned = PyDict_New(); - if (interned == NULL) { + struct _Py_unicode_state *state = get_unicode_state(); + if (state->interned == NULL) { + state->interned = PyDict_New(); + if (state->interned == NULL) { PyErr_Clear(); /* Don't leave an exception */ return; } } - PyObject *t; - t = PyDict_SetDefault(interned, s, s); - + PyObject *t = PyDict_SetDefault(state->interned, s, s); if (t == NULL) { PyErr_Clear(); return; @@ -15752,13 +15779,9 @@ PyUnicode_InternInPlace(PyObject **p) this. */ Py_SET_REFCNT(s, Py_REFCNT(s) - 2); _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; -#else - // PyDict expects that interned strings have their hash - // (PyASCIIObject.hash) already computed. - (void)unicode_hash(s); -#endif } + void PyUnicode_InternImmortal(PyObject **p) { @@ -15792,35 +15815,25 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyThreadState *tstate) { - if (!_Py_IsMainInterpreter(tstate)) { - // interned dict is shared by all interpreters - return; - } - - if (interned == NULL) { - return; - } - assert(PyDict_CheckExact(interned)); - - PyObject *keys = PyDict_Keys(interned); - if (keys == NULL) { - PyErr_Clear(); + struct _Py_unicode_state *state = &tstate->interp->unicode; + if (state->interned == NULL) { return; } - assert(PyList_CheckExact(keys)); + assert(PyDict_CheckExact(state->interned)); /* Interned unicode strings are not forcibly deallocated; rather, we give them their stolen references back, and then clear and DECREF the interned dict. 
*/ - Py_ssize_t n = PyList_GET_SIZE(keys); #ifdef INTERNED_STATS - fprintf(stderr, "releasing %zd interned strings\n", n); + fprintf(stderr, "releasing %zd interned strings\n", + PyDict_GET_SIZE(state->interned)); Py_ssize_t immortal_size = 0, mortal_size = 0; #endif - for (Py_ssize_t i = 0; i < n; i++) { - PyObject *s = PyList_GET_ITEM(keys, i); + Py_ssize_t pos = 0; + PyObject *s, *ignored_value; + while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) { assert(PyUnicode_IS_READY(s)); switch (PyUnicode_CHECK_INTERNED(s)) { @@ -15850,10 +15863,9 @@ _PyUnicode_ClearInterned(PyThreadState *tstate) "total size of all interned strings: %zd/%zd mortal/immortal\n", mortal_size, immortal_size); #endif - Py_DECREF(keys); - PyDict_Clear(interned); - Py_CLEAR(interned); + PyDict_Clear(state->interned); + Py_CLEAR(state->interned); } @@ -16223,21 +16235,19 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) void _PyUnicode_Fini(PyThreadState *tstate) { + struct _Py_unicode_state *state = &tstate->interp->unicode; + // _PyUnicode_ClearInterned() must be called before + assert(state->interned == NULL); - struct _Py_unicode_state *state = &tstate->interp->unicode; + _PyUnicode_FiniEncodings(&state->fs_codec); - Py_CLEAR(state->empty_string); + unicode_clear_identifiers(state); for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(state->latin1[i]); } - - if (_Py_IsMainInterpreter(tstate)) { - unicode_clear_static_strings(); - } - - _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec); + Py_CLEAR(state->empty_string); } diff --git a/Objects/unionobject.c b/Objects/unionobject.c index 1b7f8ab51a..32aa5078af 100644 --- a/Objects/unionobject.c +++ b/Objects/unionobject.c @@ -237,9 +237,19 @@ dedup_and_flatten_args(PyObject* args) PyObject* i_element = PyTuple_GET_ITEM(args, i); for (Py_ssize_t j = i + 1; j < arg_length; j++) { PyObject* j_element = PyTuple_GET_ITEM(args, j); - if (i_element == j_element) { - is_duplicate = 1; + int is_ga = PyObject_TypeCheck(i_element, 
&Py_GenericAliasType) && + PyObject_TypeCheck(j_element, &Py_GenericAliasType); + // RichCompare to also deduplicate GenericAlias types (slower) + is_duplicate = is_ga ? PyObject_RichCompareBool(i_element, j_element, Py_EQ) + : i_element == j_element; + // Should only happen if RichCompare fails + if (is_duplicate < 0) { + Py_DECREF(args); + Py_DECREF(new_args); + return NULL; } + if (is_duplicate) + break; } if (!is_duplicate) { Py_INCREF(i_element); @@ -286,12 +296,12 @@ is_unionable(PyObject *obj) is_new_type(obj) || is_special_form(obj) || PyType_Check(obj) || - type == &Py_GenericAliasType || + PyObject_TypeCheck(obj, &Py_GenericAliasType) || type == &_Py_UnionType); } -static PyObject * -type_or(PyTypeObject* self, PyObject* param) +PyObject * +_Py_union_type_or(PyObject* self, PyObject* param) { PyObject *tuple = PyTuple_Pack(2, self, param); if (tuple == NULL) { @@ -404,7 +414,7 @@ static PyMethodDef union_methods[] = { {0}}; static PyNumberMethods union_as_number = { - .nb_or = (binaryfunc)type_or, // Add __or__ function + .nb_or = _Py_union_type_or, // Add __or__ function }; PyTypeObject _Py_UnionType = { |