diff options
author | Matti Picus <matti.picus@gmail.com> | 2020-06-04 22:03:43 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-04 22:03:43 +0300 |
commit | a5d021a1b6f439a19812926bc4d796ef5f346c44 (patch) | |
tree | 998f27e5296a2894b41a9600c9bdb8b7806241c1 | |
parent | abce2a0604a4db5d8072a0b1478c7af439882ed7 (diff) | |
parent | 8da4fba24c193807555dfcbc0e67834cae9185ea (diff) | |
download | numpy-a5d021a1b6f439a19812926bc4d796ef5f346c44.tar.gz |
Merge pull request #15162 from seberg/reduce-axes-try
BUG,MAINT: Fix issues with non-reduce broadcasting axes
-rw-r--r-- | numpy/core/src/multiarray/common.h | 9 | ||||
-rw-r--r-- | numpy/core/src/multiarray/einsum.c.src | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer_api.c | 16 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer_constr.c | 402 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer_impl.h | 35 | ||||
-rw-r--r-- | numpy/core/src/umath/reduction.c | 513 | ||||
-rw-r--r-- | numpy/core/src/umath/reduction.h | 5 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 13 | ||||
-rw-r--r-- | numpy/core/tests/test_nditer.py | 15 | ||||
-rw-r--r-- | numpy/core/tests/test_ufunc.py | 60 |
10 files changed, 479 insertions, 595 deletions
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index 78a15a63c..4ba25c079 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -339,5 +339,14 @@ NPY_NO_EXPORT PyArrayObject * new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out, int nd, npy_intp dimensions[], int typenum, PyArrayObject **result); + +/* + * Used to indicate a broadcast axis, see also `npyiter_get_op_axis` in + * `nditer_constr.c`. This may be the preferred API for reduction axes + * probably. So we should consider making this public either as a macro or + * function (so that the way we flag the axis can be changed). + */ +#define NPY_ITER_REDUCTION_AXIS(axis) (axis + (1 << (NPY_BITSOF_INT - 2))) + #endif diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index 1cc557825..b914e5bb3 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -2740,7 +2740,7 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, op_axes[nop][idim] = idim; } for (idim = ndim_output; idim < ndim_iter; ++idim) { - op_axes[nop][idim] = -1; + op_axes[nop][idim] = NPY_ITER_REDUCTION_AXIS(-1); } /* Set the iterator per-op flags */ @@ -2753,13 +2753,11 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, op_flags[nop] = NPY_ITER_READWRITE| NPY_ITER_NBO| NPY_ITER_ALIGNED| - NPY_ITER_ALLOCATE| - NPY_ITER_NO_BROADCAST; + NPY_ITER_ALLOCATE; iter_flags = NPY_ITER_EXTERNAL_LOOP| NPY_ITER_BUFFERED| NPY_ITER_DELAY_BUFALLOC| NPY_ITER_GROWINNER| - NPY_ITER_REDUCE_OK| NPY_ITER_REFS_OK| NPY_ITER_ZEROSIZE_OK; if (out != NULL) { diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c index e7fe0fa50..a5b5e5c51 100644 --- a/numpy/core/src/multiarray/nditer_api.c +++ b/numpy/core/src/multiarray/nditer_api.c @@ -935,13 +935,8 @@ NpyIter_GetShape(NpyIter *iter, npy_intp *outshape) if (itflags&NPY_ITFLAG_HASMULTIINDEX) { perm = NIT_PERM(iter); for(idim = 0; idim < ndim; ++idim) { - npy_int8 p = perm[idim]; - if (p < 0) { - outshape[ndim+p] = NAD_SHAPE(axisdata); - } - else { - outshape[ndim-p-1] = NAD_SHAPE(axisdata); - } + int axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL); + outshape[axis] = NAD_SHAPE(axisdata); NIT_ADVANCE_AXISDATA(axisdata, 1); } @@ -1005,8 +1000,9 @@ NpyIter_CreateCompatibleStrides(NpyIter *iter, perm = NIT_PERM(iter); for(idim = 0; idim < ndim; ++idim) { - npy_int8 p = perm[idim]; - if (p < 0) { + npy_bool flipped; + npy_int8 axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, &flipped); + if (flipped) { PyErr_SetString(PyExc_RuntimeError, "Iterator CreateCompatibleStrides may only be called " "if DONT_NEGATE_STRIDES was used to prevent reverse " @@ -1014,7 +1010,7 @@ NpyIter_CreateCompatibleStrides(NpyIter *iter, return NPY_FAIL; } else { - outstrides[ndim-p-1] = itemsize; + outstrides[axis] = itemsize; } itemsize *= NAD_SHAPE(axisdata); diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index 620c7d593..f2dbc9f03 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -57,11 +57,12 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf char **op_dataptr, const npy_uint32 *op_flags, int **op_axes, npy_intp const *itershape); +static NPY_INLINE int +npyiter_get_op_axis(int axis, npy_bool *reduction_axis); static void -npyiter_replace_axisdata(NpyIter *iter, int iop, - PyArrayObject *op, - int op_ndim, char *op_dataptr, - int *op_axes); +npyiter_replace_axisdata( + NpyIter *iter, int iop, PyArrayObject *op, + int orig_op_ndim, const int *op_axes); static void npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags); static void @@ -826,7 +827,8 @@ npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes, if (axes != NULL) { memset(axes_dupcheck, 0, NPY_MAXDIMS); for (idim = 0; idim < oa_ndim; ++idim) { - int i = axes[idim]; + int i = npyiter_get_op_axis(axes[idim], NULL); + if (i >= 0) { if (i >= NPY_MAXDIMS) { PyErr_Format(PyExc_ValueError, @@ -1391,6 +1393,61 @@ check_mask_for_writemasked_reduction(NpyIter *iter, int iop) } /* + * Check whether a reduction is OK based on the flags and the operand being + * readwrite. This path is deprecated, since usually only specific axes + * should be reduced. If axes are specified explicitely, the flag is + * unnecessary. + */ +static int +npyiter_check_reduce_ok_and_set_flags( + NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags, + int dim) { + /* If it's writeable, this means a reduction */ + if (*op_itflags & NPY_OP_ITFLAG_WRITE) { + if (!(flags & NPY_ITER_REDUCE_OK)) { + PyErr_Format(PyExc_ValueError, + "output operand requires a reduction along dimension %d, " + "but the reduction is not enabled. The dimension size of 1 " + "does not match the expected output shape.", dim); + return 0; + } + if (!(*op_itflags & NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but is flagged as " + "write-only, not read-write"); + return 0; + } + NPY_IT_DBG_PRINT("Iterator: Indicating that a reduction is" + "occurring\n"); + + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + *op_itflags |= NPY_OP_ITFLAG_REDUCE; + } + return 1; +} + +/** + * Removes the (additive) NPY_ITER_REDUCTION_AXIS indication and sets + * is_forced_broadcast to 1 if it is set. Otherwise to 0. + * + * @param axis The op_axes[i] to normalize. + * @param reduction_axis Output 1 if a reduction axis, otherwise 0. + * @returns The normalized axis (without reduce axis flag). + */ +static NPY_INLINE int +npyiter_get_op_axis(int axis, npy_bool *reduction_axis) { + npy_bool forced_broadcast = axis >= NPY_ITER_REDUCTION_AXIS(-1); + + if (reduction_axis != NULL) { + *reduction_axis = forced_broadcast; + } + if (forced_broadcast) { + return axis - NPY_ITER_REDUCTION_AXIS(0); + } + return axis; +} + +/* * Fills in the AXISDATA for the 'nop' operands, broadcasting * the dimensionas as necessary. Also fills * in the ITERSIZE data member. @@ -1450,8 +1507,9 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf return 0; } for (idim = 0; idim < ondim; ++idim) { - npy_intp bshape = broadcast_shape[idim+ndim-ondim], - op_shape = shape[idim]; + npy_intp bshape = broadcast_shape[idim+ndim-ondim]; + npy_intp op_shape = shape[idim]; + if (bshape == 1) { broadcast_shape[idim+ndim-ondim] = op_shape; } @@ -1463,11 +1521,13 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf else { int *axes = op_axes[iop]; for (idim = 0; idim < ndim; ++idim) { - int i = axes[idim]; + int i = npyiter_get_op_axis(axes[idim], NULL); + if (i >= 0) { if (i < ondim) { - npy_intp bshape = broadcast_shape[idim], - op_shape = shape[i]; + npy_intp bshape = broadcast_shape[idim]; + npy_intp op_shape = shape[i]; + if (bshape == 1) { broadcast_shape[idim] = op_shape; } @@ -1589,9 +1649,37 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf } else { int *axes = op_axes[iop]; - int i = axes[ndim-idim-1]; - if (i >= 0) { - if (bshape == 1 || op_cur == NULL) { + npy_bool reduction_axis; + int i; + i = npyiter_get_op_axis(axes[ndim - idim - 1], &reduction_axis); + + if (reduction_axis) { + /* This is explicitly a reduction axis */ + strides[iop] = 0; + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; + + if (NPY_UNLIKELY((i >= 0) && (op_cur != NULL) && + (PyArray_DIM(op_cur, i) != 1))) { + PyErr_Format(PyExc_ValueError, + "operand was set up as a reduction along axis " + "%d, but the length of the axis is %zd " + "(it has to be 1)", + i, (Py_ssize_t)PyArray_DIM(op_cur, i)); + return 0; + } + } + else if (bshape == 1) { + /* + * If the full iterator shape is 1, zero always works. + * NOTE: We thus always allow broadcast dimensions (i = -1) + * if the shape is 1. + */ + strides[iop] = 0; + } + else if (i >= 0) { + if (op_cur == NULL) { + /* stride is filled later, shape will match `bshape` */ strides[iop] = 0; } else if (PyArray_DIM(op_cur, i) == 1) { @@ -1599,51 +1687,20 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf if (op_flags[iop] & NPY_ITER_NO_BROADCAST) { goto operand_different_than_broadcast; } - /* If it's writeable, this means a reduction */ - if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) { - if (!(flags & NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "reduction is not enabled"); - return 0; - } - if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "is flagged as write-only, not " - "read-write"); - return 0; - } - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; + if (!npyiter_check_reduce_ok_and_set_flags( + iter, flags, &op_itflags[iop], i)) { + return 0; } } else { strides[iop] = PyArray_STRIDE(op_cur, i); } } - else if (bshape == 1) { - strides[iop] = 0; - } else { strides[iop] = 0; - /* If it's writeable, this means a reduction */ - if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) { - if (!(flags & NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "reduction is not enabled"); - return 0; - } - if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "is flagged as write-only, not " - "read-write"); - return 0; - } - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; + if (!npyiter_check_reduce_ok_and_set_flags( + iter, flags, &op_itflags[iop], i)) { + return 0; } } } @@ -1753,7 +1810,7 @@ broadcast_error: { if (axes != NULL) { for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[idim]; + int i = npyiter_get_op_axis(axes[idim], NULL); if (i >= 0 && i < PyArray_NDIM(op[iop])) { remdims[idim] = PyArray_DIM(op[iop], i); @@ -1899,14 +1956,14 @@ operand_different_than_broadcast: { * array. */ static void -npyiter_replace_axisdata(NpyIter *iter, int iop, - PyArrayObject *op, - int op_ndim, char *op_dataptr, - int *op_axes) +npyiter_replace_axisdata( + NpyIter *iter, int iop, PyArrayObject *op, + int orig_op_ndim, const int *op_axes) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); + char *op_dataptr = PyArray_DATA(op); NpyIter_AxisData *axisdata0, *axisdata; npy_intp sizeof_axisdata; @@ -1925,25 +1982,20 @@ npyiter_replace_axisdata(NpyIter *iter, int iop, if (op_axes != NULL) { for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p; int i; + npy_bool axis_flipped; npy_intp shape; - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = op_axes[ndim+p]; - } - else { - i = op_axes[ndim-p-1]; - } + /* Apply perm to get the original axis, and check if its flipped */ + i = npyiter_undo_iter_axis_perm(idim, ndim, perm, &axis_flipped); - if (0 <= i && i < op_ndim) { + i = npyiter_get_op_axis(op_axes[i], NULL); + assert(i < orig_op_ndim); + if (i >= 0) { shape = PyArray_DIM(op, i); if (shape != 1) { npy_intp stride = PyArray_STRIDE(op, i); - if (p < 0) { - /* If the perm entry is negative, flip the axis */ + if (axis_flipped) { NAD_STRIDES(axisdata)[iop] = -stride; baseoffset += stride*(shape-1); } @@ -1956,25 +2008,18 @@ npyiter_replace_axisdata(NpyIter *iter, int iop, } else { for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p; int i; + npy_bool axis_flipped; npy_intp shape; - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = op_ndim+p; - } - else { - i = op_ndim-p-1; - } + i = npyiter_undo_iter_axis_perm( + idim, orig_op_ndim, perm, &axis_flipped); if (i >= 0) { shape = PyArray_DIM(op, i); if (shape != 1) { npy_intp stride = PyArray_STRIDE(op, i); - if (p < 0) { - /* If the perm entry is negative, flip the axis */ + if (axis_flipped) { NAD_STRIDES(axisdata)[iop] = -stride; baseoffset += stride*(shape-1); } @@ -2461,14 +2506,15 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); + int used_op_ndim; int nop = NIT_NOP(iter); npy_int8 *perm = NIT_PERM(iter); - npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS], - stride = op_dtype->elsize; + npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; + npy_intp stride = op_dtype->elsize; NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; - npy_intp i; + int i; PyArrayObject *ret; @@ -2495,39 +2541,46 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); /* Initialize the strides to invalid values */ - for (i = 0; i < NPY_MAXDIMS; ++i) { + for (i = 0; i < op_ndim; ++i) { strides[i] = NPY_MAX_INTP; } if (op_axes != NULL) { + used_op_ndim = 0; for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p; + npy_bool reduction_axis; /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = op_axes[ndim+p]; - } - else { - i = op_axes[ndim-p-1]; - } + i = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL); + i = npyiter_get_op_axis(op_axes[i], &reduction_axis); if (i >= 0) { NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d " "for iterator dimension %d to %d\n", (int)i, (int)idim, (int)stride); + used_op_ndim += 1; strides[i] = stride; if (shape == NULL) { - new_shape[i] = NAD_SHAPE(axisdata); + if (reduction_axis) { + /* reduction axes always have a length of 1 */ + new_shape[i] = 1; + } + else { + new_shape[i] = NAD_SHAPE(axisdata); + } stride *= new_shape[i]; if (i >= ndim) { - PyErr_SetString(PyExc_ValueError, + PyErr_Format(PyExc_ValueError, "automatically allocated output array " - "specified with an inconsistent axis mapping"); + "specified with an inconsistent axis mapping; " + "the axis mapping cannot include dimension %d " + "which is too large for the iterator dimension " + "of %d.", i, ndim); return NULL; } } else { + assert(!reduction_axis || shape[i] == 1); stride *= shape[i]; } } @@ -2535,44 +2588,25 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, if (shape == NULL) { /* * If deleting this axis produces a reduction, but - * reduction wasn't enabled, throw an error + * reduction wasn't enabled, throw an error. + * NOTE: We currently always allow new-axis if the iteration + * size is 1 (thus allowing broadcasting sometimes). */ - if (NAD_SHAPE(axisdata) != 1) { - if (!(flags & NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "reduction is not enabled"); - return NULL; - } - if (!((*op_itflags) & NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "is flagged as write-only, not read-write"); + if (!reduction_axis && NAD_SHAPE(axisdata) != 1) { + if (!npyiter_check_reduce_ok_and_set_flags( + iter, flags, op_itflags, i)) { return NULL; } - - NPY_IT_DBG_PRINT("Iterator: Indicating that a " - "reduction is occurring\n"); - /* Indicate that a reduction is occurring */ - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; } } } } } else { + used_op_ndim = ndim; for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p; - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = op_ndim + p; - } - else { - i = op_ndim - p - 1; - } + i = npyiter_undo_iter_axis_perm(idim, op_ndim, perm, NULL); if (i >= 0) { NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d " @@ -2590,73 +2624,58 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, } } - /* - * If custom axes were specified, some dimensions may not have been used. - * Add the REDUCE itflag if this creates a reduction situation. - */ if (shape == NULL) { - /* Ensure there are no dimension gaps in op_axes, and find op_ndim */ - op_ndim = ndim; - if (op_axes != NULL) { - for (i = 0; i < ndim; ++i) { - if (strides[i] == NPY_MAX_INTP) { - if (op_ndim == ndim) { - op_ndim = i; - } - } - /* - * If there's a gap in the array's dimensions, it's an error. - * For example, op_axes of [0,2] for the automatically - * allocated output. - */ - else if (op_ndim != ndim) { - PyErr_SetString(PyExc_ValueError, - "automatically allocated output array " - "specified with an inconsistent axis mapping"); - return NULL; - } + /* If shape was NULL, use the shape we calculated */ + op_ndim = used_op_ndim; + shape = new_shape; + /* + * If there's a gap in the array's dimensions, it's an error. + * For instance, if op_axes [0, 2] is specified, there will a place + * in the strides array where the value is not set. + */ + for (i = 0; i < op_ndim; i++) { + if (strides[i] == NPY_MAX_INTP) { + PyErr_Format(PyExc_ValueError, + "automatically allocated output array " + "specified with an inconsistent axis mapping; " + "the axis mapping is missing an entry for " + "dimension %d.", i); + return NULL; } } } - else { - for (i = 0; i < op_ndim; ++i) { - if (strides[i] == NPY_MAX_INTP) { - npy_intp factor, new_strides[NPY_MAXDIMS], - itemsize; - - /* Fill in the missing strides in C order */ - factor = 1; - itemsize = op_dtype->elsize; - for (i = op_ndim-1; i >= 0; --i) { - if (strides[i] == NPY_MAX_INTP) { - new_strides[i] = factor * itemsize; - factor *= shape[i]; - } - } - - /* - * Copy the missing strides, and multiply the existing strides - * by the calculated factor. This way, the missing strides - * are tighter together in memory, which is good for nested - * loops. - */ - for (i = 0; i < op_ndim; ++i) { - if (strides[i] == NPY_MAX_INTP) { - strides[i] = new_strides[i]; - } - else { - strides[i] *= factor; - } - } + else if (used_op_ndim < op_ndim) { + /* + * If custom axes were specified, some dimensions may not have + * been used. These are additional axes which are ignored in the + * iterator but need to be handled here. + */ + npy_intp factor, itemsize, new_strides[NPY_MAXDIMS]; - break; + /* Fill in the missing strides in C order */ + factor = 1; + itemsize = op_dtype->elsize; + for (i = op_ndim-1; i >= 0; --i) { + if (strides[i] == NPY_MAX_INTP) { + new_strides[i] = factor * itemsize; + factor *= shape[i]; } } - } - /* If shape was NULL, set it to the shape we calculated */ - if (shape == NULL) { - shape = new_shape; + /* + * Copy the missing strides, and multiply the existing strides + * by the calculated factor. This way, the missing strides + * are tighter together in memory, which is good for nested + * loops. + */ + for (i = 0; i < op_ndim; ++i) { + if (strides[i] == NPY_MAX_INTP) { + strides[i] = new_strides[i]; + } + else { + strides[i] *= factor; + } + } } /* Allocate the temporary array */ @@ -2669,6 +2688,11 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, /* Double-check that the subtype didn't mess with the dimensions */ if (subtype != &PyArray_Type) { + /* + * TODO: the dtype could have a subarray, which adds new dimensions + * to `ret`, that should typically be fine, but will break + * in this branch. + */ if (PyArray_NDIM(ret) != op_ndim || !PyArray_CompareLists(shape, PyArray_DIMS(ret), op_ndim)) { PyErr_SetString(PyExc_RuntimeError, @@ -2805,16 +2829,21 @@ npyiter_allocate_arrays(NpyIter *iter, if (op[iop] == NULL) { PyArrayObject *out; PyTypeObject *op_subtype; - int ondim = ndim; /* Check whether the subtype was disabled */ op_subtype = (op_flags[iop] & NPY_ITER_NO_SUBTYPE) ? &PyArray_Type : subtype; - /* Allocate the output array */ + /* + * Allocate the output array. + * + * Note that here, ndim is always correct if no op_axes was given + * (but the actual dimension of op can be larger). If op_axes + * is given, ndim is not actually used. + */ out = npyiter_new_temp_array(iter, op_subtype, flags, &op_itflags[iop], - ondim, + ndim, NULL, op_dtype[iop], op_axes ? op_axes[iop] : NULL); @@ -2828,8 +2857,8 @@ npyiter_allocate_arrays(NpyIter *iter, * Now we need to replace the pointers and strides with values * from the new array. */ - npyiter_replace_axisdata(iter, iop, op[iop], ondim, - PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL); + npyiter_replace_axisdata(iter, iop, op[iop], ndim, + op_axes ? op_axes[iop] : NULL); /* * New arrays are guaranteed true-aligned, but copy/cast code @@ -2870,8 +2899,7 @@ npyiter_allocate_arrays(NpyIter *iter, * Now we need to replace the pointers and strides with values * from the temporary array. */ - npyiter_replace_axisdata(iter, iop, op[iop], 0, - PyArray_DATA(op[iop]), NULL); + npyiter_replace_axisdata(iter, iop, op[iop], 0, NULL); /* * New arrays are guaranteed true-aligned, but copy/cast code @@ -2943,7 +2971,7 @@ npyiter_allocate_arrays(NpyIter *iter, * from the temporary array. */ npyiter_replace_axisdata(iter, iop, op[iop], ondim, - PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL); + op_axes ? op_axes[iop] : NULL); /* * New arrays are guaranteed true-aligned, but copy/cast code diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h index 5fb146026..1477c8631 100644 --- a/numpy/core/src/multiarray/nditer_impl.h +++ b/numpy/core/src/multiarray/nditer_impl.h @@ -301,6 +301,41 @@ struct NpyIter_AD { NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*(ndim ? ndim : 1)) /* Internal helper functions shared between implementation files */ + +/** + * Undo the axis permutation of the iterator. When the operand has fewer + * dimensions then the iterator, this can return negative values for + * inserted (broadcast) dimensions. + * + * @param axis Axis for which to undo the iterator axis permutation. + * @param ndim If `op_axes` is being used, this is the iterator dimension, + * otherwise this is the operand dimension. + * @param perm The iterator axis permutation NIT_PERM(iter) + * @param axis_flipped Will be set to true if this is a flipped axis + * (i.e. is iterated in reversed order) and otherwise false. + * Can be NULL if the information is not needed. + * @return The unpermuted axis. Without `op_axes` this is correct, with + * `op_axes` this indexes into `op_axes` (unpermuted iterator axis) + */ +static NPY_INLINE int +npyiter_undo_iter_axis_perm( + int axis, int ndim, const npy_int8 *perm, npy_bool *axis_flipped) +{ + npy_int8 p = perm[axis]; + /* The iterator treats axis reversed, thus adjust by ndim */ + npy_bool flipped = p < 0; + if (axis_flipped != NULL) { + *axis_flipped = flipped; + } + if (flipped) { + axis = ndim + p; + } + else { + axis = ndim - p - 1; + } + return axis; +} + NPY_NO_EXPORT void npyiter_coalesce_axes(NpyIter *iter); NPY_NO_EXPORT int diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 79adb0051..4037a4757 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -25,235 +25,6 @@ #include "reduction.h" #include "extobj.h" /* for _check_ufunc_fperr */ -/* - * Allocates a result array for a reduction operation, with - * dimensions matching 'arr' except set to 1 with 0 stride - * wherever axis_flags is True. Dropping the reduction axes - * from the result must be done later by the caller once the - * computation is complete. - * - * This function always allocates a base class ndarray. - * - * If 'dtype' isn't NULL, this function steals its reference. - */ -static PyArrayObject * -allocate_reduce_result(PyArrayObject *arr, const npy_bool *axis_flags, - PyArray_Descr *dtype, int subok) -{ - npy_intp strides[NPY_MAXDIMS], stride; - npy_intp shape[NPY_MAXDIMS], *arr_shape = PyArray_DIMS(arr); - npy_stride_sort_item strideperm[NPY_MAXDIMS]; - int idim, ndim = PyArray_NDIM(arr); - - if (dtype == NULL) { - dtype = PyArray_DTYPE(arr); - Py_INCREF(dtype); - } - - PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), - PyArray_STRIDES(arr), strideperm); - - /* Build the new strides and shape */ - stride = dtype->elsize; - if (ndim) { - memcpy(shape, arr_shape, ndim * sizeof(shape[0])); - } - for (idim = ndim-1; idim >= 0; --idim) { - npy_intp i_perm = strideperm[idim].perm; - if (axis_flags[i_perm]) { - strides[i_perm] = 0; - shape[i_perm] = 1; - } - else { - strides[i_perm] = stride; - stride *= shape[i_perm]; - } - } - - /* Finally, allocate the array */ - return (PyArrayObject *)PyArray_NewFromDescr( - subok ? Py_TYPE(arr) : &PyArray_Type, - dtype, ndim, shape, strides, - NULL, 0, subok ? (PyObject *)arr : NULL); -} - -/* - * Conforms an output parameter 'out' to have 'ndim' dimensions - * with dimensions of size one added in the appropriate places - * indicated by 'axis_flags'. - * - * The return value is a view into 'out'. - */ -static PyArrayObject * -conform_reduce_result(PyArrayObject *in, const npy_bool *axis_flags, - PyArrayObject *out, int keepdims, const char *funcname, - int need_copy) -{ - /* unpack shape information */ - int const ndim = PyArray_NDIM(in); - npy_intp const *shape_in = PyArray_DIMS(in); - - int const ndim_out = PyArray_NDIM(out); - npy_intp const *strides_out = PyArray_STRIDES(out); - npy_intp const *shape_out = PyArray_DIMS(out); - - /* - * If the 'keepdims' parameter is true, do a simpler validation and - * return a new reference to 'out'. - */ - if (keepdims) { - if (PyArray_NDIM(out) != ndim) { - PyErr_Format(PyExc_ValueError, - "output parameter for reduction operation %s " - "has the wrong number of dimensions (must match " - "the operand's when keepdims=True)", funcname); - return NULL; - } - - for (int idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - if (shape_out[idim] != 1) { - PyErr_Format(PyExc_ValueError, - "output parameter for reduction operation %s " - "has a reduction dimension not equal to one " - "(required when keepdims=True)", funcname); - return NULL; - } - } - else { - if (shape_out[idim] != shape_in[idim]) { - PyErr_Format(PyExc_ValueError, - "output parameter for reduction operation %s " - "has a non-reduction dimension not equal to " - "the input one.", funcname); - return NULL; - } - } - - } - - Py_INCREF(out); - return out; - } - - /* Construct the strides and shape */ - npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS]; - int idim_out = 0; - for (int idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - strides[idim] = 0; - shape[idim] = 1; - } - else { - if (idim_out >= ndim_out) { - PyErr_Format(PyExc_ValueError, - "output parameter for reduction operation %s " - "does not have enough dimensions", funcname); - return NULL; - } - if (shape_out[idim_out] != shape_in[idim]) { - PyErr_Format(PyExc_ValueError, - "output parameter for reduction operation %s " - "has a non-reduction dimension not equal to " - "the input one.", funcname); - return NULL; - } - strides[idim] = strides_out[idim_out]; - shape[idim] = shape_out[idim_out]; - ++idim_out; - } - } - - if (idim_out != ndim_out) { - PyErr_Format(PyExc_ValueError, - "output parameter for reduction operation %s " - "has too many dimensions", funcname); - return NULL; - } - - /* Allocate the view */ - PyArray_Descr *dtype = PyArray_DESCR(out); - Py_INCREF(dtype); - - PyArrayObject_fields *ret = (PyArrayObject_fields *)PyArray_NewFromDescrAndBase( - &PyArray_Type, dtype, - ndim, shape, strides, PyArray_DATA(out), - PyArray_FLAGS(out), NULL, (PyObject *)out); - if (ret == NULL) { - return NULL; - } - - if (need_copy) { - PyArrayObject *ret_copy = (PyArrayObject *)PyArray_NewLikeArray( - (PyArrayObject *)ret, NPY_ANYORDER, NULL, 0); - if (ret_copy == NULL) { - Py_DECREF(ret); - return NULL; - } - - if (PyArray_CopyInto(ret_copy, (PyArrayObject *)ret) != 0) { - Py_DECREF(ret); - Py_DECREF(ret_copy); - return NULL; - } - - if (PyArray_SetWritebackIfCopyBase(ret_copy, (PyArrayObject *)ret) < 0) { - Py_DECREF(ret_copy); - return NULL; - } - - return ret_copy; - } - else { - return (PyArrayObject *)ret; - } -} - -/* - * Creates a result for reducing 'operand' along the axes specified - * in 'axis_flags'. If 'dtype' isn't NULL, this function steals a - * reference to 'dtype'. - * - * If 'out' isn't NULL, this function creates a view conforming - * to the number of dimensions of 'operand', adding a singleton dimension - * for each reduction axis specified. In this case, 'dtype' is ignored - * (but its reference is still stolen), and the caller must handle any - * type conversion/validity check for 'out' - * - * If 'subok' is true, creates a result with the subtype of 'operand', - * otherwise creates on with the base ndarray class. - * - * If 'out' is NULL, it allocates a new array whose shape matches that of - * 'operand', except for at the reduction axes. If 'dtype' is NULL, the dtype - * of 'operand' is used for the result. - */ -NPY_NO_EXPORT PyArrayObject * -PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out, - PyArray_Descr *dtype, npy_bool *axis_flags, - int keepdims, int subok, - const char *funcname) -{ - PyArrayObject *result; - - if (out == NULL) { - /* This function steals the reference to 'dtype' */ - result = allocate_reduce_result(operand, axis_flags, dtype, subok); - } - else { - int need_copy = 0; - - if (solve_may_share_memory(operand, out, 1) != 0) { - need_copy = 1; - } - - /* Steal the dtype reference */ - Py_XDECREF(dtype); - result = conform_reduce_result(operand, axis_flags, - out, keepdims, funcname, need_copy); - } - - return result; -} /* * Count the number of dimensions selected in 'axis_flags' @@ -275,18 +46,15 @@ count_axes(int ndim, const npy_bool *axis_flags) /* * This function initializes a result array for a reduction operation * which has no identity. This means it needs to copy the first element - * it sees along the reduction axes to result, then return a view of - * the operand which excludes that element. + * it sees along the reduction axes to result. * * If a reduction has an identity, such as 0 or 1, the result should be - * initialized by calling PyArray_AssignZero(result, NULL, NULL) or - * PyArray_AssignOne(result, NULL, NULL), because this function raises an - * exception when there are no elements to reduce (which appropriate iff the - * reduction operation has no identity). + * fully initialized to the identity, because this function raises an + * exception when there are no elements to reduce (which is appropriate if, + * and only if, the reduction operation has no identity). * * This means it copies the subarray indexed at zero along each reduction axis - * into 'result', then returns a view into 'operand' excluding those copied - * elements. + * into 'result'. * * result : The array into which the result is computed. This must have * the same number of dimensions as 'operand', but for each @@ -294,105 +62,83 @@ count_axes(int ndim, const npy_bool *axis_flags) * operand : The array being reduced. * axis_flags : An array of boolean flags, one for each axis of 'operand'. * When a flag is True, it indicates to reduce along that axis. - * out_skip_first_count : This gets populated with the number of first-visit - * elements that should be skipped during the - * iteration loop. * funcname : The name of the reduction operation, for the purpose of * better quality error messages. For example, "numpy.max" * would be a good name for NumPy's max function. * - * Returns a view which contains the remaining elements on which to do - * the reduction. + * Returns -1 if an error occurred, and otherwise the reduce arrays size, + * which is the number of elements already initialized. */ -NPY_NO_EXPORT PyArrayObject * -PyArray_InitializeReduceResult( +NPY_NO_EXPORT int +PyArray_CopyInitialReduceValues( PyArrayObject *result, PyArrayObject *operand, - const npy_bool *axis_flags, - npy_intp *out_skip_first_count, const char *funcname) + const npy_bool *axis_flags, const char *funcname, + int keepdims) { - npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS]; + npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; + npy_intp *shape_orig = PyArray_SHAPE(operand); + npy_intp *strides_orig = PyArray_STRIDES(operand); PyArrayObject *op_view = NULL; - int idim, ndim, nreduce_axes; - ndim = PyArray_NDIM(operand); - - /* Default to no skipping first-visit elements in the iteration */ - *out_skip_first_count = 0; - - /* Take a view into 'operand' which we can modify. */ - op_view = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type); - if (op_view == NULL) { - return NULL; - } + int ndim = PyArray_NDIM(operand); /* - * Now copy the subarray of the first element along each reduction axis, - * then return a view to the rest. + * Copy the subarray of the first element along each reduction axis. * * Adjust the shape to only look at the first element along - * any of the reduction axes. We count the number of reduction axes - * at the same time. + * any of the reduction axes. If keepdims is False remove the axes + * entirely. */ - shape = PyArray_SHAPE(op_view); - nreduce_axes = 0; - if (ndim) { - memcpy(shape_orig, shape, ndim * sizeof(npy_intp)); - } - for (idim = 0; idim < ndim; ++idim) { + int idim_out = 0; + npy_intp size = 1; + for (int idim = 0; idim < ndim; idim++) { if (axis_flags[idim]) { - if (shape[idim] == 0) { + if (NPY_UNLIKELY(shape_orig[idim] == 0)) { PyErr_Format(PyExc_ValueError, - "zero-size array to reduction operation %s " - "which has no identity", - funcname); - Py_DECREF(op_view); - return NULL; + "zero-size array to reduction operation %s " + "which has no identity", funcname); + return -1; } - shape[idim] = 1; - ++nreduce_axes; + if (keepdims) { + shape[idim_out] = 1; + strides[idim_out] = 0; + idim_out++; + } + } + else { + size *= shape_orig[idim]; + shape[idim_out] = shape_orig[idim]; + strides[idim_out] = strides_orig[idim]; + idim_out++; } } - /* - * Copy the elements into the result to start. - */ - if (PyArray_CopyInto(result, op_view) < 0) { - Py_DECREF(op_view); - return NULL; + PyArray_Descr *descr = PyArray_DESCR(operand); + Py_INCREF(descr); + op_view = (PyArrayObject *)PyArray_NewFromDescr( + &PyArray_Type, descr, idim_out, shape, strides, + PyArray_DATA(operand), 0, NULL); + if (op_view == NULL) { + return -1; } /* - * If there is one reduction axis, adjust the view's - * shape to only look at the remaining elements + * Copy the elements into the result to start. */ - if (nreduce_axes == 1) { - strides = PyArray_STRIDES(op_view); - for (idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - shape[idim] = shape_orig[idim] - 1; - ((PyArrayObject_fields *)op_view)->data += strides[idim]; - } - } - } - /* If there are zero reduction axes, make the view empty */ - else if (nreduce_axes == 0) { - for (idim = 0; idim < ndim; ++idim) { - shape[idim] = 0; - } + int res = PyArray_CopyInto(result, op_view); + Py_DECREF(op_view); + if (res < 0) { + return -1; } + /* - * Otherwise iterate over the whole operand, but tell the inner loop - * to skip the elements we already copied by setting the skip_first_count. + * If there were no reduction axes, we would already be done here. + * Note that if there is only a single reduction axis, in principle the + * iteration could be set up more efficiently here by removing that + * axis before setting up the iterator (simplifying the iteration since + * `skip_first_count` (the returned size) can be set to 0). */ - else { - *out_skip_first_count = PyArray_SIZE(result); - - Py_DECREF(op_view); - Py_INCREF(operand); - op_view = operand; - } - - return op_view; + return size; } /* @@ -418,7 +164,7 @@ PyArray_InitializeReduceResult( * with size one. * subok : If true, the result uses the subclass of operand, otherwise * it is always a base class ndarray. - * identity : If Py_None, PyArray_InitializeReduceResult is used, otherwise + * identity : If Py_None, PyArray_CopyInitialReduceValues is used, otherwise * this value is used to initialize the result to * the reduction's unit. * loop : The loop which does the reduction. @@ -444,13 +190,12 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, NPY_CASTING casting, npy_bool *axis_flags, int reorderable, int keepdims, - int subok, PyObject *identity, PyArray_ReduceLoopFunc *loop, void *data, npy_intp buffersize, const char *funcname, int errormask) { - PyArrayObject *result = NULL, *op_view = NULL; + PyArrayObject *result = NULL; npy_intp skip_first_count = 0; /* Iterator parameters */ @@ -476,49 +221,10 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, return NULL; } - /* - * This either conforms 'out' to the ndim of 'operand', or allocates - * a new array appropriate for this reduction. - * - * A new array with WRITEBACKIFCOPY is allocated if operand and out have memory - * overlap. - */ - Py_INCREF(result_dtype); - result = PyArray_CreateReduceResult(operand, out, - result_dtype, axis_flags, - keepdims, subok, funcname); - if (result == NULL) { - goto fail; - } - - /* - * Initialize the result to the reduction unit if possible, - * otherwise copy the initial values and get a view to the rest. - */ - if (identity != Py_None) { - if (PyArray_FillWithScalar(result, identity) < 0) { - goto fail; - } - op_view = operand; - Py_INCREF(op_view); - } - else { - op_view = PyArray_InitializeReduceResult( - result, operand, axis_flags, &skip_first_count, funcname); - if (op_view == NULL) { - goto fail; - } - /* empty op_view signals no reduction; but 0-d arrays cannot be empty */ - if ((PyArray_SIZE(op_view) == 0) || (PyArray_NDIM(operand) == 0)) { - Py_DECREF(op_view); - op_view = NULL; - goto finish; - } - } /* Set up the iterator */ - op[0] = result; - op[1] = op_view; + op[0] = out; + op[1] = operand; op_dtypes[0] = result_dtype; op_dtypes[1] = operand_dtype; @@ -527,13 +233,17 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, NPY_ITER_GROWINNER | NPY_ITER_DONT_NEGATE_STRIDES | NPY_ITER_ZEROSIZE_OK | - NPY_ITER_REDUCE_OK | - NPY_ITER_REFS_OK; + NPY_ITER_REFS_OK | + NPY_ITER_DELAY_BUFALLOC | + NPY_ITER_COPY_IF_OVERLAP; op_flags[0] = NPY_ITER_READWRITE | NPY_ITER_ALIGNED | + NPY_ITER_ALLOCATE | NPY_ITER_NO_SUBTYPE; op_flags[1] = NPY_ITER_READONLY | - NPY_ITER_ALIGNED; + NPY_ITER_ALIGNED | + NPY_ITER_NO_BROADCAST; + if (wheremask != NULL) { op[2] = wheremask; /* wheremask is guaranteed to be NPY_BOOL, so borrow its reference */ @@ -545,15 +255,84 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, op_flags[2] = NPY_ITER_READONLY; } + /* Set up result array axes mapping, operand and wheremask use default */ + int result_axes[NPY_MAXDIMS]; + int *op_axes[3] = {result_axes, NULL, NULL}; + + int curr_axis = 0; + for (int i = 0; i < PyArray_NDIM(operand); i++) { + if (axis_flags[i]) { + if (keepdims) { + result_axes[i] = NPY_ITER_REDUCTION_AXIS(curr_axis); + curr_axis++; + } + else { + result_axes[i] = NPY_ITER_REDUCTION_AXIS(-1); + } + } + else { + result_axes[i] = curr_axis; + curr_axis++; + } + } + if (out != NULL) { + /* NpyIter does not raise a good error message in this common case. */ + if (NPY_UNLIKELY(curr_axis != PyArray_NDIM(out))) { + if (keepdims) { + PyErr_Format(PyExc_ValueError, + "output parameter for reduction operation %s has the " + "wrong number of dimensions: Found %d but expected %d " + "(must match the operand's when keepdims=True)", + funcname, PyArray_NDIM(out), curr_axis); + } + else { + PyErr_Format(PyExc_ValueError, + "output parameter for reduction operation %s has the " + "wrong number of dimensions: Found %d but expected %d", + funcname, PyArray_NDIM(out), curr_axis); + } + goto fail; + } + } + iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, flags, NPY_KEEPORDER, casting, op_flags, op_dtypes, - -1, NULL, NULL, buffersize); + PyArray_NDIM(operand), op_axes, NULL, buffersize); if (iter == NULL) { goto fail; } + result = NpyIter_GetOperandArray(iter)[0]; + + /* + * Initialize the result to the reduction unit if possible, + * otherwise copy the initial values and get a view to the rest. + */ + + if (identity != Py_None) { + if (PyArray_FillWithScalar(result, identity) < 0) { + goto fail; + } + } + else { + /* + * For 1-D skip_first_count could be optimized to 0, but no-identity + * reductions are not super common. + * (see also comment in CopyInitialReduceValues) + */ + skip_first_count = PyArray_CopyInitialReduceValues( + result, operand, axis_flags, funcname, keepdims); + if (skip_first_count < 0) { + goto fail; + } + } + + if (!NpyIter_Reset(iter, NULL)) { + goto fail; + } + /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); @@ -595,29 +374,15 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, goto fail; } - NpyIter_Deallocate(iter); - Py_DECREF(op_view); - -finish: - /* Strip out the extra 'one' dimensions in the result */ - if (out == NULL) { - if (!keepdims) { - PyArray_RemoveAxesInPlace(result, axis_flags); - } - } - else { - PyArray_ResolveWritebackIfCopy(result); /* prevent spurious warnings */ - Py_DECREF(result); + if (out != NULL) { result = out; - Py_INCREF(result); } + Py_INCREF(result); + NpyIter_Deallocate(iter); return result; fail: - PyArray_ResolveWritebackIfCopy(result); /* prevent spurious warnings */ - Py_XDECREF(result); - Py_XDECREF(op_view); if (iter != NULL) { NpyIter_Deallocate(iter); } diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h index 0c2183ed6..372605dba 100644 --- a/numpy/core/src/umath/reduction.h +++ b/numpy/core/src/umath/reduction.h @@ -128,9 +128,7 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, * of cache behavior or multithreading requirements. * keepdims : If true, leaves the reduction dimensions in the result * with size one. - * subok : If true, the result uses the subclass of operand, otherwise - * it is always a base class ndarray. - * identity : If Py_None, PyArray_InitializeReduceResult is used, otherwise + * identity : If Py_None, PyArray_CopyInitialReduceValues is used, otherwise * this value is used to initialize the result to * the reduction's unit. * loop : The loop which does the reduction. @@ -147,7 +145,6 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, NPY_CASTING casting, npy_bool *axis_flags, int reorderable, int keepdims, - int subok, PyObject *identity, PyArray_ReduceLoopFunc *loop, void *data, npy_intp buffersize, const char *funcname, diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 591fbd7ad..b35f377d7 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2701,9 +2701,13 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, } } - /* Any output core dimensions shape should be ignored */ + /* + * Any output core dimensions shape should be ignored, so we add + * it as a Reduce dimension (which can be broadcast with the rest). + * These will be removed before the actual iteration for gufuncs. + */ for (idim = broadcast_ndim; idim < iter_ndim; ++idim) { - op_axes_arrays[i][idim] = -1; + op_axes_arrays[i][idim] = NPY_ITER_REDUCTION_AXIS(-1); } /* Except for when it belongs to this output */ @@ -2775,7 +2779,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, */ _ufunc_setup_flags(ufunc, NPY_ITER_COPY | NPY_UFUNC_DEFAULT_INPUT_FLAGS, NPY_ITER_UPDATEIFCOPY | - NPY_ITER_READWRITE | + NPY_ITER_WRITEONLY | NPY_UFUNC_DEFAULT_OUTPUT_FLAGS, op_flags); /* For the generalized ufunc, we get the loop right away too */ @@ -2824,7 +2828,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, iter_flags = ufunc->iter_flags | NPY_ITER_MULTI_INDEX | NPY_ITER_REFS_OK | - NPY_ITER_REDUCE_OK | NPY_ITER_ZEROSIZE_OK | NPY_ITER_COPY_IF_OVERLAP; @@ -3650,7 +3653,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, result = PyUFunc_ReduceWrapper(arr, out, wheremask, dtype, dtype, NPY_UNSAFE_CASTING, axis_flags, reorderable, - keepdims, 0, + keepdims, initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask); diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index c106c528d..7b3c3a40d 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -1520,6 +1520,13 @@ def test_iter_allocate_output_errors(): [['readonly'], ['writeonly', 'allocate']], op_dtypes=[None, np.dtype('f4')], op_axes=[None, [0, 2, 1, 0]]) + # Not all axes may be specified if a reduction. If there is a hole + # in op_axes, this is an error. + a = arange(24, dtype='i4').reshape(2, 3, 4) + assert_raises(ValueError, nditer, [a, None], ["reduce_ok"], + [['readonly'], ['readwrite', 'allocate']], + op_dtypes=[None, np.dtype('f4')], + op_axes=[None, [0, np.newaxis, 2]]) def test_iter_remove_axis(): a = arange(24).reshape(2, 3, 4) @@ -2661,6 +2668,14 @@ def test_iter_allocated_array_dtypes(): b[1] = a + 1 assert_equal(it.operands[1], [[0, 2], [2, 4], [19, 21]]) + # Check the same (less sensitive) thing when `op_axes` with -1 is given. + it = np.nditer(([[1, 3, 20]], None), op_dtypes=[None, ('i4', (2,))], + flags=["reduce_ok"], op_axes=[None, (-1, 0)]) + for a, b in it: + b[0] = a - 1 + b[1] = a + 1 + assert_equal(it.operands[1], [[0, 2], [2, 4], [19, 21]]) + # Make sure this works for scalars too it = np.nditer((10, 2, None), op_dtypes=[None, None, ('i4', (2, 2))]) for a, b, c in it: diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 642308d3b..1305f4877 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -610,7 +610,17 @@ class TestUfunc: warnings.simplefilter("always") u += v assert_equal(len(w), 1) - assert_(x[0,0] != u[0, 0]) + assert_(x[0, 0] != u[0, 0]) + + # Output reduction should not be allowed. + # See gh-15139 + a = np.arange(6).reshape(3, 2) + b = np.ones(2) + out = np.empty(()) + assert_raises(ValueError, umt.inner1d, a, b, out) + out2 = np.empty(3) + c = umt.inner1d(a, b, out2) + assert_(c is out2) def test_out_broadcasts(self): # For ufuncs and gufuncs (not for reductions), we currently allow @@ -957,7 +967,10 @@ class TestUfunc: assert_array_equal(result, np.vstack((np.zeros(3), a[2], -a[1]))) assert_raises(ValueError, umt.cross1d, np.eye(4), np.eye(4)) assert_raises(ValueError, umt.cross1d, a, np.arange(4.)) + # Wrong output core dimension. assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros((3, 4))) + # Wrong output broadcast dimension (see gh-15139). + assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros(3)) def test_can_ignore_signature(self): # Comparing the effects of ? in signature: @@ -1905,22 +1918,47 @@ class TestUfunc: assert_equal(y_base[1,:], y_base_copy[1,:]) assert_equal(y_base[3,:], y_base_copy[3,:]) - @pytest.mark.parametrize('output_shape', - [(), (1,), (1, 1), (1, 3), (4, 3)]) + @pytest.mark.parametrize('out_shape', + [(), (1,), (3,), (1, 1), (1, 3), (4, 3)]) + @pytest.mark.parametrize('keepdims', [True, False]) @pytest.mark.parametrize('f_reduce', [np.add.reduce, np.minimum.reduce]) - def test_reduce_wrong_dimension_output(self, f_reduce, output_shape): + def test_reduce_wrong_dimension_output(self, f_reduce, keepdims, out_shape): # Test that we're not incorrectly broadcasting dimensions. # See gh-15144 (failed for np.add.reduce previously). a = np.arange(12.).reshape(4, 3) - out = np.empty(output_shape, a.dtype) - assert_raises(ValueError, f_reduce, a, axis=0, out=out) - if output_shape != (1, 3): - assert_raises(ValueError, f_reduce, a, axis=0, out=out, - keepdims=True) + out = np.empty(out_shape, a.dtype) + + correct_out = f_reduce(a, axis=0, keepdims=keepdims) + if out_shape != correct_out.shape: + with assert_raises(ValueError): + f_reduce(a, axis=0, out=out, keepdims=keepdims) else: - check = f_reduce(a, axis=0, out=out, keepdims=True) + check = f_reduce(a, axis=0, out=out, keepdims=keepdims) assert_(check is out) - assert_array_equal(check, f_reduce(a, axis=0, keepdims=True)) + assert_array_equal(check, correct_out) + + def test_reduce_output_does_not_broadcast_input(self): + # Test that the output shape cannot broadcast an input dimension + # (it never can add dimensions, but it might expand an existing one) + a = np.ones((1, 10)) + out_correct = (np.empty((1, 1))) + out_incorrect = np.empty((3, 1)) + np.add.reduce(a, axis=-1, out=out_correct, keepdims=True) + np.add.reduce(a, axis=-1, out=out_correct[:, 0], keepdims=False) + with assert_raises(ValueError): + np.add.reduce(a, axis=-1, out=out_incorrect, keepdims=True) + with assert_raises(ValueError): + np.add.reduce(a, axis=-1, out=out_incorrect[:, 0], keepdims=False) + + def test_reduce_output_subclass_ok(self): + class MyArr(np.ndarray): + pass + + out = np.empty(()) + np.add.reduce(np.ones(5), out=out) # no subclass, all fine + out = out.view(MyArr) + assert np.add.reduce(np.ones(5), out=out) is out + assert type(np.add.reduce(out)) is MyArr def test_no_doc_string(self): # gh-9337 |