summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2020-06-04 22:03:43 +0300
committerGitHub <noreply@github.com>2020-06-04 22:03:43 +0300
commita5d021a1b6f439a19812926bc4d796ef5f346c44 (patch)
tree998f27e5296a2894b41a9600c9bdb8b7806241c1
parentabce2a0604a4db5d8072a0b1478c7af439882ed7 (diff)
parent8da4fba24c193807555dfcbc0e67834cae9185ea (diff)
downloadnumpy-a5d021a1b6f439a19812926bc4d796ef5f346c44.tar.gz
Merge pull request #15162 from seberg/reduce-axes-try
BUG,MAINT: Fix issues with non-reduce broadcasting axes
-rw-r--r--numpy/core/src/multiarray/common.h9
-rw-r--r--numpy/core/src/multiarray/einsum.c.src6
-rw-r--r--numpy/core/src/multiarray/nditer_api.c16
-rw-r--r--numpy/core/src/multiarray/nditer_constr.c402
-rw-r--r--numpy/core/src/multiarray/nditer_impl.h35
-rw-r--r--numpy/core/src/umath/reduction.c513
-rw-r--r--numpy/core/src/umath/reduction.h5
-rw-r--r--numpy/core/src/umath/ufunc_object.c13
-rw-r--r--numpy/core/tests/test_nditer.py15
-rw-r--r--numpy/core/tests/test_ufunc.py60
10 files changed, 479 insertions, 595 deletions
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 78a15a63c..4ba25c079 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -339,5 +339,14 @@ NPY_NO_EXPORT PyArrayObject *
new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out,
int nd, npy_intp dimensions[], int typenum, PyArrayObject **result);
+
+/*
+ * Used to indicate a broadcast axis, see also `npyiter_get_op_axis` in
+ * `nditer_constr.c`. This may be the preferred API for reduction axes
+ * probably. So we should consider making this public either as a macro or
+ * function (so that the way we flag the axis can be changed).
+ */
+#define NPY_ITER_REDUCTION_AXIS(axis) (axis + (1 << (NPY_BITSOF_INT - 2)))
+
#endif
diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src
index 1cc557825..b914e5bb3 100644
--- a/numpy/core/src/multiarray/einsum.c.src
+++ b/numpy/core/src/multiarray/einsum.c.src
@@ -2740,7 +2740,7 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
op_axes[nop][idim] = idim;
}
for (idim = ndim_output; idim < ndim_iter; ++idim) {
- op_axes[nop][idim] = -1;
+ op_axes[nop][idim] = NPY_ITER_REDUCTION_AXIS(-1);
}
/* Set the iterator per-op flags */
@@ -2753,13 +2753,11 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
op_flags[nop] = NPY_ITER_READWRITE|
NPY_ITER_NBO|
NPY_ITER_ALIGNED|
- NPY_ITER_ALLOCATE|
- NPY_ITER_NO_BROADCAST;
+ NPY_ITER_ALLOCATE;
iter_flags = NPY_ITER_EXTERNAL_LOOP|
NPY_ITER_BUFFERED|
NPY_ITER_DELAY_BUFALLOC|
NPY_ITER_GROWINNER|
- NPY_ITER_REDUCE_OK|
NPY_ITER_REFS_OK|
NPY_ITER_ZEROSIZE_OK;
if (out != NULL) {
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index e7fe0fa50..a5b5e5c51 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -935,13 +935,8 @@ NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
perm = NIT_PERM(iter);
for(idim = 0; idim < ndim; ++idim) {
- npy_int8 p = perm[idim];
- if (p < 0) {
- outshape[ndim+p] = NAD_SHAPE(axisdata);
- }
- else {
- outshape[ndim-p-1] = NAD_SHAPE(axisdata);
- }
+ int axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
+ outshape[axis] = NAD_SHAPE(axisdata);
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
@@ -1005,8 +1000,9 @@ NpyIter_CreateCompatibleStrides(NpyIter *iter,
perm = NIT_PERM(iter);
for(idim = 0; idim < ndim; ++idim) {
- npy_int8 p = perm[idim];
- if (p < 0) {
+ npy_bool flipped;
+ npy_int8 axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, &flipped);
+ if (flipped) {
PyErr_SetString(PyExc_RuntimeError,
"Iterator CreateCompatibleStrides may only be called "
"if DONT_NEGATE_STRIDES was used to prevent reverse "
@@ -1014,7 +1010,7 @@ NpyIter_CreateCompatibleStrides(NpyIter *iter,
return NPY_FAIL;
}
else {
- outstrides[ndim-p-1] = itemsize;
+ outstrides[axis] = itemsize;
}
itemsize *= NAD_SHAPE(axisdata);
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index 620c7d593..f2dbc9f03 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -57,11 +57,12 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
char **op_dataptr,
const npy_uint32 *op_flags, int **op_axes,
npy_intp const *itershape);
+static NPY_INLINE int
+npyiter_get_op_axis(int axis, npy_bool *reduction_axis);
static void
-npyiter_replace_axisdata(NpyIter *iter, int iop,
- PyArrayObject *op,
- int op_ndim, char *op_dataptr,
- int *op_axes);
+npyiter_replace_axisdata(
+ NpyIter *iter, int iop, PyArrayObject *op,
+ int orig_op_ndim, const int *op_axes);
static void
npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags);
static void
@@ -826,7 +827,8 @@ npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
if (axes != NULL) {
memset(axes_dupcheck, 0, NPY_MAXDIMS);
for (idim = 0; idim < oa_ndim; ++idim) {
- int i = axes[idim];
+ int i = npyiter_get_op_axis(axes[idim], NULL);
+
if (i >= 0) {
if (i >= NPY_MAXDIMS) {
PyErr_Format(PyExc_ValueError,
@@ -1391,6 +1393,61 @@ check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
}
/*
+ * Check whether a reduction is OK based on the flags and the operand being
+ * readwrite. This path is deprecated, since usually only specific axes
+ * should be reduced. If axes are specified explicitely, the flag is
+ * unnecessary.
+ */
+static int
+npyiter_check_reduce_ok_and_set_flags(
+ NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
+ int dim) {
+ /* If it's writeable, this means a reduction */
+ if (*op_itflags & NPY_OP_ITFLAG_WRITE) {
+ if (!(flags & NPY_ITER_REDUCE_OK)) {
+ PyErr_Format(PyExc_ValueError,
+ "output operand requires a reduction along dimension %d, "
+ "but the reduction is not enabled. The dimension size of 1 "
+ "does not match the expected output shape.", dim);
+ return 0;
+ }
+ if (!(*op_itflags & NPY_OP_ITFLAG_READ)) {
+ PyErr_SetString(PyExc_ValueError,
+ "output operand requires a reduction, but is flagged as "
+ "write-only, not read-write");
+ return 0;
+ }
+ NPY_IT_DBG_PRINT("Iterator: Indicating that a reduction is"
+ "occurring\n");
+
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
+ *op_itflags |= NPY_OP_ITFLAG_REDUCE;
+ }
+ return 1;
+}
+
+/**
+ * Removes the (additive) NPY_ITER_REDUCTION_AXIS indication and sets
+ * is_forced_broadcast to 1 if it is set. Otherwise to 0.
+ *
+ * @param axis The op_axes[i] to normalize.
+ * @param reduction_axis Output 1 if a reduction axis, otherwise 0.
+ * @returns The normalized axis (without reduce axis flag).
+ */
+static NPY_INLINE int
+npyiter_get_op_axis(int axis, npy_bool *reduction_axis) {
+ npy_bool forced_broadcast = axis >= NPY_ITER_REDUCTION_AXIS(-1);
+
+ if (reduction_axis != NULL) {
+ *reduction_axis = forced_broadcast;
+ }
+ if (forced_broadcast) {
+ return axis - NPY_ITER_REDUCTION_AXIS(0);
+ }
+ return axis;
+}
+
+/*
* Fills in the AXISDATA for the 'nop' operands, broadcasting
* the dimensionas as necessary. Also fills
* in the ITERSIZE data member.
@@ -1450,8 +1507,9 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
return 0;
}
for (idim = 0; idim < ondim; ++idim) {
- npy_intp bshape = broadcast_shape[idim+ndim-ondim],
- op_shape = shape[idim];
+ npy_intp bshape = broadcast_shape[idim+ndim-ondim];
+ npy_intp op_shape = shape[idim];
+
if (bshape == 1) {
broadcast_shape[idim+ndim-ondim] = op_shape;
}
@@ -1463,11 +1521,13 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
else {
int *axes = op_axes[iop];
for (idim = 0; idim < ndim; ++idim) {
- int i = axes[idim];
+ int i = npyiter_get_op_axis(axes[idim], NULL);
+
if (i >= 0) {
if (i < ondim) {
- npy_intp bshape = broadcast_shape[idim],
- op_shape = shape[i];
+ npy_intp bshape = broadcast_shape[idim];
+ npy_intp op_shape = shape[i];
+
if (bshape == 1) {
broadcast_shape[idim] = op_shape;
}
@@ -1589,9 +1649,37 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
}
else {
int *axes = op_axes[iop];
- int i = axes[ndim-idim-1];
- if (i >= 0) {
- if (bshape == 1 || op_cur == NULL) {
+ npy_bool reduction_axis;
+ int i;
+ i = npyiter_get_op_axis(axes[ndim - idim - 1], &reduction_axis);
+
+ if (reduction_axis) {
+ /* This is explicitly a reduction axis */
+ strides[iop] = 0;
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
+ op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
+
+ if (NPY_UNLIKELY((i >= 0) && (op_cur != NULL) &&
+ (PyArray_DIM(op_cur, i) != 1))) {
+ PyErr_Format(PyExc_ValueError,
+ "operand was set up as a reduction along axis "
+ "%d, but the length of the axis is %zd "
+ "(it has to be 1)",
+ i, (Py_ssize_t)PyArray_DIM(op_cur, i));
+ return 0;
+ }
+ }
+ else if (bshape == 1) {
+ /*
+ * If the full iterator shape is 1, zero always works.
+ * NOTE: We thus always allow broadcast dimensions (i = -1)
+ * if the shape is 1.
+ */
+ strides[iop] = 0;
+ }
+ else if (i >= 0) {
+ if (op_cur == NULL) {
+ /* stride is filled later, shape will match `bshape` */
strides[iop] = 0;
}
else if (PyArray_DIM(op_cur, i) == 1) {
@@ -1599,51 +1687,20 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
goto operand_different_than_broadcast;
}
- /* If it's writeable, this means a reduction */
- if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
- if (!(flags & NPY_ITER_REDUCE_OK)) {
- PyErr_SetString(PyExc_ValueError,
- "output operand requires a reduction, but "
- "reduction is not enabled");
- return 0;
- }
- if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
- PyErr_SetString(PyExc_ValueError,
- "output operand requires a reduction, but "
- "is flagged as write-only, not "
- "read-write");
- return 0;
- }
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
- op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
+ if (!npyiter_check_reduce_ok_and_set_flags(
+ iter, flags, &op_itflags[iop], i)) {
+ return 0;
}
}
else {
strides[iop] = PyArray_STRIDE(op_cur, i);
}
}
- else if (bshape == 1) {
- strides[iop] = 0;
- }
else {
strides[iop] = 0;
- /* If it's writeable, this means a reduction */
- if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
- if (!(flags & NPY_ITER_REDUCE_OK)) {
- PyErr_SetString(PyExc_ValueError,
- "output operand requires a reduction, but "
- "reduction is not enabled");
- return 0;
- }
- if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
- PyErr_SetString(PyExc_ValueError,
- "output operand requires a reduction, but "
- "is flagged as write-only, not "
- "read-write");
- return 0;
- }
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
- op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
+ if (!npyiter_check_reduce_ok_and_set_flags(
+ iter, flags, &op_itflags[iop], i)) {
+ return 0;
}
}
}
@@ -1753,7 +1810,7 @@ broadcast_error: {
if (axes != NULL) {
for (idim = 0; idim < ndim; ++idim) {
- npy_intp i = axes[idim];
+ int i = npyiter_get_op_axis(axes[idim], NULL);
if (i >= 0 && i < PyArray_NDIM(op[iop])) {
remdims[idim] = PyArray_DIM(op[iop], i);
@@ -1899,14 +1956,14 @@ operand_different_than_broadcast: {
* array.
*/
static void
-npyiter_replace_axisdata(NpyIter *iter, int iop,
- PyArrayObject *op,
- int op_ndim, char *op_dataptr,
- int *op_axes)
+npyiter_replace_axisdata(
+ NpyIter *iter, int iop, PyArrayObject *op,
+ int orig_op_ndim, const int *op_axes)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
+ char *op_dataptr = PyArray_DATA(op);
NpyIter_AxisData *axisdata0, *axisdata;
npy_intp sizeof_axisdata;
@@ -1925,25 +1982,20 @@ npyiter_replace_axisdata(NpyIter *iter, int iop,
if (op_axes != NULL) {
for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- npy_int8 p;
int i;
+ npy_bool axis_flipped;
npy_intp shape;
- /* Apply the perm to get the original axis */
- p = perm[idim];
- if (p < 0) {
- i = op_axes[ndim+p];
- }
- else {
- i = op_axes[ndim-p-1];
- }
+ /* Apply perm to get the original axis, and check if its flipped */
+ i = npyiter_undo_iter_axis_perm(idim, ndim, perm, &axis_flipped);
- if (0 <= i && i < op_ndim) {
+ i = npyiter_get_op_axis(op_axes[i], NULL);
+ assert(i < orig_op_ndim);
+ if (i >= 0) {
shape = PyArray_DIM(op, i);
if (shape != 1) {
npy_intp stride = PyArray_STRIDE(op, i);
- if (p < 0) {
- /* If the perm entry is negative, flip the axis */
+ if (axis_flipped) {
NAD_STRIDES(axisdata)[iop] = -stride;
baseoffset += stride*(shape-1);
}
@@ -1956,25 +2008,18 @@ npyiter_replace_axisdata(NpyIter *iter, int iop,
}
else {
for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- npy_int8 p;
int i;
+ npy_bool axis_flipped;
npy_intp shape;
- /* Apply the perm to get the original axis */
- p = perm[idim];
- if (p < 0) {
- i = op_ndim+p;
- }
- else {
- i = op_ndim-p-1;
- }
+ i = npyiter_undo_iter_axis_perm(
+ idim, orig_op_ndim, perm, &axis_flipped);
if (i >= 0) {
shape = PyArray_DIM(op, i);
if (shape != 1) {
npy_intp stride = PyArray_STRIDE(op, i);
- if (p < 0) {
- /* If the perm entry is negative, flip the axis */
+ if (axis_flipped) {
NAD_STRIDES(axisdata)[iop] = -stride;
baseoffset += stride*(shape-1);
}
@@ -2461,14 +2506,15 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
+ int used_op_ndim;
int nop = NIT_NOP(iter);
npy_int8 *perm = NIT_PERM(iter);
- npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS],
- stride = op_dtype->elsize;
+ npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ npy_intp stride = op_dtype->elsize;
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
- npy_intp i;
+ int i;
PyArrayObject *ret;
@@ -2495,39 +2541,46 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
/* Initialize the strides to invalid values */
- for (i = 0; i < NPY_MAXDIMS; ++i) {
+ for (i = 0; i < op_ndim; ++i) {
strides[i] = NPY_MAX_INTP;
}
if (op_axes != NULL) {
+ used_op_ndim = 0;
for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- npy_int8 p;
+ npy_bool reduction_axis;
/* Apply the perm to get the original axis */
- p = perm[idim];
- if (p < 0) {
- i = op_axes[ndim+p];
- }
- else {
- i = op_axes[ndim-p-1];
- }
+ i = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
+ i = npyiter_get_op_axis(op_axes[i], &reduction_axis);
if (i >= 0) {
NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
"for iterator dimension %d to %d\n", (int)i,
(int)idim, (int)stride);
+ used_op_ndim += 1;
strides[i] = stride;
if (shape == NULL) {
- new_shape[i] = NAD_SHAPE(axisdata);
+ if (reduction_axis) {
+ /* reduction axes always have a length of 1 */
+ new_shape[i] = 1;
+ }
+ else {
+ new_shape[i] = NAD_SHAPE(axisdata);
+ }
stride *= new_shape[i];
if (i >= ndim) {
- PyErr_SetString(PyExc_ValueError,
+ PyErr_Format(PyExc_ValueError,
"automatically allocated output array "
- "specified with an inconsistent axis mapping");
+ "specified with an inconsistent axis mapping; "
+ "the axis mapping cannot include dimension %d "
+ "which is too large for the iterator dimension "
+ "of %d.", i, ndim);
return NULL;
}
}
else {
+ assert(!reduction_axis || shape[i] == 1);
stride *= shape[i];
}
}
@@ -2535,44 +2588,25 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
if (shape == NULL) {
/*
* If deleting this axis produces a reduction, but
- * reduction wasn't enabled, throw an error
+ * reduction wasn't enabled, throw an error.
+ * NOTE: We currently always allow new-axis if the iteration
+ * size is 1 (thus allowing broadcasting sometimes).
*/
- if (NAD_SHAPE(axisdata) != 1) {
- if (!(flags & NPY_ITER_REDUCE_OK)) {
- PyErr_SetString(PyExc_ValueError,
- "output requires a reduction, but "
- "reduction is not enabled");
- return NULL;
- }
- if (!((*op_itflags) & NPY_OP_ITFLAG_READ)) {
- PyErr_SetString(PyExc_ValueError,
- "output requires a reduction, but "
- "is flagged as write-only, not read-write");
+ if (!reduction_axis && NAD_SHAPE(axisdata) != 1) {
+ if (!npyiter_check_reduce_ok_and_set_flags(
+ iter, flags, op_itflags, i)) {
return NULL;
}
-
- NPY_IT_DBG_PRINT("Iterator: Indicating that a "
- "reduction is occurring\n");
- /* Indicate that a reduction is occurring */
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
- (*op_itflags) |= NPY_OP_ITFLAG_REDUCE;
}
}
}
}
}
else {
+ used_op_ndim = ndim;
for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- npy_int8 p;
-
/* Apply the perm to get the original axis */
- p = perm[idim];
- if (p < 0) {
- i = op_ndim + p;
- }
- else {
- i = op_ndim - p - 1;
- }
+ i = npyiter_undo_iter_axis_perm(idim, op_ndim, perm, NULL);
if (i >= 0) {
NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
@@ -2590,73 +2624,58 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
}
}
- /*
- * If custom axes were specified, some dimensions may not have been used.
- * Add the REDUCE itflag if this creates a reduction situation.
- */
if (shape == NULL) {
- /* Ensure there are no dimension gaps in op_axes, and find op_ndim */
- op_ndim = ndim;
- if (op_axes != NULL) {
- for (i = 0; i < ndim; ++i) {
- if (strides[i] == NPY_MAX_INTP) {
- if (op_ndim == ndim) {
- op_ndim = i;
- }
- }
- /*
- * If there's a gap in the array's dimensions, it's an error.
- * For example, op_axes of [0,2] for the automatically
- * allocated output.
- */
- else if (op_ndim != ndim) {
- PyErr_SetString(PyExc_ValueError,
- "automatically allocated output array "
- "specified with an inconsistent axis mapping");
- return NULL;
- }
+ /* If shape was NULL, use the shape we calculated */
+ op_ndim = used_op_ndim;
+ shape = new_shape;
+ /*
+ * If there's a gap in the array's dimensions, it's an error.
+ * For instance, if op_axes [0, 2] is specified, there will a place
+ * in the strides array where the value is not set.
+ */
+ for (i = 0; i < op_ndim; i++) {
+ if (strides[i] == NPY_MAX_INTP) {
+ PyErr_Format(PyExc_ValueError,
+ "automatically allocated output array "
+ "specified with an inconsistent axis mapping; "
+ "the axis mapping is missing an entry for "
+ "dimension %d.", i);
+ return NULL;
}
}
}
- else {
- for (i = 0; i < op_ndim; ++i) {
- if (strides[i] == NPY_MAX_INTP) {
- npy_intp factor, new_strides[NPY_MAXDIMS],
- itemsize;
-
- /* Fill in the missing strides in C order */
- factor = 1;
- itemsize = op_dtype->elsize;
- for (i = op_ndim-1; i >= 0; --i) {
- if (strides[i] == NPY_MAX_INTP) {
- new_strides[i] = factor * itemsize;
- factor *= shape[i];
- }
- }
-
- /*
- * Copy the missing strides, and multiply the existing strides
- * by the calculated factor. This way, the missing strides
- * are tighter together in memory, which is good for nested
- * loops.
- */
- for (i = 0; i < op_ndim; ++i) {
- if (strides[i] == NPY_MAX_INTP) {
- strides[i] = new_strides[i];
- }
- else {
- strides[i] *= factor;
- }
- }
+ else if (used_op_ndim < op_ndim) {
+ /*
+ * If custom axes were specified, some dimensions may not have
+ * been used. These are additional axes which are ignored in the
+ * iterator but need to be handled here.
+ */
+ npy_intp factor, itemsize, new_strides[NPY_MAXDIMS];
- break;
+ /* Fill in the missing strides in C order */
+ factor = 1;
+ itemsize = op_dtype->elsize;
+ for (i = op_ndim-1; i >= 0; --i) {
+ if (strides[i] == NPY_MAX_INTP) {
+ new_strides[i] = factor * itemsize;
+ factor *= shape[i];
}
}
- }
- /* If shape was NULL, set it to the shape we calculated */
- if (shape == NULL) {
- shape = new_shape;
+ /*
+ * Copy the missing strides, and multiply the existing strides
+ * by the calculated factor. This way, the missing strides
+ * are tighter together in memory, which is good for nested
+ * loops.
+ */
+ for (i = 0; i < op_ndim; ++i) {
+ if (strides[i] == NPY_MAX_INTP) {
+ strides[i] = new_strides[i];
+ }
+ else {
+ strides[i] *= factor;
+ }
+ }
}
/* Allocate the temporary array */
@@ -2669,6 +2688,11 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
/* Double-check that the subtype didn't mess with the dimensions */
if (subtype != &PyArray_Type) {
+ /*
+ * TODO: the dtype could have a subarray, which adds new dimensions
+ * to `ret`, that should typically be fine, but will break
+ * in this branch.
+ */
if (PyArray_NDIM(ret) != op_ndim ||
!PyArray_CompareLists(shape, PyArray_DIMS(ret), op_ndim)) {
PyErr_SetString(PyExc_RuntimeError,
@@ -2805,16 +2829,21 @@ npyiter_allocate_arrays(NpyIter *iter,
if (op[iop] == NULL) {
PyArrayObject *out;
PyTypeObject *op_subtype;
- int ondim = ndim;
/* Check whether the subtype was disabled */
op_subtype = (op_flags[iop] & NPY_ITER_NO_SUBTYPE) ?
&PyArray_Type : subtype;
- /* Allocate the output array */
+ /*
+ * Allocate the output array.
+ *
+ * Note that here, ndim is always correct if no op_axes was given
+ * (but the actual dimension of op can be larger). If op_axes
+ * is given, ndim is not actually used.
+ */
out = npyiter_new_temp_array(iter, op_subtype,
flags, &op_itflags[iop],
- ondim,
+ ndim,
NULL,
op_dtype[iop],
op_axes ? op_axes[iop] : NULL);
@@ -2828,8 +2857,8 @@ npyiter_allocate_arrays(NpyIter *iter,
* Now we need to replace the pointers and strides with values
* from the new array.
*/
- npyiter_replace_axisdata(iter, iop, op[iop], ondim,
- PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL);
+ npyiter_replace_axisdata(iter, iop, op[iop], ndim,
+ op_axes ? op_axes[iop] : NULL);
/*
* New arrays are guaranteed true-aligned, but copy/cast code
@@ -2870,8 +2899,7 @@ npyiter_allocate_arrays(NpyIter *iter,
* Now we need to replace the pointers and strides with values
* from the temporary array.
*/
- npyiter_replace_axisdata(iter, iop, op[iop], 0,
- PyArray_DATA(op[iop]), NULL);
+ npyiter_replace_axisdata(iter, iop, op[iop], 0, NULL);
/*
* New arrays are guaranteed true-aligned, but copy/cast code
@@ -2943,7 +2971,7 @@ npyiter_allocate_arrays(NpyIter *iter,
* from the temporary array.
*/
npyiter_replace_axisdata(iter, iop, op[iop], ondim,
- PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL);
+ op_axes ? op_axes[iop] : NULL);
/*
* New arrays are guaranteed true-aligned, but copy/cast code
diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h
index 5fb146026..1477c8631 100644
--- a/numpy/core/src/multiarray/nditer_impl.h
+++ b/numpy/core/src/multiarray/nditer_impl.h
@@ -301,6 +301,41 @@ struct NpyIter_AD {
NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*(ndim ? ndim : 1))
/* Internal helper functions shared between implementation files */
+
+/**
+ * Undo the axis permutation of the iterator. When the operand has fewer
+ * dimensions then the iterator, this can return negative values for
+ * inserted (broadcast) dimensions.
+ *
+ * @param axis Axis for which to undo the iterator axis permutation.
+ * @param ndim If `op_axes` is being used, this is the iterator dimension,
+ * otherwise this is the operand dimension.
+ * @param perm The iterator axis permutation NIT_PERM(iter)
+ * @param axis_flipped Will be set to true if this is a flipped axis
+ * (i.e. is iterated in reversed order) and otherwise false.
+ * Can be NULL if the information is not needed.
+ * @return The unpermuted axis. Without `op_axes` this is correct, with
+ * `op_axes` this indexes into `op_axes` (unpermuted iterator axis)
+ */
+static NPY_INLINE int
+npyiter_undo_iter_axis_perm(
+ int axis, int ndim, const npy_int8 *perm, npy_bool *axis_flipped)
+{
+ npy_int8 p = perm[axis];
+ /* The iterator treats axis reversed, thus adjust by ndim */
+ npy_bool flipped = p < 0;
+ if (axis_flipped != NULL) {
+ *axis_flipped = flipped;
+ }
+ if (flipped) {
+ axis = ndim + p;
+ }
+ else {
+ axis = ndim - p - 1;
+ }
+ return axis;
+}
+
NPY_NO_EXPORT void
npyiter_coalesce_axes(NpyIter *iter);
NPY_NO_EXPORT int
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index 79adb0051..4037a4757 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -25,235 +25,6 @@
#include "reduction.h"
#include "extobj.h" /* for _check_ufunc_fperr */
-/*
- * Allocates a result array for a reduction operation, with
- * dimensions matching 'arr' except set to 1 with 0 stride
- * wherever axis_flags is True. Dropping the reduction axes
- * from the result must be done later by the caller once the
- * computation is complete.
- *
- * This function always allocates a base class ndarray.
- *
- * If 'dtype' isn't NULL, this function steals its reference.
- */
-static PyArrayObject *
-allocate_reduce_result(PyArrayObject *arr, const npy_bool *axis_flags,
- PyArray_Descr *dtype, int subok)
-{
- npy_intp strides[NPY_MAXDIMS], stride;
- npy_intp shape[NPY_MAXDIMS], *arr_shape = PyArray_DIMS(arr);
- npy_stride_sort_item strideperm[NPY_MAXDIMS];
- int idim, ndim = PyArray_NDIM(arr);
-
- if (dtype == NULL) {
- dtype = PyArray_DTYPE(arr);
- Py_INCREF(dtype);
- }
-
- PyArray_CreateSortedStridePerm(PyArray_NDIM(arr),
- PyArray_STRIDES(arr), strideperm);
-
- /* Build the new strides and shape */
- stride = dtype->elsize;
- if (ndim) {
- memcpy(shape, arr_shape, ndim * sizeof(shape[0]));
- }
- for (idim = ndim-1; idim >= 0; --idim) {
- npy_intp i_perm = strideperm[idim].perm;
- if (axis_flags[i_perm]) {
- strides[i_perm] = 0;
- shape[i_perm] = 1;
- }
- else {
- strides[i_perm] = stride;
- stride *= shape[i_perm];
- }
- }
-
- /* Finally, allocate the array */
- return (PyArrayObject *)PyArray_NewFromDescr(
- subok ? Py_TYPE(arr) : &PyArray_Type,
- dtype, ndim, shape, strides,
- NULL, 0, subok ? (PyObject *)arr : NULL);
-}
-
-/*
- * Conforms an output parameter 'out' to have 'ndim' dimensions
- * with dimensions of size one added in the appropriate places
- * indicated by 'axis_flags'.
- *
- * The return value is a view into 'out'.
- */
-static PyArrayObject *
-conform_reduce_result(PyArrayObject *in, const npy_bool *axis_flags,
- PyArrayObject *out, int keepdims, const char *funcname,
- int need_copy)
-{
- /* unpack shape information */
- int const ndim = PyArray_NDIM(in);
- npy_intp const *shape_in = PyArray_DIMS(in);
-
- int const ndim_out = PyArray_NDIM(out);
- npy_intp const *strides_out = PyArray_STRIDES(out);
- npy_intp const *shape_out = PyArray_DIMS(out);
-
- /*
- * If the 'keepdims' parameter is true, do a simpler validation and
- * return a new reference to 'out'.
- */
- if (keepdims) {
- if (PyArray_NDIM(out) != ndim) {
- PyErr_Format(PyExc_ValueError,
- "output parameter for reduction operation %s "
- "has the wrong number of dimensions (must match "
- "the operand's when keepdims=True)", funcname);
- return NULL;
- }
-
- for (int idim = 0; idim < ndim; ++idim) {
- if (axis_flags[idim]) {
- if (shape_out[idim] != 1) {
- PyErr_Format(PyExc_ValueError,
- "output parameter for reduction operation %s "
- "has a reduction dimension not equal to one "
- "(required when keepdims=True)", funcname);
- return NULL;
- }
- }
- else {
- if (shape_out[idim] != shape_in[idim]) {
- PyErr_Format(PyExc_ValueError,
- "output parameter for reduction operation %s "
- "has a non-reduction dimension not equal to "
- "the input one.", funcname);
- return NULL;
- }
- }
-
- }
-
- Py_INCREF(out);
- return out;
- }
-
- /* Construct the strides and shape */
- npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS];
- int idim_out = 0;
- for (int idim = 0; idim < ndim; ++idim) {
- if (axis_flags[idim]) {
- strides[idim] = 0;
- shape[idim] = 1;
- }
- else {
- if (idim_out >= ndim_out) {
- PyErr_Format(PyExc_ValueError,
- "output parameter for reduction operation %s "
- "does not have enough dimensions", funcname);
- return NULL;
- }
- if (shape_out[idim_out] != shape_in[idim]) {
- PyErr_Format(PyExc_ValueError,
- "output parameter for reduction operation %s "
- "has a non-reduction dimension not equal to "
- "the input one.", funcname);
- return NULL;
- }
- strides[idim] = strides_out[idim_out];
- shape[idim] = shape_out[idim_out];
- ++idim_out;
- }
- }
-
- if (idim_out != ndim_out) {
- PyErr_Format(PyExc_ValueError,
- "output parameter for reduction operation %s "
- "has too many dimensions", funcname);
- return NULL;
- }
-
- /* Allocate the view */
- PyArray_Descr *dtype = PyArray_DESCR(out);
- Py_INCREF(dtype);
-
- PyArrayObject_fields *ret = (PyArrayObject_fields *)PyArray_NewFromDescrAndBase(
- &PyArray_Type, dtype,
- ndim, shape, strides, PyArray_DATA(out),
- PyArray_FLAGS(out), NULL, (PyObject *)out);
- if (ret == NULL) {
- return NULL;
- }
-
- if (need_copy) {
- PyArrayObject *ret_copy = (PyArrayObject *)PyArray_NewLikeArray(
- (PyArrayObject *)ret, NPY_ANYORDER, NULL, 0);
- if (ret_copy == NULL) {
- Py_DECREF(ret);
- return NULL;
- }
-
- if (PyArray_CopyInto(ret_copy, (PyArrayObject *)ret) != 0) {
- Py_DECREF(ret);
- Py_DECREF(ret_copy);
- return NULL;
- }
-
- if (PyArray_SetWritebackIfCopyBase(ret_copy, (PyArrayObject *)ret) < 0) {
- Py_DECREF(ret_copy);
- return NULL;
- }
-
- return ret_copy;
- }
- else {
- return (PyArrayObject *)ret;
- }
-}
-
-/*
- * Creates a result for reducing 'operand' along the axes specified
- * in 'axis_flags'. If 'dtype' isn't NULL, this function steals a
- * reference to 'dtype'.
- *
- * If 'out' isn't NULL, this function creates a view conforming
- * to the number of dimensions of 'operand', adding a singleton dimension
- * for each reduction axis specified. In this case, 'dtype' is ignored
- * (but its reference is still stolen), and the caller must handle any
- * type conversion/validity check for 'out'
- *
- * If 'subok' is true, creates a result with the subtype of 'operand',
- * otherwise creates on with the base ndarray class.
- *
- * If 'out' is NULL, it allocates a new array whose shape matches that of
- * 'operand', except for at the reduction axes. If 'dtype' is NULL, the dtype
- * of 'operand' is used for the result.
- */
-NPY_NO_EXPORT PyArrayObject *
-PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out,
- PyArray_Descr *dtype, npy_bool *axis_flags,
- int keepdims, int subok,
- const char *funcname)
-{
- PyArrayObject *result;
-
- if (out == NULL) {
- /* This function steals the reference to 'dtype' */
- result = allocate_reduce_result(operand, axis_flags, dtype, subok);
- }
- else {
- int need_copy = 0;
-
- if (solve_may_share_memory(operand, out, 1) != 0) {
- need_copy = 1;
- }
-
- /* Steal the dtype reference */
- Py_XDECREF(dtype);
- result = conform_reduce_result(operand, axis_flags,
- out, keepdims, funcname, need_copy);
- }
-
- return result;
-}
/*
* Count the number of dimensions selected in 'axis_flags'
@@ -275,18 +46,15 @@ count_axes(int ndim, const npy_bool *axis_flags)
/*
* This function initializes a result array for a reduction operation
* which has no identity. This means it needs to copy the first element
- * it sees along the reduction axes to result, then return a view of
- * the operand which excludes that element.
+ * it sees along the reduction axes to result.
*
* If a reduction has an identity, such as 0 or 1, the result should be
- * initialized by calling PyArray_AssignZero(result, NULL, NULL) or
- * PyArray_AssignOne(result, NULL, NULL), because this function raises an
- * exception when there are no elements to reduce (which appropriate iff the
- * reduction operation has no identity).
+ * fully initialized to the identity, because this function raises an
+ * exception when there are no elements to reduce (which is appropriate if,
+ * and only if, the reduction operation has no identity).
*
* This means it copies the subarray indexed at zero along each reduction axis
- * into 'result', then returns a view into 'operand' excluding those copied
- * elements.
+ * into 'result'.
*
* result : The array into which the result is computed. This must have
* the same number of dimensions as 'operand', but for each
@@ -294,105 +62,83 @@ count_axes(int ndim, const npy_bool *axis_flags)
* operand : The array being reduced.
* axis_flags : An array of boolean flags, one for each axis of 'operand'.
* When a flag is True, it indicates to reduce along that axis.
- * out_skip_first_count : This gets populated with the number of first-visit
- * elements that should be skipped during the
- * iteration loop.
* funcname : The name of the reduction operation, for the purpose of
* better quality error messages. For example, "numpy.max"
* would be a good name for NumPy's max function.
*
- * Returns a view which contains the remaining elements on which to do
- * the reduction.
+ * Returns -1 if an error occurred, and otherwise the reduce arrays size,
+ * which is the number of elements already initialized.
*/
-NPY_NO_EXPORT PyArrayObject *
-PyArray_InitializeReduceResult(
+NPY_NO_EXPORT int
+PyArray_CopyInitialReduceValues(
PyArrayObject *result, PyArrayObject *operand,
- const npy_bool *axis_flags,
- npy_intp *out_skip_first_count, const char *funcname)
+ const npy_bool *axis_flags, const char *funcname,
+ int keepdims)
{
- npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS];
+ npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ npy_intp *shape_orig = PyArray_SHAPE(operand);
+ npy_intp *strides_orig = PyArray_STRIDES(operand);
PyArrayObject *op_view = NULL;
- int idim, ndim, nreduce_axes;
- ndim = PyArray_NDIM(operand);
-
- /* Default to no skipping first-visit elements in the iteration */
- *out_skip_first_count = 0;
-
- /* Take a view into 'operand' which we can modify. */
- op_view = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type);
- if (op_view == NULL) {
- return NULL;
- }
+ int ndim = PyArray_NDIM(operand);
/*
- * Now copy the subarray of the first element along each reduction axis,
- * then return a view to the rest.
+ * Copy the subarray of the first element along each reduction axis.
*
* Adjust the shape to only look at the first element along
- * any of the reduction axes. We count the number of reduction axes
- * at the same time.
+ * any of the reduction axes. If keepdims is False remove the axes
+ * entirely.
*/
- shape = PyArray_SHAPE(op_view);
- nreduce_axes = 0;
- if (ndim) {
- memcpy(shape_orig, shape, ndim * sizeof(npy_intp));
- }
- for (idim = 0; idim < ndim; ++idim) {
+ int idim_out = 0;
+ npy_intp size = 1;
+ for (int idim = 0; idim < ndim; idim++) {
if (axis_flags[idim]) {
- if (shape[idim] == 0) {
+ if (NPY_UNLIKELY(shape_orig[idim] == 0)) {
PyErr_Format(PyExc_ValueError,
- "zero-size array to reduction operation %s "
- "which has no identity",
- funcname);
- Py_DECREF(op_view);
- return NULL;
+ "zero-size array to reduction operation %s "
+ "which has no identity", funcname);
+ return -1;
}
- shape[idim] = 1;
- ++nreduce_axes;
+ if (keepdims) {
+ shape[idim_out] = 1;
+ strides[idim_out] = 0;
+ idim_out++;
+ }
+ }
+ else {
+ size *= shape_orig[idim];
+ shape[idim_out] = shape_orig[idim];
+ strides[idim_out] = strides_orig[idim];
+ idim_out++;
}
}
- /*
- * Copy the elements into the result to start.
- */
- if (PyArray_CopyInto(result, op_view) < 0) {
- Py_DECREF(op_view);
- return NULL;
+ PyArray_Descr *descr = PyArray_DESCR(operand);
+ Py_INCREF(descr);
+ op_view = (PyArrayObject *)PyArray_NewFromDescr(
+ &PyArray_Type, descr, idim_out, shape, strides,
+ PyArray_DATA(operand), 0, NULL);
+ if (op_view == NULL) {
+ return -1;
}
/*
- * If there is one reduction axis, adjust the view's
- * shape to only look at the remaining elements
+ * Copy the elements into the result to start.
*/
- if (nreduce_axes == 1) {
- strides = PyArray_STRIDES(op_view);
- for (idim = 0; idim < ndim; ++idim) {
- if (axis_flags[idim]) {
- shape[idim] = shape_orig[idim] - 1;
- ((PyArrayObject_fields *)op_view)->data += strides[idim];
- }
- }
- }
- /* If there are zero reduction axes, make the view empty */
- else if (nreduce_axes == 0) {
- for (idim = 0; idim < ndim; ++idim) {
- shape[idim] = 0;
- }
+ int res = PyArray_CopyInto(result, op_view);
+ Py_DECREF(op_view);
+ if (res < 0) {
+ return -1;
}
+
/*
- * Otherwise iterate over the whole operand, but tell the inner loop
- * to skip the elements we already copied by setting the skip_first_count.
+ * If there were no reduction axes, we would already be done here.
+ * Note that if there is only a single reduction axis, in principle the
+ * iteration could be set up more efficiently here by removing that
+ * axis before setting up the iterator (simplifying the iteration since
+ * `skip_first_count` (the returned size) can be set to 0).
*/
- else {
- *out_skip_first_count = PyArray_SIZE(result);
-
- Py_DECREF(op_view);
- Py_INCREF(operand);
- op_view = operand;
- }
-
- return op_view;
+ return size;
}
/*
@@ -418,7 +164,7 @@ PyArray_InitializeReduceResult(
* with size one.
* subok : If true, the result uses the subclass of operand, otherwise
* it is always a base class ndarray.
- * identity : If Py_None, PyArray_InitializeReduceResult is used, otherwise
+ * identity : If Py_None, PyArray_CopyInitialReduceValues is used, otherwise
* this value is used to initialize the result to
* the reduction's unit.
* loop : The loop which does the reduction.
@@ -444,13 +190,12 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
NPY_CASTING casting,
npy_bool *axis_flags, int reorderable,
int keepdims,
- int subok,
PyObject *identity,
PyArray_ReduceLoopFunc *loop,
void *data, npy_intp buffersize, const char *funcname,
int errormask)
{
- PyArrayObject *result = NULL, *op_view = NULL;
+ PyArrayObject *result = NULL;
npy_intp skip_first_count = 0;
/* Iterator parameters */
@@ -476,49 +221,10 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
return NULL;
}
- /*
- * This either conforms 'out' to the ndim of 'operand', or allocates
- * a new array appropriate for this reduction.
- *
- * A new array with WRITEBACKIFCOPY is allocated if operand and out have memory
- * overlap.
- */
- Py_INCREF(result_dtype);
- result = PyArray_CreateReduceResult(operand, out,
- result_dtype, axis_flags,
- keepdims, subok, funcname);
- if (result == NULL) {
- goto fail;
- }
-
- /*
- * Initialize the result to the reduction unit if possible,
- * otherwise copy the initial values and get a view to the rest.
- */
- if (identity != Py_None) {
- if (PyArray_FillWithScalar(result, identity) < 0) {
- goto fail;
- }
- op_view = operand;
- Py_INCREF(op_view);
- }
- else {
- op_view = PyArray_InitializeReduceResult(
- result, operand, axis_flags, &skip_first_count, funcname);
- if (op_view == NULL) {
- goto fail;
- }
- /* empty op_view signals no reduction; but 0-d arrays cannot be empty */
- if ((PyArray_SIZE(op_view) == 0) || (PyArray_NDIM(operand) == 0)) {
- Py_DECREF(op_view);
- op_view = NULL;
- goto finish;
- }
- }
/* Set up the iterator */
- op[0] = result;
- op[1] = op_view;
+ op[0] = out;
+ op[1] = operand;
op_dtypes[0] = result_dtype;
op_dtypes[1] = operand_dtype;
@@ -527,13 +233,17 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
NPY_ITER_GROWINNER |
NPY_ITER_DONT_NEGATE_STRIDES |
NPY_ITER_ZEROSIZE_OK |
- NPY_ITER_REDUCE_OK |
- NPY_ITER_REFS_OK;
+ NPY_ITER_REFS_OK |
+ NPY_ITER_DELAY_BUFALLOC |
+ NPY_ITER_COPY_IF_OVERLAP;
op_flags[0] = NPY_ITER_READWRITE |
NPY_ITER_ALIGNED |
+ NPY_ITER_ALLOCATE |
NPY_ITER_NO_SUBTYPE;
op_flags[1] = NPY_ITER_READONLY |
- NPY_ITER_ALIGNED;
+ NPY_ITER_ALIGNED |
+ NPY_ITER_NO_BROADCAST;
+
if (wheremask != NULL) {
op[2] = wheremask;
/* wheremask is guaranteed to be NPY_BOOL, so borrow its reference */
@@ -545,15 +255,84 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
op_flags[2] = NPY_ITER_READONLY;
}
+ /* Set up result array axes mapping, operand and wheremask use default */
+ int result_axes[NPY_MAXDIMS];
+ int *op_axes[3] = {result_axes, NULL, NULL};
+
+ int curr_axis = 0;
+ for (int i = 0; i < PyArray_NDIM(operand); i++) {
+ if (axis_flags[i]) {
+ if (keepdims) {
+ result_axes[i] = NPY_ITER_REDUCTION_AXIS(curr_axis);
+ curr_axis++;
+ }
+ else {
+ result_axes[i] = NPY_ITER_REDUCTION_AXIS(-1);
+ }
+ }
+ else {
+ result_axes[i] = curr_axis;
+ curr_axis++;
+ }
+ }
+ if (out != NULL) {
+ /* NpyIter does not raise a good error message in this common case. */
+ if (NPY_UNLIKELY(curr_axis != PyArray_NDIM(out))) {
+ if (keepdims) {
+ PyErr_Format(PyExc_ValueError,
+ "output parameter for reduction operation %s has the "
+ "wrong number of dimensions: Found %d but expected %d "
+ "(must match the operand's when keepdims=True)",
+ funcname, PyArray_NDIM(out), curr_axis);
+ }
+ else {
+ PyErr_Format(PyExc_ValueError,
+ "output parameter for reduction operation %s has the "
+ "wrong number of dimensions: Found %d but expected %d",
+ funcname, PyArray_NDIM(out), curr_axis);
+ }
+ goto fail;
+ }
+ }
+
iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, flags,
NPY_KEEPORDER, casting,
op_flags,
op_dtypes,
- -1, NULL, NULL, buffersize);
+ PyArray_NDIM(operand), op_axes, NULL, buffersize);
if (iter == NULL) {
goto fail;
}
+ result = NpyIter_GetOperandArray(iter)[0];
+
+ /*
+ * Initialize the result to the reduction unit if possible,
+ * otherwise copy the initial values and get a view to the rest.
+ */
+
+ if (identity != Py_None) {
+ if (PyArray_FillWithScalar(result, identity) < 0) {
+ goto fail;
+ }
+ }
+ else {
+ /*
+ * For 1-D skip_first_count could be optimized to 0, but no-identity
+ * reductions are not super common.
+ * (see also comment in CopyInitialReduceValues)
+ */
+ skip_first_count = PyArray_CopyInitialReduceValues(
+ result, operand, axis_flags, funcname, keepdims);
+ if (skip_first_count < 0) {
+ goto fail;
+ }
+ }
+
+ if (!NpyIter_Reset(iter, NULL)) {
+ goto fail;
+ }
+
/* Start with the floating-point exception flags cleared */
npy_clear_floatstatus_barrier((char*)&iter);
@@ -595,29 +374,15 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
goto fail;
}
- NpyIter_Deallocate(iter);
- Py_DECREF(op_view);
-
-finish:
- /* Strip out the extra 'one' dimensions in the result */
- if (out == NULL) {
- if (!keepdims) {
- PyArray_RemoveAxesInPlace(result, axis_flags);
- }
- }
- else {
- PyArray_ResolveWritebackIfCopy(result); /* prevent spurious warnings */
- Py_DECREF(result);
+ if (out != NULL) {
result = out;
- Py_INCREF(result);
}
+ Py_INCREF(result);
+ NpyIter_Deallocate(iter);
return result;
fail:
- PyArray_ResolveWritebackIfCopy(result); /* prevent spurious warnings */
- Py_XDECREF(result);
- Py_XDECREF(op_view);
if (iter != NULL) {
NpyIter_Deallocate(iter);
}
diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h
index 0c2183ed6..372605dba 100644
--- a/numpy/core/src/umath/reduction.h
+++ b/numpy/core/src/umath/reduction.h
@@ -128,9 +128,7 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter,
* of cache behavior or multithreading requirements.
* keepdims : If true, leaves the reduction dimensions in the result
* with size one.
- * subok : If true, the result uses the subclass of operand, otherwise
- * it is always a base class ndarray.
- * identity : If Py_None, PyArray_InitializeReduceResult is used, otherwise
+ * identity : If Py_None, PyArray_CopyInitialReduceValues is used, otherwise
* this value is used to initialize the result to
* the reduction's unit.
* loop : The loop which does the reduction.
@@ -147,7 +145,6 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
NPY_CASTING casting,
npy_bool *axis_flags, int reorderable,
int keepdims,
- int subok,
PyObject *identity,
PyArray_ReduceLoopFunc *loop,
void *data, npy_intp buffersize, const char *funcname,
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 591fbd7ad..b35f377d7 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2701,9 +2701,13 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
}
}
- /* Any output core dimensions shape should be ignored */
+ /*
+ * Any output core dimensions shape should be ignored, so we add
+ * it as a Reduce dimension (which can be broadcast with the rest).
+ * These will be removed before the actual iteration for gufuncs.
+ */
for (idim = broadcast_ndim; idim < iter_ndim; ++idim) {
- op_axes_arrays[i][idim] = -1;
+ op_axes_arrays[i][idim] = NPY_ITER_REDUCTION_AXIS(-1);
}
/* Except for when it belongs to this output */
@@ -2775,7 +2779,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
*/
_ufunc_setup_flags(ufunc, NPY_ITER_COPY | NPY_UFUNC_DEFAULT_INPUT_FLAGS,
NPY_ITER_UPDATEIFCOPY |
- NPY_ITER_READWRITE |
+ NPY_ITER_WRITEONLY |
NPY_UFUNC_DEFAULT_OUTPUT_FLAGS,
op_flags);
/* For the generalized ufunc, we get the loop right away too */
@@ -2824,7 +2828,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
iter_flags = ufunc->iter_flags |
NPY_ITER_MULTI_INDEX |
NPY_ITER_REFS_OK |
- NPY_ITER_REDUCE_OK |
NPY_ITER_ZEROSIZE_OK |
NPY_ITER_COPY_IF_OVERLAP;
@@ -3650,7 +3653,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
result = PyUFunc_ReduceWrapper(arr, out, wheremask, dtype, dtype,
NPY_UNSAFE_CASTING,
axis_flags, reorderable,
- keepdims, 0,
+ keepdims,
initial,
reduce_loop,
ufunc, buffersize, ufunc_name, errormask);
diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py
index c106c528d..7b3c3a40d 100644
--- a/numpy/core/tests/test_nditer.py
+++ b/numpy/core/tests/test_nditer.py
@@ -1520,6 +1520,13 @@ def test_iter_allocate_output_errors():
[['readonly'], ['writeonly', 'allocate']],
op_dtypes=[None, np.dtype('f4')],
op_axes=[None, [0, 2, 1, 0]])
+ # Not all axes may be specified if a reduction. If there is a hole
+ # in op_axes, this is an error.
+ a = arange(24, dtype='i4').reshape(2, 3, 4)
+ assert_raises(ValueError, nditer, [a, None], ["reduce_ok"],
+ [['readonly'], ['readwrite', 'allocate']],
+ op_dtypes=[None, np.dtype('f4')],
+ op_axes=[None, [0, np.newaxis, 2]])
def test_iter_remove_axis():
a = arange(24).reshape(2, 3, 4)
@@ -2661,6 +2668,14 @@ def test_iter_allocated_array_dtypes():
b[1] = a + 1
assert_equal(it.operands[1], [[0, 2], [2, 4], [19, 21]])
+ # Check the same (less sensitive) thing when `op_axes` with -1 is given.
+ it = np.nditer(([[1, 3, 20]], None), op_dtypes=[None, ('i4', (2,))],
+ flags=["reduce_ok"], op_axes=[None, (-1, 0)])
+ for a, b in it:
+ b[0] = a - 1
+ b[1] = a + 1
+ assert_equal(it.operands[1], [[0, 2], [2, 4], [19, 21]])
+
# Make sure this works for scalars too
it = np.nditer((10, 2, None), op_dtypes=[None, None, ('i4', (2, 2))])
for a, b, c in it:
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 642308d3b..1305f4877 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -610,7 +610,17 @@ class TestUfunc:
warnings.simplefilter("always")
u += v
assert_equal(len(w), 1)
- assert_(x[0,0] != u[0, 0])
+ assert_(x[0, 0] != u[0, 0])
+
+ # Output reduction should not be allowed.
+ # See gh-15139
+ a = np.arange(6).reshape(3, 2)
+ b = np.ones(2)
+ out = np.empty(())
+ assert_raises(ValueError, umt.inner1d, a, b, out)
+ out2 = np.empty(3)
+ c = umt.inner1d(a, b, out2)
+ assert_(c is out2)
def test_out_broadcasts(self):
# For ufuncs and gufuncs (not for reductions), we currently allow
@@ -957,7 +967,10 @@ class TestUfunc:
assert_array_equal(result, np.vstack((np.zeros(3), a[2], -a[1])))
assert_raises(ValueError, umt.cross1d, np.eye(4), np.eye(4))
assert_raises(ValueError, umt.cross1d, a, np.arange(4.))
+ # Wrong output core dimension.
assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros((3, 4)))
+ # Wrong output broadcast dimension (see gh-15139).
+ assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros(3))
def test_can_ignore_signature(self):
# Comparing the effects of ? in signature:
@@ -1905,22 +1918,47 @@ class TestUfunc:
assert_equal(y_base[1,:], y_base_copy[1,:])
assert_equal(y_base[3,:], y_base_copy[3,:])
- @pytest.mark.parametrize('output_shape',
- [(), (1,), (1, 1), (1, 3), (4, 3)])
+ @pytest.mark.parametrize('out_shape',
+ [(), (1,), (3,), (1, 1), (1, 3), (4, 3)])
+ @pytest.mark.parametrize('keepdims', [True, False])
@pytest.mark.parametrize('f_reduce', [np.add.reduce, np.minimum.reduce])
- def test_reduce_wrong_dimension_output(self, f_reduce, output_shape):
+ def test_reduce_wrong_dimension_output(self, f_reduce, keepdims, out_shape):
# Test that we're not incorrectly broadcasting dimensions.
# See gh-15144 (failed for np.add.reduce previously).
a = np.arange(12.).reshape(4, 3)
- out = np.empty(output_shape, a.dtype)
- assert_raises(ValueError, f_reduce, a, axis=0, out=out)
- if output_shape != (1, 3):
- assert_raises(ValueError, f_reduce, a, axis=0, out=out,
- keepdims=True)
+ out = np.empty(out_shape, a.dtype)
+
+ correct_out = f_reduce(a, axis=0, keepdims=keepdims)
+ if out_shape != correct_out.shape:
+ with assert_raises(ValueError):
+ f_reduce(a, axis=0, out=out, keepdims=keepdims)
else:
- check = f_reduce(a, axis=0, out=out, keepdims=True)
+ check = f_reduce(a, axis=0, out=out, keepdims=keepdims)
assert_(check is out)
- assert_array_equal(check, f_reduce(a, axis=0, keepdims=True))
+ assert_array_equal(check, correct_out)
+
+ def test_reduce_output_does_not_broadcast_input(self):
+ # Test that the output shape cannot broadcast an input dimension
+ # (it never can add dimensions, but it might expand an existing one)
+ a = np.ones((1, 10))
+ out_correct = (np.empty((1, 1)))
+ out_incorrect = np.empty((3, 1))
+ np.add.reduce(a, axis=-1, out=out_correct, keepdims=True)
+ np.add.reduce(a, axis=-1, out=out_correct[:, 0], keepdims=False)
+ with assert_raises(ValueError):
+ np.add.reduce(a, axis=-1, out=out_incorrect, keepdims=True)
+ with assert_raises(ValueError):
+ np.add.reduce(a, axis=-1, out=out_incorrect[:, 0], keepdims=False)
+
+ def test_reduce_output_subclass_ok(self):
+ class MyArr(np.ndarray):
+ pass
+
+ out = np.empty(())
+ np.add.reduce(np.ones(5), out=out) # no subclass, all fine
+ out = out.view(MyArr)
+ assert np.add.reduce(np.ones(5), out=out) is out
+ assert type(np.add.reduce(out)) is MyArr
def test_no_doc_string(self):
# gh-9337