summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron Meurer <asmeurer@gmail.com>2020-08-06 17:20:24 -0600
committerGitHub <noreply@github.com>2020-08-06 18:20:24 -0500
commit29e2293844e0ac2772b1382ab0ec6a35543954de (patch)
tree9c2ad06f6890fb3115d7e0c37c3e247185770502
parent3023d06a05136bf8345e781cc11a01d776a9f314 (diff)
downloadnumpy-29e2293844e0ac2772b1382ab0ec6a35543954de.tar.gz
BUG: Raise correct errors in boolean indexing fast path (gh-17010)
Previously the logic assumed that an index was valid if the size was the same as the indexed array, rather than the shape. This resulted in an index being incorrectly allowed if the index was all False, like np.empty((3, 3))[np.full((1, 9), False)], or incorrectly giving a ValueError about broadcasting if the index was not all False (like np.empty((3, 3))[np.full((1, 9), True)]). Now these examples both give IndexError with the usual error message. Fixes #16997.
-rw-r--r--doc/release/upcoming_changes/17010.compatibility.rst24
-rw-r--r--numpy/core/src/multiarray/mapping.c18
-rw-r--r--numpy/core/tests/test_indexing.py39
3 files changed, 70 insertions, 11 deletions
diff --git a/doc/release/upcoming_changes/17010.compatibility.rst b/doc/release/upcoming_changes/17010.compatibility.rst
new file mode 100644
index 000000000..72ca0a963
--- /dev/null
+++ b/doc/release/upcoming_changes/17010.compatibility.rst
@@ -0,0 +1,24 @@
+Boolean array indices with mismatching shapes now properly give ``IndexError``
+------------------------------------------------------------------------------
+
+Previously, if a boolean array index matched the size of the indexed array but
+not the shape, it was incorrectly allowed in some cases. In other cases, it
+gave an error, but the error was incorrectly a ``ValueError`` with a message
+about broadcasting instead of the correct ``IndexError``.
+
+For example, the following used to incorrectly give ``ValueError: operands
+could not be broadcast together with shapes (2,2) (1,4)``:
+
+.. code:: python
+
+ np.empty((2, 2))[np.array([[True, False, False, False]])]
+
+And the following used to incorrectly return ``array([], dtype=float64)``:
+
+.. code:: python
+
+ np.empty((2, 2))[np.array([[False, False, False, False]])]
+
+Both now correctly give ``IndexError: boolean index did not match indexed
+array along dimension 0; dimension is 2 but corresponding boolean dimension is
+1``.
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index c27e0c391..1f2bec8b1 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -539,22 +539,22 @@ prepare_index(PyArrayObject *self, PyObject *index,
/*
* There are two types of boolean indices (which are equivalent,
* for the most part though). A single boolean index of matching
- * dimensionality and size is a boolean index.
- * If this is not the case, it is instead expanded into (multiple)
- * integer array indices.
+ * shape is a boolean index. If this is not the case, it is
+ * instead expanded into (multiple) integer array indices.
*/
PyArrayObject *nonzero_result[NPY_MAXDIMS];
if ((index_ndim == 1) && allow_boolean) {
/*
- * If ndim and size match, this can be optimized as a single
- * boolean index. The size check is necessary only to support
- * old non-matching sizes by using fancy indexing instead.
- * The reason for that is that fancy indexing uses nonzero,
- * and only the result of nonzero is checked for legality.
+ * If shapes match exactly, this can be optimized as a single
+ * boolean index. When the dimensions are identical but the shapes are not,
+ * this is always an error. The check ensures that these errors are raised
+ * and match those of the generic path.
*/
if ((PyArray_NDIM(arr) == PyArray_NDIM(self))
- && PyArray_SIZE(arr) == PyArray_SIZE(self)) {
+ && PyArray_CompareLists(PyArray_DIMS(arr),
+ PyArray_DIMS(self),
+ PyArray_NDIM(arr))) {
index_type = HAS_BOOL;
indices[curr_idx].type = HAS_BOOL;
diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py
index 1069cbe8d..667c49240 100644
--- a/numpy/core/tests/test_indexing.py
+++ b/numpy/core/tests/test_indexing.py
@@ -7,8 +7,8 @@ import numpy as np
from numpy.core._multiarray_tests import array_indexing
from itertools import product
from numpy.testing import (
- assert_, assert_equal, assert_raises, assert_array_equal, assert_warns,
- HAS_REFCOUNT,
+ assert_, assert_equal, assert_raises, assert_raises_regex,
+ assert_array_equal, assert_warns, HAS_REFCOUNT,
)
@@ -1239,6 +1239,41 @@ class TestBooleanIndexing:
assert_raises(IndexError, lambda: a[False, [0, 1], ...])
+ def test_boolean_indexing_fast_path(self):
+ # These used to either give the wrong error, or incorrectly give no
+ # error.
+ a = np.ones((3, 3))
+
+ # This used to incorrectly work (and give an array of shape (0,))
+ idx1 = np.array([[False]*9])
+ assert_raises_regex(IndexError,
+ "boolean index did not match indexed array along dimension 0; "
+ "dimension is 3 but corresponding boolean dimension is 1",
+ lambda: a[idx1])
+
+ # This used to incorrectly give a ValueError: operands could not be broadcast together
+ idx2 = np.array([[False]*8 + [True]])
+ assert_raises_regex(IndexError,
+ "boolean index did not match indexed array along dimension 0; "
+ "dimension is 3 but corresponding boolean dimension is 1",
+ lambda: a[idx2])
+
+ # This is the same as it used to be. The above two should work like this.
+ idx3 = np.array([[False]*10])
+ assert_raises_regex(IndexError,
+ "boolean index did not match indexed array along dimension 0; "
+ "dimension is 3 but corresponding boolean dimension is 1",
+ lambda: a[idx3])
+
+ # This used to give ValueError: non-broadcastable operand
+ a = np.ones((1, 1, 2))
+ idx = np.array([[[True], [False]]])
+ assert_raises_regex(IndexError,
+ "boolean index did not match indexed array along dimension 1; "
+ "dimension is 1 but corresponding boolean dimension is 2",
+ lambda: a[idx])
+
+
class TestArrayToIndexDeprecation:
"""Creating an an index from array not 0-D is an error.