diff options
author | Aaron Meurer <asmeurer@gmail.com> | 2020-08-06 17:20:24 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-06 18:20:24 -0500 |
commit | 29e2293844e0ac2772b1382ab0ec6a35543954de (patch) | |
tree | 9c2ad06f6890fb3115d7e0c37c3e247185770502 | |
parent | 3023d06a05136bf8345e781cc11a01d776a9f314 (diff) | |
download | numpy-29e2293844e0ac2772b1382ab0ec6a35543954de.tar.gz |
BUG: Raise correct errors in boolean indexing fast path (gh-17010)
Previously the logic assumed that an index was valid if the size was the same
as the indexed array, rather than the shape. This resulted in an index being
incorrectly allowed if the index was all False, like np.empty((3, 3))[np.full((1, 9),
False)], or incorrectly giving a ValueError about broadcasting if the index
was not all False (like np.empty((3, 3))[np.full((1, 9), True)]). Now these
examples both give IndexError with the usual error message.
Fixes #16997.
-rw-r--r-- | doc/release/upcoming_changes/17010.compatibility.rst | 24 | ||||
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 18 | ||||
-rw-r--r-- | numpy/core/tests/test_indexing.py | 39 |
3 files changed, 70 insertions, 11 deletions
diff --git a/doc/release/upcoming_changes/17010.compatibility.rst b/doc/release/upcoming_changes/17010.compatibility.rst new file mode 100644 index 000000000..72ca0a963 --- /dev/null +++ b/doc/release/upcoming_changes/17010.compatibility.rst @@ -0,0 +1,24 @@ +Boolean array indices with mismatching shapes now properly give ``IndexError`` +------------------------------------------------------------------------------ + +Previously, if a boolean array index matched the size of the indexed array but +not the shape, it was incorrectly allowed in some cases. In other cases, it +gave an error, but the error was incorrectly a ``ValueError`` with a message +about broadcasting instead of the correct ``IndexError``. + +For example, the following used to incorrectly give ``ValueError: operands +could not be broadcast together with shapes (2,2) (1,4)``: + +.. code:: python + + np.empty((2, 2))[np.array([[True, False, False, False]])] + +And the following used to incorrectly return ``array([], dtype=float64)``: + +.. code:: python + + np.empty((2, 2))[np.array([[False, False, False, False]])] + +Both now correctly give ``IndexError: boolean index did not match indexed +array along dimension 0; dimension is 2 but corresponding boolean dimension is +1``. diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index c27e0c391..1f2bec8b1 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -539,22 +539,22 @@ prepare_index(PyArrayObject *self, PyObject *index, /* * There are two types of boolean indices (which are equivalent, * for the most part though). A single boolean index of matching - * dimensionality and size is a boolean index. - * If this is not the case, it is instead expanded into (multiple) - * integer array indices. + * shape is a boolean index. If this is not the case, it is + * instead expanded into (multiple) integer array indices. */ PyArrayObject *nonzero_result[NPY_MAXDIMS]; if ((index_ndim == 1) && allow_boolean) { /* - * If ndim and size match, this can be optimized as a single - * boolean index. The size check is necessary only to support - * old non-matching sizes by using fancy indexing instead. - * The reason for that is that fancy indexing uses nonzero, - * and only the result of nonzero is checked for legality. + * If shapes match exactly, this can be optimized as a single + * boolean index. When the dimensions are identical but the shapes are not, + * this is always an error. The check ensures that these errors are raised + * and match those of the generic path. */ if ((PyArray_NDIM(arr) == PyArray_NDIM(self)) - && PyArray_SIZE(arr) == PyArray_SIZE(self)) { + && PyArray_CompareLists(PyArray_DIMS(arr), + PyArray_DIMS(self), + PyArray_NDIM(arr))) { index_type = HAS_BOOL; indices[curr_idx].type = HAS_BOOL; diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py index 1069cbe8d..667c49240 100644 --- a/numpy/core/tests/test_indexing.py +++ b/numpy/core/tests/test_indexing.py @@ -7,8 +7,8 @@ import numpy as np from numpy.core._multiarray_tests import array_indexing from itertools import product from numpy.testing import ( - assert_, assert_equal, assert_raises, assert_array_equal, assert_warns, - HAS_REFCOUNT, + assert_, assert_equal, assert_raises, assert_raises_regex, + assert_array_equal, assert_warns, HAS_REFCOUNT, ) @@ -1239,6 +1239,41 @@ class TestBooleanIndexing: assert_raises(IndexError, lambda: a[False, [0, 1], ...]) + def test_boolean_indexing_fast_path(self): + # These used to either give the wrong error, or incorrectly give no + # error. + a = np.ones((3, 3)) + + # This used to incorrectly work (and give an array of shape (0,)) + idx1 = np.array([[False]*9]) + assert_raises_regex(IndexError, + "boolean index did not match indexed array along dimension 0; " + "dimension is 3 but corresponding boolean dimension is 1", + lambda: a[idx1]) + + # This used to incorrectly give a ValueError: operands could not be broadcast together + idx2 = np.array([[False]*8 + [True]]) + assert_raises_regex(IndexError, + "boolean index did not match indexed array along dimension 0; " + "dimension is 3 but corresponding boolean dimension is 1", + lambda: a[idx2]) + + # This is the same as it used to be. The above two should work like this. + idx3 = np.array([[False]*10]) + assert_raises_regex(IndexError, + "boolean index did not match indexed array along dimension 0; " + "dimension is 3 but corresponding boolean dimension is 1", + lambda: a[idx3]) + + # This used to give ValueError: non-broadcastable operand + a = np.ones((1, 1, 2)) + idx = np.array([[[True], [False]]]) + assert_raises_regex(IndexError, + "boolean index did not match indexed array along dimension 1; " + "dimension is 1 but corresponding boolean dimension is 2", + lambda: a[idx]) + + class TestArrayToIndexDeprecation: """Creating an an index from array not 0-D is an error. |