diff options
author | Matti Picus <matti.picus@gmail.com> | 2020-07-23 07:40:37 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-23 07:40:37 +0300 |
commit | c3a887e55e0a14e3a31460e7a79e5f7f965fea68 (patch) | |
tree | 3d6817b928b2a1e543ee37644ea6ba3d4d5bfe13 | |
parent | a39e3021b9304fb5a76542d444b7fec2dcff1374 (diff) | |
parent | 325fbe4c90b0f499ccbd7750bd628dc8cebbcfbc (diff) | |
download | numpy-c3a887e55e0a14e3a31460e7a79e5f7f965fea68.tar.gz |
Merge pull request #16248 from alexrockhill/edge
MRG, ENH: added edge keyword argument to digitize
-rw-r--r-- | doc/release/upcoming_changes/16248.new_feature.rst | 13 | ||||
-rw-r--r-- | numpy/lib/function_base.py | 26 | ||||
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 7 |
3 files changed, 43 insertions, 3 deletions
diff --git a/doc/release/upcoming_changes/16248.new_feature.rst b/doc/release/upcoming_changes/16248.new_feature.rst new file mode 100644 index 000000000..823646d00 --- /dev/null +++ b/doc/release/upcoming_changes/16248.new_feature.rst @@ -0,0 +1,13 @@ +Digitize Keyword Edge +--------------------- +A keyword argument has been added to `np.digitize` so that the +edge case is covered in the last bin. For example, + +``` +x = [1, 2, 3] +bins = [0, 1.5, 3] +``` + +`np.digitize(x, bins, right=False, edge=True)` yields `[1, 2, 2]` +whereas before the edge keyword argument existed it would yield +`[1, 2, 3]`.
\ No newline at end of file diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 6ea9cc4de..41caa805e 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -4733,12 +4733,12 @@ def append(arr, values, axis=None): return concatenate((arr, values), axis=axis) -def _digitize_dispatcher(x, bins, right=None): +def _digitize_dispatcher(x, bins, right=None, edge=None): return (x, bins) @array_function_dispatch(_digitize_dispatcher) -def digitize(x, bins, right=False): +def digitize(x, bins, right=False, edge=False): """ Return the indices of the bins to which each value in input array belongs. @@ -4767,6 +4767,10 @@ def digitize(x, bins, right=False): does not include the right edge. The left bin end is open in this case, i.e., bins[i-1] <= x < bins[i] is the default behavior for monotonically increasing bins. + edge : bool, optional + Whether to include the last right edge if right==False or the first + left edge if right==True so that the whole interval from the least + to the greatest value of bins is covered. 
Returns ------- @@ -4782,7 +4786,7 @@ def digitize(x, bins, right=False): See Also -------- - bincount, histogram, unique, searchsorted + bincount, histogram, unique, nextafter, searchsorted Notes ----- @@ -4839,6 +4843,22 @@ def digitize(x, bins, right=False): if mono == 0: raise ValueError("bins must be monotonically increasing or decreasing") + if edge: + # ========= ============= ============================ ===== ===== + # `right` order of bins returned index `i` satisfies delta index + # ========= ============= ============================ ===== ===== + # ``False`` increasing ``bins[i-1] <= x < bins[i]`` 1 -1 + # ``True`` increasing ``bins[i-1] < x <= bins[i]`` -1 0 + # ``False`` decreasing ``bins[i-1] > x >= bins[i]`` 1 0 + # ``True`` decreasing ``bins[i-1] >= x > bins[i]`` -1 -1 + # ========= ============= ============================ ===== ===== + delta = -1 if right else 1 + idx = -1 if delta == mono else 0 + if np.issubdtype(bins.dtype, _nx.integer): + bins[idx] += delta + else: + bins[idx] = np.nextafter(bins[idx], bins[idx] + delta) + # this is backwards because the arguments below are swapped side = 'left' if right else 'right' if mono == -1: diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index eb2fc3311..32f660772 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1712,6 +1712,12 @@ class TestDigitize: bins = [1, 1, 0] assert_array_equal(digitize(x, bins, False), [3, 2, 0, 0]) assert_array_equal(digitize(x, bins, True), [3, 3, 2, 0]) + bins = [-1, 0, 1, 2] + assert_array_equal(digitize(x, bins, False, True), [1, 2, 3, 3]) + assert_array_equal(digitize(x, bins, True, True), [1, 1, 2, 3]) + bins = [2, 1, 0, -1] + assert_array_equal(digitize(x, bins, False, True), [3, 2, 1, 1]) + assert_array_equal(digitize(x, bins, True, True), [3, 3, 2, 1]) bins = [1, 1, 1, 1] assert_array_equal(digitize(x, bins, False), [0, 0, 4, 4]) assert_array_equal(digitize(x, bins, True), 
[0, 0, 0, 4]) @@ -1740,6 +1746,7 @@ class TestDigitize: # gh-11022 x = 2**54 # loses precision in a float assert_equal(np.digitize(x, [x - 1, x + 1]), 1) + assert_equal(np.digitize(x, [x - 1, x + 1], False, True), 1) @pytest.mark.xfail( reason="gh-11022: np.core.multiarray._monoticity loses precision") |