Merge pull request #16248 from alexrockhill/edge

MRG, ENH: added edge keyword argument to digitize
author: Matti Picus <matti.picus@gmail.com> 2020-07-23 07:40:37 +0300
committer: GitHub <noreply@github.com> 2020-07-23 07:40:37 +0300
commit: c3a887e55e0a14e3a31460e7a79e5f7f965fea68 (patch)
tree: 3d6817b928b2a1e543ee37644ea6ba3d4d5bfe13
parent: a39e3021b9304fb5a76542d444b7fec2dcff1374 (diff)
parent: 325fbe4c90b0f499ccbd7750bd628dc8cebbcfbc (diff)
download: numpy-c3a887e55e0a14e3a31460e7a79e5f7f965fea68.tar.gz
3 files changed, 43 insertions, 3 deletions
diff --git a/doc/release/upcoming_changes/16248.new_feature.rst b/doc/release/upcoming_changes/16248.new_feature.rst
new file mode 100644
index 000000000..823646d00
--- /dev/null
+++ b/doc/release/upcoming_changes/16248.new_feature.rst
@@ -0,0 +1,13 @@
+Digitize Keyword Edge
+---------------------
+An `edge` keyword argument has been added to `np.digitize` so that a value
+equal to the outermost bin edge falls in the adjacent bin. For example,
+
+.. code-block:: python
+
+    x = [1, 2, 3]
+    bins = [0, 1.5, 3]
+
+`np.digitize(x, bins, right=False, edge=True)` yields `[1, 2, 2]`
+whereas before the edge keyword argument existed it would yield
+`[1, 2, 3]`.
+\ No newline at end of file
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 6ea9cc4de..41caa805e 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -4733,12 +4733,12 @@ def append(arr, values, axis=None):
     return concatenate((arr, values), axis=axis)
 
 
-def _digitize_dispatcher(x, bins, right=None):
+def _digitize_dispatcher(x, bins, right=None, edge=None):
     return (x, bins)
 
 
 @array_function_dispatch(_digitize_dispatcher)
-def digitize(x, bins, right=False):
+def digitize(x, bins, right=False, edge=False):
     """
     Return the indices of the bins to which each value in input array belongs.
 
@@ -4767,6 +4767,10 @@ def digitize(x, bins, right=False):
         does not include the right edge. The left bin end is open in this
         case, i.e., bins[i-1] <= x < bins[i] is the default behavior for
         monotonically increasing bins.
+    edge : bool, optional
+        If True, include the last right edge when ``right=False``, or the
+        first left edge when ``right=True``, so that the whole interval from
+        the least to the greatest value of `bins` is covered. Default is False.
 
     Returns
     -------
@@ -4782,7 +4786,7 @@ def digitize(x, bins, right=False):
 
     See Also
     --------
-    bincount, histogram, unique, searchsorted
+    bincount, histogram, unique, nextafter, searchsorted
 
     Notes
     -----
@@ -4839,6 +4843,22 @@ def digitize(x, bins, right=False):
     if mono == 0:
         raise ValueError("bins must be monotonically increasing or decreasing")
 
+    if edge:
+        # =========  =============  ============================  =====  =====
+        # `right`    order of bins  returned index `i` satisfies  delta  index
+        # =========  =============  ============================  =====  =====
+        # ``False``  increasing     ``bins[i-1] <= x < bins[i]``  1      -1
+        # ``True``   increasing     ``bins[i-1] < x <= bins[i]``  -1     0
+        # ``False``  decreasing     ``bins[i-1] > x >= bins[i]``  1      0
+        # ``True``   decreasing     ``bins[i-1] >= x > bins[i]``  -1     -1
+        # =========  =============  ============================  =====  =====
+        delta = -1 if right else 1
+        idx = -1 if delta == mono else 0
+        if np.issubdtype(bins.dtype, _nx.integer):
+            bins[idx] += delta
+        else:
+            bins[idx] = np.nextafter(bins[idx], bins[idx] + delta)
+
     # this is backwards because the arguments below are swapped
     side = 'left' if right else 'right'
     if mono == -1:
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index eb2fc3311..32f660772 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1712,6 +1712,12 @@ class TestDigitize:
         bins = [1, 1, 0]
         assert_array_equal(digitize(x, bins, False), [3, 2, 0, 0])
         assert_array_equal(digitize(x, bins, True), [3, 3, 2, 0])
+        bins = [-1, 0, 1, 2]
+        assert_array_equal(digitize(x, bins, False, True), [1, 2, 3, 3])
+        assert_array_equal(digitize(x, bins, True, True), [1, 1, 2, 3])
+        bins = [2, 1, 0, -1]
+        assert_array_equal(digitize(x, bins, False, True), [3, 2, 1, 1])
+        assert_array_equal(digitize(x, bins, True, True), [3, 3, 2, 1])
         bins = [1, 1, 1, 1]
         assert_array_equal(digitize(x, bins, False), [0, 0, 4, 4])
         assert_array_equal(digitize(x, bins, True), [0, 0, 0, 4])
@@ -1740,6 +1746,7 @@ class TestDigitize:
         # gh-11022
         x = 2**54  # loses precision in a float
         assert_equal(np.digitize(x, [x - 1, x + 1]), 1)
+        assert_equal(np.digitize(x, [x - 1, x + 1], False, True), 1)
 
     @pytest.mark.xfail(
         reason="gh-11022: np.core.multiarray._monoticity loses precision")
author	Matti Picus <matti.picus@gmail.com>	2020-07-23 07:40:37 +0300
committer	GitHub <noreply@github.com>	2020-07-23 07:40:37 +0300
commit	c3a887e55e0a14e3a31460e7a79e5f7f965fea68 (patch)
tree	3d6817b928b2a1e543ee37644ea6ba3d4d5bfe13
parent	a39e3021b9304fb5a76542d444b7fec2dcff1374 (diff)
parent	325fbe4c90b0f499ccbd7750bd628dc8cebbcfbc (diff)
download	numpy-c3a887e55e0a14e3a31460e7a79e5f7f965fea68.tar.gz