diff options
author | Warren Weckesser <warren.weckesser@gmail.com> | 2019-08-27 15:15:26 -0400 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2019-11-08 06:03:21 -0700 |
commit | 461a64aac1f8eca81f400f6e783c90f5ccad5f48 (patch) | |
tree | f888ae8c85e4d638a9f3c25a1dcdd4ae05d5b690 | |
parent | 426f03164ebe572e74fd26786b86e9dbc83d3487 (diff) | |
download | numpy-461a64aac1f8eca81f400f6e783c90f5ccad5f48.tar.gz |
BUG: lib: Fix histogram problem with signed integer arrays.
An input such as

    np.histogram(np.array([-2, 0, 127], dtype=np.int8), bins="auto")

would raise the exception

    ValueError: Number of samples, -1, must be non-negative.

The problem was that the peak-to-peak value for the input array was
computed with the `ptp` method, which returns a negative value for a
signed integer array whenever the true peak-to-peak value is larger than
the maximum value representable in the array's data type. In the example
above, 127 - (-2) = 129 does not fit in int8, whose maximum is 127.
The fix is to use a peak-to-peak function that returns an
unsigned value for signed integer arrays.
Closes gh-14379.
-rw-r--r-- | numpy/lib/histograms.py | 20 | ||||
-rw-r--r-- | numpy/lib/tests/test_histograms.py | 11 |
2 files changed, 26 insertions, 5 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index d69e04e80..bed1f46b0 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -21,6 +21,16 @@ array_function_dispatch = functools.partial( _range = range +def _ptp(x): + """Peak-to-peak value of x. + + This implementation avoids the problem of signed integer arrays having a + peak-to-peak value that cannot be represented with the array's data type. + This function returns an unsigned value for signed integer arrays. + """ + return _unsigned_subtract(x.max(), x.min()) + + def _hist_bin_sqrt(x, range): """ Square root histogram bin estimator. @@ -39,7 +49,7 @@ def _hist_bin_sqrt(x, range): h : An estimate of the optimal bin width for the given data. """ del range # unused - return x.ptp() / np.sqrt(x.size) + return _ptp(x) / np.sqrt(x.size) def _hist_bin_sturges(x, range): @@ -62,7 +72,7 @@ def _hist_bin_sturges(x, range): h : An estimate of the optimal bin width for the given data. """ del range # unused - return x.ptp() / (np.log2(x.size) + 1.0) + return _ptp(x) / (np.log2(x.size) + 1.0) def _hist_bin_rice(x, range): @@ -86,7 +96,7 @@ def _hist_bin_rice(x, range): h : An estimate of the optimal bin width for the given data. 
""" del range # unused - return x.ptp() / (2.0 * x.size ** (1.0 / 3)) + return _ptp(x) / (2.0 * x.size ** (1.0 / 3)) def _hist_bin_scott(x, range): @@ -136,7 +146,7 @@ def _hist_bin_stone(x, range): """ n = x.size - ptp_x = np.ptp(x) + ptp_x = _ptp(x) if n <= 1 or ptp_x == 0: return 0 @@ -182,7 +192,7 @@ def _hist_bin_doane(x, range): np.true_divide(temp, sigma, temp) np.power(temp, 3, temp) g1 = np.mean(temp) - return x.ptp() / (1.0 + np.log2(x.size) + + return _ptp(x) / (1.0 + np.log2(x.size) + np.log2(1.0 + np.absolute(g1) / sg1)) return 0.0 diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index c96b01d42..594c8e782 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -8,6 +8,7 @@ from numpy.testing import ( assert_array_almost_equal, assert_raises, assert_allclose, assert_array_max_ulp, assert_raises_regex, suppress_warnings, ) +import pytest class TestHistogram(object): @@ -595,6 +596,16 @@ class TestHistogramOptimBinNums(object): msg += " with datasize of {0}".format(testlen) assert_equal(len(a), numbins, err_msg=msg) + @pytest.mark.parametrize("bins", ['auto', 'fd', 'doane', 'scott', + 'stone', 'rice', 'sturges']) + def test_signed_integer_data(self, bins): + # Regression test for gh-14379. + a = np.array([-2, 0, 127], dtype=np.int8) + hist, edges = np.histogram(a, bins=bins) + hist32, edges32 = np.histogram(a.astype(np.int32), bins=bins) + assert_array_equal(hist, hist32) + assert_array_equal(edges, edges32) + def test_simple_weighted(self): """ Check that weighted data raises a TypeError |