Merge pull request #3908 from juliantaylor/median-percentilepre-removal-numpybook

add extended axis and keepdims support to percentile and median
author: Charles Harris <charlesr.harris@gmail.com> 2014-03-13 16:34:07 -0600
committer: Charles Harris <charlesr.harris@gmail.com> 2014-03-13 16:34:07 -0600
commit: 48c77a64aba109de54d90f103203f1990325c37c (patch)
tree: 8b6ea9fd608233937a5a8d092221ceb2f0c58c65 /numpy/lib/function_base.py
parent: 50b60fef378cf6ae99334475141bebca8fe43f50 (diff)
parent: 7d53c812d3e68ff28320ee7e32bc9816937b4142 (diff)
download: numpy-pre-removal-numpybook.tar.gz
1 files changed, 116 insertions, 9 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 7b2d077c3..edce15776 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -13,6 +13,7 @@ __all__ = [
 import warnings
 import sys
 import collections
+import operator
 
 import numpy as np
 import numpy.core.numeric as _nx
@@ -2694,7 +2695,67 @@ def msort(a):
     return b
 
 
-def median(a, axis=None, out=None, overwrite_input=False):
+def _ureduce(a, func, **kwargs):
+    """
+    Internal Function.
+    Call `func` with `a` as first argument swapping the axes to use extended
+    axis on functions that don't support it natively.
+
+    Returns result and a.shape with axis dims set to 1.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    func : callable
+        Reduction function Kapable of receiving an axis argument.
+        It is is called with `a` as first argument followed by `kwargs`.
+     kwargs : keyword arguments
+        additional keyword arguments to pass to `func`.
+
+    Returns
+    -------
+    result : tuple
+        Result of func(a, **kwargs) and a.shape with axis dims set to 1
+        which can be used to reshape the result to the same shape a ufunc with
+        keepdims=True would produce.
+
+    """
+    a = np.asanyarray(a)
+    axis = kwargs.get('axis', None)
+    if axis is not None:
+        keepdim = list(a.shape)
+        nd = a.ndim
+        try:
+            axis = operator.index(axis)
+            if axis >= nd or axis < -nd:
+                raise IndexError("axis %d out of bounds (%d)" % (axis, a.ndim))
+            keepdim[axis] = 1
+        except TypeError:
+            sax = set()
+            for x in axis:
+                if x >= nd or x < -nd:
+                    raise IndexError("axis %d out of bounds (%d)" % (x, nd))
+                if x in sax:
+                    raise ValueError("duplicate value in axis")
+                sax.add(x % nd)
+                keepdim[x] = 1
+            keep = sax.symmetric_difference(frozenset(range(nd)))
+            nkeep = len(keep)
+            # swap axis that should not be reduced to front
+            for i, s in enumerate(sorted(keep)):
+                a = a.swapaxes(i, s)
+            # merge reduced axis
+            a = a.reshape(a.shape[:nkeep] + (-1,))
+            kwargs['axis'] = -1
+    else:
+        keepdim = [1] * a.ndim
+
+    r = func(a, **kwargs)
+    return r, keepdim
+
+
+def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
     """
     Compute the median along the specified axis.
 
@@ -2704,9 +2765,10 @@ def median(a, axis=None, out=None, overwrite_input=False):
     ----------
     a : array_like
         Input array or object that can be converted to an array.
-    axis : int, optional
+    axis : int or sequence of int, optional
         Axis along which the medians are computed. The default (axis=None)
         is to compute the median along a flattened version of the array.
+        A sequence of axes is supported since version 1.9.0.
     out : ndarray, optional
         Alternative output array in which to place the result. It must have
         the same shape and buffer length as the expected output, but the
@@ -2719,6 +2781,13 @@ def median(a, axis=None, out=None, overwrite_input=False):
        will probably be fully or partially sorted. Default is False. Note
        that, if `overwrite_input` is True and the input is not already an
        ndarray, an error will be raised.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the original `arr`.
+
+        .. versionadded:: 1.9.0
+
 
     Returns
     -------
@@ -2768,6 +2837,16 @@ def median(a, axis=None, out=None, overwrite_input=False):
     >>> assert not np.all(a==b)
 
     """
+    r, k = _ureduce(a, func=_median, axis=axis, out=out,
+                    overwrite_input=overwrite_input)
+    if keepdims:
+        return r.reshape(k)
+    else:
+        return r
+
+def _median(a, axis=None, out=None, overwrite_input=False):
+    # can't be reasonably be implemented in terms of percentile as we have to
+    # call mean to not break astropy
     a = np.asanyarray(a)
     if axis is not None and axis >= a.ndim:
         raise IndexError(
@@ -2817,7 +2896,7 @@ def median(a, axis=None, out=None, overwrite_input=False):
 
 
 def percentile(a, q, axis=None, out=None,
-               overwrite_input=False, interpolation='linear'):
+               overwrite_input=False, interpolation='linear', keepdims=False):
     """
     Compute the qth percentile of the data along the specified axis.
 
@@ -2829,9 +2908,10 @@ def percentile(a, q, axis=None, out=None,
         Input array or object that can be converted to an array.
     q : float in range of [0,100] (or sequence of floats)
         Percentile to compute which must be between 0 and 100 inclusive.
-    axis : int, optional
+    axis : int or sequence of int, optional
         Axis along which the percentiles are computed. The default (None)
         is to compute the percentiles along a flattened version of the array.
+        A sequence of axes is supported since version 1.9.0.
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output,
@@ -2857,6 +2937,12 @@ def percentile(a, q, axis=None, out=None,
             * midpoint: (`i` + `j`) / 2.
 
         .. versionadded:: 1.9.0
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the original `arr`.
+
+        .. versionadded:: 1.9.0
 
     Returns
     -------
@@ -2913,8 +2999,22 @@ def percentile(a, q, axis=None, out=None,
     array([ 3.5])
 
     """
+    q = asarray(q, dtype=np.float64)
+    r, k = _ureduce(a, func=_percentile, q=q, axis=axis, out=out,
+                    overwrite_input=overwrite_input,
+                    interpolation=interpolation)
+    if keepdims:
+        if q.ndim == 0:
+            return r.reshape(k)
+        else:
+            return r.reshape([len(q)] + k)
+    else:
+        return r
+
+
+def _percentile(a, q, axis=None, out=None,
+                overwrite_input=False, interpolation='linear', keepdims=False):
     a = asarray(a)
-    q = asarray(q)
     if q.ndim == 0:
         # Do not allow 0-d arrays because following code fails for scalar
         zerod = True
@@ -2922,10 +3022,17 @@ def percentile(a, q, axis=None, out=None,
     else:
         zerod = False
 
-    q = q / 100.0
-    if (q < 0).any() or (q > 1).any():
-        raise ValueError(
-            "Percentiles must be in the range [0,100]")
+    # avoid expensive reductions, relevant for arrays with < O(1000) elements
+    if q.size < 10:
+        for i in range(q.size):
+            if q[i] < 0. or q[i] > 100.:
+                raise ValueError("Percentiles must be in the range [0,100]")
+            q[i] /= 100.
+    else:
+        # faster than any()
+        if np.count_nonzero(q < 0.) or np.count_nonzero(q > 100.):
+            raise ValueError("Percentiles must be in the range [0,100]")
+        q /= 100.
 
     # prepare a for partioning
     if overwrite_input:
author	Charles Harris <charlesr.harris@gmail.com>	2014-03-13 16:34:07 -0600
committer	Charles Harris <charlesr.harris@gmail.com>	2014-03-13 16:34:07 -0600
commit	48c77a64aba109de54d90f103203f1990325c37c (patch)
tree	8b6ea9fd608233937a5a8d092221ceb2f0c58c65 /numpy/lib/function_base.py
parent	50b60fef378cf6ae99334475141bebca8fe43f50 (diff)
parent	7d53c812d3e68ff28320ee7e32bc9816937b4142 (diff)
download	numpy-pre-removal-numpybook.tar.gz