ENH: Make var and std methods warn when ddof too big.

Currently the results may be infinite or negative. Instead, issue a RuntimeWarning when ddof is greater than or equal to the number of reduced items, and clamp the degrees of freedom at zero.
author: Charles Harris <charlesr.harris@gmail.com> 2013-07-14 09:57:13 -0600
committer: Charles Harris <charlesr.harris@gmail.com> 2013-08-12 22:33:56 -0600
commit: 02f5258125debef5a0e5f6072805a8c72e4a1bde (patch)
tree: ee2a8e5758202fb3818c6828b14ccd09753c8639 /numpy/core/_methods.py
parent: f16b12e87667a85ab0dd6e26ddd4083117459fa6 (diff)
download: numpy-02f5258125debef5a0e5f6072805a8c72e4a1bde.tar.gz
1 files changed, 29 insertions, 23 deletions
diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py
index ccf02ce36..27e445a54 100644
--- a/numpy/core/_methods.py
+++ b/numpy/core/_methods.py
@@ -5,6 +5,8 @@ and the Python code for the NumPy-namespace function
 """
 from __future__ import division, absolute_import, print_function
 
+import warnings
+
 from numpy.core import multiarray as mu
 from numpy.core import umath as um
 from numpy.core.numeric import asanyarray
@@ -47,15 +49,17 @@ def _count_reduce_items(arr, axis):
 def _mean(a, axis=None, dtype=None, out=None, keepdims=False):
     arr = asanyarray(a)
 
-    # Cast bool, unsigned int, and int to float64
+    rcount = _count_reduce_items(arr, axis)
+    # Make this warning show up first
+    if rcount == 0:
+        warnings.warn("Mean of empty slice.", RuntimeWarning)
+
+
+    # Cast bool, unsigned int, and int to float64 by default
     if dtype is None and issubclass(arr.dtype.type, (nt.integer, nt.bool_)):
-        ret = um.add.reduce(arr, axis=axis, dtype='f8',
-                            out=out, keepdims=keepdims)
-    else:
-        ret = um.add.reduce(arr, axis=axis, dtype=dtype,
-                            out=out, keepdims=keepdims)
+        dtype = mu.dtype('f8')
 
-    rcount = _count_reduce_items(arr, axis)
+    ret = um.add.reduce(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
     if isinstance(ret, mu.ndarray):
         ret = um.true_divide(
                 ret, rcount, out=ret, casting='unsafe', subok=False)
@@ -64,39 +68,41 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False):
 
     return ret
 
-def _var(a, axis=None, dtype=None, out=None, ddof=0,
-                            keepdims=False):
+def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
     arr = asanyarray(a)
 
-    # First compute the mean, saving 'rcount' for reuse later
+    rcount = _count_reduce_items(arr, axis)
+    # Make this warning show up on top.
+    if ddof >= rcount:
+        warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
+
+    # Cast bool, unsigned int, and int to float64 by default
     if dtype is None and issubclass(arr.dtype.type, (nt.integer, nt.bool_)):
-        arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True)
-    else:
-        arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True)
+        dtype = mu.dtype('f8')
 
-    rcount = _count_reduce_items(arr, axis)
+    # Compute the mean.
+    # Note that if dtype is not of inexact type then arrmean will
+    # not be either.
+    arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True)
     if isinstance(arrmean, mu.ndarray):
         arrmean = um.true_divide(
                 arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
     else:
         arrmean = arrmean.dtype.type(arrmean / rcount)
 
-    # arr - arrmean
+    # Compute sum of squared deviations from mean
+    # Note that x may not be inexact
     x = arr - arrmean
-
-    # (arr - arrmean) ** 2
     if issubclass(arr.dtype.type, nt.complexfloating):
         x = um.multiply(x, um.conjugate(x), out=x).real
     else:
         x = um.multiply(x, x, out=x)
-
-    # add.reduce((arr - arrmean) ** 2, axis)
     ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
 
-    # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof)
-    if not keepdims and isinstance(rcount, mu.ndarray):
-        rcount = rcount.squeeze(axis=axis)
-    rcount -= ddof
+    # Compute degrees of freedom and make sure it is not negative.
+    rcount = max([rcount - ddof, 0])
+
+    # divide by degrees of freedom
     if isinstance(ret, mu.ndarray):
         ret = um.true_divide(
                 ret, rcount, out=ret, casting='unsafe', subok=False)
author	Charles Harris <charlesr.harris@gmail.com>	2013-07-14 09:57:13 -0600
committer	Charles Harris <charlesr.harris@gmail.com>	2013-08-12 22:33:56 -0600
commit	02f5258125debef5a0e5f6072805a8c72e4a1bde (patch)
tree	ee2a8e5758202fb3818c6828b14ccd09753c8639 /numpy/core/_methods.py
parent	f16b12e87667a85ab0dd6e26ddd4083117459fa6 (diff)
download	numpy-02f5258125debef5a0e5f6072805a8c72e4a1bde.tar.gz