Merge pull request #261 from charris/backport-lib

Backport numpy.lib bug fixes.
author: Ralf Gommers <ralf.gommers@googlemail.com> 2012-05-04 12:40:12 -0700
committer: Ralf Gommers <ralf.gommers@googlemail.com> 2012-05-04 12:40:12 -0700
commit: 10a65ff403c86ab7064af8b75fe30e8994e32d60 (patch)
tree: 55a09252076f0d087fb300af247a648bbddcd2b4
parent: 1a9e14f7ba0692bd791fc640850e0342748d2aef (diff)
parent: 3b0aeb2063ff043dee15d95d5103648862a54982 (diff)
download: numpy-10a65ff403c86ab7064af8b75fe30e8994e32d60.tar.gz
8 files changed, 220 insertions, 50 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 18d25f08d..ad0067c7f 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -335,11 +335,11 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
             Found bin edge of size <= 0. Did you specify `bins` with
             non-monotonic sequence?""")
 
+    nbin =  asarray(nbin)
+
     # Handle empty input.
     if N == 0:
-        return np.zeros(D), edges
-
-    nbin =  asarray(nbin)
+        return np.zeros(nbin-2), edges
 
     # Compute the bin number each sample falls into.
     Ncount = {}
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 9c385bb9d..9177d5f2a 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -9,6 +9,7 @@ import os
 import sys
 import itertools
 import warnings
+import weakref
 from operator import itemgetter
 
 from cPickle import load as _cload, loads
@@ -107,7 +108,8 @@ class BagObj(object):
 
     """
     def __init__(self, obj):
-        self._obj = obj
+        # Use weakref to make NpzFile objects collectable by refcount
+        self._obj = weakref.proxy(obj)
     def __getattribute__(self, key):
         try:
             return object.__getattribute__(self, '_obj')[key]
@@ -205,6 +207,7 @@ class NpzFile(object):
         if self.fid is not None:
             self.fid.close()
             self.fid = None
+        self.f = None # break reference cycle
 
     def __del__(self):
         self.close()
@@ -704,11 +707,10 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             if len(shape) == 0:
                 return ([dt.base], None)
             else:
-                packing = [(shape[-1], tuple)]
+                packing = [(shape[-1], list)]
                 if len(shape) > 1:
-                    for dim in dt.shape[-2:0:-1]:
-                        packing = [(dim*packing[0][0],packing*dim)]
-                    packing = packing*shape[0]
+                    for dim in dt.shape[-2::-1]:
+                        packing = [(dim*packing[0][0], packing*dim)]
                 return ([dt.base] * int(np.prod(dt.shape)), packing)
         else:
             types = []
@@ -717,7 +719,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
                 tp, bytes = dt.fields[field]
                 flat_dt, flat_packing = flatten_dtype(tp)
                 types.extend(flat_dt)
-                packing.append((len(flat_dt),flat_packing))
+                # Avoid extra nesting for subarrays
+                if len(tp.shape) > 0:
+                    packing.extend(flat_packing)
+                else:
+                    packing.append((len(flat_dt), flat_packing))
             return (types, packing)
 
     def pack_items(items, packing):
@@ -726,6 +732,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             return items[0]
         elif packing is tuple:
             return tuple(items)
+        elif packing is list:
+            return list(items)
         else:
             start = 0
             ret = []
@@ -762,6 +770,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             # End of lines reached
             first_line = ''
             first_vals = []
+            warnings.warn('loadtxt: Empty input file: "%s"' % fname)
         N = len(usecols or first_vals)
 
         dtype_types, packing = flatten_dtype(dtype)
@@ -847,15 +856,22 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'):
     fmt : str or sequence of strs
         A single format (%10.5f), a sequence of formats, or a
         multi-format string, e.g. 'Iteration %d -- %10.5f', in which
-        case `delimiter` is ignored.
-    delimiter : str
+        case `delimiter` is ignored. For complex `X`, the legal options
+        for `fmt` are:
+            a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted
+                like `' (%s+%sj)' % (fmt, fmt)`
+            b) a full string specifying every real and imaginary part, e.g.
+                `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns
+            c) a list of specifiers, one per column - in this case, the real
+                and imaginary part must have separate specifiers,
+                e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
+    delimiter : str, optional
         Character separating columns.
     newline : str
         .. versionadded:: 1.5.0
 
         Character separating lines.
 
-
     See Also
     --------
     save : Save an array to a binary file in NumPy ``.npy`` format
@@ -959,6 +975,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'):
         else:
             ncol = X.shape[1]
 
+        iscomplex_X = np.iscomplexobj(X)
         # `fmt` can be a string with multiple insertion points or a
         # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
         if type(fmt) in (list, tuple):
@@ -966,17 +983,31 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'):
                 raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
             format = asstr(delimiter).join(map(asstr, fmt))
         elif type(fmt) is str:
-            if fmt.count('%') == 1:
-                fmt = [fmt, ]*ncol
+            n_fmt_chars = fmt.count('%')
+            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
+            if n_fmt_chars == 1:
+                if iscomplex_X:
+                    fmt = [' (%s+%sj)' % (fmt, fmt),] * ncol
+                else:
+                    fmt = [fmt, ] * ncol
                 format = delimiter.join(fmt)
-            elif fmt.count('%') != ncol:
-                raise AttributeError('fmt has wrong number of %% formats.  %s'
-                                     % fmt)
+            elif iscomplex_X and n_fmt_chars != (2 * ncol):
+                raise error
+            elif ((not iscomplex_X) and n_fmt_chars != ncol):
+                raise error
             else:
                 format = fmt
 
-        for row in X:
-            fh.write(asbytes(format % tuple(row) + newline))
+        if iscomplex_X:
+            for row in X:
+                row2 = []
+                for number in row:
+                    row2.append(number.real)
+                    row2.append(number.imag)
+                fh.write(asbytes(format % tuple(row2) + newline))
+        else:
+            for row in X:
+                fh.write(asbytes(format % tuple(row) + newline))
     finally:
         if own_fh:
             fh.close()
@@ -1274,8 +1305,10 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
                     first_line = asbytes('').join(first_line.split(comments)[1:])
             first_values = split_line(first_line)
     except StopIteration:
-        # might want to return empty array instead of raising error.
-        raise IOError('End-of-file reached before encountering data.')
+        # return an empty array if the datafile is empty
+        first_line = asbytes('')
+        first_values = []
+        warnings.warn('genfromtxt: Empty input file: "%s"' % fname)
 
     # Should we take the first values as names ?
     if names is True:
diff --git a/numpy/lib/src/_compiled_base.c b/numpy/lib/src/_compiled_base.c
index 066519bf1..de6d8b2a8 100644
--- a/numpy/lib/src/_compiled_base.c
+++ b/numpy/lib/src/_compiled_base.c
@@ -1,6 +1,7 @@
 #include "Python.h"
 #include "structmember.h"
 #include "numpy/noprefix.h"
+#include "numpy/npy_3kcompat.h"
 #include "npy_config.h"
 
 static intp
@@ -115,16 +116,30 @@ arr_bincount(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
                 kwlist, &list, &weight, &mlength)) {
             goto fail;
     }
-    if (!(lst = PyArray_ContiguousFromAny(list, PyArray_INTP, 1, 1))) {
-            goto fail;
+
+    lst = (PyArrayObject *)PyArray_ContiguousFromAny(list, NPY_INTP, 1, 1);
+    if (lst == NULL) {
+        goto fail;
     }
     len = PyArray_SIZE(lst);
+    type = PyArray_DescrFromType(NPY_INTP);
+
+    /* handle empty list */
     if (len < 1) {
-        PyErr_SetString(PyExc_ValueError,
-                "The first argument cannot be empty.");
-        goto fail;
+        if (mlength == Py_None) {
+            minlength = 0;
+        }
+        else if (!(minlength = PyArray_PyIntAsIntp(mlength))) {
+            goto fail;
+        }
+        if (!(ans = PyArray_Zeros(1, &minlength, type, 0))){
+            goto fail;
+        }
+        Py_DECREF(lst);
+        return ans;
     }
-    numbers = (intp *) PyArray_DATA(lst);
+
+    numbers = (npy_intp *) PyArray_DATA(lst);
     mxi = mxx(numbers, len);
     mni = mnx(numbers, len);
     if (numbers[mni] < 0) {
@@ -147,7 +162,6 @@ arr_bincount(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
             ans_size = minlength;
         }
     }
-    type = PyArray_DescrFromType(PyArray_INTP);
     if (weight == Py_None) {
         if (!(ans = PyArray_Zeros(1, &ans_size, type, 0))) {
             goto fail;
@@ -1096,8 +1110,8 @@ arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *args)
     docstr = PyString_AS_STRING(str);
 #endif
 
-#define _TESTDOC1(typebase) (obj->ob_type == &Py##typebase##_Type)
-#define _TESTDOC2(typebase) (obj->ob_type == Py##typebase##_TypePtr)
+#define _TESTDOC1(typebase) (Py_TYPE(obj) == &Py##typebase##_Type)
+#define _TESTDOC2(typebase) (Py_TYPE(obj) == Py##typebase##_TypePtr)
 #define _ADDDOC(typebase, doc, name) do {                               \
         Py##typebase##Object *new = (Py##typebase##Object *)obj;        \
         if (!(doc)) {                                                   \
@@ -1297,8 +1311,8 @@ pack_or_unpack_bits(PyObject *input, int axis, int unpack)
             new = temp;
         }
         else {
-            ubyte *optr, *iptr;
-            out = PyArray_New(new->ob_type, 0, NULL, NPY_UBYTE,
+            char *optr, *iptr;
+            out = (PyArrayObject *)PyArray_New(Py_TYPE(new), 0, NULL, NPY_UBYTE,
                     NULL, NULL, 0, 0, NULL);
             if (out == NULL) {
                 goto fail;
@@ -1338,8 +1352,9 @@ pack_or_unpack_bits(PyObject *input, int axis, int unpack)
     }
 
     /* Create output array */
-    out = PyArray_New(new->ob_type, PyArray_NDIM(new), outdims, PyArray_UBYTE,
-            NULL, NULL, 0, PyArray_ISFORTRAN(new), NULL);
+    out = (PyArrayObject *)PyArray_New(Py_TYPE(new),
+                        PyArray_NDIM(new), outdims, NPY_UBYTE,
+                        NULL, NULL, 0, PyArray_ISFORTRAN(new), NULL);
     if (out == NULL) {
         goto fail;
     }
@@ -1437,17 +1452,17 @@ define_types(void)
     if (myobj == NULL) {
         return;
     }
-    PyGetSetDescr_TypePtr = myobj->ob_type;
+    PyGetSetDescr_TypePtr = Py_TYPE(myobj);
     myobj = PyDict_GetItemString(tp_dict, "alignment");
     if (myobj == NULL) {
         return;
     }
-    PyMemberDescr_TypePtr = myobj->ob_type;
+    PyMemberDescr_TypePtr = Py_TYPE(myobj);
     myobj = PyDict_GetItemString(tp_dict, "newbyteorder");
     if (myobj == NULL) {
         return;
     }
-    PyMethodDescr_TypePtr = myobj->ob_type;
+    PyMethodDescr_TypePtr = Py_TYPE(myobj);
     return;
 }
 
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index e65c84158..d7d5513a6 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -749,7 +749,10 @@ class TestHistogramdd(TestCase):
 
     def test_empty(self):
         a, b = histogramdd([[], []], bins=([0,1], [0,1]))
-        assert_array_max_ulp(a, array([ 0., 0.]))
+        assert_array_max_ulp(a, array([[ 0.]]))
+        a, b = np.histogramdd([[], [], []], bins=2)
+        assert_array_max_ulp(a, np.zeros((2, 2, 2)))
+
 
     def test_bins_errors(self):
         """There are two ways to specify bins. Check for the right errors when
@@ -1100,6 +1103,17 @@ class TestBincount(TestCase):
         y = np.bincount(x, w, 8)
         assert_array_equal(y, np.array([0, 0.2, 0.5, 0, 0.5, 0.1, 0, 0]))
 
+    def test_empty(self):
+        x = np.array([], dtype=int)
+        y = np.bincount(x)
+        assert_array_equal(x,y)
+
+    def test_empty_with_minlength(self):
+        x = np.array([], dtype=int)
+        y = np.bincount(x, minlength=5)
+        assert_array_equal(y, np.zeros(5, dtype=int))
+
+
 class TestInterp(TestCase):
     def test_exceptions(self):
         assert_raises(ValueError, interp, 0, [], [])
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index cb42b66a7..adb8db0ff 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -2,7 +2,7 @@ import numpy as np
 import numpy.ma as ma
 from numpy.ma.testutils import (TestCase, assert_equal, assert_array_equal,
     assert_raises, run_module_suite)
-from numpy.testing import assert_warns, assert_
+from numpy.testing import assert_warns, assert_, build_err_msg
 
 import sys
 
@@ -13,6 +13,9 @@ import threading
 from tempfile import mkstemp, NamedTemporaryFile
 import time
 from datetime import datetime
+import warnings
+import gc
+from numpy.testing.utils import WarningManager
 
 from numpy.lib._iotools import ConverterError, ConverterLockError, \
                                ConversionWarning
@@ -238,6 +241,58 @@ class TestSaveTxt(TestCase):
         finally:
             os.unlink(name)
 
+    def test_complex_arrays(self):
+        ncols = 2
+        nrows = 2
+        a = np.zeros((ncols, nrows), dtype=np.complex128)
+        re = np.pi
+        im = np.e
+        a[:] = re + 1.0j * im
+        # One format only
+        c = StringIO()
+        np.savetxt(c, a, fmt=' %+.3e')
+        c.seek(0)
+        lines = c.readlines()
+        _assert_floatstr_lines_equal(lines, asbytes_nested([
+            ' ( +3.142e+00+ +2.718e+00j)  ( +3.142e+00+ +2.718e+00j)\n',
+            ' ( +3.142e+00+ +2.718e+00j)  ( +3.142e+00+ +2.718e+00j)\n']))
+        # One format for each real and imaginary part
+        c = StringIO()
+        np.savetxt(c, a, fmt='  %+.3e' * 2 * ncols)
+        c.seek(0)
+        lines = c.readlines()
+        _assert_floatstr_lines_equal(lines, asbytes_nested([
+            '  +3.142e+00  +2.718e+00  +3.142e+00  +2.718e+00\n',
+            '  +3.142e+00  +2.718e+00  +3.142e+00  +2.718e+00\n']))
+        # One format for each complex number
+        c = StringIO()
+        np.savetxt(c, a, fmt=['(%.3e%+.3ej)'] * ncols)
+        c.seek(0)
+        lines = c.readlines()
+        _assert_floatstr_lines_equal(lines, asbytes_nested([
+            '(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n',
+            '(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n']))
+
+
+def _assert_floatstr_lines_equal(actual_lines, expected_lines):
+    """A string comparison function that also works on Windows + Python 2.5.
+
+    This is necessary because Python 2.5 on Windows inserts an extra 0 in
+    the exponent of the string representation of floating point numbers.
+
+    Only used in TestSaveTxt.test_complex_arrays, no attempt made to make this
+    more generic.
+
+    Once Python 2.5 compatibility is dropped, simply use `assert_equal` instead
+    of this function.
+    """
+    for actual, expected in zip(actual_lines, expected_lines):
+        if actual != expected:
+            expected_win25 = expected.replace("e+00", "e+000")
+            if actual != expected_win25:
+                msg = build_err_msg([actual, expected], '', verbose=True)
+                raise AssertionError(msg)
+
 
 class TestLoadTxt(TestCase):
     def test_record(self):
@@ -392,6 +447,7 @@ class TestLoadTxt(TestCase):
         assert_array_equal(x, a)
 
     def test_empty_file(self):
+        warnings.filterwarnings("ignore", message="loadtxt: Empty input file:")
         c = StringIO()
         x = np.loadtxt(c)
         assert_equal(x.shape, (0,))
@@ -951,11 +1007,17 @@ M   33  21.99
                              usecols=('a', 'c'), **kwargs)
         assert_equal(test, ctrl)
 
-
     def test_empty_file(self):
-        "Test that an empty file raises the proper exception"
-        data = StringIO()
-        assert_raises(IOError, np.ndfromtxt, data)
+        "Test that an empty file raises the proper warning."
+        warn_ctx = WarningManager()
+        warn_ctx.__enter__()
+        try:
+            warnings.filterwarnings("ignore", message="genfromtxt: Empty input file:")
+            data = StringIO()
+            test = np.genfromtxt(data)
+            assert_equal(test, np.array([]))
+        finally:
+            warn_ctx.__exit__()
 
 
     def test_fancy_dtype_alt(self):
@@ -1429,5 +1491,20 @@ def test_npzfile_dict():
 
     assert 'x' in list(z.iterkeys())
 
+def test_load_refcount():
+    # Check that objects returned by np.load are directly freed based on
+    # their refcount, rather than needing the gc to collect them.
+
+    f = StringIO()
+    np.savez(f, [1, 2, 3])
+    f.seek(0)
+
+    gc.collect()
+    n_before = len(gc.get_objects())
+    np.load(f)
+    n_after = len(gc.get_objects())
+
+    assert_equal(n_before, n_after)
+
 if __name__ == "__main__":
     run_module_suite()
diff --git a/numpy/lib/tests/test_regression.py b/numpy/lib/tests/test_regression.py
index c0cfff9a5..71400d112 100644
--- a/numpy/lib/tests/test_regression.py
+++ b/numpy/lib/tests/test_regression.py
@@ -169,10 +169,6 @@ class TestRegression(TestCase):
             sys.stdout.close()
             sys.stdout = oldstdout
 
-    def test_bincount_empty(self):
-        """Ticket #1387: empty array as input for bincount."""
-        assert_raises(ValueError, lambda : np.bincount(np.array([], dtype=np.intp)))
-
     def test_include_dirs(self):
         """As a sanity check, just test that get_include and
         get_numarray_include include something reasonable.  Somewhat
@@ -202,5 +198,25 @@ class TestRegression(TestCase):
         except:
             raise AssertionError()
 
+    def test_loadtxt_fields_subarrays(self):
+        # For ticket #1936
+        from StringIO import StringIO
+        dt = [("a", 'u1', 2), ("b", 'u1', 2)]
+        x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt)
+        assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt))
+
+        dt = [("a", [("a", 'u1', (1,3)), ("b", 'u1')])]
+        x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt)
+        assert_equal(x, np.array([(((0,1,2), 3),)], dtype=dt))
+
+        dt = [("a", 'u1', (2,2))]
+        x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt)
+        assert_equal(x, np.array([(((0, 1), (2, 3)),)], dtype=dt))
+
+        dt = [("a", 'u1', (2,3,2))]
+        x = np.loadtxt(StringIO("0 1 2 3 4 5 6 7 8 9 10 11"), dtype=dt)
+        data = [((((0,1), (2,3), (4,5)), ((6,7), (8,9), (10,11))),)]
+        assert_equal(x, np.array(data, dtype=dt))
+
 if __name__ == "__main__":
     run_module_suite()
diff --git a/numpy/lib/tests/test_twodim_base.py b/numpy/lib/tests/test_twodim_base.py
index 85e76a384..e5731ff88 100644
--- a/numpy/lib/tests/test_twodim_base.py
+++ b/numpy/lib/tests/test_twodim_base.py
@@ -224,7 +224,10 @@ class TestHistogram2d(TestCase):
 
     def test_empty(self):
         a, edge1, edge2 = histogram2d([],[], bins=([0,1],[0,1]))
-        assert_array_max_ulp(a, array([ 0., 0.]))
+        assert_array_max_ulp(a, array([[ 0.]]))
+
+        a, edge1, edge2 = histogram2d([], [], bins=4)
+        assert_array_max_ulp(a, np.zeros((4, 4)))
 
 
 class TestTri(TestCase):
@@ -236,6 +239,18 @@ class TestTri(TestCase):
         assert_array_equal(tri(3,dtype=bool),out.astype(bool))
 
 
+def test_tril_triu():
+    for dtype in np.typecodes['AllFloat'] + np.typecodes['AllInteger']:
+        a = np.ones((2, 2), dtype=dtype)
+        b = np.tril(a)
+        c = np.triu(a)
+        assert_array_equal(b, [[1, 0], [1, 1]])
+        assert_array_equal(c, b.T)
+        # should return the same dtype as the original array
+        assert_equal(b.dtype, a.dtype)
+        assert_equal(c.dtype, a.dtype)
+
+
 def test_mask_indices():
     # simple test without offset
     iu = mask_indices(3, np.triu)
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 5e89e606b..27424c996 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -424,7 +424,7 @@ def tril(m, k=0):
 
     """
     m = asanyarray(m)
-    out = multiply(tri(m.shape[0], m.shape[1], k=k, dtype=int),m)
+    out = multiply(tri(m.shape[0], m.shape[1], k=k, dtype=m.dtype),m)
     return out
 
 def triu(m, k=0):
@@ -450,7 +450,7 @@ def triu(m, k=0):
 
     """
     m = asanyarray(m)
-    out = multiply((1 - tri(m.shape[0], m.shape[1], k - 1, int)), m)
+    out = multiply((1 - tri(m.shape[0], m.shape[1], k - 1, dtype=m.dtype)), m)
     return out
 
 # borrowed from John Hunter and matplotlib
author	Ralf Gommers <ralf.gommers@googlemail.com>	2012-05-04 12:40:12 -0700
committer	Ralf Gommers <ralf.gommers@googlemail.com>	2012-05-04 12:40:12 -0700
commit	10a65ff403c86ab7064af8b75fe30e8994e32d60 (patch)
tree	55a09252076f0d087fb300af247a648bbddcd2b4
parent	1a9e14f7ba0692bd791fc640850e0342748d2aef (diff)
parent	3b0aeb2063ff043dee15d95d5103648862a54982 (diff)
download	numpy-10a65ff403c86ab7064af8b75fe30e8994e32d60.tar.gz