diff options
author | Allan Haldane <allan.haldane@gmail.com> | 2018-06-10 21:54:21 -0400 |
---|---|---|
committer | Allan Haldane <allan.haldane@gmail.com> | 2018-06-11 11:51:04 -0400 |
commit | e08eced7990fbdcecb2bd81d3fc736f69bad6dfd (patch) | |
tree | 9ded5059b440cff8c4f52519692e4c456c66717d | |
parent | 8525d3e30f3b57332dfd374bc64b068366126b52 (diff) | |
download | numpy-e08eced7990fbdcecb2bd81d3fc736f69bad6dfd.tar.gz |
MAINT: push back multifield copy->view changes to 1.16
-rw-r--r-- | doc/release/1.15.0-notes.rst | 10 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert.c | 8 | ||||
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 60 | ||||
-rw-r--r-- | numpy/core/src/private/npy_import.h | 14 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 67 | ||||
-rw-r--r-- | numpy/core/tests/test_records.py | 2 | ||||
-rw-r--r-- | numpy/doc/structured_arrays.py | 74 | ||||
-rw-r--r-- | numpy/lib/recfunctions.py | 78 | ||||
-rw-r--r-- | numpy/lib/tests/test_recfunctions.py | 16 |
10 files changed, 297 insertions, 34 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst index a269e25f1..cc193530c 100644 --- a/doc/release/1.15.0-notes.rst +++ b/doc/release/1.15.0-notes.rst @@ -154,6 +154,16 @@ Since ``np.ma.masked`` is a readonly scalar, copying should be a no-op. These functions now behave consistently with ``np.copy()``. +Multifield Indexing of Structured Arrays will still return a copy +----------------------------------------------------------------- +The change that multi-field indexing of structured arrays returns a view +instead of a copy is pushed back to 1.16. A new method +``numpy.lib.recfunctions.repack_fields`` has been introduced to help mitigate +the effects of this change, which can be used to write code compatible with +both numpy 1.15 and 1.16. For more information on how to update code to account +for this future change see "basics/structured arrays/accessing multiple fields" +in the user guide. + C API changes ============= diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index f191f8db4..c70f8526e 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -424,7 +424,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, * __len__ is not defined. */ if (maxdims == 0 || !PySequence_Check(obj) || PySequence_Size(obj) < 0) { - // clear any PySequence_Size error, which corrupts further calls to it + /* clear any PySequence_Size error which corrupts further calls */ PyErr_Clear(); if (*out_dtype == NULL || (*out_dtype)->type_num != NPY_OBJECT) { diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c index 0e38aaa61..e88582a51 100644 --- a/numpy/core/src/multiarray/convert.c +++ b/numpy/core/src/multiarray/convert.c @@ -613,11 +613,14 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) subtype = Py_TYPE(self); } - if (type != NULL && (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) { + dtype = PyArray_DESCR(self); + + if (type != NULL && !PyArray_EquivTypes(dtype, type) && + (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) { const char *msg = "Numpy has detected that you may be viewing or writing to an array " "returned by selecting multiple fields in a structured array. \n\n" - "This code may break in numpy 1.13 because this will return a view " + "This code may break in numpy 1.16 because this will return a view " "instead of a copy -- see release notes for details."; /* 2016-09-19, 1.12 */ if (DEPRECATE_FUTUREWARNING(msg) < 0) { @@ -629,7 +632,6 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) flags = PyArray_FLAGS(self); - dtype = PyArray_DESCR(self); Py_INCREF(dtype); ret = (PyArrayObject *)PyArray_NewFromDescr_int( subtype, dtype, diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 6a7ffd39d..82a0683f9 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1387,14 +1387,54 @@ array_subscript_asarray(PyArrayObject *self, PyObject *op) } /* + * Helper function for _get_field_view which turns a multifield + * view into a "packed" copy, as done in numpy 1.15 and before. + * In numpy 1.16 this function should be removed. + */ +NPY_NO_EXPORT int +_multifield_view_to_copy(PyArrayObject **view) { + static PyObject *copyfunc = NULL; + PyObject *viewcopy; + + /* return a repacked copy of the view */ + npy_cache_import("numpy.lib.recfunctions", "repack_fields", ©func); + if (copyfunc == NULL) { + goto view_fail; + } + + PyArray_CLEARFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE); + viewcopy = PyObject_CallFunction(copyfunc, "O", *view); + if (viewcopy == NULL) { + goto view_fail; + } + Py_DECREF(*view); + *view = (PyArrayObject*)viewcopy; + + /* warn when writing to the copy */ + PyArray_ENABLEFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE); + return 0; + +view_fail: + Py_DECREF(*view); + *view = NULL; + return 0; +} + +/* * Attempts to subscript an array using a field name or list of field names. * * If an error occurred, return 0 and set view to NULL. If the subscript is not * a string or list of strings, return -1 and set view to NULL. Otherwise * return 0 and set view to point to a new view into arr for the given fields. + * + * In numpy 1.15 and before, in the case of a list of field names the returned + * view will actually be a copy by default, with fields packed together. + * The `force_view` argument causes a view to be returned. This argument can be + * removed in 1.16 when we plan to return a view always. */ NPY_NO_EXPORT int -_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) +_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view, + int force_view) { *view = NULL; @@ -1489,12 +1529,12 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) Py_DECREF(names); return 0; } - // disallow use of titles as index + /* disallow use of titles as index */ if (PyTuple_Size(tup) == 3) { PyObject *title = PyTuple_GET_ITEM(tup, 2); int titlecmp = PyObject_RichCompareBool(title, name, Py_EQ); if (titlecmp == 1) { - // if title == name, we were given a title, not a field name + /* if title == name, we got a title, not a field name */ PyErr_SetString(PyExc_KeyError, "cannot use field titles in multi-field index"); } @@ -1507,7 +1547,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) } Py_DECREF(title); } - // disallow duplicate field indices + /* disallow duplicate field indices */ if (PyDict_Contains(fields, name)) { PyObject *errmsg = PyUString_FromString( "duplicate field of name "); @@ -1552,10 +1592,16 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) PyArray_FLAGS(arr), (PyObject *)arr, (PyObject *)arr, 0, 1); + if (*view == NULL) { return 0; } - return 0; + + /* the code below can be replaced by "return 0" in 1.16 */ + if (force_view) { + return 0; + } + return _multifield_view_to_copy(view); } return -1; } @@ -1583,7 +1629,7 @@ array_subscript(PyArrayObject *self, PyObject *op) /* return fields if op is a string index */ if (PyDataType_HASFIELDS(PyArray_DESCR(self))) { PyArrayObject *view; - int ret = _get_field_view(self, op, &view); + int ret = _get_field_view(self, op, &view, 0); if (ret == 0){ if (view == NULL) { return NULL; @@ -1865,7 +1911,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op) /* field access */ if (PyDataType_HASFIELDS(PyArray_DESCR(self))){ PyArrayObject *view; - int ret = _get_field_view(self, ind, &view); + int ret = _get_field_view(self, ind, &view, 1); if (ret == 0){ if (view == NULL) { return -1; diff --git a/numpy/core/src/private/npy_import.h b/numpy/core/src/private/npy_import.h index 221e1e645..e145a843e 100644 --- a/numpy/core/src/private/npy_import.h +++ b/numpy/core/src/private/npy_import.h @@ -29,4 +29,18 @@ npy_cache_import(const char *module, const char *attr, PyObject **cache) } } +NPY_INLINE static PyObject * +npy_import(const char *module, const char *attr) +{ + PyObject *mod = PyImport_ImportModule(module); + PyObject *ret = NULL; + + if (mod != NULL) { + ret = PyObject_GetAttrString(mod, attr); + } + Py_XDECREF(mod); + + return ret; +} + #endif diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index b13e48fdc..37d73e42c 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -4796,9 +4796,25 @@ class TestRecord(object): fn2 = func('f2') b[fn2] = 3 - assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) - assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) - assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) + # In 1.16 code below can be replaced by: + # assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) + # assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) + # assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) + with suppress_warnings() as sup: + sup.filter(FutureWarning, + ".* selecting multiple fields .*") + + assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) + assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) + assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) + # view of subfield view/copy + assert_equal(b[['f1', 'f2']][0].view(('i4', 2)).tolist(), + (2, 3)) + assert_equal(b[['f2', 'f1']][0].view(('i4', 2)).tolist(), + (3, 2)) + view_dtype = [('f1', 'i4'), ('f3', [('', 'i4')])] + assert_equal(b[['f1', 'f3']][0].view(view_dtype).tolist(), + (2, (1,))) # non-ascii unicode field indexing is well behaved if not is_py3: @@ -4808,6 +4824,51 @@ class TestRecord(object): assert_raises(ValueError, a.__setitem__, u'\u03e0', 1) assert_raises(ValueError, a.__getitem__, u'\u03e0') + # can be removed in 1.16 + def test_field_names_deprecation(self): + + def collect_warnings(f, *args, **kwargs): + with warnings.catch_warnings(record=True) as log: + warnings.simplefilter("always") + f(*args, **kwargs) + return [w.category for w in log] + + a = np.zeros((1,), dtype=[('f1', 'i4'), + ('f2', 'i4'), + ('f3', [('sf1', 'i4')])]) + a['f1'][0] = 1 + a['f2'][0] = 2 + a['f3'][0] = (3,) + b = np.zeros((1,), dtype=[('f1', 'i4'), + ('f2', 'i4'), + ('f3', [('sf1', 'i4')])]) + b['f1'][0] = 1 + b['f2'][0] = 2 + b['f3'][0] = (3,) + + # All the different functions raise a warning, but not an error + assert_equal(collect_warnings(a[['f1', 'f2']].__setitem__, 0, (10, 20)), + [FutureWarning]) + # For <=1.12 a is not modified, but it will be in 1.13 + assert_equal(a, b) + + # Views also warn + subset = a[['f1', 'f2']] + subset_view = subset.view() + assert_equal(collect_warnings(subset_view['f1'].__setitem__, 0, 10), + [FutureWarning]) + # But the write goes through: + assert_equal(subset['f1'][0], 10) + # Only one warning per multiple field indexing, though (even if there + # are multiple views involved): + assert_equal(collect_warnings(subset['f1'].__setitem__, 0, 10), []) + + # make sure views of a multi-field index warn too + c = np.zeros(3, dtype='i8,i8,i8') + assert_equal(collect_warnings(c[['f0', 'f2']].view, 'i8,i8'), + [FutureWarning]) + + def test_record_hash(self): a = np.array([(1, 2), (1, 2)], dtype='i1,i2') a.flags.writeable = False diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index 9aeb93f74..d7c7d16e3 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -11,6 +11,7 @@ import pickle import warnings import textwrap from os import path +import pytest import numpy as np from numpy.testing import ( @@ -360,6 +361,7 @@ class TestRecord(object): with assert_raises(ValueError): r.setfield([2,3], *r.dtype.fields['f']) + @pytest.mark.xfail(reason="See gh-10411, becomes real error in 1.16") def test_out_of_order_fields(self): # names in the same order, padding added to descr x = self.data[['col1', 'col2']] diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index ba667da59..ab97c5df6 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -133,10 +133,9 @@ summary they are: Offsets may be chosen such that the fields overlap, though this will mean that assigning to one field may clobber any overlapping field's data. As - an exception, fields of :class:`numpy.object` type .. (see - :ref:`object arrays <arrays.object>`) cannot overlap with other fields, - because of the risk of clobbering the internal object pointer and then - dereferencing it. + an exception, fields of :class:`numpy.object` type cannot overlap with + other fields, because of the risk of clobbering the internal object + pointer and then dereferencing it. The optional 'aligned' value can be set to ``True`` to make the automatic offset computation use aligned offsets (see :ref:`offsets-and-alignment`), @@ -235,6 +234,11 @@ If the offsets of the fields and itemsize of a structured array satisfy the alignment conditions, the array will have the ``ALIGNED`` :ref:`flag <numpy.ndarray.flags>` set. +A convenience function :func:`numpy.lib.recfunctions.repack_fields` converts an +aligned dtype or array to a packed one and vice versa. It takes either a dtype +or structured ndarray as an argument, and returns a copy with fields re-packed, +with or without padding bytes. + .. _titles: Field Titles @@ -396,27 +400,61 @@ typically a non-structured array, except in the case of nested structures. Accessing Multiple Fields ``````````````````````````` -One can index a structured array with a multi-field index, where the index is a -list of field names:: +One can index and assign to a structured array with a multi-field index, where +the index is a list of field names. + +.. warning:: + The behavior of multi-field indexes will change from Numpy 1.15 to Numpy + 1.16. - >>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'i4'), ('c', 'f8')]) +In Numpy 1.16, the result of indexing with a multi-field index will be a view +into the original array, as follows:: + + >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) >>> a[['a', 'c']] - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype={'names':['a','c'], 'formats':['<i8','<f8'], 'offsets':[0,11], 'itemsize':19}) + array([(0, 0.), (0, 0.), (0, 0.)], + dtype={'names':['a','c'], 'formats':['<i4','<f4'], 'offsets':[0,8], 'itemsize':12}) + +Assignment to the view modifies the original array. The view's fields will be +in the order they were indexed. Note that unlike for single-field indexing, the +view's dtype has the same itemsize as the original array, and has fields at the +same offsets as in the original array, and unindexed fields are merely missing. + +In Numpy 1.15, indexing an array with a multi-field index returns a copy of +the result above for 1.16, but with fields packed together in memory as if +passed through :func:`numpy.lib.recfunctions.repack_fields`. This is the +behavior since Numpy 1.7. + +.. warning:: + The new behavior in Numpy 1.16 leads to extra "padding" bytes at the + location of unindexed fields. You will need to update any code which depends + on the data having a "packed" layout. For instance code such as:: + + >>> a[['a','c']].view('i8') # will fail in Numpy 1.16 + ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + + will need to be changed. This code has raised a ``FutureWarning`` since + Numpy 1.12. + + The following is a recommended fix, which will behave identically in Numpy + 1.15 and Numpy 1.16:: + + >>> from numpy.lib.recfunctions import repack_fields + >>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16 + array([0, 0, 0]) + +Assigning to an array with a multi-field index will behave the same in Numpy +1.15 and Numpy 1.16. In both versions the assignment will modify the original +array:: + >>> a[['a', 'c']] = (2, 3) >>> a array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)], dtype=[('a', '<i8'), ('b', '<i4'), ('c', '<f8')]) -The resulting array is a view into the original array, such that assignment to -the view modifies the original array. The view's fields will be in the order -they were indexed. Note that unlike for single-field indexing, the view's dtype -has the same itemsize as the original array, and has fields at the same offsets -as in the original array, and unindexed fields are merely missing. - -Since the view is a structured array itself, it obeys the assignment rules -described above. For example, this means that one can swap the values of two -fields using appropriate multi-field indexes:: +This obeys the structured array assignment rules described above. For example, +this means that one can swap the values of two fields using appropriate +multi-field indexes:: >>> a[['a', 'c']] = a[['c', 'a']] diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index c455bd93f..b6453d5a2 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -732,6 +732,84 @@ def rec_append_fields(base, names, data, dtypes=None): return append_fields(base, names, data=data, dtypes=dtypes, asrecarray=True, usemask=False) +def repack_fields(a, align=False, recurse=False): + """ + Re-pack the fields of a structured array or dtype in memory. + + The memory layout of structured datatypes allows fields at arbitrary + byte offsets. This means the fields can be separated by padding bytes, + their offsets can be non-monotonically increasing, and they can overlap. + + This method removes any overlaps and reorders the fields in memory so they + have increasing byte offsets, and adds or removes padding bytes depending + on the `align` option, which behaves like the `align` option to `np.dtype`. + + If `align=False`, this method produces a "packed" memory layout in which + each field starts at the byte the previous field ended, and any padding + bytes are removed. + + If `align=True`, this methods produces an "aligned" memory layout in which + each field's offset is a multiple of its alignment, and the total itemsize + is a multiple of the largest alignment, by adding padding bytes as needed. + + Parameters + ---------- + a : ndarray or dtype + array or dtype for which to repack the fields. + align : boolean + If true, use an "aligned" memory layout, otherwise use a "packed" layout. + recurse : boolean + If True, also repack nested structures. + + Returns + ------- + repacked : ndarray or dtype + Copy of `a` with fields repacked, or `a` itself if no repacking was + needed. + + Examples + -------- + + >>> def print_offsets(d): + ... print("offsets:", [d.fields[name][1] for name in d.names]) + ... print("itemsize:", d.itemsize) + ... + >>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt + dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True) + >>> print_offsets(dt) + offsets: [0, 4, 8] + itemsize: 16 + >>> packed_dt = repack_fields(dt) + >>> packed_dt + dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')]) + >>> print_offsets(packed_dt) + offsets: [0, 1, 5] + itemsize: 13 + + """ + if not isinstance(a, np.dtype): + dt = repack_fields(a.dtype, align=align, recurse=recurse) + return a.astype(dt, copy=False) + + if a.names is None: + return a + + fieldinfo = [] + for name in a.names: + tup = a.fields[name] + if recurse: + fmt = repack_fields(tup[0], align=align, recurse=True) + else: + fmt = tup[0] + + if len(tup) == 3: + name = (tup[2], name) + + fieldinfo.append((name, fmt)) + + dt = np.dtype(fieldinfo, align=align) + return np.dtype((a.type, dt)) def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, autoconvert=False): diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index 219ae24fa..d4828bc1f 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -9,8 +9,8 @@ from numpy.ma.testutils import assert_equal from numpy.testing import assert_, assert_raises from numpy.lib.recfunctions import ( drop_fields, rename_fields, get_fieldstructure, recursive_fill_fields, - find_duplicates, merge_arrays, append_fields, stack_arrays, join_by - ) + find_duplicates, merge_arrays, append_fields, stack_arrays, join_by, + repack_fields) get_names = np.lib.recfunctions.get_names get_names_flat = np.lib.recfunctions.get_names_flat zip_descr = np.lib.recfunctions.zip_descr @@ -192,6 +192,18 @@ class TestRecFunctions(object): assert_equal(sorted(test[-1]), control) assert_equal(test[0], a[test[-1]]) + def test_repack_fields(self): + dt = np.dtype('u1,f4,i8', align=True) + a = np.zeros(2, dtype=dt) + + assert_equal(repack_fields(dt), np.dtype('u1,f4,i8')) + assert_equal(repack_fields(a).itemsize, 13) + assert_equal(repack_fields(repack_fields(dt), align=True), dt) + + # make sure type is preserved + dt = np.dtype((np.record, dt)) + assert_(repack_fields(dt).type is np.record) + class TestRecursiveFillFields(object): # Test recursive_fill_fields. |