summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAllan Haldane <allan.haldane@gmail.com>2018-06-10 21:54:21 -0400
committerAllan Haldane <allan.haldane@gmail.com>2018-06-11 11:51:04 -0400
commite08eced7990fbdcecb2bd81d3fc736f69bad6dfd (patch)
tree9ded5059b440cff8c4f52519692e4c456c66717d
parent8525d3e30f3b57332dfd374bc64b068366126b52 (diff)
downloadnumpy-e08eced7990fbdcecb2bd81d3fc736f69bad6dfd.tar.gz
MAINT: push back multifield copy->view changes to 1.16
-rw-r--r--doc/release/1.15.0-notes.rst10
-rw-r--r--numpy/core/src/multiarray/common.c2
-rw-r--r--numpy/core/src/multiarray/convert.c8
-rw-r--r--numpy/core/src/multiarray/mapping.c60
-rw-r--r--numpy/core/src/private/npy_import.h14
-rw-r--r--numpy/core/tests/test_multiarray.py67
-rw-r--r--numpy/core/tests/test_records.py2
-rw-r--r--numpy/doc/structured_arrays.py74
-rw-r--r--numpy/lib/recfunctions.py78
-rw-r--r--numpy/lib/tests/test_recfunctions.py16
10 files changed, 297 insertions, 34 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst
index a269e25f1..cc193530c 100644
--- a/doc/release/1.15.0-notes.rst
+++ b/doc/release/1.15.0-notes.rst
@@ -154,6 +154,16 @@ Since ``np.ma.masked`` is a readonly scalar, copying should be a no-op. These
functions now behave consistently with ``np.copy()``.
+Multifield Indexing of Structured Arrays will still return a copy
+-----------------------------------------------------------------
+The change that multi-field indexing of structured arrays returns a view
+instead of a copy is pushed back to 1.16. A new method
+``numpy.lib.recfunctions.repack_fields`` has been introduced to help mitigate
+the effects of this change, which can be used to write code compatible with
+both numpy 1.15 and 1.16. For more information on how to update code to account
+for this future change see "basics/structured arrays/accessing multiple fields"
+in the user guide.
+
C API changes
=============
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index f191f8db4..c70f8526e 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -424,7 +424,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
* __len__ is not defined.
*/
if (maxdims == 0 || !PySequence_Check(obj) || PySequence_Size(obj) < 0) {
- // clear any PySequence_Size error, which corrupts further calls to it
+ /* clear any PySequence_Size error which corrupts further calls */
PyErr_Clear();
if (*out_dtype == NULL || (*out_dtype)->type_num != NPY_OBJECT) {
diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
index 0e38aaa61..e88582a51 100644
--- a/numpy/core/src/multiarray/convert.c
+++ b/numpy/core/src/multiarray/convert.c
@@ -613,11 +613,14 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype)
subtype = Py_TYPE(self);
}
- if (type != NULL && (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) {
+ dtype = PyArray_DESCR(self);
+
+ if (type != NULL && !PyArray_EquivTypes(dtype, type) &&
+ (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) {
const char *msg =
"Numpy has detected that you may be viewing or writing to an array "
"returned by selecting multiple fields in a structured array. \n\n"
- "This code may break in numpy 1.13 because this will return a view "
+ "This code may break in numpy 1.16 because this will return a view "
"instead of a copy -- see release notes for details.";
/* 2016-09-19, 1.12 */
if (DEPRECATE_FUTUREWARNING(msg) < 0) {
@@ -629,7 +632,6 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype)
flags = PyArray_FLAGS(self);
- dtype = PyArray_DESCR(self);
Py_INCREF(dtype);
ret = (PyArrayObject *)PyArray_NewFromDescr_int(
subtype, dtype,
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 6a7ffd39d..82a0683f9 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1387,14 +1387,54 @@ array_subscript_asarray(PyArrayObject *self, PyObject *op)
}
/*
+ * Helper function for _get_field_view which turns a multifield
+ * view into a "packed" copy, as done in numpy 1.15 and before.
+ * In numpy 1.16 this function should be removed.
+ */
+NPY_NO_EXPORT int
+_multifield_view_to_copy(PyArrayObject **view) {
+ static PyObject *copyfunc = NULL;
+ PyObject *viewcopy;
+
+ /* return a repacked copy of the view */
+ npy_cache_import("numpy.lib.recfunctions", "repack_fields", &copyfunc);
+ if (copyfunc == NULL) {
+ goto view_fail;
+ }
+
+ PyArray_CLEARFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
+ viewcopy = PyObject_CallFunction(copyfunc, "O", *view);
+ if (viewcopy == NULL) {
+ goto view_fail;
+ }
+ Py_DECREF(*view);
+ *view = (PyArrayObject*)viewcopy;
+
+ /* warn when writing to the copy */
+ PyArray_ENABLEFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
+ return 0;
+
+view_fail:
+ Py_DECREF(*view);
+ *view = NULL;
+ return 0;
+}
+
+/*
* Attempts to subscript an array using a field name or list of field names.
*
* If an error occurred, return 0 and set view to NULL. If the subscript is not
* a string or list of strings, return -1 and set view to NULL. Otherwise
* return 0 and set view to point to a new view into arr for the given fields.
+ *
+ * In numpy 1.15 and before, in the case of a list of field names the returned
+ * view will actually be a copy by default, with fields packed together.
+ * The `force_view` argument causes a view to be returned. This argument can be
+ * removed in 1.16 when we plan to return a view always.
*/
NPY_NO_EXPORT int
-_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
+_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view,
+ int force_view)
{
*view = NULL;
@@ -1489,12 +1529,12 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
Py_DECREF(names);
return 0;
}
- // disallow use of titles as index
+ /* disallow use of titles as index */
if (PyTuple_Size(tup) == 3) {
PyObject *title = PyTuple_GET_ITEM(tup, 2);
int titlecmp = PyObject_RichCompareBool(title, name, Py_EQ);
if (titlecmp == 1) {
- // if title == name, we were given a title, not a field name
+ /* if title == name, we got a title, not a field name */
PyErr_SetString(PyExc_KeyError,
"cannot use field titles in multi-field index");
}
@@ -1507,7 +1547,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
}
Py_DECREF(title);
}
- // disallow duplicate field indices
+ /* disallow duplicate field indices */
if (PyDict_Contains(fields, name)) {
PyObject *errmsg = PyUString_FromString(
"duplicate field of name ");
@@ -1552,10 +1592,16 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
PyArray_FLAGS(arr),
(PyObject *)arr, (PyObject *)arr,
0, 1);
+
if (*view == NULL) {
return 0;
}
- return 0;
+
+ /* the code below can be replaced by "return 0" in 1.16 */
+ if (force_view) {
+ return 0;
+ }
+ return _multifield_view_to_copy(view);
}
return -1;
}
@@ -1583,7 +1629,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
/* return fields if op is a string index */
if (PyDataType_HASFIELDS(PyArray_DESCR(self))) {
PyArrayObject *view;
- int ret = _get_field_view(self, op, &view);
+ int ret = _get_field_view(self, op, &view, 0);
if (ret == 0){
if (view == NULL) {
return NULL;
@@ -1865,7 +1911,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
/* field access */
if (PyDataType_HASFIELDS(PyArray_DESCR(self))){
PyArrayObject *view;
- int ret = _get_field_view(self, ind, &view);
+ int ret = _get_field_view(self, ind, &view, 1);
if (ret == 0){
if (view == NULL) {
return -1;
diff --git a/numpy/core/src/private/npy_import.h b/numpy/core/src/private/npy_import.h
index 221e1e645..e145a843e 100644
--- a/numpy/core/src/private/npy_import.h
+++ b/numpy/core/src/private/npy_import.h
@@ -29,4 +29,18 @@ npy_cache_import(const char *module, const char *attr, PyObject **cache)
}
}
+NPY_INLINE static PyObject *
+npy_import(const char *module, const char *attr)
+{
+ PyObject *mod = PyImport_ImportModule(module);
+ PyObject *ret = NULL;
+
+ if (mod != NULL) {
+ ret = PyObject_GetAttrString(mod, attr);
+ }
+ Py_XDECREF(mod);
+
+ return ret;
+}
+
#endif
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index b13e48fdc..37d73e42c 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -4796,9 +4796,25 @@ class TestRecord(object):
fn2 = func('f2')
b[fn2] = 3
- assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
- assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
- assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
+ # In 1.16 code below can be replaced by:
+ # assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
+ # assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
+ # assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
+ with suppress_warnings() as sup:
+ sup.filter(FutureWarning,
+ ".* selecting multiple fields .*")
+
+ assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
+ assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
+ assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
+ # view of subfield view/copy
+ assert_equal(b[['f1', 'f2']][0].view(('i4', 2)).tolist(),
+ (2, 3))
+ assert_equal(b[['f2', 'f1']][0].view(('i4', 2)).tolist(),
+ (3, 2))
+ view_dtype = [('f1', 'i4'), ('f3', [('', 'i4')])]
+ assert_equal(b[['f1', 'f3']][0].view(view_dtype).tolist(),
+ (2, (1,)))
# non-ascii unicode field indexing is well behaved
if not is_py3:
@@ -4808,6 +4824,51 @@ class TestRecord(object):
assert_raises(ValueError, a.__setitem__, u'\u03e0', 1)
assert_raises(ValueError, a.__getitem__, u'\u03e0')
+ # can be removed in 1.16
+ def test_field_names_deprecation(self):
+
+ def collect_warnings(f, *args, **kwargs):
+ with warnings.catch_warnings(record=True) as log:
+ warnings.simplefilter("always")
+ f(*args, **kwargs)
+ return [w.category for w in log]
+
+ a = np.zeros((1,), dtype=[('f1', 'i4'),
+ ('f2', 'i4'),
+ ('f3', [('sf1', 'i4')])])
+ a['f1'][0] = 1
+ a['f2'][0] = 2
+ a['f3'][0] = (3,)
+ b = np.zeros((1,), dtype=[('f1', 'i4'),
+ ('f2', 'i4'),
+ ('f3', [('sf1', 'i4')])])
+ b['f1'][0] = 1
+ b['f2'][0] = 2
+ b['f3'][0] = (3,)
+
+ # All the different functions raise a warning, but not an error
+ assert_equal(collect_warnings(a[['f1', 'f2']].__setitem__, 0, (10, 20)),
+ [FutureWarning])
+ # For <=1.12 a is not modified, but it will be in 1.13
+ assert_equal(a, b)
+
+ # Views also warn
+ subset = a[['f1', 'f2']]
+ subset_view = subset.view()
+ assert_equal(collect_warnings(subset_view['f1'].__setitem__, 0, 10),
+ [FutureWarning])
+ # But the write goes through:
+ assert_equal(subset['f1'][0], 10)
+ # Only one warning per multiple field indexing, though (even if there
+ # are multiple views involved):
+ assert_equal(collect_warnings(subset['f1'].__setitem__, 0, 10), [])
+
+ # make sure views of a multi-field index warn too
+ c = np.zeros(3, dtype='i8,i8,i8')
+ assert_equal(collect_warnings(c[['f0', 'f2']].view, 'i8,i8'),
+ [FutureWarning])
+
+
def test_record_hash(self):
a = np.array([(1, 2), (1, 2)], dtype='i1,i2')
a.flags.writeable = False
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index 9aeb93f74..d7c7d16e3 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -11,6 +11,7 @@ import pickle
import warnings
import textwrap
from os import path
+import pytest
import numpy as np
from numpy.testing import (
@@ -360,6 +361,7 @@ class TestRecord(object):
with assert_raises(ValueError):
r.setfield([2,3], *r.dtype.fields['f'])
+ @pytest.mark.xfail(reason="See gh-10411, becomes real error in 1.16")
def test_out_of_order_fields(self):
# names in the same order, padding added to descr
x = self.data[['col1', 'col2']]
diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py
index ba667da59..ab97c5df6 100644
--- a/numpy/doc/structured_arrays.py
+++ b/numpy/doc/structured_arrays.py
@@ -133,10 +133,9 @@ summary they are:
Offsets may be chosen such that the fields overlap, though this will mean
that assigning to one field may clobber any overlapping field's data. As
- an exception, fields of :class:`numpy.object` type .. (see
- :ref:`object arrays <arrays.object>`) cannot overlap with other fields,
- because of the risk of clobbering the internal object pointer and then
- dereferencing it.
+ an exception, fields of :class:`numpy.object` type cannot overlap with
+ other fields, because of the risk of clobbering the internal object
+ pointer and then dereferencing it.
The optional 'aligned' value can be set to ``True`` to make the automatic
offset computation use aligned offsets (see :ref:`offsets-and-alignment`),
@@ -235,6 +234,11 @@ If the offsets of the fields and itemsize of a structured array satisfy the
alignment conditions, the array will have the ``ALIGNED`` :ref:`flag
<numpy.ndarray.flags>` set.
+A convenience function :func:`numpy.lib.recfunctions.repack_fields` converts an
+aligned dtype or array to a packed one and vice versa. It takes either a dtype
+or structured ndarray as an argument, and returns a copy with fields re-packed,
+with or without padding bytes.
+
.. _titles:
Field Titles
@@ -396,27 +400,61 @@ typically a non-structured array, except in the case of nested structures.
Accessing Multiple Fields
```````````````````````````
-One can index a structured array with a multi-field index, where the index is a
-list of field names::
+One can index and assign to a structured array with a multi-field index, where
+the index is a list of field names.
+
+.. warning::
+ The behavior of multi-field indexes will change from Numpy 1.15 to Numpy
+ 1.16.
- >>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'i4'), ('c', 'f8')])
+In Numpy 1.16, the result of indexing with a multi-field index will be a view
+into the original array, as follows::
+
+ >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')])
>>> a[['a', 'c']]
- array([(0, 0.0), (0, 0.0), (0, 0.0)],
- dtype={'names':['a','c'], 'formats':['<i8','<f8'], 'offsets':[0,11], 'itemsize':19})
+ array([(0, 0.), (0, 0.), (0, 0.)],
+ dtype={'names':['a','c'], 'formats':['<i4','<f4'], 'offsets':[0,8], 'itemsize':12})
+
+Assignment to the view modifies the original array. The view's fields will be
+in the order they were indexed. Note that unlike for single-field indexing, the
+view's dtype has the same itemsize as the original array, and has fields at the
+same offsets as in the original array, and unindexed fields are merely missing.
+
+In Numpy 1.15, indexing an array with a multi-field index returns a copy of
+the result above for 1.16, but with fields packed together in memory as if
+passed through :func:`numpy.lib.recfunctions.repack_fields`. This is the
+behavior since Numpy 1.7.
+
+.. warning::
+ The new behavior in Numpy 1.16 leads to extra "padding" bytes at the
+ location of unindexed fields. You will need to update any code which depends
+ on the data having a "packed" layout. For instance code such as::
+
+ >>> a[['a','c']].view('i8') # will fail in Numpy 1.16
+ ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype
+
+ will need to be changed. This code has raised a ``FutureWarning`` since
+ Numpy 1.12.
+
+ The following is a recommended fix, which will behave identically in Numpy
+ 1.15 and Numpy 1.16::
+
+ >>> from numpy.lib.recfunctions import repack_fields
+ >>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16
+ array([0, 0, 0])
+
+Assigning to an array with a multi-field index will behave the same in Numpy
+1.15 and Numpy 1.16. In both versions the assignment will modify the original
+array::
+
>>> a[['a', 'c']] = (2, 3)
>>> a
array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)],
dtype=[('a', '<i8'), ('b', '<i4'), ('c', '<f8')])
-The resulting array is a view into the original array, such that assignment to
-the view modifies the original array. The view's fields will be in the order
-they were indexed. Note that unlike for single-field indexing, the view's dtype
-has the same itemsize as the original array, and has fields at the same offsets
-as in the original array, and unindexed fields are merely missing.
-
-Since the view is a structured array itself, it obeys the assignment rules
-described above. For example, this means that one can swap the values of two
-fields using appropriate multi-field indexes::
+This obeys the structured array assignment rules described above. For example,
+this means that one can swap the values of two fields using appropriate
+multi-field indexes::
>>> a[['a', 'c']] = a[['c', 'a']]
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index c455bd93f..b6453d5a2 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -732,6 +732,84 @@ def rec_append_fields(base, names, data, dtypes=None):
return append_fields(base, names, data=data, dtypes=dtypes,
asrecarray=True, usemask=False)
+def repack_fields(a, align=False, recurse=False):
+ """
+ Re-pack the fields of a structured array or dtype in memory.
+
+ The memory layout of structured datatypes allows fields at arbitrary
+ byte offsets. This means the fields can be separated by padding bytes,
+ their offsets can be non-monotonically increasing, and they can overlap.
+
+ This method removes any overlaps and reorders the fields in memory so they
+ have increasing byte offsets, and adds or removes padding bytes depending
+ on the `align` option, which behaves like the `align` option to `np.dtype`.
+
+ If `align=False`, this method produces a "packed" memory layout in which
+ each field starts at the byte the previous field ended, and any padding
+ bytes are removed.
+
+ If `align=True`, this methods produces an "aligned" memory layout in which
+ each field's offset is a multiple of its alignment, and the total itemsize
+ is a multiple of the largest alignment, by adding padding bytes as needed.
+
+ Parameters
+ ----------
+ a : ndarray or dtype
+ array or dtype for which to repack the fields.
+ align : boolean
+ If true, use an "aligned" memory layout, otherwise use a "packed" layout.
+ recurse : boolean
+ If True, also repack nested structures.
+
+ Returns
+ -------
+ repacked : ndarray or dtype
+ Copy of `a` with fields repacked, or `a` itself if no repacking was
+ needed.
+
+ Examples
+ --------
+
+ >>> def print_offsets(d):
+ ... print("offsets:", [d.fields[name][1] for name in d.names])
+ ... print("itemsize:", d.itemsize)
+ ...
+ >>> dt = np.dtype('u1,i4,f4', align=True)
+ >>> dt
+ dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True)
+ >>> print_offsets(dt)
+ offsets: [0, 4, 8]
+ itemsize: 16
+ >>> packed_dt = repack_fields(dt)
+ >>> packed_dt
+ dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')])
+ >>> print_offsets(packed_dt)
+ offsets: [0, 1, 5]
+ itemsize: 13
+
+ """
+ if not isinstance(a, np.dtype):
+ dt = repack_fields(a.dtype, align=align, recurse=recurse)
+ return a.astype(dt, copy=False)
+
+ if a.names is None:
+ return a
+
+ fieldinfo = []
+ for name in a.names:
+ tup = a.fields[name]
+ if recurse:
+ fmt = repack_fields(tup[0], align=align, recurse=True)
+ else:
+ fmt = tup[0]
+
+ if len(tup) == 3:
+ name = (tup[2], name)
+
+ fieldinfo.append((name, fmt))
+
+ dt = np.dtype(fieldinfo, align=align)
+ return np.dtype((a.type, dt))
def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
autoconvert=False):
diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py
index 219ae24fa..d4828bc1f 100644
--- a/numpy/lib/tests/test_recfunctions.py
+++ b/numpy/lib/tests/test_recfunctions.py
@@ -9,8 +9,8 @@ from numpy.ma.testutils import assert_equal
from numpy.testing import assert_, assert_raises
from numpy.lib.recfunctions import (
drop_fields, rename_fields, get_fieldstructure, recursive_fill_fields,
- find_duplicates, merge_arrays, append_fields, stack_arrays, join_by
- )
+ find_duplicates, merge_arrays, append_fields, stack_arrays, join_by,
+ repack_fields)
get_names = np.lib.recfunctions.get_names
get_names_flat = np.lib.recfunctions.get_names_flat
zip_descr = np.lib.recfunctions.zip_descr
@@ -192,6 +192,18 @@ class TestRecFunctions(object):
assert_equal(sorted(test[-1]), control)
assert_equal(test[0], a[test[-1]])
+ def test_repack_fields(self):
+ dt = np.dtype('u1,f4,i8', align=True)
+ a = np.zeros(2, dtype=dt)
+
+ assert_equal(repack_fields(dt), np.dtype('u1,f4,i8'))
+ assert_equal(repack_fields(a).itemsize, 13)
+ assert_equal(repack_fields(repack_fields(dt), align=True), dt)
+
+ # make sure type is preserved
+ dt = np.dtype((np.record, dt))
+ assert_(repack_fields(dt).type is np.record)
+
class TestRecursiveFillFields(object):
# Test recursive_fill_fields.