/* * This file implements the API functions for NumPy's nditer that * are specialized using the templating system. * * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com) * The University of British Columbia * * See LICENSE.txt for the license. */ /* Indicate that this .c file is allowed to include the header */ #define NPY_ITERATOR_IMPLEMENTATION_CODE #include "nditer_impl.h" /* SPECIALIZED iternext functions that handle the non-buffering part */ /**begin repeat * #const_itflags = 0, * NPY_ITFLAG_HASINDEX, * NPY_ITFLAG_EXLOOP, * NPY_ITFLAG_RANGE, * NPY_ITFLAG_RANGE|NPY_ITFLAG_HASINDEX# * #tag_itflags = 0, IND, NOINN, RNG, RNGuIND# */ /**begin repeat1 * #const_ndim = 1, 2, NPY_MAXDIMS# * #tag_ndim = 1, 2, ANY# */ /**begin repeat2 * #const_nop = 1, 2, NPY_MAXDIMS# * #tag_nop = 1, 2, ANY# */ /* Specialized iternext (@const_itflags@,@tag_ndim@,@tag_nop@) */ static int npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_iters@tag_nop@( NpyIter *iter) { #if !(@const_itflags@&NPY_ITFLAG_EXLOOP) || (@const_ndim@ > 1) const npy_uint32 itflags = @const_itflags@; # if @const_ndim@ >= NPY_MAXDIMS int idim, ndim = NIT_NDIM(iter); # endif # if @const_nop@ < NPY_MAXDIMS const int nop = @const_nop@; # else int nop = NIT_NOP(iter); # endif NpyIter_AxisData *axisdata0; npy_intp istrides, nstrides = NAD_NSTRIDES(); #endif #if @const_ndim@ > 1 NpyIter_AxisData *axisdata1; npy_intp sizeof_axisdata; #endif #if @const_ndim@ > 2 NpyIter_AxisData *axisdata2; #endif #if (@const_itflags@&NPY_ITFLAG_RANGE) /* When ranged iteration is enabled, use the iterindex */ if (++NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { return 0; } #endif #if @const_ndim@ > 1 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); #endif # if !(@const_itflags@&NPY_ITFLAG_EXLOOP) || (@const_ndim@ > 1) axisdata0 = NIT_AXISDATA(iter); # endif # if !(@const_itflags@&NPY_ITFLAG_EXLOOP) /* Increment index 0 */ NAD_INDEX(axisdata0)++; /* Increment pointer 0 */ for (istrides = 0; istrides < nstrides; ++istrides) { NAD_PTRS(axisdata0)[istrides] += NAD_STRIDES(axisdata0)[istrides]; } # endif #if @const_ndim@ == 1 # if !(@const_itflags@&NPY_ITFLAG_EXLOOP) /* Finished when the index equals the shape */ return NAD_INDEX(axisdata0) < NAD_SHAPE(axisdata0); # else return 0; # endif #else # if !(@const_itflags@&NPY_ITFLAG_EXLOOP) if (NAD_INDEX(axisdata0) < NAD_SHAPE(axisdata0)) { return 1; } # endif axisdata1 = NIT_INDEX_AXISDATA(axisdata0, 1); /* Increment index 1 */ NAD_INDEX(axisdata1)++; /* Increment pointer 1 */ for (istrides = 0; istrides < nstrides; ++istrides) { NAD_PTRS(axisdata1)[istrides] += NAD_STRIDES(axisdata1)[istrides]; } if (NAD_INDEX(axisdata1) < NAD_SHAPE(axisdata1)) { /* Reset the 1st index to 0 */ NAD_INDEX(axisdata0) = 0; /* Reset the 1st pointer to the value of the 2nd */ for (istrides = 0; istrides < nstrides; ++istrides) { NAD_PTRS(axisdata0)[istrides] = NAD_PTRS(axisdata1)[istrides]; } return 1; } # if @const_ndim@ == 2 return 0; # else axisdata2 = NIT_INDEX_AXISDATA(axisdata1, 1); /* Increment index 2 */ NAD_INDEX(axisdata2)++; /* Increment pointer 2 */ for (istrides = 0; istrides < nstrides; ++istrides) { NAD_PTRS(axisdata2)[istrides] += NAD_STRIDES(axisdata2)[istrides]; } if (NAD_INDEX(axisdata2) < NAD_SHAPE(axisdata2)) { /* Reset the 1st and 2nd indices to 0 */ NAD_INDEX(axisdata0) = 0; NAD_INDEX(axisdata1) = 0; /* Reset the 1st and 2nd pointers to the value of the 3rd */ for (istrides = 0; istrides < nstrides; ++istrides) { NAD_PTRS(axisdata0)[istrides] = NAD_PTRS(axisdata2)[istrides]; NAD_PTRS(axisdata1)[istrides] = NAD_PTRS(axisdata2)[istrides]; } return 1; } for (idim = 3; idim < ndim; ++idim) { NIT_ADVANCE_AXISDATA(axisdata2, 1); /* Increment the index */ NAD_INDEX(axisdata2)++; /* Increment the pointer */ for (istrides = 0; istrides < nstrides; ++istrides) { NAD_PTRS(axisdata2)[istrides] += NAD_STRIDES(axisdata2)[istrides]; } if (NAD_INDEX(axisdata2) < NAD_SHAPE(axisdata2)) { /* Reset the indices and pointers of all previous axisdatas */ axisdata1 = axisdata2; do { NIT_ADVANCE_AXISDATA(axisdata1, -1); /* Reset the index to 0 */ NAD_INDEX(axisdata1) = 0; /* Reset the pointer to the updated value */ for (istrides = 0; istrides < nstrides; ++istrides) { NAD_PTRS(axisdata1)[istrides] = NAD_PTRS(axisdata2)[istrides]; } } while (axisdata1 != axisdata0); return 1; } } return 0; # endif /* ndim != 2 */ #endif /* ndim != 1 */ } /**end repeat2**/ /**end repeat1**/ /**end repeat**/ /**begin repeat * #const_nop = 1, 2, 3, 4, NPY_MAXDIMS# * #tag_nop = 1, 2, 3, 4, ANY# */ /* * Iternext function that handles the reduction buffering part. This * is done with a double loop to avoid frequent re-buffering. */ static int npyiter_buffered_reduce_iternext_iters@tag_nop@(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ #if @const_nop@ >= NPY_MAXDIMS int nop = NIT_NOP(iter); #else const int nop = @const_nop@; #endif int iop; NpyIter_AxisData *axisdata; NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); char **ptrs; char *prev_dataptrs[NPY_MAXARGS]; ptrs = NBF_PTRS(bufferdata); /* * If the iterator handles the inner loop, need to increment all * the indices and pointers */ if (!(itflags&NPY_ITFLAG_EXLOOP)) { /* Increment within the buffer */ if (++NIT_ITERINDEX(iter) < NBF_BUFITEREND(bufferdata)) { npy_intp *strides; strides = NBF_STRIDES(bufferdata); for (iop = 0; iop < nop; ++iop) { ptrs[iop] += strides[iop]; } return 1; } } else { NIT_ITERINDEX(iter) += NBF_SIZE(bufferdata); } NPY_IT_DBG_PRINT1("Iterator: Finished iteration %d of outer reduce loop\n", (int)NBF_REDUCE_POS(bufferdata)); /* The outer increment for the reduce double loop */ if (++NBF_REDUCE_POS(bufferdata) < NBF_REDUCE_OUTERSIZE(bufferdata)) { npy_intp *reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata); char **reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata); for (iop = 0; iop < nop; ++iop) { char *ptr = reduce_outerptrs[iop] + reduce_outerstrides[iop]; ptrs[iop] = ptr; reduce_outerptrs[iop] = ptr; } NBF_BUFITEREND(bufferdata) = NIT_ITERINDEX(iter) + NBF_SIZE(bufferdata); return 1; } /* Save the previously used data pointers */ axisdata = NIT_AXISDATA(iter); memcpy(prev_dataptrs, NAD_PTRS(axisdata), NPY_SIZEOF_INTP*nop); /* Write back to the arrays */ if (npyiter_copy_from_buffers(iter) < 0) { npyiter_clear_buffers(iter); return 0; } /* Check if we're past the end */ if (NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { NBF_SIZE(bufferdata) = 0; return 0; } /* Increment to the next buffer */ else { npyiter_goto_iterindex(iter, NIT_ITERINDEX(iter)); } /* Prepare the next buffers and set iterend/size */ if (npyiter_copy_to_buffers(iter, prev_dataptrs) < 0) { npyiter_clear_buffers(iter); return 0; } return 1; } /**end repeat**/ /* iternext function that handles the buffering part */ static int npyiter_buffered_iternext(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); /* * If the iterator handles the inner loop, need to increment all * the indices and pointers */ if (!(itflags&NPY_ITFLAG_EXLOOP)) { /* Increment within the buffer */ if (++NIT_ITERINDEX(iter) < NBF_BUFITEREND(bufferdata)) { int iop; npy_intp *strides; char **ptrs; strides = NBF_STRIDES(bufferdata); ptrs = NBF_PTRS(bufferdata); for (iop = 0; iop < nop; ++iop) { ptrs[iop] += strides[iop]; } return 1; } } else { NIT_ITERINDEX(iter) += NBF_SIZE(bufferdata); } /* Write back to the arrays */ if (npyiter_copy_from_buffers(iter) < 0) { npyiter_clear_buffers(iter); return 0; } /* Check if we're past the end */ if (NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { NBF_SIZE(bufferdata) = 0; return 0; } /* Increment to the next buffer */ else { npyiter_goto_iterindex(iter, NIT_ITERINDEX(iter)); } /* Prepare the next buffers and set iterend/size */ if (npyiter_copy_to_buffers(iter, NULL) < 0) { npyiter_clear_buffers(iter); return 0; } return 1; } /**end repeat2**/ /**end repeat1**/ /**end repeat**/ /* Specialization of iternext for when the iteration size is 1 */ static int npyiter_iternext_sizeone(NpyIter *iter) { return 0; } /*NUMPY_API * Compute the specialized iteration function for an iterator * * If errmsg is non-NULL, it should point to a variable which will * receive the error message, and no Python exception will be set. * This is so that the function can be called from code not holding * the GIL. */ NPY_NO_EXPORT NpyIter_IterNextFunc * NpyIter_GetIterNext(NpyIter *iter, char **errmsg) { npy_uint32 itflags = NIT_ITFLAGS(iter); int ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); if (NIT_ITERSIZE(iter) < 0) { if (errmsg == NULL) { PyErr_SetString(PyExc_ValueError, "iterator is too large"); } else { *errmsg = "iterator is too large"; } return NULL; } /* * When there is just one iteration and buffering is disabled * the iternext function is very simple. */ if (itflags&NPY_ITFLAG_ONEITERATION) { return &npyiter_iternext_sizeone; } /* * If buffering is enabled. */ if (itflags&NPY_ITFLAG_BUFFER) { if (itflags&NPY_ITFLAG_REDUCE) { switch (nop) { case 1: return &npyiter_buffered_reduce_iternext_iters1; case 2: return &npyiter_buffered_reduce_iternext_iters2; case 3: return &npyiter_buffered_reduce_iternext_iters3; case 4: return &npyiter_buffered_reduce_iternext_iters4; default: return &npyiter_buffered_reduce_iternext_itersANY; } } else { return &npyiter_buffered_iternext; } } /* * Ignore all the flags that don't affect the iterator memory * layout or the iternext function. Currently only HASINDEX, * EXLOOP, and RANGE affect them here. */ itflags &= (NPY_ITFLAG_HASINDEX|NPY_ITFLAG_EXLOOP|NPY_ITFLAG_RANGE); /* Switch statements let the compiler optimize this most effectively */ switch (itflags) { /* * The combinations HASINDEX|EXLOOP and RANGE|EXLOOP are excluded * by the New functions */ /**begin repeat * #const_itflags = 0, * NPY_ITFLAG_HASINDEX, * NPY_ITFLAG_EXLOOP, * NPY_ITFLAG_RANGE, * NPY_ITFLAG_RANGE|NPY_ITFLAG_HASINDEX# * #tag_itflags = 0, IND, NOINN, RNG, RNGuIND# */ case @const_itflags@: switch (ndim) { /**begin repeat1 * #const_ndim = 1, 2# * #tag_ndim = 1, 2# */ case @const_ndim@: switch (nop) { /**begin repeat2 * #const_nop = 1, 2# * #tag_nop = 1, 2# */ case @const_nop@: return &npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_iters@tag_nop@; /**end repeat2**/ /* Not specialized on nop */ default: return &npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_itersANY; } /**end repeat1**/ /* Not specialized on ndim */ default: switch (nop) { /**begin repeat1 * #const_nop = 1, 2# * #tag_nop = 1, 2# */ case @const_nop@: return &npyiter_iternext_itflags@tag_itflags@_dimsANY_iters@tag_nop@; /**end repeat1**/ /* Not specialized on nop */ default: return &npyiter_iternext_itflags@tag_itflags@_dimsANY_itersANY; } } /**end repeat**/ } /* The switch above should have caught all the possibilities. */ if (errmsg == NULL) { PyErr_Format(PyExc_ValueError, "GetIterNext internal iterator error - unexpected " "itflags/ndim/nop combination (%04x/%d/%d)", (int)itflags, (int)ndim, (int)nop); } else { *errmsg = "GetIterNext internal iterator error - unexpected " "itflags/ndim/nop combination"; } return NULL; } /* SPECIALIZED getindex functions */ /**begin repeat * #const_itflags = 0, * NPY_ITFLAG_HASINDEX, * NPY_ITFLAG_IDENTPERM, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM, * NPY_ITFLAG_NEGPERM, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM, * NPY_ITFLAG_BUFFER, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_BUFFER, * NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, * NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER# * #tag_itflags = 0, IND, IDP, INDuIDP, NEGP, INDuNEGP, * BUF, INDuBUF, IDPuBUF, INDuIDPuBUF, NEGPuBUF, INDuNEGPuBUF# */ static void npyiter_get_multi_index_itflags@tag_itflags@( NpyIter *iter, npy_intp *out_multi_index) { const npy_uint32 itflags = @const_itflags@; int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); npy_intp sizeof_axisdata; NpyIter_AxisData *axisdata; #if !((@const_itflags@)&NPY_ITFLAG_IDENTPERM) npy_int8 *perm = NIT_PERM(iter); #endif axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); #if ((@const_itflags@)&NPY_ITFLAG_IDENTPERM) out_multi_index += ndim-1; for(idim = 0; idim < ndim; ++idim, --out_multi_index, NIT_ADVANCE_AXISDATA(axisdata, 1)) { *out_multi_index = NAD_INDEX(axisdata); } #elif !((@const_itflags@)&NPY_ITFLAG_NEGPERM) for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { npy_int8 p = perm[idim]; out_multi_index[ndim-p-1] = NAD_INDEX(axisdata); } #else for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { npy_int8 p = perm[idim]; if (p < 0) { /* If the perm entry is negative, reverse the index */ out_multi_index[ndim+p] = NAD_SHAPE(axisdata) - NAD_INDEX(axisdata) - 1; } else { out_multi_index[ndim-p-1] = NAD_INDEX(axisdata); } } #endif /* not ident perm */ } /**end repeat**/ /*NUMPY_API * Compute a specialized get_multi_index function for the iterator * * If errmsg is non-NULL, it should point to a variable which will * receive the error message, and no Python exception will be set. * This is so that the function can be called from code not holding * the GIL. */ NPY_NO_EXPORT NpyIter_GetMultiIndexFunc * NpyIter_GetGetMultiIndex(NpyIter *iter, char **errmsg) { npy_uint32 itflags = NIT_ITFLAGS(iter); int ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); /* These flags must be correct */ if ((itflags&(NPY_ITFLAG_HASMULTIINDEX|NPY_ITFLAG_DELAYBUF)) != NPY_ITFLAG_HASMULTIINDEX) { if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) { if (errmsg == NULL) { PyErr_SetString(PyExc_ValueError, "Cannot retrieve a GetMultiIndex function for an " "iterator that doesn't track a multi-index."); } else { *errmsg = "Cannot retrieve a GetMultiIndex function for an " "iterator that doesn't track a multi-index."; } return NULL; } else { if (errmsg == NULL) { PyErr_SetString(PyExc_ValueError, "Cannot retrieve a GetMultiIndex function for an " "iterator that used DELAY_BUFALLOC before a Reset call"); } else { *errmsg = "Cannot retrieve a GetMultiIndex function for an " "iterator that used DELAY_BUFALLOC before a " "Reset call"; } return NULL; } } /* * Only these flags affect the iterator memory layout or * the get_multi_index behavior. IDENTPERM and NEGPERM are mutually * exclusive, so that reduces the number of cases slightly. */ itflags &= (NPY_ITFLAG_HASINDEX | NPY_ITFLAG_IDENTPERM | NPY_ITFLAG_NEGPERM | NPY_ITFLAG_BUFFER); switch (itflags) { /**begin repeat * #const_itflags = 0, * NPY_ITFLAG_HASINDEX, * NPY_ITFLAG_IDENTPERM, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM, * NPY_ITFLAG_NEGPERM, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM, * NPY_ITFLAG_BUFFER, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_BUFFER, * NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, * NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER, * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER# * #tag_itflags = 0, IND, IDP, INDuIDP, NEGP, INDuNEGP, * BUF, INDuBUF, IDPuBUF, INDuIDPuBUF, NEGPuBUF, INDuNEGPuBUF# */ case @const_itflags@: return npyiter_get_multi_index_itflags@tag_itflags@; /**end repeat**/ } /* The switch above should have caught all the possibilities. */ if (errmsg == NULL) { PyErr_Format(PyExc_ValueError, "GetGetMultiIndex internal iterator error - unexpected " "itflags/ndim/nop combination (%04x/%d/%d)", (int)itflags, (int)ndim, (int)nop); } else { *errmsg = "GetGetMultiIndex internal iterator error - unexpected " "itflags/ndim/nop combination"; } return NULL; } #undef NPY_ITERATOR_IMPLEMENTATION_CODE