/*
* This file implements most of the main API functions of NumPy's nditer.
* This excludes functions specialized using the templating system.
*
* Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
* The University of British Columbia
*
* Copyright (c) 2011 Enthought, Inc
*
* See LICENSE.txt for the license.
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
/* Indicate that this .c file is allowed to include the header */
#define NPY_ITERATOR_IMPLEMENTATION_CODE
#include "nditer_impl.h"
#include "templ_common.h"
/* Internal helper functions private to this file */
static npy_intp
npyiter_checkreducesize(NpyIter *iter, npy_intp count,
npy_intp *reduce_innersize,
npy_intp *reduce_outerdim);
/*NUMPY_API
* Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX
* was set for iterator creation, and does not work if buffering is
* enabled. This function also resets the iterator to its initial state.
*
* Returns NPY_SUCCEED or NPY_FAIL.
*/
NPY_NO_EXPORT int
NpyIter_RemoveAxis(NpyIter *iter, int axis)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
int xdim = 0;
npy_int8 *perm = NIT_PERM(iter);
NpyIter_AxisData *axisdata_del = NIT_AXISDATA(iter), *axisdata;
npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
char **resetdataptr = NIT_RESETDATAPTR(iter);
if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
PyErr_SetString(PyExc_RuntimeError,
"Iterator RemoveAxis may only be called "
"if a multi-index is being tracked");
return NPY_FAIL;
}
else if (itflags&NPY_ITFLAG_HASINDEX) {
PyErr_SetString(PyExc_RuntimeError,
"Iterator RemoveAxis may not be called on "
"an index is being tracked");
return NPY_FAIL;
}
else if (itflags&NPY_ITFLAG_BUFFER) {
PyErr_SetString(PyExc_RuntimeError,
"Iterator RemoveAxis may not be called on "
"a buffered iterator");
return NPY_FAIL;
}
else if (axis < 0 || axis >= ndim) {
PyErr_SetString(PyExc_ValueError,
"axis out of bounds in iterator RemoveAxis");
return NPY_FAIL;
}
/* Reverse axis, since the iterator treats them that way */
axis = ndim - 1 - axis;
/* First find the axis in question */
for (idim = 0; idim < ndim; ++idim) {
/* If this is it, and it's iterated forward, done */
if (perm[idim] == axis) {
xdim = idim;
break;
}
/* If this is it, but it's iterated backward, must reverse the axis */
else if (-1 - perm[idim] == axis) {
npy_intp *strides = NAD_STRIDES(axisdata_del);
npy_intp shape = NAD_SHAPE(axisdata_del), offset;
xdim = idim;
/*
* Adjust baseoffsets and resetbaseptr back to the start of
* this axis.
*/
for (iop = 0; iop < nop; ++iop) {
offset = (shape-1)*strides[iop];
baseoffsets[iop] += offset;
resetdataptr[iop] += offset;
}
break;
}
NIT_ADVANCE_AXISDATA(axisdata_del, 1);
}
if (idim == ndim) {
PyErr_SetString(PyExc_RuntimeError,
"internal error in iterator perm");
return NPY_FAIL;
}
/* Adjust the permutation */
for (idim = 0; idim < ndim-1; ++idim) {
npy_int8 p = (idim < xdim) ? perm[idim] : perm[idim+1];
if (p >= 0) {
if (p > axis) {
--p;
}
}
else if (p <= 0) {
if (p < -1-axis) {
++p;
}
}
perm[idim] = p;
}
/* Shift all the axisdata structures by one */
axisdata = NIT_INDEX_AXISDATA(axisdata_del, 1);
memmove(axisdata_del, axisdata, (ndim-1-xdim)*sizeof_axisdata);
/* Adjust the iteration size and reset iterend */
NIT_ITERSIZE(iter) = 1;
axisdata = NIT_AXISDATA(iter);
for (idim = 0; idim < ndim-1; ++idim) {
if (npy_mul_with_overflow_intp(&NIT_ITERSIZE(iter),
NIT_ITERSIZE(iter), NAD_SHAPE(axisdata))) {
NIT_ITERSIZE(iter) = -1;
break;
}
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
NIT_ITEREND(iter) = NIT_ITERSIZE(iter);
/* Shrink the iterator */
NIT_NDIM(iter) = ndim - 1;
/* If it is now 0-d fill the singleton dimension */
if (ndim == 1) {
npy_intp *strides = NAD_STRIDES(axisdata_del);
NAD_SHAPE(axisdata_del) = 1;
for (iop = 0; iop < nop; ++iop) {
strides[iop] = 0;
}
NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
}
return NpyIter_Reset(iter, NULL);
}
/*NUMPY_API
* Removes multi-index support from an iterator.
*
* Returns NPY_SUCCEED or NPY_FAIL.
*/
NPY_NO_EXPORT int
NpyIter_RemoveMultiIndex(NpyIter *iter)
{
npy_uint32 itflags;
/* Make sure the iterator is reset */
if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) {
return NPY_FAIL;
}
itflags = NIT_ITFLAGS(iter);
if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
if (NIT_ITERSIZE(iter) < 0) {
PyErr_SetString(PyExc_ValueError, "iterator is too large");
return NPY_FAIL;
}
NIT_ITFLAGS(iter) = itflags & ~NPY_ITFLAG_HASMULTIINDEX;
npyiter_coalesce_axes(iter);
}
return NPY_SUCCEED;
}
/*NUMPY_API
* Removes the inner loop handling (so HasExternalLoop returns true)
*/
NPY_NO_EXPORT int
NpyIter_EnableExternalLoop(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
/* Check conditions under which this can be done */
if (itflags&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) {
PyErr_SetString(PyExc_ValueError,
"Iterator flag EXTERNAL_LOOP cannot be used "
"if an index or multi-index is being tracked");
return NPY_FAIL;
}
if ((itflags&(NPY_ITFLAG_BUFFER|NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP))
== (NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP)) {
PyErr_SetString(PyExc_ValueError,
"Iterator flag EXTERNAL_LOOP cannot be used "
"with ranged iteration unless buffering is also enabled");
return NPY_FAIL;
}
/* Set the flag */
if (!(itflags&NPY_ITFLAG_EXLOOP)) {
itflags |= NPY_ITFLAG_EXLOOP;
NIT_ITFLAGS(iter) = itflags;
/*
* Check whether we can apply the single iteration
* optimization to the iternext function.
*/
if (!(itflags&NPY_ITFLAG_BUFFER)) {
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
}
}
}
/* Reset the iterator */
return NpyIter_Reset(iter, NULL);
}
/*NUMPY_API
* Resets the iterator to its initial state
*
* If errmsg is non-NULL, it should point to a variable which will
* receive the error message, and no Python exception will be set.
* This is so that the function can be called from code not holding
* the GIL.
*/
NPY_NO_EXPORT int
NpyIter_Reset(NpyIter *iter, char **errmsg)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *bufferdata;
/* If buffer allocation was delayed, do it now */
if (itflags&NPY_ITFLAG_DELAYBUF) {
if (!npyiter_allocate_buffers(iter, errmsg)) {
return NPY_FAIL;
}
NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
}
else {
/*
* If the iterindex is already right, no need to
* do anything
*/
bufferdata = NIT_BUFFERDATA(iter);
if (NIT_ITERINDEX(iter) == NIT_ITERSTART(iter) &&
NBF_BUFITEREND(bufferdata) <= NIT_ITEREND(iter) &&
NBF_SIZE(bufferdata) > 0) {
return NPY_SUCCEED;
}
/* Copy any data from the buffers back to the arrays */
npyiter_copy_from_buffers(iter);
}
}
npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));
if (itflags&NPY_ITFLAG_BUFFER) {
/* Prepare the next buffers and set iterend/size */
npyiter_copy_to_buffers(iter, NULL);
}
return NPY_SUCCEED;
}
/*NUMPY_API
* Resets the iterator to its initial state, with new base data pointers.
* This function requires great caution.
*
* If errmsg is non-NULL, it should point to a variable which will
* receive the error message, and no Python exception will be set.
* This is so that the function can be called from code not holding
* the GIL.
*/
NPY_NO_EXPORT int
NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char **errmsg)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int iop, nop = NIT_NOP(iter);
char **resetdataptr = NIT_RESETDATAPTR(iter);
npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
if (itflags&NPY_ITFLAG_BUFFER) {
/* If buffer allocation was delayed, do it now */
if (itflags&NPY_ITFLAG_DELAYBUF) {
if (!npyiter_allocate_buffers(iter, errmsg)) {
return NPY_FAIL;
}
NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
}
else {
/* Copy any data from the buffers back to the arrays */
npyiter_copy_from_buffers(iter);
}
}
/* The new data pointers for resetting */
for (iop = 0; iop < nop; ++iop) {
resetdataptr[iop] = baseptrs[iop] + baseoffsets[iop];
}
npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));
if (itflags&NPY_ITFLAG_BUFFER) {
/* Prepare the next buffers and set iterend/size */
npyiter_copy_to_buffers(iter, NULL);
}
return NPY_SUCCEED;
}
/*NUMPY_API
* Resets the iterator to a new iterator index range
*
* If errmsg is non-NULL, it should point to a variable which will
* receive the error message, and no Python exception will be set.
* This is so that the function can be called from code not holding
* the GIL.
*/
NPY_NO_EXPORT int
NpyIter_ResetToIterIndexRange(NpyIter *iter,
npy_intp istart, npy_intp iend, char **errmsg)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
/*int nop = NIT_NOP(iter);*/
if (!(itflags&NPY_ITFLAG_RANGE)) {
if (errmsg == NULL) {
PyErr_SetString(PyExc_ValueError,
"Cannot call ResetToIterIndexRange on an iterator without "
"requesting ranged iteration support in the constructor");
}
else {
*errmsg = "Cannot call ResetToIterIndexRange on an iterator "
"without requesting ranged iteration support in the "
"constructor";
}
return NPY_FAIL;
}
if (istart < 0 || iend > NIT_ITERSIZE(iter)) {
if (NIT_ITERSIZE(iter) < 0) {
if (errmsg == NULL) {
PyErr_SetString(PyExc_ValueError, "iterator is too large");
}
else {
*errmsg = "iterator is too large";
}
return NPY_FAIL;
}
if (errmsg == NULL) {
PyErr_Format(PyExc_ValueError,
"Out-of-bounds range [%d, %d) passed to "
"ResetToIterIndexRange", (int)istart, (int)iend);
}
else {
*errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
}
return NPY_FAIL;
}
else if (iend < istart) {
if (errmsg == NULL) {
PyErr_Format(PyExc_ValueError,
"Invalid range [%d, %d) passed to ResetToIterIndexRange",
(int)istart, (int)iend);
}
else {
*errmsg = "Invalid range passed to ResetToIterIndexRange";
}
return NPY_FAIL;
}
NIT_ITERSTART(iter) = istart;
NIT_ITEREND(iter) = iend;
return NpyIter_Reset(iter, errmsg);
}
/*NUMPY_API
* Sets the iterator to the specified multi-index, which must have the
* correct number of entries for 'ndim'. It is only valid
* when NPY_ITER_MULTI_INDEX was passed to the constructor. This operation
* fails if the multi-index is out of bounds.
*
* Returns NPY_SUCCEED on success, NPY_FAIL on failure.
*/
NPY_NO_EXPORT int
NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp *multi_index)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
npy_intp iterindex, factor;
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
npy_int8 *perm;
if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
PyErr_SetString(PyExc_ValueError,
"Cannot call GotoMultiIndex on an iterator without "
"requesting a multi-index in the constructor");
return NPY_FAIL;
}
if (itflags&NPY_ITFLAG_BUFFER) {
PyErr_SetString(PyExc_ValueError,
"Cannot call GotoMultiIndex on an iterator which "
"is buffered");
return NPY_FAIL;
}
if (itflags&NPY_ITFLAG_EXLOOP) {
PyErr_SetString(PyExc_ValueError,
"Cannot call GotoMultiIndex on an iterator which "
"has the flag EXTERNAL_LOOP");
return NPY_FAIL;
}
perm = NIT_PERM(iter);
axisdata = NIT_AXISDATA(iter);
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
/* Compute the iterindex corresponding to the multi-index */
iterindex = 0;
factor = 1;
for (idim = 0; idim < ndim; ++idim) {
npy_int8 p = perm[idim];
npy_intp i, shape;
shape = NAD_SHAPE(axisdata);
if (p < 0) {
/* If the perm entry is negative, reverse the index */
i = shape - multi_index[ndim+p] - 1;
}
else {
i = multi_index[ndim-p-1];
}
/* Bounds-check this index */
if (i >= 0 && i < shape) {
iterindex += factor * i;
factor *= shape;
}
else {
PyErr_SetString(PyExc_IndexError,
"Iterator GotoMultiIndex called with an out-of-bounds "
"multi-index");
return NPY_FAIL;
}
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
if (NIT_ITERSIZE(iter) < 0) {
PyErr_SetString(PyExc_ValueError, "iterator is too large");
return NPY_FAIL;
}
PyErr_SetString(PyExc_IndexError,
"Iterator GotoMultiIndex called with a multi-index outside the "
"restricted iteration range");
return NPY_FAIL;
}
npyiter_goto_iterindex(iter, iterindex);
return NPY_SUCCEED;
}
/*NUMPY_API
* If the iterator is tracking an index, sets the iterator
* to the specified index.
*
* Returns NPY_SUCCEED on success, NPY_FAIL on failure.
*/
NPY_NO_EXPORT int
NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
npy_intp iterindex, factor;
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
if (!(itflags&NPY_ITFLAG_HASINDEX)) {
PyErr_SetString(PyExc_ValueError,
"Cannot call GotoIndex on an iterator without "
"requesting a C or Fortran index in the constructor");
return NPY_FAIL;
}
if (itflags&NPY_ITFLAG_BUFFER) {
PyErr_SetString(PyExc_ValueError,
"Cannot call GotoIndex on an iterator which "
"is buffered");
return NPY_FAIL;
}
if (itflags&NPY_ITFLAG_EXLOOP) {
PyErr_SetString(PyExc_ValueError,
"Cannot call GotoIndex on an iterator which "
"has the flag EXTERNAL_LOOP");
return NPY_FAIL;
}
if (flat_index < 0 || flat_index >= NIT_ITERSIZE(iter)) {
PyErr_SetString(PyExc_IndexError,
"Iterator GotoIndex called with an out-of-bounds "
"index");
return NPY_FAIL;
}
axisdata = NIT_AXISDATA(iter);
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
/* Compute the iterindex corresponding to the flat_index */
iterindex = 0;
factor = 1;
for (idim = 0; idim < ndim; ++idim) {
npy_intp i, shape, iterstride;
iterstride = NAD_STRIDES(axisdata)[nop];
shape = NAD_SHAPE(axisdata);
/* Extract the index from the flat_index */
if (iterstride == 0) {
i = 0;
}
else if (iterstride < 0) {
i = shape - (flat_index/(-iterstride))%shape - 1;
}
else {
i = (flat_index/iterstride)%shape;
}
/* Add its contribution to iterindex */
iterindex += factor * i;
factor *= shape;
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
PyErr_SetString(PyExc_IndexError,
"Iterator GotoIndex called with an index outside the "
"restricted iteration range.");
return NPY_FAIL;
}
npyiter_goto_iterindex(iter, iterindex);
return NPY_SUCCEED;
}
/*NUMPY_API
* Sets the iterator position to the specified iterindex,
* which matches the iteration order of the iterator.
*
* Returns NPY_SUCCEED on success, NPY_FAIL on failure.
*/
NPY_NO_EXPORT int
NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int iop, nop = NIT_NOP(iter);
if (itflags&NPY_ITFLAG_EXLOOP) {
PyErr_SetString(PyExc_ValueError,
"Cannot call GotoIterIndex on an iterator which "
"has the flag EXTERNAL_LOOP");
return NPY_FAIL;
}
if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
if (NIT_ITERSIZE(iter) < 0) {
PyErr_SetString(PyExc_ValueError, "iterator is too large");
return NPY_FAIL;
}
PyErr_SetString(PyExc_IndexError,
"Iterator GotoIterIndex called with an iterindex outside the "
"iteration range.");
return NPY_FAIL;
}
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
npy_intp bufiterend, size;
size = NBF_SIZE(bufferdata);
bufiterend = NBF_BUFITEREND(bufferdata);
/* Check if the new iterindex is already within the buffer */
if (!(itflags&NPY_ITFLAG_REDUCE) && iterindex < bufiterend &&
iterindex >= bufiterend - size) {
npy_intp *strides, delta;
char **ptrs;
strides = NBF_STRIDES(bufferdata);
ptrs = NBF_PTRS(bufferdata);
delta = iterindex - NIT_ITERINDEX(iter);
for (iop = 0; iop < nop; ++iop) {
ptrs[iop] += delta * strides[iop];
}
NIT_ITERINDEX(iter) = iterindex;
}
/* Start the buffer at the provided iterindex */
else {
/* Write back to the arrays */
npyiter_copy_from_buffers(iter);
npyiter_goto_iterindex(iter, iterindex);
/* Prepare the next buffers and set iterend/size */
npyiter_copy_to_buffers(iter, NULL);
}
}
else {
npyiter_goto_iterindex(iter, iterindex);
}
return NPY_SUCCEED;
}
/*NUMPY_API
* Gets the current iteration index
*/
NPY_NO_EXPORT npy_intp
NpyIter_GetIterIndex(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
/* iterindex is only used if NPY_ITER_RANGED or NPY_ITER_BUFFERED was set */
if (itflags&(NPY_ITFLAG_RANGE|NPY_ITFLAG_BUFFER)) {
return NIT_ITERINDEX(iter);
}
else {
npy_intp iterindex;
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
iterindex = 0;
if (ndim == 0) {
return 0;
}
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);
for (idim = ndim-2; idim >= 0; --idim) {
iterindex += NAD_INDEX(axisdata);
NIT_ADVANCE_AXISDATA(axisdata, -1);
iterindex *= NAD_SHAPE(axisdata);
}
iterindex += NAD_INDEX(axisdata);
return iterindex;
}
}
/*NUMPY_API
* Whether the buffer allocation is being delayed
*/
NPY_NO_EXPORT npy_bool
NpyIter_HasDelayedBufAlloc(NpyIter *iter)
{
return (NIT_ITFLAGS(iter)&NPY_ITFLAG_DELAYBUF) != 0;
}
/*NUMPY_API
* Whether the iterator handles the inner loop
*/
NPY_NO_EXPORT npy_bool
NpyIter_HasExternalLoop(NpyIter *iter)
{
return (NIT_ITFLAGS(iter)&NPY_ITFLAG_EXLOOP) != 0;
}
/*NUMPY_API
* Whether the iterator is tracking a multi-index
*/
NPY_NO_EXPORT npy_bool
NpyIter_HasMultiIndex(NpyIter *iter)
{
return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASMULTIINDEX) != 0;
}
/*NUMPY_API
* Whether the iterator is tracking an index
*/
NPY_NO_EXPORT npy_bool
NpyIter_HasIndex(NpyIter *iter)
{
return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASINDEX) != 0;
}
/*NUMPY_API
* Checks to see whether this is the first time the elements
* of the specified reduction operand which the iterator points at are
* being seen for the first time. The function returns
* a reasonable answer for reduction operands and when buffering is
* disabled. The answer may be incorrect for buffered non-reduction
* operands.
*
* This function is intended to be used in EXTERNAL_LOOP mode only,
* and will produce some wrong answers when that mode is not enabled.
*
* If this function returns true, the caller should also
* check the inner loop stride of the operand, because if
* that stride is 0, then only the first element of the innermost
* external loop is being visited for the first time.
*
* WARNING: For performance reasons, 'iop' is not bounds-checked,
* it is not confirmed that 'iop' is actually a reduction
* operand, and it is not confirmed that EXTERNAL_LOOP
* mode is enabled. These checks are the responsibility of
* the caller, and should be done outside of any inner loops.
*/
NPY_NO_EXPORT npy_bool
NpyIter_IsFirstVisit(NpyIter *iter, int iop)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
axisdata = NIT_AXISDATA(iter);
for (idim = 0; idim < ndim; ++idim) {
npy_intp coord = NAD_INDEX(axisdata);
npy_intp stride = NAD_STRIDES(axisdata)[iop];
/*
* If this is a reduction dimension and the coordinate
* is not at the start, it's definitely not the first visit
*/
if (stride == 0 && coord != 0) {
return 0;
}
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
/*
* In reduction buffering mode, there's a double loop being
* tracked in the buffer part of the iterator data structure.
* We only need to check the outer level of this two-level loop,
* because of the requirement that EXTERNAL_LOOP be enabled.
*/
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
/* The outer reduce loop */
if (NBF_REDUCE_POS(bufferdata) != 0 &&
NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop] == 0) {
return 0;
}
}
return 1;
}
/*NUMPY_API
* Whether the iteration could be done with no buffering.
*/
NPY_NO_EXPORT npy_bool
NpyIter_RequiresBuffering(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int iop, nop = NIT_NOP(iter);
npyiter_opitflags *op_itflags;
if (!(itflags&NPY_ITFLAG_BUFFER)) {
return 0;
}
op_itflags = NIT_OPITFLAGS(iter);
/* If any operand requires a cast, buffering is mandatory */
for (iop = 0; iop < nop; ++iop) {
if (op_itflags[iop]&NPY_OP_ITFLAG_CAST) {
return 1;
}
}
return 0;
}
/*NUMPY_API
* Whether the iteration loop, and in particular the iternext()
* function, needs API access. If this is true, the GIL must
* be retained while iterating.
*/
NPY_NO_EXPORT npy_bool
NpyIter_IterationNeedsAPI(NpyIter *iter)
{
return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0;
}
/*NUMPY_API
* Gets the number of dimensions being iterated
*/
NPY_NO_EXPORT int
NpyIter_GetNDim(NpyIter *iter)
{
return NIT_NDIM(iter);
}
/*NUMPY_API
* Gets the number of operands being iterated
*/
NPY_NO_EXPORT int
NpyIter_GetNOp(NpyIter *iter)
{
return NIT_NOP(iter);
}
/*NUMPY_API
* Gets the number of elements being iterated
*/
NPY_NO_EXPORT npy_intp
NpyIter_GetIterSize(NpyIter *iter)
{
return NIT_ITERSIZE(iter);
}
/*NUMPY_API
* Whether the iterator is buffered
*/
NPY_NO_EXPORT npy_bool
NpyIter_IsBuffered(NpyIter *iter)
{
return (NIT_ITFLAGS(iter)&NPY_ITFLAG_BUFFER) != 0;
}
/*NUMPY_API
* Whether the inner loop can grow if buffering is unneeded
*/
NPY_NO_EXPORT npy_bool
NpyIter_IsGrowInner(NpyIter *iter)
{
return (NIT_ITFLAGS(iter)&NPY_ITFLAG_GROWINNER) != 0;
}
/*NUMPY_API
* Gets the size of the buffer, or 0 if buffering is not enabled
*/
NPY_NO_EXPORT npy_intp
NpyIter_GetBufferSize(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
return NBF_BUFFERSIZE(bufferdata);
}
else {
return 0;
}
}
/*NUMPY_API
* Gets the range of iteration indices being iterated
*/
NPY_NO_EXPORT void
NpyIter_GetIterIndexRange(NpyIter *iter,
npy_intp *istart, npy_intp *iend)
{
*istart = NIT_ITERSTART(iter);
*iend = NIT_ITEREND(iter);
}
/*NUMPY_API
* Gets the broadcast shape if a multi-index is being tracked by the iterator,
* otherwise gets the shape of the iteration as Fortran-order
* (fastest-changing index first).
*
* The reason Fortran-order is returned when a multi-index
* is not enabled is that this is providing a direct view into how
* the iterator traverses the n-dimensional space. The iterator organizes
* its memory from fastest index to slowest index, and when
* a multi-index is enabled, it uses a permutation to recover the original
* order.
*
* Returns NPY_SUCCEED or NPY_FAIL.
*/
NPY_NO_EXPORT int
NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
int idim, sizeof_axisdata;
NpyIter_AxisData *axisdata;
npy_int8 *perm;
axisdata = NIT_AXISDATA(iter);
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
perm = NIT_PERM(iter);
for(idim = 0; idim < ndim; ++idim) {
npy_int8 p = perm[idim];
if (p < 0) {
outshape[ndim+p] = NAD_SHAPE(axisdata);
}
else {
outshape[ndim-p-1] = NAD_SHAPE(axisdata);
}
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
}
else {
for(idim = 0; idim < ndim; ++idim) {
outshape[idim] = NAD_SHAPE(axisdata);
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
}
return NPY_SUCCEED;
}
/*NUMPY_API
* Builds a set of strides which are the same as the strides of an
* output array created using the NPY_ITER_ALLOCATE flag, where NULL
* was passed for op_axes. This is for data packed contiguously,
* but not necessarily in C or Fortran order. This should be used
* together with NpyIter_GetShape and NpyIter_GetNDim.
*
* A use case for this function is to match the shape and layout of
* the iterator and tack on one or more dimensions. For example,
* in order to generate a vector per input value for a numerical gradient,
* you pass in ndim*itemsize for itemsize, then add another dimension to
* the end with size ndim and stride itemsize. To do the Hessian matrix,
* you do the same thing but add two dimensions, or take advantage of
* the symmetry and pack it into 1 dimension with a particular encoding.
*
* This function may only be called if the iterator is tracking a multi-index
* and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from
* being iterated in reverse order.
*
* If an array is created with this method, simply adding 'itemsize'
* for each iteration will traverse the new array matching the
* iterator.
*
* Returns NPY_SUCCEED or NPY_FAIL.
*/
NPY_NO_EXPORT int
NpyIter_CreateCompatibleStrides(NpyIter *iter,
npy_intp itemsize, npy_intp *outstrides)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
npy_intp sizeof_axisdata;
NpyIter_AxisData *axisdata;
npy_int8 *perm;
if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
PyErr_SetString(PyExc_RuntimeError,
"Iterator CreateCompatibleStrides may only be called "
"if a multi-index is being tracked");
return NPY_FAIL;
}
axisdata = NIT_AXISDATA(iter);
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
perm = NIT_PERM(iter);
for(idim = 0; idim < ndim; ++idim) {
npy_int8 p = perm[idim];
if (p < 0) {
PyErr_SetString(PyExc_RuntimeError,
"Iterator CreateCompatibleStrides may only be called "
"if DONT_NEGATE_STRIDES was used to prevent reverse "
"iteration of an axis");
return NPY_FAIL;
}
else {
outstrides[ndim-p-1] = itemsize;
}
itemsize *= NAD_SHAPE(axisdata);
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
return NPY_SUCCEED;
}
/*NUMPY_API
* Get the array of data pointers (1 per object being iterated)
*
* This function may be safely called without holding the Python GIL.
*/
NPY_NO_EXPORT char **
NpyIter_GetDataPtrArray(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
return NBF_PTRS(bufferdata);
}
else {
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
return NAD_PTRS(axisdata);
}
}
/*NUMPY_API
* Get the array of data pointers (1 per object being iterated),
* directly into the arrays (never pointing to a buffer), for starting
* unbuffered iteration. This always returns the addresses for the
* iterator position as reset to iterator index 0.
*
* These pointers are different from the pointers accepted by
* NpyIter_ResetBasePointers, because the direction along some
* axes may have been reversed, requiring base offsets.
*
* This function may be safely called without holding the Python GIL.
*/
NPY_NO_EXPORT char **
NpyIter_GetInitialDataPtrArray(NpyIter *iter)
{
/*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
return NIT_RESETDATAPTR(iter);
}
/*NUMPY_API
* Get the array of data type pointers (1 per object being iterated)
*/
NPY_NO_EXPORT PyArray_Descr **
NpyIter_GetDescrArray(NpyIter *iter)
{
/*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
/*int ndim = NIT_NDIM(iter);*/
/*int nop = NIT_NOP(iter);*/
return NIT_DTYPES(iter);
}
/*NUMPY_API
* Get the array of objects being iterated
*/
NPY_NO_EXPORT PyArrayObject **
NpyIter_GetOperandArray(NpyIter *iter)
{
/*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
return NIT_OPERANDS(iter);
}
/*NUMPY_API
* Returns a view to the i-th object with the iterator's internal axes
*/
NPY_NO_EXPORT PyArrayObject *
NpyIter_GetIterView(NpyIter *iter, npy_intp i)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
PyArrayObject *obj, *view;
PyArray_Descr *dtype;
char *dataptr;
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
int writeable;
if (i < 0) {
PyErr_SetString(PyExc_IndexError,
"index provided for an iterator view was out of bounds");
return NULL;
}
/* Don't provide views if buffering is enabled */
if (itflags&NPY_ITFLAG_BUFFER) {
PyErr_SetString(PyExc_ValueError,
"cannot provide an iterator view when buffering is enabled");
return NULL;
}
obj = NIT_OPERANDS(iter)[i];
dtype = PyArray_DESCR(obj);
writeable = NIT_OPITFLAGS(iter)[i]&NPY_OP_ITFLAG_WRITE;
dataptr = NIT_RESETDATAPTR(iter)[i];
axisdata = NIT_AXISDATA(iter);
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
/* Retrieve the shape and strides from the axisdata */
for (idim = 0; idim < ndim; ++idim) {
shape[ndim-idim-1] = NAD_SHAPE(axisdata);
strides[ndim-idim-1] = NAD_STRIDES(axisdata)[i];
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
Py_INCREF(dtype);
view = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, ndim,
shape, strides, dataptr,
writeable ? NPY_ARRAY_WRITEABLE : 0,
NULL);
if (view == NULL) {
return NULL;
}
/* Tell the view who owns the data */
Py_INCREF(obj);
if (PyArray_SetBaseObject(view, (PyObject *)obj) < 0) {
Py_DECREF(view);
return NULL;
}
/* Make sure all the flags are good */
PyArray_UpdateFlags(view, NPY_ARRAY_UPDATE_ALL);
return view;
}
/*NUMPY_API
* Get a pointer to the index, if it is being tracked
*/
NPY_NO_EXPORT npy_intp *
NpyIter_GetIndexPtr(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
if (itflags&NPY_ITFLAG_HASINDEX) {
/* The index is just after the data pointers */
return (npy_intp*)NAD_PTRS(axisdata) + nop;
}
else {
return NULL;
}
}
/*NUMPY_API
* Gets an array of read flags (1 per object being iterated)
*/
NPY_NO_EXPORT void
NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags)
{
/*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
/*int ndim = NIT_NDIM(iter);*/
int iop, nop = NIT_NOP(iter);
npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
for (iop = 0; iop < nop; ++iop) {
outreadflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_READ) != 0;
}
}
/*NUMPY_API
* Gets an array of write flags (1 per object being iterated)
*/
NPY_NO_EXPORT void
NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags)
{
/*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
/*int ndim = NIT_NDIM(iter);*/
int iop, nop = NIT_NOP(iter);
npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
for (iop = 0; iop < nop; ++iop) {
outwriteflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) != 0;
}
}
/*NUMPY_API
* Get the array of strides for the inner loop (when HasExternalLoop is true)
*
* This function may be safely called without holding the Python GIL.
*/
NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerStrideArray(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
return NBF_STRIDES(data);
}
else {
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
return NAD_STRIDES(axisdata);
}
}
/*NUMPY_API
* Gets the array of strides for the specified axis.
* If the iterator is tracking a multi-index, gets the strides
* for the axis specified, otherwise gets the strides for
* the iteration axis as Fortran order (fastest-changing axis first).
*
* Returns NULL if an error occurs.
*/
NPY_NO_EXPORT npy_intp *
NpyIter_GetAxisStrideArray(NpyIter *iter, int axis)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
npy_int8 *perm = NIT_PERM(iter);
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
if (axis < 0 || axis >= ndim) {
PyErr_SetString(PyExc_ValueError,
"axis out of bounds in iterator GetStrideAxisArray");
return NULL;
}
if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
/* Reverse axis, since the iterator treats them that way */
axis = ndim-1-axis;
/* First find the axis in question */
for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
if (perm[idim] == axis || -1 - perm[idim] == axis) {
return NAD_STRIDES(axisdata);
}
}
}
else {
return NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, axis));
}
PyErr_SetString(PyExc_RuntimeError,
"internal error in iterator perm");
return NULL;
}
/*NUMPY_API
* Get an array of strides which are fixed. Any strides which may
* change during iteration receive the value NPY_MAX_INTP. Once
* the iterator is ready to iterate, call this to get the strides
* which will always be fixed in the inner loop, then choose optimized
* inner loop functions which take advantage of those fixed strides.
*
* This function may be safely called without holding the Python GIL.
*/
NPY_NO_EXPORT void
NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
NpyIter_AxisData *axisdata0 = NIT_AXISDATA(iter);
npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
npy_intp stride, *strides = NBF_STRIDES(data),
*ad_strides = NAD_STRIDES(axisdata0);
PyArray_Descr **dtypes = NIT_DTYPES(iter);
for (iop = 0; iop < nop; ++iop) {
stride = strides[iop];
/*
* Operands which are always/never buffered have fixed strides,
* and everything has fixed strides when ndim is 0 or 1
*/
if (ndim <= 1 || (op_itflags[iop]&
(NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_BUFNEVER))) {
out_strides[iop] = stride;
}
/* If it's a reduction, 0-stride inner loop may have fixed stride */
else if (stride == 0 && (itflags&NPY_ITFLAG_REDUCE)) {
/* If it's a reduction operand, definitely fixed stride */
if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
out_strides[iop] = stride;
}
/*
* Otherwise it's guaranteed to be a fixed stride if the
* stride is 0 for all the dimensions.
*/
else {
NpyIter_AxisData *axisdata = axisdata0;
int idim;
for (idim = 0; idim < ndim; ++idim) {
if (NAD_STRIDES(axisdata)[iop] != 0) {
break;
}
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
/* If all the strides were 0, the stride won't change */
if (idim == ndim) {
out_strides[iop] = stride;
}
else {
out_strides[iop] = NPY_MAX_INTP;
}
}
}
/*
* Inner loop contiguous array means its stride won't change when
* switching between buffering and not buffering
*/
else if (ad_strides[iop] == dtypes[iop]->elsize) {
out_strides[iop] = ad_strides[iop];
}
/*
* Otherwise the strides can change if the operand is sometimes
* buffered, sometimes not.
*/
else {
out_strides[iop] = NPY_MAX_INTP;
}
}
}
else {
/* If there's no buffering, the strides are always fixed */
memcpy(out_strides, NAD_STRIDES(axisdata0), nop*NPY_SIZEOF_INTP);
}
}
/*NUMPY_API
* Get a pointer to the size of the inner loop (when HasExternalLoop is true)
*
* This function may be safely called without holding the Python GIL.
*/
NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
/*int ndim = NIT_NDIM(iter);*/
int nop = NIT_NOP(iter);
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
return &NBF_SIZE(data);
}
else {
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
return &NAD_SHAPE(axisdata);
}
}
/*NUMPY_API
* For debugging
*/
NPY_NO_EXPORT void
NpyIter_DebugPrint(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API
printf("\n------ BEGIN ITERATOR DUMP ------\n");
printf("| Iterator Address: %p\n", (void *)iter);
printf("| ItFlags: ");
if (itflags&NPY_ITFLAG_IDENTPERM)
printf("IDENTPERM ");
if (itflags&NPY_ITFLAG_NEGPERM)
printf("NEGPERM ");
if (itflags&NPY_ITFLAG_HASINDEX)
printf("HASINDEX ");
if (itflags&NPY_ITFLAG_HASMULTIINDEX)
printf("HASMULTIINDEX ");
if (itflags&NPY_ITFLAG_FORCEDORDER)
printf("FORCEDORDER ");
if (itflags&NPY_ITFLAG_EXLOOP)
printf("EXLOOP ");
if (itflags&NPY_ITFLAG_RANGE)
printf("RANGE ");
if (itflags&NPY_ITFLAG_BUFFER)
printf("BUFFER ");
if (itflags&NPY_ITFLAG_GROWINNER)
printf("GROWINNER ");
if (itflags&NPY_ITFLAG_ONEITERATION)
printf("ONEITERATION ");
if (itflags&NPY_ITFLAG_DELAYBUF)
printf("DELAYBUF ");
if (itflags&NPY_ITFLAG_NEEDSAPI)
printf("NEEDSAPI ");
if (itflags&NPY_ITFLAG_REDUCE)
printf("REDUCE ");
if (itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS)
printf("REUSE_REDUCE_LOOPS ");
printf("\n");
printf("| NDim: %d\n", (int)ndim);
printf("| NOp: %d\n", (int)nop);
if (NIT_MASKOP(iter) >= 0) {
printf("| MaskOp: %d\n", (int)NIT_MASKOP(iter));
}
printf("| IterSize: %d\n", (int)NIT_ITERSIZE(iter));
printf("| IterStart: %d\n", (int)NIT_ITERSTART(iter));
printf("| IterEnd: %d\n", (int)NIT_ITEREND(iter));
printf("| IterIndex: %d\n", (int)NIT_ITERINDEX(iter));
printf("| Iterator SizeOf: %d\n",
(int)NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
printf("| BufferData SizeOf: %d\n",
(int)NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop));
printf("| AxisData SizeOf: %d\n",
(int)NIT_AXISDATA_SIZEOF(itflags, ndim, nop));
printf("|\n");
printf("| Perm: ");
for (idim = 0; idim < ndim; ++idim) {
printf("%d ", (int)NIT_PERM(iter)[idim]);
}
printf("\n");
printf("| DTypes: ");
for (iop = 0; iop < nop; ++iop) {
printf("%p ", (void *)NIT_DTYPES(iter)[iop]);
}
printf("\n");
printf("| DTypes: ");
for (iop = 0; iop < nop; ++iop) {
if (NIT_DTYPES(iter)[iop] != NULL)
PyObject_Print((PyObject*)NIT_DTYPES(iter)[iop], stdout, 0);
else
printf("(nil) ");
printf(" ");
}
printf("\n");
printf("| InitDataPtrs: ");
for (iop = 0; iop < nop; ++iop) {
printf("%p ", (void *)NIT_RESETDATAPTR(iter)[iop]);
}
printf("\n");
printf("| BaseOffsets: ");
for (iop = 0; iop < nop; ++iop) {
printf("%i ", (int)NIT_BASEOFFSETS(iter)[iop]);
}
printf("\n");
if (itflags&NPY_ITFLAG_HASINDEX) {
printf("| InitIndex: %d\n",
(int)(npy_intp)NIT_RESETDATAPTR(iter)[nop]);
}
printf("| Operands: ");
for (iop = 0; iop < nop; ++iop) {
printf("%p ", (void *)NIT_OPERANDS(iter)[iop]);
}
printf("\n");
printf("| Operand DTypes: ");
for (iop = 0; iop < nop; ++iop) {
PyArray_Descr *dtype;
if (NIT_OPERANDS(iter)[iop] != NULL) {
dtype = PyArray_DESCR(NIT_OPERANDS(iter)[iop]);
if (dtype != NULL)
PyObject_Print((PyObject *)dtype, stdout, 0);
else
printf("(nil) ");
}
else {
printf("(op nil) ");
}
printf(" ");
}
printf("\n");
printf("| OpItFlags:\n");
for (iop = 0; iop < nop; ++iop) {
printf("| Flags[%d]: ", (int)iop);
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_READ)
printf("READ ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITE)
printf("WRITE ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CAST)
printf("CAST ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUFNEVER)
printf("BUFNEVER ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_ALIGNED)
printf("ALIGNED ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_REDUCE)
printf("REDUCE ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_VIRTUAL)
printf("VIRTUAL ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITEMASKED)
printf("WRITEMASKED ");
printf("\n");
}
printf("|\n");
if (itflags&NPY_ITFLAG_BUFFER) {
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
printf("| BufferData:\n");
printf("| BufferSize: %d\n", (int)NBF_BUFFERSIZE(bufferdata));
printf("| Size: %d\n", (int)NBF_SIZE(bufferdata));
printf("| BufIterEnd: %d\n", (int)NBF_BUFITEREND(bufferdata));
if (itflags&NPY_ITFLAG_REDUCE) {
printf("| REDUCE Pos: %d\n",
(int)NBF_REDUCE_POS(bufferdata));
printf("| REDUCE OuterSize: %d\n",
(int)NBF_REDUCE_OUTERSIZE(bufferdata));
printf("| REDUCE OuterDim: %d\n",
(int)NBF_REDUCE_OUTERDIM(bufferdata));
}
printf("| Strides: ");
for (iop = 0; iop < nop; ++iop)
printf("%d ", (int)NBF_STRIDES(bufferdata)[iop]);
printf("\n");
/* Print the fixed strides when there's no inner loop */
if (itflags&NPY_ITFLAG_EXLOOP) {
npy_intp fixedstrides[NPY_MAXDIMS];
printf("| Fixed Strides: ");
NpyIter_GetInnerFixedStrideArray(iter, fixedstrides);
for (iop = 0; iop < nop; ++iop)
printf("%d ", (int)fixedstrides[iop]);
printf("\n");
}
printf("| Ptrs: ");
for (iop = 0; iop < nop; ++iop)
printf("%p ", (void *)NBF_PTRS(bufferdata)[iop]);
printf("\n");
if (itflags&NPY_ITFLAG_REDUCE) {
printf("| REDUCE Outer Strides: ");
for (iop = 0; iop < nop; ++iop)
printf("%d ", (int)NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop]);
printf("\n");
printf("| REDUCE Outer Ptrs: ");
for (iop = 0; iop < nop; ++iop)
printf("%p ", (void *)NBF_REDUCE_OUTERPTRS(bufferdata)[iop]);
printf("\n");
}
printf("| ReadTransferFn: ");
for (iop = 0; iop < nop; ++iop)
printf("%p ", (void *)NBF_READTRANSFERFN(bufferdata)[iop]);
printf("\n");
printf("| ReadTransferData: ");
for (iop = 0; iop < nop; ++iop)
printf("%p ", (void *)NBF_READTRANSFERDATA(bufferdata)[iop]);
printf("\n");
printf("| WriteTransferFn: ");
for (iop = 0; iop < nop; ++iop)
printf("%p ", (void *)NBF_WRITETRANSFERFN(bufferdata)[iop]);
printf("\n");
printf("| WriteTransferData: ");
for (iop = 0; iop < nop; ++iop)
printf("%p ", (void *)NBF_WRITETRANSFERDATA(bufferdata)[iop]);
printf("\n");
printf("| Buffers: ");
for (iop = 0; iop < nop; ++iop)
printf("%p ", (void *)NBF_BUFFERS(bufferdata)[iop]);
printf("\n");
printf("|\n");
}
axisdata = NIT_AXISDATA(iter);
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
printf("| AxisData[%d]:\n", (int)idim);
printf("| Shape: %d\n", (int)NAD_SHAPE(axisdata));
printf("| Index: %d\n", (int)NAD_INDEX(axisdata));
printf("| Strides: ");
for (iop = 0; iop < nop; ++iop) {
printf("%d ", (int)NAD_STRIDES(axisdata)[iop]);
}
printf("\n");
if (itflags&NPY_ITFLAG_HASINDEX) {
printf("| Index Stride: %d\n", (int)NAD_STRIDES(axisdata)[nop]);
}
printf("| Ptrs: ");
for (iop = 0; iop < nop; ++iop) {
printf("%p ", (void *)NAD_PTRS(axisdata)[iop]);
}
printf("\n");
if (itflags&NPY_ITFLAG_HASINDEX) {
printf("| Index Value: %d\n",
(int)((npy_intp*)NAD_PTRS(axisdata))[nop]);
}
}
printf("------- END ITERATOR DUMP -------\n");
fflush(stdout);
NPY_DISABLE_C_API
}
NPY_NO_EXPORT void
npyiter_coalesce_axes(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
npy_intp istrides, nstrides = NAD_NSTRIDES();
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
NpyIter_AxisData *ad_compress;
npy_intp new_ndim = 1;
/* The HASMULTIINDEX or IDENTPERM flags do not apply after coalescing */
NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX);
axisdata = NIT_AXISDATA(iter);
ad_compress = axisdata;
for (idim = 0; idim < ndim-1; ++idim) {
int can_coalesce = 1;
npy_intp shape0 = NAD_SHAPE(ad_compress);
npy_intp shape1 = NAD_SHAPE(NIT_INDEX_AXISDATA(axisdata, 1));
npy_intp *strides0 = NAD_STRIDES(ad_compress);
npy_intp *strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, 1));
/* Check that all the axes can be coalesced */
for (istrides = 0; istrides < nstrides; ++istrides) {
if (!((shape0 == 1 && strides0[istrides] == 0) ||
(shape1 == 1 && strides1[istrides] == 0)) &&
(strides0[istrides]*shape0 != strides1[istrides])) {
can_coalesce = 0;
break;
}
}
if (can_coalesce) {
npy_intp *strides = NAD_STRIDES(ad_compress);
NIT_ADVANCE_AXISDATA(axisdata, 1);
NAD_SHAPE(ad_compress) *= NAD_SHAPE(axisdata);
for (istrides = 0; istrides < nstrides; ++istrides) {
if (strides[istrides] == 0) {
strides[istrides] = NAD_STRIDES(axisdata)[istrides];
}
}
}
else {
NIT_ADVANCE_AXISDATA(axisdata, 1);
NIT_ADVANCE_AXISDATA(ad_compress, 1);
if (ad_compress != axisdata) {
memcpy(ad_compress, axisdata, sizeof_axisdata);
}
++new_ndim;
}
}
/*
* If the number of axes shrunk, reset the perm and
* compress the data into the new layout.
*/
if (new_ndim < ndim) {
npy_int8 *perm = NIT_PERM(iter);
/* Reset to an identity perm */
for (idim = 0; idim < new_ndim; ++idim) {
perm[idim] = (npy_int8)idim;
}
NIT_NDIM(iter) = new_ndim;
}
}
/*
* If errmsg is non-NULL, it should point to a variable which will
* receive the error message, and no Python exception will be set.
* This is so that the function can be called from code not holding
* the GIL.
*/
NPY_NO_EXPORT int
npyiter_allocate_buffers(NpyIter *iter, char **errmsg)
{
/*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
/*int ndim = NIT_NDIM(iter);*/
int iop = 0, nop = NIT_NOP(iter);
npy_intp i;
npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
PyArray_Descr **op_dtype = NIT_DTYPES(iter);
npy_intp buffersize = NBF_BUFFERSIZE(bufferdata);
char *buffer, **buffers = NBF_BUFFERS(bufferdata);
for (iop = 0; iop < nop; ++iop) {
npyiter_opitflags flags = op_itflags[iop];
/*
* If we have determined that a buffer may be needed,
* allocate one.
*/
if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) {
npy_intp itemsize = op_dtype[iop]->elsize;
buffer = PyArray_malloc(itemsize*buffersize);
if (buffer == NULL) {
if (errmsg == NULL) {
PyErr_NoMemory();
}
else {
*errmsg = "out of memory";
}
goto fail;
}
buffers[iop] = buffer;
}
}
return 1;
fail:
for (i = 0; i < iop; ++i) {
if (buffers[i] != NULL) {
PyArray_free(buffers[i]);
buffers[i] = NULL;
}
}
return 0;
}
/*
* This sets the AXISDATA portion of the iterator to the specified
* iterindex, updating the pointers as well. This function does
* no error checking.
*/
NPY_NO_EXPORT void
npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int nop = NIT_NOP(iter);
char **dataptr;
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
npy_intp istrides, nstrides, i, shape;
axisdata = NIT_AXISDATA(iter);
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
nstrides = NAD_NSTRIDES();
NIT_ITERINDEX(iter) = iterindex;
ndim = ndim ? ndim : 1;
if (iterindex == 0) {
dataptr = NIT_RESETDATAPTR(iter);
for (idim = 0; idim < ndim; ++idim) {
char **ptrs;
NAD_INDEX(axisdata) = 0;
ptrs = NAD_PTRS(axisdata);
for (istrides = 0; istrides < nstrides; ++istrides) {
ptrs[istrides] = dataptr[istrides];
}
NIT_ADVANCE_AXISDATA(axisdata, 1);
}
}
else {
/*
* Set the multi-index, from the fastest-changing to the
* slowest-changing.
*/
axisdata = NIT_AXISDATA(iter);
shape = NAD_SHAPE(axisdata);
i = iterindex;
iterindex /= shape;
NAD_INDEX(axisdata) = i - iterindex * shape;
for (idim = 0; idim < ndim-1; ++idim) {
NIT_ADVANCE_AXISDATA(axisdata, 1);
shape = NAD_SHAPE(axisdata);
i = iterindex;
iterindex /= shape;
NAD_INDEX(axisdata) = i - iterindex * shape;
}
dataptr = NIT_RESETDATAPTR(iter);
/*
* Accumulate the successive pointers with their
* offsets in the opposite order, starting from the
* original data pointers.
*/
for (idim = 0; idim < ndim; ++idim) {
npy_intp *strides;
char **ptrs;
strides = NAD_STRIDES(axisdata);
ptrs = NAD_PTRS(axisdata);
i = NAD_INDEX(axisdata);
for (istrides = 0; istrides < nstrides; ++istrides) {
ptrs[istrides] = dataptr[istrides] + i*strides[istrides];
}
dataptr = ptrs;
NIT_ADVANCE_AXISDATA(axisdata, -1);
}
}
}
/*
* This gets called after the buffers have been exhausted, and
* their data needs to be written back to the arrays. The multi-index
* must be positioned for the beginning of the buffer.
*/
NPY_NO_EXPORT void
npyiter_copy_from_buffers(NpyIter *iter)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
int maskop = NIT_MASKOP(iter);
npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
*reduce_outeraxisdata = NULL;
PyArray_Descr **dtypes = NIT_DTYPES(iter);
npy_intp transfersize = NBF_SIZE(bufferdata);
npy_intp *strides = NBF_STRIDES(bufferdata),
*ad_strides = NAD_STRIDES(axisdata);
npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
char **ad_ptrs = NAD_PTRS(axisdata);
char **buffers = NBF_BUFFERS(bufferdata);
char *buffer;
npy_intp reduce_outerdim = 0;
npy_intp *reduce_outerstrides = NULL;
PyArray_StridedUnaryOp *stransfer = NULL;
NpyAuxData *transferdata = NULL;
npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
NPY_SIZEOF_INTP;
/* If we're past the end, nothing to copy */
if (NBF_SIZE(bufferdata) == 0) {
return;
}
NPY_IT_DBG_PRINT("Iterator: Copying buffers to outputs\n");
if (itflags&NPY_ITFLAG_REDUCE) {
reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
transfersize *= NBF_REDUCE_OUTERSIZE(bufferdata);
}
for (iop = 0; iop < nop; ++iop) {
stransfer = NBF_WRITETRANSFERFN(bufferdata)[iop];
transferdata = NBF_WRITETRANSFERDATA(bufferdata)[iop];
buffer = buffers[iop];
/*
* Copy the data back to the arrays. If the type has refs,
* this function moves them so the buffer's refs are released.
*
* The flag USINGBUFFER is set when the buffer was used, so
* only copy back when this flag is on.
*/
if ((stransfer != NULL) &&
(op_itflags[iop]&(NPY_OP_ITFLAG_WRITE|NPY_OP_ITFLAG_USINGBUFFER))
== (NPY_OP_ITFLAG_WRITE|NPY_OP_ITFLAG_USINGBUFFER)) {
npy_intp op_transfersize;
npy_intp src_stride, *dst_strides, *dst_coords, *dst_shape;
int ndim_transfer;
NPY_IT_DBG_PRINT1("Iterator: Operand %d was buffered\n",
(int)iop);
/*
* If this operand is being reduced in the inner loop,
* its buffering stride was set to zero, and just
* one element was copied.
*/
if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
if (strides[iop] == 0) {
if (reduce_outerstrides[iop] == 0) {
op_transfersize = 1;
src_stride = 0;
dst_strides = &src_stride;
dst_coords = &NAD_INDEX(reduce_outeraxisdata);
dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
ndim_transfer = 1;
}
else {
op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
src_stride = reduce_outerstrides[iop];
dst_strides =
&NAD_STRIDES(reduce_outeraxisdata)[iop];
dst_coords = &NAD_INDEX(reduce_outeraxisdata);
dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
ndim_transfer = ndim - reduce_outerdim;
}
}
else {
if (reduce_outerstrides[iop] == 0) {
op_transfersize = NBF_SIZE(bufferdata);
src_stride = strides[iop];
dst_strides = &ad_strides[iop];
dst_coords = &NAD_INDEX(axisdata);
dst_shape = &NAD_SHAPE(axisdata);
ndim_transfer = reduce_outerdim ?
reduce_outerdim : 1;
}
else {
op_transfersize = transfersize;
src_stride = strides[iop];
dst_strides = &ad_strides[iop];
dst_coords = &NAD_INDEX(axisdata);
dst_shape = &NAD_SHAPE(axisdata);
ndim_transfer = ndim;
}
}
}
else {
op_transfersize = transfersize;
src_stride = strides[iop];
dst_strides = &ad_strides[iop];
dst_coords = &NAD_INDEX(axisdata);
dst_shape = &NAD_SHAPE(axisdata);
ndim_transfer = ndim;
}
NPY_IT_DBG_PRINT2("Iterator: Copying buffer to "
"operand %d (%d items)\n",
(int)iop, (int)op_transfersize);
/* WRITEMASKED operand */
if (op_itflags[iop] & NPY_OP_ITFLAG_WRITEMASKED) {
npy_bool *maskptr;
/*
* The mask pointer may be in the buffer or in
* the array, detect which one.
*/
if ((op_itflags[maskop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) {
maskptr = (npy_bool *)buffers[maskop];
}
else {
maskptr = (npy_bool *)ad_ptrs[maskop];
}
PyArray_TransferMaskedStridedToNDim(ndim_transfer,
ad_ptrs[iop], dst_strides, axisdata_incr,
buffer, src_stride,
maskptr, strides[maskop],
dst_coords, axisdata_incr,
dst_shape, axisdata_incr,
op_transfersize, dtypes[iop]->elsize,
(PyArray_MaskedStridedUnaryOp *)stransfer,
transferdata);
}
/* Regular operand */
else {
PyArray_TransferStridedToNDim(ndim_transfer,
ad_ptrs[iop], dst_strides, axisdata_incr,
buffer, src_stride,
dst_coords, axisdata_incr,
dst_shape, axisdata_incr,
op_transfersize, dtypes[iop]->elsize,
stransfer,
transferdata);
}
}
/* If there's no copy back, we may have to decrement refs. In
* this case, the transfer function has a 'decsrcref' transfer
* function, so we can use it to do the decrement.
*
* The flag USINGBUFFER is set when the buffer was used, so
* only decrement refs when this flag is on.
*/
else if (stransfer != NULL &&
(op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) {
NPY_IT_DBG_PRINT1("Iterator: Freeing refs and zeroing buffer "
"of operand %d\n", (int)iop);
/* Decrement refs */
stransfer(NULL, 0, buffer, dtypes[iop]->elsize,
transfersize, dtypes[iop]->elsize,
transferdata);
/*
* Zero out the memory for safety. For instance,
* if during iteration some Python code copied an
* array pointing into the buffer, it will get None
* values for its references after this.
*/
memset(buffer, 0, dtypes[iop]->elsize*transfersize);
}
}
NPY_IT_DBG_PRINT("Iterator: Finished copying buffers to outputs\n");
}
/*
* This gets called after the iterator has been positioned to a multi-index
* for the start of a buffer. It decides which operands need a buffer,
* and copies the data into the buffers.
*/
NPY_NO_EXPORT void
npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
*reduce_outeraxisdata = NULL;
PyArray_Descr **dtypes = NIT_DTYPES(iter);
PyArrayObject **operands = NIT_OPERANDS(iter);
npy_intp *strides = NBF_STRIDES(bufferdata),
*ad_strides = NAD_STRIDES(axisdata);
npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
char **buffers = NBF_BUFFERS(bufferdata);
npy_intp iterindex, iterend, transfersize,
singlestridesize, reduce_innersize = 0, reduce_outerdim = 0;
int is_onestride = 0, any_buffered = 0;
npy_intp *reduce_outerstrides = NULL;
char **reduce_outerptrs = NULL;
PyArray_StridedUnaryOp *stransfer = NULL;
NpyAuxData *transferdata = NULL;
/*
* Have to get this flag before npyiter_checkreducesize sets
* it for the next iteration.
*/
npy_bool reuse_reduce_loops = (prev_dataptrs != NULL) &&
((itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) != 0);
npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
NPY_SIZEOF_INTP;
NPY_IT_DBG_PRINT("Iterator: Copying inputs to buffers\n");
/* Calculate the size if using any buffers */
iterindex = NIT_ITERINDEX(iter);
iterend = NIT_ITEREND(iter);
transfersize = NBF_BUFFERSIZE(bufferdata);
if (transfersize > iterend - iterindex) {
transfersize = iterend - iterindex;
}
/* If last time around, the reduce loop structure was full, we reuse it */
if (reuse_reduce_loops) {
npy_intp full_transfersize, prev_reduce_outersize;
prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata);
reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
reduce_innersize = NBF_SIZE(bufferdata);
NBF_REDUCE_POS(bufferdata) = 0;
/*
* Try to do make the outersize as big as possible. This allows
* it to shrink when processing the last bit of the outer reduce loop,
* then grow again at the beginnning of the next outer reduce loop.
*/
NBF_REDUCE_OUTERSIZE(bufferdata) = (NAD_SHAPE(reduce_outeraxisdata)-
NAD_INDEX(reduce_outeraxisdata));
full_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
/* If the full transfer size doesn't fit in the buffer, truncate it */
if (full_transfersize > NBF_BUFFERSIZE(bufferdata)) {
NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
}
else {
transfersize = full_transfersize;
}
if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) {
/*
* If the previous time around less data was copied it may not
* be safe to reuse the buffers even if the pointers match.
*/
reuse_reduce_loops = 0;
}
NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
"itersize: %d\n",
(int)transfersize,
(int)reduce_innersize,
(int)NpyIter_GetIterSize(iter));
NPY_IT_DBG_PRINT1("Reduced reduce outersize: %d",
(int)NBF_REDUCE_OUTERSIZE(bufferdata));
}
/*
* If there are any reduction operands, we may have to make
* the size smaller so we don't copy the same value into
* a buffer twice, as the buffering does not have a mechanism
* to combine values itself.
*/
else if (itflags&NPY_ITFLAG_REDUCE) {
NPY_IT_DBG_PRINT("Iterator: Calculating reduce loops\n");
transfersize = npyiter_checkreducesize(iter, transfersize,
&reduce_innersize,
&reduce_outerdim);
NPY_IT_DBG_PRINT3("Reduce transfersize: %d innersize: %d "
"itersize: %d\n",
(int)transfersize,
(int)reduce_innersize,
(int)NpyIter_GetIterSize(iter));
reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
NBF_SIZE(bufferdata) = reduce_innersize;
NBF_REDUCE_POS(bufferdata) = 0;
NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim;
NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
if (reduce_innersize == 0) {
NBF_REDUCE_OUTERSIZE(bufferdata) = 0;
return;
}
else {
NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
}
}
else {
NBF_SIZE(bufferdata) = transfersize;
NBF_BUFITEREND(bufferdata) = iterindex + transfersize;
}
/* Calculate the maximum size if using a single stride and no buffers */
singlestridesize = NAD_SHAPE(axisdata)-NAD_INDEX(axisdata);
if (singlestridesize > iterend - iterindex) {
singlestridesize = iterend - iterindex;
}
if (singlestridesize >= transfersize) {
is_onestride = 1;
}
for (iop = 0; iop < nop; ++iop) {
/*
* If the buffer is write-only, these two are NULL, and the buffer
* pointers will be set up but the read copy won't be done
*/
stransfer = NBF_READTRANSFERFN(bufferdata)[iop];
transferdata = NBF_READTRANSFERDATA(bufferdata)[iop];
switch (op_itflags[iop]&
(NPY_OP_ITFLAG_BUFNEVER|
NPY_OP_ITFLAG_CAST|
NPY_OP_ITFLAG_REDUCE)) {
/* Never need to buffer this operand */
case NPY_OP_ITFLAG_BUFNEVER:
ptrs[iop] = ad_ptrs[iop];
if (itflags&NPY_ITFLAG_REDUCE) {
reduce_outerstrides[iop] = reduce_innersize *
strides[iop];
reduce_outerptrs[iop] = ptrs[iop];
}
/*
* Should not adjust the stride - ad_strides[iop]
* could be zero, but strides[iop] was initialized
* to the first non-trivial stride.
*/
stransfer = NULL;
/* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */
break;
/* Never need to buffer this operand */
case NPY_OP_ITFLAG_BUFNEVER|NPY_OP_ITFLAG_REDUCE:
ptrs[iop] = ad_ptrs[iop];
reduce_outerptrs[iop] = ptrs[iop];
reduce_outerstrides[iop] = 0;
/*
* Should not adjust the stride - ad_strides[iop]
* could be zero, but strides[iop] was initialized
* to the first non-trivial stride.
*/
stransfer = NULL;
/* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */
break;
/* Just a copy */
case 0:
/* Do not reuse buffer if it did not exist */
if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
(prev_dataptrs != NULL)) {
prev_dataptrs[iop] = NULL;
}
/*
* No copyswap or cast was requested, so all we're
* doing is copying the data to fill the buffer and
* produce a single stride. If the underlying data
* already does that, no need to copy it.
*/
if (is_onestride) {
ptrs[iop] = ad_ptrs[iop];
strides[iop] = ad_strides[iop];
stransfer = NULL;
/* Signal that the buffer is not being used */
op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
}
/* If some other op is reduced, we have a double reduce loop */
else if ((itflags&NPY_ITFLAG_REDUCE) &&
(reduce_outerdim == 1) &&
(transfersize/reduce_innersize <=
NAD_SHAPE(reduce_outeraxisdata) -
NAD_INDEX(reduce_outeraxisdata))) {
ptrs[iop] = ad_ptrs[iop];
reduce_outerptrs[iop] = ptrs[iop];
strides[iop] = ad_strides[iop];
reduce_outerstrides[iop] =
NAD_STRIDES(reduce_outeraxisdata)[iop];
stransfer = NULL;
/* Signal that the buffer is not being used */
op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
}
else {
/* In this case, the buffer is being used */
ptrs[iop] = buffers[iop];
strides[iop] = dtypes[iop]->elsize;
if (itflags&NPY_ITFLAG_REDUCE) {
reduce_outerstrides[iop] = reduce_innersize *
strides[iop];
reduce_outerptrs[iop] = ptrs[iop];
}
/* Signal that the buffer is being used */
op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
}
break;
/* Just a copy, but with a reduction */
case NPY_OP_ITFLAG_REDUCE:
/* Do not reuse buffer if it did not exist */
if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
(prev_dataptrs != NULL)) {
prev_dataptrs[iop] = NULL;
}
if (ad_strides[iop] == 0) {
strides[iop] = 0;
/* It's all in one stride in the inner loop dimension */
if (is_onestride) {
NPY_IT_DBG_PRINT1("reduce op %d all one stride\n", (int)iop);
ptrs[iop] = ad_ptrs[iop];
reduce_outerstrides[iop] = 0;
stransfer = NULL;
/* Signal that the buffer is not being used */
op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
}
/* It's all in one stride in the reduce outer loop */
else if ((reduce_outerdim > 0) &&
(transfersize/reduce_innersize <=
NAD_SHAPE(reduce_outeraxisdata) -
NAD_INDEX(reduce_outeraxisdata))) {
NPY_IT_DBG_PRINT1("reduce op %d all one outer stride\n",
(int)iop);
ptrs[iop] = ad_ptrs[iop];
/* Outer reduce loop advances by one item */
reduce_outerstrides[iop] =
NAD_STRIDES(reduce_outeraxisdata)[iop];
stransfer = NULL;
/* Signal that the buffer is not being used */
op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
}
/* In this case, the buffer is being used */
else {
NPY_IT_DBG_PRINT1("reduce op %d must buffer\n", (int)iop);
ptrs[iop] = buffers[iop];
/* Both outer and inner reduce loops have stride 0 */
if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
reduce_outerstrides[iop] = 0;
}
/* Outer reduce loop advances by one item */
else {
reduce_outerstrides[iop] = dtypes[iop]->elsize;
}
/* Signal that the buffer is being used */
op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
}
}
else if (is_onestride) {
NPY_IT_DBG_PRINT1("reduce op %d all one stride in dim 0\n", (int)iop);
ptrs[iop] = ad_ptrs[iop];
strides[iop] = ad_strides[iop];
reduce_outerstrides[iop] = 0;
stransfer = NULL;
/* Signal that the buffer is not being used */
op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
}
else {
/* It's all in one stride in the reduce outer loop */
if ((reduce_outerdim == 1) &&
(transfersize/reduce_innersize <=
NAD_SHAPE(reduce_outeraxisdata) -
NAD_INDEX(reduce_outeraxisdata))) {
ptrs[iop] = ad_ptrs[iop];
strides[iop] = ad_strides[iop];
/* Outer reduce loop advances by one item */
reduce_outerstrides[iop] =
NAD_STRIDES(reduce_outeraxisdata)[iop];
stransfer = NULL;
/* Signal that the buffer is not being used */
op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
}
/* In this case, the buffer is being used */
else {
ptrs[iop] = buffers[iop];
strides[iop] = dtypes[iop]->elsize;
if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
/* Reduction in outer reduce loop */
reduce_outerstrides[iop] = 0;
}
else {
/* Advance to next items in outer reduce loop */
reduce_outerstrides[iop] = reduce_innersize *
dtypes[iop]->elsize;
}
/* Signal that the buffer is being used */
op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
}
}
reduce_outerptrs[iop] = ptrs[iop];
break;
default:
/* In this case, the buffer is always being used */
any_buffered = 1;
/* Signal that the buffer is being used */
op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
if (!(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE)) {
ptrs[iop] = buffers[iop];
strides[iop] = dtypes[iop]->elsize;
if (itflags&NPY_ITFLAG_REDUCE) {
reduce_outerstrides[iop] = reduce_innersize *
strides[iop];
reduce_outerptrs[iop] = ptrs[iop];
}
}
/* The buffer is being used with reduction */
else {
ptrs[iop] = buffers[iop];
if (ad_strides[iop] == 0) {
NPY_IT_DBG_PRINT1("cast op %d has innermost stride 0\n", (int)iop);
strides[iop] = 0;
/* Both outer and inner reduce loops have stride 0 */
if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
reduce_outerstrides[iop] = 0;
}
/* Outer reduce loop advances by one item */
else {
NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
reduce_outerstrides[iop] = dtypes[iop]->elsize;
}
}
else {
NPY_IT_DBG_PRINT1("cast op %d has innermost stride !=0\n", (int)iop);
strides[iop] = dtypes[iop]->elsize;
if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
/* Reduction in outer reduce loop */
reduce_outerstrides[iop] = 0;
}
else {
NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
/* Advance to next items in outer reduce loop */
reduce_outerstrides[iop] = reduce_innersize *
dtypes[iop]->elsize;
}
}
reduce_outerptrs[iop] = ptrs[iop];
}
break;
}
if (stransfer != NULL) {
npy_intp src_itemsize;
npy_intp op_transfersize;
npy_intp dst_stride, *src_strides, *src_coords, *src_shape;
int ndim_transfer;
npy_bool skip_transfer = 0;
src_itemsize = PyArray_DTYPE(operands[iop])->elsize;
/* If stransfer wasn't set to NULL, buffering is required */
any_buffered = 1;
/*
* If this operand is being reduced in the inner loop,
* set its buffering stride to zero, and just copy
* one element.
*/
if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
if (ad_strides[iop] == 0) {
strides[iop] = 0;
if (reduce_outerstrides[iop] == 0) {
op_transfersize = 1;
dst_stride = 0;
src_strides = &dst_stride;
src_coords = &NAD_INDEX(reduce_outeraxisdata);
src_shape = &NAD_SHAPE(reduce_outeraxisdata);
ndim_transfer = 1;
/*
* When we're reducing a single element, and
* it's still the same element, don't overwrite
* it even when reuse reduce loops is unset.
* This preserves the precision of the
* intermediate calculation.
*/
if (prev_dataptrs &&
prev_dataptrs[iop] == ad_ptrs[iop]) {
NPY_IT_DBG_PRINT1("Iterator: skipping operand %d"
" copy because it's a 1-element reduce\n",
(int)iop);
skip_transfer = 1;
}
}
else {
op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
dst_stride = reduce_outerstrides[iop];
src_strides = &NAD_STRIDES(reduce_outeraxisdata)[iop];
src_coords = &NAD_INDEX(reduce_outeraxisdata);
src_shape = &NAD_SHAPE(reduce_outeraxisdata);
ndim_transfer = ndim - reduce_outerdim;
}
}
else {
if (reduce_outerstrides[iop] == 0) {
op_transfersize = NBF_SIZE(bufferdata);
dst_stride = strides[iop];
src_strides = &ad_strides[iop];
src_coords = &NAD_INDEX(axisdata);
src_shape = &NAD_SHAPE(axisdata);
ndim_transfer = reduce_outerdim ? reduce_outerdim : 1;
}
else {
op_transfersize = transfersize;
dst_stride = strides[iop];
src_strides = &ad_strides[iop];
src_coords = &NAD_INDEX(axisdata);
src_shape = &NAD_SHAPE(axisdata);
ndim_transfer = ndim;
}
}
}
else {
op_transfersize = transfersize;
dst_stride = strides[iop];
src_strides = &ad_strides[iop];
src_coords = &NAD_INDEX(axisdata);
src_shape = &NAD_SHAPE(axisdata);
ndim_transfer = ndim;
}
/*
* If the whole buffered loop structure remains the same,
* and the source pointer for this data didn't change,
* we don't have to copy the data again.
*/
if (reuse_reduce_loops && prev_dataptrs[iop] == ad_ptrs[iop]) {
NPY_IT_DBG_PRINT2("Iterator: skipping operands %d "
"copy (%d items) because loops are reused and the data "
"pointer didn't change\n",
(int)iop, (int)op_transfersize);
skip_transfer = 1;
}
/* If the data type requires zero-inititialization */
if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
NPY_IT_DBG_PRINT("Iterator: Buffer requires init, "
"memsetting to 0\n");
memset(ptrs[iop], 0, dtypes[iop]->elsize*op_transfersize);
/* Can't skip the transfer in this case */
skip_transfer = 0;
}
if (!skip_transfer) {
NPY_IT_DBG_PRINT2("Iterator: Copying operand %d to "
"buffer (%d items)\n",
(int)iop, (int)op_transfersize);
PyArray_TransferNDimToStrided(ndim_transfer,
ptrs[iop], dst_stride,
ad_ptrs[iop], src_strides, axisdata_incr,
src_coords, axisdata_incr,
src_shape, axisdata_incr,
op_transfersize, src_itemsize,
stransfer,
transferdata);
}
}
else if (ptrs[iop] == buffers[iop]) {
/* If the data type requires zero-inititialization */
if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
NPY_IT_DBG_PRINT1("Iterator: Write-only buffer for "
"operand %d requires init, "
"memsetting to 0\n", (int)iop);
memset(ptrs[iop], 0, dtypes[iop]->elsize*transfersize);
}
}
}
/*
* If buffering wasn't needed, we can grow the inner
* loop to as large as possible.
*
* TODO: Could grow REDUCE loop too with some more logic above.
*/
if (!any_buffered && (itflags&NPY_ITFLAG_GROWINNER) &&
!(itflags&NPY_ITFLAG_REDUCE)) {
if (singlestridesize > transfersize) {
NPY_IT_DBG_PRINT2("Iterator: Expanding inner loop size "
"from %d to %d since buffering wasn't needed\n",
(int)NBF_SIZE(bufferdata), (int)singlestridesize);
NBF_SIZE(bufferdata) = singlestridesize;
NBF_BUFITEREND(bufferdata) = iterindex + singlestridesize;
}
}
NPY_IT_DBG_PRINT1("Any buffering needed: %d\n", any_buffered);
NPY_IT_DBG_PRINT1("Iterator: Finished copying inputs to buffers "
"(buffered size is %d)\n", (int)NBF_SIZE(bufferdata));
}
/*
* This checks how much space can be buffered without encountering the
* same value twice, or for operands whose innermost stride is zero,
* without encountering a different value. By reducing the buffered
* amount to this size, reductions can be safely buffered.
*
* Reductions are buffered with two levels of looping, to avoid
* frequent copying to the buffers. The return value is the over-all
* buffer size, and when the flag NPY_ITFLAG_REDUCE is set, reduce_innersize
* receives the size of the inner of the two levels of looping.
*
* The value placed in reduce_outerdim is the index into the AXISDATA
* for where the second level of the double loop begins.
*
* The return value is always a multiple of the value placed in
* reduce_innersize.
*/
static npy_intp
npyiter_checkreducesize(NpyIter *iter, npy_intp count,
npy_intp *reduce_innersize,
npy_intp *reduce_outerdim)
{
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
NpyIter_AxisData *axisdata;
npy_intp sizeof_axisdata;
npy_intp coord, shape, *strides;
npy_intp reducespace = 1, factor;
npy_bool nonzerocoord;
npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
char stride0op[NPY_MAXARGS];
/* Default to no outer axis */
*reduce_outerdim = 0;
/* If there's only one dimension, no need to calculate anything */
if (ndim == 1 || count == 0) {
*reduce_innersize = count;
return count;
}
sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
axisdata = NIT_AXISDATA(iter);
/* Indicate which REDUCE operands have stride 0 in the inner loop */
strides = NAD_STRIDES(axisdata);
for (iop = 0; iop < nop; ++iop) {
stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
(strides[iop] == 0);
NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
"the inner loop? %d\n", iop, (int)stride0op[iop]);
}
shape = NAD_SHAPE(axisdata);
coord = NAD_INDEX(axisdata);
reducespace += (shape-coord-1);
factor = shape;
NIT_ADVANCE_AXISDATA(axisdata, 1);
/* Initialize nonzerocoord based on the first coordinate */
nonzerocoord = (coord != 0);
/* Go forward through axisdata, calculating the space available */
for (idim = 1; idim < ndim && reducespace < count;
++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
NPY_IT_DBG_PRINT2("Iterator: inner loop reducespace %d, count %d\n",
(int)reducespace, (int)count);
strides = NAD_STRIDES(axisdata);
for (iop = 0; iop < nop; ++iop) {
/*
* If a reduce stride switched from zero to non-zero, or
* vice versa, that's the point where the data will stop
* being the same element or will repeat, and if the
* buffer starts with an all zero multi-index up to this
* point, gives us the reduce_innersize.
*/
if((stride0op[iop] && (strides[iop] != 0)) ||
(!stride0op[iop] &&
(strides[iop] == 0) &&
(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
"buffer to %d\n", (int)reducespace);
/*
* If we already found more elements than count, or
* the starting coordinate wasn't zero, the two-level
* looping is unnecessary/can't be done, so return.
*/
if (count <= reducespace) {
*reduce_innersize = count;
NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
return count;
}
else if (nonzerocoord) {
if (reducespace < count) {
count = reducespace;
}
*reduce_innersize = count;
/* NOTE: This is similar to the (coord != 0) case below. */
NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
return count;
}
else {
*reduce_innersize = reducespace;
break;
}
}
}
/* If we broke out of the loop early, we found reduce_innersize */
if (iop != nop) {
NPY_IT_DBG_PRINT2("Iterator: Found first dim not "
"reduce (%d of %d)\n", iop, nop);
break;
}
shape = NAD_SHAPE(axisdata);
coord = NAD_INDEX(axisdata);
if (coord != 0) {
nonzerocoord = 1;
}
reducespace += (shape-coord-1) * factor;
factor *= shape;
}
/*
* If there was any non-zero coordinate, the reduction inner
* loop doesn't fit in the buffersize, or the reduction inner loop
* covered the entire iteration size, can't do the double loop.
*/
if (nonzerocoord || count < reducespace || idim == ndim) {
if (reducespace < count) {
count = reducespace;
}
*reduce_innersize = count;
/* In this case, we can't reuse the reduce loops */
NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
return count;
}
coord = NAD_INDEX(axisdata);
if (coord != 0) {
/*
* In this case, it is only safe to reuse the buffer if the amount
* of data copied is not more then the current axes, as is the
* case when reuse_reduce_loops was active already.
* It should be in principle OK when the idim loop returns immidiatly.
*/
NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
}
else {
/* In this case, we can reuse the reduce loops */
NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
}
*reduce_innersize = reducespace;
count /= reducespace;
NPY_IT_DBG_PRINT2("Iterator: reduce_innersize %d count /ed %d\n",
(int)reducespace, (int)count);
/*
* Continue through the rest of the dimensions. If there are
* two separated reduction axes, we may have to cut the buffer
* short again.
*/
*reduce_outerdim = idim;
reducespace = 1;
factor = 1;
/* Indicate which REDUCE operands have stride 0 at the current level */
strides = NAD_STRIDES(axisdata);
for (iop = 0; iop < nop; ++iop) {
stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
(strides[iop] == 0);
NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
"the outer loop? %d\n", iop, (int)stride0op[iop]);
}
shape = NAD_SHAPE(axisdata);
reducespace += (shape-coord-1) * factor;
factor *= shape;
NIT_ADVANCE_AXISDATA(axisdata, 1);
++idim;
for (; idim < ndim && reducespace < count;
++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
NPY_IT_DBG_PRINT2("Iterator: outer loop reducespace %d, count %d\n",
(int)reducespace, (int)count);
strides = NAD_STRIDES(axisdata);
for (iop = 0; iop < nop; ++iop) {
/*
* If a reduce stride switched from zero to non-zero, or
* vice versa, that's the point where the data will stop
* being the same element or will repeat, and if the
* buffer starts with an all zero multi-index up to this
* point, gives us the reduce_innersize.
*/
if((stride0op[iop] && (strides[iop] != 0)) ||
(!stride0op[iop] &&
(strides[iop] == 0) &&
(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
"buffer to %d\n", (int)reducespace);
/*
* This terminates the outer level of our double loop.
*/
if (count <= reducespace) {
return count * (*reduce_innersize);
}
else {
return reducespace * (*reduce_innersize);
}
}
}
shape = NAD_SHAPE(axisdata);
coord = NAD_INDEX(axisdata);
if (coord != 0) {
nonzerocoord = 1;
}
reducespace += (shape-coord-1) * factor;
factor *= shape;
}
if (reducespace < count) {
count = reducespace;
}
return count * (*reduce_innersize);
}
#undef NPY_ITERATOR_IMPLEMENTATION_CODE