Blob Blame History Raw
/*
 * This is a PRIVATE INTERNAL NumPy header, intended to be used *ONLY*
 * by the iterator implementation code. All other internal NumPy code
 * should use the exposed iterator API.
 */
#ifndef NPY_ITERATOR_IMPLEMENTATION_CODE
#error "This header is intended for use ONLY by iterator implementation code."
#endif

#ifndef _NPY_PRIVATE__NDITER_IMPL_H_
#define _NPY_PRIVATE__NDITER_IMPL_H_

#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "structmember.h"

#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#include <numpy/arrayobject.h>
#include <npy_pycompat.h>
#include "convert_datatype.h"

#include "lowlevel_strided_loops.h"

/********** ITERATOR CONSTRUCTION TIMING **************/
/*
 * Set NPY_IT_CONSTRUCTION_TIMING to a nonzero value to enable the
 * cycle-counting helpers below. When it is 0, only NPY_IT_TIME_POINT
 * is defined (as an empty expansion); the NPY_IT_PRINT_TIME_* macros
 * are not defined at all, so any use of them must itself be guarded
 * by `#if NPY_IT_CONSTRUCTION_TIMING`.
 */
#define NPY_IT_CONSTRUCTION_TIMING 0

#if NPY_IT_CONSTRUCTION_TIMING
/*
 * Stores the current 64-bit time-stamp counter into `var`.
 * Uses GCC-style inline assembly with the x86 `rdtsc` instruction,
 * which returns the high half in EDX ("=d") and the low half in
 * EAX ("=a"); the two halves are combined into one value.
 */
#define NPY_IT_TIME_POINT(var) { \
            unsigned int hi, lo; \
            __asm__ __volatile__ ( \
                "rdtsc" \
                : "=d" (hi), "=a" (lo)); \
            var = (((unsigned long long)hi) << 32) | lo; \
        }
/*
 * Prints the name of `var` as the start marker and records its value
 * in `c_temp`, which must be a variable in the calling scope.
 */
#define NPY_IT_PRINT_TIME_START(var) { \
            printf("%30s: start\n", #var); \
            c_temp = var; \
        }
/*
 * Prints the number of clocks elapsed between the previously recorded
 * time point (`c_temp`) and `var`, then makes `var` the new reference.
 */
#define NPY_IT_PRINT_TIME_VAR(var) { \
            printf("%30s: %6.0f clocks\n", #var, \
                    ((double)(var-c_temp))); \
            c_temp = var; \
        }
#else
/* Timing disabled: taking a time point expands to nothing. */
#define NPY_IT_TIME_POINT(var)
#endif

/******************************************************/

/********** PRINTF DEBUG TRACING **************/
/*
 * Set NPY_IT_DBG_TRACING to a nonzero value to turn the NPY_IT_DBG_PRINT*
 * macros into printf calls. When it is 0 they expand to nothing, so
 * their arguments are not evaluated.
 */
#define NPY_IT_DBG_TRACING 0

#if NPY_IT_DBG_TRACING
/* Prints the string `s` verbatim (it is NOT used as a format string). */
#define NPY_IT_DBG_PRINT(s) printf("%s", s)
/*
 * NPY_IT_DBG_PRINTn: `s` is a printf format string consuming exactly
 * n arguments (p1..pn).
 */
#define NPY_IT_DBG_PRINT1(s, p1) printf(s, p1)
#define NPY_IT_DBG_PRINT2(s, p1, p2) printf(s, p1, p2)
#define NPY_IT_DBG_PRINT3(s, p1, p2, p3) printf(s, p1, p2, p3)
#else
/* Tracing disabled: all trace macros compile away. */
#define NPY_IT_DBG_PRINT(s)
#define NPY_IT_DBG_PRINT1(s, p1)
#define NPY_IT_DBG_PRINT2(s, p1, p2)
#define NPY_IT_DBG_PRINT3(s, p1, p2, p3)
#endif
/**********************************************/

/*
 * Rounds up a number of bytes to be divisible by sizeof intp.
 *
 * The argument is parenthesized so that expression arguments containing
 * operators of lower precedence than '+' (e.g. `c ? a : b`) are rounded
 * as a whole. The argument is evaluated exactly once.
 */
#if NPY_SIZEOF_INTP == 4
#define NPY_INTP_ALIGNED(size) (((size) + 0x3)&(-0x4))
#else
#define NPY_INTP_ALIGNED(size) (((size) + 0x7)&(-0x8))
#endif

/*
 * Internal iterator flags.
 *
 * Each flag is a distinct bit; they are combined into the iterator's
 * `itflags` bit mask.
 */

/* The perm is the identity */
#define NPY_ITFLAG_IDENTPERM            (1 << 0)
/* The perm has negative entries (indicating flipped axes) */
#define NPY_ITFLAG_NEGPERM              (1 << 1)
/* The iterator is tracking an index */
#define NPY_ITFLAG_HASINDEX             (1 << 2)
/* The iterator is tracking a multi-index */
#define NPY_ITFLAG_HASMULTIINDEX        (1 << 3)
/* The iteration order was forced on construction */
#define NPY_ITFLAG_FORCEDORDER          (1 << 4)
/* The inner loop is handled outside the iterator */
#define NPY_ITFLAG_EXLOOP               (1 << 5)
/* The iterator is ranged */
#define NPY_ITFLAG_RANGE                (1 << 6)
/* The iterator is buffered */
#define NPY_ITFLAG_BUFFER               (1 << 7)
/* The iterator should grow the buffered inner loop when possible */
#define NPY_ITFLAG_GROWINNER            (1 << 8)
/* There is just one iteration, can specialize iternext for that */
#define NPY_ITFLAG_ONEITERATION         (1 << 9)
/* Delay buffer allocation until first Reset* call */
#define NPY_ITFLAG_DELAYBUF             (1 << 10)
/* Iteration needs API access during iternext */
#define NPY_ITFLAG_NEEDSAPI             (1 << 11)
/* Iteration includes one or more operands being reduced */
#define NPY_ITFLAG_REDUCE               (1 << 12)
/* Reduce iteration doesn't need to recalculate reduce loops next time */
#define NPY_ITFLAG_REUSE_REDUCE_LOOPS   (1 << 13)

/*
 * Internal iterator per-operand iterator flags.
 *
 * Each flag is a distinct bit of a per-operand bit mask.
 */

/* The operand will be written to */
#define NPY_OP_ITFLAG_WRITE         (1 << 0)
/* The operand will be read from */
#define NPY_OP_ITFLAG_READ          (1 << 1)
/* The operand needs type conversion/byte swapping/alignment */
#define NPY_OP_ITFLAG_CAST          (1 << 2)
/* The operand never needs buffering */
#define NPY_OP_ITFLAG_BUFNEVER      (1 << 3)
/* The operand is aligned */
#define NPY_OP_ITFLAG_ALIGNED       (1 << 4)
/* The operand is being reduced */
#define NPY_OP_ITFLAG_REDUCE        (1 << 5)
/* The operand is for temporary use, does not have a backing array */
#define NPY_OP_ITFLAG_VIRTUAL       (1 << 6)
/* The operand requires masking when copying buffer -> array */
#define NPY_OP_ITFLAG_WRITEMASKED   (1 << 7)
/* The operand's data pointer is pointing into its buffer */
#define NPY_OP_ITFLAG_USINGBUFFER   (1 << 8)
/* The operand must be copied (with UPDATEIFCOPY if also ITFLAG_WRITE) */
#define NPY_OP_ITFLAG_FORCECOPY     (1 << 9)

/*
 * The data layout of the iterator is fully specified by
 * a triple (itflags, ndim, nop).  These three variables
 * are expected to exist in all functions calling these macros,
 * either as true variables initialized to the correct values
 * from the iterator, or as constants in the case of specialized
 * functions such as the various iternext functions.
 */

/*
 * Fixed-size head of the iterator. The variable-size members follow it
 * in the same allocation, starting at `iter_flexdata`, and are located
 * with the NIT_*_OFFSET macros.
 */
struct NpyIter_InternalOnly {
    /* Initial fixed position data */
    /* Bit mask of NPY_ITFLAG_* values */
    npy_uint32 itflags;
    /* Number of dimensions and number of operands */
    npy_uint8 ndim, nop;
    /* NOTE(review): presumably the index of the mask operand -- confirm */
    npy_int8 maskop;
    npy_intp itersize, iterstart, iterend;
    /*
     * iterindex is only used if RANGED or BUFFERED is set
     * (see NPY_ITFLAG_RANGE / NPY_ITFLAG_BUFFER)
     */
    npy_intp iterindex;
    /*
     * The rest is variable. This single char only marks where the
     * variable-size data begins; its address is the base for all
     * NIT_*_OFFSET computations.
     */
    char iter_flexdata;
};

/* Per-axis and buffering data blocks stored inside the iterator */
typedef struct NpyIter_AD NpyIter_AxisData;
typedef struct NpyIter_BD NpyIter_BufferData;

/* Storage type for one operand's NPY_OP_ITFLAG_* bit mask */
typedef npy_int16 npyiter_opitflags;

/*
 * Byte sizes of the iterator members.
 *
 * All macros take the (itflags, ndim, nop) triple for uniformity, even
 * when a particular size does not depend on every element of it.
 * Arguments are fully parenthesized so that expression arguments
 * (e.g. `a + b`, `c ? x : y`) are handled correctly.
 */
/* One npy_int8 per axis, with room for NPY_MAXDIMS, intp-aligned */
#define NIT_PERM_SIZEOF(itflags, ndim, nop) \
        NPY_INTP_ALIGNED(NPY_MAXDIMS)
/* One intp-sized slot per operand */
#define NIT_DTYPES_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
/* One intp-sized slot per operand, plus one extra slot */
#define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*((nop)+1))
/* One intp-sized slot per operand, plus one extra slot */
#define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*((nop)+1))
/* One intp-sized slot per operand */
#define NIT_OPERANDS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
/* One npyiter_opitflags per operand, rounded up to intp alignment */
#define NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) \
        (NPY_INTP_ALIGNED(sizeof(npyiter_opitflags) * (nop)))
/*
 * Only present when the iterator is buffered: the 6 fixed intp fields
 * of struct NpyIter_BD followed by 9 per-operand arrays (see the NBF_*
 * accessors, which use slots 0*nop through 8*nop).
 */
#define NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) \
        (((itflags)&NPY_ITFLAG_BUFFER) ? \
                ((NPY_SIZEOF_INTP)*(6 + 9*(nop))) : 0)

/*
 * Byte offsets of the iterator members, measured from the address of
 * iter->iter_flexdata.
 *
 * The members are laid out back to back in this order:
 *   perm, dtypes, resetdataptr, baseoffsets, operands, opitflags,
 *   bufferdata, axisdata
 * so every offset is the previous member's size added to the previous
 * member's offset.
 */
#define NIT_PERM_OFFSET() (0)
#define NIT_DTYPES_OFFSET(itflags, ndim, nop) ( \
        NIT_PERM_SIZEOF(itflags, ndim, nop) + \
        NIT_PERM_OFFSET())
#define NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) ( \
        NIT_DTYPES_SIZEOF(itflags, ndim, nop) + \
        NIT_DTYPES_OFFSET(itflags, ndim, nop))
#define NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) ( \
        NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) + \
        NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop))
#define NIT_OPERANDS_OFFSET(itflags, ndim, nop) ( \
        NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) + \
        NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop))
#define NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) ( \
        NIT_OPERANDS_SIZEOF(itflags, ndim, nop) + \
        NIT_OPERANDS_OFFSET(itflags, ndim, nop))
#define NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) ( \
        NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) + \
        NIT_OPITFLAGS_OFFSET(itflags, ndim, nop))
#define NIT_AXISDATA_OFFSET(itflags, ndim, nop) ( \
        NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) + \
        NIT_BUFFERDATA_OFFSET(itflags, ndim, nop))

/*
 * Internal-only ITERATOR DATA MEMBER ACCESS
 *
 * The first group reads fixed fields of the iterator struct directly.
 * `iter` is parenthesized in every accessor so that pointer expressions
 * such as `iters + 1` or `&local_iter` can be passed.
 */
#define NIT_ITFLAGS(iter) \
        ((iter)->itflags)
#define NIT_NDIM(iter) \
        ((iter)->ndim)
#define NIT_NOP(iter) \
        ((iter)->nop)
#define NIT_MASKOP(iter) \
        ((iter)->maskop)
#define NIT_ITERSIZE(iter) \
        ((iter)->itersize)
#define NIT_ITERSTART(iter) \
        ((iter)->iterstart)
#define NIT_ITEREND(iter) \
        ((iter)->iterend)
#define NIT_ITERINDEX(iter) \
        ((iter)->iterindex)
/*
 * The second group returns typed pointers into the variable-size data
 * that starts at iter->iter_flexdata. Apart from NIT_PERM, these expand
 * to expressions using `itflags`, `ndim` and `nop`, which must therefore
 * exist in the calling scope.
 */
#define NIT_PERM(iter)  ((npy_int8 *)( \
        &(iter)->iter_flexdata + NIT_PERM_OFFSET()))
#define NIT_DTYPES(iter) ((PyArray_Descr **)( \
        &(iter)->iter_flexdata + NIT_DTYPES_OFFSET(itflags, ndim, nop)))
#define NIT_RESETDATAPTR(iter) ((char **)( \
        &(iter)->iter_flexdata + NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop)))
#define NIT_BASEOFFSETS(iter) ((npy_intp *)( \
        &(iter)->iter_flexdata + NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop)))
#define NIT_OPERANDS(iter) ((PyArrayObject **)( \
        &(iter)->iter_flexdata + NIT_OPERANDS_OFFSET(itflags, ndim, nop)))
#define NIT_OPITFLAGS(iter) ((npyiter_opitflags *)( \
        &(iter)->iter_flexdata + NIT_OPITFLAGS_OFFSET(itflags, ndim, nop)))
#define NIT_BUFFERDATA(iter) ((NpyIter_BufferData *)( \
        &(iter)->iter_flexdata + NIT_BUFFERDATA_OFFSET(itflags, ndim, nop)))
#define NIT_AXISDATA(iter) ((NpyIter_AxisData *)( \
        &(iter)->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop)))

/* Internal-only BUFFERDATA MEMBER ACCESS */
/*
 * Fixed head of the buffering data block: six intp fields, followed by
 * variable-size per-operand arrays that begin at `bd_flexdata` and are
 * reached through the NBF_* accessor macros.
 */
struct NpyIter_BD {
    npy_intp buffersize, size, bufiterend,
             reduce_pos, reduce_outersize, reduce_outerdim;
    /* First intp-sized slot of the per-operand arrays */
    npy_intp bd_flexdata;
};
/* Fixed fields of the buffer data */
#define NBF_BUFFERSIZE(bufferdata)          ((bufferdata)->buffersize)
#define NBF_SIZE(bufferdata)                ((bufferdata)->size)
#define NBF_BUFITEREND(bufferdata)          ((bufferdata)->bufiterend)
#define NBF_REDUCE_POS(bufferdata)          ((bufferdata)->reduce_pos)
#define NBF_REDUCE_OUTERSIZE(bufferdata)    ((bufferdata)->reduce_outersize)
#define NBF_REDUCE_OUTERDIM(bufferdata)     ((bufferdata)->reduce_outerdim)
/*
 * Per-operand arrays. Nine arrays of `nop` intp-sized slots each are
 * stored back to back starting at bd_flexdata; array number k begins
 * k*nop slots in. `nop` must exist in the calling scope.
 */
/* array 0: npy_intp strides */
#define NBF_STRIDES(bufferdata) \
        (&(bufferdata)->bd_flexdata)
/* array 1: char * data pointers */
#define NBF_PTRS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + (nop)))
/* array 2: npy_intp reduce outer strides */
#define NBF_REDUCE_OUTERSTRIDES(bufferdata) \
        (&(bufferdata)->bd_flexdata + 2*(nop))
/* array 3: char * reduce outer pointers */
#define NBF_REDUCE_OUTERPTRS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + 3*(nop)))
/* arrays 4 and 5: read transfer function and its auxiliary data */
#define NBF_READTRANSFERFN(bufferdata) \
        ((PyArray_StridedUnaryOp **)(&(bufferdata)->bd_flexdata + 4*(nop)))
#define NBF_READTRANSFERDATA(bufferdata) \
        ((NpyAuxData **)(&(bufferdata)->bd_flexdata + 5*(nop)))
/* arrays 6 and 7: write transfer function and its auxiliary data */
#define NBF_WRITETRANSFERFN(bufferdata) \
        ((PyArray_StridedUnaryOp **)(&(bufferdata)->bd_flexdata + 6*(nop)))
#define NBF_WRITETRANSFERDATA(bufferdata) \
        ((NpyAuxData **)(&(bufferdata)->bd_flexdata + 7*(nop)))
/* array 8: char * buffers */
#define NBF_BUFFERS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + 8*(nop)))

/* Internal-only AXISDATA MEMBER ACCESS. */
/*
 * Fixed head of one per-axis data block: the axis shape and current
 * index, followed by variable-size per-operand arrays that begin at
 * `ad_flexdata` (see NAD_STRIDES / NAD_PTRS). The real size of one
 * block is NIT_AXISDATA_SIZEOF(itflags, ndim, nop), not
 * sizeof(struct NpyIter_AD).
 */
struct NpyIter_AD {
    npy_intp shape, index;
    /* First intp-sized slot of the per-operand arrays */
    npy_intp ad_flexdata;
};
/* Fixed fields of one AXISDATA block */
#define NAD_SHAPE(axisdata) ((axisdata)->shape)
#define NAD_INDEX(axisdata) ((axisdata)->index)
/*
 * Per-operand arrays of one AXISDATA block. Each array has (nop+1)
 * intp-sized slots: the strides come first, the data pointers follow.
 * `nop` must exist in the calling scope; it is parenthesized because it
 * may be a constant expression rather than a plain variable.
 */
#define NAD_STRIDES(axisdata) ( \
        &(axisdata)->ad_flexdata + 0)
/*
 * The offset is applied in intp-sized slots before the cast, exactly
 * like the NBF_* accessors, so the result does not depend on
 * sizeof(char *) matching sizeof(npy_intp).
 */
#define NAD_PTRS(axisdata) ((char **) \
        (&(axisdata)->ad_flexdata + 1*((nop)+1)))

/*
 * Number of stride/pointer slots actually in use: one per operand, plus
 * one when the iterator tracks an index. Requires `itflags` and `nop`
 * in the calling scope.
 */
#define NAD_NSTRIDES() \
        ((nop) + (((itflags)&NPY_ITFLAG_HASINDEX) ? 1 : 0))

/*
 * Size in bytes of one AXISDATA struct within the iterator.
 *
 * Counted in intp-sized slots:
 *   2            - intp shape, intp index
 *   2*(nop+1)    - intp stride[nop+1] AND char* ptr[nop+1]
 */
#define NIT_AXISDATA_SIZEOF(itflags, ndim, nop) \
        ((2 + 2*((nop)+1))*NPY_SIZEOF_INTP)

/*
 * Macros to index into / advance an AXISDATA pointer by a specified
 * count of AXISDATA blocks.
 *
 * Both require that a variable `sizeof_axisdata` exists in the calling
 * scope and was previously initialized to
 * NIT_AXISDATA_SIZEOF(itflags, ndim, nop); plain pointer arithmetic on
 * NpyIter_AxisData * cannot be used because the blocks are variable-sized.
 *
 * NIT_INDEX_AXISDATA yields the pointer `index` blocks away without
 * modifying `axisdata`. NIT_ADVANCE_AXISDATA assigns that pointer back
 * to `axisdata` (which must be an lvalue and is evaluated twice); the
 * expansion is parenthesized so it can be used inside larger expressions.
 */
#define NIT_INDEX_AXISDATA(axisdata, index) ((NpyIter_AxisData *) \
        (((char *)(axisdata)) + (index)*sizeof_axisdata))
#define NIT_ADVANCE_AXISDATA(axisdata, count) \
        ((axisdata) = NIT_INDEX_AXISDATA(axisdata, count))

/*
 * Size in bytes of the whole iterator: the fixed struct, the
 * variable-size members that precede the AXISDATA array, and one
 * AXISDATA block per dimension. At least one AXISDATA block is always
 * included, even when ndim is 0.
 *
 * `ndim` is parenthesized so an expression argument is tested and
 * multiplied as a whole; note that it is evaluated more than once.
 */
#define NIT_SIZEOF_ITERATOR(itflags, ndim, nop) ( \
        sizeof(struct NpyIter_InternalOnly) + \
        NIT_AXISDATA_OFFSET(itflags, ndim, nop) + \
        NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*((ndim) ? (ndim) : 1))

/*
 * Internal helper functions shared between implementation files.
 *
 * Only the declarations are visible here; the notes below are inferred
 * from the names and signatures and should be confirmed against the
 * definitions.
 */
/* NOTE(review): presumably merges axes of `iter` where possible -- confirm */
NPY_NO_EXPORT void
npyiter_coalesce_axes(NpyIter *iter);
/*
 * Returns an int status. NOTE(review): `errmsg` looks like an optional
 * out-parameter for an error string; the success/failure convention is
 * not visible from this header -- confirm.
 */
NPY_NO_EXPORT int
npyiter_allocate_buffers(NpyIter *iter, char **errmsg);
/* NOTE(review): presumably positions `iter` at `iterindex` -- confirm */
NPY_NO_EXPORT void
npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex);
/* NOTE(review): presumably writes buffered data back to the operands -- confirm */
NPY_NO_EXPORT void
npyiter_copy_from_buffers(NpyIter *iter);
/*
 * NOTE(review): presumably fills the buffers from the operands; the
 * role of `prev_dataptrs` is not visible from this header -- confirm.
 */
NPY_NO_EXPORT void
npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs);


#endif