/* -*- c -*- */
#define _UMATHMODULE
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#include "Python.h"
#include "npy_config.h"
#define PY_ARRAY_UNIQUE_SYMBOL _npy_umathmodule_ARRAY_API
#define NO_IMPORT_ARRAY
#include "numpy/npy_common.h"
#include "numpy/arrayobject.h"
#include "numpy/ufuncobject.h"
#include "numpy/npy_math.h"
#include "numpy/halffloat.h"
#include "lowlevel_strided_loops.h"
#include "npy_pycompat.h"
#include "ufunc_object.h"
#include <string.h> /* for memchr */
/*
* cutoff blocksize for pairwise summation
* decreasing it reduces rounding errors slightly, as more pairs are summed,
* but also lowers performance; since the inner loop is unrolled eight times,
* the effective blocksize is 16
*/
#define PW_BLOCKSIZE 128
/*
* include vectorized functions and dispatchers
* this file is safe to include also for generic builds;
* platform-specific instructions are either masked via the preprocessor or
* detected at runtime
*/
#include "simd.inc"
/*
*****************************************************************************
** UFUNC LOOPS **
*****************************************************************************
*/
/* unary loop input and output contiguous */
#define IS_UNARY_CONT(tin, tout) (steps[0] == sizeof(tin) && \
steps[1] == sizeof(tout))
#define IS_BINARY_REDUCE ((args[0] == args[2])\
&& (steps[0] == steps[2])\
&& (steps[0] == 0))
/* binary loop input and output contiguous */
#define IS_BINARY_CONT(tin, tout) (steps[0] == sizeof(tin) && \
steps[1] == sizeof(tin) && \
steps[2] == sizeof(tout))
/* binary loop input and output contiguous with first scalar */
#define IS_BINARY_CONT_S1(tin, tout) (steps[0] == 0 && \
steps[1] == sizeof(tin) && \
steps[2] == sizeof(tout))
/* binary loop input and output contiguous with second scalar */
#define IS_BINARY_CONT_S2(tin, tout) (steps[0] == sizeof(tin) && \
steps[1] == 0 && \
steps[2] == sizeof(tout))
#define OUTPUT_LOOP\
char *op1 = args[1];\
npy_intp os1 = steps[1];\
npy_intp n = dimensions[0];\
npy_intp i;\
for(i = 0; i < n; i++, op1 += os1)
#define UNARY_LOOP\
char *ip1 = args[0], *op1 = args[1];\
npy_intp is1 = steps[0], os1 = steps[1];\
npy_intp n = dimensions[0];\
npy_intp i;\
for(i = 0; i < n; i++, ip1 += is1, op1 += os1)
/*
* loop with contiguous specialization
* `op` should be the code working on `tin in` and
* storing the result in `tout *out`
* combine with NPY_GCC_OPT_3 to allow autovectorization
* should only be used where it is worthwhile to avoid code bloat
*/
#define BASE_UNARY_LOOP(tin, tout, op) \
UNARY_LOOP { \
const tin in = *(tin *)ip1; \
tout * out = (tout *)op1; \
op; \
}
#define UNARY_LOOP_FAST(tin, tout, op) \
do { \
/* condition allows compiler to optimize the generic macro */ \
if (IS_UNARY_CONT(tin, tout)) { \
if (args[0] == args[1]) { \
BASE_UNARY_LOOP(tin, tout, op) \
} \
else { \
BASE_UNARY_LOOP(tin, tout, op) \
} \
} \
else { \
BASE_UNARY_LOOP(tin, tout, op) \
} \
} \
while (0)
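/*
 * Illustrative sketch (not compiled): a use such as
 *
 *     UNARY_LOOP_FAST(npy_int, npy_int, *out = -in);
 *
 * instantiates BASE_UNARY_LOOP three times (in-place contiguous, contiguous,
 * generic strided), each expanding to roughly
 *
 *     for (i = 0; i < n; i++, ip1 += is1, op1 += os1) {
 *         const npy_int in = *(npy_int *)ip1;
 *         npy_int *out = (npy_int *)op1;
 *         *out = -in;
 *     }
 *
 * so the compiler can specialize and autovectorize the contiguous branches.
 */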
#define UNARY_LOOP_TWO_OUT\
char *ip1 = args[0], *op1 = args[1], *op2 = args[2];\
npy_intp is1 = steps[0], os1 = steps[1], os2 = steps[2];\
npy_intp n = dimensions[0];\
npy_intp i;\
for(i = 0; i < n; i++, ip1 += is1, op1 += os1, op2 += os2)
#define BINARY_LOOP\
char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\
npy_intp n = dimensions[0];\
npy_intp i;\
for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1)
/*
* loop with contiguous specialization
* `op` should be the code working on `tin in1`, `tin in2` and
* storing the result in `tout *out`
* combine with NPY_GCC_OPT_3 to allow autovectorization
* should only be used where it is worthwhile to avoid code bloat
*/
#define BASE_BINARY_LOOP(tin, tout, op) \
BINARY_LOOP { \
const tin in1 = *(tin *)ip1; \
const tin in2 = *(tin *)ip2; \
tout * out = (tout *)op1; \
op; \
}
/*
* unfortunately gcc 6/7 regressed and we need to give it additional hints to
* vectorize in-place operations (GCC PR80198)
* must only be used after op1 == ip1 or op1 == ip2 has been checked
* TODO: using ivdep might allow other compilers to vectorize too
*/
#if __GNUC__ >= 6
#define IVDEP_LOOP _Pragma("GCC ivdep")
#else
#define IVDEP_LOOP
#endif
#define BASE_BINARY_LOOP_INP(tin, tout, op) \
char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\
npy_intp n = dimensions[0];\
npy_intp i;\
IVDEP_LOOP \
for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) { \
const tin in1 = *(tin *)ip1; \
const tin in2 = *(tin *)ip2; \
tout * out = (tout *)op1; \
op; \
}
#define BASE_BINARY_LOOP_S(tin, tout, cin, cinp, vin, vinp, op) \
const tin cin = *(tin *)cinp; \
BINARY_LOOP { \
const tin vin = *(tin *)vinp; \
tout * out = (tout *)op1; \
op; \
}
/* PR80198 again, scalar works without the pragma */
#define BASE_BINARY_LOOP_S_INP(tin, tout, cin, cinp, vin, vinp, op) \
const tin cin = *(tin *)cinp; \
BINARY_LOOP { \
const tin vin = *(tin *)vinp; \
tout * out = (tout *)vinp; \
op; \
}
#define BINARY_LOOP_FAST(tin, tout, op) \
do { \
/* condition allows compiler to optimize the generic macro */ \
if (IS_BINARY_CONT(tin, tout)) { \
if (args[2] == args[0]) { \
BASE_BINARY_LOOP_INP(tin, tout, op) \
} \
else if (args[2] == args[1]) { \
BASE_BINARY_LOOP_INP(tin, tout, op) \
} \
else { \
BASE_BINARY_LOOP(tin, tout, op) \
} \
} \
else if (IS_BINARY_CONT_S1(tin, tout)) { \
if (args[1] == args[2]) { \
BASE_BINARY_LOOP_S_INP(tin, tout, in1, args[0], in2, ip2, op) \
} \
else { \
BASE_BINARY_LOOP_S(tin, tout, in1, args[0], in2, ip2, op) \
} \
} \
else if (IS_BINARY_CONT_S2(tin, tout)) { \
if (args[0] == args[2]) { \
BASE_BINARY_LOOP_S_INP(tin, tout, in2, args[1], in1, ip1, op) \
} \
else { \
BASE_BINARY_LOOP_S(tin, tout, in2, args[1], in1, ip1, op) \
}\
} \
else { \
BASE_BINARY_LOOP(tin, tout, op) \
} \
} \
while (0)
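/*
 * Illustrative sketch (not compiled): BINARY_LOOP_FAST(npy_int, npy_int,
 * *out = in1 + in2) dispatches on the stride pattern at runtime. For the
 * contiguous-with-scalar-second-operand case (IS_BINARY_CONT_S2) the chosen
 * specialization is roughly
 *
 *     const npy_int in2 = *(npy_int *)args[1];
 *     for (i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) {
 *         const npy_int in1 = *(npy_int *)ip1;
 *         npy_int *out = (npy_int *)op1;
 *         *out = in1 + in2;
 *     }
 *
 * with is2 == 0, which hoists the scalar load out of the loop and lets the
 * compiler vectorize the remaining contiguous accesses.
 */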
#define BINARY_REDUCE_LOOP_INNER\
char *ip2 = args[1]; \
npy_intp is2 = steps[1]; \
npy_intp n = dimensions[0]; \
npy_intp i; \
for(i = 0; i < n; i++, ip2 += is2)
#define BINARY_REDUCE_LOOP(TYPE)\
char *iop1 = args[0]; \
TYPE io1 = *(TYPE *)iop1; \
BINARY_REDUCE_LOOP_INNER
#define BINARY_LOOP_TWO_OUT\
char *ip1 = args[0], *ip2 = args[1], *op1 = args[2], *op2 = args[3];\
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2], os2 = steps[3];\
npy_intp n = dimensions[0];\
npy_intp i;\
for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1, op2 += os2)
/******************************************************************************
** GENERIC FLOAT LOOPS **
*****************************************************************************/
typedef npy_half halfUnaryFunc(npy_half x);
typedef float floatUnaryFunc(float x);
typedef double doubleUnaryFunc(double x);
typedef npy_longdouble longdoubleUnaryFunc(npy_longdouble x);
typedef npy_half halfBinaryFunc(npy_half x, npy_half y);
typedef float floatBinaryFunc(float x, float y);
typedef double doubleBinaryFunc(double x, double y);
typedef npy_longdouble longdoubleBinaryFunc(npy_longdouble x, npy_longdouble y);
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_e_e(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
halfUnaryFunc *f = (halfUnaryFunc *)func;
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*(npy_half *)op1 = f(in1);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_e_e_As_f_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
floatUnaryFunc *f = (floatUnaryFunc *)func;
UNARY_LOOP {
const float in1 = npy_half_to_float(*(npy_half *)ip1);
*(npy_half *)op1 = npy_float_to_half(f(in1));
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_e_e_As_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
doubleUnaryFunc *f = (doubleUnaryFunc *)func;
UNARY_LOOP {
const double in1 = npy_half_to_double(*(npy_half *)ip1);
*(npy_half *)op1 = npy_double_to_half(f(in1));
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_f_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
floatUnaryFunc *f = (floatUnaryFunc *)func;
UNARY_LOOP {
const float in1 = *(float *)ip1;
*(float *)op1 = f(in1);
}
}
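/*
 * Illustrative sketch (assumed usage, not part of this file): these generic
 * loops are typically registered via PyUFunc_FromFuncAndData, with the scalar
 * kernel passed through the data pointer, e.g.
 *
 *     static PyUFuncGenericFunction funcs[] = {PyUFunc_f_f, PyUFunc_d_d};
 *     static void *data[] = {(void *)npy_sinf, (void *)npy_sin};
 *     static char types[] = {NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE};
 *
 *     PyObject *ufunc = PyUFunc_FromFuncAndData(funcs, data, types, 2, 1, 1,
 *                                               PyUFunc_None, "sin",
 *                                               "sin docstring", 0);
 *
 * Each loop then simply applies the scalar function element by element.
 */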
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_f_f_As_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
doubleUnaryFunc *f = (doubleUnaryFunc *)func;
UNARY_LOOP {
const float in1 = *(float *)ip1;
*(float *)op1 = (float)f((double)in1);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_ee_e(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
halfBinaryFunc *f = (halfBinaryFunc *)func;
BINARY_LOOP {
npy_half in1 = *(npy_half *)ip1;
npy_half in2 = *(npy_half *)ip2;
*(npy_half *)op1 = f(in1, in2);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_ee_e_As_ff_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
floatBinaryFunc *f = (floatBinaryFunc *)func;
BINARY_LOOP {
float in1 = npy_half_to_float(*(npy_half *)ip1);
float in2 = npy_half_to_float(*(npy_half *)ip2);
*(npy_half *)op1 = npy_float_to_half(f(in1, in2));
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_ee_e_As_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
doubleBinaryFunc *f = (doubleBinaryFunc *)func;
BINARY_LOOP {
double in1 = npy_half_to_double(*(npy_half *)ip1);
double in2 = npy_half_to_double(*(npy_half *)ip2);
*(npy_half *)op1 = npy_double_to_half(f(in1, in2));
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_ff_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
floatBinaryFunc *f = (floatBinaryFunc *)func;
BINARY_LOOP {
float in1 = *(float *)ip1;
float in2 = *(float *)ip2;
*(float *)op1 = f(in1, in2);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_ff_f_As_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
doubleBinaryFunc *f = (doubleBinaryFunc *)func;
BINARY_LOOP {
float in1 = *(float *)ip1;
float in2 = *(float *)ip2;
*(float *)op1 = (float)f((double)in1, (double)in2);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
doubleUnaryFunc *f = (doubleUnaryFunc *)func;
UNARY_LOOP {
double in1 = *(double *)ip1;
*(double *)op1 = f(in1);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
doubleBinaryFunc *f = (doubleBinaryFunc *)func;
BINARY_LOOP {
double in1 = *(double *)ip1;
double in2 = *(double *)ip2;
*(double *)op1 = f(in1, in2);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_g_g(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
longdoubleUnaryFunc *f = (longdoubleUnaryFunc *)func;
UNARY_LOOP {
npy_longdouble in1 = *(npy_longdouble *)ip1;
*(npy_longdouble *)op1 = f(in1);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_gg_g(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
longdoubleBinaryFunc *f = (longdoubleBinaryFunc *)func;
BINARY_LOOP {
npy_longdouble in1 = *(npy_longdouble *)ip1;
npy_longdouble in2 = *(npy_longdouble *)ip2;
*(npy_longdouble *)op1 = f(in1, in2);
}
}
/******************************************************************************
** GENERIC COMPLEX LOOPS **
*****************************************************************************/
typedef void cdoubleUnaryFunc(npy_cdouble *x, npy_cdouble *r);
typedef void cfloatUnaryFunc(npy_cfloat *x, npy_cfloat *r);
typedef void clongdoubleUnaryFunc(npy_clongdouble *x, npy_clongdouble *r);
typedef void cdoubleBinaryFunc(npy_cdouble *x, npy_cdouble *y, npy_cdouble *r);
typedef void cfloatBinaryFunc(npy_cfloat *x, npy_cfloat *y, npy_cfloat *r);
typedef void clongdoubleBinaryFunc(npy_clongdouble *x, npy_clongdouble *y,
npy_clongdouble *r);
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_F_F(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
cfloatUnaryFunc *f = (cfloatUnaryFunc *)func;
UNARY_LOOP {
npy_cfloat in1 = *(npy_cfloat *)ip1;
npy_cfloat *out = (npy_cfloat *)op1;
f(&in1, out);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_F_F_As_D_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
cdoubleUnaryFunc *f = (cdoubleUnaryFunc *)func;
UNARY_LOOP {
npy_cdouble tmp, out;
tmp.real = (double)((float *)ip1)[0];
tmp.imag = (double)((float *)ip1)[1];
f(&tmp, &out);
((float *)op1)[0] = (float)out.real;
((float *)op1)[1] = (float)out.imag;
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_FF_F(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
cfloatBinaryFunc *f = (cfloatBinaryFunc *)func;
BINARY_LOOP {
npy_cfloat in1 = *(npy_cfloat *)ip1;
npy_cfloat in2 = *(npy_cfloat *)ip2;
npy_cfloat *out = (npy_cfloat *)op1;
f(&in1, &in2, out);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_FF_F_As_DD_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
cdoubleBinaryFunc *f = (cdoubleBinaryFunc *)func;
BINARY_LOOP {
npy_cdouble tmp1, tmp2, out;
tmp1.real = (double)((float *)ip1)[0];
tmp1.imag = (double)((float *)ip1)[1];
tmp2.real = (double)((float *)ip2)[0];
tmp2.imag = (double)((float *)ip2)[1];
f(&tmp1, &tmp2, &out);
((float *)op1)[0] = (float)out.real;
((float *)op1)[1] = (float)out.imag;
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_D_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
cdoubleUnaryFunc *f = (cdoubleUnaryFunc *)func;
UNARY_LOOP {
npy_cdouble in1 = *(npy_cdouble *)ip1;
npy_cdouble *out = (npy_cdouble *)op1;
f(&in1, out);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_DD_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
cdoubleBinaryFunc *f = (cdoubleBinaryFunc *)func;
BINARY_LOOP {
npy_cdouble in1 = *(npy_cdouble *)ip1;
npy_cdouble in2 = *(npy_cdouble *)ip2;
npy_cdouble *out = (npy_cdouble *)op1;
f(&in1, &in2, out);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_G_G(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
clongdoubleUnaryFunc *f = (clongdoubleUnaryFunc *)func;
UNARY_LOOP {
npy_clongdouble in1 = *(npy_clongdouble *)ip1;
npy_clongdouble *out = (npy_clongdouble *)op1;
f(&in1, out);
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_GG_G(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
clongdoubleBinaryFunc *f = (clongdoubleBinaryFunc *)func;
BINARY_LOOP {
npy_clongdouble in1 = *(npy_clongdouble *)ip1;
npy_clongdouble in2 = *(npy_clongdouble *)ip2;
npy_clongdouble *out = (npy_clongdouble *)op1;
f(&in1, &in2, out);
}
}
/******************************************************************************
** GENERIC OBJECT LOOPS **
*****************************************************************************/
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_O_O(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
unaryfunc f = (unaryfunc)func;
UNARY_LOOP {
PyObject *in1 = *(PyObject **)ip1;
PyObject **out = (PyObject **)op1;
PyObject *ret = f(in1 ? in1 : Py_None);
if (ret == NULL) {
return;
}
Py_XDECREF(*out);
*out = ret;
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_O_O_method(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
char *meth = (char *)func;
UNARY_LOOP {
PyObject *in1 = *(PyObject **)ip1;
PyObject **out = (PyObject **)op1;
PyObject *ret = PyObject_CallMethod(in1 ? in1 : Py_None, meth, NULL);
if (ret == NULL) {
return;
}
Py_XDECREF(*out);
*out = ret;
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_OO_O(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
binaryfunc f = (binaryfunc)func;
BINARY_LOOP {
PyObject *in1 = *(PyObject **)ip1;
PyObject *in2 = *(PyObject **)ip2;
PyObject **out = (PyObject **)op1;
PyObject *ret = f(in1 ? in1 : Py_None, in2 ? in2 : Py_None);
if (ret == NULL) {
return;
}
Py_XDECREF(*out);
*out = ret;
}
}
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_OO_O_method(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
char *meth = (char *)func;
BINARY_LOOP {
PyObject *in1 = *(PyObject **)ip1;
PyObject *in2 = *(PyObject **)ip2;
PyObject **out = (PyObject **)op1;
PyObject *ret = PyObject_CallMethod(in1 ? in1 : Py_None,
meth, "(O)", in2);
if (ret == NULL) {
return;
}
Py_XDECREF(*out);
*out = ret;
}
}
/*
* A general-purpose ufunc that deals with a general-purpose Python callable.
* func is a structure with nin, nout, and a Python callable function.
*/
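/*
 * For reference, the layout consumed here (declared in numpy/ufuncobject.h)
 * is:
 *
 *     typedef struct {
 *         int nin;
 *         int nout;
 *         PyObject *callable;
 *     } PyUFunc_PyFuncData;
 */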
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_On_Om(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
{
npy_intp n = dimensions[0];
PyUFunc_PyFuncData *data = (PyUFunc_PyFuncData *)func;
int nin = data->nin;
int nout = data->nout;
PyObject *tocall = data->callable;
char *ptrs[NPY_MAXARGS];
PyObject *arglist, *result;
PyObject *in, **op;
npy_intp i, j, ntot;
ntot = nin+nout;
for(j = 0; j < ntot; j++) {
ptrs[j] = args[j];
}
for(i = 0; i < n; i++) {
arglist = PyTuple_New(nin);
if (arglist == NULL) {
return;
}
for(j = 0; j < nin; j++) {
in = *((PyObject **)ptrs[j]);
if (in == NULL) {
in = Py_None;
}
PyTuple_SET_ITEM(arglist, j, in);
Py_INCREF(in);
}
result = PyEval_CallObject(tocall, arglist);
Py_DECREF(arglist);
if (result == NULL) {
return;
}
if (nout == 0 && result == Py_None) {
/* No output expected, no output received, continue */
Py_DECREF(result);
}
else if (nout == 1) {
/* Single output expected, assign and continue */
op = (PyObject **)ptrs[nin];
Py_XDECREF(*op);
*op = result;
}
else if (PyTuple_Check(result) && nout == PyTuple_Size(result)) {
/*
* Multiple returns match expected number of outputs, assign
* and continue. Will also gobble empty tuples if nout == 0.
*/
for(j = 0; j < nout; j++) {
op = (PyObject **)ptrs[j+nin];
Py_XDECREF(*op);
*op = PyTuple_GET_ITEM(result, j);
Py_INCREF(*op);
}
Py_DECREF(result);
}
else {
/* Mismatch between returns and expected outputs, exit */
Py_DECREF(result);
return;
}
for(j = 0; j < ntot; j++) {
ptrs[j] += steps[j];
}
}
}
/*
*****************************************************************************
** BOOLEAN LOOPS **
*****************************************************************************
*/
/**begin repeat
* #kind = equal, not_equal, greater, greater_equal, less, less_equal#
* #OP = ==, !=, >, >=, <, <=#
**/
NPY_NO_EXPORT void
BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
npy_bool in1 = *((npy_bool *)ip1) != 0;
npy_bool in2 = *((npy_bool *)ip2) != 0;
*((npy_bool *)op1)= in1 @OP@ in2;
}
}
/**end repeat**/
/**begin repeat
* #kind = logical_and, logical_or#
* #OP = &&, ||#
* #SC = ==, !=#
* #and = 1, 0#
**/
NPY_NO_EXPORT void
BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if(IS_BINARY_REDUCE) {
#ifdef NPY_HAVE_SSE2_INTRINSICS
/*
* stick with our variant for more reliable performance; the only known
* platform which outperforms it by ~20% is an i7 with glibc 2.17
*/
if (run_reduce_simd_@kind@_BOOL(args, dimensions, steps)) {
return;
}
#else
/* for now only use libc on 32-bit/non-x86 */
if (steps[1] == 1) {
npy_bool * op = (npy_bool *)args[0];
#if @and@
/* np.all(), search for a zero (false) */
if (*op) {
*op = memchr(args[1], 0, dimensions[0]) == NULL;
}
#else
/*
* np.any(): search for a non-zero (true) by comparing against blocks of
* zeros; memcmp is faster than memchr on SSE4 machines with glibc >= 2.12,
* and memchr can only search for a single byte value (here 1)
*/
static const npy_bool zero[4096]; /* zero by C standard */
npy_uintp i, n = dimensions[0];
for (i = 0; !*op && i < n - (n % sizeof(zero)); i += sizeof(zero)) {
*op = memcmp(&args[1][i], zero, sizeof(zero)) != 0;
}
if (!*op && n - i > 0) {
*op = memcmp(&args[1][i], zero, n - i) != 0;
}
#endif
return;
}
#endif
else {
BINARY_REDUCE_LOOP(npy_bool) {
const npy_bool in2 = *(npy_bool *)ip2;
io1 = io1 @OP@ in2;
if (io1 @SC@ 0) {
break;
}
}
*((npy_bool *)iop1) = io1;
}
}
else {
if (run_binary_simd_@kind@_BOOL(args, dimensions, steps)) {
return;
}
else {
BINARY_LOOP {
const npy_bool in1 = *(npy_bool *)ip1;
const npy_bool in2 = *(npy_bool *)ip2;
*((npy_bool *)op1) = in1 @OP@ in2;
}
}
}
}
/**end repeat**/
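/*
 * Illustrative note (not compiled): on the non-SSE2 path with a contiguous
 * boolean input, a reduce like np.all over 10000 elements becomes a single
 *
 *     memchr(args[1], 0, 10000) == NULL
 *
 * check, while np.any compares 4096-byte chunks against a static zero block
 * with memcmp and finishes with one memcmp over the remaining tail.
 */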
/**begin repeat
* #kind = absolute, logical_not#
* #OP = !=, ==#
**/
NPY_NO_EXPORT void
BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (run_unary_simd_@kind@_BOOL(args, dimensions, steps)) {
return;
}
else {
UNARY_LOOP {
npy_bool in1 = *(npy_bool *)ip1;
*((npy_bool *)op1) = in1 @OP@ 0;
}
}
}
/**end repeat**/
NPY_NO_EXPORT void
BOOL__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
OUTPUT_LOOP {
*((npy_bool *)op1) = 1;
}
}
/*
*****************************************************************************
** INTEGER LOOPS
*****************************************************************************
*/
/**begin repeat
* #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
* LONG, ULONG, LONGLONG, ULONGLONG#
* #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong#
* #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double,
* npy_double, npy_double, npy_double, npy_double#
* #SIGNED = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0#
*/
#define @TYPE@_floor_divide @TYPE@_divide
#define @TYPE@_fmax @TYPE@_maximum
#define @TYPE@_fmin @TYPE@_minimum
NPY_NO_EXPORT void
@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
OUTPUT_LOOP {
*((@type@ *)op1) = 1;
}
}
NPY_NO_EXPORT void
@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_FAST(@type@, @type@, *out = +in);
}
/**begin repeat1
* #isa = , _avx2#
* #ISA = , AVX2#
* #CHK = 1, HAVE_ATTRIBUTE_TARGET_AVX2#
* #ATTR = , NPY_GCC_TARGET_AVX2#
*/
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_square@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
UNARY_LOOP_FAST(@type@, @type@, *out = in * in);
}
#endif
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_reciprocal@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
UNARY_LOOP_FAST(@type@, @type@, *out = 1.0 / in);
}
#endif
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_conjugate@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_FAST(@type@, @type@, *out = in);
}
#endif
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_negative@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_FAST(@type@, @type@, *out = -in);
}
#endif
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_logical_not@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_FAST(@type@, npy_bool, *out = !in);
}
#endif
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_invert@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_FAST(@type@, @type@, *out = ~in);
}
#endif
/**begin repeat2
* Arithmetic
* #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor,
* left_shift, right_shift#
* #OP = +, -,*, &, |, ^, <<, >>#
*/
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if(IS_BINARY_REDUCE) {
BINARY_REDUCE_LOOP(@type@) {
io1 @OP@= *(@type@ *)ip2;
}
*((@type@ *)iop1) = io1;
}
else {
BINARY_LOOP_FAST(@type@, @type@, *out = in1 @OP@ in2);
}
}
#endif
/**end repeat2**/
/**begin repeat2
* #kind = equal, not_equal, greater, greater_equal, less, less_equal,
* logical_and, logical_or#
* #OP = ==, !=, >, >=, <, <=, &&, ||#
*/
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/*
* gcc vectorization of this is not good (PR60575) but manual integer
* vectorization is too tedious to be worthwhile
*/
BINARY_LOOP_FAST(@type@, npy_bool, *out = in1 @OP@ in2);
}
#endif
/**end repeat2**/
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_logical_xor@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const int t1 = !!*(@type@ *)ip1;
const int t2 = !!*(@type@ *)ip2;
*((npy_bool *)op1) = (t1 != t2);
}
}
#endif
/**end repeat1**/
/**begin repeat1
* #kind = maximum, minimum#
* #OP = >, <#
**/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (IS_BINARY_REDUCE) {
BINARY_REDUCE_LOOP(@type@) {
const @type@ in2 = *(@type@ *)ip2;
io1 = (io1 @OP@ in2) ? io1 : in2;
}
*((@type@ *)iop1) = io1;
}
else {
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2;
}
}
}
/**end repeat1**/
NPY_NO_EXPORT void
@TYPE@_power(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
@type@ in1 = *(@type@ *)ip1;
@type@ in2 = *(@type@ *)ip2;
@type@ out;
#if @SIGNED@
if (in2 < 0) {
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API;
PyErr_SetString(PyExc_ValueError,
"Integers to negative integer powers are not allowed.");
NPY_DISABLE_C_API;
return;
}
#endif
if (in2 == 0) {
*((@type@ *)op1) = 1;
continue;
}
if (in1 == 1) {
*((@type@ *)op1) = 1;
continue;
}
out = in2 & 1 ? in1 : 1;
in2 >>= 1;
while (in2 > 0) {
in1 *= in1;
if (in2 & 1) {
out *= in1;
}
in2 >>= 1;
}
*((@type@ *) op1) = out;
}
}
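/*
 * Illustrative trace (not compiled) of the square-and-multiply loop above
 * for in1 = 3, in2 = 5 (binary 101):
 *
 *     out = 3             (lowest bit of 5 is set)
 *     in2 = 2, in1 = 9    (bit clear, out stays 3)
 *     in2 = 1, in1 = 81   (bit set,   out = 3 * 81 = 243 = 3**5)
 *
 * i.e. O(log in2) multiplications instead of in2 - 1.
 */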
NPY_NO_EXPORT void
@TYPE@_fmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (in2 == 0) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
}
else {
*((@type@ *)op1)= in1 % in2;
}
}
}
/**end repeat**/
/**begin repeat
* #TYPE = BYTE, SHORT, INT, LONG, LONGLONG#
* #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong#
*/
NPY_NO_EXPORT NPY_GCC_OPT_3 void
@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_FAST(@type@, @type@, *out = (in >= 0) ? in : -in);
}
NPY_NO_EXPORT NPY_GCC_OPT_3 void
@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_FAST(@type@, @type@, *out = in > 0 ? 1 : (in < 0 ? -1 : 0));
}
NPY_NO_EXPORT void
@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
/*
* FIXME: On x86 at least, dividing the smallest representable integer
* by -1 causes a SIGFPE (division overflow). We treat this case here
* (to avoid a SIGFPE crash at python level), but a good solution would
* be to treat integer division problems separately from FPU exceptions
* (i.e. a different approach than npy_set_floatstatus_divbyzero()).
*/
if (in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1)) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
}
else if (((in1 > 0) != (in2 > 0)) && (in1 % in2 != 0)) {
*((@type@ *)op1) = in1/in2 - 1;
}
else {
*((@type@ *)op1) = in1/in2;
}
}
}
NPY_NO_EXPORT void
@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (in2 == 0) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
}
else {
/* handle mixed case the way Python does */
const @type@ rem = in1 % in2;
if ((in1 > 0) == (in2 > 0) || rem == 0) {
*((@type@ *)op1) = rem;
}
else {
*((@type@ *)op1) = rem + in2;
}
}
}
}
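/*
 * Illustrative examples (not compiled) of the Python-style semantics
 * implemented by the divide/remainder loops above, where C's truncating
 * '/' and '%' are corrected for mixed signs:
 *
 *     (-7) / 3   ->  C gives -2, floor division gives -3
 *     (-7) % 3   ->  C gives -1, remainder gives      2
 *
 * so that (-3) * 3 + 2 == -7, matching Python's // and %.
 */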
NPY_NO_EXPORT void
@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP_TWO_OUT {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
/* see FIXME note for divide above */
if (in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1)) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
*((@type@ *)op2) = 0;
}
else {
/* handle mixed case the way Python does */
const @type@ quo = in1 / in2;
const @type@ rem = in1 % in2;
if ((in1 > 0) == (in2 > 0) || rem == 0) {
*((@type@ *)op1) = quo;
*((@type@ *)op2) = rem;
}
else {
*((@type@ *)op1) = quo - 1;
*((@type@ *)op2) = rem + in2;
}
}
}
}
/**end repeat**/
/**begin repeat
* #TYPE = UBYTE, USHORT, UINT, ULONG, ULONGLONG#
* #type = npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong#
*/
NPY_NO_EXPORT void
@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = in1;
}
}
NPY_NO_EXPORT NPY_GCC_OPT_3 void
@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_FAST(@type@, @type@, *out = in > 0 ? 1 : 0);
}
NPY_NO_EXPORT void
@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (in2 == 0) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
}
else {
*((@type@ *)op1)= in1/in2;
}
}
}
NPY_NO_EXPORT void
@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (in2 == 0) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
}
else {
*((@type@ *)op1) = in1 % in2;
}
}
}
NPY_NO_EXPORT void
@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP_TWO_OUT {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (in2 == 0) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
*((@type@ *)op2) = 0;
}
else {
*((@type@ *)op1)= in1/in2;
*((@type@ *)op2) = in1 % in2;
}
}
}
/**end repeat**/
/*
*****************************************************************************
** DATETIME LOOPS **
*****************************************************************************
*/
NPY_NO_EXPORT void
TIMEDELTA_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
if (in1 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_timedelta *)op1) = -in1;
}
}
}
NPY_NO_EXPORT void
TIMEDELTA_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
*((npy_timedelta *)op1) = +in1;
}
}
NPY_NO_EXPORT void
TIMEDELTA_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
if (in1 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_timedelta *)op1) = (in1 >= 0) ? in1 : -in1;
}
}
}
NPY_NO_EXPORT void
TIMEDELTA_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
*((npy_timedelta *)op1) = in1 > 0 ? 1 : (in1 < 0 ? -1 : 0);
}
}
/**begin repeat
* #type = npy_datetime, npy_timedelta#
* #TYPE = DATETIME, TIMEDELTA#
*/
NPY_NO_EXPORT void
@TYPE@_isnat(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((npy_bool *)op1) = (in1 == NPY_DATETIME_NAT);
}
}
NPY_NO_EXPORT void
@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
OUTPUT_LOOP {
*((@type@ *)op1) = 1;
}
}
/**begin repeat1
* #kind = equal, greater, greater_equal, less, less_equal#
* #OP = ==, >, >=, <, <=#
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
npy_bool give_future_warning = 0;
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
const npy_bool res = in1 @OP@ in2;
*((npy_bool *)op1) = res;
if ((in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) && res) {
give_future_warning = 1;
}
}
if (give_future_warning) {
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API;
/* 2016-01-18, 1.11 */
if (DEPRECATE_FUTUREWARNING(
"In the future, 'NAT @OP@ x' and 'x @OP@ NAT' "
"will always be False.") < 0) {
/* nothing to do, we return anyway */
}
NPY_DISABLE_C_API;
}
}
/**end repeat1**/
NPY_NO_EXPORT void
@TYPE@_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
npy_bool give_future_warning = 0;
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((npy_bool *)op1) = in1 != in2;
if (in1 == NPY_DATETIME_NAT && in2 == NPY_DATETIME_NAT) {
give_future_warning = 1;
}
}
if (give_future_warning) {
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API;
/* 2016-01-18, 1.11 */
if (DEPRECATE_FUTUREWARNING(
"In the future, NAT != NAT will be True "
"rather than False.") < 0) {
/* nothing to do, we return anyway */
}
NPY_DISABLE_C_API;
}
}
/**begin repeat1
* #kind = maximum, minimum#
* #OP = >, <#
**/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (in1 == NPY_DATETIME_NAT) {
*((@type@ *)op1) = in2;
}
else if (in2 == NPY_DATETIME_NAT) {
*((@type@ *)op1) = in1;
}
else {
*((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2;
}
}
}
/**end repeat1**/
/**end repeat**/
NPY_NO_EXPORT void
DATETIME_Mm_M_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
BINARY_LOOP {
const npy_datetime in1 = *(npy_datetime *)ip1;
const npy_timedelta in2 = *(npy_timedelta *)ip2;
if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
*((npy_datetime *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_datetime *)op1) = in1 + in2;
}
}
}
NPY_NO_EXPORT void
DATETIME_mM_M_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
const npy_datetime in2 = *(npy_datetime *)ip2;
if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
*((npy_datetime *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_datetime *)op1) = in1 + in2;
}
}
}
NPY_NO_EXPORT void
TIMEDELTA_mm_m_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
const npy_timedelta in2 = *(npy_timedelta *)ip2;
if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_timedelta *)op1) = in1 + in2;
}
}
}
NPY_NO_EXPORT void
DATETIME_Mm_M_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_datetime in1 = *(npy_datetime *)ip1;
const npy_timedelta in2 = *(npy_timedelta *)ip2;
if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
*((npy_datetime *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_datetime *)op1) = in1 - in2;
}
}
}
NPY_NO_EXPORT void
DATETIME_MM_m_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_datetime in1 = *(npy_datetime *)ip1;
const npy_datetime in2 = *(npy_datetime *)ip2;
if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_timedelta *)op1) = in1 - in2;
}
}
}
NPY_NO_EXPORT void
TIMEDELTA_mm_m_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
const npy_timedelta in2 = *(npy_timedelta *)ip2;
if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_timedelta *)op1) = in1 - in2;
}
}
}
/* Note: Assuming 'q' == NPY_LONGLONG */
NPY_NO_EXPORT void
TIMEDELTA_mq_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
const npy_int64 in2 = *(npy_int64 *)ip2;
if (in1 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_timedelta *)op1) = in1 * in2;
}
}
}
/* Note: Assuming 'q' == NPY_LONGLONG */
NPY_NO_EXPORT void
TIMEDELTA_qm_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_int64 in1 = *(npy_int64 *)ip1;
const npy_timedelta in2 = *(npy_timedelta *)ip2;
if (in2 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_timedelta *)op1) = in1 * in2;
}
}
}
NPY_NO_EXPORT void
TIMEDELTA_md_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
const double in2 = *(double *)ip2;
if (in1 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
double result = in1 * in2;
if (npy_isfinite(result)) {
*((npy_timedelta *)op1) = (npy_timedelta)result;
}
else {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
}
}
}
NPY_NO_EXPORT void
TIMEDELTA_dm_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const double in1 = *(double *)ip1;
const npy_timedelta in2 = *(npy_timedelta *)ip2;
if (in2 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
double result = in1 * in2;
if (npy_isfinite(result)) {
*((npy_timedelta *)op1) = (npy_timedelta)result;
}
else {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
}
}
}
/* Note: Assuming 'q' == NPY_LONGLONG */
NPY_NO_EXPORT void
TIMEDELTA_mq_m_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
const npy_int64 in2 = *(npy_int64 *)ip2;
if (in1 == NPY_DATETIME_NAT || in2 == 0) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
*((npy_timedelta *)op1) = in1 / in2;
}
}
}
NPY_NO_EXPORT void
TIMEDELTA_md_m_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
const double in2 = *(double *)ip2;
if (in1 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
else {
double result = in1 / in2;
if (npy_isfinite(result)) {
*((npy_timedelta *)op1) = (npy_timedelta)result;
}
else {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
}
}
}
NPY_NO_EXPORT void
TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
const npy_timedelta in2 = *(npy_timedelta *)ip2;
if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
*((double *)op1) = NPY_NAN;
}
else {
*((double *)op1) = (double)in1 / (double)in2;
}
}
}
/*
*****************************************************************************
** FLOAT LOOPS **
*****************************************************************************
*/
/**begin repeat
* Float types
* #type = npy_float, npy_double#
* #TYPE = FLOAT, DOUBLE#
* #scalarf = npy_sqrtf, npy_sqrt#
*/
NPY_NO_EXPORT void
@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (!run_unary_simd_sqrt_@TYPE@(args, dimensions, steps)) {
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*(@type@ *)op1 = @scalarf@(in1);
}
}
}
/**end repeat**/
/**begin repeat
* Float types
* #type = npy_float, npy_double, npy_longdouble, npy_float#
* #dtype = npy_float, npy_double, npy_longdouble, npy_half#
* #TYPE = FLOAT, DOUBLE, LONGDOUBLE, HALF#
* #c = f, , l, #
* #C = F, , L, #
* #trf = , , , npy_half_to_float#
*/
/*
* Pairwise summation, rounding error O(lg n) instead of O(n).
* The recursion depth is O(lg n) as well.
* When updating, also update the similar complex floats summation.
*/
static @type@
pairwise_sum_@TYPE@(char *a, npy_uintp n, npy_intp stride)
{
if (n < 8) {
npy_intp i;
@type@ res = 0.;
for (i = 0; i < n; i++) {
res += @trf@(*((@dtype@*)(a + i * stride)));
}
return res;
}
else if (n <= PW_BLOCKSIZE) {
npy_intp i;
@type@ r[8], res;
/*
* sum a block with 8 accumulators
* unrolling 8 times reduces the effective blocksize to 16 and allows
* vectorization with AVX without changing the summation ordering
*/
r[0] = @trf@(*((@dtype@ *)(a + 0 * stride)));
r[1] = @trf@(*((@dtype@ *)(a + 1 * stride)));
r[2] = @trf@(*((@dtype@ *)(a + 2 * stride)));
r[3] = @trf@(*((@dtype@ *)(a + 3 * stride)));
r[4] = @trf@(*((@dtype@ *)(a + 4 * stride)));
r[5] = @trf@(*((@dtype@ *)(a + 5 * stride)));
r[6] = @trf@(*((@dtype@ *)(a + 6 * stride)));
r[7] = @trf@(*((@dtype@ *)(a + 7 * stride)));
for (i = 8; i < n - (n % 8); i += 8) {
/* small blocksizes seem to mess with hardware prefetch */
NPY_PREFETCH(a + (i + 512 / sizeof(@dtype@)) * stride, 0, 3);
r[0] += @trf@(*((@dtype@ *)(a + (i + 0) * stride)));
r[1] += @trf@(*((@dtype@ *)(a + (i + 1) * stride)));
r[2] += @trf@(*((@dtype@ *)(a + (i + 2) * stride)));
r[3] += @trf@(*((@dtype@ *)(a + (i + 3) * stride)));
r[4] += @trf@(*((@dtype@ *)(a + (i + 4) * stride)));
r[5] += @trf@(*((@dtype@ *)(a + (i + 5) * stride)));
r[6] += @trf@(*((@dtype@ *)(a + (i + 6) * stride)));
r[7] += @trf@(*((@dtype@ *)(a + (i + 7) * stride)));
}
/* accumulate now to avoid stack spills for single peel loop */
res = ((r[0] + r[1]) + (r[2] + r[3])) +
((r[4] + r[5]) + (r[6] + r[7]));
/* do non multiple of 8 rest */
for (; i < n; i++) {
res += @trf@(*((@dtype@ *)(a + i * stride)));
}
return res;
}
else {
/* divide by two but avoid non-multiples of unroll factor */
npy_uintp n2 = n / 2;
n2 -= n2 % 8;
return pairwise_sum_@TYPE@(a, n2, stride) +
pairwise_sum_@TYPE@(a + n2 * stride, n - n2, stride);
}
}
/**end repeat**/
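/*
 * Illustrative example (not compiled): for n = 300 elements the recursion
 * above first splits into pairwise_sum(a, 144, stride) +
 * pairwise_sum(a + 144 * stride, 156, stride) (n2 = 150 rounded down to a
 * multiple of 8) and keeps halving until blocks of at most PW_BLOCKSIZE
 * elements remain; each such block is summed with 8 independent
 * accumulators, giving O(lg n) rounding error instead of O(n).
 */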
/**begin repeat
* Float types
* #type = npy_float, npy_double, npy_longdouble#
* #TYPE = FLOAT, DOUBLE, LONGDOUBLE#
* #c = f, , l#
* #C = F, , L#
*/
/**begin repeat1
* Arithmetic
* # kind = add, subtract, multiply, divide#
* # OP = +, -, *, /#
* # PW = 1, 0, 0, 0#
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (IS_BINARY_REDUCE) {
#if @PW@
@type@ * iop1 = (@type@ *)args[0];
npy_intp n = dimensions[0];
*iop1 @OP@= pairwise_sum_@TYPE@(args[1], n, steps[1]);
#else
BINARY_REDUCE_LOOP(@type@) {
io1 @OP@= *(@type@ *)ip2;
}
*((@type@ *)iop1) = io1;
#endif
}
else if (!run_binary_simd_@kind@_@TYPE@(args, dimensions, steps)) {
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1) = in1 @OP@ in2;
}
}
}
/**end repeat1**/
/**begin repeat1
* #kind = equal, not_equal, less, less_equal, greater, greater_equal,
* logical_and, logical_or#
* #OP = ==, !=, <, <=, >, >=, &&, ||#
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (!run_binary_simd_@kind@_@TYPE@(args, dimensions, steps)) {
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((npy_bool *)op1) = in1 @OP@ in2;
}
}
}
/**end repeat1**/
NPY_NO_EXPORT void
@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const int t1 = !!*(@type@ *)ip1;
const int t2 = !!*(@type@ *)ip2;
*((npy_bool *)op1) = (t1 != t2);
}
}
NPY_NO_EXPORT void
@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((npy_bool *)op1) = !in1;
}
}
/**begin repeat1
* #kind = isnan, isinf, isfinite, signbit#
* #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit#
**/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (!run_@kind@_simd_@TYPE@(args, dimensions, steps)) {
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((npy_bool *)op1) = @func@(in1) != 0;
}
}
npy_clear_floatstatus();
}
/**end repeat1**/
NPY_NO_EXPORT void
@TYPE@_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = npy_spacing@c@(in1);
}
}
NPY_NO_EXPORT void
@TYPE@_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1)= npy_copysign@c@(in1, in2);
}
}
NPY_NO_EXPORT void
@TYPE@_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1)= npy_nextafter@c@(in1, in2);
}
}
/**begin repeat1
* #kind = maximum, minimum#
* #OP = >=, <=#
**/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/* */
if (IS_BINARY_REDUCE) {
if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
BINARY_REDUCE_LOOP(@type@) {
const @type@ in2 = *(@type@ *)ip2;
io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
}
*((@type@ *)iop1) = io1;
}
}
else {
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2;
}
}
}
/**end repeat1**/
/**begin repeat1
* #kind = fmax, fmin#
* #OP = >=, <=#
**/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/* */
if (IS_BINARY_REDUCE) {
BINARY_REDUCE_LOOP(@type@) {
const @type@ in2 = *(@type@ *)ip2;
io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2;
}
*((@type@ *)iop1) = io1;
}
else {
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
}
}
npy_clear_floatstatus();
}
/**end repeat1**/
NPY_NO_EXPORT void
@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
@type@ mod;
*((@type@ *)op1) = npy_divmod@c@(in1, in2, &mod);
}
}
NPY_NO_EXPORT void
@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
npy_divmod@c@(in1, in2, (@type@ *)op1);
}
}
NPY_NO_EXPORT void
@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP_TWO_OUT {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1) = npy_divmod@c@(in1, in2, (@type@ *)op2);
}
}
NPY_NO_EXPORT void
@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
char * margs[] = {args[0], args[0], args[1]};
npy_intp msteps[] = {steps[0], steps[0], steps[1]};
if (!run_binary_simd_multiply_@TYPE@(margs, dimensions, msteps)) {
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = in1*in1;
}
}
}
NPY_NO_EXPORT void
@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
@type@ one = 1.@c@;
char * margs[] = {(char*)&one, args[0], args[1]};
npy_intp msteps[] = {0, steps[0], steps[1]};
if (!run_binary_simd_divide_@TYPE@(margs, dimensions, msteps)) {
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = 1/in1;
}
}
}
NPY_NO_EXPORT void
@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
OUTPUT_LOOP {
*((@type@ *)op1) = 1;
}
}
NPY_NO_EXPORT void
@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = in1;
}
}
NPY_NO_EXPORT void
@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (!run_unary_simd_absolute_@TYPE@(args, dimensions, steps)) {
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ tmp = in1 > 0 ? in1 : -in1;
/* add 0 to clear -0.0 */
*((@type@ *)op1) = tmp + 0;
}
}
npy_clear_floatstatus();
}
NPY_NO_EXPORT void
@TYPE@_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (!run_unary_simd_negative_@TYPE@(args, dimensions, steps)) {
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = -in1;
}
}
}
NPY_NO_EXPORT void
@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = +in1;
}
}
NPY_NO_EXPORT void
@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/* Sign of nan is nan */
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = in1 > 0 ? 1 : (in1 < 0 ? -1 : (in1 == 0 ? 0 : in1));
}
}
NPY_NO_EXPORT void
@TYPE@_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_TWO_OUT {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = npy_modf@c@(in1, (@type@ *)op2);
}
}
NPY_NO_EXPORT void
@TYPE@_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_TWO_OUT {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = npy_frexp@c@(in1, (int *)op2);
}
}
NPY_NO_EXPORT void
@TYPE@_ldexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const int in2 = *(int *)ip2;
*((@type@ *)op1) = npy_ldexp@c@(in1, in2);
}
}
NPY_NO_EXPORT void
@TYPE@_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/*
* Additional loop to handle npy_long integer inputs (cf. #866, #1633).
* npy_long != npy_int on many 64-bit platforms, so we need this second loop
* to handle the default integer type.
*/
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const long in2 = *(long *)ip2;
if (((int)in2) == in2) {
/* Range OK */
*((@type@ *)op1) = npy_ldexp@c@(in1, ((int)in2));
}
else {
/*
* Outside npy_int range -- also ldexp will overflow in this case,
* given that the exponent has fewer bits than npy_int.
*/
if (in2 > 0) {
*((@type@ *)op1) = npy_ldexp@c@(in1, NPY_MAX_INT);
}
else {
*((@type@ *)op1) = npy_ldexp@c@(in1, NPY_MIN_INT);
}
}
}
}
#define @TYPE@_true_divide @TYPE@_divide
/**end repeat**/
/*
*****************************************************************************
** HALF-FLOAT LOOPS **
*****************************************************************************
*/
/**begin repeat
* Arithmetic
* # kind = add, subtract, multiply, divide#
* # OP = +, -, *, /#
* # PW = 1, 0, 0, 0#
*/
NPY_NO_EXPORT void
HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (IS_BINARY_REDUCE) {
char *iop1 = args[0];
float io1 = npy_half_to_float(*(npy_half *)iop1);
#if @PW@
npy_intp n = dimensions[0];
io1 @OP@= pairwise_sum_HALF(args[1], n, steps[1]);
#else
BINARY_REDUCE_LOOP_INNER {
io1 @OP@= npy_half_to_float(*(npy_half *)ip2);
}
#endif
*((npy_half *)iop1) = npy_float_to_half(io1);
}
else {
BINARY_LOOP {
const float in1 = npy_half_to_float(*(npy_half *)ip1);
const float in2 = npy_half_to_float(*(npy_half *)ip2);
*((npy_half *)op1) = npy_float_to_half(in1 @OP@ in2);
}
}
}
/**end repeat**/
#define _HALF_LOGICAL_AND(a,b) (!npy_half_iszero(a) && !npy_half_iszero(b))
#define _HALF_LOGICAL_OR(a,b) (!npy_half_iszero(a) || !npy_half_iszero(b))
/**begin repeat
* #kind = equal, not_equal, less, less_equal, greater,
* greater_equal, logical_and, logical_or#
* #OP = npy_half_eq, npy_half_ne, npy_half_lt, npy_half_le, npy_half_gt,
* npy_half_ge, _HALF_LOGICAL_AND, _HALF_LOGICAL_OR#
*/
NPY_NO_EXPORT void
HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
const npy_half in2 = *(npy_half *)ip2;
*((npy_bool *)op1) = @OP@(in1, in2);
}
}
/**end repeat**/
#undef _HALF_LOGICAL_AND
#undef _HALF_LOGICAL_OR
NPY_NO_EXPORT void
HALF_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const int in1 = !npy_half_iszero(*(npy_half *)ip1);
const int in2 = !npy_half_iszero(*(npy_half *)ip2);
*((npy_bool *)op1) = (in1 != in2);
}
}
NPY_NO_EXPORT void
HALF_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*((npy_bool *)op1) = npy_half_iszero(in1);
}
}
/**begin repeat
* #kind = isnan, isinf, isfinite, signbit#
* #func = npy_half_isnan, npy_half_isinf, npy_half_isfinite, npy_half_signbit#
**/
NPY_NO_EXPORT void
HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*((npy_bool *)op1) = @func@(in1) != 0;
}
npy_clear_floatstatus();
}
/**end repeat**/
NPY_NO_EXPORT void
HALF_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*((npy_half *)op1) = npy_half_spacing(in1);
}
}
NPY_NO_EXPORT void
HALF_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
const npy_half in2 = *(npy_half *)ip2;
*((npy_half *)op1)= npy_half_copysign(in1, in2);
}
}
NPY_NO_EXPORT void
HALF_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
const npy_half in2 = *(npy_half *)ip2;
*((npy_half *)op1)= npy_half_nextafter(in1, in2);
}
}
/**begin repeat
* #kind = maximum, minimum#
* #OP = npy_half_ge, npy_half_le#
**/
NPY_NO_EXPORT void
HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/* */
BINARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
const npy_half in2 = *(npy_half *)ip2;
*((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in1)) ? in1 : in2;
}
}
/**end repeat**/
/**begin repeat
* #kind = fmax, fmin#
* #OP = npy_half_ge, npy_half_le#
**/
NPY_NO_EXPORT void
HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/* */
BINARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
const npy_half in2 = *(npy_half *)ip2;
*((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? in1 : in2;
}
npy_clear_floatstatus();
}
/**end repeat**/
NPY_NO_EXPORT void
HALF_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
const npy_half in2 = *(npy_half *)ip2;
npy_half mod;
*((npy_half *)op1) = npy_half_divmod(in1, in2, &mod);
}
}
NPY_NO_EXPORT void
HALF_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
const npy_half in2 = *(npy_half *)ip2;
npy_half_divmod(in1, in2, (npy_half *)op1);
}
}
NPY_NO_EXPORT void
HALF_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP_TWO_OUT {
const npy_half in1 = *(npy_half *)ip1;
const npy_half in2 = *(npy_half *)ip2;
*((npy_half *)op1) = npy_half_divmod(in1, in2, (npy_half *)op2);
}
}
NPY_NO_EXPORT void
HALF_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
UNARY_LOOP {
const float in1 = npy_half_to_float(*(npy_half *)ip1);
*((npy_half *)op1) = npy_float_to_half(in1*in1);
}
}
NPY_NO_EXPORT void
HALF_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
UNARY_LOOP {
const float in1 = npy_half_to_float(*(npy_half *)ip1);
*((npy_half *)op1) = npy_float_to_half(1/in1);
}
}
NPY_NO_EXPORT void
HALF__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
OUTPUT_LOOP {
*((npy_half *)op1) = NPY_HALF_ONE;
}
}
NPY_NO_EXPORT void
HALF_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*((npy_half *)op1) = in1;
}
}
NPY_NO_EXPORT void
HALF_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*((npy_half *)op1) = in1&0x7fffu;
}
}
NPY_NO_EXPORT void
HALF_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*((npy_half *)op1) = in1^0x8000u;
}
}
NPY_NO_EXPORT void
HALF_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*((npy_half *)op1) = +in1;
}
}
NPY_NO_EXPORT void
HALF_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/* Sign of nan is nan */
UNARY_LOOP {
const npy_half in1 = *(npy_half *)ip1;
*((npy_half *)op1) = npy_half_isnan(in1) ? in1 :
(((in1&0x7fffu) == 0) ? 0 :
(((in1&0x8000u) == 0) ? NPY_HALF_ONE : NPY_HALF_NEGONE));
}
}
NPY_NO_EXPORT void
HALF_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
float temp;
UNARY_LOOP_TWO_OUT {
const float in1 = npy_half_to_float(*(npy_half *)ip1);
*((npy_half *)op1) = npy_float_to_half(npy_modff(in1, &temp));
*((npy_half *)op2) = npy_float_to_half(temp);
}
}
NPY_NO_EXPORT void
HALF_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP_TWO_OUT {
const float in1 = npy_half_to_float(*(npy_half *)ip1);
*((npy_half *)op1) = npy_float_to_half(npy_frexpf(in1, (int *)op2));
}
}
NPY_NO_EXPORT void
HALF_ldexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const float in1 = npy_half_to_float(*(npy_half *)ip1);
const int in2 = *(int *)ip2;
*((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, in2));
}
}
NPY_NO_EXPORT void
HALF_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
/*
* Additional loop to handle npy_long integer inputs (cf. #866, #1633).
* npy_long != npy_int on many 64-bit platforms, so we need this second loop
* to handle the default integer type.
*/
BINARY_LOOP {
const float in1 = npy_half_to_float(*(npy_half *)ip1);
const long in2 = *(long *)ip2;
if (((int)in2) == in2) {
/* Range OK */
*((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, ((int)in2)));
}
else {
            /*
             * Outside the npy_int range -- ldexp would overflow anyway,
             * since the exponent has fewer bits than npy_int.
             */
if (in2 > 0) {
*((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, NPY_MAX_INT));
}
else {
*((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, NPY_MIN_INT));
}
}
}
}
#define HALF_true_divide HALF_divide
/*
*****************************************************************************
** COMPLEX LOOPS **
*****************************************************************************
*/
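/*
 * Complex comparisons are lexicographic: the real parts are compared first
 * and the imaginary parts break ties.  The !npy_isnan() guards make the
 * ordered comparisons return false when an imaginary part is nan while the
 * real parts differ.
 */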
#define CGE(xr,xi,yr,yi) ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) \
|| (xr == yr && xi >= yi))
#define CLE(xr,xi,yr,yi) ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) \
|| (xr == yr && xi <= yi))
#define CGT(xr,xi,yr,yi) ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) \
|| (xr == yr && xi > yi))
#define CLT(xr,xi,yr,yi) ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) \
|| (xr == yr && xi < yi))
#define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi)
#define CNE(xr,xi,yr,yi) (xr != yr || xi != yi)
/**begin repeat
* complex types
* #TYPE = CFLOAT, CDOUBLE, CLONGDOUBLE#
* #ftype = npy_float, npy_double, npy_longdouble#
* #c = f, , l#
* #C = F, , L#
*/
/*
 * similar to the pairwise sum of real floats: n counts scalar (real and
 * imaginary) elements, i.e. twice the number of complex values, stride is
 * half the byte stride of a complex element, and the imaginary part of each
 * value is assumed to directly follow its real part.
 */
static void
pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_uintp n,
npy_intp stride)
{
assert(n % 2 == 0);
if (n < 8) {
npy_intp i;
*rr = 0.;
*ri = 0.;
for (i = 0; i < n; i += 2) {
*rr += *((@ftype@ *)(a + i * stride + 0));
*ri += *((@ftype@ *)(a + i * stride + sizeof(@ftype@)));
}
return;
}
else if (n <= PW_BLOCKSIZE) {
npy_intp i;
@ftype@ r[8];
/*
* sum a block with 8 accumulators
* 8 times unroll reduces blocksize to 16 and allows vectorization with
* avx without changing summation ordering
*/
r[0] = *((@ftype@ *)(a + 0 * stride));
r[1] = *((@ftype@ *)(a + 0 * stride + sizeof(@ftype@)));
r[2] = *((@ftype@ *)(a + 2 * stride));
r[3] = *((@ftype@ *)(a + 2 * stride + sizeof(@ftype@)));
r[4] = *((@ftype@ *)(a + 4 * stride));
r[5] = *((@ftype@ *)(a + 4 * stride + sizeof(@ftype@)));
r[6] = *((@ftype@ *)(a + 6 * stride));
r[7] = *((@ftype@ *)(a + 6 * stride + sizeof(@ftype@)));
for (i = 8; i < n - (n % 8); i += 8) {
            /* small blocksizes seem to interfere with hardware prefetching */
NPY_PREFETCH(a + (i + 512 / sizeof(@ftype@)) * stride, 0, 3);
r[0] += *((@ftype@ *)(a + (i + 0) * stride));
r[1] += *((@ftype@ *)(a + (i + 0) * stride + sizeof(@ftype@)));
r[2] += *((@ftype@ *)(a + (i + 2) * stride));
r[3] += *((@ftype@ *)(a + (i + 2) * stride + sizeof(@ftype@)));
r[4] += *((@ftype@ *)(a + (i + 4) * stride));
r[5] += *((@ftype@ *)(a + (i + 4) * stride + sizeof(@ftype@)));
r[6] += *((@ftype@ *)(a + (i + 6) * stride));
r[7] += *((@ftype@ *)(a + (i + 6) * stride + sizeof(@ftype@)));
}
/* accumulate now to avoid stack spills for single peel loop */
*rr = ((r[0] + r[2]) + (r[4] + r[6]));
*ri = ((r[1] + r[3]) + (r[5] + r[7]));
        /* handle the remaining elements that are not a multiple of 8 */
for (; i < n; i+=2) {
*rr += *((@ftype@ *)(a + i * stride + 0));
*ri += *((@ftype@ *)(a + i * stride + sizeof(@ftype@)));
}
return;
}
else {
/* divide by two but avoid non-multiples of unroll factor */
@ftype@ rr1, ri1, rr2, ri2;
npy_uintp n2 = n / 2;
n2 -= n2 % 8;
pairwise_sum_@TYPE@(&rr1, &ri1, a, n2, stride);
pairwise_sum_@TYPE@(&rr2, &ri2, a + n2 * stride, n - n2, stride);
*rr = rr1 + rr2;
*ri = ri1 + ri2;
return;
}
}
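/*
 * add and subtract share the loop below; only add (PW set to 1 in the
 * repeat header) routes a reduction through pairwise_sum_@TYPE@, which keeps
 * the rounding error growth of long sums logarithmic rather than linear.
 */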
/**begin repeat1
* arithmetic
* #kind = add, subtract#
* #OP = +, -#
* #PW = 1, 0#
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if (IS_BINARY_REDUCE && @PW@) {
npy_intp n = dimensions[0];
@ftype@ * or = ((@ftype@ *)args[0]);
@ftype@ * oi = ((@ftype@ *)args[0]) + 1;
@ftype@ rr, ri;
pairwise_sum_@TYPE@(&rr, &ri, args[1], n * 2, steps[1] / 2);
*or @OP@= rr;
*oi @OP@= ri;
return;
}
else {
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
((@ftype@ *)op1)[0] = in1r @OP@ in2r;
((@ftype@ *)op1)[1] = in1i @OP@ in2i;
}
}
}
/**end repeat1**/
NPY_NO_EXPORT void
@TYPE@_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
((@ftype@ *)op1)[0] = in1r*in2r - in1i*in2i;
((@ftype@ *)op1)[1] = in1r*in2i + in1i*in2r;
}
}
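/*
 * Complex division below uses Smith's scaling: the divisor component with
 * the smaller magnitude is divided by the larger one, which avoids the
 * overflow/underflow of the naive (a*c + b*d)/(c*c + d*d) formula.
 */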
NPY_NO_EXPORT void
@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
const @ftype@ in2r_abs = npy_fabs@c@(in2r);
const @ftype@ in2i_abs = npy_fabs@c@(in2i);
if (in2r_abs >= in2i_abs) {
if (in2r_abs == 0 && in2i_abs == 0) {
/* divide by zero should yield a complex inf or nan */
((@ftype@ *)op1)[0] = in1r/in2r_abs;
((@ftype@ *)op1)[1] = in1i/in2i_abs;
}
else {
const @ftype@ rat = in2i/in2r;
const @ftype@ scl = 1.0@c@/(in2r + in2i*rat);
((@ftype@ *)op1)[0] = (in1r + in1i*rat)*scl;
((@ftype@ *)op1)[1] = (in1i - in1r*rat)*scl;
}
}
else {
const @ftype@ rat = in2r/in2i;
const @ftype@ scl = 1.0@c@/(in2i + in2r*rat);
((@ftype@ *)op1)[0] = (in1r*rat + in1i)*scl;
((@ftype@ *)op1)[1] = (in1i*rat - in1r)*scl;
}
}
}
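/*
 * Complex floor division returns floor(real(a / b)) with a zero imaginary
 * part, computed with the same scaled quotient as the division loop above.
 */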
NPY_NO_EXPORT void
@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
if (npy_fabs@c@(in2r) >= npy_fabs@c@(in2i)) {
const @ftype@ rat = in2i/in2r;
((@ftype@ *)op1)[0] = npy_floor@c@((in1r + in1i*rat)/(in2r + in2i*rat));
((@ftype@ *)op1)[1] = 0;
}
else {
const @ftype@ rat = in2r/in2i;
((@ftype@ *)op1)[0] = npy_floor@c@((in1r*rat + in1i)/(in2i + in2r*rat));
((@ftype@ *)op1)[1] = 0;
}
}
}
/**begin repeat1
* #kind= greater, greater_equal, less, less_equal, equal, not_equal#
* #OP = CGT, CGE, CLT, CLE, CEQ, CNE#
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
*((npy_bool *)op1) = @OP@(in1r,in1i,in2r,in2i);
}
}
/**end repeat1**/
/**begin repeat1
 * #kind = logical_and, logical_or#
 * #OP1 = ||, ||#
 * #OP2 = &&, ||#
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
*((npy_bool *)op1) = (in1r @OP1@ in1i) @OP2@ (in2r @OP1@ in2i);
}
}
/**end repeat1**/
NPY_NO_EXPORT void
@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
const npy_bool tmp1 = (in1r || in1i);
const npy_bool tmp2 = (in2r || in2i);
*((npy_bool *)op1) = tmp1 != tmp2;
}
}
NPY_NO_EXPORT void
@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
*((npy_bool *)op1) = !(in1r || in1i);
}
}
/**begin repeat1
* #kind = isnan, isinf, isfinite#
* #func = npy_isnan, npy_isinf, npy_isfinite#
* #OP = ||, ||, &&#
**/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
*((npy_bool *)op1) = @func@(in1r) @OP@ @func@(in1i);
}
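    /*
     * the nan/inf checks above may set floating point status flags on some
     * platforms; clear them so the ufunc machinery does not warn
     */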
npy_clear_floatstatus();
}
/**end repeat1**/
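/*
 * square keeps the exact expression of the multiply loop (in1r*in1i +
 * in1i*in1r rather than 2*in1r*in1i), so square(x) and multiply(x, x)
 * produce identical results.
 */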
NPY_NO_EXPORT void
@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
UNARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
((@ftype@ *)op1)[0] = in1r*in1r - in1i*in1i;
((@ftype@ *)op1)[1] = in1r*in1i + in1i*in1r;
}
}
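/*
 * 1/z with the same smaller/larger scaling as @TYPE@_divide, specialised
 * for a numerator of 1.
 */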
NPY_NO_EXPORT void
@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
UNARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
if (npy_fabs@c@(in1i) <= npy_fabs@c@(in1r)) {
const @ftype@ r = in1i/in1r;
const @ftype@ d = in1r + in1i*r;
((@ftype@ *)op1)[0] = 1/d;
((@ftype@ *)op1)[1] = -r/d;
} else {
const @ftype@ r = in1r/in1i;
const @ftype@ d = in1r*r + in1i;
((@ftype@ *)op1)[0] = r/d;
((@ftype@ *)op1)[1] = -1/d;
}
}
}
NPY_NO_EXPORT void
@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
OUTPUT_LOOP {
((@ftype@ *)op1)[0] = 1;
((@ftype@ *)op1)[1] = 0;
}
}
NPY_NO_EXPORT void
@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
((@ftype@ *)op1)[0] = in1r;
((@ftype@ *)op1)[1] = -in1i;
}
}
NPY_NO_EXPORT void
@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
*((@ftype@ *)op1) = npy_hypot@c@(in1r, in1i);
}
}
NPY_NO_EXPORT void
@TYPE@__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
UNARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
*((@ftype@ *)op1) = npy_atan2@c@(in1i, in1r);
}
}
NPY_NO_EXPORT void
@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
    /* the sign of a nan input is nan (the imaginary part of the result is 0) */
UNARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
((@ftype@ *)op1)[0] = CGT(in1r, in1i, 0.0, 0.0) ? 1 :
(CLT(in1r, in1i, 0.0, 0.0) ? -1 :
(CEQ(in1r, in1i, 0.0, 0.0) ? 0 : NPY_NAN@C@));
((@ftype@ *)op1)[1] = 0;
}
}
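/*
 * maximum/minimum propagate nan: the first operand wins if it compares
 * favourably or is nan; otherwise the second operand, which may itself be
 * nan, is written out.
 */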
/**begin repeat1
* #kind = maximum, minimum#
* #OP = CGE, CLE#
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
if (@OP@(in1r, in1i, in2r, in2i) || npy_isnan(in1r) || npy_isnan(in1i)) {
((@ftype@ *)op1)[0] = in1r;
((@ftype@ *)op1)[1] = in1i;
}
else {
((@ftype@ *)op1)[0] = in2r;
((@ftype@ *)op1)[1] = in2i;
}
}
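    /* discard any fp status the nan comparisons above may have raised */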
npy_clear_floatstatus();
}
/**end repeat1**/
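/*
 * fmax/fmin differ from maximum/minimum only in their nan handling: a nan
 * operand is ignored and nan is returned only when both inputs are nan,
 * matching the C99 fmax/fmin functions.
 */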
/**begin repeat1
* #kind = fmax, fmin#
* #OP = CGE, CLE#
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
if (@OP@(in1r, in1i, in2r, in2i) || npy_isnan(in2r) || npy_isnan(in2i)) {
((@ftype@ *)op1)[0] = in1r;
((@ftype@ *)op1)[1] = in1i;
}
else {
((@ftype@ *)op1)[0] = in2r;
((@ftype@ *)op1)[1] = in2i;
}
}
}
/**end repeat1**/
#define @TYPE@_true_divide @TYPE@_divide
/**end repeat**/
#undef CGE
#undef CLE
#undef CGT
#undef CLT
#undef CEQ
#undef CNE
/*
*****************************************************************************
** OBJECT LOOPS **
*****************************************************************************
*/
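/*
 * NULL entries in an object array (elements that were never assigned) are
 * treated as Py_None by the loops below.
 */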
/**begin repeat
* #kind = equal, not_equal, greater, greater_equal, less, less_equal#
* #OP = EQ, NE, GT, GE, LT, LE#
* #identity = NPY_TRUE, NPY_FALSE, -1*4#
*/
NPY_NO_EXPORT void
OBJECT_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
int ret;
PyObject *ret_obj;
PyObject *in1 = *(PyObject **)ip1;
PyObject *in2 = *(PyObject **)ip2;
in1 = in1 ? in1 : Py_None;
in2 = in2 ? in2 : Py_None;
        /*
         * Do not use RichCompareBool because it short-circuits on an
         * identity check for == and !=.  That is wrong for elementwise
         * behaviour: it would make NaN compare equal to NaN and make any
         * object compare equal to itself regardless of its __eq__.
         */
ret_obj = PyObject_RichCompare(in1, in2, Py_@OP@);
if (ret_obj == NULL) {
return;
}
ret = PyObject_IsTrue(ret_obj);
Py_DECREF(ret_obj);
if (ret == -1) {
return;
}
*((npy_bool *)op1) = (npy_bool)ret;
}
}
/**end repeat**/
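/*
 * sign(x) via three rich comparisons against zero; an object that is
 * neither <, > nor == 0 (e.g. float('nan')) raises TypeError, and any
 * comparison error stops the loop with the exception pending.
 */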
NPY_NO_EXPORT void
OBJECT_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
PyObject *zero = PyLong_FromLong(0);
UNARY_LOOP {
PyObject *in1 = *(PyObject **)ip1;
PyObject **out = (PyObject **)op1;
PyObject *ret = NULL;
int v;
if (in1 == NULL) {
in1 = Py_None;
}
if ((v = PyObject_RichCompareBool(in1, zero, Py_LT)) == 1) {
ret = PyLong_FromLong(-1);
}
else if (v == 0 &&
(v = PyObject_RichCompareBool(in1, zero, Py_GT)) == 1) {
ret = PyLong_FromLong(1);
}
else if (v == 0 &&
(v = PyObject_RichCompareBool(in1, zero, Py_EQ)) == 1) {
ret = PyLong_FromLong(0);
}
else if (v == 0) {
            /* in1 is unordered with zero, e.g. a NaN */
PyErr_SetString(PyExc_TypeError,
"unorderable types for comparison");
}
if (ret == NULL) {
break;
}
Py_XDECREF(*out);
*out = ret;
}
Py_XDECREF(zero);
}
/*
*****************************************************************************
** END LOOPS **
*****************************************************************************
*/