diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h index 1109426..84653ea 100644 --- a/numpy/core/include/numpy/npy_cpu.h +++ b/numpy/core/include/numpy/npy_cpu.h @@ -17,7 +17,6 @@ * NPY_CPU_SH_BE * NPY_CPU_ARCEL * NPY_CPU_ARCEB - * NPY_CPU_RISCV64 */ #ifndef _NPY_CPUARCH_H_ #define _NPY_CPUARCH_H_ @@ -61,27 +60,10 @@ #define NPY_CPU_HPPA #elif defined(__alpha__) #define NPY_CPU_ALPHA -#elif defined(__arm__) || defined(__aarch64__) - #if defined(__ARMEB__) || defined(__AARCH64EB__) - #if defined(__ARM_32BIT_STATE) - #define NPY_CPU_ARMEB_AARCH32 - #elif defined(__ARM_64BIT_STATE) - #define NPY_CPU_ARMEB_AARCH64 - #else - #define NPY_CPU_ARMEB - #endif - #elif defined(__ARMEL__) || defined(__AARCH64EL__) - #if defined(__ARM_32BIT_STATE) - #define NPY_CPU_ARMEL_AARCH32 - #elif defined(__ARM_64BIT_STATE) - #define NPY_CPU_ARMEL_AARCH64 - #else - #define NPY_CPU_ARMEL - #endif - #else - # error Unknown ARM CPU, please report this to numpy maintainers with \ - information about your platform (OS, CPU and compiler) - #endif +#elif defined(__arm__) && defined(__ARMEL__) + #define NPY_CPU_ARMEL +#elif defined(__arm__) && defined(__ARMEB__) + #define NPY_CPU_ARMEB #elif defined(__sh__) && defined(__LITTLE_ENDIAN__) #define NPY_CPU_SH_LE #elif defined(__sh__) && defined(__BIG_ENDIAN__) @@ -92,14 +74,14 @@ #define NPY_CPU_MIPSEB #elif defined(__or1k__) #define NPY_CPU_OR1K +#elif defined(__aarch64__) + #define NPY_CPU_AARCH64 #elif defined(__mc68000__) #define NPY_CPU_M68K #elif defined(__arc__) && defined(__LITTLE_ENDIAN__) #define NPY_CPU_ARCEL #elif defined(__arc__) && defined(__BIG_ENDIAN__) #define NPY_CPU_ARCEB -#elif defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 64 - #define NPY_CPU_RISCV64 #else #error Unknown CPU, please report this to numpy maintainers with \ information about your platform (OS, CPU and compiler) diff --git a/numpy/core/include/numpy/npy_endian.h b/numpy/core/include/numpy/npy_endian.h index 44cdffd..1a42121 100644 --- a/numpy/core/include/numpy/npy_endian.h +++ b/numpy/core/include/numpy/npy_endian.h @@ -37,31 +37,27 @@ #define NPY_LITTLE_ENDIAN 1234 #define NPY_BIG_ENDIAN 4321 - #if defined(NPY_CPU_X86) \ - || defined(NPY_CPU_AMD64) \ - || defined(NPY_CPU_IA64) \ - || defined(NPY_CPU_ALPHA) \ - || defined(NPY_CPU_ARMEL) \ - || defined(NPY_CPU_ARMEL_AARCH32) \ - || defined(NPY_CPU_ARMEL_AARCH64) \ - || defined(NPY_CPU_SH_LE) \ - || defined(NPY_CPU_MIPSEL) \ - || defined(NPY_CPU_PPC64LE) \ - || defined(NPY_CPU_ARCEL) \ - || defined(NPY_CPU_RISCV64) + #if defined(NPY_CPU_X86) \ + || defined(NPY_CPU_AMD64) \ + || defined(NPY_CPU_IA64) \ + || defined(NPY_CPU_ALPHA) \ + || defined(NPY_CPU_ARMEL) \ + || defined(NPY_CPU_AARCH64) \ + || defined(NPY_CPU_SH_LE) \ + || defined(NPY_CPU_MIPSEL) \ + || defined(NPY_CPU_PPC64LE) \ + || defined(NPY_CPU_ARCEL) #define NPY_BYTE_ORDER NPY_LITTLE_ENDIAN - #elif defined(NPY_CPU_PPC) \ - || defined(NPY_CPU_SPARC) \ - || defined(NPY_CPU_S390) \ - || defined(NPY_CPU_HPPA) \ - || defined(NPY_CPU_PPC64) \ - || defined(NPY_CPU_ARMEB) \ - || defined(NPY_CPU_ARMEB_AARCH32) \ - || defined(NPY_CPU_ARMEB_AARCH64) \ - || defined(NPY_CPU_SH_BE) \ - || defined(NPY_CPU_MIPSEB) \ - || defined(NPY_CPU_OR1K) \ - || defined(NPY_CPU_M68K) \ + #elif defined(NPY_CPU_PPC) \ + || defined(NPY_CPU_SPARC) \ + || defined(NPY_CPU_S390) \ + || defined(NPY_CPU_HPPA) \ + || defined(NPY_CPU_PPC64) \ + || defined(NPY_CPU_ARMEB) \ + || defined(NPY_CPU_SH_BE) \ + || defined(NPY_CPU_MIPSEB) \ + || defined(NPY_CPU_OR1K) \ + || defined(NPY_CPU_M68K) \ || defined(NPY_CPU_ARCEB) #define NPY_BYTE_ORDER NPY_BIG_ENDIAN #else diff --git a/numpy/core/setup.py b/numpy/core/setup.py index cc1bb5b..371df5b 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -452,8 +452,17 @@ def configuration(parent_package='',top_path=None): moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) # Get long double representation - rep = check_long_double_representation(config_cmd) - moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) + if sys.platform != 'darwin': + rep = check_long_double_representation(config_cmd) + if rep in ['INTEL_EXTENDED_12_BYTES_LE', + 'INTEL_EXTENDED_16_BYTES_LE', + 'MOTOROLA_EXTENDED_12_BYTES_BE', + 'IEEE_QUAD_LE', 'IEEE_QUAD_BE', + 'IEEE_DOUBLE_LE', 'IEEE_DOUBLE_BE', + 'DOUBLE_DOUBLE_BE', 'DOUBLE_DOUBLE_LE']: + moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) + else: + raise ValueError("Unrecognized long double format: %s" % rep) # Py3K check if sys.version_info[0] == 3: diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 343318c..bd093c5 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -333,9 +333,9 @@ _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171', _IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000', '000', '000', '000', '000', '000', '000', '000', '000'] _IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1] -_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + +_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + ['000'] * 8) -_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + +_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + ['000'] * 8) def long_double_representation(lines): @@ -362,16 +362,11 @@ def long_double_representation(lines): # the long double if read[-8:] == _AFTER_SEQ: saw = copy.copy(read) - # if the content was 12 bytes, we only have 32 - 8 - 12 = 12 - # "before" bytes. In other words the first 4 "before" bytes went - # past the sliding window. if read[:12] == _BEFORE_SEQ[4:]: if read[12:-8] == _INTEL_EXTENDED_12B: return 'INTEL_EXTENDED_12_BYTES_LE' if read[12:-8] == _MOTOROLA_EXTENDED_12B: return 'MOTOROLA_EXTENDED_12_BYTES_BE' - # if the content was 16 bytes, we are left with 32-8-16 = 16 - # "before" bytes, so 8 went past the sliding window. elif read[:8] == _BEFORE_SEQ[8:]: if read[8:-8] == _INTEL_EXTENDED_16B: return 'INTEL_EXTENDED_16_BYTES_LE' @@ -379,11 +374,10 @@ def long_double_representation(lines): return 'IEEE_QUAD_BE' elif read[8:-8] == _IEEE_QUAD_PREC_LE: return 'IEEE_QUAD_LE' - elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE: - return 'IBM_DOUBLE_DOUBLE_LE' - elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE: - return 'IBM_DOUBLE_DOUBLE_BE' - # if the content was 8 bytes, left with 32-8-8 = 16 bytes + elif read[8:-8] == _DOUBLE_DOUBLE_BE: + return 'DOUBLE_DOUBLE_BE' + elif read[8:-8] == _DOUBLE_DOUBLE_LE: + return 'DOUBLE_DOUBLE_LE' elif read[:16] == _BEFORE_SEQ: if read[16:-8] == _IEEE_DOUBLE_LE: return 'IEEE_DOUBLE_LE' diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c index e3da79f..e005234 100644 --- a/numpy/core/src/multiarray/dragon4.c +++ b/numpy/core/src/multiarray/dragon4.c @@ -42,18 +42,6 @@ #define DEBUG_ASSERT(stmnt) do {} while(0) #endif -static inline npy_uint64 -bitmask_u64(npy_uint32 n) -{ - return ~(~((npy_uint64)0) << n); -} - -static inline npy_uint32 -bitmask_u32(npy_uint32 n) -{ - return ~(~((npy_uint32)0) << n); -} - /* * Get the log base 2 of a 32-bit unsigned integer. * http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup @@ -114,17 +102,6 @@ LogBase2_64(npy_uint64 val) return LogBase2_32((npy_uint32)val); } -#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || defined(HAVE_LDOUBLE_IEEE_QUAD_BE) -static npy_uint32 -LogBase2_128(npy_uint64 hi, npy_uint64 lo) -{ - if (hi) { - return 64 + LogBase2_64(hi); - } - - return LogBase2_64(lo); -} -#endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */ /* * Maximum number of 32 bit blocks needed in high precision arithmetic to print @@ -145,45 +122,6 @@ typedef struct BigInt { npy_uint32 blocks[c_BigInt_MaxBlocks]; } BigInt; -/* - * Dummy implementation of a memory manager for BigInts. Currently, only - * supports a single call to Dragon4, but that is OK because Dragon4 - * does not release the GIL. - * - * We try to raise an error anyway if dragon4 re-enters, and this code serves - * as a placeholder if we want to make it re-entrant in the future. - * - * Each call to dragon4 uses 7 BigInts. - */ -#define BIGINT_DRAGON4_GROUPSIZE 7 -typedef struct { - BigInt bigints[BIGINT_DRAGON4_GROUPSIZE]; - char repr[16384]; -} Dragon4_Scratch; - -static int _bigint_static_in_use = 0; -static Dragon4_Scratch _bigint_static; - -static Dragon4_Scratch* -get_dragon4_bigint_scratch(void) { - /* this test+set is not threadsafe, but no matter because we have GIL */ - if (_bigint_static_in_use) { - PyErr_SetString(PyExc_RuntimeError, - "numpy float printing code is not re-entrant. " - "Ping the devs to fix it."); - return NULL; - } - _bigint_static_in_use = 1; - - /* in this dummy implementation we only return the static allocation */ - return &_bigint_static; -} - -static void -free_dragon4_bigint_scratch(Dragon4_Scratch *mem){ - _bigint_static_in_use = 0; -} - /* Copy integer */ static void BigInt_Copy(BigInt *dst, const BigInt *src) @@ -201,62 +139,26 @@ BigInt_Copy(BigInt *dst, const BigInt *src) static void BigInt_Set_uint64(BigInt *i, npy_uint64 val) { - if (val > bitmask_u64(32)) { - i->blocks[0] = val & bitmask_u64(32); - i->blocks[1] = (val >> 32) & bitmask_u64(32); + if (val > 0xFFFFFFFF) { + i->blocks[0] = val & 0xFFFFFFFF; + i->blocks[1] = (val >> 32) & 0xFFFFFFFF; i->length = 2; } else if (val != 0) { - i->blocks[0] = val & bitmask_u64(32); - i->length = 1; - } - else { - i->length = 0; - } -} - -#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ - defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \ - defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || \ - defined(HAVE_LDOUBLE_IEEE_QUAD_BE)) -static void -BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo) -{ - if (hi > bitmask_u64(32)) { - i->length = 4; - } - else if (hi != 0) { - i->length = 3; - } - else if (lo > bitmask_u64(32)) { - i->length = 2; - } - else if (lo != 0) { + i->blocks[0] = val & 0xFFFFFFFF; i->length = 1; } else { i->length = 0; } - - switch (i->length) { - case 4: - i->blocks[3] = (hi >> 32) & bitmask_u64(32); - case 3: - i->blocks[2] = hi & bitmask_u64(32); - case 2: - i->blocks[1] = (lo >> 32) & bitmask_u64(32); - case 1: - i->blocks[0] = lo & bitmask_u64(32); - } } -#endif /* DOUBLE_DOUBLE and QUAD */ static void BigInt_Set_uint32(BigInt *i, npy_uint32 val) { if (val != 0) { i->blocks[0] = val; - i->length = 1; + i->length = (val != 0); } else { i->length = 0; @@ -264,24 +166,6 @@ BigInt_Set_uint32(BigInt *i, npy_uint32 val) } /* - * Returns 1 if the value is zero - */ -static int -BigInt_IsZero(const BigInt *i) -{ - return i->length == 0; -} - -/* - * Returns 1 if the value is even - */ -static int -BigInt_IsEven(const BigInt *i) -{ - return (i->length == 0) || ( (i->blocks[0] % 2) == 0); -} - -/* * Returns 0 if (lhs = rhs), negative if (lhs < rhs), positive if (lhs > rhs) */ static npy_int32 @@ -344,7 +228,7 @@ BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs) npy_uint64 sum = carry + (npy_uint64)(*largeCur) + (npy_uint64)(*smallCur); carry = sum >> 32; - *resultCur = sum & bitmask_u64(32); + *resultCur = sum & 0xFFFFFFFF; ++largeCur; ++smallCur; ++resultCur; @@ -354,7 +238,7 @@ BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs) while (largeCur != largeEnd) { npy_uint64 sum = carry + (npy_uint64)(*largeCur); carry = sum >> 32; - (*resultCur) = sum & bitmask_u64(32); + (*resultCur) = sum & 0xFFFFFFFF; ++largeCur; ++resultCur; } @@ -423,13 +307,13 @@ BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs) npy_uint64 product = (*resultCur) + (*largeCur)*(npy_uint64)multiplier + carry; carry = product >> 32; - *resultCur = product & bitmask_u64(32); + *resultCur = product & 0xFFFFFFFF; ++largeCur; ++resultCur; } while(largeCur != large->blocks + large->length); DEBUG_ASSERT(resultCur < result->blocks + maxResultLen); - *resultCur = (npy_uint32)(carry & bitmask_u64(32)); + *resultCur = (npy_uint32)(carry & 0xFFFFFFFF); } } @@ -453,7 +337,7 @@ BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs) const npy_uint32 *pLhsEnd = lhs->blocks + lhs->length; for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry; - *resultCur = (npy_uint32)(product & bitmask_u64(32)); + *resultCur = (npy_uint32)(product & 0xFFFFFFFF); carry = product >> 32; } @@ -530,7 +414,7 @@ BigInt_Multiply10(BigInt *result) npy_uint32 *end = result->blocks + result->length; for ( ; cur != end; ++cur) { npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry; - (*cur) = (npy_uint32)(product & bitmask_u64(32)); + (*cur) = (npy_uint32)(product & 0xFFFFFFFF); carry = product >> 32; } @@ -753,11 +637,13 @@ static BigInt g_PowerOf10_Big[] = /* result = 10^exponent */ static void -BigInt_Pow10(BigInt *result, npy_uint32 exponent, BigInt *temp) +BigInt_Pow10(BigInt *result, npy_uint32 exponent) { - /* use two temporary values to reduce large integer copy operations */ - BigInt *curTemp = result; - BigInt *pNextTemp = temp; + /* create two temporary values to reduce large integer copy operations */ + BigInt temp1; + BigInt temp2; + BigInt *curTemp = &temp1; + BigInt *pNextTemp = &temp2; npy_uint32 smallExponent; npy_uint32 tableIdx = 0; @@ -768,7 +654,7 @@ BigInt_Pow10(BigInt *result, npy_uint32 exponent, BigInt *temp) * initialize the result by looking up a 32-bit power of 10 corresponding to * the first 3 bits */ - smallExponent = exponent & bitmask_u32(3); + smallExponent = exponent & 0x7; BigInt_Set_uint32(curTemp, g_PowerOf10_U32[smallExponent]); /* remove the low bits that we used for the 32-bit lookup table */ @@ -795,17 +681,19 @@ BigInt_Pow10(BigInt *result, npy_uint32 exponent, BigInt *temp) } /* output the result */ - if (curTemp != result) { - BigInt_Copy(result, curTemp); - } + BigInt_Copy(result, curTemp); } -/* in = in * 10^exponent */ +/* result = in * 10^exponent */ static void -BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp) +BigInt_MultiplyPow10(BigInt *result, const BigInt *in, npy_uint32 exponent) { - /* use two temporary values to reduce large integer copy operations */ - BigInt *curTemp, *pNextTemp; + + /* create two temporary values to reduce large integer copy operations */ + BigInt temp1; + BigInt temp2; + BigInt *curTemp = &temp1; + BigInt *pNextTemp = &temp2; npy_uint32 smallExponent; npy_uint32 tableIdx = 0; @@ -816,15 +704,12 @@ BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp) * initialize the result by looking up a 32-bit power of 10 corresponding to * the first 3 bits */ - smallExponent = exponent & bitmask_u32(3); + smallExponent = exponent & 0x7; if (smallExponent != 0) { - BigInt_Multiply_int(temp, in, g_PowerOf10_U32[smallExponent]); - curTemp = temp; - pNextTemp = in; + BigInt_Multiply_int(curTemp, in, g_PowerOf10_U32[smallExponent]); } else { - curTemp = in; - pNextTemp = temp; + BigInt_Copy(curTemp, in); } /* remove the low bits that we used for the 32-bit lookup table */ @@ -839,7 +724,7 @@ BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp) /* multiply into the next temporary */ BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]); - /* swap to the next temporary */ + // swap to the next temporary pSwap = curTemp; curTemp = pNextTemp; pNextTemp = pSwap; @@ -851,9 +736,7 @@ BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp) } /* output the result */ - if (curTemp != in){ - BigInt_Copy(in, curTemp); - } + BigInt_Copy(result, curTemp); } /* result = 2^exponent */ @@ -905,7 +788,7 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) */ DEBUG_ASSERT(!divisor->length == 0 && divisor->blocks[divisor->length-1] >= 8 && - divisor->blocks[divisor->length-1] < bitmask_u64(32) && + divisor->blocks[divisor->length-1] < 0xFFFFFFFF && dividend->length <= divisor->length); /* @@ -942,10 +825,10 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) carry = product >> 32; difference = (npy_uint64)*dividendCur - - (product & bitmask_u64(32)) - borrow; + - (product & 0xFFFFFFFF) - borrow; borrow = (difference >> 32) & 1; - *dividendCur = difference & bitmask_u64(32); + *dividendCur = difference & 0xFFFFFFFF; ++divisorCur; ++dividendCur; @@ -977,7 +860,7 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) - (npy_uint64)*divisorCur - borrow; borrow = (difference >> 32) & 1; - *dividendCur = difference & bitmask_u64(32); + *dividendCur = difference & 0xFFFFFFFF; ++divisorCur; ++dividendCur; @@ -1110,20 +993,12 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) * There is some more documentation of these changes on Ryan Juckett's website * at http://www.ryanjuckett.com/programming/printing-floating-point-numbers/ * - * This code also has a few implementation differences from Ryan Juckett's - * version: - * 1. fixed overflow problems when mantissa was 64 bits (in float128 types), - * by replacing multiplication by 2 or 4 by BigInt_ShiftLeft calls. - * 2. Increased c_BigInt_MaxBlocks, for 128-bit floats - * 3. Added more entries to the g_PowerOf10_Big table, for 128-bit floats. - * 4. Added unbiased rounding calculation with isEven. Ryan Juckett's - * implementation did not implement "IEEE unbiased rounding", except in the - * last digit. This has been added back, following the Burger & Dybvig - * code, using the isEven variable. + * Ryan Juckett's implementation did not implement "IEEE unbiased rounding", + * except in the last digit. This has been added back, following the Burger & + * Dybvig code, using the isEven variable. * * Arguments: - * * bigints - memory to store all bigints needed (7) for dragon4 computation. - * The first BigInt should be filled in with the mantissa. + * * mantissa - value significand * * exponent - value exponent in base 2 * * mantissaBit - index of the highest set mantissa bit * * hasUnequalMargins - is the high margin twice as large as the low margin @@ -1132,11 +1007,9 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) * * pOutBuffer - buffer to output into * * bufferSize - maximum characters that can be printed to pOutBuffer * * pOutExponent - the base 10 exponent of the first digit - * - * Returns the number of digits written to the output buffer. */ static npy_uint32 -Dragon4(BigInt *bigints, const npy_int32 exponent, +Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins, const DigitMode digitMode, const CutoffMode cutoffMode, npy_int32 cutoffNumber, char *pOutBuffer, @@ -1152,24 +1025,21 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * Here, marginLow and marginHigh represent 1/2 of the distance to the next * floating point value above/below the mantissa. * - * scaledMarginHigh will point to scaledMarginLow in the case they must be - * equal to each other, otherwise it will point to optionalMarginHigh. + * scaledMarginHigh is a pointer so that it can point to scaledMarginLow in + * the case they must be equal to each other, otherwise it will point to + * optionalMarginHigh. */ - BigInt *mantissa = &bigints[0]; /* the only initialized bigint */ - BigInt *scale = &bigints[1]; - BigInt *scaledValue = &bigints[2]; - BigInt *scaledMarginLow = &bigints[3]; + BigInt scale; + BigInt scaledValue; + BigInt scaledMarginLow; BigInt *scaledMarginHigh; - BigInt *optionalMarginHigh = &bigints[4]; - - BigInt *temp1 = &bigints[5]; - BigInt *temp2 = &bigints[6]; + BigInt optionalMarginHigh; const npy_float64 log10_2 = 0.30102999566398119521373889472449; npy_int32 digitExponent, cutoffExponent, hiBlock; npy_uint32 outputDigit; /* current digit being output */ npy_uint32 outputLen; - npy_bool isEven = BigInt_IsEven(mantissa); + npy_bool isEven = (mantissa % 2) == 0; npy_int32 cmp; /* values used to determine how to round */ @@ -1178,14 +1048,12 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, DEBUG_ASSERT(bufferSize > 0); /* if the mantissa is zero, the value is zero regardless of the exponent */ - if (BigInt_IsZero(mantissa)) { + if (mantissa == 0) { *curDigit = '0'; *pOutExponent = 0; return 1; } - BigInt_Copy(scaledValue, mantissa); - if (hasUnequalMargins) { /* if we have no fractional component */ if (exponent > 0) { @@ -1199,13 +1067,17 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, */ /* scaledValue = 2 * 2 * mantissa*2^exponent */ - BigInt_ShiftLeft(scaledValue, exponent + 2); + BigInt_Set_uint64(&scaledValue, mantissa); + BigInt_ShiftLeft(&scaledValue, exponent + 2); + /* scale = 2 * 2 * 1 */ - BigInt_Set_uint32(scale, 4); + BigInt_Set_uint32(&scale, 4); + /* scaledMarginLow = 2 * 2^(exponent-1) */ - BigInt_Pow2(scaledMarginLow, exponent); + BigInt_Pow2(&scaledMarginLow, exponent); + /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */ - BigInt_Pow2(optionalMarginHigh, exponent + 1); + BigInt_Pow2(&optionalMarginHigh, exponent + 1); } /* else we have a fractional exponent */ else { @@ -1215,27 +1087,34 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, */ /* scaledValue = 2 * 2 * mantissa */ - BigInt_ShiftLeft(scaledValue, 2); + BigInt_Set_uint64(&scaledValue, mantissa); + BigInt_ShiftLeft(&scaledValue, 2); + /* scale = 2 * 2 * 2^(-exponent) */ - BigInt_Pow2(scale, -exponent + 2); + BigInt_Pow2(&scale, -exponent + 2); + /* scaledMarginLow = 2 * 2^(-1) */ - BigInt_Set_uint32(scaledMarginLow, 1); + BigInt_Set_uint32(&scaledMarginLow, 1); + /* scaledMarginHigh = 2 * 2 * 2^(-1) */ - BigInt_Set_uint32(optionalMarginHigh, 2); + BigInt_Set_uint32(&optionalMarginHigh, 2); } /* the high and low margins are different */ - scaledMarginHigh = optionalMarginHigh; + scaledMarginHigh = &optionalMarginHigh; } else { /* if we have no fractional component */ if (exponent > 0) { /* scaledValue = 2 * mantissa*2^exponent */ - BigInt_ShiftLeft(scaledValue, exponent + 1); + BigInt_Set_uint64(&scaledValue, mantissa); + BigInt_ShiftLeft(&scaledValue, exponent + 1); + /* scale = 2 * 1 */ - BigInt_Set_uint32(scale, 2); + BigInt_Set_uint32(&scale, 2); + /* scaledMarginLow = 2 * 2^(exponent-1) */ - BigInt_Pow2(scaledMarginLow, exponent); + BigInt_Pow2(&scaledMarginLow, exponent); } /* else we have a fractional exponent */ else { @@ -1245,15 +1124,18 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, */ /* scaledValue = 2 * mantissa */ - BigInt_ShiftLeft(scaledValue, 1); + BigInt_Set_uint64(&scaledValue, mantissa); + BigInt_ShiftLeft(&scaledValue, 1); + /* scale = 2 * 2^(-exponent) */ - BigInt_Pow2(scale, -exponent + 1); + BigInt_Pow2(&scale, -exponent + 1); + /* scaledMarginLow = 2 * 2^(-1) */ - BigInt_Set_uint32(scaledMarginLow, 1); + BigInt_Set_uint32(&scaledMarginLow, 1); } /* the high and low margins are equal */ - scaledMarginHigh = scaledMarginLow; + scaledMarginHigh = &scaledMarginLow; } /* @@ -1276,9 +1158,6 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * <= log10(v) + log10(2) * floor(log10(v)) < ceil((mantissaBit + exponent) * log10(2)) * <= floor(log10(v)) + 1 - * - * Warning: This calculation assumes npy_float64 is an IEEE-binary64 - * float. This line may need to be updated if this is not the case. */ digitExponent = (npy_int32)( ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69)); @@ -1300,29 +1179,31 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, /* Divide value by 10^digitExponent. */ if (digitExponent > 0) { /* A positive exponent creates a division so we multiply the scale. */ - BigInt_MultiplyPow10(scale, digitExponent, temp1); + BigInt temp; + BigInt_MultiplyPow10(&temp, &scale, digitExponent); + BigInt_Copy(&scale, &temp); } else if (digitExponent < 0) { /* * A negative exponent creates a multiplication so we multiply up the * scaledValue, scaledMarginLow and scaledMarginHigh. */ - BigInt *temp=temp1, *pow10=temp2; - BigInt_Pow10(pow10, -digitExponent, temp); + BigInt pow10, temp; + BigInt_Pow10(&pow10, -digitExponent); - BigInt_Multiply(temp, scaledValue, pow10); - BigInt_Copy(scaledValue, temp); + BigInt_Multiply(&temp, &scaledValue, &pow10); + BigInt_Copy(&scaledValue, &temp); - BigInt_Multiply(temp, scaledMarginLow, pow10); - BigInt_Copy(scaledMarginLow, temp); + BigInt_Multiply(&temp, &scaledMarginLow, &pow10); + BigInt_Copy(&scaledMarginLow, &temp); - if (scaledMarginHigh != scaledMarginLow) { - BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + if (scaledMarginHigh != &scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow); } } /* If (value >= 1), our estimate for digitExponent was too low */ - if (BigInt_Compare(scaledValue, scale) >= 0) { + if (BigInt_Compare(&scaledValue, &scale) >= 0) { /* * The exponent estimate was incorrect. * Increment the exponent and don't perform the premultiply needed @@ -1336,10 +1217,10 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * Multiply larger by the output base to prepare for the first loop * iteration. */ - BigInt_Multiply10(scaledValue); - BigInt_Multiply10(scaledMarginLow); - if (scaledMarginHigh != scaledMarginLow) { - BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + BigInt_Multiply10(&scaledValue); + BigInt_Multiply10(&scaledMarginLow); + if (scaledMarginHigh != &scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow); } } @@ -1380,8 +1261,8 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * to be less than or equal to 429496729 which is the highest number that * can be multiplied by 10 without overflowing to a new block. */ - DEBUG_ASSERT(scale->length > 0); - hiBlock = scale->blocks[scale->length - 1]; + DEBUG_ASSERT(scale.length > 0); + hiBlock = scale.blocks[scale.length - 1]; if (hiBlock < 8 || hiBlock > 429496729) { npy_uint32 hiBlockLog2, shift; @@ -1399,11 +1280,11 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, DEBUG_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27); shift = (32 + 27 - hiBlockLog2) % 32; - BigInt_ShiftLeft(scale, shift); - BigInt_ShiftLeft(scaledValue, shift); - BigInt_ShiftLeft(scaledMarginLow, shift); - if (scaledMarginHigh != scaledMarginLow) { - BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + BigInt_ShiftLeft(&scale, shift); + BigInt_ShiftLeft(&scaledValue, shift); + BigInt_ShiftLeft(&scaledMarginLow, shift); + if (scaledMarginHigh != &scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow); } } @@ -1415,25 +1296,25 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * terminate early. */ for (;;) { - BigInt *scaledValueHigh = temp1; + BigInt scaledValueHigh; digitExponent = digitExponent-1; /* divide out the scale to extract the digit */ outputDigit = - BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, &scale); DEBUG_ASSERT(outputDigit < 10); /* update the high end of the value */ - BigInt_Add(scaledValueHigh, scaledValue, scaledMarginHigh); + BigInt_Add(&scaledValueHigh, &scaledValue, scaledMarginHigh); /* * stop looping if we are far enough away from our neighboring * values or if we have reached the cutoff digit */ - cmp = BigInt_Compare(scaledValue, scaledMarginLow); + cmp = BigInt_Compare(&scaledValue, &scaledMarginLow); low = isEven ? (cmp <= 0) : (cmp < 0); - cmp = BigInt_Compare(scaledValueHigh, scale); + cmp = BigInt_Compare(&scaledValueHigh, &scale); high = isEven ? (cmp >= 0) : (cmp > 0); if (low | high | (digitExponent == cutoffExponent)) break; @@ -1443,10 +1324,10 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, ++curDigit; /* multiply larger by the output base */ - BigInt_Multiply10(scaledValue); - BigInt_Multiply10(scaledMarginLow); - if (scaledMarginHigh != scaledMarginLow) { - BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + BigInt_Multiply10(&scaledValue); + BigInt_Multiply10(&scaledMarginLow); + if (scaledMarginHigh != &scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow); } } } @@ -1464,11 +1345,10 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, /* divide out the scale to extract the digit */ outputDigit = - BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, &scale); DEBUG_ASSERT(outputDigit < 10); - if ((scaledValue->length == 0) | - (digitExponent == cutoffExponent)) { + if ((scaledValue.length == 0) | (digitExponent == cutoffExponent)) { break; } @@ -1477,7 +1357,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, ++curDigit; /* multiply larger by the output base */ - BigInt_Multiply10(scaledValue); + BigInt_Multiply10(&scaledValue); } } @@ -1495,8 +1375,8 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * compare( scale * value, scale * 0.5 ) * compare( 2 * scale * value, scale ) */ - BigInt_Multiply2_inplace(scaledValue); - compare = BigInt_Compare(scaledValue, scale); + BigInt_Multiply2_inplace(&scaledValue); + compare = BigInt_Compare(&scaledValue, &scale); roundDown = compare < 0; /* @@ -1551,53 +1431,134 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, /* - * The FormatPositional and FormatScientific functions have been more - * significantly rewritten relative to Ryan Juckett's code. - * - * The binary16 and the various 128-bit float functions are new, and adapted - * from the 64 bit version. The python interface functions are new. + * Helper union to decompose a 16-bit IEEE float. + * sign: 1 bit + * exponent: 5 bits + * mantissa: 10 bits + */ +typedef union FloatUnion16 +{ + npy_uint16 integer; +} FloatUnion16; + +npy_bool IsNegative_F16(FloatUnion16 *v) { return (v->integer >> 15) != 0; } +npy_uint32 GetExponent_F16(FloatUnion16 *v) { return (v->integer >> 10) & 0x1F;} +npy_uint32 GetMantissa_F16(FloatUnion16 *v) { return v->integer & 0x3FF; } + + +/* + * Helper union to decompose a 32-bit IEEE float. + * sign: 1 bit + * exponent: 8 bits + * mantissa: 23 bits + */ +typedef union FloatUnion32 +{ + npy_float32 floatingPoint; + npy_uint32 integer; +} FloatUnion32; + +npy_bool IsNegative_F32(FloatUnion32 *v) { return (v->integer >> 31) != 0; } +npy_uint32 GetExponent_F32(FloatUnion32 *v) { return (v->integer >> 23) & 0xFF;} +npy_uint32 GetMantissa_F32(FloatUnion32 *v) { return v->integer & 0x7FFFFF; } + +/* + * Helper union to decompose a 64-bit IEEE float. + * sign: 1 bit + * exponent: 11 bits + * mantissa: 52 bits + */ +typedef union FloatUnion64 +{ + npy_float64 floatingPoint; + npy_uint64 integer; +} FloatUnion64; +npy_bool IsNegative_F64(FloatUnion64 *v) { return (v->integer >> 63) != 0; } +npy_uint32 GetExponent_F64(FloatUnion64 *v) { return (v->integer >> 52) & 0x7FF; } +npy_uint64 GetMantissa_F64(FloatUnion64 *v) { return v->integer & 0xFFFFFFFFFFFFFull; } + +/* + * Helper unions and datatype to decompose a 80-bit IEEE float + * sign: 1 bit, second u64 + * exponent: 15 bits, second u64 + * intbit 1 bit, first u64 + * mantissa: 63 bits, first u64 + */ + +/* + * Since systems have different types of long doubles, and may not necessarily + * have a 128-byte format we can use to pass values around, here we create + * our own 128-bit storage type for convenience. + */ +typedef struct FloatVal128 { + npy_uint64 integer[2]; +} FloatVal128; +npy_bool IsNegative_F128(FloatVal128 *v) { + return ((v->integer[1] >> 15) & 0x1) != 0; +} +npy_uint32 GetExponent_F128(FloatVal128 *v) { return v->integer[1] & 0x7FFF; } +npy_uint64 GetMantissa_F128(FloatVal128 *v) { + return v->integer[0] & 0x7FFFFFFFFFFFFFFFull; +} + +/* + * then for each different definition of long double, we create a union to + * unpack the float data safely. We can then copy these integers to a + * FloatVal128. */ +#ifdef NPY_FLOAT128 +typedef union FloatUnion128 +{ + npy_float128 floatingPoint; + struct { + npy_uint64 a; + npy_uint16 b; + } integer; +} FloatUnion128; +#endif + +#ifdef NPY_FLOAT96 +typedef union FloatUnion96 +{ + npy_float96 floatingPoint; + struct { + npy_uint64 a; + npy_uint32 b; + } integer; +} FloatUnion96; +#endif + +#ifdef NPY_FLOAT80 +typedef union FloatUnion80 +{ + npy_float80 floatingPoint; + struct { + npy_uint64 a; + npy_uint16 b; + } integer; +} FloatUnion80; +#endif -/* Options struct for easy passing of Dragon4 options. +/* + * The main changes above this point, relative to Ryan Juckett's code, are: + * 1. fixed overflow problems when mantissa was 64 bits (in float128 types), + * by replacing multiplication by 2 or 4 by BigInt_ShiftLeft calls. + * 2. Increased c_BigInt_MaxBlocks + * 3. Added more entries to the g_PowerOf10_Big table + * 4. Added unbiased rounding calculation with isEven * - * scientific - boolean controlling whether scientific notation is used - * digit_mode - whether to use unique or fixed fracional output - * cutoff_mode - whether 'precision' refers to toal digits, or digits past - * the decimal point. - * precision - When negative, prints as many digits as needed for a unique - * number. When positive specifies the maximum number of - * significant digits to print. - * sign - whether to always show sign - * trim_mode - how to treat trailing 0s and '.'. See TrimMode comments. - * digits_left - pad characters to left of decimal point. -1 for no padding - * digits_right - pad characters to right of decimal point. -1 for no padding. - * Padding adds whitespace until there are the specified - * number characters to sides of decimal point. Applies after - * trim_mode characters were removed. If digits_right is - * positive and the decimal point was trimmed, decimal point - * will be replaced by a whitespace character. - * exp_digits - Only affects scientific output. If positive, pads the - * exponent with 0s until there are this many digits. If - * negative, only use sufficient digits. + * Below this point, the FormatPositional and FormatScientific functions have + * been more significantly rewritten. The Dragon4_PrintFloat16 and + * Dragon4_PrintFloat128 functions are new, and were adapted from the 64 and 32 + * bit versions. The python interfacing functions (in the header) are new. */ -typedef struct Dragon4_Options { - npy_bool scientific; - DigitMode digit_mode; - CutoffMode cutoff_mode; - npy_int32 precision; - npy_bool sign; - TrimMode trim_mode; - npy_int32 digits_left; - npy_int32 digits_right; - npy_int32 exp_digits; -} Dragon4_Options; + /* * Outputs the positive number with positional notation: ddddd.dddd * The output is always NUL terminated and the output length (not including the * NUL) is returned. - * * Arguments: * buffer - buffer to output into * bufferSize - maximum characters that can be printed to buffer @@ -1606,11 +1567,20 @@ typedef struct Dragon4_Options { * signbit - value of the sign position. Should be '+', '-' or '' * mantissaBit - index of the highest set mantissa bit * hasUnequalMargins - is the high margin twice as large as the low margin - * - * See Dragon4_Options for description of remaining arguments. + * precision - Negative prints as many digits as are needed for a unique + * number. Positive specifies the maximum number of significant + * digits to print past the decimal point. + * trim_mode - how to treat trailing 0s and '.'. See TrimMode comments. + * digits_left - pad characters to left of decimal point. -1 for no padding + * digits_right - pad characters to right of decimal point. -1 for no padding + * padding adds whitespace until there are the specified + * number characters to sides of decimal point. Applies after + * trim_mode characters were removed. If digits_right is + * positive and the decimal point was trimmed, decimal point + * will be replaced by a whitespace character. */ static npy_uint32 -FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, +FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, npy_int32 exponent, char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, DigitMode digit_mode, CutoffMode cutoff_mode, npy_int32 precision, @@ -1737,7 +1707,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, /* always add decimal point, except for DprZeros mode */ if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && - pos < maxPrintLen) { + pos < maxPrintLen){ buffer[pos++] = '.'; } @@ -1775,7 +1745,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, * when rounding, we may still end up with trailing zeros. Remove them * depending on trim settings. */ - if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0) { + if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0){ while (buffer[pos-1] == '0') { pos--; numFractionDigits--; @@ -1809,7 +1779,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 shift = digits_left - (numWholeDigits + has_sign); npy_int32 count = pos; - if (count + shift > maxPrintLen) { + if (count + shift > maxPrintLen){ count = maxPrintLen - shift; } @@ -1833,7 +1803,6 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, * Outputs the positive number with scientific notation: d.dddde[sign]ddd * The output is always NUL terminated and the output length (not including the * NUL) is returned. - * * Arguments: * buffer - buffer to output into * bufferSize - maximum characters that can be printed to buffer @@ -1842,11 +1811,15 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, * signbit - value of the sign position. Should be '+', '-' or '' * mantissaBit - index of the highest set mantissa bit * hasUnequalMargins - is the high margin twice as large as the low margin - * - * See Dragon4_Options for description of remaining arguments. + * precision - Negative prints as many digits as are needed for a unique + * number. Positive specifies the maximum number of significant + * digits to print past the decimal point. + * trim_mode - how to treat trailing 0s and '.'. See TrimMode comments. + * digits_left - pad characters to left of decimal point. -1 for no padding + * exp_digits - pads exponent with zeros until it has this many digits */ static npy_uint32 -FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, +FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, npy_int32 exponent, char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, DigitMode digit_mode, npy_int32 precision, TrimMode trim_mode, @@ -1871,7 +1844,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, leftchars = 1 + (signbit == '-' || signbit == '+'); if (digits_left > leftchars) { int i; - for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++) { + for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++){ *pCurOut = ' '; pCurOut++; --bufferSize; @@ -1919,7 +1892,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, /* always add decimal point, except for DprZeros mode */ if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && - bufferSize > 1) { + bufferSize > 1){ *pCurOut = '.'; ++pCurOut; --bufferSize; @@ -1958,7 +1931,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, * when rounding, we may still end up with trailing zeros. Remove them * depending on trim settings. */ - if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0) { + if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0){ --pCurOut; while (*pCurOut == '0') { --pCurOut; @@ -1999,7 +1972,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, DEBUG_ASSERT(printExponent < 100000); /* get exp digits */ - for (i = 0; i < 5; i++) { + for (i = 0; i < 5; i++){ digits[i] = printExponent % 10; printExponent /= 10; } @@ -2008,7 +1981,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, } exp_size = i; /* write remaining digits to tmp buf */ - for (i = exp_size; i > 0; i--) { + for (i = exp_size; i > 0; i--){ exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]); } @@ -2084,12 +2057,12 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, /* only print sign for inf values (though nan can have a sign set) */ if (signbit == '+') { - if (pos < maxPrintLen-1) { + if (pos < maxPrintLen-1){ buffer[pos++] = '+'; } } else if (signbit == '-') { - if (pos < maxPrintLen-1) { + if (pos < maxPrintLen-1){ buffer[pos++] = '-'; } } @@ -2107,9 +2080,7 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, buffer[pos + printLen] = '\0'; /* - * For numpy we ignore unusual mantissa values for nan, but keep this - * code in case we change our mind later. - * + * // XXX: Should we change this for numpy? * // append HEX value * if (maxPrintLen > 3) { * printLen += PrintHex(buffer+3, bufferSize-3, mantissa, @@ -2122,63 +2093,34 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, } /* - * The functions below format a floating-point numbers stored in particular - * formats, as a decimal string. The output string is always NUL terminated - * and the string length (not including the NUL) is returned. - * - * For 16, 32 and 64 bit floats we assume they are the IEEE 754 type. - * For 128 bit floats we account for different definitions. + * These functions print a floating-point number as a decimal string. + * The output string is always NUL terminated and the string length (not + * including the NUL) is returned. * * Arguments are: * buffer - buffer to output into * bufferSize - maximum characters that can be printed to buffer - * value - value to print - * opt - Dragon4 options, see above - */ - -/* - * Helper function that takes Dragon4 parameters and options and - * calls Dragon4. - */ -static npy_uint32 -Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, - npy_int32 exponent, char signbit, npy_uint32 mantissaBit, - npy_bool hasUnequalMargins, Dragon4_Options *opt) -{ - /* format the value */ - if (opt->scientific) { - return FormatScientific(buffer, bufferSize, mantissa, exponent, - signbit, mantissaBit, hasUnequalMargins, - opt->digit_mode, opt->precision, - opt->trim_mode, opt->digits_left, - opt->exp_digits); - } - else { - return FormatPositional(buffer, bufferSize, mantissa, exponent, - signbit, mantissaBit, hasUnequalMargins, - opt->digit_mode, opt->cutoff_mode, - opt->precision, opt->trim_mode, - opt->digits_left, opt->digits_right); - } -} - -/* - * IEEE binary16 floating-point format - * - * sign: 1 bit - * exponent: 5 bits - * mantissa: 10 bits + * value - value significand + * scientific - boolean controlling whether scientific notation is used + * precision - If positive, specifies the number of decimals to show after + * decimal point. If negative, sufficient digits to uniquely + * specify the float will be output. + * trim_mode - how to treat trailing zeros and decimal point. See TrimMode. + * digits_right - pad the result with '' on the right past the decimal point + * digits_left - pad the result with '' on the right past the decimal point + * exp_digits - Only affects scientific output. If positive, pads the + * exponent with 0s until there are this many digits. If + * negative, only use sufficient digits. */ static npy_uint32 -Dragon4_PrintFloat_IEEE_binary16( - Dragon4_Scratch *scratch, npy_half *value, Dragon4_Options *opt) +Dragon4_PrintFloat16(char *buffer, npy_uint32 bufferSize, npy_uint16 value, + npy_bool scientific, DigitMode digit_mode, + CutoffMode cutoff_mode, npy_int32 precision, + npy_bool sign, TrimMode trim_mode, npy_int32 digits_left, + npy_int32 digits_right, npy_int32 exp_digits) { - char *buffer = scratch->repr; - npy_uint32 bufferSize = sizeof(scratch->repr); - BigInt *bigints = scratch->bigints; - - npy_uint16 val = *value; - npy_uint32 floatExponent, floatMantissa, floatSign; + FloatUnion16 floatUnion; + npy_uint32 floatExponent, floatMantissa; npy_uint32 mantissa; npy_int32 exponent; @@ -2196,20 +2138,20 @@ Dragon4_PrintFloat_IEEE_binary16( } /* deconstruct the floating point value */ - floatMantissa = val & bitmask_u32(10); - floatExponent = (val >> 10) & bitmask_u32(5); - floatSign = val >> 15; + floatUnion.integer = value; + floatExponent = GetExponent_F16(&floatUnion); + floatMantissa = GetMantissa_F16(&floatUnion); /* output the sign */ - if (floatSign != 0) { + if (IsNegative_F16(&floatUnion)) { signbit = '-'; } - else if (opt->sign) { + else if (sign) { signbit = '+'; } /* if this is a special value */ - if (floatExponent == bitmask_u32(5)) { + if (floatExponent == 0x1F) { return PrintInfNan(buffer, bufferSize, floatMantissa, 3, signbit); } /* else this is a number */ @@ -2253,33 +2195,29 @@ Dragon4_PrintFloat_IEEE_binary16( hasUnequalMargins = NPY_FALSE; } - BigInt_Set_uint32(&bigints[0], mantissa); - return Format_floatbits(buffer, bufferSize, bigints, exponent, - signbit, mantissaBit, hasUnequalMargins, opt); + /* format the value */ + if (scientific) { + return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, + mantissaBit, hasUnequalMargins, digit_mode, + precision, trim_mode, digits_left, exp_digits); + } + else { + return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, + mantissaBit, hasUnequalMargins, digit_mode, + cutoff_mode, precision, trim_mode, + digits_left, digits_right); + } } -/* - * IEEE binary32 floating-point format - * - * sign: 1 bit - * exponent: 8 bits - * mantissa: 23 bits - */ static npy_uint32 -Dragon4_PrintFloat_IEEE_binary32( - Dragon4_Scratch *scratch, npy_float32 *value, - Dragon4_Options *opt) +Dragon4_PrintFloat32(char *buffer, npy_uint32 bufferSize, npy_float32 value, + npy_bool scientific, DigitMode digit_mode, + CutoffMode cutoff_mode, npy_int32 precision, + npy_bool sign, TrimMode trim_mode, npy_int32 digits_left, + npy_int32 digits_right, npy_int32 exp_digits) { - char *buffer = scratch->repr; - npy_uint32 bufferSize = sizeof(scratch->repr); - BigInt *bigints = scratch->bigints; - - union - { - npy_float32 floatingPoint; - npy_uint32 integer; - } floatUnion; - npy_uint32 floatExponent, floatMantissa, floatSign; + FloatUnion32 floatUnion; + npy_uint32 floatExponent, floatMantissa; npy_uint32 mantissa; npy_int32 exponent; @@ -2297,21 +2235,20 @@ Dragon4_PrintFloat_IEEE_binary32( } /* deconstruct the floating point value */ - floatUnion.floatingPoint = *value; - floatMantissa = floatUnion.integer & bitmask_u32(23); - floatExponent = (floatUnion.integer >> 23) & bitmask_u32(8); - floatSign = floatUnion.integer >> 31; + floatUnion.floatingPoint = value; + floatExponent = GetExponent_F32(&floatUnion); + floatMantissa = GetMantissa_F32(&floatUnion); /* output the sign */ - if (floatSign != 0) { + if (IsNegative_F32(&floatUnion)) { signbit = '-'; } - else if (opt->sign) { + else if (sign) { signbit = '+'; } /* if this is a special value */ - if (floatExponent == bitmask_u32(8)) { + if (floatExponent == 0xFF) { return PrintInfNan(buffer, bufferSize, floatMantissa, 6, signbit); } /* else this is a number */ @@ -2355,32 +2292,29 @@ Dragon4_PrintFloat_IEEE_binary32( hasUnequalMargins = NPY_FALSE; } - BigInt_Set_uint32(&bigints[0], mantissa); - return Format_floatbits(buffer, bufferSize, bigints, exponent, - signbit, mantissaBit, hasUnequalMargins, opt); + /* format the value */ + if (scientific) { + return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, + mantissaBit, hasUnequalMargins, digit_mode, + precision, trim_mode, digits_left, exp_digits); + } + else { + return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, + mantissaBit, hasUnequalMargins, digit_mode, + cutoff_mode, precision, trim_mode, + digits_left, digits_right); + } } -/* - * IEEE binary64 floating-point format - * - * sign: 1 bit - * exponent: 11 bits - * mantissa: 52 bits - */ static npy_uint32 -Dragon4_PrintFloat_IEEE_binary64( - Dragon4_Scratch *scratch, npy_float64 *value, Dragon4_Options *opt) +Dragon4_PrintFloat64(char *buffer, npy_uint32 bufferSize, npy_float64 value, + npy_bool scientific, DigitMode digit_mode, + CutoffMode cutoff_mode, npy_int32 precision, + npy_bool sign, TrimMode trim_mode, npy_int32 digits_left, + npy_int32 digits_right, npy_int32 exp_digits) { - char *buffer = scratch->repr; - npy_uint32 bufferSize = sizeof(scratch->repr); - BigInt *bigints = scratch->bigints; - - union - { - npy_float64 floatingPoint; - npy_uint64 integer; - } floatUnion; - npy_uint32 floatExponent, floatSign; + FloatUnion64 floatUnion; + npy_uint32 floatExponent; npy_uint64 floatMantissa; npy_uint64 mantissa; @@ -2399,21 +2333,20 @@ Dragon4_PrintFloat_IEEE_binary64( } /* deconstruct the floating point value */ - floatUnion.floatingPoint = *value; - floatMantissa = floatUnion.integer & bitmask_u64(52); - floatExponent = (floatUnion.integer >> 52) & bitmask_u32(11); - floatSign = floatUnion.integer >> 63; + floatUnion.floatingPoint = value; + floatExponent = GetExponent_F64(&floatUnion); + floatMantissa = GetMantissa_F64(&floatUnion); /* output the sign */ - if (floatSign != 0) { + if (IsNegative_F64(&floatUnion)) { signbit = '-'; } - else if (opt->sign) { + else if (sign) { signbit = '+'; } /* if this is a special value */ - if (floatExponent == bitmask_u32(11)) { + if (floatExponent == 0x7FF) { return PrintInfNan(buffer, bufferSize, floatMantissa, 13, signbit); } /* else this is a number */ @@ -2457,48 +2390,28 @@ Dragon4_PrintFloat_IEEE_binary64( hasUnequalMargins = NPY_FALSE; } - BigInt_Set_uint64(&bigints[0], mantissa); - return Format_floatbits(buffer, bufferSize, bigints, exponent, - signbit, mantissaBit, hasUnequalMargins, opt); + /* format the value */ + if (scientific) { + return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, + mantissaBit, hasUnequalMargins, digit_mode, + precision, trim_mode, digits_left, exp_digits); + } + else { + return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, + mantissaBit, hasUnequalMargins, digit_mode, + cutoff_mode, precision, trim_mode, + digits_left, digits_right); + } } - -/* - * Since systems have different types of long doubles, and may not necessarily - * have a 128-byte format we can use to pass values around, here we create - * our own 128-bit storage type for convenience. - */ -typedef struct FloatVal128 { - npy_uint64 hi, lo; -} FloatVal128; - -#if defined(HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE) || \ - defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) || \ - defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) || \ - defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) -/* - * Intel's 80-bit IEEE extended precision floating-point format - * - * "long doubles" with this format are stored as 96 or 128 bits, but - * are equivalent to the 80 bit type with some zero padding on the high bits. - * This method expects the user to pass in the value using a 128-bit - * FloatVal128, so can support 80, 96, or 128 bit storage formats, - * and is endian-independent. - * - * sign: 1 bit, second u64 - * exponent: 15 bits, second u64 - * intbit 1 bit, first u64 - * mantissa: 63 bits, first u64 - */ static npy_uint32 -Dragon4_PrintFloat_Intel_extended( - Dragon4_Scratch *scratch, FloatVal128 value, Dragon4_Options *opt) +Dragon4_PrintFloat128(char *buffer, npy_uint32 bufferSize, FloatVal128 value, + npy_bool scientific, DigitMode digit_mode, + CutoffMode cutoff_mode, npy_int32 precision, + npy_bool sign, TrimMode trim_mode, npy_int32 digits_left, + npy_int32 digits_right, npy_int32 exp_digits) { - char *buffer = scratch->repr; - npy_uint32 bufferSize = sizeof(scratch->repr); - BigInt *bigints = scratch->bigints; - - npy_uint32 floatExponent, floatSign; + npy_uint32 floatExponent; npy_uint64 floatMantissa; npy_uint64 mantissa; @@ -2516,27 +2429,20 @@ Dragon4_PrintFloat_Intel_extended( return 0; } - /* deconstruct the floating point value (we ignore the intbit) */ - floatMantissa = value.lo & bitmask_u64(63); - floatExponent = value.hi & bitmask_u32(15); - floatSign = (value.hi >> 15) & 0x1; + /* deconstruct the floating point value */ + floatExponent = GetExponent_F128(&value); + floatMantissa = GetMantissa_F128(&value); /* output the sign */ - if (floatSign != 0) { + if (IsNegative_F128(&value)) { signbit = '-'; } - else if (opt->sign) { + else if (sign) { signbit = '+'; } /* if this is a special value */ - if (floatExponent == bitmask_u32(15)) { - /* - * Note: Technically there are other special extended values defined if - * the intbit is 0, like Pseudo-Infinity, Pseudo-Nan, Quiet-NaN. We - * ignore all of these since they are not generated on modern - * processors. We treat Quiet-Nan as simply Nan. - */ + if (floatExponent == 0x7FFF) { return PrintInfNan(buffer, bufferSize, floatMantissa, 16, signbit); } /* else this is a number */ @@ -2580,668 +2486,247 @@ Dragon4_PrintFloat_Intel_extended( hasUnequalMargins = NPY_FALSE; } - BigInt_Set_uint64(&bigints[0], mantissa); - return Format_floatbits(buffer, bufferSize, bigints, exponent, - signbit, mantissaBit, hasUnequalMargins, opt); - -} - -#endif /* INTEL_EXTENDED group */ - - -#ifdef HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE -/* - * Intel's 80-bit IEEE extended precision format, 80-bit storage - * - * Note: It is not clear if a long double with 10-byte storage exists on any - * system. But numpy defines NPY_FLOAT80, so if we come across it, assume it is - * an Intel extended format. - */ -static npy_uint32 -Dragon4_PrintFloat_Intel_extended80( - Dragon4_Scratch *scratch, npy_float80 *value, Dragon4_Options *opt) -{ - FloatVal128 val128; - union { - npy_float80 floatingPoint; - struct { - npy_uint64 a; - npy_uint16 b; - } integer; - } buf80; - - buf80.floatingPoint = *value; - /* Intel is little-endian */ - val128.lo = buf80.integer.a; - val128.hi = buf80.integer.b; - - return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); -} -#endif /* HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE */ - -#ifdef HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE -/* Intel's 80-bit IEEE extended precision format, 96-bit storage */ -static npy_uint32 -Dragon4_PrintFloat_Intel_extended96( - Dragon4_Scratch *scratch, npy_float96 *value, Dragon4_Options *opt) -{ - FloatVal128 val128; - union { - npy_float96 floatingPoint; - struct { - npy_uint64 a; - npy_uint32 b; - } integer; - } buf96; - - buf96.floatingPoint = *value; - /* Intel is little-endian */ - val128.lo = buf96.integer.a; - val128.hi = buf96.integer.b; - - return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); -} -#endif /* HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE */ - -#ifdef HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE -/* Motorola Big-endian equivalent of the Intel-extended 96 fp format */ -static npy_uint32 -Dragon4_PrintFloat_Motorola_extended96( - Dragon4_Scratch *scratch, npy_float96 *value, Dragon4_Options *opt) -{ - FloatVal128 val128; - union { - npy_float96 floatingPoint; - struct { - npy_uint64 a; - npy_uint32 b; - } integer; - } buf96; - - buf96.floatingPoint = *value; - /* Motorola is big-endian */ - val128.lo = buf96.integer.b; - val128.hi = buf96.integer.a >> 16; - /* once again we assume the int has same endianness as the float */ - - return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); -} -#endif /* HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE */ - - -#ifdef NPY_FLOAT128 - -typedef union FloatUnion128 -{ - npy_float128 floatingPoint; - struct { - npy_uint64 a; - npy_uint64 b; - } integer; -} FloatUnion128; - -#ifdef HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE -/* Intel's 80-bit IEEE extended precision format, 128-bit storage */ -static npy_uint32 -Dragon4_PrintFloat_Intel_extended128( - Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) -{ - FloatVal128 val128; - FloatUnion128 buf128; - - buf128.floatingPoint = *value; - /* Intel is little-endian */ - val128.lo = buf128.integer.a; - val128.hi = buf128.integer.b; - - return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); -} -#endif /* HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE */ - -#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || defined(HAVE_LDOUBLE_IEEE_QUAD_BE) -/* - * IEEE binary128 floating-point format - * - * sign: 1 bit - * exponent: 15 bits - * mantissa: 112 bits - * - * Currently binary128 format exists on only a few CPUs, such as on the POWER9 - * arch or aarch64. Because of this, this code has not been extensively tested. - * I am not sure if the arch also supports uint128, and C does not seem to - * support int128 literals. So we use uint64 to do manipulation. - */ -static npy_uint32 -Dragon4_PrintFloat_IEEE_binary128( - Dragon4_Scratch *scratch, FloatVal128 val128, Dragon4_Options *opt) -{ - char *buffer = scratch->repr; - npy_uint32 bufferSize = sizeof(scratch->repr); - BigInt *bigints = scratch->bigints; - - npy_uint32 floatExponent, floatSign; - - npy_uint64 mantissa_hi, mantissa_lo; - npy_int32 exponent; - npy_uint32 mantissaBit; - npy_bool hasUnequalMargins; - char signbit = '\0'; - - if (bufferSize == 0) { - return 0; - } - - if (bufferSize == 1) { - buffer[0] = '\0'; - return 0; - } - - mantissa_hi = val128.hi & bitmask_u64(48); - mantissa_lo = val128.lo; - floatExponent = (val128.hi >> 48) & bitmask_u32(15); - floatSign = val128.hi >> 63; - - /* output the sign */ - if (floatSign != 0) { - signbit = '-'; - } - else if (opt->sign) { - signbit = '+'; - } - - /* if this is a special value */ - if (floatExponent == bitmask_u32(15)) { - npy_uint64 mantissa_zero = mantissa_hi == 0 && mantissa_lo == 0; - return PrintInfNan(buffer, bufferSize, !mantissa_zero, 16, signbit); - } - /* else this is a number */ - - /* factor the value into its parts */ - if (floatExponent != 0) { - /* - * normal - * The floating point equation is: - * value = (1 + mantissa/2^112) * 2 ^ (exponent-16383) - * We convert the integer equation by factoring a 2^112 out of the - * exponent - * value = (1 + mantissa/2^112) * 2^112 * 2 ^ (exponent-16383-112) - * value = (2^112 + mantissa) * 2 ^ (exponent-16383-112) - * Because of the implied 1 in front of the mantissa we have 112 bits of - * precision. - * m = (2^112 + mantissa) - * e = (exponent-16383+1-112) - * - * Adding 2^112 to the mantissa is the same as adding 2^48 to the hi - * 64 bit part. - */ - mantissa_hi = (1ull << 48) | mantissa_hi; - /* mantissa_lo is unchanged */ - exponent = floatExponent - 16383 - 112; - mantissaBit = 112; - hasUnequalMargins = (floatExponent != 1) && (mantissa_hi == 0 && - mantissa_lo == 0); + /* format the value */ + if (scientific) { + return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, + mantissaBit, hasUnequalMargins, digit_mode, + precision, trim_mode, digits_left, exp_digits); } else { - /* - * subnormal - * The floating point equation is: - * value = (mantissa/2^112) * 2 ^ (1-16383) - * We convert the integer equation by factoring a 2^112 out of the - * exponent - * value = (mantissa/2^112) * 2^112 * 2 ^ (1-16383-112) - * value = mantissa * 2 ^ (1-16383-112) - * We have up to 112 bits of precision. - * m = (mantissa) - * e = (1-16383-112) - */ - exponent = 1 - 16383 - 112; - mantissaBit = LogBase2_128(mantissa_hi, mantissa_lo); - hasUnequalMargins = NPY_FALSE; + return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, + mantissaBit, hasUnequalMargins, digit_mode, + cutoff_mode, precision, trim_mode, + digits_left, digits_right); } - - BigInt_Set_2x_uint64(&bigints[0], mantissa_hi, mantissa_lo); - return Format_floatbits(buffer, bufferSize, bigints, exponent, - signbit, mantissaBit, hasUnequalMargins, opt); } -#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) -static npy_uint32 -Dragon4_PrintFloat_IEEE_binary128_le( - Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) -{ - FloatVal128 val128; - FloatUnion128 buf128; - - buf128.floatingPoint = *value; - val128.lo = buf128.integer.a; - val128.hi = buf128.integer.b; - - return Dragon4_PrintFloat_IEEE_binary128(scratch, val128, opt); -} -#endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */ - -#if defined(HAVE_LDOUBLE_IEEE_QUAD_BE) -/* - * This function is untested, very few, if any, architectures implement - * big endian IEEE binary128 floating point. - */ -static npy_uint32 -Dragon4_PrintFloat_IEEE_binary128_be( - Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) -{ - FloatVal128 val128; - FloatUnion128 buf128; - - buf128.floatingPoint = *value; - val128.lo = buf128.integer.b; - val128.hi = buf128.integer.a; - - return Dragon4_PrintFloat_IEEE_binary128(scratch, val128, opt); -} -#endif /* HAVE_LDOUBLE_IEEE_QUAD_BE */ - -#endif /* HAVE_LDOUBLE_IEEE_QUAD_LE | HAVE_LDOUBLE_IEEE_BE*/ - -#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ - defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)) -/* - * IBM extended precision 128-bit floating-point format, aka IBM double-double - * - * IBM's double-double type is a pair of IEEE binary64 values, which you add - * together to get a total value. The exponents are arranged so that the lower - * double is about 2^52 times smaller than the high one, and the nearest - * float64 value is simply the upper double, in which case the pair is - * considered "normalized" (not to confuse with "normal" and "subnormal" - * binary64 values). We assume normalized values. You can see the glibc's - * printf on ppc does so too by constructing un-normalized values to get - * strange behavior from the OS printf: - * - * >>> from numpy.core._multiarray_tests import format_float_OSprintf_g - * >>> x = np.array([0.3,0.3], dtype='f8').view('f16')[0] - * >>> format_float_OSprintf_g(x, 2) - * 0.30 - * >>> format_float_OSprintf_g(2*x, 2) - * 1.20 - * - * If we don't assume normalization, x should really print as 0.6. - * - * For normalized values gcc assumes that the total mantissa is no - * more than 106 bits (53+53), so we can drop bits from the second double which - * would be pushed past 106 when left-shifting by its exponent, as happens - * sometimes. (There has been debate about this, see - * https://gcc.gnu.org/bugzilla/show_bug.cgi?format=multiple&id=70117, - * https://sourceware.org/bugzilla/show_bug.cgi?id=22752 ) - * - * Note: This function is for the IBM-double-double which is a pair of IEEE - * binary64 floats, like on ppc64 systems. This is *not* the hexadecimal - * IBM-double-double type, which is a pair of IBM hexadecimal64 floats. - * - * See also: - * https://gcc.gnu.org/wiki/Ieee128PowerPCA - * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_71/com.ibm.aix.genprogc/128bit_long_double_floating-point_datatype.htm - */ -static npy_uint32 -Dragon4_PrintFloat_IBM_double_double( - Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) +PyObject * +Dragon4_Positional_AnySize(void *val, size_t size, DigitMode digit_mode, + CutoffMode cutoff_mode, int precision, int sign, + TrimMode trim, int pad_left, int pad_right) { - char *buffer = scratch->repr; - npy_uint32 bufferSize = sizeof(scratch->repr); - BigInt *bigints = scratch->bigints; - + /* + * Use a very large buffer in case anyone tries to output a large numberG. + * 16384 should be enough to uniquely print any float128, which goes up + * to about 10^4932 */ + static char repr[16384]; FloatVal128 val128; +#ifdef NPY_FLOAT80 + FloatUnion80 buf80;; +#endif +#ifdef NPY_FLOAT96 + FloatUnion96 buf96; +#endif +#ifdef NPY_FLOAT128 FloatUnion128 buf128; +#endif - npy_uint32 floatExponent1, floatExponent2; - npy_uint64 floatMantissa1, floatMantissa2; - npy_uint32 floatSign1, floatSign2; - - npy_uint64 mantissa1, mantissa2; - npy_int32 exponent1, exponent2; - int shift; - npy_uint32 mantissaBit; - npy_bool hasUnequalMargins; - char signbit = '\0'; - - if (bufferSize == 0) { - return 0; - } - - if (bufferSize == 1) { - buffer[0] = '\0'; - return 0; - } - - /* The high part always comes before the low part, regardless of the - * endianness of the system. */ - buf128.floatingPoint = *value; - val128.hi = buf128.integer.a; - val128.lo = buf128.integer.b; - - /* deconstruct the floating point values */ - floatMantissa1 = val128.hi & bitmask_u64(52); - floatExponent1 = (val128.hi >> 52) & bitmask_u32(11); - floatSign1 = (val128.hi >> 63) != 0; - - floatMantissa2 = val128.lo & bitmask_u64(52); - floatExponent2 = (val128.lo >> 52) & bitmask_u32(11); - floatSign2 = (val128.lo >> 63) != 0; - - /* output the sign using 1st float's sign */ - if (floatSign1) { - signbit = '-'; - } - else if (opt->sign) { - signbit = '+'; - } - - /* we only need to look at the first float for inf/nan */ - if (floatExponent1 == bitmask_u32(11)) { - return PrintInfNan(buffer, bufferSize, floatMantissa1, 13, signbit); - } - - /* else this is a number */ - - /* Factor the 1st value into its parts, see binary64 for comments. */ - if (floatExponent1 == 0) { - /* - * If the first number is a subnormal value, the 2nd has to be 0 for - * the float128 to be normalized, so we can ignore it. In this case - * the float128 only has the precision of a single binary64 value. - */ - mantissa1 = floatMantissa1; - exponent1 = 1 - 1023 - 52; - mantissaBit = LogBase2_64(mantissa1); - hasUnequalMargins = NPY_FALSE; - - BigInt_Set_uint64(&bigints[0], mantissa1); - } - else { - mantissa1 = (1ull << 52) | floatMantissa1; - exponent1 = floatExponent1 - 1023 - 52; - mantissaBit = 52 + 53; - - /* - * Computing hasUnequalMargins and mantissaBit: - * This is a little trickier than for IEEE formats. - * - * When both doubles are "normal" it is clearer since we can think of - * it as an IEEE type with a 106 bit mantissa. This value can never - * have "unequal" margins because of the implied 1 bit in the 2nd - * value. (unequal margins only happen when the mantissa has a value - * like "10000000000...", all zeros except the implied bit at the - * start, since the next lowest number has a different exponent). - * mantissaBits will always be 52+53 in this case. - * - * If the 1st number is a very small normal, and the 2nd is subnormal - * and not 2^52 times smaller, the number behaves like a subnormal - * overall, where the upper number just adds some bits on the left. - * Like usual subnormals, it has "equal" margins. The slightly tricky - * thing is that the number of mantissaBits varies. It will be 52 - * (from lower double) plus a variable number depending on the upper - * number's exponent. We recompute the number of bits in the shift - * calculation below, because the shift will be equal to the number of - * lost bits. - * - * We can get unequal margins only if the first value has all-0 - * mantissa (except implied bit), and the second value is exactly 0. As - * a special exception the smallest normal value (smallest exponent, 0 - * mantissa) should have equal margins, since it is "next to" a - * subnormal value. - */ - - /* factor the 2nd value into its parts */ - if (floatExponent2 != 0) { - mantissa2 = (1ull << 52) | floatMantissa2; - exponent2 = floatExponent2 - 1023 - 52; - hasUnequalMargins = NPY_FALSE; - } - else { - /* shift exp by one so that leading mantissa bit is still bit 53 */ - mantissa2 = floatMantissa2 << 1; - exponent2 = - 1023 - 52; - hasUnequalMargins = (floatExponent1 != 1) && (floatMantissa1 == 0) - && (floatMantissa2 == 0); - } - - /* - * The 2nd val's exponent might not be exactly 52 smaller than the 1st, - * it can vary a little bit. So do some shifting of the low mantissa, - * so that the total mantissa is equivalent to bits 53 to 0 of the - * first double immediately followed by bits 53 to 0 of the second. - */ - shift = exponent1 - exponent2 - 53; - if (shift > 0) { - /* shift more than 64 is undefined behavior */ - mantissa2 = shift < 64 ? mantissa2 >> shift : 0; - } - else if (shift < 0) { - /* - * This only happens if the 2nd value is subnormal. - * We expect that shift > -64, but check it anyway - */ - mantissa2 = -shift < 64 ? mantissa2 << -shift : 0; - } - - /* - * If the low double is a different sign from the high double, - * rearrange so that the total mantissa is the sum of the two - * mantissas, instead of a subtraction. - * hi - lo -> (hi-1) + (1-lo), where lo < 1 - */ - if (floatSign1 != floatSign2 && mantissa2 != 0) { - mantissa1--; - mantissa2 = (1ull << 53) - mantissa2; - } - - /* - * Compute the number of bits if we are in the subnormal range. - * The value "shift" happens to be exactly the number of lost bits. - * Also, shift the bits so that the least significant bit is at - * bit position 0, like a typical subnormal. After this exponent1 - * should always be 2^-1022 - */ - if (shift < 0) { - mantissa2 = (mantissa2 >> -shift) | (mantissa1 << (53 + shift)); - mantissa1 = mantissa1 >> -shift; - mantissaBit = mantissaBit -(-shift); - exponent1 -= shift; - DEBUG_ASSERT(exponent1 == -1022); - } - - /* - * set up the BigInt mantissa, by shifting the parts as needed - * We can use | instead of + since the mantissas should not overlap - */ - BigInt_Set_2x_uint64(&bigints[0], mantissa1 >> 11, - (mantissa1 << 53) | (mantissa2)); - exponent1 = exponent1 - 53; + switch (size) { + case 2: + Dragon4_PrintFloat16(repr, sizeof(repr), *(npy_float16*)val, + 0, digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right, -1); + break; + case 4: + Dragon4_PrintFloat32(repr, sizeof(repr), *(npy_float32*)val, + 0, digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right, -1); + break; + case 8: + Dragon4_PrintFloat64(repr, sizeof(repr), *(npy_float64*)val, + 0, digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right, -1); + break; +#ifdef NPY_FLOAT80 + case 10: + buf80.floatingPoint = *(npy_float80*)val; + val128.integer[0] = buf80.integer.a; + val128.integer[1] = buf80.integer.b; + Dragon4_PrintFloat128(repr, sizeof(repr), val128, + 0, digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right, -1); + break; +#endif +#ifdef NPY_FLOAT96 + case 12: + buf96.floatingPoint = *(npy_float96*)val; + val128.integer[0] = buf96.integer.a; + val128.integer[1] = buf96.integer.b; + Dragon4_PrintFloat128(repr, sizeof(repr), val128, + 0, digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right, -1); + break; +#endif +#ifdef NPY_FLOAT128 + case 16: + buf128.floatingPoint = *(npy_float128*)val; + val128.integer[0] = buf128.integer.a; + val128.integer[1] = buf128.integer.b; + Dragon4_PrintFloat128(repr, sizeof(repr), val128, + 0, digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right, -1); + break; +#endif + default: + PyErr_Format(PyExc_ValueError, "unexpected itemsize %zu", size); + return NULL; } - return Format_floatbits(buffer, bufferSize, bigints, exponent1, - signbit, mantissaBit, hasUnequalMargins, opt); + return PyUString_FromString(repr); } -#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE | HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */ - -#endif /* NPY_FLOAT128 */ - - -/* - * Here we define two Dragon4 entry functions for each type. One of them - * accepts the args in a Dragon4_Options struct for convenience, the - * other enumerates only the necessary parameters. - * - * Use a very large string buffer in case anyone tries to output a large number. - * 16384 should be enough to exactly print the integer part of any float128, - * which goes up to about 10^4932. The Dragon4_scratch struct provides a string - * buffer of this size. - */ -#define make_dragon4_typefuncs_inner(Type, npy_type, format) \ -\ -PyObject *\ -Dragon4_Positional_##Type##_opt(npy_type *val, Dragon4_Options *opt)\ -{\ - PyObject *ret;\ - Dragon4_Scratch *scratch = get_dragon4_bigint_scratch();\ - if (scratch == NULL) {\ - return NULL;\ - }\ - if (Dragon4_PrintFloat_##format(scratch, val, opt) < 0) {\ - free_dragon4_bigint_scratch(scratch);\ - return NULL;\ - }\ - ret = PyUString_FromString(scratch->repr);\ - free_dragon4_bigint_scratch(scratch);\ - return ret;\ -}\ -\ -PyObject *\ -Dragon4_Positional_##Type(npy_type *val, DigitMode digit_mode,\ - CutoffMode cutoff_mode, int precision,\ - int sign, TrimMode trim, int pad_left, int pad_right)\ -{\ - Dragon4_Options opt;\ - \ - opt.scientific = 0;\ - opt.digit_mode = digit_mode;\ - opt.cutoff_mode = cutoff_mode;\ - opt.precision = precision;\ - opt.sign = sign;\ - opt.trim_mode = trim;\ - opt.digits_left = pad_left;\ - opt.digits_right = pad_right;\ - opt.exp_digits = -1;\ -\ - return Dragon4_Positional_##Type##_opt(val, &opt);\ -}\ -\ -PyObject *\ -Dragon4_Scientific_##Type##_opt(npy_type *val, Dragon4_Options *opt)\ -{\ - PyObject *ret;\ - Dragon4_Scratch *scratch = get_dragon4_bigint_scratch();\ - if (scratch == NULL) {\ - return NULL;\ - }\ - if (Dragon4_PrintFloat_##format(scratch, val, opt) < 0) {\ - free_dragon4_bigint_scratch(scratch);\ - return NULL;\ - }\ - ret = PyUString_FromString(scratch->repr);\ - free_dragon4_bigint_scratch(scratch);\ - return ret;\ -}\ -PyObject *\ -Dragon4_Scientific_##Type(npy_type *val, DigitMode digit_mode, int precision,\ - int sign, TrimMode trim, int pad_left, int exp_digits)\ -{\ - Dragon4_Options opt;\ -\ - opt.scientific = 1;\ - opt.digit_mode = digit_mode;\ - opt.cutoff_mode = CutoffMode_TotalLength;\ - opt.precision = precision;\ - opt.sign = sign;\ - opt.trim_mode = trim;\ - opt.digits_left = pad_left;\ - opt.digits_right = -1;\ - opt.exp_digits = exp_digits;\ -\ - return Dragon4_Scientific_##Type##_opt(val, &opt);\ -} - -#define make_dragon4_typefuncs(Type, npy_type, format) \ - make_dragon4_typefuncs_inner(Type, npy_type, format) - -make_dragon4_typefuncs(Half, npy_half, NPY_HALF_BINFMT_NAME) -make_dragon4_typefuncs(Float, npy_float, NPY_FLOAT_BINFMT_NAME) -make_dragon4_typefuncs(Double, npy_double, NPY_DOUBLE_BINFMT_NAME) -make_dragon4_typefuncs(LongDouble, npy_longdouble, NPY_LONGDOUBLE_BINFMT_NAME) - -#undef make_dragon4_typefuncs -#undef make_dragon4_typefuncs_inner - PyObject * Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, int precision, int sign, TrimMode trim, int pad_left, int pad_right) { - npy_double val; - Dragon4_Options opt; - - opt.scientific = 0; - opt.digit_mode = digit_mode; - opt.cutoff_mode = cutoff_mode; - opt.precision = precision; - opt.sign = sign; - opt.trim_mode = trim; - opt.digits_left = pad_left; - opt.digits_right = pad_right; - opt.exp_digits = -1; + double val; if (PyArray_IsScalar(obj, Half)) { npy_half x = ((PyHalfScalarObject *)obj)->obval; - return Dragon4_Positional_Half_opt(&x, &opt); + return Dragon4_Positional_AnySize(&x, sizeof(npy_half), + digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right); } else if (PyArray_IsScalar(obj, Float)) { npy_float x = ((PyFloatScalarObject *)obj)->obval; - return Dragon4_Positional_Float_opt(&x, &opt); + return Dragon4_Positional_AnySize(&x, sizeof(npy_float), + digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right); } else if (PyArray_IsScalar(obj, Double)) { npy_double x = ((PyDoubleScalarObject *)obj)->obval; - return Dragon4_Positional_Double_opt(&x, &opt); + return Dragon4_Positional_AnySize(&x, sizeof(npy_double), + digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right); } else if (PyArray_IsScalar(obj, LongDouble)) { npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval; - return Dragon4_Positional_LongDouble_opt(&x, &opt); + return Dragon4_Positional_AnySize(&x, sizeof(npy_longdouble), + digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right); } val = PyFloat_AsDouble(obj); if (PyErr_Occurred()) { return NULL; } - return Dragon4_Positional_Double_opt(&val, &opt); + return Dragon4_Positional_AnySize(&val, sizeof(double), + digit_mode, cutoff_mode, precision, + sign, trim, pad_left, pad_right); +} + +PyObject * +Dragon4_Scientific_AnySize(void *val, size_t size, DigitMode digit_mode, + int precision, int sign, TrimMode trim, + int pad_left, int exp_digits) +{ + /* use a very large buffer in case anyone tries to output a large precision */ + static char repr[4096]; + FloatVal128 val128; +#ifdef NPY_FLOAT80 + FloatUnion80 buf80;; +#endif +#ifdef NPY_FLOAT96 + FloatUnion96 buf96; +#endif +#ifdef NPY_FLOAT128 + FloatUnion128 buf128; +#endif + + /* dummy, is ignored in scientific mode */ + CutoffMode cutoff_mode = CutoffMode_TotalLength; + + switch (size) { + case 2: + Dragon4_PrintFloat16(repr, sizeof(repr), *(npy_float16*)val, + 1, digit_mode, cutoff_mode, precision, sign, + trim, pad_left, -1, exp_digits); + break; + case 4: + Dragon4_PrintFloat32(repr, sizeof(repr), *(npy_float32*)val, + 1, digit_mode, cutoff_mode, precision, sign, + trim, pad_left, -1, exp_digits); + break; + case 8: + Dragon4_PrintFloat64(repr, sizeof(repr), *(npy_float64*)val, + 1, digit_mode, cutoff_mode, precision, sign, + trim, pad_left, -1, exp_digits); + break; +#ifdef NPY_FLOAT80 + case 10: + buf80.floatingPoint = *(npy_float80*)val; + val128.integer[0] = buf80.integer.a; + val128.integer[1] = buf80.integer.b; + Dragon4_PrintFloat128(repr, sizeof(repr), val128, + 1, digit_mode, cutoff_mode, precision, sign, + trim, pad_left, -1, exp_digits); + break; +#endif +#ifdef NPY_FLOAT96 + case 12: + buf96.floatingPoint = *(npy_float96*)val; + val128.integer[0] = buf96.integer.a; + val128.integer[1] = buf96.integer.b; + Dragon4_PrintFloat128(repr, sizeof(repr), val128, + 1, digit_mode, cutoff_mode, precision, sign, + trim, pad_left, -1, exp_digits); + break; +#endif +#ifdef NPY_FLOAT128 + case 16: + buf128.floatingPoint = *(npy_float128*)val; + val128.integer[0] = buf128.integer.a; + val128.integer[1] = buf128.integer.b; + Dragon4_PrintFloat128(repr, sizeof(repr), val128, + 1, digit_mode, cutoff_mode, precision, sign, + trim, pad_left, -1, exp_digits); + break; +#endif + default: + PyErr_Format(PyExc_ValueError, "unexpected itemsize %zu", size); + return NULL; + } + + return PyUString_FromString(repr); } PyObject * Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, int sign, TrimMode trim, int pad_left, int exp_digits) { - npy_double val; - Dragon4_Options opt; - - opt.scientific = 1; - opt.digit_mode = digit_mode; - opt.cutoff_mode = CutoffMode_TotalLength; - opt.precision = precision; - opt.sign = sign; - opt.trim_mode = trim; - opt.digits_left = pad_left; - opt.digits_right = -1; - opt.exp_digits = exp_digits; + double val; if (PyArray_IsScalar(obj, Half)) { npy_half x = ((PyHalfScalarObject *)obj)->obval; - return Dragon4_Scientific_Half_opt(&x, &opt); + return Dragon4_Scientific_AnySize(&x, sizeof(npy_half), + digit_mode, precision, + sign, trim, pad_left, exp_digits); } else if (PyArray_IsScalar(obj, Float)) { npy_float x = ((PyFloatScalarObject *)obj)->obval; - return Dragon4_Scientific_Float_opt(&x, &opt); + return Dragon4_Scientific_AnySize(&x, sizeof(npy_float), + digit_mode, precision, + sign, trim, pad_left, exp_digits); } else if (PyArray_IsScalar(obj, Double)) { npy_double x = ((PyDoubleScalarObject *)obj)->obval; - return Dragon4_Scientific_Double_opt(&x, &opt); + return Dragon4_Scientific_AnySize(&x, sizeof(npy_double), + digit_mode, precision, + sign, trim, pad_left, exp_digits); } else if (PyArray_IsScalar(obj, LongDouble)) { npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval; - return Dragon4_Scientific_LongDouble_opt(&x, &opt); + return Dragon4_Scientific_AnySize(&x, sizeof(npy_longdouble), + digit_mode, precision, + sign, trim, pad_left, exp_digits); } val = PyFloat_AsDouble(obj); if (PyErr_Occurred()) { return NULL; } - return Dragon4_Scientific_Double_opt(&val, &opt); + return Dragon4_Scientific_AnySize(&val, sizeof(double), + digit_mode, precision, + sign, trim, pad_left, exp_digits); } - -#undef DEBUG_ASSERT diff --git a/numpy/core/src/multiarray/dragon4.h b/numpy/core/src/multiarray/dragon4.h index 2b8b4ce..5559c51 100644 --- a/numpy/core/src/multiarray/dragon4.h +++ b/numpy/core/src/multiarray/dragon4.h @@ -40,48 +40,6 @@ #include "npy_pycompat.h" #include "numpy/arrayscalars.h" -/* Half binary format */ -#define NPY_HALF_BINFMT_NAME IEEE_binary16 - -/* Float binary format */ -#if NPY_BITSOF_FLOAT == 32 - #define NPY_FLOAT_BINFMT_NAME IEEE_binary32 -#elif NPY_BITSOF_FLOAT == 64 - #define NPY_FLOAT_BINFMT_NAME IEEE_binary64 -#else - #error No float representation defined -#endif - -/* Double binary format */ -#if NPY_BITSOF_DOUBLE == 32 - #define NPY_DOUBLE_BINFMT_NAME IEEE_binary32 -#elif NPY_BITSOF_DOUBLE == 64 - #define NPY_DOUBLE_BINFMT_NAME IEEE_binary64 -#else - #error No double representation defined -#endif - -/* LongDouble binary format */ -#if defined(HAVE_LDOUBLE_IEEE_QUAD_BE) - #define NPY_LONGDOUBLE_BINFMT_NAME IEEE_binary128_be -#elif defined(HAVE_LDOUBLE_IEEE_QUAD_LE) - #define NPY_LONGDOUBLE_BINFMT_NAME IEEE_binary128_le -#elif (defined(HAVE_LDOUBLE_IEEE_DOUBLE_LE) || \ - defined(HAVE_LDOUBLE_IEEE_DOUBLE_BE)) - #define NPY_LONGDOUBLE_BINFMT_NAME IEEE_binary64 -#elif defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) - #define NPY_LONGDOUBLE_BINFMT_NAME Intel_extended96 -#elif defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) - #define NPY_LONGDOUBLE_BINFMT_NAME Intel_extended128 -#elif defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) - #define NPY_LONGDOUBLE_BINFMT_NAME Motorola_extended96 -#elif (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ - defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)) - #define NPY_LONGDOUBLE_BINFMT_NAME IBM_double_double -#else - #error No long double representation defined -#endif - typedef enum DigitMode { /* Round digits to print shortest uniquely identifiable number. */ @@ -106,23 +64,15 @@ typedef enum TrimMode TrimMode_DptZeros, /* trim trailing zeros & trailing decimal point */ } TrimMode; -#define make_dragon4_typedecl(Type, npy_type) \ - PyObject *\ - Dragon4_Positional_##Type(npy_type *val, DigitMode digit_mode,\ - CutoffMode cutoff_mode, int precision,\ - int sign, TrimMode trim, int pad_left,\ - int pad_right);\ - PyObject *\ - Dragon4_Scientific_##Type(npy_type *val, DigitMode digit_mode,\ - int precision, int sign, TrimMode trim,\ - int pad_left, int exp_digits); - -make_dragon4_typedecl(Half, npy_half) -make_dragon4_typedecl(Float, npy_float) -make_dragon4_typedecl(Double, npy_double) -make_dragon4_typedecl(LongDouble, npy_longdouble) +PyObject * +Dragon4_Positional_AnySize(void *val, size_t size, DigitMode digit_mode, + CutoffMode cutoff_mode, int precision, int sign, + TrimMode trim, int pad_left, int pad_right); -#undef make_dragon4_typedecl +PyObject * +Dragon4_Scientific_AnySize(void *val, size_t size, DigitMode digit_mode, + int precision, int sign, TrimMode trim, + int pad_left, int pad_right); PyObject * Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 42d613c..c2d4bde 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -432,7 +432,6 @@ gentype_format(PyObject *self, PyObject *args) /**begin repeat * #name = half, float, double, longdouble# - * #Name = Half, Float, Double, LongDouble# * #NAME = HALF, FLOAT, DOUBLE, LONGDOUBLE# * #type = npy_half, npy_float, npy_double, npy_longdouble# * #suff = h, f, d, l# @@ -444,12 +443,12 @@ format_@name@(@type@ val, npy_bool scientific, int pad_left, int pad_right, int exp_digits) { if (scientific) { - return Dragon4_Scientific_@Name@(&val, + return Dragon4_Scientific_AnySize(&val, sizeof(@type@), DigitMode_Unique, precision, sign, trim, pad_left, exp_digits); } else { - return Dragon4_Positional_@Name@(&val, + return Dragon4_Positional_AnySize(&val, sizeof(@type@), DigitMode_Unique, CutoffMode_TotalLength, precision, sign, trim, pad_left, pad_right); } diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src index 170471f..0370ea6 100644 --- a/numpy/core/src/npymath/ieee754.c.src +++ b/numpy/core/src/npymath/ieee754.c.src @@ -183,7 +183,6 @@ static npy_longdouble _nextl(npy_longdouble x, int p) { npy_int64 hx,ihx,ilx; npy_uint64 lx; - npy_longdouble u; GET_LDOUBLE_WORDS64(hx, lx, x); ihx = hx & 0x7fffffffffffffffLL; /* |hx| */ @@ -194,6 +193,7 @@ static npy_longdouble _nextl(npy_longdouble x, int p) return x; /* signal the nan */ } if(ihx == 0 && ilx == 0) { /* x == 0 */ + npy_longdouble u; SET_LDOUBLE_WORDS64(x, p, 0ULL);/* return +-minsubnormal */ u = x * x; if (u == x) { @@ -203,6 +203,7 @@ static npy_longdouble _nextl(npy_longdouble x, int p) } } + npy_longdouble u; if(p < 0) { /* p < 0, x -= ulp */ if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL)) return x+x; /* overflow, return -inf */ diff --git a/numpy/core/src/npymath/npy_math_private.h b/numpy/core/src/npymath/npy_math_private.h index e4a919d..d75b9e9 100644 --- a/numpy/core/src/npymath/npy_math_private.h +++ b/numpy/core/src/npymath/npy_math_private.h @@ -287,7 +287,8 @@ do { \ typedef npy_uint32 ldouble_man_t; typedef npy_uint32 ldouble_exp_t; typedef npy_uint32 ldouble_sign_t; -#elif defined(HAVE_LDOUBLE_IEEE_DOUBLE_BE) +#elif defined(HAVE_LDOUBLE_IEEE_DOUBLE_16_BYTES_BE) || \ + defined(HAVE_LDOUBLE_IEEE_DOUBLE_BE) /* 64 bits IEEE double precision aligned on 16 bytes: used by ppc arch on * Mac OS X */ @@ -434,8 +435,8 @@ do { \ typedef npy_uint32 ldouble_sign_t; #endif -#if !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) && \ - !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) +#if !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) && \ + !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE) /* Get the sign bit of x. x should be of type IEEEl2bitsrep */ #define GET_LDOUBLE_SIGN(x) \ (((x).a[LDBL_SIGN_INDEX] & LDBL_SIGN_MASK) >> LDBL_SIGN_SHIFT) @@ -476,7 +477,7 @@ do { \ ((x).a[LDBL_MANH_INDEX] & ~LDBL_MANH_MASK) | \ (((IEEEl2bitsrep_part)(v) << LDBL_MANH_SHIFT) & LDBL_MANH_MASK)) -#endif /* !HAVE_LDOUBLE_DOUBLE_DOUBLE_* */ +#endif /* #ifndef HAVE_LDOUBLE_DOUBLE_DOUBLE_BE */ /* * Those unions are used to convert a pointer of npy_cdouble to native C99 diff --git a/numpy/core/src/private/npy_config.h b/numpy/core/src/private/npy_config.h index 8143e77..107b3cb 100644 --- a/numpy/core/src/private/npy_config.h +++ b/numpy/core/src/private/npy_config.h @@ -15,8 +15,7 @@ * amd64 is not harmed much by the bloat as the system provides 16 byte * alignment by default. */ -#if (defined NPY_CPU_X86 || defined _WIN32 || defined NPY_CPU_ARMEL_AARCH32 ||\ - defined NPY_CPU_ARMEB_AARCH32) +#if (defined NPY_CPU_X86 || defined _WIN32) #define NPY_MAX_COPY_ALIGNMENT 8 #else #define NPY_MAX_COPY_ALIGNMENT 16 diff --git a/numpy/core/src/private/npy_fpmath.h b/numpy/core/src/private/npy_fpmath.h index dbb3fb2..86b9cf3 100644 --- a/numpy/core/src/private/npy_fpmath.h +++ b/numpy/core/src/private/npy_fpmath.h @@ -7,24 +7,45 @@ #include "numpy/npy_cpu.h" #include "numpy/npy_common.h" +#ifdef NPY_OS_DARWIN + /* This hardcoded logic is fragile, but universal builds makes it + * difficult to detect arch-specific features */ + + /* MAC OS X < 10.4 and gcc < 4 does not support proper long double, and + * is the same as double on those platforms */ + #if NPY_BITSOF_LONGDOUBLE == NPY_BITSOF_DOUBLE + /* This assumes that FPU and ALU have the same endianness */ + #if NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN + #define HAVE_LDOUBLE_IEEE_DOUBLE_LE + #elif NPY_BYTE_ORDER == NPY_BIG_ENDIAN + #define HAVE_LDOUBLE_IEEE_DOUBLE_BE + #else + #error Endianness undefined ? + #endif + #else + #if defined(NPY_CPU_X86) + #define HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE + #elif defined(NPY_CPU_AMD64) + #define HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE + #elif defined(NPY_CPU_PPC) || defined(NPY_CPU_PPC64) + #define HAVE_LDOUBLE_IEEE_DOUBLE_16_BYTES_BE + #elif defined(NPY_CPU_PPC64LE) + #define HAVE_LDOUBLE_IEEE_DOUBLE_16_BYTES_LE + #endif + #endif +#endif + #if !(defined(HAVE_LDOUBLE_IEEE_QUAD_BE) || \ defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || \ defined(HAVE_LDOUBLE_IEEE_DOUBLE_LE) || \ defined(HAVE_LDOUBLE_IEEE_DOUBLE_BE) || \ + defined(HAVE_LDOUBLE_IEEE_DOUBLE_16_BYTES_BE) || \ defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) || \ defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) || \ defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) || \ - defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \ - defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)) + defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) || \ + defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE)) #error No long double representation defined #endif -/* for back-compat, also keep old name for double-double */ -#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE - #define HAVE_LDOUBLE_DOUBLE_DOUBLE_LE -#endif -#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE - #define HAVE_LDOUBLE_DOUBLE_DOUBLE_BE -#endif - #endif diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index b144aec..52793d4 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -96,7 +96,7 @@ class TestRegression(object): ca = np.char.array(np.arange(1000, 1010), itemsize=4) ca.dump(f) f.seek(0) - ca = np.load(f, allow_pickle=True) + ca = np.load(f) f.close() def test_noncontiguous_fill(self): diff --git a/numpy/core/tests/test_scalarprint.py b/numpy/core/tests/test_scalarprint.py index 10fbb8b..6177461 100644 --- a/numpy/core/tests/test_scalarprint.py +++ b/numpy/core/tests/test_scalarprint.py @@ -5,9 +5,8 @@ from __future__ import division, absolute_import, print_function import numpy as np -from numpy.testing import assert_, assert_equal, run_module_suite, dec +from numpy.testing import assert_, assert_equal, run_module_suite import sys, tempfile -import platform class TestRealScalars(object): @@ -218,66 +217,6 @@ class TestRealScalars(object): "1.2" if tp != np.float16 else "1.2002") assert_equal(fpos(tp('1.'), trim='-'), "1") - @dec.skipif(not platform.machine().startswith("ppc64"), - "only applies to ppc float128 values") - def test_ppc64_ibm_double_double128(self): - # check that the precision decreases once we get into the subnormal - # range. Unlike float64, this starts around 1e-292 instead of 1e-308, - # which happens when the first double is normal and the second is - # subnormal. - x = np.float128('2.123123123123123123123123123123123e-286') - got = [str(x/np.float128('2e' + str(i))) for i in range(0,40)] - expected = [ - "1.06156156156156156156156156156157e-286", - "1.06156156156156156156156156156158e-287", - "1.06156156156156156156156156156159e-288", - "1.0615615615615615615615615615616e-289", - "1.06156156156156156156156156156157e-290", - "1.06156156156156156156156156156156e-291", - "1.0615615615615615615615615615616e-292", - "1.0615615615615615615615615615615e-293", - "1.061561561561561561561561561562e-294", - "1.06156156156156156156156156155e-295", - "1.0615615615615615615615615616e-296", - "1.06156156156156156156156156e-297", - "1.06156156156156156156156157e-298", - "1.0615615615615615615615616e-299", - "1.06156156156156156156156e-300", - "1.06156156156156156156155e-301", - "1.0615615615615615615616e-302", - "1.061561561561561561562e-303", - "1.06156156156156156156e-304", - "1.0615615615615615618e-305", - "1.06156156156156156e-306", - "1.06156156156156157e-307", - "1.0615615615615616e-308", - "1.06156156156156e-309", - "1.06156156156157e-310", - "1.0615615615616e-311", - "1.06156156156e-312", - "1.06156156154e-313", - "1.0615615616e-314", - "1.06156156e-315", - "1.06156155e-316", - "1.061562e-317", - "1.06156e-318", - "1.06155e-319", - "1.0617e-320", - "1.06e-321", - "1.04e-322", - "1e-323", - "0.0", - "0.0"] - assert_equal(got, expected) - - # Note: we follow glibc behavior, but it (or gcc) might not be right. - # In particular we can get two values that print the same but are not - # equal: - a = np.float128('2')/np.float128('3') - b = np.float128(str(a)) - assert_equal(str(a), str(b)) - assert_(a != b) - def float32_roundtrip(self): # gh-9360 x = np.float32(1024 - 2**-14) diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 09d1bbe..bebeddc 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -2515,10 +2515,8 @@ def test_nextafter(): def test_nextafterf(): return _test_nextafter(np.float32) -@dec.skipif(np.finfo(np.double) == np.finfo(np.longdouble), - "long double is same as double") -@dec.knownfailureif(platform.machine().startswith("ppc64"), - "IBM double double") +@dec.knownfailureif(sys.platform == 'win32', + "Long double support buggy on win32, ticket 1664.") def test_nextafterl(): return _test_nextafter(np.longdouble) @@ -2546,10 +2544,8 @@ def test_spacing(): def test_spacingf(): return _test_spacing(np.float32) -@dec.skipif(np.finfo(np.double) == np.finfo(np.longdouble), - "long double is same as double") -@dec.knownfailureif(platform.machine().startswith("ppc64"), - "IBM double double") +@dec.knownfailureif(sys.platform == 'win32', + "Long double support buggy on win32, ticket 1664.") def test_spacingl(): return _test_spacing(np.longdouble) diff --git a/numpy/f2py/setup.py b/numpy/f2py/setup.py index 0d47580..3204129 100644 --- a/numpy/f2py/setup.py +++ b/numpy/f2py/setup.py @@ -55,9 +55,9 @@ def configuration(parent_package='', top_path=None): config.make_svn_version_py() def generate_f2py_py(build_dir): - f2py_exe = 'f2py' + str(sys.version_info.major) - if sys.executable.endswith('.exe'): - f2py_exe = f2py_exe + '.py' + f2py_exe = 'f2py' + os.path.basename(sys.executable)[6:] + if f2py_exe[-4:] == '.exe': + f2py_exe = f2py_exe[:-4] + '.py' if 'bdist_wininst' in sys.argv and f2py_exe[-3:] != '.py': f2py_exe = f2py_exe + '.py' target = os.path.join(build_dir, f2py_exe) diff --git a/numpy/lib/format.py b/numpy/lib/format.py index e6144b3..363bb21 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -602,7 +602,7 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None): fp.write(chunk.tobytes('C')) -def read_array(fp, allow_pickle=False, pickle_kwargs=None): +def read_array(fp, allow_pickle=True, pickle_kwargs=None): """ Read an array from an NPY file. @@ -612,11 +612,7 @@ def read_array(fp, allow_pickle=False, pickle_kwargs=None): If this is not a real file object, then this may take extra memory and time. allow_pickle : bool, optional - Whether to allow writing pickled data. Default: False - - .. versionchanged:: 1.14.3 - Made default False in response to CVE-2019-6446. - + Whether to allow reading pickled data. Default: True pickle_kwargs : dict Additional keyword arguments to pass to pickle.load. These are only useful when loading object arrays saved on Python 2 when using diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index ef84d68..76b135c 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -130,11 +130,7 @@ class NpzFile(object): An object on which attribute can be performed as an alternative to getitem access on the `NpzFile` instance itself. allow_pickle : bool, optional - Allow loading pickled data. Default: False - - .. versionchanged:: 1.14.3 - Made default False in response to CVE-2019-6446. - + Allow loading pickled data. Default: True pickle_kwargs : dict, optional Additional keyword arguments to pass on to pickle.load. These are only useful when loading object arrays saved on @@ -170,7 +166,7 @@ class NpzFile(object): """ - def __init__(self, fid, own_fid=False, allow_pickle=False, + def __init__(self, fid, own_fid=False, allow_pickle=True, pickle_kwargs=None): # Import is postponed to here since zipfile depends on gzip, an # optional component of the so-called standard library. @@ -269,7 +265,7 @@ class NpzFile(object): return self.files.__contains__(key) -def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, +def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, encoding='ASCII'): """ Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files. @@ -291,11 +287,8 @@ def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, Allow loading pickled object arrays stored in npy files. Reasons for disallowing pickles include security, as loading pickled data can execute arbitrary code. If pickles are disallowed, loading object - arrays will fail. Default: False - - .. versionchanged:: 1.14.3 - Made default False in response to CVE-2019-6446. - + arrays will fail. + Default: True fix_imports : bool, optional Only useful when loading Python 2 generated pickled files on Python 3, which includes npy/npz files containing object arrays. If `fix_imports` diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index 04e090c..2d2b4ce 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -426,7 +426,7 @@ def roundtrip(arr): f = BytesIO() format.write_array(f, arr) f2 = BytesIO(f.getvalue()) - arr2 = format.read_array(f2, allow_pickle=True) + arr2 = format.read_array(f2) return arr2 @@ -553,7 +553,7 @@ def test_pickle_python2_python3(): path = os.path.join(data_dir, fname) for encoding in ['bytes', 'latin1']: - data_f = np.load(path, allow_pickle=True, encoding=encoding) + data_f = np.load(path, encoding=encoding) if fname.endswith('.npz'): data = data_f['x'] data_f.close() @@ -575,19 +575,16 @@ def test_pickle_python2_python3(): if sys.version_info[0] >= 3: if fname.startswith('py2'): if fname.endswith('.npz'): - data = np.load(path, allow_pickle=True) + data = np.load(path) assert_raises(UnicodeError, data.__getitem__, 'x') data.close() - data = np.load(path, allow_pickle=True, fix_imports=False, - encoding='latin1') + data = np.load(path, fix_imports=False, encoding='latin1') assert_raises(ImportError, data.__getitem__, 'x') data.close() else: - assert_raises(UnicodeError, np.load, path, - allow_pickle=True) + assert_raises(UnicodeError, np.load, path) assert_raises(ImportError, np.load, path, - allow_pickle=True, fix_imports=False, - encoding='latin1') + encoding='latin1', fix_imports=False) def test_pickle_disallow(): diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index bde2567..2daa015 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -87,7 +87,7 @@ class RoundtripTest(object): """ save_kwds = kwargs.get('save_kwds', {}) - load_kwds = kwargs.get('load_kwds', {"allow_pickle": True}) + load_kwds = kwargs.get('load_kwds', {}) file_on_disk = kwargs.get('file_on_disk', False) if file_on_disk: diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 1b47021..708c12e 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -3278,13 +3278,18 @@ class TestMaskedArrayMethods(object): assert_equal(test.mask, mask_first.mask) # Test sort on dtype with subarray (gh-8069) - # Just check that the sort does not error, structured array subarrays - # are treated as byte strings and that leads to differing behavior - # depending on endianess and `endwith`. dt = np.dtype([('v', int, 2)]) a = a.view(dt) + mask_last = mask_last.view(dt) + mask_first = mask_first.view(dt) + test = sort(a) + assert_equal(test, mask_last) + assert_equal(test.mask, mask_last.mask) + test = sort(a, endwith=False) + assert_equal(test, mask_first) + assert_equal(test.mask, mask_first.mask) def test_argsort(self): # Test argsort