/*
* Copyright (c) 2014 Ryan Juckett
* http://www.ryanjuckett.com/
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source
* distribution.
*/
/*
* This file contains a modified version of Ryan Juckett's Dragon4
* implementation, which has been ported from C++ to C and which has
* modifications specific to printing floats in numpy.
*/
#include "dragon4.h"
#include <numpy/npy_common.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#if 0
#define DEBUG_ASSERT(stmnt) assert(stmnt)
#else
#define DEBUG_ASSERT(stmnt) do {} while(0)
#endif
/*
* Get the log base 2 of a 32-bit unsigned integer.
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup
*/
static npy_uint32
LogBase2_32(npy_uint32 val)
{
static const npy_uint8 logTable[256] =
{
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
npy_uint32 temp;
temp = val >> 24;
if (temp) {
return 24 + logTable[temp];
}
temp = val >> 16;
if (temp) {
return 16 + logTable[temp];
}
temp = val >> 8;
if (temp) {
return 8 + logTable[temp];
}
return logTable[val];
}
static npy_uint32
LogBase2_64(npy_uint64 val)
{
npy_uint64 temp;
temp = val >> 32;
if (temp) {
return 32 + LogBase2_32((npy_uint32)temp);
}
return LogBase2_32((npy_uint32)val);
}
/*
* Maximum number of 32 bit blocks needed in high precision arithmetic to print
* out 128 bit IEEE floating point values. 1023 chosen to be large enough for
* 128 bit floats, and BigInt is exactly 4kb (nice for page/cache?)
*/
#define c_BigInt_MaxBlocks 1023
/*
* This structure stores a high precision unsigned integer. It uses a buffer of
* 32 bit integer blocks along with a length. The lowest bits of the integer
* are stored at the start of the buffer and the length is set to the minimum
* value that contains the integer. Thus, there are never any zero blocks at
* the end of the buffer.
*/
typedef struct BigInt {
npy_uint32 length;
npy_uint32 blocks[c_BigInt_MaxBlocks];
} BigInt;
/* Copy integer */
static void
BigInt_Copy(BigInt *dst, const BigInt *src)
{
npy_uint32 length = src->length;
npy_uint32 * dstp = dst->blocks;
const npy_uint32 *srcp;
for (srcp = src->blocks; srcp != src->blocks + length; ++dstp, ++srcp) {
*dstp = *srcp;
}
dst->length = length;
}
/* Basic type accessors */
static void
BigInt_Set_uint64(BigInt *i, npy_uint64 val)
{
if (val > 0xFFFFFFFF) {
i->blocks[0] = val & 0xFFFFFFFF;
i->blocks[1] = (val >> 32) & 0xFFFFFFFF;
i->length = 2;
}
else if (val != 0) {
i->blocks[0] = val & 0xFFFFFFFF;
i->length = 1;
}
else {
i->length = 0;
}
}
static void
BigInt_Set_uint32(BigInt *i, npy_uint32 val)
{
if (val != 0) {
i->blocks[0] = val;
i->length = (val != 0);
}
else {
i->length = 0;
}
}
/*
* Returns 0 if (lhs = rhs), negative if (lhs < rhs), positive if (lhs > rhs)
*/
static npy_int32
BigInt_Compare(const BigInt *lhs, const BigInt *rhs)
{
int i;
/* A bigger length implies a bigger number. */
npy_int32 lengthDiff = lhs->length - rhs->length;
if (lengthDiff != 0) {
return lengthDiff;
}
/* Compare blocks one by one from high to low. */
for (i = lhs->length - 1; i >= 0; --i) {
if (lhs->blocks[i] == rhs->blocks[i]) {
continue;
}
else if (lhs->blocks[i] > rhs->blocks[i]) {
return 1;
}
else {
return -1;
}
}
/* no blocks differed */
return 0;
}
/* result = lhs + rhs */
static void
BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs)
{
/* determine which operand has the smaller length */
const BigInt *large, *small;
npy_uint64 carry = 0;
const npy_uint32 *largeCur, *smallCur, *largeEnd, *smallEnd;
npy_uint32 *resultCur;
if (lhs->length < rhs->length) {
small = lhs;
large = rhs;
}
else {
small = rhs;
large = lhs;
}
/* The output will be at least as long as the largest input */
result->length = large->length;
/* Add each block and add carry the overflow to the next block */
largeCur = large->blocks;
largeEnd = largeCur + large->length;
smallCur = small->blocks;
smallEnd = smallCur + small->length;
resultCur = result->blocks;
while (smallCur != smallEnd) {
npy_uint64 sum = carry + (npy_uint64)(*largeCur) +
(npy_uint64)(*smallCur);
carry = sum >> 32;
*resultCur = sum & 0xFFFFFFFF;
++largeCur;
++smallCur;
++resultCur;
}
/* Add the carry to any blocks that only exist in the large operand */
while (largeCur != largeEnd) {
npy_uint64 sum = carry + (npy_uint64)(*largeCur);
carry = sum >> 32;
(*resultCur) = sum & 0xFFFFFFFF;
++largeCur;
++resultCur;
}
/* If there's still a carry, append a new block */
if (carry != 0) {
DEBUG_ASSERT(carry == 1);
DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) ==
large->length && (large->length < c_BigInt_MaxBlocks));
*resultCur = 1;
result->length = large->length + 1;
}
else {
result->length = large->length;
}
}
/*
* result = lhs * rhs
*/
static void
BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs)
{
const BigInt *large;
const BigInt *small;
npy_uint32 maxResultLen;
npy_uint32 *cur, *end, *resultStart;
const npy_uint32 *smallCur;
DEBUG_ASSERT(result != lhs && result != rhs);
/* determine which operand has the smaller length */
if (lhs->length < rhs->length) {
small = lhs;
large = rhs;
}
else {
small = rhs;
large = lhs;
}
/* set the maximum possible result length */
maxResultLen = large->length + small->length;
DEBUG_ASSERT(maxResultLen <= c_BigInt_MaxBlocks);
/* clear the result data */
for (cur = result->blocks, end = cur + maxResultLen; cur != end; ++cur) {
*cur = 0;
}
/* perform standard long multiplication for each small block */
resultStart = result->blocks;
for (smallCur = small->blocks;
smallCur != small->blocks + small->length;
++smallCur, ++resultStart) {
/*
* if non-zero, multiply against all the large blocks and add into the
* result
*/
const npy_uint32 multiplier = *smallCur;
if (multiplier != 0) {
const npy_uint32 *largeCur = large->blocks;
npy_uint32 *resultCur = resultStart;
npy_uint64 carry = 0;
do {
npy_uint64 product = (*resultCur) +
(*largeCur)*(npy_uint64)multiplier + carry;
carry = product >> 32;
*resultCur = product & 0xFFFFFFFF;
++largeCur;
++resultCur;
} while(largeCur != large->blocks + large->length);
DEBUG_ASSERT(resultCur < result->blocks + maxResultLen);
*resultCur = (npy_uint32)(carry & 0xFFFFFFFF);
}
}
/* check if the terminating block has no set bits */
if (maxResultLen > 0 && result->blocks[maxResultLen - 1] == 0) {
result->length = maxResultLen-1;
}
else {
result->length = maxResultLen;
}
}
/* result = lhs * rhs */
static void
BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs)
{
/* perform long multiplication */
npy_uint32 carry = 0;
npy_uint32 *resultCur = result->blocks;
const npy_uint32 *pLhsCur = lhs->blocks;
const npy_uint32 *pLhsEnd = lhs->blocks + lhs->length;
for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry;
*resultCur = (npy_uint32)(product & 0xFFFFFFFF);
carry = product >> 32;
}
/* if there is a remaining carry, grow the array */
if (carry != 0) {
/* grow the array */
DEBUG_ASSERT(lhs->length + 1 <= c_BigInt_MaxBlocks);
*resultCur = (npy_uint32)carry;
result->length = lhs->length + 1;
}
else {
result->length = lhs->length;
}
}
/* result = in * 2 */
static void
BigInt_Multiply2(BigInt *result, const BigInt *in)
{
/* shift all the blocks by one */
npy_uint32 carry = 0;
npy_uint32 *resultCur = result->blocks;
const npy_uint32 *pLhsCur = in->blocks;
const npy_uint32 *pLhsEnd = in->blocks + in->length;
for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
npy_uint32 cur = *pLhsCur;
*resultCur = (cur << 1) | carry;
carry = cur >> 31;
}
if (carry != 0) {
/* grow the array */
DEBUG_ASSERT(in->length + 1 <= c_BigInt_MaxBlocks);
*resultCur = carry;
result->length = in->length + 1;
}
else {
result->length = in->length;
}
}
/* result = result * 2 */
static void
BigInt_Multiply2_inplace(BigInt *result)
{
/* shift all the blocks by one */
npy_uint32 carry = 0;
npy_uint32 *cur = result->blocks;
npy_uint32 *end = result->blocks + result->length;
for ( ; cur != end; ++cur) {
npy_uint32 tmpcur = *cur;
*cur = (tmpcur << 1) | carry;
carry = tmpcur >> 31;
}
if (carry != 0) {
/* grow the array */
DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks);
*cur = carry;
++result->length;
}
}
/* result = result * 10 */
static void
BigInt_Multiply10(BigInt *result)
{
/* multiply all the blocks */
npy_uint64 carry = 0;
npy_uint32 *cur = result->blocks;
npy_uint32 *end = result->blocks + result->length;
for ( ; cur != end; ++cur) {
npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry;
(*cur) = (npy_uint32)(product & 0xFFFFFFFF);
carry = product >> 32;
}
if (carry != 0) {
/* grow the array */
DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks);
*cur = (npy_uint32)carry;
++result->length;
}
}
static npy_uint32 g_PowerOf10_U32[] =
{
1, /* 10 ^ 0 */
10, /* 10 ^ 1 */
100, /* 10 ^ 2 */
1000, /* 10 ^ 3 */
10000, /* 10 ^ 4 */
100000, /* 10 ^ 5 */
1000000, /* 10 ^ 6 */
10000000, /* 10 ^ 7 */
};
/*
* Note: This has a lot of wasted space in the big integer structures of the
* early table entries. It wouldn't be terribly hard to make the multiply
* function work on integer pointers with an array length instead of
* the BigInt struct which would allow us to store a minimal amount of
* data here.
*/
static BigInt g_PowerOf10_Big[] =
{
/* 10 ^ 8 */
{ 1, { 100000000 } },
/* 10 ^ 16 */
{ 2, { 0x6fc10000, 0x002386f2 } },
/* 10 ^ 32 */
{ 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } },
/* 10 ^ 64 */
{ 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed,
0xe93ff9f4, 0x00184f03, } },
/* 10 ^ 128 */
{ 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01,
0x03df9909, 0x0f1538fd, 0x2374e42f, 0xd3cff5ec, 0xc404dc08,
0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } },
/* 10 ^ 256 */
{ 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x982e7c01, 0xbed3875b,
0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, 0xd595d80f,
0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e,
0xcc5573c0, 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc,
0x5fdcefce, 0x000553f7, } },
/* 10 ^ 512 */
{ 54, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f,
0xb83a8a97, 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1,
0x4475b579, 0x28f8733b, 0xaa1da1bf, 0x703ed321, 0x1e25cfea,
0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae,
0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0,
0x93abade4, 0x1366732a, 0x9449775c, 0x69be5b0e, 0x7343afac,
0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, 0x8a0b1f13,
0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, } },
/* 10 ^ 1024 */
{ 107, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b,
0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a,
0xcc2cd819, 0x0ed1440e, 0x896634ee, 0x7de16cfb, 0x1e43f61f,
0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, 0xf451218b,
0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07,
0x62be695a, 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5,
0xba34c411, 0xd814b505, 0xbf3fdeb3, 0x8fc51a16, 0xb1b896bc,
0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb,
0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66,
0xfc8f1f90, 0x976a3310, 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8,
0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, 0x31b5653f,
0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b,
0x1640114a, 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06,
0x73c55349, 0xa7e6bd2a, 0xc1a6970c, 0x47187094, 0xd2db49ef,
0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94,
0xd9d61a05, 0x00000325, } },
/* 10 ^ 2048 */
{ 213, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1333e001,
0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354,
0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773,
0xea50d142, 0x51b78db2, 0x98342c9e, 0xc850dabc, 0x866ed6f1,
0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, 0x71c7fd8f,
0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1,
0x9713b449, 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643,
0xe3cb64f3, 0x8074ff01, 0xe3892ee9, 0x10c17f94, 0xa8f16f92,
0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44,
0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974,
0xd6c7d6ab, 0x8e48ff35, 0x4419bd43, 0xc4a65665, 0x685e5510,
0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, 0x982da466,
0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e,
0x1aa9f444, 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62,
0x944545ff, 0x40660460, 0x77c1a42f, 0xc9ac375d, 0xe866d7ef,
0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19,
0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e,
0x785ce6b7, 0x56c6815b, 0x6f46eadb, 0x4eeebeee, 0x195355d8,
0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, 0xde9ec24b,
0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b,
0xfb8886fb, 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943,
0x58ccf45c, 0xf82df846, 0x415c7f3e, 0x2915e818, 0x8b3d5cf4,
0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34,
0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14,
0xe0de0840, 0xf775677c, 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9,
0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, 0x907855ee,
0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d,
0x0af908b4, 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6,
0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, 0x11eb962b, 0x9648b21c,
0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac,
0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, } },
/* 10 ^ 4096 */
{ 426, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d,
0x8efe7ae7, 0xf89a1e90, 0xef084117, 0x54e05154, 0x13b1bb51,
0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3,
0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da,
0x1d1f4cce, 0x9b8ba1e8, 0xd6bfdf75, 0xe341be10, 0xc2dfae78,
0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, 0xcab3e6d7,
0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982,
0xf8169c84, 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905,
0x54ab9c41, 0x7613b224, 0x1a1c304e, 0x3bfe167b, 0x441c2d47,
0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e,
0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507,
0x3f8ec72a, 0xfd82373a, 0x95a842bc, 0x280f4d32, 0xf3618ac0,
0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, 0xdcfe388f,
0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c,
0x655fec30, 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed,
0x3b9af26b, 0xc994a450, 0xb5341743, 0x75a697b2, 0xac50b9c1,
0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb,
0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa,
0x176ca776, 0x77fb8af6, 0xeca19beb, 0x92baf1de, 0xaf63b712,
0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, 0x87d1cde8,
0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b,
0x1fa8fd5d, 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925,
0xf984d83e, 0x7a872e74, 0xc19e1faf, 0xed4d542d, 0xecf9b5d0,
0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a,
0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8,
0xab312bb9, 0x1f2b2322, 0x440b3f25, 0xbf627ede, 0x72dac789,
0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, 0xbb9373f4,
0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb,
0x3172826a, 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9,
0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, 0x3d326a7e, 0xeeaf19e6,
0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4,
0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf,
0x9ee3352c, 0x3613c850, 0xfe78d682, 0x788f6e50, 0x5b060904,
0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, 0xa6e9cfda,
0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8,
0x4dcd6cd9, 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc,
0xc32da383, 0xfb126ab1, 0xa03f40a8, 0xed5bf546, 0xe9ce4724,
0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb,
0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999,
0x35e836b5, 0xa96ddeff, 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d,
0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, 0x00fc509b,
0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407,
0x53400d03, 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc,
0x41a4c954, 0xe03b2b63, 0x437b3d7f, 0x97aced8e, 0xcbd66670,
0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df,
0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4,
0x56c02d9a, 0x8c4ef804, 0xc1a08a13, 0x13fd01c8, 0xe6d27671,
0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, 0x544f10cd,
0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb,
0x84e857fd, 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982,
0xaec29e6e, 0x87ec7038, 0x7000a519, 0xaeee333b, 0xff66e42c,
0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e,
0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f,
0xac7867f9, 0x52ec8721, 0xbcdd9d4a, 0x8edd2d00, 0x3557de06,
0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, 0xb6100fab,
0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5,
0x35fcb457, 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21,
0x4964046f, 0x18ae4240, 0xd868b275, 0x8bd2b496, 0x1c5563f4,
0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee,
0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145,
0x00000062, } },
};
/* result = 10^exponent */
static void
BigInt_Pow10(BigInt *result, npy_uint32 exponent)
{
/* create two temporary values to reduce large integer copy operations */
BigInt temp1;
BigInt temp2;
BigInt *curTemp = &temp1;
BigInt *pNextTemp = &temp2;
npy_uint32 smallExponent;
npy_uint32 tableIdx = 0;
/* make sure the exponent is within the bounds of the lookup table data */
DEBUG_ASSERT(exponent < 8192);
/*
* initialize the result by looking up a 32-bit power of 10 corresponding to
* the first 3 bits
*/
smallExponent = exponent & 0x7;
BigInt_Set_uint32(curTemp, g_PowerOf10_U32[smallExponent]);
/* remove the low bits that we used for the 32-bit lookup table */
exponent >>= 3;
/* while there are remaining bits in the exponent to be processed */
while (exponent != 0) {
/* if the current bit is set, multiply by this power of 10 */
if (exponent & 1) {
BigInt *pSwap;
/* multiply into the next temporary */
BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]);
/* swap to the next temporary */
pSwap = curTemp;
curTemp = pNextTemp;
pNextTemp = pSwap;
}
/* advance to the next bit */
++tableIdx;
exponent >>= 1;
}
/* output the result */
BigInt_Copy(result, curTemp);
}
/* result = in * 10^exponent */
static void
BigInt_MultiplyPow10(BigInt *result, const BigInt *in, npy_uint32 exponent)
{
/* create two temporary values to reduce large integer copy operations */
BigInt temp1;
BigInt temp2;
BigInt *curTemp = &temp1;
BigInt *pNextTemp = &temp2;
npy_uint32 smallExponent;
npy_uint32 tableIdx = 0;
/* make sure the exponent is within the bounds of the lookup table data */
DEBUG_ASSERT(exponent < 8192);
/*
* initialize the result by looking up a 32-bit power of 10 corresponding to
* the first 3 bits
*/
smallExponent = exponent & 0x7;
if (smallExponent != 0) {
BigInt_Multiply_int(curTemp, in, g_PowerOf10_U32[smallExponent]);
}
else {
BigInt_Copy(curTemp, in);
}
/* remove the low bits that we used for the 32-bit lookup table */
exponent >>= 3;
/* while there are remaining bits in the exponent to be processed */
while (exponent != 0) {
/* if the current bit is set, multiply by this power of 10 */
if (exponent & 1) {
BigInt *pSwap;
/* multiply into the next temporary */
BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]);
// swap to the next temporary
pSwap = curTemp;
curTemp = pNextTemp;
pNextTemp = pSwap;
}
/* advance to the next bit */
++tableIdx;
exponent >>= 1;
}
/* output the result */
BigInt_Copy(result, curTemp);
}
/* result = 2^exponent */
static inline void
BigInt_Pow2(BigInt *result, npy_uint32 exponent)
{
npy_uint32 bitIdx;
npy_uint32 blockIdx = exponent / 32;
npy_uint32 i;
DEBUG_ASSERT(blockIdx < c_BigInt_MaxBlocks);
for (i = 0; i <= blockIdx; ++i) {
result->blocks[i] = 0;
}
result->length = blockIdx + 1;
bitIdx = (exponent % 32);
result->blocks[blockIdx] |= (1 << bitIdx);
}
/*
* This function will divide two large numbers under the assumption that the
* result is within the range [0,10) and the input numbers have been shifted
* to satisfy:
* - The highest block of the divisor is greater than or equal to 8 such that
* there is enough precision to make an accurate first guess at the quotient.
* - The highest block of the divisor is less than the maximum value on an
* unsigned 32-bit integer such that we can safely increment without overflow.
* - The dividend does not contain more blocks than the divisor such that we
* can estimate the quotient by dividing the equivalently placed high blocks.
*
* quotient = floor(dividend / divisor)
* remainder = dividend - quotient*divisor
*
* dividend is updated to be the remainder and the quotient is returned.
*/
static npy_uint32
BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
{
npy_uint32 length, quotient;
const npy_uint32 *finalDivisorBlock;
npy_uint32 *finalDividendBlock;
/*
* Check that the divisor has been correctly shifted into range and that it
* is not smaller than the dividend in length.
*/
DEBUG_ASSERT(!divisor->length == 0 &&
divisor->blocks[divisor->length-1] >= 8 &&
divisor->blocks[divisor->length-1] < 0xFFFFFFFF &&
dividend->length <= divisor->length);
/*
* If the dividend is smaller than the divisor, the quotient is zero and the
* divisor is already the remainder.
*/
length = divisor->length;
if (dividend->length < divisor->length) {
return 0;
}
finalDivisorBlock = divisor->blocks + length - 1;
finalDividendBlock = dividend->blocks + length - 1;
/*
* Compute an estimated quotient based on the high block value. This will
* either match the actual quotient or undershoot by one.
*/
quotient = *finalDividendBlock / (*finalDivisorBlock + 1);
DEBUG_ASSERT(quotient <= 9);
/* Divide out the estimated quotient */
if (quotient != 0) {
/* dividend = dividend - divisor*quotient */
const npy_uint32 *divisorCur = divisor->blocks;
npy_uint32 *dividendCur = dividend->blocks;
npy_uint64 borrow = 0;
npy_uint64 carry = 0;
do {
npy_uint64 difference, product;
product = (npy_uint64)*divisorCur * (npy_uint64)quotient + carry;
carry = product >> 32;
difference = (npy_uint64)*dividendCur
- (product & 0xFFFFFFFF) - borrow;
borrow = (difference >> 32) & 1;
*dividendCur = difference & 0xFFFFFFFF;
++divisorCur;
++dividendCur;
} while(divisorCur <= finalDivisorBlock);
/* remove all leading zero blocks from dividend */
while (length > 0 && dividend->blocks[length - 1] == 0) {
--length;
}
dividend->length = length;
}
/*
* If the dividend is still larger than the divisor, we overshot our
* estimate quotient. To correct, we increment the quotient and subtract one
* more divisor from the dividend.
*/
if (BigInt_Compare(dividend, divisor) >= 0) {
/* dividend = dividend - divisor */
const npy_uint32 *divisorCur = divisor->blocks;
npy_uint32 *dividendCur = dividend->blocks;
npy_uint64 borrow = 0;
++quotient;
do {
npy_uint64 difference = (npy_uint64)*dividendCur
- (npy_uint64)*divisorCur - borrow;
borrow = (difference >> 32) & 1;
*dividendCur = difference & 0xFFFFFFFF;
++divisorCur;
++dividendCur;
} while(divisorCur <= finalDivisorBlock);
/* remove all leading zero blocks from dividend */
while (length > 0 && dividend->blocks[length - 1] == 0) {
--length;
}
dividend->length = length;
}
return quotient;
}
/* result = result << shift */
static void
BigInt_ShiftLeft(BigInt *result, npy_uint32 shift)
{
npy_uint32 shiftBlocks = shift / 32;
npy_uint32 shiftBits = shift % 32;
/* process blocks high to low so that we can safely process in place */
const npy_uint32 *pInBlocks = result->blocks;
npy_int32 inLength = result->length;
npy_uint32 *pInCur, *pOutCur;
DEBUG_ASSERT(inLength + shiftBlocks < c_BigInt_MaxBlocks);
DEBUG_ASSERT(shift != 0);
/* check if the shift is block aligned */
if (shiftBits == 0) {
npy_uint32 i;
/* copy blocks from high to low */
for (pInCur = result->blocks + result->length,
pOutCur = pInCur + shiftBlocks;
pInCur >= pInBlocks;
--pInCur, --pOutCur) {
*pOutCur = *pInCur;
}
/* zero the remaining low blocks */
for (i = 0; i < shiftBlocks; ++i) {
result->blocks[i] = 0;
}
result->length += shiftBlocks;
}
/* else we need to shift partial blocks */
else {
npy_uint32 i;
npy_int32 inBlockIdx = inLength - 1;
npy_uint32 outBlockIdx = inLength + shiftBlocks;
/* output the initial blocks */
const npy_uint32 lowBitsShift = (32 - shiftBits);
npy_uint32 highBits = 0;
npy_uint32 block = result->blocks[inBlockIdx];
npy_uint32 lowBits = block >> lowBitsShift;
/* set the length to hold the shifted blocks */
DEBUG_ASSERT(outBlockIdx < c_BigInt_MaxBlocks);
result->length = outBlockIdx + 1;
while (inBlockIdx > 0) {
result->blocks[outBlockIdx] = highBits | lowBits;
highBits = block << shiftBits;
--inBlockIdx;
--outBlockIdx;
block = result->blocks[inBlockIdx];
lowBits = block >> lowBitsShift;
}
/* output the final blocks */
DEBUG_ASSERT(outBlockIdx == shiftBlocks + 1);
result->blocks[outBlockIdx] = highBits | lowBits;
result->blocks[outBlockIdx-1] = block << shiftBits;
/* zero the remaining low blocks */
for (i = 0; i < shiftBlocks; ++i) {
result->blocks[i] = 0;
}
/* check if the terminating block has no set bits */
if (result->blocks[result->length - 1] == 0) {
--result->length;
}
}
}
/*
* This is an implementation the Dragon4 algorithm to convert a binary number in
* floating point format to a decimal number in string format. The function
* returns the number of digits written to the output buffer and the output is
* not NUL terminated.
*
* The floating point input value is (mantissa * 2^exponent).
*
* See the following papers for more information on the algorithm:
* "How to Print Floating-Point Numbers Accurately"
* Steele and White
* http://kurtstephens.com/files/p372-steele.pdf
* "Printing Floating-Point Numbers Quickly and Accurately"
* Burger and Dybvig
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656
*
* This implementation is essentially a port of the "Figure 3" Scheme code from
* Burger and Dybvig, but with the following additional differences:
* 1. Instead of finding the highest k such that high < B**k, we search
* for the one where v < B**k. This has a downside that if a power
* of 10 exists between v and high, we will output a 9 instead of a 1 as
* first digit, violating the "no-carry" guarantee of the paper. This is
* accounted for in a new post-processing loop which implements a carry
* operation. The upside is one less BigInt multiplication.
* 2. The approximate value of k found is offset by a different amount
* (0.69), in order to hit the "fast" branch more often. This is
* extensively described on Ryan Juckett's website.
* 3. The fixed precision mode is much simpler than proposed in the paper.
* It simply outputs digits by repeatedly dividing by 10. The new "carry"
* loop at the end rounds this output nicely.
* There is also some new code to account for details of the BigInt
* implementation, which are not present in the paper since it does not specify
* details of the integer calculations.
*
* There is some more documentation of these changes on Ryan Juckett's website
* at http://www.ryanjuckett.com/programming/printing-floating-point-numbers/
*
* Ryan Juckett's implementation did not implement "IEEE unbiased rounding",
* except in the last digit. This has been added back, following the Burger &
* Dybvig code, using the isEven variable.
*
* Arguments:
* * mantissa - value significand
* * exponent - value exponent in base 2
* * mantissaBit - index of the highest set mantissa bit
* * hasUnequalMargins - is the high margin twice as large as the low margin
* * cutoffMode - how to interpret cutoffNumber: fractional or total digits?
* * cutoffNumber - cut off printing after this many digits. -1 for no cutoff
* * pOutBuffer - buffer to output into
* * bufferSize - maximum characters that can be printed to pOutBuffer
* * pOutExponent - the base 10 exponent of the first digit
*/
static npy_uint32
Dragon4(const npy_uint64 mantissa, const npy_int32 exponent,
const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins,
const DigitMode digitMode, const CutoffMode cutoffMode,
npy_int32 cutoffNumber, char *pOutBuffer,
npy_uint32 bufferSize, npy_int32 *pOutExponent)
{
char *curDigit = pOutBuffer;
/*
* We compute values in integer format by rescaling as
* mantissa = scaledValue / scale
* marginLow = scaledMarginLow / scale
* marginHigh = scaledMarginHigh / scale
* Here, marginLow and marginHigh represent 1/2 of the distance to the next
* floating point value above/below the mantissa.
*
* scaledMarginHigh is a pointer so that it can point to scaledMarginLow in
* the case they must be equal to each other, otherwise it will point to
* optionalMarginHigh.
*/
BigInt scale;
BigInt scaledValue;
BigInt scaledMarginLow;
BigInt *scaledMarginHigh;
BigInt optionalMarginHigh;
const npy_float64 log10_2 = 0.30102999566398119521373889472449;
npy_int32 digitExponent, cutoffExponent, hiBlock;
npy_uint32 outputDigit; /* current digit being output */
npy_uint32 outputLen;
npy_bool isEven = (mantissa % 2) == 0;
npy_int32 cmp;
/* values used to determine how to round */
npy_bool low, high, roundDown;
DEBUG_ASSERT(bufferSize > 0);
/* if the mantissa is zero, the value is zero regardless of the exponent */
if (mantissa == 0) {
*curDigit = '0';
*pOutExponent = 0;
return 1;
}
if (hasUnequalMargins) {
/* if we have no fractional component */
if (exponent > 0) {
/*
* 1) Expand the input value by multiplying out the mantissa and
* exponent. This represents the input value in its whole number
* representation.
* 2) Apply an additional scale of 2 such that later comparisons
* against the margin values are simplified.
* 3) Set the margin value to the lowest mantissa bit's scale.
*/
/* scaledValue = 2 * 2 * mantissa*2^exponent */
BigInt_Set_uint64(&scaledValue, mantissa);
BigInt_ShiftLeft(&scaledValue, exponent + 2);
/* scale = 2 * 2 * 1 */
BigInt_Set_uint32(&scale, 4);
/* scaledMarginLow = 2 * 2^(exponent-1) */
BigInt_Pow2(&scaledMarginLow, exponent);
/* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */
BigInt_Pow2(&optionalMarginHigh, exponent + 1);
}
/* else we have a fractional exponent */
else {
/*
* In order to track the mantissa data as an integer, we store it as
* is with a large scale
*/
/* scaledValue = 2 * 2 * mantissa */
BigInt_Set_uint64(&scaledValue, mantissa);
BigInt_ShiftLeft(&scaledValue, 2);
/* scale = 2 * 2 * 2^(-exponent) */
BigInt_Pow2(&scale, -exponent + 2);
/* scaledMarginLow = 2 * 2^(-1) */
BigInt_Set_uint32(&scaledMarginLow, 1);
/* scaledMarginHigh = 2 * 2 * 2^(-1) */
BigInt_Set_uint32(&optionalMarginHigh, 2);
}
/* the high and low margins are different */
scaledMarginHigh = &optionalMarginHigh;
}
else {
/* if we have no fractional component */
if (exponent > 0) {
/* scaledValue = 2 * mantissa*2^exponent */
BigInt_Set_uint64(&scaledValue, mantissa);
BigInt_ShiftLeft(&scaledValue, exponent + 1);
/* scale = 2 * 1 */
BigInt_Set_uint32(&scale, 2);
/* scaledMarginLow = 2 * 2^(exponent-1) */
BigInt_Pow2(&scaledMarginLow, exponent);
}
/* else we have a fractional exponent */
else {
/*
* In order to track the mantissa data as an integer, we store it as
* is with a large scale
*/
/* scaledValue = 2 * mantissa */
BigInt_Set_uint64(&scaledValue, mantissa);
BigInt_ShiftLeft(&scaledValue, 1);
/* scale = 2 * 2^(-exponent) */
BigInt_Pow2(&scale, -exponent + 1);
/* scaledMarginLow = 2 * 2^(-1) */
BigInt_Set_uint32(&scaledMarginLow, 1);
}
/* the high and low margins are equal */
scaledMarginHigh = &scaledMarginLow;
}
/*
* Compute an estimate for digitExponent that will be correct or undershoot
* by one. This optimization is based on the paper "Printing Floating-Point
* Numbers Quickly and Accurately" by Burger and Dybvig
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656
* We perform an additional subtraction of 0.69 to increase the frequency of
* a failed estimate because that lets us take a faster branch in the code.
* 0.69 is chosen because 0.69 + log10(2) is less than one by a reasonable
* epsilon that will account for any floating point error.
*
* We want to set digitExponent to floor(log10(v)) + 1
* v = mantissa*2^exponent
* log2(v) = log2(mantissa) + exponent;
* log10(v) = log2(v) * log10(2)
* floor(log2(v)) = mantissaBit + exponent;
* log10(v) - log10(2) < (mantissaBit + exponent) * log10(2) <= log10(v)
* log10(v) < (mantissaBit + exponent) * log10(2) + log10(2)
* <= log10(v) + log10(2)
* floor(log10(v)) < ceil((mantissaBit + exponent) * log10(2))
* <= floor(log10(v)) + 1
*/
digitExponent = (npy_int32)(
ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69));
/*
* if the digit exponent is smaller than the smallest desired digit for
* fractional cutoff, pull the digit back into legal range at which point we
* will round to the appropriate value. Note that while our value for
* digitExponent is still an estimate, this is safe because it only
* increases the number. This will either correct digitExponent to an
* accurate value or it will clamp it above the accurate value.
*/
if (cutoffNumber >= 0 && cutoffMode == CutoffMode_FractionLength &&
digitExponent <= -cutoffNumber) {
digitExponent = -cutoffNumber + 1;
}
/* Divide value by 10^digitExponent. */
if (digitExponent > 0) {
/* A positive exponent creates a division so we multiply the scale. */
BigInt temp;
BigInt_MultiplyPow10(&temp, &scale, digitExponent);
BigInt_Copy(&scale, &temp);
}
else if (digitExponent < 0) {
/*
* A negative exponent creates a multiplication so we multiply up the
* scaledValue, scaledMarginLow and scaledMarginHigh.
*/
BigInt pow10, temp;
BigInt_Pow10(&pow10, -digitExponent);
BigInt_Multiply(&temp, &scaledValue, &pow10);
BigInt_Copy(&scaledValue, &temp);
BigInt_Multiply(&temp, &scaledMarginLow, &pow10);
BigInt_Copy(&scaledMarginLow, &temp);
if (scaledMarginHigh != &scaledMarginLow) {
BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow);
}
}
/* If (value >= 1), our estimate for digitExponent was too low */
if (BigInt_Compare(&scaledValue, &scale) >= 0) {
/*
* The exponent estimate was incorrect.
* Increment the exponent and don't perform the premultiply needed
* for the first loop iteration.
*/
digitExponent = digitExponent + 1;
}
else {
/*
* The exponent estimate was correct.
* Multiply larger by the output base to prepare for the first loop
* iteration.
*/
BigInt_Multiply10(&scaledValue);
BigInt_Multiply10(&scaledMarginLow);
if (scaledMarginHigh != &scaledMarginLow) {
BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow);
}
}
/*
* Compute the cutoff exponent (the exponent of the final digit to print).
* Default to the maximum size of the output buffer.
*/
cutoffExponent = digitExponent - bufferSize;
if (cutoffNumber >= 0) {
npy_int32 desiredCutoffExponent;
if (cutoffMode == CutoffMode_TotalLength) {
desiredCutoffExponent = digitExponent - cutoffNumber;
if (desiredCutoffExponent > cutoffExponent) {
cutoffExponent = desiredCutoffExponent;
}
}
/* Otherwise it's CutoffMode_FractionLength. Print cutoffNumber digits
* past the decimal point or until we reach the buffer size
*/
else {
desiredCutoffExponent = -cutoffNumber;
if (desiredCutoffExponent > cutoffExponent) {
cutoffExponent = desiredCutoffExponent;
}
}
}
/* Output the exponent of the first digit we will print */
*pOutExponent = digitExponent-1;
/*
* In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(), we
* need to scale up our values such that the highest block of the
* denominator is greater than or equal to 8. We also need to guarantee that
* the numerator can never have a length greater than the denominator after
* each loop iteration. This requires the highest block of the denominator
* to be less than or equal to 429496729 which is the highest number that
* can be multiplied by 10 without overflowing to a new block.
*/
DEBUG_ASSERT(scale.length > 0);
hiBlock = scale.blocks[scale.length - 1];
if (hiBlock < 8 || hiBlock > 429496729) {
npy_uint32 hiBlockLog2, shift;
/*
* Perform a bit shift on all values to get the highest block of the
* denominator into the range [8,429496729]. We are more likely to make
* accurate quotient estimations in
* BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator
* values so we shift the denominator to place the highest bit at index
* 27 of the highest block. This is safe because (2^28 - 1) = 268435455
* which is less than 429496729. This means that all values with a
* highest bit at index 27 are within range.
*/
hiBlockLog2 = LogBase2_32(hiBlock);
DEBUG_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27);
shift = (32 + 27 - hiBlockLog2) % 32;
BigInt_ShiftLeft(&scale, shift);
BigInt_ShiftLeft(&scaledValue, shift);
BigInt_ShiftLeft(&scaledMarginLow, shift);
if (scaledMarginHigh != &scaledMarginLow) {
BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow);
}
}
if (digitMode == DigitMode_Unique) {
/*
* For the unique cutoff mode, we will try to print until we have
* reached a level of precision that uniquely distinguishes this value
* from its neighbors. If we run out of space in the output buffer, we
* terminate early.
*/
for (;;) {
BigInt scaledValueHigh;
digitExponent = digitExponent-1;
/* divide out the scale to extract the digit */
outputDigit =
BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, &scale);
DEBUG_ASSERT(outputDigit < 10);
/* update the high end of the value */
BigInt_Add(&scaledValueHigh, &scaledValue, scaledMarginHigh);
/*
* stop looping if we are far enough away from our neighboring
* values or if we have reached the cutoff digit
*/
cmp = BigInt_Compare(&scaledValue, &scaledMarginLow);
low = isEven ? (cmp <= 0) : (cmp < 0);
cmp = BigInt_Compare(&scaledValueHigh, &scale);
high = isEven ? (cmp >= 0) : (cmp > 0);
if (low | high | (digitExponent == cutoffExponent))
break;
/* store the output digit */
*curDigit = (char)('0' + outputDigit);
++curDigit;
/* multiply larger by the output base */
BigInt_Multiply10(&scaledValue);
BigInt_Multiply10(&scaledMarginLow);
if (scaledMarginHigh != &scaledMarginLow) {
BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow);
}
}
}
else {
/*
* For exact digit mode, we will try to print until we
* have exhausted all precision (i.e. all remaining digits are zeros) or
* until we reach the desired cutoff digit.
*/
low = NPY_FALSE;
high = NPY_FALSE;
for (;;) {
digitExponent = digitExponent-1;
/* divide out the scale to extract the digit */
outputDigit =
BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, &scale);
DEBUG_ASSERT(outputDigit < 10);
if ((scaledValue.length == 0) | (digitExponent == cutoffExponent)) {
break;
}
/* store the output digit */
*curDigit = (char)('0' + outputDigit);
++curDigit;
/* multiply larger by the output base */
BigInt_Multiply10(&scaledValue);
}
}
/* default to rounding down the final digit if value got too close to 0 */
roundDown = low;
/* if it is legal to round up and down */
if (low == high) {
npy_int32 compare;
/*
* round to the closest digit by comparing value with 0.5. To do this we
* need to convert the inequality to large integer values.
* compare( value, 0.5 )
* compare( scale * value, scale * 0.5 )
* compare( 2 * scale * value, scale )
*/
BigInt_Multiply2_inplace(&scaledValue);
compare = BigInt_Compare(&scaledValue, &scale);
roundDown = compare < 0;
/*
* if we are directly in the middle, round towards the even digit (i.e.
* IEEE rounding rules)
*/
if (compare == 0) {
roundDown = (outputDigit & 1) == 0;
}
}
/* print the rounded digit */
if (roundDown) {
*curDigit = (char)('0' + outputDigit);
++curDigit;
}
else {
/* handle rounding up */
if (outputDigit == 9) {
/* find the first non-nine prior digit */
for (;;) {
/* if we are at the first digit */
if (curDigit == pOutBuffer) {
/* output 1 at the next highest exponent */
*curDigit = '1';
++curDigit;
*pOutExponent += 1;
break;
}
--curDigit;
if (*curDigit != '9') {
/* increment the digit */
*curDigit += 1;
++curDigit;
break;
}
}
}
else {
/* values in the range [0,8] can perform a simple round up */
*curDigit = (char)('0' + outputDigit + 1);
++curDigit;
}
}
/* return the number of digits output */
outputLen = (npy_uint32)(curDigit - pOutBuffer);
DEBUG_ASSERT(outputLen <= bufferSize);
return outputLen;
}
/*
* Helper union to decompose a 16-bit IEEE float.
* sign: 1 bit
* exponent: 5 bits
* mantissa: 10 bits
*/
typedef union FloatUnion16
{
npy_uint16 integer;
} FloatUnion16;
npy_bool IsNegative_F16(FloatUnion16 *v) { return (v->integer >> 15) != 0; }
npy_uint32 GetExponent_F16(FloatUnion16 *v) { return (v->integer >> 10) & 0x1F;}
npy_uint32 GetMantissa_F16(FloatUnion16 *v) { return v->integer & 0x3FF; }
/*
* Helper union to decompose a 32-bit IEEE float.
* sign: 1 bit
* exponent: 8 bits
* mantissa: 23 bits
*/
typedef union FloatUnion32
{
npy_float32 floatingPoint;
npy_uint32 integer;
} FloatUnion32;
npy_bool IsNegative_F32(FloatUnion32 *v) { return (v->integer >> 31) != 0; }
npy_uint32 GetExponent_F32(FloatUnion32 *v) { return (v->integer >> 23) & 0xFF;}
npy_uint32 GetMantissa_F32(FloatUnion32 *v) { return v->integer & 0x7FFFFF; }
/*
* Helper union to decompose a 64-bit IEEE float.
* sign: 1 bit
* exponent: 11 bits
* mantissa: 52 bits
*/
typedef union FloatUnion64
{
npy_float64 floatingPoint;
npy_uint64 integer;
} FloatUnion64;
npy_bool IsNegative_F64(FloatUnion64 *v) { return (v->integer >> 63) != 0; }
npy_uint32 GetExponent_F64(FloatUnion64 *v) { return (v->integer >> 52) & 0x7FF; }
npy_uint64 GetMantissa_F64(FloatUnion64 *v) { return v->integer & 0xFFFFFFFFFFFFFull; }
/*
* Helper unions and datatype to decompose a 80-bit IEEE float
* sign: 1 bit, second u64
* exponent: 15 bits, second u64
* intbit 1 bit, first u64
* mantissa: 63 bits, first u64
*/
/*
* Since systems have different types of long doubles, and may not necessarily
* have a 128-byte format we can use to pass values around, here we create
* our own 128-bit storage type for convenience.
*/
typedef struct FloatVal128 {
npy_uint64 integer[2];
} FloatVal128;
npy_bool IsNegative_F128(FloatVal128 *v) {
return ((v->integer[1] >> 15) & 0x1) != 0;
}
npy_uint32 GetExponent_F128(FloatVal128 *v) { return v->integer[1] & 0x7FFF; }
npy_uint64 GetMantissa_F128(FloatVal128 *v) {
return v->integer[0] & 0x7FFFFFFFFFFFFFFFull;
}
/*
* then for each different definition of long double, we create a union to
* unpack the float data safely. We can then copy these integers to a
* FloatVal128.
*/
#ifdef NPY_FLOAT128
typedef union FloatUnion128
{
npy_float128 floatingPoint;
struct {
npy_uint64 a;
npy_uint16 b;
} integer;
} FloatUnion128;
#endif
#ifdef NPY_FLOAT96
typedef union FloatUnion96
{
npy_float96 floatingPoint;
struct {
npy_uint64 a;
npy_uint32 b;
} integer;
} FloatUnion96;
#endif
#ifdef NPY_FLOAT80
typedef union FloatUnion80
{
npy_float80 floatingPoint;
struct {
npy_uint64 a;
npy_uint16 b;
} integer;
} FloatUnion80;
#endif
/*
* The main changes above this point, relative to Ryan Juckett's code, are:
* 1. fixed overflow problems when mantissa was 64 bits (in float128 types),
* by replacing multiplication by 2 or 4 by BigInt_ShiftLeft calls.
* 2. Increased c_BigInt_MaxBlocks
* 3. Added more entries to the g_PowerOf10_Big table
* 4. Added unbiased rounding calculation with isEven
*
* Below this point, the FormatPositional and FormatScientific functions have
* been more significantly rewritten. The Dragon4_PrintFloat16 and
* Dragon4_PrintFloat128 functions are new, and were adapted from the 64 and 32
* bit versions. The python interfacing functions (in the header) are new.
*/
/*
* Outputs the positive number with positional notation: ddddd.dddd
* The output is always NUL terminated and the output length (not including the
* NUL) is returned.
* Arguments:
* buffer - buffer to output into
* bufferSize - maximum characters that can be printed to buffer
* mantissa - value significand
* exponent - value exponent in base 2
* signbit - value of the sign position. Should be '+', '-' or ''
* mantissaBit - index of the highest set mantissa bit
* hasUnequalMargins - is the high margin twice as large as the low margin
* precision - Negative prints as many digits as are needed for a unique
* number. Positive specifies the maximum number of significant
* digits to print past the decimal point.
* trim_mode - how to treat trailing 0s and '.'. See TrimMode comments.
* digits_left - pad characters to left of decimal point. -1 for no padding
* digits_right - pad characters to right of decimal point. -1 for no padding
* padding adds whitespace until there are the specified
* number characters to sides of decimal point. Applies after
* trim_mode characters were removed. If digits_right is
* positive and the decimal point was trimmed, decimal point
* will be replaced by a whitespace character.
*/
static npy_uint32
FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
npy_bool hasUnequalMargins, DigitMode digit_mode,
CutoffMode cutoff_mode, npy_int32 precision,
TrimMode trim_mode, npy_int32 digits_left,
npy_int32 digits_right)
{
npy_int32 printExponent;
npy_int32 numDigits, numWholeDigits=0, has_sign=0;
npy_int32 maxPrintLen = (npy_int32)bufferSize - 1, pos = 0;
/* track the # of digits past the decimal point that have been printed */
npy_int32 numFractionDigits = 0, desiredFractionalDigits;
DEBUG_ASSERT(bufferSize > 0);
if (digit_mode != DigitMode_Unique) {
DEBUG_ASSERT(precision >= 0);
}
if (signbit == '+' && pos < maxPrintLen) {
buffer[pos++] = '+';
has_sign = 1;
}
else if (signbit == '-' && pos < maxPrintLen) {
buffer[pos++] = '-';
has_sign = 1;
}
numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
digit_mode, cutoff_mode, precision, buffer + has_sign,
maxPrintLen - has_sign, &printExponent);
DEBUG_ASSERT(numDigits > 0);
DEBUG_ASSERT(numDigits <= bufferSize);
/* if output has a whole number */
if (printExponent >= 0) {
/* leave the whole number at the start of the buffer */
numWholeDigits = printExponent+1;
if (numDigits <= numWholeDigits) {
npy_int32 count = numWholeDigits - numDigits;
pos += numDigits;
/* don't overflow the buffer */
if (pos + count > maxPrintLen) {
count = maxPrintLen - pos;
}
/* add trailing zeros up to the decimal point */
numDigits += count;
for ( ; count > 0; count--) {
buffer[pos++] = '0';
}
}
/* insert the decimal point prior to the fraction */
else if (numDigits > numWholeDigits) {
npy_int32 maxFractionDigits;
numFractionDigits = numDigits - numWholeDigits;
maxFractionDigits = maxPrintLen - numWholeDigits - 1 - pos;
if (numFractionDigits > maxFractionDigits) {
numFractionDigits = maxFractionDigits;
}
memmove(buffer + pos + numWholeDigits + 1,
buffer + pos + numWholeDigits, numFractionDigits);
pos += numWholeDigits;
buffer[pos] = '.';
numDigits = numWholeDigits + 1 + numFractionDigits;
pos += 1 + numFractionDigits;
}
}
else {
/* shift out the fraction to make room for the leading zeros */
npy_int32 numFractionZeros = 0;
if (pos + 2 < maxPrintLen) {
npy_int32 maxFractionZeros, digitsStartIdx, maxFractionDigits, i;
maxFractionZeros = maxPrintLen - 2 - pos;
numFractionZeros = -(printExponent + 1);
if (numFractionZeros > maxFractionZeros) {
numFractionZeros = maxFractionZeros;
}
digitsStartIdx = 2 + numFractionZeros;
/*
* shift the significant digits right such that there is room for
* leading zeros
*/
numFractionDigits = numDigits;
maxFractionDigits = maxPrintLen - digitsStartIdx - pos;
if (numFractionDigits > maxFractionDigits) {
numFractionDigits = maxFractionDigits;
}
memmove(buffer + pos + digitsStartIdx, buffer + pos,
numFractionDigits);
/* insert the leading zeros */
for (i = 2; i < digitsStartIdx; ++i) {
buffer[pos + i] = '0';
}
/* update the counts */
numFractionDigits += numFractionZeros;
numDigits = numFractionDigits;
}
/* add the decimal point */
if (pos + 1 < maxPrintLen) {
buffer[pos+1] = '.';
}
/* add the initial zero */
if (pos < maxPrintLen) {
buffer[pos] = '0';
numDigits += 1;
}
numWholeDigits = 1;
pos += 2 + numFractionDigits;
}
/* always add decimal point, except for DprZeros mode */
if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 &&
pos < maxPrintLen){
buffer[pos++] = '.';
}
desiredFractionalDigits = precision;
if (cutoff_mode == CutoffMode_TotalLength && precision >= 0) {
desiredFractionalDigits = precision - numWholeDigits;
}
if (trim_mode == TrimMode_LeaveOneZero) {
/* if we didn't print any fractional digits, add a trailing 0 */
if (numFractionDigits == 0 && pos < maxPrintLen) {
buffer[pos++] = '0';
numFractionDigits++;
}
}
else if (trim_mode == TrimMode_None &&
digit_mode != DigitMode_Unique &&
desiredFractionalDigits > numFractionDigits &&
pos < maxPrintLen) {
/* add trailing zeros up to precision length */
/* compute the number of trailing zeros needed */
npy_int32 count = desiredFractionalDigits - numFractionDigits;
if (pos + count > maxPrintLen) {
count = maxPrintLen - pos;
}
numFractionDigits += count;
for ( ; count > 0; count--) {
buffer[pos++] = '0';
}
}
/* else, for trim_mode Zeros or DptZeros, there is nothing more to add */
/*
* when rounding, we may still end up with trailing zeros. Remove them
* depending on trim settings.
*/
if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0){
while (buffer[pos-1] == '0') {
pos--;
numFractionDigits--;
}
if (trim_mode == TrimMode_LeaveOneZero && buffer[pos-1] == '.') {
buffer[pos++] = '0';
numFractionDigits++;
}
}
/* add any whitespace padding to right side */
if (digits_right >= numFractionDigits) {
npy_int32 count = digits_right - numFractionDigits;
/* in trim_mode DptZeros, if right padding, add a space for the . */
if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0
&& pos < maxPrintLen) {
buffer[pos++] = ' ';
}
if (pos + count > maxPrintLen) {
count = maxPrintLen - pos;
}
for ( ; count > 0; count--) {
buffer[pos++] = ' ';
}
}
/* add any whitespace padding to left side */
if (digits_left > numWholeDigits + has_sign) {
npy_int32 shift = digits_left - (numWholeDigits + has_sign);
npy_int32 count = pos;
if (count + shift > maxPrintLen){
count = maxPrintLen - shift;
}
if (count > 0) {
memmove(buffer + shift, buffer, count);
}
pos = shift + count;
for ( ; shift > 0; shift--) {
buffer[shift - 1] = ' ';
}
}
/* terminate the buffer */
DEBUG_ASSERT(pos <= maxPrintLen);
buffer[pos] = '\0';
return pos;
}
/*
* Outputs the positive number with scientific notation: d.dddde[sign]ddd
* The output is always NUL terminated and the output length (not including the
* NUL) is returned.
* Arguments:
* buffer - buffer to output into
* bufferSize - maximum characters that can be printed to buffer
* mantissa - value significand
* exponent - value exponent in base 2
* signbit - value of the sign position. Should be '+', '-' or ''
* mantissaBit - index of the highest set mantissa bit
* hasUnequalMargins - is the high margin twice as large as the low margin
* precision - Negative prints as many digits as are needed for a unique
* number. Positive specifies the maximum number of significant
* digits to print past the decimal point.
* trim_mode - how to treat trailing 0s and '.'. See TrimMode comments.
* digits_left - pad characters to left of decimal point. -1 for no padding
* exp_digits - pads exponent with zeros until it has this many digits
*/
static npy_uint32
FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
npy_bool hasUnequalMargins, DigitMode digit_mode,
npy_int32 precision, TrimMode trim_mode,
npy_int32 digits_left, npy_int32 exp_digits)
{
npy_int32 printExponent;
npy_int32 numDigits;
char *pCurOut;
npy_int32 numFractionDigits;
npy_int32 leftchars;
if (digit_mode != DigitMode_Unique) {
DEBUG_ASSERT(precision >= 0);
}
DEBUG_ASSERT(bufferSize > 0);
pCurOut = buffer;
/* add any whitespace padding to left side */
leftchars = 1 + (signbit == '-' || signbit == '+');
if (digits_left > leftchars) {
int i;
for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++){
*pCurOut = ' ';
pCurOut++;
--bufferSize;
}
}
if (signbit == '+' && bufferSize > 1) {
*pCurOut = '+';
pCurOut++;
--bufferSize;
}
else if (signbit == '-' && bufferSize > 1) {
*pCurOut = '-';
pCurOut++;
--bufferSize;
}
numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
digit_mode, CutoffMode_TotalLength, precision + 1,
pCurOut, bufferSize, &printExponent);
DEBUG_ASSERT(numDigits > 0);
DEBUG_ASSERT(numDigits <= bufferSize);
/* keep the whole number as the first digit */
if (bufferSize > 1) {
pCurOut += 1;
bufferSize -= 1;
}
/* insert the decimal point prior to the fractional number */
numFractionDigits = numDigits-1;
if (numFractionDigits > 0 && bufferSize > 1) {
npy_int32 maxFractionDigits = (npy_int32)bufferSize - 2;
if (numFractionDigits > maxFractionDigits) {
numFractionDigits = maxFractionDigits;
}
memmove(pCurOut + 1, pCurOut, numFractionDigits);
pCurOut[0] = '.';
pCurOut += (1 + numFractionDigits);
bufferSize -= (1 + numFractionDigits);
}
/* always add decimal point, except for DprZeros mode */
if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 &&
bufferSize > 1){
*pCurOut = '.';
++pCurOut;
--bufferSize;
}
if (trim_mode == TrimMode_LeaveOneZero) {
/* if we didn't print any fractional digits, add the 0 */
if (numFractionDigits == 0 && bufferSize > 1) {
*pCurOut = '0';
++pCurOut;
--bufferSize;
++numFractionDigits;
}
}
else if (trim_mode == TrimMode_None &&
digit_mode != DigitMode_Unique) {
/* add trailing zeros up to precision length */
if (precision > (npy_int32)numFractionDigits) {
char *pEnd;
/* compute the number of trailing zeros needed */
npy_int32 numZeros = (precision - numFractionDigits);
if (numZeros > (npy_int32)bufferSize - 1) {
numZeros = (npy_int32)bufferSize - 1;
}
for (pEnd = pCurOut + numZeros; pCurOut < pEnd; ++pCurOut) {
*pCurOut = '0';
++numFractionDigits;
}
}
}
/* else, for trim_mode Zeros or DptZeros, there is nothing more to add */
/*
* when rounding, we may still end up with trailing zeros. Remove them
* depending on trim settings.
*/
if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0){
--pCurOut;
while (*pCurOut == '0') {
--pCurOut;
++bufferSize;
--numFractionDigits;
}
if (trim_mode == TrimMode_LeaveOneZero && *pCurOut == '.') {
++pCurOut;
*pCurOut = '0';
--bufferSize;
++numFractionDigits;
}
++pCurOut;
}
/* print the exponent into a local buffer and copy into output buffer */
if (bufferSize > 1) {
char exponentBuffer[7];
npy_int32 digits[5];
npy_int32 i, exp_size, count;
if (exp_digits > 5) {
exp_digits = 5;
}
if (exp_digits < 0) {
exp_digits = 2;
}
exponentBuffer[0] = 'e';
if (printExponent >= 0) {
exponentBuffer[1] = '+';
}
else {
exponentBuffer[1] = '-';
printExponent = -printExponent;
}
DEBUG_ASSERT(printExponent < 100000);
/* get exp digits */
for (i = 0; i < 5; i++){
digits[i] = printExponent % 10;
printExponent /= 10;
}
/* count back over leading zeros */
for (i = 5; i > exp_digits && digits[i-1] == 0; i--) {
}
exp_size = i;
/* write remaining digits to tmp buf */
for (i = exp_size; i > 0; i--){
exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]);
}
/* copy the exponent buffer into the output */
count = exp_size + 2;
if (count > (npy_int32)bufferSize - 1) {
count = (npy_int32)bufferSize - 1;
}
memcpy(pCurOut, exponentBuffer, count);
pCurOut += count;
bufferSize -= count;
}
DEBUG_ASSERT(bufferSize > 0);
pCurOut[0] = '\0';
return pCurOut - buffer;
}
/*
* Print a hexadecimal value with a given width.
* The output string is always NUL terminated and the string length (not
* including the NUL) is returned.
*/
/* Unused for now
static npy_uint32
PrintHex(char * buffer, npy_uint32 bufferSize, npy_uint64 value,
npy_uint32 width)
{
const char digits[] = "0123456789abcdef";
char *pCurOut;
DEBUG_ASSERT(bufferSize > 0);
npy_uint32 maxPrintLen = bufferSize-1;
if (width > maxPrintLen) {
width = maxPrintLen;
}
pCurOut = buffer;
while (width > 0) {
--width;
npy_uint8 digit = (npy_uint8)((value >> 4ull*(npy_uint64)width) & 0xF);
*pCurOut = digits[digit];
++pCurOut;
}
*pCurOut = '\0';
return pCurOut - buffer;
}
*/
/*
* Print special case values for infinities and NaNs.
* The output string is always NUL terminated and the string length (not
* including the NUL) is returned.
*/
static npy_uint32
PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
npy_uint32 mantissaHexWidth, char signbit)
{
npy_uint32 maxPrintLen = bufferSize-1;
npy_uint32 pos = 0;
DEBUG_ASSERT(bufferSize > 0);
/* Check for infinity */
if (mantissa == 0) {
npy_uint32 printLen;
/* only print sign for inf values (though nan can have a sign set) */
if (signbit == '+') {
if (pos < maxPrintLen-1){
buffer[pos++] = '+';
}
}
else if (signbit == '-') {
if (pos < maxPrintLen-1){
buffer[pos++] = '-';
}
}
/* copy and make sure the buffer is terminated */
printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos;
memcpy(buffer + pos, "inf", printLen);
buffer[pos + printLen] = '\0';
return pos + printLen;
}
else {
/* copy and make sure the buffer is terminated */
npy_uint32 printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos;
memcpy(buffer + pos, "nan", printLen);
buffer[pos + printLen] = '\0';
/*
* // XXX: Should we change this for numpy?
* // append HEX value
* if (maxPrintLen > 3) {
* printLen += PrintHex(buffer+3, bufferSize-3, mantissa,
* mantissaHexWidth);
* }
*/
return pos + printLen;
}
}
/*
* These functions print a floating-point number as a decimal string.
* The output string is always NUL terminated and the string length (not
* including the NUL) is returned.
*
* Arguments are:
* buffer - buffer to output into
* bufferSize - maximum characters that can be printed to buffer
* value - value significand
* scientific - boolean controlling whether scientific notation is used
* precision - If positive, specifies the number of decimals to show after
* decimal point. If negative, sufficient digits to uniquely
* specify the float will be output.
* trim_mode - how to treat trailing zeros and decimal point. See TrimMode.
* digits_right - pad the result with '' on the right past the decimal point
* digits_left - pad the result with '' on the right past the decimal point
* exp_digits - Only affects scientific output. If positive, pads the
* exponent with 0s until there are this many digits. If
* negative, only use sufficient digits.
*/
static npy_uint32
Dragon4_PrintFloat16(char *buffer, npy_uint32 bufferSize, npy_uint16 value,
npy_bool scientific, DigitMode digit_mode,
CutoffMode cutoff_mode, npy_int32 precision,
npy_bool sign, TrimMode trim_mode, npy_int32 digits_left,
npy_int32 digits_right, npy_int32 exp_digits)
{
FloatUnion16 floatUnion;
npy_uint32 floatExponent, floatMantissa;
npy_uint32 mantissa;
npy_int32 exponent;
npy_uint32 mantissaBit;
npy_bool hasUnequalMargins;
char signbit = '\0';
if (bufferSize == 0) {
return 0;
}
if (bufferSize == 1) {
buffer[0] = '\0';
return 0;
}
/* deconstruct the floating point value */
floatUnion.integer = value;
floatExponent = GetExponent_F16(&floatUnion);
floatMantissa = GetMantissa_F16(&floatUnion);
/* output the sign */
if (IsNegative_F16(&floatUnion)) {
signbit = '-';
}
else if (sign) {
signbit = '+';
}
/* if this is a special value */
if (floatExponent == 0x1F) {
return PrintInfNan(buffer, bufferSize, floatMantissa, 3, signbit);
}
/* else this is a number */
/* factor the value into its parts */
if (floatExponent != 0) {
/*
* normalized
* The floating point equation is:
* value = (1 + mantissa/2^10) * 2 ^ (exponent-15)
* We convert the integer equation by factoring a 2^10 out of the
* exponent
* value = (1 + mantissa/2^10) * 2^10 * 2 ^ (exponent-15-10)
* value = (2^10 + mantissa) * 2 ^ (exponent-15-10)
* Because of the implied 1 in front of the mantissa we have 10 bits of
* precision.
* m = (2^10 + mantissa)
* e = (exponent-15-10)
*/
mantissa = (1UL << 10) | floatMantissa;
exponent = floatExponent - 15 - 10;
mantissaBit = 10;
hasUnequalMargins = (floatExponent != 1) && (floatMantissa == 0);
}
else {
/*
* denormalized
* The floating point equation is:
* value = (mantissa/2^10) * 2 ^ (1-15)
* We convert the integer equation by factoring a 2^23 out of the
* exponent
* value = (mantissa/2^10) * 2^10 * 2 ^ (1-15-10)
* value = mantissa * 2 ^ (1-15-10)
* We have up to 10 bits of precision.
* m = (mantissa)
* e = (1-15-10)
*/
mantissa = floatMantissa;
exponent = 1 - 15 - 10;
mantissaBit = LogBase2_32(mantissa);
hasUnequalMargins = NPY_FALSE;
}
/* format the value */
if (scientific) {
return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit,
mantissaBit, hasUnequalMargins, digit_mode,
precision, trim_mode, digits_left, exp_digits);
}
else {
return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
mantissaBit, hasUnequalMargins, digit_mode,
cutoff_mode, precision, trim_mode,
digits_left, digits_right);
}
}
static npy_uint32
Dragon4_PrintFloat32(char *buffer, npy_uint32 bufferSize, npy_float32 value,
npy_bool scientific, DigitMode digit_mode,
CutoffMode cutoff_mode, npy_int32 precision,
npy_bool sign, TrimMode trim_mode, npy_int32 digits_left,
npy_int32 digits_right, npy_int32 exp_digits)
{
FloatUnion32 floatUnion;
npy_uint32 floatExponent, floatMantissa;
npy_uint32 mantissa;
npy_int32 exponent;
npy_uint32 mantissaBit;
npy_bool hasUnequalMargins;
char signbit = '\0';
if (bufferSize == 0) {
return 0;
}
if (bufferSize == 1) {
buffer[0] = '\0';
return 0;
}
/* deconstruct the floating point value */
floatUnion.floatingPoint = value;
floatExponent = GetExponent_F32(&floatUnion);
floatMantissa = GetMantissa_F32(&floatUnion);
/* output the sign */
if (IsNegative_F32(&floatUnion)) {
signbit = '-';
}
else if (sign) {
signbit = '+';
}
/* if this is a special value */
if (floatExponent == 0xFF) {
return PrintInfNan(buffer, bufferSize, floatMantissa, 6, signbit);
}
/* else this is a number */
/* factor the value into its parts */
if (floatExponent != 0) {
/*
* normalized
* The floating point equation is:
* value = (1 + mantissa/2^23) * 2 ^ (exponent-127)
* We convert the integer equation by factoring a 2^23 out of the
* exponent
* value = (1 + mantissa/2^23) * 2^23 * 2 ^ (exponent-127-23)
* value = (2^23 + mantissa) * 2 ^ (exponent-127-23)
* Because of the implied 1 in front of the mantissa we have 24 bits of
* precision.
* m = (2^23 + mantissa)
* e = (exponent-127-23)
*/
mantissa = (1UL << 23) | floatMantissa;
exponent = floatExponent - 127 - 23;
mantissaBit = 23;
hasUnequalMargins = (floatExponent != 1) && (floatMantissa == 0);
}
else {
/*
* denormalized
* The floating point equation is:
* value = (mantissa/2^23) * 2 ^ (1-127)
* We convert the integer equation by factoring a 2^23 out of the
* exponent
* value = (mantissa/2^23) * 2^23 * 2 ^ (1-127-23)
* value = mantissa * 2 ^ (1-127-23)
* We have up to 23 bits of precision.
* m = (mantissa)
* e = (1-127-23)
*/
mantissa = floatMantissa;
exponent = 1 - 127 - 23;
mantissaBit = LogBase2_32(mantissa);
hasUnequalMargins = NPY_FALSE;
}
/* format the value */
if (scientific) {
return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit,
mantissaBit, hasUnequalMargins, digit_mode,
precision, trim_mode, digits_left, exp_digits);
}
else {
return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
mantissaBit, hasUnequalMargins, digit_mode,
cutoff_mode, precision, trim_mode,
digits_left, digits_right);
}
}
static npy_uint32
Dragon4_PrintFloat64(char *buffer, npy_uint32 bufferSize, npy_float64 value,
npy_bool scientific, DigitMode digit_mode,
CutoffMode cutoff_mode, npy_int32 precision,
npy_bool sign, TrimMode trim_mode, npy_int32 digits_left,
npy_int32 digits_right, npy_int32 exp_digits)
{
FloatUnion64 floatUnion;
npy_uint32 floatExponent;
npy_uint64 floatMantissa;
npy_uint64 mantissa;
npy_int32 exponent;
npy_uint32 mantissaBit;
npy_bool hasUnequalMargins;
char signbit = '\0';
if (bufferSize == 0) {
return 0;
}
if (bufferSize == 1) {
buffer[0] = '\0';
return 0;
}
/* deconstruct the floating point value */
floatUnion.floatingPoint = value;
floatExponent = GetExponent_F64(&floatUnion);
floatMantissa = GetMantissa_F64(&floatUnion);
/* output the sign */
if (IsNegative_F64(&floatUnion)) {
signbit = '-';
}
else if (sign) {
signbit = '+';
}
/* if this is a special value */
if (floatExponent == 0x7FF) {
return PrintInfNan(buffer, bufferSize, floatMantissa, 13, signbit);
}
/* else this is a number */
/* factor the value into its parts */
if (floatExponent != 0) {
/*
* normal
* The floating point equation is:
* value = (1 + mantissa/2^52) * 2 ^ (exponent-1023)
* We convert the integer equation by factoring a 2^52 out of the
* exponent
* value = (1 + mantissa/2^52) * 2^52 * 2 ^ (exponent-1023-52)
* value = (2^52 + mantissa) * 2 ^ (exponent-1023-52)
* Because of the implied 1 in front of the mantissa we have 53 bits of
* precision.
* m = (2^52 + mantissa)
* e = (exponent-1023+1-53)
*/
mantissa = (1ull << 52) | floatMantissa;
exponent = floatExponent - 1023 - 52;
mantissaBit = 52;
hasUnequalMargins = (floatExponent != 1) && (floatMantissa == 0);
}
else {
/*
* subnormal
* The floating point equation is:
* value = (mantissa/2^52) * 2 ^ (1-1023)
* We convert the integer equation by factoring a 2^52 out of the
* exponent
* value = (mantissa/2^52) * 2^52 * 2 ^ (1-1023-52)
* value = mantissa * 2 ^ (1-1023-52)
* We have up to 52 bits of precision.
* m = (mantissa)
* e = (1-1023-52)
*/
mantissa = floatMantissa;
exponent = 1 - 1023 - 52;
mantissaBit = LogBase2_64(mantissa);
hasUnequalMargins = NPY_FALSE;
}
/* format the value */
if (scientific) {
return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit,
mantissaBit, hasUnequalMargins, digit_mode,
precision, trim_mode, digits_left, exp_digits);
}
else {
return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
mantissaBit, hasUnequalMargins, digit_mode,
cutoff_mode, precision, trim_mode,
digits_left, digits_right);
}
}
static npy_uint32
Dragon4_PrintFloat128(char *buffer, npy_uint32 bufferSize, FloatVal128 value,
npy_bool scientific, DigitMode digit_mode,
CutoffMode cutoff_mode, npy_int32 precision,
npy_bool sign, TrimMode trim_mode, npy_int32 digits_left,
npy_int32 digits_right, npy_int32 exp_digits)
{
npy_uint32 floatExponent;
npy_uint64 floatMantissa;
npy_uint64 mantissa;
npy_int32 exponent;
npy_uint32 mantissaBit;
npy_bool hasUnequalMargins;
char signbit = '\0';
if (bufferSize == 0) {
return 0;
}
if (bufferSize == 1) {
buffer[0] = '\0';
return 0;
}
/* deconstruct the floating point value */
floatExponent = GetExponent_F128(&value);
floatMantissa = GetMantissa_F128(&value);
/* output the sign */
if (IsNegative_F128(&value)) {
signbit = '-';
}
else if (sign) {
signbit = '+';
}
/* if this is a special value */
if (floatExponent == 0x7FFF) {
return PrintInfNan(buffer, bufferSize, floatMantissa, 16, signbit);
}
/* else this is a number */
/* factor the value into its parts */
if (floatExponent != 0) {
/*
* normal
* The floating point equation is:
* value = (1 + mantissa/2^63) * 2 ^ (exponent-16383)
* We convert the integer equation by factoring a 2^63 out of the
* exponent
* value = (1 + mantissa/2^63) * 2^63 * 2 ^ (exponent-16383-63)
* value = (2^63 + mantissa) * 2 ^ (exponent-16383-63)
* Because of the implied 1 in front of the mantissa we have 64 bits of
* precision.
* m = (2^63 + mantissa)
* e = (exponent-16383+1-64)
*/
mantissa = (1ull << 63) | floatMantissa;
exponent = floatExponent - 16383 - 63;
mantissaBit = 63;
hasUnequalMargins = (floatExponent != 1) && (floatMantissa == 0);
}
else {
/*
* subnormal
* The floating point equation is:
* value = (mantissa/2^63) * 2 ^ (1-16383)
* We convert the integer equation by factoring a 2^52 out of the
* exponent
* value = (mantissa/2^63) * 2^52 * 2 ^ (1-16383-63)
* value = mantissa * 2 ^ (1-16383-63)
* We have up to 63 bits of precision.
* m = (mantissa)
* e = (1-16383-63)
*/
mantissa = floatMantissa;
exponent = 1 - 16383 - 63;
mantissaBit = LogBase2_64(mantissa);
hasUnequalMargins = NPY_FALSE;
}
/* format the value */
if (scientific) {
return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit,
mantissaBit, hasUnequalMargins, digit_mode,
precision, trim_mode, digits_left, exp_digits);
}
else {
return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
mantissaBit, hasUnequalMargins, digit_mode,
cutoff_mode, precision, trim_mode,
digits_left, digits_right);
}
}
PyObject *
Dragon4_Positional_AnySize(void *val, size_t size, DigitMode digit_mode,
CutoffMode cutoff_mode, int precision, int sign,
TrimMode trim, int pad_left, int pad_right)
{
/*
* Use a very large buffer in case anyone tries to output a large numberG.
* 16384 should be enough to uniquely print any float128, which goes up
* to about 10^4932 */
static char repr[16384];
FloatVal128 val128;
#ifdef NPY_FLOAT80
FloatUnion80 buf80;;
#endif
#ifdef NPY_FLOAT96
FloatUnion96 buf96;
#endif
#ifdef NPY_FLOAT128
FloatUnion128 buf128;
#endif
switch (size) {
case 2:
Dragon4_PrintFloat16(repr, sizeof(repr), *(npy_float16*)val,
0, digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right, -1);
break;
case 4:
Dragon4_PrintFloat32(repr, sizeof(repr), *(npy_float32*)val,
0, digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right, -1);
break;
case 8:
Dragon4_PrintFloat64(repr, sizeof(repr), *(npy_float64*)val,
0, digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right, -1);
break;
#ifdef NPY_FLOAT80
case 10:
buf80.floatingPoint = *(npy_float80*)val;
val128.integer[0] = buf80.integer.a;
val128.integer[1] = buf80.integer.b;
Dragon4_PrintFloat128(repr, sizeof(repr), val128,
0, digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right, -1);
break;
#endif
#ifdef NPY_FLOAT96
case 12:
buf96.floatingPoint = *(npy_float96*)val;
val128.integer[0] = buf96.integer.a;
val128.integer[1] = buf96.integer.b;
Dragon4_PrintFloat128(repr, sizeof(repr), val128,
0, digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right, -1);
break;
#endif
#ifdef NPY_FLOAT128
case 16:
buf128.floatingPoint = *(npy_float128*)val;
val128.integer[0] = buf128.integer.a;
val128.integer[1] = buf128.integer.b;
Dragon4_PrintFloat128(repr, sizeof(repr), val128,
0, digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right, -1);
break;
#endif
default:
PyErr_Format(PyExc_ValueError, "unexpected itemsize %zu", size);
return NULL;
}
return PyUString_FromString(repr);
}
PyObject *
Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode,
int precision, int sign, TrimMode trim, int pad_left,
int pad_right)
{
double val;
if (PyArray_IsScalar(obj, Half)) {
npy_half x = ((PyHalfScalarObject *)obj)->obval;
return Dragon4_Positional_AnySize(&x, sizeof(npy_half),
digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right);
}
else if (PyArray_IsScalar(obj, Float)) {
npy_float x = ((PyFloatScalarObject *)obj)->obval;
return Dragon4_Positional_AnySize(&x, sizeof(npy_float),
digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right);
}
else if (PyArray_IsScalar(obj, Double)) {
npy_double x = ((PyDoubleScalarObject *)obj)->obval;
return Dragon4_Positional_AnySize(&x, sizeof(npy_double),
digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right);
}
else if (PyArray_IsScalar(obj, LongDouble)) {
npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval;
return Dragon4_Positional_AnySize(&x, sizeof(npy_longdouble),
digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right);
}
val = PyFloat_AsDouble(obj);
if (PyErr_Occurred()) {
return NULL;
}
return Dragon4_Positional_AnySize(&val, sizeof(double),
digit_mode, cutoff_mode, precision,
sign, trim, pad_left, pad_right);
}
PyObject *
Dragon4_Scientific_AnySize(void *val, size_t size, DigitMode digit_mode,
int precision, int sign, TrimMode trim,
int pad_left, int exp_digits)
{
/* use a very large buffer in case anyone tries to output a large precision */
static char repr[4096];
FloatVal128 val128;
#ifdef NPY_FLOAT80
FloatUnion80 buf80;;
#endif
#ifdef NPY_FLOAT96
FloatUnion96 buf96;
#endif
#ifdef NPY_FLOAT128
FloatUnion128 buf128;
#endif
/* dummy, is ignored in scientific mode */
CutoffMode cutoff_mode = CutoffMode_TotalLength;
switch (size) {
case 2:
Dragon4_PrintFloat16(repr, sizeof(repr), *(npy_float16*)val,
1, digit_mode, cutoff_mode, precision, sign,
trim, pad_left, -1, exp_digits);
break;
case 4:
Dragon4_PrintFloat32(repr, sizeof(repr), *(npy_float32*)val,
1, digit_mode, cutoff_mode, precision, sign,
trim, pad_left, -1, exp_digits);
break;
case 8:
Dragon4_PrintFloat64(repr, sizeof(repr), *(npy_float64*)val,
1, digit_mode, cutoff_mode, precision, sign,
trim, pad_left, -1, exp_digits);
break;
#ifdef NPY_FLOAT80
case 10:
buf80.floatingPoint = *(npy_float80*)val;
val128.integer[0] = buf80.integer.a;
val128.integer[1] = buf80.integer.b;
Dragon4_PrintFloat128(repr, sizeof(repr), val128,
1, digit_mode, cutoff_mode, precision, sign,
trim, pad_left, -1, exp_digits);
break;
#endif
#ifdef NPY_FLOAT96
case 12:
buf96.floatingPoint = *(npy_float96*)val;
val128.integer[0] = buf96.integer.a;
val128.integer[1] = buf96.integer.b;
Dragon4_PrintFloat128(repr, sizeof(repr), val128,
1, digit_mode, cutoff_mode, precision, sign,
trim, pad_left, -1, exp_digits);
break;
#endif
#ifdef NPY_FLOAT128
case 16:
buf128.floatingPoint = *(npy_float128*)val;
val128.integer[0] = buf128.integer.a;
val128.integer[1] = buf128.integer.b;
Dragon4_PrintFloat128(repr, sizeof(repr), val128,
1, digit_mode, cutoff_mode, precision, sign,
trim, pad_left, -1, exp_digits);
break;
#endif
default:
PyErr_Format(PyExc_ValueError, "unexpected itemsize %zu", size);
return NULL;
}
return PyUString_FromString(repr);
}
PyObject *
Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision,
int sign, TrimMode trim, int pad_left, int exp_digits)
{
double val;
if (PyArray_IsScalar(obj, Half)) {
npy_half x = ((PyHalfScalarObject *)obj)->obval;
return Dragon4_Scientific_AnySize(&x, sizeof(npy_half),
digit_mode, precision,
sign, trim, pad_left, exp_digits);
}
else if (PyArray_IsScalar(obj, Float)) {
npy_float x = ((PyFloatScalarObject *)obj)->obval;
return Dragon4_Scientific_AnySize(&x, sizeof(npy_float),
digit_mode, precision,
sign, trim, pad_left, exp_digits);
}
else if (PyArray_IsScalar(obj, Double)) {
npy_double x = ((PyDoubleScalarObject *)obj)->obval;
return Dragon4_Scientific_AnySize(&x, sizeof(npy_double),
digit_mode, precision,
sign, trim, pad_left, exp_digits);
}
else if (PyArray_IsScalar(obj, LongDouble)) {
npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval;
return Dragon4_Scientific_AnySize(&x, sizeof(npy_longdouble),
digit_mode, precision,
sign, trim, pad_left, exp_digits);
}
val = PyFloat_AsDouble(obj);
if (PyErr_Occurred()) {
return NULL;
}
return Dragon4_Scientific_AnySize(&val, sizeof(double),
digit_mode, precision,
sign, trim, pad_left, exp_digits);
}