Tree - source-git/glibc - CentOS Git server

source-git / glibc

Blame sysdeps/ieee754/ldbl-96/s_fmal.c

Blob History Raw

Packit	6c4009	`/* Compute x * y + z as ternary operation.`
Packit	6c4009	`Copyright (C) 2010-2018 Free Software Foundation, Inc.`
Packit	6c4009	`This file is part of the GNU C Library.`
Packit	6c4009	`Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.`
Packit	6c4009
Packit	6c4009	`The GNU C Library is free software; you can redistribute it and/or`
Packit	6c4009	`modify it under the terms of the GNU Lesser General Public`
Packit	6c4009	`License as published by the Free Software Foundation; either`
Packit	6c4009	`version 2.1 of the License, or (at your option) any later version.`
Packit	6c4009
Packit	6c4009	`The GNU C Library is distributed in the hope that it will be useful,`
Packit	6c4009	`but WITHOUT ANY WARRANTY; without even the implied warranty of`
Packit	6c4009	`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
Packit	6c4009	`Lesser General Public License for more details.`
Packit	6c4009
Packit	6c4009	`You should have received a copy of the GNU Lesser General Public`
Packit	6c4009	`License along with the GNU C Library; if not, see`
Packit	6c4009	`<http://www.gnu.org/licenses/>. */`
Packit	6c4009
Packit	6c4009	`#include <float.h>`
Packit	6c4009	`#include <math.h>`
Packit	6c4009	`#include <fenv.h>`
Packit	6c4009	`#include <ieee754.h>`
Packit	6c4009	`#include <math-barriers.h>`
Packit	6c4009	`#include <math_private.h>`
Packit	6c4009	`#include <libm-alias-ldouble.h>`
Packit	6c4009	`#include <tininess.h>`
Packit	6c4009
Packit	6c4009	`/* This implementation uses rounding to odd to avoid problems with`
Packit	6c4009	`double rounding. See a paper by Boldo and Melquiond:`
Packit	6c4009	`http://www.lri.fr/~melquion/doc/08-tc.pdf */`
Packit	6c4009
Packit	6c4009	`long double`
Packit	6c4009	`__fmal (long double x, long double y, long double z)`
Packit	6c4009	`{`
Packit	6c4009	`union ieee854_long_double u, v, w;`
Packit	6c4009	`int adjust = 0;`
Packit	6c4009	`u.d = x;`
Packit	6c4009	`v.d = y;`
Packit	6c4009	`w.d = z;`
Packit	6c4009	`if (__builtin_expect (u.ieee.exponent + v.ieee.exponent`
Packit	6c4009	`>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS`
Packit	6c4009	`- LDBL_MANT_DIG, 0)`
Packit	6c4009	`\|\| __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)`
Packit	6c4009	`\|\| __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)`
Packit	6c4009	`\|\| __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)`
Packit	6c4009	`\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent`
Packit	6c4009	`<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))`
Packit	6c4009	`{`
Packit	6c4009	`/* If z is Inf, but x and y are finite, the result should be`
Packit	6c4009	`z rather than NaN. */`
Packit	6c4009	`if (w.ieee.exponent == 0x7fff`
Packit	6c4009	`&& u.ieee.exponent != 0x7fff`
Packit	6c4009	`&& v.ieee.exponent != 0x7fff)`
Packit	6c4009	`return (z + x) + y;`
Packit	6c4009	`/* If z is zero and x are y are nonzero, compute the result`
Packit	6c4009	`as x * y to avoid the wrong sign of a zero result if x * y`
Packit	6c4009	`underflows to 0. */`
Packit	6c4009	`if (z == 0 && x != 0 && y != 0)`
Packit	6c4009	`return x * y;`
Packit	6c4009	`/* If x or y or z is Inf/NaN, or if x * y is zero, compute as`
Packit	6c4009	`x * y + z. */`
Packit	6c4009	`if (u.ieee.exponent == 0x7fff`
Packit	6c4009	`\|\| v.ieee.exponent == 0x7fff`
Packit	6c4009	`\|\| w.ieee.exponent == 0x7fff`
Packit	6c4009	`\|\| x == 0`
Packit	6c4009	`\|\| y == 0)`
Packit	6c4009	`return x * y + z;`
Packit	6c4009	`/* If fma will certainly overflow, compute as x * y. */`
Packit	6c4009	`if (u.ieee.exponent + v.ieee.exponent`
Packit	6c4009	`> 0x7fff + IEEE854_LONG_DOUBLE_BIAS)`
Packit	6c4009	`return x * y;`
Packit	6c4009	`/* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the`
Packit	6c4009	`result nor whether there is underflow depends on its exact`
Packit	6c4009	`value, only on its sign. */`
Packit	6c4009	`if (u.ieee.exponent + v.ieee.exponent`
Packit	6c4009	`< IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)`
Packit	6c4009	`{`
Packit	6c4009	`int neg = u.ieee.negative ^ v.ieee.negative;`
Packit	6c4009	`long double tiny = neg ? -0x1p-16445L : 0x1p-16445L;`
Packit	6c4009	`if (w.ieee.exponent >= 3)`
Packit	6c4009	`return tiny + z;`
Packit	6c4009	`/* Scaling up, adding TINY and scaling down produces the`
Packit	6c4009	`correct result, because in round-to-nearest mode adding`
Packit	6c4009	`TINY has no effect and in other modes double rounding is`
Packit	6c4009	`harmless. But it may not produce required underflow`
Packit	6c4009	`exceptions. */`
Packit	6c4009	`v.d = z * 0x1p65L + tiny;`
Packit	6c4009	`if (TININESS_AFTER_ROUNDING`
Packit	6c4009	`? v.ieee.exponent < 66`
Packit	6c4009	`: (w.ieee.exponent == 0`
Packit	6c4009	`\|\| (w.ieee.exponent == 1`
Packit	6c4009	`&& w.ieee.negative != neg`
Packit	6c4009	`&& w.ieee.mantissa1 == 0`
Packit	6c4009	`&& w.ieee.mantissa0 == 0x80000000)))`
Packit	6c4009	`{`
Packit	6c4009	`long double force_underflow = x * y;`
Packit	6c4009	`math_force_eval (force_underflow);`
Packit	6c4009	`}`
Packit	6c4009	`return v.d * 0x1p-65L;`
Packit	6c4009	`}`
Packit	6c4009	`if (u.ieee.exponent + v.ieee.exponent`
Packit	6c4009	`>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)`
Packit	6c4009	`{`
Packit	6c4009	`/* Compute 1p-64 times smaller result and multiply`
Packit	6c4009	`at the end. */`
Packit	6c4009	`if (u.ieee.exponent > v.ieee.exponent)`
Packit	6c4009	`u.ieee.exponent -= LDBL_MANT_DIG;`
Packit	6c4009	`else`
Packit	6c4009	`v.ieee.exponent -= LDBL_MANT_DIG;`
Packit	6c4009	`/* If x + y exponent is very large and z exponent is very small,`
Packit	6c4009	`it doesn't matter if we don't adjust it. */`
Packit	6c4009	`if (w.ieee.exponent > LDBL_MANT_DIG)`
Packit	6c4009	`w.ieee.exponent -= LDBL_MANT_DIG;`
Packit	6c4009	`adjust = 1;`
Packit	6c4009	`}`
Packit	6c4009	`else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)`
Packit	6c4009	`{`
Packit	6c4009	`/* Similarly.`
Packit	6c4009	`If z exponent is very large and x and y exponents are`
Packit	6c4009	`very small, adjust them up to avoid spurious underflows,`
Packit	6c4009	`rather than down. */`
Packit	6c4009	`if (u.ieee.exponent + v.ieee.exponent`
Packit	6c4009	`<= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)`
Packit	6c4009	`{`
Packit	6c4009	`if (u.ieee.exponent > v.ieee.exponent)`
Packit	6c4009	`u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;`
Packit	6c4009	`else`
Packit	6c4009	`v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;`
Packit	6c4009	`}`
Packit	6c4009	`else if (u.ieee.exponent > v.ieee.exponent)`
Packit	6c4009	`{`
Packit	6c4009	`if (u.ieee.exponent > LDBL_MANT_DIG)`
Packit	6c4009	`u.ieee.exponent -= LDBL_MANT_DIG;`
Packit	6c4009	`}`
Packit	6c4009	`else if (v.ieee.exponent > LDBL_MANT_DIG)`
Packit	6c4009	`v.ieee.exponent -= LDBL_MANT_DIG;`
Packit	6c4009	`w.ieee.exponent -= LDBL_MANT_DIG;`
Packit	6c4009	`adjust = 1;`
Packit	6c4009	`}`
Packit	6c4009	`else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)`
Packit	6c4009	`{`
Packit	6c4009	`u.ieee.exponent -= LDBL_MANT_DIG;`
Packit	6c4009	`if (v.ieee.exponent)`
Packit	6c4009	`v.ieee.exponent += LDBL_MANT_DIG;`
Packit	6c4009	`else`
Packit	6c4009	`v.d *= 0x1p64L;`
Packit	6c4009	`}`
Packit	6c4009	`else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)`
Packit	6c4009	`{`
Packit	6c4009	`v.ieee.exponent -= LDBL_MANT_DIG;`
Packit	6c4009	`if (u.ieee.exponent)`
Packit	6c4009	`u.ieee.exponent += LDBL_MANT_DIG;`
Packit	6c4009	`else`
Packit	6c4009	`u.d *= 0x1p64L;`
Packit	6c4009	`}`
Packit	6c4009	`else /* if (u.ieee.exponent + v.ieee.exponent`
Packit	6c4009	`<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */`
Packit	6c4009	`{`
Packit	6c4009	`if (u.ieee.exponent > v.ieee.exponent)`
Packit	6c4009	`u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;`
Packit	6c4009	`else`
Packit	6c4009	`v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;`
Packit	6c4009	`if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)`
Packit	6c4009	`{`
Packit	6c4009	`if (w.ieee.exponent)`
Packit	6c4009	`w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;`
Packit	6c4009	`else`
Packit	6c4009	`w.d *= 0x1p130L;`
Packit	6c4009	`adjust = -1;`
Packit	6c4009	`}`
Packit	6c4009	`/* Otherwise x * y should just affect inexact`
Packit	6c4009	`and nothing else. */`
Packit	6c4009	`}`
Packit	6c4009	`x = u.d;`
Packit	6c4009	`y = v.d;`
Packit	6c4009	`z = w.d;`
Packit	6c4009	`}`
Packit	6c4009
Packit	6c4009	`/* Ensure correct sign of exact 0 + 0. */`
Packit	6c4009	`if (__glibc_unlikely ((x == 0 \|\| y == 0) && z == 0))`
Packit	6c4009	`{`
Packit	6c4009	`x = math_opt_barrier (x);`
Packit	6c4009	`return x * y + z;`
Packit	6c4009	`}`
Packit	6c4009
Packit	6c4009	`fenv_t env;`
Packit	6c4009	`feholdexcept (&env;;`
Packit	6c4009	`fesetround (FE_TONEAREST);`
Packit	6c4009
Packit	6c4009	`/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */`
Packit	6c4009	`#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)`
Packit	6c4009	`long double x1 = x * C;`
Packit	6c4009	`long double y1 = y * C;`
Packit	6c4009	`long double m1 = x * y;`
Packit	6c4009	`x1 = (x - x1) + x1;`
Packit	6c4009	`y1 = (y - y1) + y1;`
Packit	6c4009	`long double x2 = x - x1;`
Packit	6c4009	`long double y2 = y - y1;`
Packit	6c4009	`long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;`
Packit	6c4009
Packit	6c4009	`/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */`
Packit	6c4009	`long double a1 = z + m1;`
Packit	6c4009	`long double t1 = a1 - z;`
Packit	6c4009	`long double t2 = a1 - t1;`
Packit	6c4009	`t1 = m1 - t1;`
Packit	6c4009	`t2 = z - t2;`
Packit	6c4009	`long double a2 = t1 + t2;`
Packit	6c4009	`/* Ensure the arithmetic is not scheduled after feclearexcept call. */`
Packit	6c4009	`math_force_eval (m2);`
Packit	6c4009	`math_force_eval (a2);`
Packit	6c4009	`feclearexcept (FE_INEXACT);`
Packit	6c4009
Packit	6c4009	`/* If the result is an exact zero, ensure it has the correct sign. */`
Packit	6c4009	`if (a1 == 0 && m2 == 0)`
Packit	6c4009	`{`
Packit	6c4009	`feupdateenv (&env;;`
Packit	6c4009	`/* Ensure that round-to-nearest value of z + m1 is not reused. */`
Packit	6c4009	`z = math_opt_barrier (z);`
Packit	6c4009	`return z + m1;`
Packit	6c4009	`}`
Packit	6c4009
Packit	6c4009	`fesetround (FE_TOWARDZERO);`
Packit	6c4009	`/* Perform m2 + a2 addition with round to odd. */`
Packit	6c4009	`u.d = a2 + m2;`
Packit	6c4009
Packit	6c4009	`if (__glibc_likely (adjust == 0))`
Packit	6c4009	`{`
Packit	6c4009	`if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)`
Packit	6c4009	`u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != 0;`
Packit	6c4009	`feupdateenv (&env;;`
Packit	6c4009	`/* Result is a1 + u.d. */`
Packit	6c4009	`return a1 + u.d;`
Packit	6c4009	`}`
Packit	6c4009	`else if (__glibc_likely (adjust > 0))`
Packit	6c4009	`{`
Packit	6c4009	`if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)`
Packit	6c4009	`u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != 0;`
Packit	6c4009	`feupdateenv (&env;;`
Packit	6c4009	`/* Result is a1 + u.d, scaled up. */`
Packit	6c4009	`return (a1 + u.d) * 0x1p64L;`
Packit	6c4009	`}`
Packit	6c4009	`else`
Packit	6c4009	`{`
Packit	6c4009	`if ((u.ieee.mantissa1 & 1) == 0)`
Packit	6c4009	`u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != 0;`
Packit	6c4009	`v.d = a1 + u.d;`
Packit	6c4009	`/* Ensure the addition is not scheduled after fetestexcept call. */`
Packit	6c4009	`math_force_eval (v.d);`
Packit	6c4009	`int j = fetestexcept (FE_INEXACT) != 0;`
Packit	6c4009	`feupdateenv (&env;;`
Packit	6c4009	`/* Ensure the following computations are performed in default rounding`
Packit	6c4009	`mode instead of just reusing the round to zero computation. */`
Packit	6c4009	`asm volatile ("" : "=m" (u) : "m" (u));`
Packit	6c4009	`/* If a1 + u.d is exact, the only rounding happens during`
Packit	6c4009	`scaling down. */`
Packit	6c4009	`if (j == 0)`
Packit	6c4009	`return v.d * 0x1p-130L;`
Packit	6c4009	`/* If result rounded to zero is not subnormal, no double`
Packit	6c4009	`rounding will occur. */`
Packit	6c4009	`if (v.ieee.exponent > 130)`
Packit	6c4009	`return (a1 + u.d) * 0x1p-130L;`
Packit	6c4009	`/* If v.d * 0x1p-130L with round to zero is a subnormal above`
Packit	6c4009	`or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa`
Packit	6c4009	`down just by 1 bit, which means v.ieee.mantissa1 \|= j would`
Packit	6c4009	`change the round bit, not sticky or guard bit.`
Packit	6c4009	`v.d * 0x1p-130L never normalizes by shifting up,`
Packit	6c4009	`so round bit plus sticky bit should be already enough`
Packit	6c4009	`for proper rounding. */`
Packit	6c4009	`if (v.ieee.exponent == 130)`
Packit	6c4009	`{`
Packit	6c4009	`/* If the exponent would be in the normal range when`
Packit	6c4009	`rounding to normal precision with unbounded exponent`
Packit	6c4009	`range, the exact result is known and spurious underflows`
Packit	6c4009	`must be avoided on systems detecting tininess after`
Packit	6c4009	`rounding. */`
Packit	6c4009	`if (TININESS_AFTER_ROUNDING)`
Packit	6c4009	`{`
Packit	6c4009	`w.d = a1 + u.d;`
Packit	6c4009	`if (w.ieee.exponent == 131)`
Packit	6c4009	`return w.d * 0x1p-130L;`
Packit	6c4009	`}`
Packit	6c4009	`/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,`
Packit	6c4009	`v.ieee.mantissa1 & 1 is the round bit and j is our sticky`
Packit	6c4009	`bit. */`
Packit	6c4009	`w.d = 0.0L;`
Packit	6c4009	`w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) \| j;`
Packit	6c4009	`w.ieee.negative = v.ieee.negative;`
Packit	6c4009	`v.ieee.mantissa1 &= ~3U;`
Packit	6c4009	`v.d *= 0x1p-130L;`
Packit	6c4009	`w.d *= 0x1p-2L;`
Packit	6c4009	`return v.d + w.d;`
Packit	6c4009	`}`
Packit	6c4009	`v.ieee.mantissa1 \|= j;`
Packit	6c4009	`return v.d * 0x1p-130L;`
Packit	6c4009	`}`
Packit	6c4009	`}`
Packit	6c4009	`libm_alias_ldouble (__fma, fma)`

source-git / glibc

Source Code

Blame sysdeps/ieee754/ldbl-96/s_fmal.c