Blame sysdeps/ieee754/ldbl-96/s_fmal.c

Packit 6c4009
/* Compute x * y + z as ternary operation.
Packit 6c4009
   Copyright (C) 2010-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <float.h>
Packit 6c4009
#include <math.h>
Packit 6c4009
#include <fenv.h>
Packit 6c4009
#include <ieee754.h>
Packit 6c4009
#include <math-barriers.h>
Packit 6c4009
#include <math_private.h>
Packit 6c4009
#include <libm-alias-ldouble.h>
Packit 6c4009
#include <tininess.h>
Packit 6c4009
Packit 6c4009
/* This implementation uses rounding to odd to avoid problems with
Packit 6c4009
   double rounding.  See a paper by Boldo and Melquiond:
Packit 6c4009
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
Packit 6c4009
Packit 6c4009
long double
Packit 6c4009
__fmal (long double x, long double y, long double z)
Packit 6c4009
{
Packit 6c4009
  union ieee854_long_double u, v, w;
Packit 6c4009
  int adjust = 0;
Packit 6c4009
  u.d = x;
Packit 6c4009
  v.d = y;
Packit 6c4009
  w.d = z;
Packit 6c4009
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
Packit 6c4009
			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
Packit 6c4009
			   - LDBL_MANT_DIG, 0)
Packit 6c4009
      || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
Packit 6c4009
      || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
Packit 6c4009
      || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
Packit 6c4009
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
Packit 6c4009
			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
Packit 6c4009
    {
Packit 6c4009
      /* If z is Inf, but x and y are finite, the result should be
Packit 6c4009
	 z rather than NaN.  */
Packit 6c4009
      if (w.ieee.exponent == 0x7fff
Packit 6c4009
	  && u.ieee.exponent != 0x7fff
Packit 6c4009
          && v.ieee.exponent != 0x7fff)
Packit 6c4009
	return (z + x) + y;
Packit 6c4009
      /* If z is zero and x are y are nonzero, compute the result
Packit 6c4009
	 as x * y to avoid the wrong sign of a zero result if x * y
Packit 6c4009
	 underflows to 0.  */
Packit 6c4009
      if (z == 0 && x != 0 && y != 0)
Packit 6c4009
	return x * y;
Packit 6c4009
      /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
Packit 6c4009
	 x * y + z.  */
Packit 6c4009
      if (u.ieee.exponent == 0x7fff
Packit 6c4009
	  || v.ieee.exponent == 0x7fff
Packit 6c4009
	  || w.ieee.exponent == 0x7fff
Packit 6c4009
	  || x == 0
Packit 6c4009
	  || y == 0)
Packit 6c4009
	return x * y + z;
Packit 6c4009
      /* If fma will certainly overflow, compute as x * y.  */
Packit 6c4009
      if (u.ieee.exponent + v.ieee.exponent
Packit 6c4009
	  > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
Packit 6c4009
	return x * y;
Packit 6c4009
      /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
Packit 6c4009
	 result nor whether there is underflow depends on its exact
Packit 6c4009
	 value, only on its sign.  */
Packit 6c4009
      if (u.ieee.exponent + v.ieee.exponent
Packit 6c4009
	  < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
Packit 6c4009
	{
Packit 6c4009
	  int neg = u.ieee.negative ^ v.ieee.negative;
Packit 6c4009
	  long double tiny = neg ? -0x1p-16445L : 0x1p-16445L;
Packit 6c4009
	  if (w.ieee.exponent >= 3)
Packit 6c4009
	    return tiny + z;
Packit 6c4009
	  /* Scaling up, adding TINY and scaling down produces the
Packit 6c4009
	     correct result, because in round-to-nearest mode adding
Packit 6c4009
	     TINY has no effect and in other modes double rounding is
Packit 6c4009
	     harmless.  But it may not produce required underflow
Packit 6c4009
	     exceptions.  */
Packit 6c4009
	  v.d = z * 0x1p65L + tiny;
Packit 6c4009
	  if (TININESS_AFTER_ROUNDING
Packit 6c4009
	      ? v.ieee.exponent < 66
Packit 6c4009
	      : (w.ieee.exponent == 0
Packit 6c4009
		 || (w.ieee.exponent == 1
Packit 6c4009
		     && w.ieee.negative != neg
Packit 6c4009
		     && w.ieee.mantissa1 == 0
Packit 6c4009
		     && w.ieee.mantissa0 == 0x80000000)))
Packit 6c4009
	    {
Packit 6c4009
	      long double force_underflow = x * y;
Packit 6c4009
	      math_force_eval (force_underflow);
Packit 6c4009
	    }
Packit 6c4009
	  return v.d * 0x1p-65L;
Packit 6c4009
	}
Packit 6c4009
      if (u.ieee.exponent + v.ieee.exponent
Packit 6c4009
	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
Packit 6c4009
	{
Packit 6c4009
	  /* Compute 1p-64 times smaller result and multiply
Packit 6c4009
	     at the end.  */
Packit 6c4009
	  if (u.ieee.exponent > v.ieee.exponent)
Packit 6c4009
	    u.ieee.exponent -= LDBL_MANT_DIG;
Packit 6c4009
	  else
Packit 6c4009
	    v.ieee.exponent -= LDBL_MANT_DIG;
Packit 6c4009
	  /* If x + y exponent is very large and z exponent is very small,
Packit 6c4009
	     it doesn't matter if we don't adjust it.  */
Packit 6c4009
	  if (w.ieee.exponent > LDBL_MANT_DIG)
Packit 6c4009
	    w.ieee.exponent -= LDBL_MANT_DIG;
Packit 6c4009
	  adjust = 1;
Packit 6c4009
	}
Packit 6c4009
      else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
Packit 6c4009
	{
Packit 6c4009
	  /* Similarly.
Packit 6c4009
	     If z exponent is very large and x and y exponents are
Packit 6c4009
	     very small, adjust them up to avoid spurious underflows,
Packit 6c4009
	     rather than down.  */
Packit 6c4009
	  if (u.ieee.exponent + v.ieee.exponent
Packit 6c4009
	      <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
Packit 6c4009
	    {
Packit 6c4009
	      if (u.ieee.exponent > v.ieee.exponent)
Packit 6c4009
		u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
Packit 6c4009
	      else
Packit 6c4009
		v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
Packit 6c4009
	    }
Packit 6c4009
	  else if (u.ieee.exponent > v.ieee.exponent)
Packit 6c4009
	    {
Packit 6c4009
	      if (u.ieee.exponent > LDBL_MANT_DIG)
Packit 6c4009
		u.ieee.exponent -= LDBL_MANT_DIG;
Packit 6c4009
	    }
Packit 6c4009
	  else if (v.ieee.exponent > LDBL_MANT_DIG)
Packit 6c4009
	    v.ieee.exponent -= LDBL_MANT_DIG;
Packit 6c4009
	  w.ieee.exponent -= LDBL_MANT_DIG;
Packit 6c4009
	  adjust = 1;
Packit 6c4009
	}
Packit 6c4009
      else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
Packit 6c4009
	{
Packit 6c4009
	  u.ieee.exponent -= LDBL_MANT_DIG;
Packit 6c4009
	  if (v.ieee.exponent)
Packit 6c4009
	    v.ieee.exponent += LDBL_MANT_DIG;
Packit 6c4009
	  else
Packit 6c4009
	    v.d *= 0x1p64L;
Packit 6c4009
	}
Packit 6c4009
      else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
Packit 6c4009
	{
Packit 6c4009
	  v.ieee.exponent -= LDBL_MANT_DIG;
Packit 6c4009
	  if (u.ieee.exponent)
Packit 6c4009
	    u.ieee.exponent += LDBL_MANT_DIG;
Packit 6c4009
	  else
Packit 6c4009
	    u.d *= 0x1p64L;
Packit 6c4009
	}
Packit 6c4009
      else /* if (u.ieee.exponent + v.ieee.exponent
Packit 6c4009
		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
Packit 6c4009
	{
Packit 6c4009
	  if (u.ieee.exponent > v.ieee.exponent)
Packit 6c4009
	    u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
Packit 6c4009
	  else
Packit 6c4009
	    v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
Packit 6c4009
	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
Packit 6c4009
	    {
Packit 6c4009
	      if (w.ieee.exponent)
Packit 6c4009
		w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
Packit 6c4009
	      else
Packit 6c4009
		w.d *= 0x1p130L;
Packit 6c4009
	      adjust = -1;
Packit 6c4009
	    }
Packit 6c4009
	  /* Otherwise x * y should just affect inexact
Packit 6c4009
	     and nothing else.  */
Packit 6c4009
	}
Packit 6c4009
      x = u.d;
Packit 6c4009
      y = v.d;
Packit 6c4009
      z = w.d;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Ensure correct sign of exact 0 + 0.  */
Packit 6c4009
  if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
Packit 6c4009
    {
Packit 6c4009
      x = math_opt_barrier (x);
Packit 6c4009
      return x * y + z;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  fenv_t env;
Packit 6c4009
  feholdexcept (&env;;
Packit 6c4009
  fesetround (FE_TONEAREST);
Packit 6c4009
Packit 6c4009
  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
Packit 6c4009
#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
Packit 6c4009
  long double x1 = x * C;
Packit 6c4009
  long double y1 = y * C;
Packit 6c4009
  long double m1 = x * y;
Packit 6c4009
  x1 = (x - x1) + x1;
Packit 6c4009
  y1 = (y - y1) + y1;
Packit 6c4009
  long double x2 = x - x1;
Packit 6c4009
  long double y2 = y - y1;
Packit 6c4009
  long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
Packit 6c4009
Packit 6c4009
  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
Packit 6c4009
  long double a1 = z + m1;
Packit 6c4009
  long double t1 = a1 - z;
Packit 6c4009
  long double t2 = a1 - t1;
Packit 6c4009
  t1 = m1 - t1;
Packit 6c4009
  t2 = z - t2;
Packit 6c4009
  long double a2 = t1 + t2;
Packit 6c4009
  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
Packit 6c4009
  math_force_eval (m2);
Packit 6c4009
  math_force_eval (a2);
Packit 6c4009
  feclearexcept (FE_INEXACT);
Packit 6c4009
Packit 6c4009
  /* If the result is an exact zero, ensure it has the correct sign.  */
Packit 6c4009
  if (a1 == 0 && m2 == 0)
Packit 6c4009
    {
Packit 6c4009
      feupdateenv (&env;;
Packit 6c4009
      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
Packit 6c4009
      z = math_opt_barrier (z);
Packit 6c4009
      return z + m1;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  fesetround (FE_TOWARDZERO);
Packit 6c4009
  /* Perform m2 + a2 addition with round to odd.  */
Packit 6c4009
  u.d = a2 + m2;
Packit 6c4009
Packit 6c4009
  if (__glibc_likely (adjust == 0))
Packit 6c4009
    {
Packit 6c4009
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
Packit 6c4009
	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
Packit 6c4009
      feupdateenv (&env;;
Packit 6c4009
      /* Result is a1 + u.d.  */
Packit 6c4009
      return a1 + u.d;
Packit 6c4009
    }
Packit 6c4009
  else if (__glibc_likely (adjust > 0))
Packit 6c4009
    {
Packit 6c4009
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
Packit 6c4009
	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
Packit 6c4009
      feupdateenv (&env;;
Packit 6c4009
      /* Result is a1 + u.d, scaled up.  */
Packit 6c4009
      return (a1 + u.d) * 0x1p64L;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      if ((u.ieee.mantissa1 & 1) == 0)
Packit 6c4009
	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
Packit 6c4009
      v.d = a1 + u.d;
Packit 6c4009
      /* Ensure the addition is not scheduled after fetestexcept call.  */
Packit 6c4009
      math_force_eval (v.d);
Packit 6c4009
      int j = fetestexcept (FE_INEXACT) != 0;
Packit 6c4009
      feupdateenv (&env;;
Packit 6c4009
      /* Ensure the following computations are performed in default rounding
Packit 6c4009
	 mode instead of just reusing the round to zero computation.  */
Packit 6c4009
      asm volatile ("" : "=m" (u) : "m" (u));
Packit 6c4009
      /* If a1 + u.d is exact, the only rounding happens during
Packit 6c4009
	 scaling down.  */
Packit 6c4009
      if (j == 0)
Packit 6c4009
	return v.d * 0x1p-130L;
Packit 6c4009
      /* If result rounded to zero is not subnormal, no double
Packit 6c4009
	 rounding will occur.  */
Packit 6c4009
      if (v.ieee.exponent > 130)
Packit 6c4009
	return (a1 + u.d) * 0x1p-130L;
Packit 6c4009
      /* If v.d * 0x1p-130L with round to zero is a subnormal above
Packit 6c4009
	 or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa
Packit 6c4009
	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
Packit 6c4009
	 change the round bit, not sticky or guard bit.
Packit 6c4009
	 v.d * 0x1p-130L never normalizes by shifting up,
Packit 6c4009
	 so round bit plus sticky bit should be already enough
Packit 6c4009
	 for proper rounding.  */
Packit 6c4009
      if (v.ieee.exponent == 130)
Packit 6c4009
	{
Packit 6c4009
	  /* If the exponent would be in the normal range when
Packit 6c4009
	     rounding to normal precision with unbounded exponent
Packit 6c4009
	     range, the exact result is known and spurious underflows
Packit 6c4009
	     must be avoided on systems detecting tininess after
Packit 6c4009
	     rounding.  */
Packit 6c4009
	  if (TININESS_AFTER_ROUNDING)
Packit 6c4009
	    {
Packit 6c4009
	      w.d = a1 + u.d;
Packit 6c4009
	      if (w.ieee.exponent == 131)
Packit 6c4009
		return w.d * 0x1p-130L;
Packit 6c4009
	    }
Packit 6c4009
	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
Packit 6c4009
	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
Packit 6c4009
	     bit.  */
Packit 6c4009
	  w.d = 0.0L;
Packit 6c4009
	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
Packit 6c4009
	  w.ieee.negative = v.ieee.negative;
Packit 6c4009
	  v.ieee.mantissa1 &= ~3U;
Packit 6c4009
	  v.d *= 0x1p-130L;
Packit 6c4009
	  w.d *= 0x1p-2L;
Packit 6c4009
	  return v.d + w.d;
Packit 6c4009
	}
Packit 6c4009
      v.ieee.mantissa1 |= j;
Packit 6c4009
      return v.d * 0x1p-130L;
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
libm_alias_ldouble (__fma, fma)