hjl / source-git / glibc

Forked from source-git/glibc 3 years ago
Clone

Blame sysdeps/powerpc/fpu/e_sqrt.c

Packit 6c4009
/* Double-precision floating point square root.
Packit 6c4009
   Copyright (C) 1997-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <math.h>
Packit 6c4009
#include <math_private.h>
Packit 6c4009
#include <fenv_libc.h>
Packit 6c4009
#include <inttypes.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
#include <ldsodefs.h>
Packit 6c4009
Packit 6c4009
#ifndef _ARCH_PPCSQ
Packit 6c4009
static const double almost_half = 0.5000000000000001;	/* 0.5 + 2^-53 */
Packit 6c4009
static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
Packit 6c4009
static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
Packit 6c4009
static const float two108 = 3.245185536584267269e+32;
Packit 6c4009
static const float twom54 = 5.551115123125782702e-17;
Packit 6c4009
extern const float __t_sqrt[1024];
Packit 6c4009
Packit 6c4009
/* The method is based on a description in
Packit 6c4009
   Computation of elementary functions on the IBM RISC System/6000 processor,
Packit 6c4009
   P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
Packit 6c4009
   Basically, it consists of two interleaved Newton-Raphson approximations,
Packit 6c4009
   one to find the actual square root, and one to find its reciprocal
Packit 6c4009
   without the expense of a division operation.   The tricky bit here
Packit 6c4009
   is the use of the POWER/PowerPC multiply-add operation to get the
Packit 6c4009
   required accuracy with high speed.
Packit 6c4009
Packit 6c4009
   The argument reduction works by a combination of table lookup to
Packit 6c4009
   obtain the initial guesses, and some careful modification of the
Packit 6c4009
   generated guesses (which mostly runs on the integer unit, while the
Packit 6c4009
   Newton-Raphson is running on the FPU).  */
Packit 6c4009
Packit 6c4009
double
Packit 6c4009
__slow_ieee754_sqrt (double x)
Packit 6c4009
{
Packit 6c4009
  const float inf = a_inf.value;
Packit 6c4009
Packit 6c4009
  if (x > 0)
Packit 6c4009
    {
Packit 6c4009
      /* schedule the EXTRACT_WORDS to get separation between the store
Packit 6c4009
	 and the load.  */
Packit 6c4009
      ieee_double_shape_type ew_u;
Packit 6c4009
      ieee_double_shape_type iw_u;
Packit 6c4009
      ew_u.value = (x);
Packit 6c4009
      if (x != inf)
Packit 6c4009
	{
Packit 6c4009
	  /* Variables named starting with 's' exist in the
Packit 6c4009
	     argument-reduced space, so that 2 > sx >= 0.5,
Packit 6c4009
	     1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
Packit 6c4009
	     Variables named ending with 'i' are integer versions of
Packit 6c4009
	     floating-point values.  */
Packit 6c4009
	  double sx;	/* The value of which we're trying to find the
Packit 6c4009
			   square root.  */
Packit 6c4009
	  double sg, g;	/* Guess of the square root of x.  */
Packit 6c4009
	  double sd, d;	/* Difference between the square of the guess and x.  */
Packit 6c4009
	  double sy;	/* Estimate of 1/2g (overestimated by 1ulp).  */
Packit 6c4009
	  double sy2;	/* 2*sy */
Packit 6c4009
	  double e;	/* Difference between y*g and 1/2 (se = e * fsy).  */
Packit 6c4009
	  double shx;	/* == sx * fsg */
Packit 6c4009
	  double fsg;	/* sg*fsg == g.  */
Packit 6c4009
	  fenv_t fe;	/* Saved floating-point environment (stores rounding
Packit 6c4009
			   mode and whether the inexact exception is
Packit 6c4009
			   enabled).  */
Packit 6c4009
	  uint32_t xi0, xi1, sxi, fsgi;
Packit 6c4009
	  const float *t_sqrt;
Packit 6c4009
Packit 6c4009
	  fe = fegetenv_register ();
Packit 6c4009
	  /* complete the EXTRACT_WORDS (xi0,xi1,x) operation.  */
Packit 6c4009
	  xi0 = ew_u.parts.msw;
Packit 6c4009
	  xi1 = ew_u.parts.lsw;
Packit 6c4009
	  relax_fenv_state ();
Packit 6c4009
	  sxi = (xi0 & 0x3fffffff) | 0x3fe00000;
Packit 6c4009
	  /* schedule the INSERT_WORDS (sx, sxi, xi1) to get separation
Packit 6c4009
	     between the store and the load.  */
Packit 6c4009
	  iw_u.parts.msw = sxi;
Packit 6c4009
	  iw_u.parts.lsw = xi1;
Packit 6c4009
	  t_sqrt = __t_sqrt + (xi0 >> (52 - 32 - 8 - 1) & 0x3fe);
Packit 6c4009
	  sg = t_sqrt[0];
Packit 6c4009
	  sy = t_sqrt[1];
Packit 6c4009
	  /* complete the INSERT_WORDS (sx, sxi, xi1) operation.  */
Packit 6c4009
	  sx = iw_u.value;
Packit 6c4009
Packit 6c4009
	  /* Here we have three Newton-Raphson iterations each of a
Packit 6c4009
	     division and a square root and the remainder of the
Packit 6c4009
	     argument reduction, all interleaved.   */
Packit 6c4009
	  sd = -__builtin_fma (sg, sg, -sx);
Packit 6c4009
	  fsgi = (xi0 + 0x40000000) >> 1 & 0x7ff00000;
Packit 6c4009
	  sy2 = sy + sy;
Packit 6c4009
	  sg = __builtin_fma (sy, sd, sg);	/* 16-bit approximation to
Packit 6c4009
						   sqrt(sx). */
Packit 6c4009
Packit 6c4009
	  /* schedule the INSERT_WORDS (fsg, fsgi, 0) to get separation
Packit 6c4009
	     between the store and the load.  */
Packit 6c4009
	  INSERT_WORDS (fsg, fsgi, 0);
Packit 6c4009
	  iw_u.parts.msw = fsgi;
Packit 6c4009
	  iw_u.parts.lsw = (0);
Packit 6c4009
	  e = -__builtin_fma (sy, sg, -almost_half);
Packit 6c4009
	  sd = -__builtin_fma (sg, sg, -sx);
Packit 6c4009
	  if ((xi0 & 0x7ff00000) == 0)
Packit 6c4009
	    goto denorm;
Packit 6c4009
	  sy = __builtin_fma (e, sy2, sy);
Packit 6c4009
	  sg = __builtin_fma (sy, sd, sg);	/* 32-bit approximation to
Packit 6c4009
						   sqrt(sx).  */
Packit 6c4009
	  sy2 = sy + sy;
Packit 6c4009
	  /* complete the INSERT_WORDS (fsg, fsgi, 0) operation.  */
Packit 6c4009
	  fsg = iw_u.value;
Packit 6c4009
	  e = -__builtin_fma (sy, sg, -almost_half);
Packit 6c4009
	  sd = -__builtin_fma (sg, sg, -sx);
Packit 6c4009
	  sy = __builtin_fma (e, sy2, sy);
Packit 6c4009
	  shx = sx * fsg;
Packit 6c4009
	  sg = __builtin_fma (sy, sd, sg);	/* 64-bit approximation to
Packit 6c4009
						   sqrt(sx), but perhaps
Packit 6c4009
						   rounded incorrectly.  */
Packit 6c4009
	  sy2 = sy + sy;
Packit 6c4009
	  g = sg * fsg;
Packit 6c4009
	  e = -__builtin_fma (sy, sg, -almost_half);
Packit 6c4009
	  d = -__builtin_fma (g, sg, -shx);
Packit 6c4009
	  sy = __builtin_fma (e, sy2, sy);
Packit 6c4009
	  fesetenv_register (fe);
Packit 6c4009
	  return __builtin_fma (sy, d, g);
Packit 6c4009
	denorm:
Packit 6c4009
	  /* For denormalised numbers, we normalise, calculate the
Packit 6c4009
	     square root, and return an adjusted result.  */
Packit 6c4009
	  fesetenv_register (fe);
Packit 6c4009
	  return __slow_ieee754_sqrt (x * two108) * twom54;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else if (x < 0)
Packit 6c4009
    {
Packit 6c4009
      /* For some reason, some PowerPC32 processors don't implement
Packit 6c4009
	 FE_INVALID_SQRT.  */
Packit 6c4009
#ifdef FE_INVALID_SQRT
Packit 6c4009
      __feraiseexcept (FE_INVALID_SQRT);
Packit 6c4009
Packit 6c4009
      fenv_union_t u = { .fenv = fegetenv_register () };
Packit 6c4009
      if ((u.l & FE_INVALID) == 0)
Packit 6c4009
#endif
Packit 6c4009
	__feraiseexcept (FE_INVALID);
Packit 6c4009
      x = a_nan.value;
Packit 6c4009
    }
Packit 6c4009
  return f_wash (x);
Packit 6c4009
}
Packit 6c4009
#endif /* _ARCH_PPCSQ  */
Packit 6c4009
Packit 6c4009
#undef __ieee754_sqrt
Packit 6c4009
double
Packit 6c4009
__ieee754_sqrt (double x)
Packit 6c4009
{
Packit 6c4009
  double z;
Packit 6c4009
Packit 6c4009
#ifdef _ARCH_PPCSQ
Packit 6c4009
  asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x));
Packit 6c4009
#else
Packit 6c4009
  z = __slow_ieee754_sqrt (x);
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  return z;
Packit 6c4009
}
Packit 6c4009
strong_alias (__ieee754_sqrt, __sqrt_finite)