Blame sysdeps/powerpc/fpu/e_sqrtf.c

Packit 6c4009
/* Single-precision floating point square root.
Packit 6c4009
   Copyright (C) 1997-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <math.h>
Packit 6c4009
#include <math_private.h>
Packit 6c4009
#include <fenv_libc.h>
Packit 6c4009
#include <inttypes.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
#include <ldsodefs.h>
Packit 6c4009
Packit 6c4009
#ifndef _ARCH_PPCSQ
Packit 6c4009
static const float almost_half = 0.50000006;	/* 0.5 + 2^-24 */
Packit 6c4009
static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
Packit 6c4009
static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
Packit 6c4009
static const float two48 = 281474976710656.0;
Packit 6c4009
static const float twom24 = 5.9604644775390625e-8;
Packit 6c4009
extern const float __t_sqrt[1024];
Packit 6c4009
Packit 6c4009
/* The method is based on a description in
Packit 6c4009
   Computation of elementary functions on the IBM RISC System/6000 processor,
Packit 6c4009
   P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
Packit 6c4009
   Basically, it consists of two interleaved Newton-Raphson approximations,
Packit 6c4009
   one to find the actual square root, and one to find its reciprocal
Packit 6c4009
   without the expense of a division operation.   The tricky bit here
Packit 6c4009
   is the use of the POWER/PowerPC multiply-add operation to get the
Packit 6c4009
   required accuracy with high speed.
Packit 6c4009
Packit 6c4009
   The argument reduction works by a combination of table lookup to
Packit 6c4009
   obtain the initial guesses, and some careful modification of the
Packit 6c4009
   generated guesses (which mostly runs on the integer unit, while the
Packit 6c4009
   Newton-Raphson is running on the FPU).  */
Packit 6c4009
Packit 6c4009
float
Packit 6c4009
__slow_ieee754_sqrtf (float x)
Packit 6c4009
{
Packit 6c4009
  const float inf = a_inf.value;
Packit 6c4009
Packit 6c4009
  if (x > 0)
Packit 6c4009
    {
Packit 6c4009
      if (x != inf)
Packit 6c4009
	{
Packit 6c4009
	  /* Variables named starting with 's' exist in the
Packit 6c4009
	     argument-reduced space, so that 2 > sx >= 0.5,
Packit 6c4009
	     1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
Packit 6c4009
	     Variables named ending with 'i' are integer versions of
Packit 6c4009
	     floating-point values.  */
Packit 6c4009
	  float sx;		/* The value of which we're trying to find the square
Packit 6c4009
				   root.  */
Packit 6c4009
	  float sg, g;		/* Guess of the square root of x.  */
Packit 6c4009
	  float sd, d;		/* Difference between the square of the guess and x.  */
Packit 6c4009
	  float sy;		/* Estimate of 1/2g (overestimated by 1ulp).  */
Packit 6c4009
	  float sy2;		/* 2*sy */
Packit 6c4009
	  float e;		/* Difference between y*g and 1/2 (note that e==se).  */
Packit 6c4009
	  float shx;		/* == sx * fsg */
Packit 6c4009
	  float fsg;		/* sg*fsg == g.  */
Packit 6c4009
	  fenv_t fe;		/* Saved floating-point environment (stores rounding
Packit 6c4009
				   mode and whether the inexact exception is
Packit 6c4009
				   enabled).  */
Packit 6c4009
	  uint32_t xi, sxi, fsgi;
Packit 6c4009
	  const float *t_sqrt;
Packit 6c4009
Packit 6c4009
	  GET_FLOAT_WORD (xi, x);
Packit 6c4009
	  fe = fegetenv_register ();
Packit 6c4009
	  relax_fenv_state ();
Packit 6c4009
	  sxi = (xi & 0x3fffffff) | 0x3f000000;
Packit 6c4009
	  SET_FLOAT_WORD (sx, sxi);
Packit 6c4009
	  t_sqrt = __t_sqrt + (xi >> (23 - 8 - 1) & 0x3fe);
Packit 6c4009
	  sg = t_sqrt[0];
Packit 6c4009
	  sy = t_sqrt[1];
Packit 6c4009
Packit 6c4009
	  /* Here we have three Newton-Raphson iterations each of a
Packit 6c4009
	     division and a square root and the remainder of the
Packit 6c4009
	     argument reduction, all interleaved.   */
Packit 6c4009
	  sd = -__builtin_fmaf (sg, sg, -sx);
Packit 6c4009
	  fsgi = (xi + 0x40000000) >> 1 & 0x7f800000;
Packit 6c4009
	  sy2 = sy + sy;
Packit 6c4009
	  sg = __builtin_fmaf (sy, sd, sg);	/* 16-bit approximation to
Packit 6c4009
						   sqrt(sx). */
Packit 6c4009
	  e = -__builtin_fmaf (sy, sg, -almost_half);
Packit 6c4009
	  SET_FLOAT_WORD (fsg, fsgi);
Packit 6c4009
	  sd = -__builtin_fmaf (sg, sg, -sx);
Packit 6c4009
	  sy = __builtin_fmaf (e, sy2, sy);
Packit 6c4009
	  if ((xi & 0x7f800000) == 0)
Packit 6c4009
	    goto denorm;
Packit 6c4009
	  shx = sx * fsg;
Packit 6c4009
	  sg = __builtin_fmaf (sy, sd, sg);	/* 32-bit approximation to
Packit 6c4009
						   sqrt(sx), but perhaps
Packit 6c4009
						   rounded incorrectly.  */
Packit 6c4009
	  sy2 = sy + sy;
Packit 6c4009
	  g = sg * fsg;
Packit 6c4009
	  e = -__builtin_fmaf (sy, sg, -almost_half);
Packit 6c4009
	  d = -__builtin_fmaf (g, sg, -shx);
Packit 6c4009
	  sy = __builtin_fmaf (e, sy2, sy);
Packit 6c4009
	  fesetenv_register (fe);
Packit 6c4009
	  return __builtin_fmaf (sy, d, g);
Packit 6c4009
	denorm:
Packit 6c4009
	  /* For denormalised numbers, we normalise, calculate the
Packit 6c4009
	     square root, and return an adjusted result.  */
Packit 6c4009
	  fesetenv_register (fe);
Packit 6c4009
	  return __slow_ieee754_sqrtf (x * two48) * twom24;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else if (x < 0)
Packit 6c4009
    {
Packit 6c4009
      /* For some reason, some PowerPC32 processors don't implement
Packit 6c4009
	 FE_INVALID_SQRT.  */
Packit 6c4009
#ifdef FE_INVALID_SQRT
Packit 6c4009
      feraiseexcept (FE_INVALID_SQRT);
Packit 6c4009
Packit 6c4009
      fenv_union_t u = { .fenv = fegetenv_register () };
Packit 6c4009
      if ((u.l & FE_INVALID) == 0)
Packit 6c4009
#endif
Packit 6c4009
	feraiseexcept (FE_INVALID);
Packit 6c4009
      x = a_nan.value;
Packit 6c4009
    }
Packit 6c4009
  return f_washf (x);
Packit 6c4009
}
Packit 6c4009
#endif /* _ARCH_PPCSQ  */
Packit 6c4009
Packit 6c4009
#undef __ieee754_sqrtf
Packit 6c4009
float
Packit 6c4009
__ieee754_sqrtf (float x)
Packit 6c4009
{
Packit 6c4009
  double z;
Packit 6c4009
Packit 6c4009
#ifdef _ARCH_PPCSQ
Packit 6c4009
  asm ("fsqrts	%0,%1\n" :"=f" (z):"f" (x));
Packit 6c4009
#else
Packit 6c4009
  z = __slow_ieee754_sqrtf (x);
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  return z;
Packit 6c4009
}
Packit 6c4009
strong_alias (__ieee754_sqrtf, __sqrtf_finite)