Blame sysdeps/ieee754/dbl-64/mpa.c

Packit 6c4009
/*
Packit 6c4009
 * IBM Accurate Mathematical Library
Packit 6c4009
 * written by International Business Machines Corp.
Packit 6c4009
 * Copyright (C) 2001-2018 Free Software Foundation, Inc.
Packit 6c4009
 *
Packit 6c4009
 * This program is free software; you can redistribute it and/or modify
Packit 6c4009
 * it under the terms of the GNU Lesser General Public License as published by
Packit 6c4009
 * the Free Software Foundation; either version 2.1 of the License, or
Packit 6c4009
 * (at your option) any later version.
Packit 6c4009
 *
Packit 6c4009
 * This program is distributed in the hope that it will be useful,
Packit 6c4009
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 6c4009
 * GNU Lesser General Public License for more details.
Packit 6c4009
 *
Packit 6c4009
 * You should have received a copy of the GNU Lesser General Public License
Packit 6c4009
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
Packit 6c4009
 */
Packit 6c4009
/************************************************************************/
Packit 6c4009
/*  MODULE_NAME: mpa.c                                                  */
Packit 6c4009
/*                                                                      */
Packit 6c4009
/*  FUNCTIONS:                                                          */
Packit 6c4009
/*               mcr                                                    */
Packit 6c4009
/*               acr                                                    */
Packit 6c4009
/*               cpy                                                    */
Packit 6c4009
/*               norm                                                   */
Packit 6c4009
/*               denorm                                                 */
Packit 6c4009
/*               mp_dbl                                                 */
Packit 6c4009
/*               dbl_mp                                                 */
Packit 6c4009
/*               add_magnitudes                                         */
Packit 6c4009
/*               sub_magnitudes                                         */
Packit 6c4009
/*               add                                                    */
Packit 6c4009
/*               sub                                                    */
Packit 6c4009
/*               mul                                                    */
Packit 6c4009
/*               inv                                                    */
Packit 6c4009
/*               dvd                                                    */
Packit 6c4009
/*                                                                      */
Packit 6c4009
/* Arithmetic functions for multiple precision numbers.                 */
Packit 6c4009
/* Relative errors are bounded                                          */
Packit 6c4009
/************************************************************************/
Packit 6c4009
Packit 6c4009
Packit 6c4009
#include "endian.h"
Packit 6c4009
#include "mpa.h"
Packit 6c4009
#include <sys/param.h>
Packit 6c4009
#include <alloca.h>
Packit 6c4009
Packit 6c4009
#ifndef SECTION
Packit 6c4009
# define SECTION
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#ifndef NO__CONST
Packit 6c4009
const mp_no __mpone = { 1, { 1.0, 1.0 } };
Packit 6c4009
const mp_no __mptwo = { 1, { 1.0, 2.0 } };
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#ifndef NO___ACR
Packit 6c4009
/* Compare mantissa of two multiple precision numbers regardless of the sign
Packit 6c4009
   and exponent of the numbers.  */
Packit 6c4009
static int
Packit 6c4009
mcr (const mp_no *x, const mp_no *y, int p)
Packit 6c4009
{
Packit 6c4009
  long i;
Packit 6c4009
  long p2 = p;
Packit 6c4009
  for (i = 1; i <= p2; i++)
Packit 6c4009
    {
Packit 6c4009
      if (X[i] == Y[i])
Packit 6c4009
	continue;
Packit 6c4009
      else if (X[i] > Y[i])
Packit 6c4009
	return 1;
Packit 6c4009
      else
Packit 6c4009
	return -1;
Packit 6c4009
    }
Packit 6c4009
  return 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Compare the absolute values of two multiple precision numbers.  */
Packit 6c4009
int
Packit 6c4009
__acr (const mp_no *x, const mp_no *y, int p)
Packit 6c4009
{
Packit 6c4009
  long i;
Packit 6c4009
Packit 6c4009
  if (X[0] == 0)
Packit 6c4009
    {
Packit 6c4009
      if (Y[0] == 0)
Packit 6c4009
	i = 0;
Packit 6c4009
      else
Packit 6c4009
	i = -1;
Packit 6c4009
    }
Packit 6c4009
  else if (Y[0] == 0)
Packit 6c4009
    i = 1;
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      if (EX > EY)
Packit 6c4009
	i = 1;
Packit 6c4009
      else if (EX < EY)
Packit 6c4009
	i = -1;
Packit 6c4009
      else
Packit 6c4009
	i = mcr (x, y, p);
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return i;
Packit 6c4009
}
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#ifndef NO___CPY
Packit 6c4009
/* Copy multiple precision number X into Y.  They could be the same
Packit 6c4009
   number.  */
Packit 6c4009
void
Packit 6c4009
__cpy (const mp_no *x, mp_no *y, int p)
Packit 6c4009
{
Packit 6c4009
  long i;
Packit 6c4009
Packit 6c4009
  EY = EX;
Packit 6c4009
  for (i = 0; i <= p; i++)
Packit 6c4009
    Y[i] = X[i];
Packit 6c4009
}
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#ifndef NO___MP_DBL
Packit 6c4009
/* Convert a multiple precision number *X into a double precision
Packit 6c4009
   number *Y, normalized case (|x| >= 2**(-1022))).  X has precision
Packit 6c4009
   P, which is positive.  */
Packit 6c4009
static void
Packit 6c4009
norm (const mp_no *x, double *y, int p)
Packit 6c4009
{
Packit 6c4009
# define R RADIXI
Packit 6c4009
  long i;
Packit 6c4009
  double c;
Packit 6c4009
  mantissa_t a, u, v, z[5];
Packit 6c4009
  if (p < 5)
Packit 6c4009
    {
Packit 6c4009
      if (p == 1)
Packit 6c4009
	c = X[1];
Packit 6c4009
      else if (p == 2)
Packit 6c4009
	c = X[1] + R * X[2];
Packit 6c4009
      else if (p == 3)
Packit 6c4009
	c = X[1] + R * (X[2] + R * X[3]);
Packit 6c4009
      else /* p == 4.  */
Packit 6c4009
	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      for (a = 1, z[1] = X[1]; z[1] < TWO23; )
Packit 6c4009
	{
Packit 6c4009
	  a *= 2;
Packit 6c4009
	  z[1] *= 2;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      for (i = 2; i < 5; i++)
Packit 6c4009
	{
Packit 6c4009
	  mantissa_store_t d, r;
Packit 6c4009
	  d = X[i] * (mantissa_store_t) a;
Packit 6c4009
	  DIV_RADIX (d, r);
Packit 6c4009
	  z[i] = r;
Packit 6c4009
	  z[i - 1] += d;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      u = ALIGN_DOWN_TO (z[3], TWO19);
Packit 6c4009
      v = z[3] - u;
Packit 6c4009
Packit 6c4009
      if (v == TWO18)
Packit 6c4009
	{
Packit 6c4009
	  if (z[4] == 0)
Packit 6c4009
	    {
Packit 6c4009
	      for (i = 5; i <= p; i++)
Packit 6c4009
		{
Packit 6c4009
		  if (X[i] == 0)
Packit 6c4009
		    continue;
Packit 6c4009
		  else
Packit 6c4009
		    {
Packit 6c4009
		      z[3] += 1;
Packit 6c4009
		      break;
Packit 6c4009
		    }
Packit 6c4009
		}
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    z[3] += 1;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      c = (z[1] + R * (z[2] + R * z[3])) / a;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  c *= X[0];
Packit 6c4009
Packit 6c4009
  for (i = 1; i < EX; i++)
Packit 6c4009
    c *= RADIX;
Packit 6c4009
  for (i = 1; i > EX; i--)
Packit 6c4009
    c *= RADIXI;
Packit 6c4009
Packit 6c4009
  *y = c;
Packit 6c4009
# undef R
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Convert a multiple precision number *X into a double precision
Packit 6c4009
   number *Y, Denormal case  (|x| < 2**(-1022))).  */
Packit 6c4009
static void
Packit 6c4009
denorm (const mp_no *x, double *y, int p)
Packit 6c4009
{
Packit 6c4009
  long i, k;
Packit 6c4009
  long p2 = p;
Packit 6c4009
  double c;
Packit 6c4009
  mantissa_t u, z[5];
Packit 6c4009
Packit 6c4009
# define R RADIXI
Packit 6c4009
  if (EX < -44 || (EX == -44 && X[1] < TWO5))
Packit 6c4009
    {
Packit 6c4009
      *y = 0;
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (p2 == 1)
Packit 6c4009
    {
Packit 6c4009
      if (EX == -42)
Packit 6c4009
	{
Packit 6c4009
	  z[1] = X[1] + TWO10;
Packit 6c4009
	  z[2] = 0;
Packit 6c4009
	  z[3] = 0;
Packit 6c4009
	  k = 3;
Packit 6c4009
	}
Packit 6c4009
      else if (EX == -43)
Packit 6c4009
	{
Packit 6c4009
	  z[1] = TWO10;
Packit 6c4009
	  z[2] = X[1];
Packit 6c4009
	  z[3] = 0;
Packit 6c4009
	  k = 2;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  z[1] = TWO10;
Packit 6c4009
	  z[2] = 0;
Packit 6c4009
	  z[3] = X[1];
Packit 6c4009
	  k = 1;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else if (p2 == 2)
Packit 6c4009
    {
Packit 6c4009
      if (EX == -42)
Packit 6c4009
	{
Packit 6c4009
	  z[1] = X[1] + TWO10;
Packit 6c4009
	  z[2] = X[2];
Packit 6c4009
	  z[3] = 0;
Packit 6c4009
	  k = 3;
Packit 6c4009
	}
Packit 6c4009
      else if (EX == -43)
Packit 6c4009
	{
Packit 6c4009
	  z[1] = TWO10;
Packit 6c4009
	  z[2] = X[1];
Packit 6c4009
	  z[3] = X[2];
Packit 6c4009
	  k = 2;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  z[1] = TWO10;
Packit 6c4009
	  z[2] = 0;
Packit 6c4009
	  z[3] = X[1];
Packit 6c4009
	  k = 1;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      if (EX == -42)
Packit 6c4009
	{
Packit 6c4009
	  z[1] = X[1] + TWO10;
Packit 6c4009
	  z[2] = X[2];
Packit 6c4009
	  k = 3;
Packit 6c4009
	}
Packit 6c4009
      else if (EX == -43)
Packit 6c4009
	{
Packit 6c4009
	  z[1] = TWO10;
Packit 6c4009
	  z[2] = X[1];
Packit 6c4009
	  k = 2;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  z[1] = TWO10;
Packit 6c4009
	  z[2] = 0;
Packit 6c4009
	  k = 1;
Packit 6c4009
	}
Packit 6c4009
      z[3] = X[k];
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  u = ALIGN_DOWN_TO (z[3], TWO5);
Packit 6c4009
Packit 6c4009
  if (u == z[3])
Packit 6c4009
    {
Packit 6c4009
      for (i = k + 1; i <= p2; i++)
Packit 6c4009
	{
Packit 6c4009
	  if (X[i] == 0)
Packit 6c4009
	    continue;
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      z[3] += 1;
Packit 6c4009
	      break;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
Packit 6c4009
Packit 6c4009
  *y = c * TWOM1032;
Packit 6c4009
# undef R
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Convert multiple precision number *X into double precision number *Y.  The
Packit 6c4009
   result is correctly rounded to the nearest/even.  */
Packit 6c4009
void
Packit 6c4009
__mp_dbl (const mp_no *x, double *y, int p)
Packit 6c4009
{
Packit 6c4009
  if (X[0] == 0)
Packit 6c4009
    {
Packit 6c4009
      *y = 0;
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
Packit 6c4009
    norm (x, y, p);
Packit 6c4009
  else
Packit 6c4009
    denorm (x, y, p);
Packit 6c4009
}
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
/* Get the multiple precision equivalent of X into *Y.  If the precision is too
Packit 6c4009
   small, the result is truncated.  */
Packit 6c4009
void
Packit 6c4009
SECTION
Packit 6c4009
__dbl_mp (double x, mp_no *y, int p)
Packit 6c4009
{
Packit 6c4009
  long i, n;
Packit 6c4009
  long p2 = p;
Packit 6c4009
Packit 6c4009
  /* Sign.  */
Packit 6c4009
  if (x == 0)
Packit 6c4009
    {
Packit 6c4009
      Y[0] = 0;
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
  else if (x > 0)
Packit 6c4009
    Y[0] = 1;
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      Y[0] = -1;
Packit 6c4009
      x = -x;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Exponent.  */
Packit 6c4009
  for (EY = 1; x >= RADIX; EY += 1)
Packit 6c4009
    x *= RADIXI;
Packit 6c4009
  for (; x < 1; EY -= 1)
Packit 6c4009
    x *= RADIX;
Packit 6c4009
Packit 6c4009
  /* Digits.  */
Packit 6c4009
  n = MIN (p2, 4);
Packit 6c4009
  for (i = 1; i <= n; i++)
Packit 6c4009
    {
Packit 6c4009
      INTEGER_OF (x, Y[i]);
Packit 6c4009
      x *= RADIX;
Packit 6c4009
    }
Packit 6c4009
  for (; i <= p2; i++)
Packit 6c4009
    Y[i] = 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
Packit 6c4009
   sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
Packit 6c4009
   Y and Z.  No guard digit is used.  The result equals the exact sum,
Packit 6c4009
   truncated.  */
Packit 6c4009
static void
Packit 6c4009
SECTION
Packit 6c4009
add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
Packit 6c4009
{
Packit 6c4009
  long i, j, k;
Packit 6c4009
  long p2 = p;
Packit 6c4009
  mantissa_t zk;
Packit 6c4009
Packit 6c4009
  EZ = EX;
Packit 6c4009
Packit 6c4009
  i = p2;
Packit 6c4009
  j = p2 + EY - EX;
Packit 6c4009
  k = p2 + 1;
Packit 6c4009
Packit 6c4009
  if (__glibc_unlikely (j < 1))
Packit 6c4009
    {
Packit 6c4009
      __cpy (x, z, p);
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  zk = 0;
Packit 6c4009
Packit 6c4009
  for (; j > 0; i--, j--)
Packit 6c4009
    {
Packit 6c4009
      zk += X[i] + Y[j];
Packit 6c4009
      if (zk >= RADIX)
Packit 6c4009
	{
Packit 6c4009
	  Z[k--] = zk - RADIX;
Packit 6c4009
	  zk = 1;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  Z[k--] = zk;
Packit 6c4009
	  zk = 0;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  for (; i > 0; i--)
Packit 6c4009
    {
Packit 6c4009
      zk += X[i];
Packit 6c4009
      if (zk >= RADIX)
Packit 6c4009
	{
Packit 6c4009
	  Z[k--] = zk - RADIX;
Packit 6c4009
	  zk = 1;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  Z[k--] = zk;
Packit 6c4009
	  zk = 0;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (zk == 0)
Packit 6c4009
    {
Packit 6c4009
      for (i = 1; i <= p2; i++)
Packit 6c4009
	Z[i] = Z[i + 1];
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      Z[1] = zk;
Packit 6c4009
      EZ += 1;
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
Packit 6c4009
   The sign of the difference *Z is not changed.  X and Y may overlap but not X
Packit 6c4009
   and Z or Y and Z.  One guard digit is used.  The error is less than one
Packit 6c4009
   ULP.  */
Packit 6c4009
static void
Packit 6c4009
SECTION
Packit 6c4009
sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
Packit 6c4009
{
Packit 6c4009
  long i, j, k;
Packit 6c4009
  long p2 = p;
Packit 6c4009
  mantissa_t zk;
Packit 6c4009
Packit 6c4009
  EZ = EX;
Packit 6c4009
  i = p2;
Packit 6c4009
  j = p2 + EY - EX;
Packit 6c4009
  k = p2;
Packit 6c4009
Packit 6c4009
  /* Y is too small compared to X, copy X over to the result.  */
Packit 6c4009
  if (__glibc_unlikely (j < 1))
Packit 6c4009
    {
Packit 6c4009
      __cpy (x, z, p);
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* The relevant least significant digit in Y is non-zero, so we factor it in
Packit 6c4009
     to enhance accuracy.  */
Packit 6c4009
  if (j < p2 && Y[j + 1] > 0)
Packit 6c4009
    {
Packit 6c4009
      Z[k + 1] = RADIX - Y[j + 1];
Packit 6c4009
      zk = -1;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    zk = Z[k + 1] = 0;
Packit 6c4009
Packit 6c4009
  /* Subtract and borrow.  */
Packit 6c4009
  for (; j > 0; i--, j--)
Packit 6c4009
    {
Packit 6c4009
      zk += (X[i] - Y[j]);
Packit 6c4009
      if (zk < 0)
Packit 6c4009
	{
Packit 6c4009
	  Z[k--] = zk + RADIX;
Packit 6c4009
	  zk = -1;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  Z[k--] = zk;
Packit 6c4009
	  zk = 0;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* We're done with digits from Y, so it's just digits in X.  */
Packit 6c4009
  for (; i > 0; i--)
Packit 6c4009
    {
Packit 6c4009
      zk += X[i];
Packit 6c4009
      if (zk < 0)
Packit 6c4009
	{
Packit 6c4009
	  Z[k--] = zk + RADIX;
Packit 6c4009
	  zk = -1;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  Z[k--] = zk;
Packit 6c4009
	  zk = 0;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Normalize.  */
Packit 6c4009
  for (i = 1; Z[i] == 0; i++)
Packit 6c4009
    ;
Packit 6c4009
  EZ = EZ - i + 1;
Packit 6c4009
  for (k = 1; i <= p2 + 1; )
Packit 6c4009
    Z[k++] = Z[i++];
Packit 6c4009
  for (; k <= p2; )
Packit 6c4009
    Z[k++] = 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
Packit 6c4009
   and Z or Y and Z.  One guard digit is used.  The error is less than one
Packit 6c4009
   ULP.  */
Packit 6c4009
void
Packit 6c4009
SECTION
Packit 6c4009
__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
Packit 6c4009
{
Packit 6c4009
  int n;
Packit 6c4009
Packit 6c4009
  if (X[0] == 0)
Packit 6c4009
    {
Packit 6c4009
      __cpy (y, z, p);
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
  else if (Y[0] == 0)
Packit 6c4009
    {
Packit 6c4009
      __cpy (x, z, p);
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (X[0] == Y[0])
Packit 6c4009
    {
Packit 6c4009
      if (__acr (x, y, p) > 0)
Packit 6c4009
	{
Packit 6c4009
	  add_magnitudes (x, y, z, p);
Packit 6c4009
	  Z[0] = X[0];
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  add_magnitudes (y, x, z, p);
Packit 6c4009
	  Z[0] = Y[0];
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      if ((n = __acr (x, y, p)) == 1)
Packit 6c4009
	{
Packit 6c4009
	  sub_magnitudes (x, y, z, p);
Packit 6c4009
	  Z[0] = X[0];
Packit 6c4009
	}
Packit 6c4009
      else if (n == -1)
Packit 6c4009
	{
Packit 6c4009
	  sub_magnitudes (y, x, z, p);
Packit 6c4009
	  Z[0] = Y[0];
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	Z[0] = 0;
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
Packit 6c4009
   not X and Z or Y and Z.  One guard digit is used.  The error is less than
Packit 6c4009
   one ULP.  */
Packit 6c4009
void
Packit 6c4009
SECTION
Packit 6c4009
__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
Packit 6c4009
{
Packit 6c4009
  int n;
Packit 6c4009
Packit 6c4009
  if (X[0] == 0)
Packit 6c4009
    {
Packit 6c4009
      __cpy (y, z, p);
Packit 6c4009
      Z[0] = -Z[0];
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
  else if (Y[0] == 0)
Packit 6c4009
    {
Packit 6c4009
      __cpy (x, z, p);
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (X[0] != Y[0])
Packit 6c4009
    {
Packit 6c4009
      if (__acr (x, y, p) > 0)
Packit 6c4009
	{
Packit 6c4009
	  add_magnitudes (x, y, z, p);
Packit 6c4009
	  Z[0] = X[0];
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  add_magnitudes (y, x, z, p);
Packit 6c4009
	  Z[0] = -Y[0];
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      if ((n = __acr (x, y, p)) == 1)
Packit 6c4009
	{
Packit 6c4009
	  sub_magnitudes (x, y, z, p);
Packit 6c4009
	  Z[0] = X[0];
Packit 6c4009
	}
Packit 6c4009
      else if (n == -1)
Packit 6c4009
	{
Packit 6c4009
	  sub_magnitudes (y, x, z, p);
Packit 6c4009
	  Z[0] = -Y[0];
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	Z[0] = 0;
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
#ifndef NO__MUL
Packit 6c4009
/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
Packit 6c4009
   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
Packit 6c4009
   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
Packit 6c4009
void
Packit 6c4009
SECTION
Packit 6c4009
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
Packit 6c4009
{
Packit 6c4009
  long i, j, k, ip, ip2;
Packit 6c4009
  long p2 = p;
Packit 6c4009
  mantissa_store_t zk;
Packit 6c4009
  const mp_no *a;
Packit 6c4009
  mantissa_store_t *diag;
Packit 6c4009
Packit 6c4009
  /* Is z=0?  */
Packit 6c4009
  if (__glibc_unlikely (X[0] * Y[0] == 0))
Packit 6c4009
    {
Packit 6c4009
      Z[0] = 0;
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* We need not iterate through all X's and Y's since it's pointless to
Packit 6c4009
     multiply zeroes.  Here, both are zero...  */
Packit 6c4009
  for (ip2 = p2; ip2 > 0; ip2--)
Packit 6c4009
    if (X[ip2] != 0 || Y[ip2] != 0)
Packit 6c4009
      break;
Packit 6c4009
Packit 6c4009
  a = X[ip2] != 0 ? y : x;
Packit 6c4009
Packit 6c4009
  /* ... and here, at least one of them is still zero.  */
Packit 6c4009
  for (ip = ip2; ip > 0; ip--)
Packit 6c4009
    if (a->d[ip] != 0)
Packit 6c4009
      break;
Packit 6c4009
Packit 6c4009
  /* The product looks like this for p = 3 (as an example):
Packit 6c4009
Packit 6c4009
Packit 6c4009
				a1    a2    a3
Packit 6c4009
		 x		b1    b2    b3
Packit 6c4009
		 -----------------------------
Packit 6c4009
			     a1*b3 a2*b3 a3*b3
Packit 6c4009
		       a1*b2 a2*b2 a3*b2
Packit 6c4009
		 a1*b1 a2*b1 a3*b1
Packit 6c4009
Packit 6c4009
     So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
Packit 6c4009
     for P >= 3.  We compute the above digits in two parts; the last P-1
Packit 6c4009
     digits and then the first P digits.  The last P-1 digits are a sum of
Packit 6c4009
     products of the input digits from P to P-k where K is 0 for the least
Packit 6c4009
     significant digit and increases as we go towards the left.  The product
Packit 6c4009
     term is of the form X[k]*X[P-k] as can be seen in the above example.
Packit 6c4009
Packit 6c4009
     The first P digits are also a sum of products with the same product term,
Packit 6c4009
     except that the sum is from 1 to k.  This is also evident from the above
Packit 6c4009
     example.
Packit 6c4009
Packit 6c4009
     Another thing that becomes evident is that only the most significant
Packit 6c4009
     ip+ip2 digits of the result are non-zero, where ip and ip2 are the
Packit 6c4009
     'internal precision' of the input numbers, i.e. digits after ip and ip2
Packit 6c4009
     are all 0.  */
Packit 6c4009
Packit 6c4009
  k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;
Packit 6c4009
Packit 6c4009
  while (k > ip + ip2 + 1)
Packit 6c4009
    Z[k--] = 0;
Packit 6c4009
Packit 6c4009
  zk = 0;
Packit 6c4009
Packit 6c4009
  /* Precompute sums of diagonal elements so that we can directly use them
Packit 6c4009
     later.  See the next comment to know we why need them.  */
Packit 6c4009
  diag = alloca (k * sizeof (mantissa_store_t));
Packit 6c4009
  mantissa_store_t d = 0;
Packit 6c4009
  for (i = 1; i <= ip; i++)
Packit 6c4009
    {
Packit 6c4009
      d += X[i] * (mantissa_store_t) Y[i];
Packit 6c4009
      diag[i] = d;
Packit 6c4009
    }
Packit 6c4009
  while (i < k)
Packit 6c4009
    diag[i++] = d;
Packit 6c4009
Packit 6c4009
  while (k > p2)
Packit 6c4009
    {
Packit 6c4009
      long lim = k / 2;
Packit 6c4009
Packit 6c4009
      if (k % 2 == 0)
Packit 6c4009
	/* We want to add this only once, but since we subtract it in the sum
Packit 6c4009
	   of products above, we add twice.  */
Packit 6c4009
	zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
Packit 6c4009
Packit 6c4009
      for (i = k - p2, j = p2; i < j; i++, j--)
Packit 6c4009
	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
Packit 6c4009
Packit 6c4009
      zk -= diag[k - 1];
Packit 6c4009
Packit 6c4009
      DIV_RADIX (zk, Z[k]);
Packit 6c4009
      k--;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* The real deal.  Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
Packit 6c4009
     goes from 1 -> k - 1 and j goes the same range in reverse.  To reduce the
Packit 6c4009
     number of multiplications, we halve the range and if k is an even number,
Packit 6c4009
     add the diagonal element X[k/2]Y[k/2].  Through the half range, we compute
Packit 6c4009
     X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
Packit 6c4009
Packit 6c4009
     This reduction tells us that we're summing two things, the first term
Packit 6c4009
     through the half range and the negative of the sum of the product of all
Packit 6c4009
     terms of X and Y in the full range.  i.e.
Packit 6c4009
Packit 6c4009
     SUM(X[i] * Y[i]) for k terms.  This is precalculated above for each k in
Packit 6c4009
     a single loop so that it completes in O(n) time and can hence be directly
Packit 6c4009
     used in the loop below.  */
Packit 6c4009
  while (k > 1)
Packit 6c4009
    {
Packit 6c4009
      long lim = k / 2;
Packit 6c4009
Packit 6c4009
      if (k % 2 == 0)
Packit 6c4009
	/* We want to add this only once, but since we subtract it in the sum
Packit 6c4009
	   of products above, we add twice.  */
Packit 6c4009
        zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
Packit 6c4009
Packit 6c4009
      for (i = 1, j = k - 1; i < j; i++, j--)
Packit 6c4009
	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
Packit 6c4009
Packit 6c4009
      zk -= diag[k - 1];
Packit 6c4009
Packit 6c4009
      DIV_RADIX (zk, Z[k]);
Packit 6c4009
      k--;
Packit 6c4009
    }
Packit 6c4009
  Z[k] = zk;
Packit 6c4009
Packit 6c4009
  /* Get the exponent sum into an intermediate variable.  This is a subtle
Packit 6c4009
     optimization, where given enough registers, all operations on the exponent
Packit 6c4009
     happen in registers and the result is written out only once into EZ.  */
Packit 6c4009
  int e = EX + EY;
Packit 6c4009
Packit 6c4009
  /* Is there a carry beyond the most significant digit?  */
Packit 6c4009
  if (__glibc_unlikely (Z[1] == 0))
Packit 6c4009
    {
Packit 6c4009
      for (i = 1; i <= p2; i++)
Packit 6c4009
	Z[i] = Z[i + 1];
Packit 6c4009
      e--;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  EZ = e;
Packit 6c4009
  Z[0] = X[0] * Y[0];
Packit 6c4009
}
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#ifndef NO__SQR
Packit 6c4009
/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
Packit 6c4009
   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
Packit 6c4009
   error is bounded by 1.001 ULP.  This is a faster special case of
Packit 6c4009
   multiplication.  */
Packit 6c4009
void
Packit 6c4009
SECTION
Packit 6c4009
__sqr (const mp_no *x, mp_no *y, int p)
Packit 6c4009
{
Packit 6c4009
  long i, j, k, ip;
Packit 6c4009
  mantissa_store_t yk;
Packit 6c4009
Packit 6c4009
  /* Is z=0?  */
Packit 6c4009
  if (__glibc_unlikely (X[0] == 0))
Packit 6c4009
    {
Packit 6c4009
      Y[0] = 0;
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* We need not iterate through all X's since it's pointless to
Packit 6c4009
     multiply zeroes.  */
Packit 6c4009
  for (ip = p; ip > 0; ip--)
Packit 6c4009
    if (X[ip] != 0)
Packit 6c4009
      break;
Packit 6c4009
Packit 6c4009
  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
Packit 6c4009
Packit 6c4009
  while (k > 2 * ip + 1)
Packit 6c4009
    Y[k--] = 0;
Packit 6c4009
Packit 6c4009
  yk = 0;
Packit 6c4009
Packit 6c4009
  while (k > p)
Packit 6c4009
    {
Packit 6c4009
      mantissa_store_t yk2 = 0;
Packit 6c4009
      long lim = k / 2;
Packit 6c4009
Packit 6c4009
      if (k % 2 == 0)
Packit 6c4009
	yk += X[lim] * (mantissa_store_t) X[lim];
Packit 6c4009
Packit 6c4009
      /* In __mul, this loop (and the one within the next while loop) run
Packit 6c4009
         between a range to calculate the mantissa as follows:
Packit 6c4009
Packit 6c4009
         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
Packit 6c4009
		+ X[n] * Y[k]
Packit 6c4009
Packit 6c4009
         For X == Y, we can get away with summing halfway and doubling the
Packit 6c4009
	 result.  For cases where the range size is even, the mid-point needs
Packit 6c4009
	 to be added separately (above).  */
Packit 6c4009
      for (i = k - p, j = p; i < j; i++, j--)
Packit 6c4009
	yk2 += X[i] * (mantissa_store_t) X[j];
Packit 6c4009
Packit 6c4009
      yk += 2 * yk2;
Packit 6c4009
Packit 6c4009
      DIV_RADIX (yk, Y[k]);
Packit 6c4009
      k--;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  while (k > 1)
Packit 6c4009
    {
Packit 6c4009
      mantissa_store_t yk2 = 0;
Packit 6c4009
      long lim = k / 2;
Packit 6c4009
Packit 6c4009
      if (k % 2 == 0)
Packit 6c4009
	yk += X[lim] * (mantissa_store_t) X[lim];
Packit 6c4009
Packit 6c4009
      /* Likewise for this loop.  */
Packit 6c4009
      for (i = 1, j = k - 1; i < j; i++, j--)
Packit 6c4009
	yk2 += X[i] * (mantissa_store_t) X[j];
Packit 6c4009
Packit 6c4009
      yk += 2 * yk2;
Packit 6c4009
Packit 6c4009
      DIV_RADIX (yk, Y[k]);
Packit 6c4009
      k--;
Packit 6c4009
    }
Packit 6c4009
  Y[k] = yk;
Packit 6c4009
Packit 6c4009
  /* Squares are always positive.  */
Packit 6c4009
  Y[0] = 1;
Packit 6c4009
Packit 6c4009
  /* Get the exponent sum into an intermediate variable.  This is a subtle
Packit 6c4009
     optimization, where given enough registers, all operations on the exponent
Packit 6c4009
     happen in registers and the result is written out only once into EZ.  */
Packit 6c4009
  int e = EX * 2;
Packit 6c4009
Packit 6c4009
  /* Is there a carry beyond the most significant digit?  */
Packit 6c4009
  if (__glibc_unlikely (Y[1] == 0))
Packit 6c4009
    {
Packit 6c4009
      for (i = 1; i <= p; i++)
Packit 6c4009
	Y[i] = Y[i + 1];
Packit 6c4009
      e--;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  EY = e;
Packit 6c4009
}
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
/* Invert *X and store in *Y.  Relative error bound:
Packit 6c4009
   - For P = 2: 1.001 * R ^ (1 - P)
Packit 6c4009
   - For P = 3: 1.063 * R ^ (1 - P)
Packit 6c4009
   - For P > 3: 2.001 * R ^ (1 - P)
Packit 6c4009
Packit 6c4009
   *X = 0 is not permissible.  */
Packit 6c4009
static void
Packit 6c4009
SECTION
Packit 6c4009
__inv (const mp_no *x, mp_no *y, int p)
Packit 6c4009
{
Packit 6c4009
  long i;
Packit 6c4009
  double t;
Packit 6c4009
  mp_no z, w;
Packit 6c4009
  static const int np1[] =
Packit 6c4009
    { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
Packit 6c4009
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
Packit 6c4009
  };
Packit 6c4009
Packit 6c4009
  __cpy (x, &z, p);
Packit 6c4009
  z.e = 0;
Packit 6c4009
  __mp_dbl (&z, &t, p);
Packit 6c4009
  t = 1 / t;
Packit 6c4009
  __dbl_mp (t, y, p);
Packit 6c4009
  EY -= EX;
Packit 6c4009
Packit 6c4009
  for (i = 0; i < np1[p]; i++)
Packit 6c4009
    {
Packit 6c4009
      __cpy (y, &w, p);
Packit 6c4009
      __mul (x, &w, y, p);
Packit 6c4009
      __sub (&__mptwo, y, &z, p);
Packit 6c4009
      __mul (&w, &z, y, p);
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X and Z
Packit 6c4009
   or Y and Z.  Relative error bound:
Packit 6c4009
   - For P = 2: 2.001 * R ^ (1 - P)
Packit 6c4009
   - For P = 3: 2.063 * R ^ (1 - P)
Packit 6c4009
   - For P > 3: 3.001 * R ^ (1 - P)
Packit 6c4009
Packit 6c4009
   *X = 0 is not permissible.  */
Packit 6c4009
void
Packit 6c4009
SECTION
Packit 6c4009
__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
Packit 6c4009
{
Packit 6c4009
  mp_no w;
Packit 6c4009
Packit 6c4009
  if (X[0] == 0)
Packit 6c4009
    Z[0] = 0;
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      __inv (y, &w, p);
Packit 6c4009
      __mul (x, &w, z, p);
Packit 6c4009
    }
Packit 6c4009
}