Blame src/fma.c

Packit 80c72f
/* mpc_fma -- Fused multiply-add of three complex numbers
Packit 80c72f
Packit 80c72f
Copyright (C) 2011, 2012 INRIA
Packit 80c72f
Packit 80c72f
This file is part of GNU MPC.
Packit 80c72f
Packit 80c72f
GNU MPC is free software; you can redistribute it and/or modify it under
Packit 80c72f
the terms of the GNU Lesser General Public License as published by the
Packit 80c72f
Free Software Foundation; either version 3 of the License, or (at your
Packit 80c72f
option) any later version.
Packit 80c72f
Packit 80c72f
GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY
Packit 80c72f
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
Packit 80c72f
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
Packit 80c72f
more details.
Packit 80c72f
Packit 80c72f
You should have received a copy of the GNU Lesser General Public License
Packit 80c72f
along with this program. If not, see http://www.gnu.org/licenses/ .
Packit 80c72f
*/
Packit 80c72f
Packit 80c72f
#include "mpc-impl.h"
Packit 80c72f
Packit 80c72f
/* return a bound on the precision needed to add or subtract x and y exactly */
Packit 80c72f
static mpfr_prec_t
Packit 80c72f
bound_prec_addsub (mpfr_srcptr x, mpfr_srcptr y)
Packit 80c72f
{
Packit 80c72f
  if (!mpfr_regular_p (x))
Packit 80c72f
    return mpfr_get_prec (y);
Packit 80c72f
  else if (!mpfr_regular_p (y))
Packit 80c72f
    return mpfr_get_prec (x);
Packit 80c72f
  else /* neither x nor y are NaN, Inf or zero */
Packit 80c72f
    {
Packit 80c72f
      mpfr_exp_t ex = mpfr_get_exp (x);
Packit 80c72f
      mpfr_exp_t ey = mpfr_get_exp (y);
Packit 80c72f
      mpfr_exp_t ulpx = ex - mpfr_get_prec (x);
Packit 80c72f
      mpfr_exp_t ulpy = ey - mpfr_get_prec (y);
Packit 80c72f
      return ((ex >= ey) ? ex : ey) + 1 - ((ulpx <= ulpy) ? ulpx : ulpy);
Packit 80c72f
    }
Packit 80c72f
}
Packit 80c72f
Packit 80c72f
/* r <- a*b+c */
Packit 80c72f
int
Packit 80c72f
mpc_fma_naive (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd)
Packit 80c72f
{
Packit 80c72f
  mpfr_t rea_reb, rea_imb, ima_reb, ima_imb, tmp;
Packit 80c72f
  mpfr_prec_t pre12, pre13, pre23, pim12, pim13, pim23;
Packit 80c72f
  int inex_re, inex_im;
Packit 80c72f
Packit 80c72f
  mpfr_init2 (rea_reb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_realref(b)));
Packit 80c72f
  mpfr_init2 (rea_imb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_imagref(b)));
Packit 80c72f
  mpfr_init2 (ima_reb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_realref(b)));
Packit 80c72f
  mpfr_init2 (ima_imb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_imagref(b)));
Packit 80c72f
Packit 80c72f
  mpfr_mul (rea_reb, mpc_realref(a), mpc_realref(b), GMP_RNDZ); /* exact */
Packit 80c72f
  mpfr_mul (rea_imb, mpc_realref(a), mpc_imagref(b), GMP_RNDZ); /* exact */
Packit 80c72f
  mpfr_mul (ima_reb, mpc_imagref(a), mpc_realref(b), GMP_RNDZ); /* exact */
Packit 80c72f
  mpfr_mul (ima_imb, mpc_imagref(a), mpc_imagref(b), GMP_RNDZ); /* exact */
Packit 80c72f
Packit 80c72f
  /* Re(r) <- rea_reb - ima_imb + Re(c) */
Packit 80c72f
Packit 80c72f
  pre12 = bound_prec_addsub (rea_reb, ima_imb); /* bound on exact precision for
Packit 80c72f
						   rea_reb - ima_imb */
Packit 80c72f
  pre13 = bound_prec_addsub (rea_reb, mpc_realref(c));
Packit 80c72f
  /* bound for rea_reb + Re(c) */
Packit 80c72f
  pre23 = bound_prec_addsub (ima_imb, mpc_realref(c));
Packit 80c72f
  /* bound for ima_imb - Re(c) */
Packit 80c72f
  if (pre12 <= pre13 && pre12 <= pre23) /* (rea_reb - ima_imb) + Re(c) */
Packit 80c72f
    {
Packit 80c72f
      mpfr_init2 (tmp, pre12);
Packit 80c72f
      mpfr_sub (tmp, rea_reb, ima_imb, GMP_RNDZ); /* exact */
Packit 80c72f
      inex_re = mpfr_add (mpc_realref(r), tmp, mpc_realref(c), MPC_RND_RE(rnd));
Packit 80c72f
      /* the only possible bad overlap is between r and c, but since we are
Packit 80c72f
	 only touching the real part of both, it is ok */
Packit 80c72f
    }
Packit 80c72f
  else if (pre13 <= pre23) /* (rea_reb + Re(c)) - ima_imb */
Packit 80c72f
    {
Packit 80c72f
      mpfr_init2 (tmp, pre13);
Packit 80c72f
      mpfr_add (tmp, rea_reb, mpc_realref(c), GMP_RNDZ); /* exact */
Packit 80c72f
      inex_re = mpfr_sub (mpc_realref(r), tmp, ima_imb, MPC_RND_RE(rnd));
Packit 80c72f
      /* the only possible bad overlap is between r and c, but since we are
Packit 80c72f
	 only touching the real part of both, it is ok */
Packit 80c72f
    }
Packit 80c72f
  else /* rea_reb + (Re(c) - ima_imb) */
Packit 80c72f
    {
Packit 80c72f
      mpfr_init2 (tmp, pre23);
Packit 80c72f
      mpfr_sub (tmp, mpc_realref(c), ima_imb, GMP_RNDZ); /* exact */
Packit 80c72f
      inex_re = mpfr_add (mpc_realref(r), tmp, rea_reb, MPC_RND_RE(rnd));
Packit 80c72f
      /* the only possible bad overlap is between r and c, but since we are
Packit 80c72f
	 only touching the real part of both, it is ok */
Packit 80c72f
    }
Packit 80c72f
Packit 80c72f
  /* Im(r) <- rea_imb + ima_reb + Im(c) */
Packit 80c72f
  pim12 = bound_prec_addsub (rea_imb, ima_reb); /* bound on exact precision for
Packit 80c72f
						   rea_imb + ima_reb */
Packit 80c72f
  pim13 = bound_prec_addsub (rea_imb, mpc_imagref(c));
Packit 80c72f
  /* bound for rea_imb + Im(c) */
Packit 80c72f
  pim23 = bound_prec_addsub (ima_reb, mpc_imagref(c));
Packit 80c72f
  /* bound for ima_reb + Im(c) */
Packit 80c72f
  if (pim12 <= pim13 && pim12 <= pim23) /* (rea_imb + ima_reb) + Im(c) */
Packit 80c72f
    {
Packit 80c72f
      mpfr_set_prec (tmp, pim12);
Packit 80c72f
      mpfr_add (tmp, rea_imb, ima_reb, GMP_RNDZ); /* exact */
Packit 80c72f
      inex_im = mpfr_add (mpc_imagref(r), tmp, mpc_imagref(c), MPC_RND_IM(rnd));
Packit 80c72f
      /* the only possible bad overlap is between r and c, but since we are
Packit 80c72f
	 only touching the imaginary part of both, it is ok */
Packit 80c72f
    }
Packit 80c72f
  else if (pim13 <= pim23) /* (rea_imb + Im(c)) + ima_reb */
Packit 80c72f
    {
Packit 80c72f
      mpfr_set_prec (tmp, pim13);
Packit 80c72f
      mpfr_add (tmp, rea_imb, mpc_imagref(c), GMP_RNDZ); /* exact */
Packit 80c72f
      inex_im = mpfr_add (mpc_imagref(r), tmp, ima_reb, MPC_RND_IM(rnd));
Packit 80c72f
      /* the only possible bad overlap is between r and c, but since we are
Packit 80c72f
	 only touching the imaginary part of both, it is ok */
Packit 80c72f
    }
Packit 80c72f
  else /* rea_imb + (Im(c) + ima_reb) */
Packit 80c72f
    {
Packit 80c72f
      mpfr_set_prec (tmp, pre23);
Packit 80c72f
      mpfr_add (tmp, mpc_imagref(c), ima_reb, GMP_RNDZ); /* exact */
Packit 80c72f
      inex_im = mpfr_add (mpc_imagref(r), tmp, rea_imb, MPC_RND_IM(rnd));
Packit 80c72f
      /* the only possible bad overlap is between r and c, but since we are
Packit 80c72f
	 only touching the imaginary part of both, it is ok */
Packit 80c72f
    }
Packit 80c72f
Packit 80c72f
  mpfr_clear (rea_reb);
Packit 80c72f
  mpfr_clear (rea_imb);
Packit 80c72f
  mpfr_clear (ima_reb);
Packit 80c72f
  mpfr_clear (ima_imb);
Packit 80c72f
  mpfr_clear (tmp);
Packit 80c72f
Packit 80c72f
  return MPC_INEX(inex_re, inex_im);
Packit 80c72f
}
Packit 80c72f
Packit 80c72f
/* The algorithm is as follows:
Packit 80c72f
   - in a first pass, we use the target precision + some extra bits
Packit 80c72f
   - if it fails, we add the number of cancelled bits when adding
Packit 80c72f
     Re(a*b) and Re(c) [similarly for the imaginary part]
Packit 80c72f
   - it is fails again, we call the mpc_fma_naive function, which also
Packit 80c72f
     deals with the special cases */
Packit 80c72f
int
Packit 80c72f
mpc_fma (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd)
Packit 80c72f
{
Packit 80c72f
  mpc_t ab;
Packit 80c72f
  mpfr_prec_t pre, pim, wpre, wpim;
Packit 80c72f
  mpfr_exp_t diffre, diffim;
Packit 80c72f
  int i, inex = 0, okre = 0, okim = 0;
Packit 80c72f
Packit 80c72f
  if (mpc_fin_p (a) == 0 || mpc_fin_p (b) == 0 || mpc_fin_p (c) == 0)
Packit 80c72f
    return mpc_fma_naive (r, a, b, c, rnd);
Packit 80c72f
Packit 80c72f
  pre = mpfr_get_prec (mpc_realref(r));
Packit 80c72f
  pim = mpfr_get_prec (mpc_imagref(r));
Packit 80c72f
  wpre = pre + mpc_ceil_log2 (pre) + 10;
Packit 80c72f
  wpim = pim + mpc_ceil_log2 (pim) + 10;
Packit 80c72f
  mpc_init3 (ab, wpre, wpim);
Packit 80c72f
  for (i = 0; i < 2; ++i)
Packit 80c72f
    {
Packit 80c72f
      mpc_mul (ab, a, b, MPC_RNDZZ);
Packit 80c72f
      if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab)))
Packit 80c72f
        break;
Packit 80c72f
      diffre = mpfr_get_exp (mpc_realref(ab));
Packit 80c72f
      diffim = mpfr_get_exp (mpc_imagref(ab));
Packit 80c72f
      mpc_add (ab, ab, c, MPC_RNDZZ);
Packit 80c72f
      if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab)))
Packit 80c72f
        break;
Packit 80c72f
      diffre -= mpfr_get_exp (mpc_realref(ab));
Packit 80c72f
      diffim -= mpfr_get_exp (mpc_imagref(ab));
Packit 80c72f
      diffre = (diffre > 0 ? diffre + 1 : 1);
Packit 80c72f
      diffim = (diffim > 0 ? diffim + 1 : 1);
Packit 80c72f
      okre = diffre > (mpfr_exp_t) wpre ? 0 : mpfr_can_round (mpc_realref(ab),
Packit 80c72f
                                 wpre - diffre, GMP_RNDN, GMP_RNDZ,
Packit 80c72f
                                 pre + (MPC_RND_RE (rnd) == GMP_RNDN));
Packit 80c72f
      okim = diffim > (mpfr_exp_t) wpim ? 0 : mpfr_can_round (mpc_imagref(ab),
Packit 80c72f
                                 wpim - diffim, GMP_RNDN, GMP_RNDZ,
Packit 80c72f
                                 pim + (MPC_RND_IM (rnd) == GMP_RNDN));
Packit 80c72f
      if (okre && okim)
Packit 80c72f
        {
Packit 80c72f
          inex = mpc_set (r, ab, rnd);
Packit 80c72f
          break;
Packit 80c72f
        }
Packit 80c72f
      if (i == 1)
Packit 80c72f
        break;
Packit 80c72f
      if (okre == 0 && diffre > 1)
Packit 80c72f
        wpre += diffre;
Packit 80c72f
      if (okim == 0 && diffim > 1)
Packit 80c72f
        wpim += diffim;
Packit 80c72f
      mpfr_set_prec (mpc_realref(ab), wpre);
Packit 80c72f
      mpfr_set_prec (mpc_imagref(ab), wpim);
Packit 80c72f
    }
Packit 80c72f
  mpc_clear (ab);
Packit 80c72f
  return okre && okim ? inex : mpc_fma_naive (r, a, b, c, rnd);
Packit 80c72f
}