Tree - source-git/libmpc - CentOS Git server

source-git / libmpc

Blame src/fma.c

Blob History Raw

Packit	80c72f	`/* mpc_fma -- Fused multiply-add of three complex numbers`
Packit	80c72f
Packit	80c72f	`Copyright (C) 2011, 2012 INRIA`
Packit	80c72f
Packit	80c72f	`This file is part of GNU MPC.`
Packit	80c72f
Packit	80c72f	`GNU MPC is free software; you can redistribute it and/or modify it under`
Packit	80c72f	`the terms of the GNU Lesser General Public License as published by the`
Packit	80c72f	`Free Software Foundation; either version 3 of the License, or (at your`
Packit	80c72f	`option) any later version.`
Packit	80c72f
Packit	80c72f	`GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY`
Packit	80c72f	`WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS`
Packit	80c72f	`FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for`
Packit	80c72f	`more details.`
Packit	80c72f
Packit	80c72f	`You should have received a copy of the GNU Lesser General Public License`
Packit	80c72f	`along with this program. If not, see http://www.gnu.org/licenses/ .`
Packit	80c72f	`*/`
Packit	80c72f
Packit	80c72f	`#include "mpc-impl.h"`
Packit	80c72f
Packit	80c72f	`/* return a bound on the precision needed to add or subtract x and y exactly */`
Packit	80c72f	`static mpfr_prec_t`
Packit	80c72f	`bound_prec_addsub (mpfr_srcptr x, mpfr_srcptr y)`
Packit	80c72f	`{`
Packit	80c72f	`if (!mpfr_regular_p (x))`
Packit	80c72f	`return mpfr_get_prec (y);`
Packit	80c72f	`else if (!mpfr_regular_p (y))`
Packit	80c72f	`return mpfr_get_prec (x);`
Packit	80c72f	`else /* neither x nor y are NaN, Inf or zero */`
Packit	80c72f	`{`
Packit	80c72f	`mpfr_exp_t ex = mpfr_get_exp (x);`
Packit	80c72f	`mpfr_exp_t ey = mpfr_get_exp (y);`
Packit	80c72f	`mpfr_exp_t ulpx = ex - mpfr_get_prec (x);`
Packit	80c72f	`mpfr_exp_t ulpy = ey - mpfr_get_prec (y);`
Packit	80c72f	`return ((ex >= ey) ? ex : ey) + 1 - ((ulpx <= ulpy) ? ulpx : ulpy);`
Packit	80c72f	`}`
Packit	80c72f	`}`
Packit	80c72f
Packit	80c72f	`/* r <- ab+c /`
Packit	80c72f	`int`
Packit	80c72f	`mpc_fma_naive (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd)`
Packit	80c72f	`{`
Packit	80c72f	`mpfr_t rea_reb, rea_imb, ima_reb, ima_imb, tmp;`
Packit	80c72f	`mpfr_prec_t pre12, pre13, pre23, pim12, pim13, pim23;`
Packit	80c72f	`int inex_re, inex_im;`
Packit	80c72f
Packit	80c72f	`mpfr_init2 (rea_reb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_realref(b)));`
Packit	80c72f	`mpfr_init2 (rea_imb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_imagref(b)));`
Packit	80c72f	`mpfr_init2 (ima_reb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_realref(b)));`
Packit	80c72f	`mpfr_init2 (ima_imb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_imagref(b)));`
Packit	80c72f
Packit	80c72f	`mpfr_mul (rea_reb, mpc_realref(a), mpc_realref(b), GMP_RNDZ); /* exact */`
Packit	80c72f	`mpfr_mul (rea_imb, mpc_realref(a), mpc_imagref(b), GMP_RNDZ); /* exact */`
Packit	80c72f	`mpfr_mul (ima_reb, mpc_imagref(a), mpc_realref(b), GMP_RNDZ); /* exact */`
Packit	80c72f	`mpfr_mul (ima_imb, mpc_imagref(a), mpc_imagref(b), GMP_RNDZ); /* exact */`
Packit	80c72f
Packit	80c72f	`/* Re(r) <- rea_reb - ima_imb + Re(c) */`
Packit	80c72f
Packit	80c72f	`pre12 = bound_prec_addsub (rea_reb, ima_imb); /* bound on exact precision for`
Packit	80c72f	`rea_reb - ima_imb */`
Packit	80c72f	`pre13 = bound_prec_addsub (rea_reb, mpc_realref(c));`
Packit	80c72f	`/* bound for rea_reb + Re(c) */`
Packit	80c72f	`pre23 = bound_prec_addsub (ima_imb, mpc_realref(c));`
Packit	80c72f	`/* bound for ima_imb - Re(c) */`
Packit	80c72f	`if (pre12 <= pre13 && pre12 <= pre23) /* (rea_reb - ima_imb) + Re(c) */`
Packit	80c72f	`{`
Packit	80c72f	`mpfr_init2 (tmp, pre12);`
Packit	80c72f	`mpfr_sub (tmp, rea_reb, ima_imb, GMP_RNDZ); /* exact */`
Packit	80c72f	`inex_re = mpfr_add (mpc_realref(r), tmp, mpc_realref(c), MPC_RND_RE(rnd));`
Packit	80c72f	`/* the only possible bad overlap is between r and c, but since we are`
Packit	80c72f	`only touching the real part of both, it is ok */`
Packit	80c72f	`}`
Packit	80c72f	`else if (pre13 <= pre23) /* (rea_reb + Re(c)) - ima_imb */`
Packit	80c72f	`{`
Packit	80c72f	`mpfr_init2 (tmp, pre13);`
Packit	80c72f	`mpfr_add (tmp, rea_reb, mpc_realref(c), GMP_RNDZ); /* exact */`
Packit	80c72f	`inex_re = mpfr_sub (mpc_realref(r), tmp, ima_imb, MPC_RND_RE(rnd));`
Packit	80c72f	`/* the only possible bad overlap is between r and c, but since we are`
Packit	80c72f	`only touching the real part of both, it is ok */`
Packit	80c72f	`}`
Packit	80c72f	`else /* rea_reb + (Re(c) - ima_imb) */`
Packit	80c72f	`{`
Packit	80c72f	`mpfr_init2 (tmp, pre23);`
Packit	80c72f	`mpfr_sub (tmp, mpc_realref(c), ima_imb, GMP_RNDZ); /* exact */`
Packit	80c72f	`inex_re = mpfr_add (mpc_realref(r), tmp, rea_reb, MPC_RND_RE(rnd));`
Packit	80c72f	`/* the only possible bad overlap is between r and c, but since we are`
Packit	80c72f	`only touching the real part of both, it is ok */`
Packit	80c72f	`}`
Packit	80c72f
Packit	80c72f	`/* Im(r) <- rea_imb + ima_reb + Im(c) */`
Packit	80c72f	`pim12 = bound_prec_addsub (rea_imb, ima_reb); /* bound on exact precision for`
Packit	80c72f	`rea_imb + ima_reb */`
Packit	80c72f	`pim13 = bound_prec_addsub (rea_imb, mpc_imagref(c));`
Packit	80c72f	`/* bound for rea_imb + Im(c) */`
Packit	80c72f	`pim23 = bound_prec_addsub (ima_reb, mpc_imagref(c));`
Packit	80c72f	`/* bound for ima_reb + Im(c) */`
Packit	80c72f	`if (pim12 <= pim13 && pim12 <= pim23) /* (rea_imb + ima_reb) + Im(c) */`
Packit	80c72f	`{`
Packit	80c72f	`mpfr_set_prec (tmp, pim12);`
Packit	80c72f	`mpfr_add (tmp, rea_imb, ima_reb, GMP_RNDZ); /* exact */`
Packit	80c72f	`inex_im = mpfr_add (mpc_imagref(r), tmp, mpc_imagref(c), MPC_RND_IM(rnd));`
Packit	80c72f	`/* the only possible bad overlap is between r and c, but since we are`
Packit	80c72f	`only touching the imaginary part of both, it is ok */`
Packit	80c72f	`}`
Packit	80c72f	`else if (pim13 <= pim23) /* (rea_imb + Im(c)) + ima_reb */`
Packit	80c72f	`{`
Packit	80c72f	`mpfr_set_prec (tmp, pim13);`
Packit	80c72f	`mpfr_add (tmp, rea_imb, mpc_imagref(c), GMP_RNDZ); /* exact */`
Packit	80c72f	`inex_im = mpfr_add (mpc_imagref(r), tmp, ima_reb, MPC_RND_IM(rnd));`
Packit	80c72f	`/* the only possible bad overlap is between r and c, but since we are`
Packit	80c72f	`only touching the imaginary part of both, it is ok */`
Packit	80c72f	`}`
Packit	80c72f	`else /* rea_imb + (Im(c) + ima_reb) */`
Packit	80c72f	`{`
Packit	80c72f	`mpfr_set_prec (tmp, pre23);`
Packit	80c72f	`mpfr_add (tmp, mpc_imagref(c), ima_reb, GMP_RNDZ); /* exact */`
Packit	80c72f	`inex_im = mpfr_add (mpc_imagref(r), tmp, rea_imb, MPC_RND_IM(rnd));`
Packit	80c72f	`/* the only possible bad overlap is between r and c, but since we are`
Packit	80c72f	`only touching the imaginary part of both, it is ok */`
Packit	80c72f	`}`
Packit	80c72f
Packit	80c72f	`mpfr_clear (rea_reb);`
Packit	80c72f	`mpfr_clear (rea_imb);`
Packit	80c72f	`mpfr_clear (ima_reb);`
Packit	80c72f	`mpfr_clear (ima_imb);`
Packit	80c72f	`mpfr_clear (tmp);`
Packit	80c72f
Packit	80c72f	`return MPC_INEX(inex_re, inex_im);`
Packit	80c72f	`}`
Packit	80c72f
Packit	80c72f	`/* The algorithm is as follows:`
Packit	80c72f	`- in a first pass, we use the target precision + some extra bits`
Packit	80c72f	`- if it fails, we add the number of cancelled bits when adding`
Packit	80c72f	`Re(a*b) and Re(c) [similarly for the imaginary part]`
Packit	80c72f	`- it is fails again, we call the mpc_fma_naive function, which also`
Packit	80c72f	`deals with the special cases */`
Packit	80c72f	`int`
Packit	80c72f	`mpc_fma (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd)`
Packit	80c72f	`{`
Packit	80c72f	`mpc_t ab;`
Packit	80c72f	`mpfr_prec_t pre, pim, wpre, wpim;`
Packit	80c72f	`mpfr_exp_t diffre, diffim;`
Packit	80c72f	`int i, inex = 0, okre = 0, okim = 0;`
Packit	80c72f
Packit	80c72f	`if (mpc_fin_p (a) == 0 \|\| mpc_fin_p (b) == 0 \|\| mpc_fin_p (c) == 0)`
Packit	80c72f	`return mpc_fma_naive (r, a, b, c, rnd);`
Packit	80c72f
Packit	80c72f	`pre = mpfr_get_prec (mpc_realref(r));`
Packit	80c72f	`pim = mpfr_get_prec (mpc_imagref(r));`
Packit	80c72f	`wpre = pre + mpc_ceil_log2 (pre) + 10;`
Packit	80c72f	`wpim = pim + mpc_ceil_log2 (pim) + 10;`
Packit	80c72f	`mpc_init3 (ab, wpre, wpim);`
Packit	80c72f	`for (i = 0; i < 2; ++i)`
Packit	80c72f	`{`
Packit	80c72f	`mpc_mul (ab, a, b, MPC_RNDZZ);`
Packit	80c72f	`if (mpfr_zero_p (mpc_realref(ab)) \|\| mpfr_zero_p (mpc_imagref(ab)))`
Packit	80c72f	`break;`
Packit	80c72f	`diffre = mpfr_get_exp (mpc_realref(ab));`
Packit	80c72f	`diffim = mpfr_get_exp (mpc_imagref(ab));`
Packit	80c72f	`mpc_add (ab, ab, c, MPC_RNDZZ);`
Packit	80c72f	`if (mpfr_zero_p (mpc_realref(ab)) \|\| mpfr_zero_p (mpc_imagref(ab)))`
Packit	80c72f	`break;`
Packit	80c72f	`diffre -= mpfr_get_exp (mpc_realref(ab));`
Packit	80c72f	`diffim -= mpfr_get_exp (mpc_imagref(ab));`
Packit	80c72f	`diffre = (diffre > 0 ? diffre + 1 : 1);`
Packit	80c72f	`diffim = (diffim > 0 ? diffim + 1 : 1);`
Packit	80c72f	`okre = diffre > (mpfr_exp_t) wpre ? 0 : mpfr_can_round (mpc_realref(ab),`
Packit	80c72f	`wpre - diffre, GMP_RNDN, GMP_RNDZ,`
Packit	80c72f	`pre + (MPC_RND_RE (rnd) == GMP_RNDN));`
Packit	80c72f	`okim = diffim > (mpfr_exp_t) wpim ? 0 : mpfr_can_round (mpc_imagref(ab),`
Packit	80c72f	`wpim - diffim, GMP_RNDN, GMP_RNDZ,`
Packit	80c72f	`pim + (MPC_RND_IM (rnd) == GMP_RNDN));`
Packit	80c72f	`if (okre && okim)`
Packit	80c72f	`{`
Packit	80c72f	`inex = mpc_set (r, ab, rnd);`
Packit	80c72f	`break;`
Packit	80c72f	`}`
Packit	80c72f	`if (i == 1)`
Packit	80c72f	`break;`
Packit	80c72f	`if (okre == 0 && diffre > 1)`
Packit	80c72f	`wpre += diffre;`
Packit	80c72f	`if (okim == 0 && diffim > 1)`
Packit	80c72f	`wpim += diffim;`
Packit	80c72f	`mpfr_set_prec (mpc_realref(ab), wpre);`
Packit	80c72f	`mpfr_set_prec (mpc_imagref(ab), wpim);`
Packit	80c72f	`}`
Packit	80c72f	`mpc_clear (ab);`
Packit	80c72f	`return okre && okim ? inex : mpc_fma_naive (r, a, b, c, rnd);`
Packit	80c72f	`}`

source-git / libmpc

Source Code

Blame src/fma.c