Blame mpn/generic/toom63_mul.c

/* Implementation of the algorithm for Toom-Cook 4.5-way.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
/* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */
Packit 5c3484
static int
Packit 5c3484
abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t  x, y;
Packit 5c3484
  while (--n >= 0)
Packit 5c3484
    {
Packit 5c3484
      x = ap[n];
Packit 5c3484
      y = bp[n];
Packit 5c3484
      if (x != y)
Packit 5c3484
	{
Packit 5c3484
	  n++;
Packit 5c3484
	  if (x > y)
Packit 5c3484
	    {
Packit 5c3484
	      mpn_sub_n (rp, ap, bp, n);
Packit 5c3484
	      return 0;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      mpn_sub_n (rp, bp, ap, n);
Packit 5c3484
	      return ~0;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      rp[n] = 0;
Packit 5c3484
    }
Packit 5c3484
  return 0;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
static int
Packit 5c3484
abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
Packit 5c3484
  int result;
Packit 5c3484
  result = abs_sub_n (rm, rp, rs, n);
Packit 5c3484
  ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
Packit 5c3484
  return result;
Packit 5c3484
}
/* Toom-4.5, the splitting 6x3 unbalanced version.
   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.

  <--s-><--n--><--n--><--n--><--n--><--n-->
   ____ ______ ______ ______ ______ ______
  |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
			|b2_|__b1__|__b0__|
			<-t-><--n--><--n-->

*/
/* Balanced point-value product used by mpn_toom63_mul: multiplies the two
   n-limb operands a and b into p through mpn_mul_n.  The ws argument is
   not used by this expansion.  */
#define TOOM_63_MUL_N_REC(p, a, b, n, ws)		\
  do {	mpn_mul_n (p, a, b, n);				\
  } while (0)
/* Unbalanced product used by mpn_toom63_mul: multiplies {a,na} by {b,nb}
   into p through mpn_mul.  The ws argument is not used by this
   expansion.  */
#define TOOM_63_MUL_REC(p, a, na, b, nb, ws)		\
  do {	mpn_mul (p, a, na, b, nb);			\
  } while (0)
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_toom63_mul (mp_ptr pp,
Packit 5c3484
		mp_srcptr ap, mp_size_t an,
Packit 5c3484
		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_size_t n, s, t;
Packit 5c3484
  mp_limb_t cy;
Packit 5c3484
  int sign;
Packit 5c3484
Packit 5c3484
  /***************************** decomposition *******************************/
Packit 5c3484
#define a5  (ap + 5 * n)
Packit 5c3484
#define b0  (bp + 0 * n)
Packit 5c3484
#define b1  (bp + 1 * n)
Packit 5c3484
#define b2  (bp + 2 * n)
Packit 5c3484
Packit 5c3484
  ASSERT (an >= bn);
Packit 5c3484
  n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
Packit 5c3484
Packit 5c3484
  s = an - 5 * n;
Packit 5c3484
  t = bn - 2 * n;
Packit 5c3484
Packit 5c3484
  ASSERT (0 < s && s <= n);
Packit 5c3484
  ASSERT (0 < t && t <= n);
Packit 5c3484
  /* WARNING! it assumes s+t>=n */
Packit 5c3484
  ASSERT ( s + t >= n );
Packit 5c3484
  ASSERT ( s + t > 4);
Packit 5c3484
  /* WARNING! it assumes n>1 */
Packit 5c3484
  ASSERT ( n > 2);
Packit 5c3484
Packit 5c3484
#define   r8    pp				/* 2n   */
Packit 5c3484
#define   r7    scratch				/* 3n+1 */
Packit 5c3484
#define   r5    (pp + 3*n)			/* 3n+1 */
Packit 5c3484
#define   v0    (pp + 3*n)			/* n+1 */
Packit 5c3484
#define   v1    (pp + 4*n+1)			/* n+1 */
Packit 5c3484
#define   v2    (pp + 5*n+2)			/* n+1 */
Packit 5c3484
#define   v3    (pp + 6*n+3)			/* n+1 */
Packit 5c3484
#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
Packit 5c3484
#define   r1    (pp + 7*n)			/* s+t <= 2*n */
Packit 5c3484
#define   ws    (scratch + 6 * n + 2)		/* ??? */
Packit 5c3484
Packit 5c3484
  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
Packit 5c3484
     need all of them, when DO_mpn_sublsh_n usea a scratch  */
Packit 5c3484
/*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
Packit 5c3484
Packit 5c3484
  /********************** evaluation and recursive calls *********************/
Packit 5c3484
  /* $\pm4$ */
Packit 5c3484
  sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
Packit 5c3484
  pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
Packit 5c3484
  /* FIXME: use addlsh */
Packit 5c3484
  v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
Packit 5c3484
  if ( n == t )
Packit 5c3484
    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
Packit 5c3484
  else
Packit 5c3484
    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
Packit 5c3484
  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
Packit 5c3484
  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
Packit 5c3484
  TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
Packit 5c3484
  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
Packit 5c3484
Packit 5c3484
  /* $\pm1$ */
Packit 5c3484
  sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
Packit 5c3484
  /* Compute bs1 and bsm1. Code taken from toom33 */
Packit 5c3484
  cy = mpn_add (ws, b0, n, b2, t);
Packit 5c3484
#if HAVE_NATIVE_mpn_add_n_sub_n
Packit 5c3484
  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
Packit 5c3484
    {
Packit 5c3484
      cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
Packit 5c3484
      v3[n] = cy >> 1;
Packit 5c3484
      v1[n] = 0;
Packit 5c3484
      sign = ~sign;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t cy2;
Packit 5c3484
      cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
Packit 5c3484
      v3[n] = cy + (cy2 >> 1);
Packit 5c3484
      v1[n] = cy - (cy2 & 1);
Packit 5c3484
    }
Packit 5c3484
#else
Packit 5c3484
  v3[n] = cy + mpn_add_n (v3, ws, b1, n);
Packit 5c3484
  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
Packit 5c3484
    {
Packit 5c3484
      mpn_sub_n (v1, b1, ws, n);
Packit 5c3484
      v1[n] = 0;
Packit 5c3484
      sign = ~sign;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      cy -= mpn_sub_n (v1, ws, b1, n);
Packit 5c3484
      v1[n] = cy;
Packit 5c3484
    }
Packit 5c3484
#endif
Packit 5c3484
  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
Packit 5c3484
  TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
Packit 5c3484
  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
Packit 5c3484
Packit 5c3484
  /* $\pm2$ */
Packit 5c3484
  sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
Packit 5c3484
  pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
Packit 5c3484
  /* FIXME: use addlsh or addlsh2 */
Packit 5c3484
  v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
Packit 5c3484
  if ( n == t )
Packit 5c3484
    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
Packit 5c3484
  else
Packit 5c3484
    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
Packit 5c3484
  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
Packit 5c3484
  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
Packit 5c3484
  TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
Packit 5c3484
  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
Packit 5c3484
Packit 5c3484
  /* A(0)*B(0) */
Packit 5c3484
  TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
Packit 5c3484
Packit 5c3484
  /* Infinity */
Packit 5c3484
  if (s > t) {
Packit 5c3484
    TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
Packit 5c3484
  } else {
Packit 5c3484
    TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
Packit 5c3484
  };
Packit 5c3484
Packit 5c3484
  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
Packit 5c3484
Packit 5c3484
#undef a5
Packit 5c3484
#undef b0
Packit 5c3484
#undef b1
Packit 5c3484
#undef b2
Packit 5c3484
#undef r1
Packit 5c3484
#undef r3
Packit 5c3484
#undef r5
Packit 5c3484
#undef v0
Packit 5c3484
#undef v1
Packit 5c3484
#undef v2
Packit 5c3484
#undef v3
Packit 5c3484
#undef r7
Packit 5c3484
#undef r8
Packit 5c3484
#undef ws
Packit 5c3484
}