Blame mpn/generic/toom8h_mul.c

Packit 5c3484
/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Marco Bodrato.
Packit 5c3484
Packit 5c3484
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
#if GMP_NUMB_BITS < 29
Packit 5c3484
#error Not implemented.
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if GMP_NUMB_BITS < 43
Packit 5c3484
#define BIT_CORRECTION 1
Packit 5c3484
#define CORRECTION_BITS GMP_NUMB_BITS
Packit 5c3484
#else
Packit 5c3484
#define BIT_CORRECTION 0
Packit 5c3484
#define CORRECTION_BITS 0
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
#if TUNE_PROGRAM_BUILD
Packit 5c3484
#define MAYBE_mul_basecase 1
Packit 5c3484
#define MAYBE_mul_toom22   1
Packit 5c3484
#define MAYBE_mul_toom33   1
Packit 5c3484
#define MAYBE_mul_toom44   1
Packit 5c3484
#define MAYBE_mul_toom8h   1
Packit 5c3484
#else
Packit 5c3484
#define MAYBE_mul_basecase						\
Packit 5c3484
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
Packit 5c3484
#define MAYBE_mul_toom22						\
Packit 5c3484
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
Packit 5c3484
#define MAYBE_mul_toom33						\
Packit 5c3484
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
Packit 5c3484
#define MAYBE_mul_toom44						\
Packit 5c3484
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
Packit 5c3484
#define MAYBE_mul_toom8h						\
Packit 5c3484
  (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
Packit 5c3484
  do {									\
Packit 5c3484
    if (MAYBE_mul_basecase						\
Packit 5c3484
	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
Packit 5c3484
      mpn_mul_basecase (p, a, n, b, n);					\
Packit 5c3484
      if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
Packit 5c3484
    } else if (MAYBE_mul_toom22						\
Packit 5c3484
	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
Packit 5c3484
      mpn_toom22_mul (p, a, n, b, n, ws);				\
Packit 5c3484
      if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
Packit 5c3484
    } else if (MAYBE_mul_toom33						\
Packit 5c3484
	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
Packit 5c3484
      mpn_toom33_mul (p, a, n, b, n, ws);				\
Packit 5c3484
      if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
Packit 5c3484
    } else if (MAYBE_mul_toom44						\
Packit 5c3484
	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
Packit 5c3484
      mpn_toom44_mul (p, a, n, b, n, ws);				\
Packit 5c3484
      if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
Packit 5c3484
    } else if (! MAYBE_mul_toom8h					\
Packit 5c3484
	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
Packit 5c3484
      mpn_toom6h_mul (p, a, n, b, n, ws);				\
Packit 5c3484
      if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
Packit 5c3484
    } else {								\
Packit 5c3484
      mpn_toom8h_mul (p, a, n, b, n, ws);				\
Packit 5c3484
      if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
Packit 5c3484
    }									\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
Packit 5c3484
  do { mpn_mul (p, a, na, b, nb); } while (0)
Packit 5c3484
Packit 5c3484
/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
Packit 5c3484
   With: an >= bn >= 86, an*5 <  bn * 11.
Packit 5c3484
   It _may_ work with bn<=?? and bn*?? < an*? < bn*??
Packit 5c3484
Packit 5c3484
   Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
Packit 5c3484
*/
Packit 5c3484
/* Estimate on needed scratch:
Packit 5c3484
   S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
Packit 5c3484
   since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
Packit 5c3484
 */
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_toom8h_mul   (mp_ptr pp,
Packit 5c3484
		  mp_srcptr ap, mp_size_t an,
Packit 5c3484
		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_size_t n, s, t;
Packit 5c3484
  int p, q, half;
Packit 5c3484
  int sign;
Packit 5c3484
Packit 5c3484
  /***************************** decomposition *******************************/
Packit 5c3484
Packit 5c3484
  ASSERT (an >= bn);
Packit 5c3484
  /* Can not handle too small operands */
Packit 5c3484
  ASSERT (bn >= 86);
Packit 5c3484
  /* Can not handle too much unbalancement */
Packit 5c3484
  ASSERT (an <= bn*4);
Packit 5c3484
  ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
Packit 5c3484
  ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
Packit 5c3484
  ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
Packit 5c3484
Packit 5c3484
  /* Limit num/den is a rational number between
Packit 5c3484
     (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
Packit 5c3484
#define LIMIT_numerator (21)
Packit 5c3484
#define LIMIT_denominat (20)
Packit 5c3484
Packit 5c3484
  if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
Packit 5c3484
    {
Packit 5c3484
      half = 0;
Packit 5c3484
      n = 1 + ((an - 1)>>3);
Packit 5c3484
      p = q = 7;
Packit 5c3484
      s = an - 7 * n;
Packit 5c3484
      t = bn - 7 * n;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator
Packit 5c3484
	{ p = 9; q = 8; }
Packit 5c3484
      else if (GMP_NUMB_BITS <= 9*3 ||
Packit 5c3484
	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
Packit 5c3484
	{ p = 9; q = 7; }
Packit 5c3484
      else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator
Packit 5c3484
	{ p =10; q = 7; }
Packit 5c3484
      else if (GMP_NUMB_BITS <= 10*3 ||
Packit 5c3484
	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
Packit 5c3484
	{ p =10; q = 6; }
Packit 5c3484
      else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
Packit 5c3484
	{ p =11; q = 6; }
Packit 5c3484
      else if (GMP_NUMB_BITS <= 11*3 ||
Packit 5c3484
	       an * 4 < 9 * bn)
Packit 5c3484
	{ p =11; q = 5; }
Packit 5c3484
      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
Packit 5c3484
	{ p =12; q = 5; }
Packit 5c3484
      else if (GMP_NUMB_BITS <= 12*3 ||
Packit 5c3484
	       an * 9 < 28 * bn )  /* is 4*... <12*... */
Packit 5c3484
	{ p =12; q = 4; }
Packit 5c3484
      else
Packit 5c3484
	{ p =13; q = 4; }
Packit 5c3484
Packit 5c3484
      half = (p+q)&1;
Packit 5c3484
      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
Packit 5c3484
      p--; q--;
Packit 5c3484
Packit 5c3484
      s = an - p * n;
Packit 5c3484
      t = bn - q * n;
Packit 5c3484
Packit 5c3484
      if(half) { /* Recover from badly chosen splitting */
Packit 5c3484
	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
Packit 5c3484
	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
Packit 5c3484
      }
Packit 5c3484
    }
Packit 5c3484
#undef LIMIT_numerator
Packit 5c3484
#undef LIMIT_denominat
Packit 5c3484
Packit 5c3484
  ASSERT (0 < s && s <= n);
Packit 5c3484
  ASSERT (0 < t && t <= n);
Packit 5c3484
  ASSERT (half || s + t > 3);
Packit 5c3484
  ASSERT (n > 2);
Packit 5c3484
Packit 5c3484
#define   r6    (pp + 3 * n)			/* 3n+1 */
Packit 5c3484
#define   r4    (pp + 7 * n)			/* 3n+1 */
Packit 5c3484
#define   r2    (pp +11 * n)			/* 3n+1 */
Packit 5c3484
#define   r0    (pp +15 * n)			/* s+t <= 2*n */
Packit 5c3484
#define   r7    (scratch)			/* 3n+1 */
Packit 5c3484
#define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
Packit 5c3484
#define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
Packit 5c3484
#define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
Packit 5c3484
#define   v0    (pp +11 * n)			/* n+1 */
Packit 5c3484
#define   v1    (pp +12 * n+1)			/* n+1 */
Packit 5c3484
#define   v2    (pp +13 * n+2)			/* n+1 */
Packit 5c3484
#define   v3    (scratch +12 * n + 4)		/* n+1 */
Packit 5c3484
#define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
Packit 5c3484
#define   wse   (scratch +13 * n + 5)		/* 2n+1 */
Packit 5c3484
Packit 5c3484
  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
Packit 5c3484
     need all of them  */
Packit 5c3484
/*   if (scratch == NULL) */
Packit 5c3484
/*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
Packit 5c3484
  ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
Packit 5c3484
  ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
Packit 5c3484
Packit 5c3484
  /********************** evaluation and recursive calls *********************/
Packit 5c3484
Packit 5c3484
  /* $\pm1/8$ */
Packit 5c3484
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
Packit 5c3484
	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
Packit 5c3484
  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
Packit 5c3484
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
Packit 5c3484
Packit 5c3484
  /* $\pm1/4$ */
Packit 5c3484
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
Packit 5c3484
	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
Packit 5c3484
  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
Packit 5c3484
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
Packit 5c3484
Packit 5c3484
  /* $\pm2$ */
Packit 5c3484
  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
Packit 5c3484
	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
Packit 5c3484
  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
Packit 5c3484
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
Packit 5c3484
Packit 5c3484
  /* $\pm8$ */
Packit 5c3484
  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
Packit 5c3484
	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
Packit 5c3484
  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
Packit 5c3484
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
Packit 5c3484
Packit 5c3484
  /* $\pm1/2$ */
Packit 5c3484
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
Packit 5c3484
	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
Packit 5c3484
  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
Packit 5c3484
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
Packit 5c3484
Packit 5c3484
  /* $\pm1$ */
Packit 5c3484
  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
Packit 5c3484
  if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
Packit 5c3484
    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
Packit 5c3484
  else
Packit 5c3484
    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
Packit 5c3484
  /* A(-1)*B(-1) */ /* A(1)*B(1) */
Packit 5c3484
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
Packit 5c3484
Packit 5c3484
  /* $\pm4$ */
Packit 5c3484
  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
Packit 5c3484
	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
Packit 5c3484
  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
Packit 5c3484
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
Packit 5c3484
Packit 5c3484
#undef v0
Packit 5c3484
#undef v1
Packit 5c3484
#undef v2
Packit 5c3484
#undef v3
Packit 5c3484
#undef wse
Packit 5c3484
Packit 5c3484
  /* A(0)*B(0) */
Packit 5c3484
  TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
Packit 5c3484
Packit 5c3484
  /* Infinity */
Packit 5c3484
  if (UNLIKELY (half != 0)) {
Packit 5c3484
    if (s > t) {
Packit 5c3484
      TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
Packit 5c3484
    } else {
Packit 5c3484
      TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
Packit 5c3484
    };
Packit 5c3484
  };
Packit 5c3484
Packit 5c3484
  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
Packit 5c3484
Packit 5c3484
#undef r0
Packit 5c3484
#undef r1
Packit 5c3484
#undef r2
Packit 5c3484
#undef r3
Packit 5c3484
#undef r4
Packit 5c3484
#undef r5
Packit 5c3484
#undef r6
Packit 5c3484
#undef wsi
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#undef TOOM8H_MUL_N_REC
Packit 5c3484
#undef TOOM8H_MUL_REC
Packit 5c3484
#undef MAYBE_mul_basecase
Packit 5c3484
#undef MAYBE_mul_toom22
Packit 5c3484
#undef MAYBE_mul_toom33
Packit 5c3484
#undef MAYBE_mul_toom44
Packit 5c3484
#undef MAYBE_mul_toom8h