Blame mpn/generic/toom8_sqr.c

Packit 5c3484
/* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Marco Bodrato.
Packit 5c3484
Packit 5c3484
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2009, 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
#if GMP_NUMB_BITS < 29
Packit 5c3484
#error Not implemented.
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if GMP_NUMB_BITS < 43
Packit 5c3484
#define BIT_CORRECTION 1
Packit 5c3484
#define CORRECTION_BITS GMP_NUMB_BITS
Packit 5c3484
#else
Packit 5c3484
#define BIT_CORRECTION 0
Packit 5c3484
#define CORRECTION_BITS 0
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#ifndef SQR_TOOM8_THRESHOLD
Packit 5c3484
#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#ifndef SQR_TOOM6_THRESHOLD
Packit 5c3484
#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if TUNE_PROGRAM_BUILD
Packit 5c3484
#define MAYBE_sqr_basecase 1
Packit 5c3484
#define MAYBE_sqr_above_basecase   1
Packit 5c3484
#define MAYBE_sqr_toom2   1
Packit 5c3484
#define MAYBE_sqr_above_toom2   1
Packit 5c3484
#define MAYBE_sqr_toom3   1
Packit 5c3484
#define MAYBE_sqr_above_toom3   1
Packit 5c3484
#define MAYBE_sqr_toom4   1
Packit 5c3484
#define MAYBE_sqr_above_toom4   1
Packit 5c3484
#define MAYBE_sqr_above_toom6   1
Packit 5c3484
#else
Packit 5c3484
#define SQR_TOOM8_MAX					\
Packit 5c3484
  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?	\
Packit 5c3484
   ((SQR_FFT_THRESHOLD+8*2-1+7)/8)			\
Packit 5c3484
   : MP_SIZE_T_MAX )
Packit 5c3484
#define MAYBE_sqr_basecase					\
Packit 5c3484
  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
Packit 5c3484
#define MAYBE_sqr_above_basecase				\
Packit 5c3484
  (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
Packit 5c3484
#define MAYBE_sqr_toom2						\
Packit 5c3484
  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
Packit 5c3484
#define MAYBE_sqr_above_toom2					\
Packit 5c3484
  (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
Packit 5c3484
#define MAYBE_sqr_toom3						\
Packit 5c3484
  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
Packit 5c3484
#define MAYBE_sqr_above_toom3					\
Packit 5c3484
  (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
Packit 5c3484
#define MAYBE_sqr_toom4						\
Packit 5c3484
  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
Packit 5c3484
#define MAYBE_sqr_above_toom4					\
Packit 5c3484
  (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
Packit 5c3484
#define MAYBE_sqr_above_toom6					\
Packit 5c3484
  (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws)				\
Packit 5c3484
  do {									\
Packit 5c3484
    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
Packit 5c3484
	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) {			\
Packit 5c3484
      mpn_sqr_basecase (p, a, n);					\
Packit 5c3484
      if (f) mpn_sqr_basecase (p2, a2, n);				\
Packit 5c3484
    } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
Packit 5c3484
	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) {		\
Packit 5c3484
      mpn_toom2_sqr (p, a, n, ws);					\
Packit 5c3484
      if (f) mpn_toom2_sqr (p2, a2, n, ws);				\
Packit 5c3484
    } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
Packit 5c3484
	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) {		\
Packit 5c3484
      mpn_toom3_sqr (p, a, n, ws);					\
Packit 5c3484
      if (f) mpn_toom3_sqr (p2, a2, n, ws);				\
Packit 5c3484
    } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4		\
Packit 5c3484
	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) {		\
Packit 5c3484
      mpn_toom4_sqr (p, a, n, ws);					\
Packit 5c3484
      if (f) mpn_toom4_sqr (p2, a2, n, ws);				\
Packit 5c3484
    } else if (! MAYBE_sqr_above_toom6					\
Packit 5c3484
	     || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) {		\
Packit 5c3484
      mpn_toom6_sqr (p, a, n, ws);					\
Packit 5c3484
      if (f) mpn_toom6_sqr (p2, a2, n, ws);				\
Packit 5c3484
    } else {								\
Packit 5c3484
      mpn_toom8_sqr (p, a, n, ws);					\
Packit 5c3484
      if (f) mpn_toom8_sqr (p2, a2, n, ws);				\
Packit 5c3484
    }									\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_size_t n, s;
Packit 5c3484
Packit 5c3484
  /***************************** decomposition *******************************/
Packit 5c3484
Packit 5c3484
  ASSERT ( an >= 40 );
Packit 5c3484
Packit 5c3484
  n = 1 + ((an - 1)>>3);
Packit 5c3484
Packit 5c3484
  s = an - 7 * n;
Packit 5c3484
Packit 5c3484
  ASSERT (0 < s && s <= n);
Packit 5c3484
  ASSERT ( s + s > 3 );
Packit 5c3484
Packit 5c3484
#define   r6    (pp + 3 * n)			/* 3n+1 */
Packit 5c3484
#define   r4    (pp + 7 * n)			/* 3n+1 */
Packit 5c3484
#define   r2    (pp +11 * n)			/* 3n+1 */
Packit 5c3484
#define   r0    (pp +15 * n)			/* s+t <= 2*n */
Packit 5c3484
#define   r7    (scratch)			/* 3n+1 */
Packit 5c3484
#define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
Packit 5c3484
#define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
Packit 5c3484
#define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
Packit 5c3484
#define   v0    (pp +11 * n)			/* n+1 */
Packit 5c3484
#define   v2    (pp +13 * n+2)			/* n+1 */
Packit 5c3484
#define   wse   (scratch +12 * n + 4)		/* 3n+1 */
Packit 5c3484
Packit 5c3484
  /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
Packit 5c3484
     need all of them, when DO_mpn_sublsh_n usea a scratch  */
Packit 5c3484
/*   if (scratch == NULL) */
Packit 5c3484
/*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
Packit 5c3484
Packit 5c3484
  /********************** evaluation and recursive calls *********************/
Packit 5c3484
  /* $\pm1/8$ */
Packit 5c3484
  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
Packit 5c3484
  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
Packit 5c3484
  TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
Packit 5c3484
Packit 5c3484
  /* $\pm1/4$ */
Packit 5c3484
  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
Packit 5c3484
  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
Packit 5c3484
  TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
Packit 5c3484
Packit 5c3484
  /* $\pm2$ */
Packit 5c3484
  mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
Packit 5c3484
  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
Packit 5c3484
  TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
Packit 5c3484
Packit 5c3484
  /* $\pm8$ */
Packit 5c3484
  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
Packit 5c3484
  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
Packit 5c3484
  TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
Packit 5c3484
Packit 5c3484
  /* $\pm1/2$ */
Packit 5c3484
  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
Packit 5c3484
  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
Packit 5c3484
  TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
Packit 5c3484
Packit 5c3484
  /* $\pm1$ */
Packit 5c3484
  mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
Packit 5c3484
  /* A(-1)*B(-1) */ /* A(1)*B(1) */
Packit 5c3484
  TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
Packit 5c3484
Packit 5c3484
  /* $\pm4$ */
Packit 5c3484
  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
Packit 5c3484
  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
Packit 5c3484
  TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
Packit 5c3484
  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
Packit 5c3484
Packit 5c3484
#undef v0
Packit 5c3484
#undef v2
Packit 5c3484
Packit 5c3484
  /* A(0)*B(0) */
Packit 5c3484
  TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
Packit 5c3484
Packit 5c3484
  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
Packit 5c3484
Packit 5c3484
#undef r0
Packit 5c3484
#undef r1
Packit 5c3484
#undef r2
Packit 5c3484
#undef r3
Packit 5c3484
#undef r4
Packit 5c3484
#undef r5
Packit 5c3484
#undef r6
Packit 5c3484
#undef wse
Packit 5c3484
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#undef TOOM8_SQR_REC
Packit 5c3484
#undef MAYBE_sqr_basecase
Packit 5c3484
#undef MAYBE_sqr_above_basecase
Packit 5c3484
#undef MAYBE_sqr_toom2
Packit 5c3484
#undef MAYBE_sqr_above_toom2
Packit 5c3484
#undef MAYBE_sqr_toom3
Packit 5c3484
#undef MAYBE_sqr_above_toom3
Packit 5c3484
#undef MAYBE_sqr_above_toom4