Blame mpn/generic/powm.c

Packit 5c3484
/* mpn_powm -- Compute R = U^E mod M.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2007-2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
/*
Packit 5c3484
  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
Packit 5c3484
Packit 5c3484
  1. W <- U
Packit 5c3484
Packit 5c3484
  2. T <- (B^n * U) mod M                Convert to REDC form
Packit 5c3484
Packit 5c3484
  3. Compute table U^1, U^3, U^5... of E-dependent size
Packit 5c3484
Packit 5c3484
  4. While there are more bits in E
Packit 5c3484
       W <- power left-to-right base-k
Packit 5c3484
Packit 5c3484
Packit 5c3484
  TODO:
Packit 5c3484
Packit 5c3484
   * Make getbits a macro, thereby allowing it to update the index operand.
Packit 5c3484
     That will simplify the code using getbits.  (Perhaps make getbits' sibling
Packit 5c3484
     getbit then have similar form, for symmetry.)
Packit 5c3484
Packit 5c3484
   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
Packit 5c3484
     pp area is allocated locally anyway?
Packit 5c3484
Packit 5c3484
   * Choose window size without looping.  (Superoptimize or think(tm).)
Packit 5c3484
Packit 5c3484
   * Handle small bases with initial, reduction-free exponentiation.
Packit 5c3484
Packit 5c3484
   * Call new division functions, not mpn_tdiv_qr.
Packit 5c3484
Packit 5c3484
   * Consider special code for one-limb M.
Packit 5c3484
Packit 5c3484
   * How should we handle the redc1/redc2/redc_n choice?
Packit 5c3484
     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
Packit 5c3484
     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
Packit 5c3484
     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
Packit 5c3484
     This disregards the addmul_N constant term, but we could think of
Packit 5c3484
     that as part of the respective mullo.
Packit 5c3484
Packit 5c3484
   * When U (the base) is small, we should start the exponentiation with plain
Packit 5c3484
     operations, then convert that partial result to REDC form.
Packit 5c3484
Packit 5c3484
   * When U is just one limb, should it be handled without the k-ary tricks?
Packit 5c3484
     We could keep a factor of B^n in W, but use U' = BU as base.  After
Packit 5c3484
     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
Packit 5c3484
     mod M.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#undef MPN_REDC_1
Packit 5c3484
#define MPN_REDC_1(rp, up, mp, n, invm)					\
Packit 5c3484
  do {									\
Packit 5c3484
    mp_limb_t cy;							\
Packit 5c3484
    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
Packit 5c3484
    if (cy != 0)							\
Packit 5c3484
      mpn_sub_n (rp, rp, mp, n);					\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#undef MPN_REDC_2
Packit 5c3484
#define MPN_REDC_2(rp, up, mp, n, mip)					\
Packit 5c3484
  do {									\
Packit 5c3484
    mp_limb_t cy;							\
Packit 5c3484
    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
Packit 5c3484
    if (cy != 0)							\
Packit 5c3484
      mpn_sub_n (rp, rp, mp, n);					\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
Packit 5c3484
#define WANT_REDC_2 1
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#define getbit(p,bi) \
Packit 5c3484
  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
Packit 5c3484
Packit 5c3484
static inline mp_limb_t
Packit 5c3484
getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
Packit 5c3484
{
Packit 5c3484
  int nbits_in_r;
Packit 5c3484
  mp_limb_t r;
Packit 5c3484
  mp_size_t i;
Packit 5c3484
Packit 5c3484
  if (bi < nbits)
Packit 5c3484
    {
Packit 5c3484
      return p[0] & (((mp_limb_t) 1 << bi) - 1);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      bi -= nbits;			/* bit index of low bit to extract */
Packit 5c3484
      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
Packit 5c3484
      bi %= GMP_NUMB_BITS;		/* bit index in low word */
Packit 5c3484
      r = p[i] >> bi;			/* extract (low) bits */
Packit 5c3484
      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
Packit 5c3484
      if (nbits_in_r < nbits)		/* did we get enough bits? */
Packit 5c3484
	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
Packit 5c3484
      return r & (((mp_limb_t ) 1 << nbits) - 1);
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
static inline int
Packit 5c3484
win_size (mp_bitcnt_t eb)
Packit 5c3484
{
Packit 5c3484
  int k;
Packit 5c3484
  static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
Packit 5c3484
  for (k = 1; eb > x[k]; k++)
Packit 5c3484
    ;
Packit 5c3484
  return k;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Convert U to REDC form, U_r = B^n * U mod M */
Packit 5c3484
static void
Packit 5c3484
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
Packit 5c3484
{
Packit 5c3484
  mp_ptr tp, qp;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
Packit 5c3484
Packit 5c3484
  MPN_ZERO (tp, n);
Packit 5c3484
  MPN_COPY (tp + n, up, un);
Packit 5c3484
  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
Packit 5c3484
   Requires that mp[n-1..0] is odd.
Packit 5c3484
   Requires that ep[en-1..0] is > 1.
Packit 5c3484
   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
Packit 5c3484
void
Packit 5c3484
mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
Packit 5c3484
	  mp_srcptr ep, mp_size_t en,
Packit 5c3484
	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t ip[2], *mip;
Packit 5c3484
  int cnt;
Packit 5c3484
  mp_bitcnt_t ebi;
Packit 5c3484
  int windowsize, this_windowsize;
Packit 5c3484
  mp_limb_t expbits;
Packit 5c3484
  mp_ptr pp, this_pp;
Packit 5c3484
  long i;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
Packit 5c3484
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
Packit 5c3484
Packit 5c3484
#if 0
Packit 5c3484
  if (bn < n)
Packit 5c3484
    {
Packit 5c3484
      /* Do the first few exponent bits without mod reductions,
Packit 5c3484
	 until the result is greater than the mod argument.  */
Packit 5c3484
      for (;;)
Packit 5c3484
	{
Packit 5c3484
	  mpn_sqr (tp, this_pp, tn);
Packit 5c3484
	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
Packit 5c3484
	  if (getbit (ep, ebi) != 0)
Packit 5c3484
	    mpn_mul (..., tp, tn, bp, bn);
Packit 5c3484
	  ebi--;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  windowsize = win_size (ebi);
Packit 5c3484
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      mip = ip;
Packit 5c3484
      binvert_limb (mip[0], mp[0]);
Packit 5c3484
      mip[0] = -mip[0];
Packit 5c3484
    }
Packit 5c3484
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      mip = ip;
Packit 5c3484
      mpn_binvert (mip, mp, 2, tp);
Packit 5c3484
      mip[0] = -mip[0]; mip[1] = ~mip[1];
Packit 5c3484
    }
Packit 5c3484
#else
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      mip = ip;
Packit 5c3484
      binvert_limb (mip[0], mp[0]);
Packit 5c3484
      mip[0] = -mip[0];
Packit 5c3484
    }
Packit 5c3484
#endif
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mip = TMP_ALLOC_LIMBS (n);
Packit 5c3484
      mpn_binvert (mip, mp, n, tp);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
Packit 5c3484
Packit 5c3484
  this_pp = pp;
Packit 5c3484
  redcify (this_pp, bp, bn, mp, n);
Packit 5c3484
Packit 5c3484
  /* Store b^2 at rp.  */
Packit 5c3484
  mpn_sqr (tp, this_pp, n);
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
Packit 5c3484
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
Packit 5c3484
    MPN_REDC_2 (rp, tp, mp, n, mip);
Packit 5c3484
#else
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
Packit 5c3484
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
Packit 5c3484
#endif
Packit 5c3484
  else
Packit 5c3484
    mpn_redc_n (rp, tp, mp, n, mip);
Packit 5c3484
Packit 5c3484
  /* Precompute odd powers of b and put them in the temporary area at pp.  */
Packit 5c3484
  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
Packit 5c3484
    {
Packit 5c3484
      mpn_mul_n (tp, this_pp, rp, n);
Packit 5c3484
      this_pp += n;
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
Packit 5c3484
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
Packit 5c3484
	MPN_REDC_2 (this_pp, tp, mp, n, mip);
Packit 5c3484
#else
Packit 5c3484
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
Packit 5c3484
	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
Packit 5c3484
#endif
Packit 5c3484
      else
Packit 5c3484
	mpn_redc_n (this_pp, tp, mp, n, mip);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  expbits = getbits (ep, ebi, windowsize);
Packit 5c3484
  if (ebi < windowsize)
Packit 5c3484
    ebi = 0;
Packit 5c3484
  else
Packit 5c3484
    ebi -= windowsize;
Packit 5c3484
Packit 5c3484
  count_trailing_zeros (cnt, expbits);
Packit 5c3484
  ebi += cnt;
Packit 5c3484
  expbits >>= cnt;
Packit 5c3484
Packit 5c3484
  MPN_COPY (rp, pp + n * (expbits >> 1), n);
Packit 5c3484
Packit 5c3484
#define INNERLOOP							\
Packit 5c3484
  while (ebi != 0)							\
Packit 5c3484
    {									\
Packit 5c3484
      while (getbit (ep, ebi) == 0)					\
Packit 5c3484
	{								\
Packit 5c3484
	  MPN_SQR (tp, rp, n);						\
Packit 5c3484
	  MPN_REDUCE (rp, tp, mp, n, mip);				\
Packit 5c3484
	  ebi--;							\
Packit 5c3484
	  if (ebi == 0)							\
Packit 5c3484
	    goto done;							\
Packit 5c3484
	}								\
Packit 5c3484
									\
Packit 5c3484
      /* The next bit of the exponent is 1.  Now extract the largest	\
Packit 5c3484
	 block of bits <= windowsize, and such that the least		\
Packit 5c3484
	 significant bit is 1.  */					\
Packit 5c3484
									\
Packit 5c3484
      expbits = getbits (ep, ebi, windowsize);				\
Packit 5c3484
      this_windowsize = windowsize;					\
Packit 5c3484
      if (ebi < windowsize)						\
Packit 5c3484
	{								\
Packit 5c3484
	  this_windowsize -= windowsize - ebi;				\
Packit 5c3484
	  ebi = 0;							\
Packit 5c3484
	}								\
Packit 5c3484
      else								\
Packit 5c3484
        ebi -= windowsize;						\
Packit 5c3484
									\
Packit 5c3484
      count_trailing_zeros (cnt, expbits);				\
Packit 5c3484
      this_windowsize -= cnt;						\
Packit 5c3484
      ebi += cnt;							\
Packit 5c3484
      expbits >>= cnt;							\
Packit 5c3484
									\
Packit 5c3484
      do								\
Packit 5c3484
	{								\
Packit 5c3484
	  MPN_SQR (tp, rp, n);						\
Packit 5c3484
	  MPN_REDUCE (rp, tp, mp, n, mip);				\
Packit 5c3484
	  this_windowsize--;						\
Packit 5c3484
	}								\
Packit 5c3484
      while (this_windowsize != 0);					\
Packit 5c3484
									\
Packit 5c3484
      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
Packit 5c3484
      MPN_REDUCE (rp, tp, mp, n, mip);					\
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
Packit 5c3484
    {
Packit 5c3484
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
Packit 5c3484
	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
Packit 5c3484
	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
Packit 5c3484
	  INNERLOOP;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
Packit 5c3484
	  INNERLOOP;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
Packit 5c3484
	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	  INNERLOOP;
Packit 5c3484
	}
Packit 5c3484
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
Packit 5c3484
	  INNERLOOP;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
Packit 5c3484
	  INNERLOOP;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
#else  /* WANT_REDC_2 */
Packit 5c3484
Packit 5c3484
  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
Packit 5c3484
    {
Packit 5c3484
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
Packit 5c3484
	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
Packit 5c3484
	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
Packit 5c3484
	  INNERLOOP;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
Packit 5c3484
	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	      INNERLOOP;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
Packit 5c3484
	  INNERLOOP;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
Packit 5c3484
	  INNERLOOP;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
#endif  /* WANT_REDC_2 */
Packit 5c3484
Packit 5c3484
 done:
Packit 5c3484
Packit 5c3484
  MPN_COPY (tp, rp, n);
Packit 5c3484
  MPN_ZERO (tp + n, n);
Packit 5c3484
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
Packit 5c3484
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
Packit 5c3484
    MPN_REDC_2 (rp, tp, mp, n, mip);
Packit 5c3484
#else
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
Packit 5c3484
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
Packit 5c3484
#endif
Packit 5c3484
  else
Packit 5c3484
    mpn_redc_n (rp, tp, mp, n, mip);
Packit 5c3484
Packit 5c3484
  if (mpn_cmp (rp, mp, n) >= 0)
Packit 5c3484
    mpn_sub_n (rp, rp, mp, n);
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}