Blame mpz/n_pow_ui.c

Packit 5c3484
/* mpz_n_pow_ui -- mpn raised to ulong.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
Packit 5c3484
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
Packit 5c3484
   FUTURE GNU MP RELEASES.
Packit 5c3484
Packit 5c3484
Copyright 2001, 2002, 2005, 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Change this to "#define TRACE(x) x" for some traces. */
Packit 5c3484
#define TRACE(x)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Use this to test the mul_2 code on a CPU without a native version of that
Packit 5c3484
   routine.  */
Packit 5c3484
#if 0
Packit 5c3484
#define mpn_mul_2  refmpn_mul_2
Packit 5c3484
#define HAVE_NATIVE_mpn_mul_2  1
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* mpz_pow_ui and mpz_ui_pow_ui want to share almost all of this code.
Packit 5c3484
   ui_pow_ui doesn't need the mpn_mul based powering loop or the tests on
Packit 5c3484
   bsize==2 or >2, but separating that isn't easy because there's shared
Packit 5c3484
   code both before and after (the size calculations and the powers of 2
Packit 5c3484
   handling).
Packit 5c3484
Packit 5c3484
   Alternatives:
Packit 5c3484
Packit 5c3484
   It would work to just use the mpn_mul powering loop for 1 and 2 limb
Packit 5c3484
   bases, but the current separate loop allows mul_1 and mul_2 to be done
Packit 5c3484
   in-place, which might help cache locality a bit.  If mpn_mul was relaxed
Packit 5c3484
   to allow source==dest when vn==1 or 2 then some pointer twiddling might
Packit 5c3484
   let us get the same effect in one loop.
Packit 5c3484
Packit 5c3484
   The initial powering for bsize==1 into blimb or blimb:blimb_low doesn't
Packit 5c3484
   form the biggest possible power of b that fits, only the biggest power of
Packit 5c3484
   2 power, ie. b^(2^n).  It'd be possible to choose a bigger power, perhaps
Packit 5c3484
   using mp_bases[b].big_base for small b, and thereby get better value
Packit 5c3484
   from mpn_mul_1 or mpn_mul_2 in the bignum powering.  It's felt that doing
Packit 5c3484
   so would be more complicated than it's worth, and could well end up being
Packit 5c3484
   a slowdown for small e.  For big e on the other hand the algorithm is
Packit 5c3484
   dominated by mpn_sqr so there wouldn't much of a saving.  The current
Packit 5c3484
   code can be viewed as simply doing the first few steps of the powering in
Packit 5c3484
   a single or double limb where possible.
Packit 5c3484
Packit 5c3484
   If r==b, and blow_twos==0, and r must be realloc'ed, then the temporary
Packit 5c3484
   copy made of b is unnecessary.  We could just use the old alloc'ed block
Packit 5c3484
   and free it at the end.  But arranging this seems like a lot more trouble
Packit 5c3484
   than it's worth.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* floor(sqrt(GMP_NUMB_MAX)), ie. the biggest value that can be squared in
Packit 5c3484
   a limb without overflowing.
Packit 5c3484
   FIXME: This formula is an underestimate when GMP_NUMB_BITS is odd. */
Packit 5c3484
Packit 5c3484
#define GMP_NUMB_HALFMAX  (((mp_limb_t) 1 << GMP_NUMB_BITS/2) - 1)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* The following are for convenience, they update the size and check the
Packit 5c3484
   alloc.  */
Packit 5c3484
Packit 5c3484
#define MPN_SQR(dst, alloc, src, size)          \
Packit 5c3484
  do {                                          \
Packit 5c3484
    ASSERT (2*(size) <= (alloc));               \
Packit 5c3484
    mpn_sqr (dst, src, size);                   \
Packit 5c3484
    (size) *= 2;                                \
Packit 5c3484
    (size) -= ((dst)[(size)-1] == 0);           \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#define MPN_MUL(dst, alloc, src, size, src2, size2)     \
Packit 5c3484
  do {                                                  \
Packit 5c3484
    mp_limb_t  cy;                                      \
Packit 5c3484
    ASSERT ((size) + (size2) <= (alloc));               \
Packit 5c3484
    cy = mpn_mul (dst, src, size, src2, size2);         \
Packit 5c3484
    (size) += (size2) - (cy == 0);                      \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#define MPN_MUL_2(ptr, size, alloc, mult)       \
Packit 5c3484
  do {                                          \
Packit 5c3484
    mp_limb_t  cy;                              \
Packit 5c3484
    ASSERT ((size)+2 <= (alloc));               \
Packit 5c3484
    cy = mpn_mul_2 (ptr, ptr, size, mult);      \
Packit 5c3484
    (size)++;                                   \
Packit 5c3484
    (ptr)[(size)] = cy;                         \
Packit 5c3484
    (size) += (cy != 0);                        \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#define MPN_MUL_1(ptr, size, alloc, limb)       \
Packit 5c3484
  do {                                          \
Packit 5c3484
    mp_limb_t  cy;                              \
Packit 5c3484
    ASSERT ((size)+1 <= (alloc));               \
Packit 5c3484
    cy = mpn_mul_1 (ptr, ptr, size, limb);      \
Packit 5c3484
    (ptr)[size] = cy;                           \
Packit 5c3484
    (size) += (cy != 0);                        \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#define MPN_LSHIFT(ptr, size, alloc, shift)     \
Packit 5c3484
  do {                                          \
Packit 5c3484
    mp_limb_t  cy;                              \
Packit 5c3484
    ASSERT ((size)+1 <= (alloc));               \
Packit 5c3484
    cy = mpn_lshift (ptr, ptr, size, shift);    \
Packit 5c3484
    (ptr)[size] = cy;                           \
Packit 5c3484
    (size) += (cy != 0);                        \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#define MPN_RSHIFT_OR_COPY(dst, src, size, shift)       \
Packit 5c3484
  do {                                                  \
Packit 5c3484
    if ((shift) == 0)                                   \
Packit 5c3484
      MPN_COPY (dst, src, size);                        \
Packit 5c3484
    else                                                \
Packit 5c3484
      {                                                 \
Packit 5c3484
        mpn_rshift (dst, src, size, shift);             \
Packit 5c3484
        (size) -= ((dst)[(size)-1] == 0);               \
Packit 5c3484
      }                                                 \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* ralloc and talloc are only wanted for ASSERTs, after the initial space
Packit 5c3484
   allocations.  Avoid writing values to them in a normal build, to ensure
Packit 5c3484
   the compiler lets them go dead.  gcc already figures this out itself
Packit 5c3484
   actually.  */
Packit 5c3484
Packit 5c3484
#define SWAP_RP_TP                                      \
Packit 5c3484
  do {                                                  \
Packit 5c3484
    MP_PTR_SWAP (rp, tp);                               \
Packit 5c3484
    ASSERT_CODE (MP_SIZE_T_SWAP (ralloc, talloc));      \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
Packit 5c3484
{
Packit 5c3484
  mp_ptr         rp;
Packit 5c3484
  mp_size_t      rtwos_limbs, ralloc, rsize;
Packit 5c3484
  int            rneg, i, cnt, btwos, r_bp_overlap;
Packit 5c3484
  mp_limb_t      blimb, rl;
Packit 5c3484
  mp_bitcnt_t    rtwos_bits;
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
  mp_limb_t      blimb_low, rl_high;
Packit 5c3484
#else
Packit 5c3484
  mp_limb_t      b_twolimbs[2];
Packit 5c3484
#endif
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  TRACE (printf ("mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\n",
Packit 5c3484
		 PTR(r), bp, bsize, e, e);
Packit 5c3484
	 mpn_trace ("b", bp, bsize));
Packit 5c3484
Packit 5c3484
  ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0);
Packit 5c3484
  ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ALLOC(r), bp, ABS(bsize)));
Packit 5c3484
Packit 5c3484
  /* b^0 == 1, including 0^0 == 1 */
Packit 5c3484
  if (e == 0)
Packit 5c3484
    {
Packit 5c3484
      PTR(r)[0] = 1;
Packit 5c3484
      SIZ(r) = 1;
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* 0^e == 0 apart from 0^0 above */
Packit 5c3484
  if (bsize == 0)
Packit 5c3484
    {
Packit 5c3484
      SIZ(r) = 0;
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* Sign of the final result. */
Packit 5c3484
  rneg = (bsize < 0 && (e & 1) != 0);
Packit 5c3484
  bsize = ABS (bsize);
Packit 5c3484
  TRACE (printf ("rneg %d\n", rneg));
Packit 5c3484
Packit 5c3484
  r_bp_overlap = (PTR(r) == bp);
Packit 5c3484
Packit 5c3484
  /* Strip low zero limbs from b. */
Packit 5c3484
  rtwos_limbs = 0;
Packit 5c3484
  for (blimb = *bp; blimb == 0; blimb = *++bp)
Packit 5c3484
    {
Packit 5c3484
      rtwos_limbs += e;
Packit 5c3484
      bsize--; ASSERT (bsize >= 1);
Packit 5c3484
    }
Packit 5c3484
  TRACE (printf ("trailing zero rtwos_limbs=%ld\n", rtwos_limbs));
Packit 5c3484
Packit 5c3484
  /* Strip low zero bits from b. */
Packit 5c3484
  count_trailing_zeros (btwos, blimb);
Packit 5c3484
  blimb >>= btwos;
Packit 5c3484
  rtwos_bits = e * btwos;
Packit 5c3484
  rtwos_limbs += rtwos_bits / GMP_NUMB_BITS;
Packit 5c3484
  rtwos_bits %= GMP_NUMB_BITS;
Packit 5c3484
  TRACE (printf ("trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\n",
Packit 5c3484
		 btwos, rtwos_limbs, rtwos_bits));
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  rl = 1;
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
  rl_high = 0;
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  if (bsize == 1)
Packit 5c3484
    {
Packit 5c3484
    bsize_1:
Packit 5c3484
      /* Power up as far as possible within blimb.  We start here with e!=0,
Packit 5c3484
	 but if e is small then we might reach e==0 and the whole b^e in rl.
Packit 5c3484
	 Notice this code works when blimb==1 too, reaching e==0.  */
Packit 5c3484
Packit 5c3484
      while (blimb <= GMP_NUMB_HALFMAX)
Packit 5c3484
	{
Packit 5c3484
	  TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n",
Packit 5c3484
			 e, blimb, rl));
Packit 5c3484
	  ASSERT (e != 0);
Packit 5c3484
	  if ((e & 1) != 0)
Packit 5c3484
	    rl *= blimb;
Packit 5c3484
	  e >>= 1;
Packit 5c3484
	  if (e == 0)
Packit 5c3484
	    goto got_rl;
Packit 5c3484
	  blimb *= blimb;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
      TRACE (printf ("single power, e=0x%lX b=0x%lX rl=0x%lX\n",
Packit 5c3484
		     e, blimb, rl));
Packit 5c3484
Packit 5c3484
      /* Can power b once more into blimb:blimb_low */
Packit 5c3484
      bsize = 2;
Packit 5c3484
      ASSERT (e != 0);
Packit 5c3484
      if ((e & 1) != 0)
Packit 5c3484
	{
Packit 5c3484
	  umul_ppmm (rl_high, rl, rl, blimb << GMP_NAIL_BITS);
Packit 5c3484
	  rl >>= GMP_NAIL_BITS;
Packit 5c3484
	}
Packit 5c3484
      e >>= 1;
Packit 5c3484
      umul_ppmm (blimb, blimb_low, blimb, blimb << GMP_NAIL_BITS);
Packit 5c3484
      blimb_low >>= GMP_NAIL_BITS;
Packit 5c3484
Packit 5c3484
    got_rl:
Packit 5c3484
      TRACE (printf ("double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\n",
Packit 5c3484
		     e, blimb, blimb_low, rl_high, rl));
Packit 5c3484
Packit 5c3484
      /* Combine left-over rtwos_bits into rl_high:rl to be handled by the
Packit 5c3484
	 final mul_1 or mul_2 rather than a separate lshift.
Packit 5c3484
	 - rl_high:rl mustn't be 1 (since then there's no final mul)
Packit 5c3484
	 - rl_high mustn't overflow
Packit 5c3484
	 - rl_high mustn't change to non-zero, since mul_1+lshift is
Packit 5c3484
	 probably faster than mul_2 (FIXME: is this true?)  */
Packit 5c3484
Packit 5c3484
      if (rtwos_bits != 0
Packit 5c3484
	  && ! (rl_high == 0 && rl == 1)
Packit 5c3484
	  && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t  new_rl_high = (rl_high << rtwos_bits)
Packit 5c3484
	    | (rl >> (GMP_NUMB_BITS-rtwos_bits));
Packit 5c3484
	  if (! (rl_high == 0 && new_rl_high != 0))
Packit 5c3484
	    {
Packit 5c3484
	      rl_high = new_rl_high;
Packit 5c3484
	      rl <<= rtwos_bits;
Packit 5c3484
	      rtwos_bits = 0;
Packit 5c3484
	      TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n",
Packit 5c3484
			     rl_high, rl));
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
#else
Packit 5c3484
    got_rl:
Packit 5c3484
      TRACE (printf ("small power e=0x%lX blimb=0x%lX rl=0x%lX\n",
Packit 5c3484
		     e, blimb, rl));
Packit 5c3484
Packit 5c3484
      /* Combine left-over rtwos_bits into rl to be handled by the final
Packit 5c3484
	 mul_1 rather than a separate lshift.
Packit 5c3484
	 - rl mustn't be 1 (since then there's no final mul)
Packit 5c3484
	 - rl mustn't overflow	*/
Packit 5c3484
Packit 5c3484
      if (rtwos_bits != 0
Packit 5c3484
	  && rl != 1
Packit 5c3484
	  && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
Packit 5c3484
	{
Packit 5c3484
	  rl <<= rtwos_bits;
Packit 5c3484
	  rtwos_bits = 0;
Packit 5c3484
	  TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl));
Packit 5c3484
	}
Packit 5c3484
#endif
Packit 5c3484
    }
Packit 5c3484
  else if (bsize == 2)
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t  bsecond = bp[1];
Packit 5c3484
      if (btwos != 0)
Packit 5c3484
	blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
Packit 5c3484
      bsecond >>= btwos;
Packit 5c3484
      if (bsecond == 0)
Packit 5c3484
	{
Packit 5c3484
	  /* Two limbs became one after rshift. */
Packit 5c3484
	  bsize = 1;
Packit 5c3484
	  goto bsize_1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      TRACE (printf ("bsize==2 using b=0x%lX:%lX", bsecond, blimb));
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
      blimb_low = blimb;
Packit 5c3484
#else
Packit 5c3484
      bp = b_twolimbs;
Packit 5c3484
      b_twolimbs[0] = blimb;
Packit 5c3484
      b_twolimbs[1] = bsecond;
Packit 5c3484
#endif
Packit 5c3484
      blimb = bsecond;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      if (r_bp_overlap || btwos != 0)
Packit 5c3484
	{
Packit 5c3484
	  mp_ptr tp = TMP_ALLOC_LIMBS (bsize);
Packit 5c3484
	  MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);
Packit 5c3484
	  bp = tp;
Packit 5c3484
	  TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize));
Packit 5c3484
	}
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
      /* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */
Packit 5c3484
      blimb_low = bp[0];
Packit 5c3484
#endif
Packit 5c3484
      blimb = bp[bsize-1];
Packit 5c3484
Packit 5c3484
      TRACE (printf ("big bsize=%ld  ", bsize);
Packit 5c3484
	     mpn_trace ("b", bp, bsize));
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* At this point blimb is the most significant limb of the base to use.
Packit 5c3484
Packit 5c3484
     Each factor of b takes (bsize*BPML-cnt) bits and there's e of them; +1
Packit 5c3484
     limb to round up the division; +1 for multiplies all using an extra
Packit 5c3484
     limb over the true size; +2 for rl at the end; +1 for lshift at the
Packit 5c3484
     end.
Packit 5c3484
Packit 5c3484
     The size calculation here is reasonably accurate.  The base is at least
Packit 5c3484
     half a limb, so in 32 bits the worst case is 2^16+1 treated as 17 bits
Packit 5c3484
     when it will power up as just over 16, an overestimate of 17/16 =
Packit 5c3484
     6.25%.  For a 64-bit limb it's half that.
Packit 5c3484
Packit 5c3484
     If e==0 then blimb won't be anything useful (though it will be
Packit 5c3484
     non-zero), but that doesn't matter since we just end up with ralloc==5,
Packit 5c3484
     and that's fine for 2 limbs of rl and 1 of lshift.  */
Packit 5c3484
Packit 5c3484
  ASSERT (blimb != 0);
Packit 5c3484
  count_leading_zeros (cnt, blimb);
Packit 5c3484
  ralloc = (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5;
Packit 5c3484
  TRACE (printf ("ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\n",
Packit 5c3484
		 ralloc, bsize, blimb, cnt));
Packit 5c3484
  rp = MPZ_REALLOC (r, ralloc + rtwos_limbs);
Packit 5c3484
Packit 5c3484
  /* Low zero limbs resulting from powers of 2. */
Packit 5c3484
  MPN_ZERO (rp, rtwos_limbs);
Packit 5c3484
  rp += rtwos_limbs;
Packit 5c3484
Packit 5c3484
  if (e == 0)
Packit 5c3484
    {
Packit 5c3484
      /* Any e==0 other than via bsize==1 or bsize==2 is covered at the
Packit 5c3484
	 start. */
Packit 5c3484
      rp[0] = rl;
Packit 5c3484
      rsize = 1;
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
      rp[1] = rl_high;
Packit 5c3484
      rsize += (rl_high != 0);
Packit 5c3484
#endif
Packit 5c3484
      ASSERT (rp[rsize-1] != 0);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_ptr     tp;
Packit 5c3484
      mp_size_t  talloc;
Packit 5c3484
Packit 5c3484
      /* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the
Packit 5c3484
	 low bit of e is zero, tp only has to hold the second last power
Packit 5c3484
	 step, which is half the size of the final result.  There's no need
Packit 5c3484
	 to round up the divide by 2, since ralloc includes a +2 for rl
Packit 5c3484
	 which not needed by tp.  In the mpn_mul loop when the low bit of e
Packit 5c3484
	 is 1, tp must hold nearly the full result, so just size it the same
Packit 5c3484
	 as rp.  */
Packit 5c3484
Packit 5c3484
      talloc = ralloc;
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
      if (bsize <= 2 || (e & 1) == 0)
Packit 5c3484
	talloc /= 2;
Packit 5c3484
#else
Packit 5c3484
      if (bsize <= 1 || (e & 1) == 0)
Packit 5c3484
	talloc /= 2;
Packit 5c3484
#endif
Packit 5c3484
      TRACE (printf ("talloc %ld\n", talloc));
Packit 5c3484
      tp = TMP_ALLOC_LIMBS (talloc);
Packit 5c3484
Packit 5c3484
      /* Go from high to low over the bits of e, starting with i pointing at
Packit 5c3484
	 the bit below the highest 1 (which will mean i==-1 if e==1).  */
Packit 5c3484
      count_leading_zeros (cnt, (mp_limb_t) e);
Packit 5c3484
      i = GMP_LIMB_BITS - cnt - 2;
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
      if (bsize <= 2)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t  mult[2];
Packit 5c3484
Packit 5c3484
	  /* Any bsize==1 will have been powered above to be two limbs. */
Packit 5c3484
	  ASSERT (bsize == 2);
Packit 5c3484
	  ASSERT (blimb != 0);
Packit 5c3484
Packit 5c3484
	  /* Arrange the final result ends up in r, not in the temp space */
Packit 5c3484
	  if ((i & 1) == 0)
Packit 5c3484
	    SWAP_RP_TP;
Packit 5c3484
Packit 5c3484
	  rp[0] = blimb_low;
Packit 5c3484
	  rp[1] = blimb;
Packit 5c3484
	  rsize = 2;
Packit 5c3484
Packit 5c3484
	  mult[0] = blimb_low;
Packit 5c3484
	  mult[1] = blimb;
Packit 5c3484
Packit 5c3484
	  for ( ; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
Packit 5c3484
			     i, e, rsize, ralloc, talloc);
Packit 5c3484
		     mpn_trace ("r", rp, rsize));
Packit 5c3484
Packit 5c3484
	      MPN_SQR (tp, talloc, rp, rsize);
Packit 5c3484
	      SWAP_RP_TP;
Packit 5c3484
	      if ((e & (1L << i)) != 0)
Packit 5c3484
		MPN_MUL_2 (rp, rsize, ralloc, mult);
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize));
Packit 5c3484
	  if (rl_high != 0)
Packit 5c3484
	    {
Packit 5c3484
	      mult[0] = rl;
Packit 5c3484
	      mult[1] = rl_high;
Packit 5c3484
	      MPN_MUL_2 (rp, rsize, ralloc, mult);
Packit 5c3484
	    }
Packit 5c3484
	  else if (rl != 1)
Packit 5c3484
	    MPN_MUL_1 (rp, rsize, ralloc, rl);
Packit 5c3484
	}
Packit 5c3484
#else
Packit 5c3484
      if (bsize == 1)
Packit 5c3484
	{
Packit 5c3484
	  /* Arrange the final result ends up in r, not in the temp space */
Packit 5c3484
	  if ((i & 1) == 0)
Packit 5c3484
	    SWAP_RP_TP;
Packit 5c3484
Packit 5c3484
	  rp[0] = blimb;
Packit 5c3484
	  rsize = 1;
Packit 5c3484
Packit 5c3484
	  for ( ; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
Packit 5c3484
			     i, e, rsize, ralloc, talloc);
Packit 5c3484
		     mpn_trace ("r", rp, rsize));
Packit 5c3484
Packit 5c3484
	      MPN_SQR (tp, talloc, rp, rsize);
Packit 5c3484
	      SWAP_RP_TP;
Packit 5c3484
	      if ((e & (1L << i)) != 0)
Packit 5c3484
		MPN_MUL_1 (rp, rsize, ralloc, blimb);
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize));
Packit 5c3484
	  if (rl != 1)
Packit 5c3484
	    MPN_MUL_1 (rp, rsize, ralloc, rl);
Packit 5c3484
	}
Packit 5c3484
#endif
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  int  parity;
Packit 5c3484
Packit 5c3484
	  /* Arrange the final result ends up in r, not in the temp space */
Packit 5c3484
	  ULONG_PARITY (parity, e);
Packit 5c3484
	  if (((parity ^ i) & 1) != 0)
Packit 5c3484
	    SWAP_RP_TP;
Packit 5c3484
Packit 5c3484
	  MPN_COPY (rp, bp, bsize);
Packit 5c3484
	  rsize = bsize;
Packit 5c3484
Packit 5c3484
	  for ( ; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
Packit 5c3484
			     i, e, rsize, ralloc, talloc);
Packit 5c3484
		     mpn_trace ("r", rp, rsize));
Packit 5c3484
Packit 5c3484
	      MPN_SQR (tp, talloc, rp, rsize);
Packit 5c3484
	      SWAP_RP_TP;
Packit 5c3484
	      if ((e & (1L << i)) != 0)
Packit 5c3484
		{
Packit 5c3484
		  MPN_MUL (tp, talloc, rp, rsize, bp, bsize);
Packit 5c3484
		  SWAP_RP_TP;
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  ASSERT (rp == PTR(r) + rtwos_limbs);
Packit 5c3484
  TRACE (mpn_trace ("end loop r", rp, rsize));
Packit 5c3484
  TMP_FREE;
Packit 5c3484
Packit 5c3484
  /* Apply any partial limb factors of 2. */
Packit 5c3484
  if (rtwos_bits != 0)
Packit 5c3484
    {
Packit 5c3484
      MPN_LSHIFT (rp, rsize, ralloc, (unsigned) rtwos_bits);
Packit 5c3484
      TRACE (mpn_trace ("lshift r", rp, rsize));
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  rsize += rtwos_limbs;
Packit 5c3484
  SIZ(r) = (rneg ? -rsize : rsize);
Packit 5c3484
}