Blame mpn/generic/rootrem.c

Packit 5c3484
/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
Packit 5c3484
   store the truncated integer part at rootp and the remainder at remp.
Packit 5c3484
Packit 5c3484
   Contributed by Paul Zimmermann (algorithm) and
Packit 5c3484
   Paul Zimmermann and Torbjorn Granlund (implementation).
Packit 5c3484
   Marco Bodrato wrote logbased_root to seed the loop. 
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
Packit 5c3484
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
Packit 5c3484
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
/* FIXME:
Packit 5c3484
     This implementation is not optimal when remp == NULL, since the complexity
Packit 5c3484
     is M(n), whereas it should be M(n/k) on average.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
#include <stdio.h>		/* for NULL */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
Packit 5c3484
				       mp_limb_t, int);
Packit 5c3484
Packit 5c3484
#define MPN_RSHIFT(rp,up,un,cnt) \
Packit 5c3484
  do {									\
Packit 5c3484
    if ((cnt) != 0)							\
Packit 5c3484
      mpn_rshift (rp, up, un, cnt);					\
Packit 5c3484
    else								\
Packit 5c3484
      {									\
Packit 5c3484
	MPN_COPY_INCR (rp, up, un);					\
Packit 5c3484
      }									\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#define MPN_LSHIFT(cy,rp,up,un,cnt) \
Packit 5c3484
  do {									\
Packit 5c3484
    if ((cnt) != 0)							\
Packit 5c3484
      cy = mpn_lshift (rp, up, un, cnt);				\
Packit 5c3484
    else								\
Packit 5c3484
      {									\
Packit 5c3484
	MPN_COPY_DECR (rp, up, un);					\
Packit 5c3484
	cy = 0;								\
Packit 5c3484
      }									\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
Packit 5c3484
   If remp <> NULL, put in {remp, un} the remainder.
Packit 5c3484
   Return the size (in limbs) of the remainder if remp <> NULL,
Packit 5c3484
	  or a non-zero value iff the remainder is non-zero when remp = NULL.
Packit 5c3484
   Assumes:
Packit 5c3484
   (a) up[un-1] is not zero
Packit 5c3484
   (b) rootp has at least space for ceil(un/k) limbs
Packit 5c3484
   (c) remp has at least space for un limbs (in case remp <> NULL)
Packit 5c3484
   (d) the operands do not overlap.
Packit 5c3484
Packit 5c3484
   The auxiliary memory usage is 3*un+2 if remp = NULL,
Packit 5c3484
   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
Packit 5c3484
*/
Packit 5c3484
mp_size_t
Packit 5c3484
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
Packit 5c3484
	     mp_srcptr up, mp_size_t un, mp_limb_t k)
Packit 5c3484
{
Packit 5c3484
  ASSERT (un > 0);
Packit 5c3484
  ASSERT (up[un - 1] != 0);
Packit 5c3484
  ASSERT (k > 1);
Packit 5c3484
Packit 5c3484
  if (UNLIKELY (k == 2))
Packit 5c3484
    return mpn_sqrtrem (rootp, remp, up, un);
Packit 5c3484
  /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */
Packit 5c3484
  if (remp == NULL && (un + 2) / 3 > k)
Packit 5c3484
    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
Packit 5c3484
       with one more limb, allowing us to compute the exact integral result. */
Packit 5c3484
    {
Packit 5c3484
      mp_ptr sp, wp;
Packit 5c3484
      mp_size_t rn, sn, wn;
Packit 5c3484
      TMP_DECL;
Packit 5c3484
      TMP_MARK;
Packit 5c3484
      wn = un + k;
Packit 5c3484
      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
Packit 5c3484
      TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
Packit 5c3484
			 sp, sn); /* approximate root of padded input */
Packit 5c3484
      MPN_COPY (wp + k, up, un);
Packit 5c3484
      MPN_FILL (wp, k, 0);
Packit 5c3484
      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
Packit 5c3484
      /* The approximate root S = {sp,sn} is either the correct root of
Packit 5c3484
	 {sp,sn}, or 1 too large.  Thus unless the least significant limb of
Packit 5c3484
	 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
Packit 5c3484
	 limb.  (In case sp[0]=1, we can deduce the root, but not decide
Packit 5c3484
	 whether it is exact or not.) */
Packit 5c3484
      MPN_COPY (rootp, sp + 1, sn - 1);
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return rn;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#define LOGROOT_USED_BITS 8
Packit 5c3484
#define LOGROOT_NEEDS_TWO_CORRECTIONS 1
Packit 5c3484
#define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
Packit 5c3484
/* Puts in *rootp some bits of the k^nt root of the number
Packit 5c3484
   2^bitn * 1.op ; where op represents the "fractional" bits.
Packit 5c3484
Packit 5c3484
   The returned value is the number of bits of the root minus one;
Packit 5c3484
   i.e. an approximation of the root will be
Packit 5c3484
   (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).
Packit 5c3484
Packit 5c3484
   Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
Packit 5c3484
   one is not counted).
Packit 5c3484
 */
Packit 5c3484
static unsigned
Packit 5c3484
logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
Packit 5c3484
{
Packit 5c3484
  /* vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
Packit 5c3484
  static const
Packit 5c3484
  unsigned char vlog[] = {1,   2,   4,   5,   7,   8,   9,  11,  12,  14,  15,  16,  18,  19,  21,  22,
Packit 5c3484
			 23,  25,  26,  27,  29,  30,  31,  33,  34,  35,  37,  38,  39,  40,  42,  43,
Packit 5c3484
			 44,  46,  47,  48,  49,  51,  52,  53,  54,  56,  57,  58,  59,  61,  62,  63,
Packit 5c3484
			 64,  65,  67,  68,  69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,
Packit 5c3484
			 83,  84,  85,  87,  88,  89,  90,  91,  92,  93,  94,  96,  97,  98,  99, 100,
Packit 5c3484
			101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
Packit 5c3484
			118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
Packit 5c3484
			135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
Packit 5c3484
			150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
Packit 5c3484
			165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
Packit 5c3484
			180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
Packit 5c3484
			194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
Packit 5c3484
			207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
Packit 5c3484
			220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
Packit 5c3484
			232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
Packit 5c3484
			245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};
Packit 5c3484
Packit 5c3484
  /* vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
Packit 5c3484
  static const
Packit 5c3484
  unsigned char vexp[] = {0,   1,   2,   2,   3,   4,   4,   5,   6,   7,   7,   8,   9,   9,  10,  11,
Packit 5c3484
			 12,  12,  13,  14,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  22,  23,
Packit 5c3484
			 23,  24,  25,  26,  26,  27,  28,  29,  30,  30,  31,  32,  33,  33,  34,  35,
Packit 5c3484
			 36,  37,  37,  38,  39,  40,  41,  41,  42,  43,  44,  45,  45,  46,  47,  48,
Packit 5c3484
			 49,  50,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  59,  60,  61,  61,
Packit 5c3484
			 62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  73,  74,  75,  75,
Packit 5c3484
			 76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  86,  87,  88,  89,  90,
Packit 5c3484
			 91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
Packit 5c3484
			107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
Packit 5c3484
			123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
Packit 5c3484
			139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
Packit 5c3484
			157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
Packit 5c3484
			175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
Packit 5c3484
			194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
Packit 5c3484
			214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
Packit 5c3484
			235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
Packit 5c3484
  mp_bitcnt_t retval;
Packit 5c3484
Packit 5c3484
  if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
Packit 5c3484
    {
Packit 5c3484
      /* In the unlikely case, we use two divisions and a modulo. */
Packit 5c3484
      retval = bitn / k;
Packit 5c3484
      bitn %= k;
Packit 5c3484
      bitn = (bitn << LOGROOT_USED_BITS |
Packit 5c3484
	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      bitn = (bitn << LOGROOT_USED_BITS |
Packit 5c3484
	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
Packit 5c3484
      retval = bitn >> LOGROOT_USED_BITS;
Packit 5c3484
      bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
Packit 5c3484
    }
Packit 5c3484
  ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
Packit 5c3484
  *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
Packit 5c3484
    | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
Packit 5c3484
  return retval;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* if approx is non-zero, does not compute the final remainder */
Packit 5c3484
static mp_size_t
Packit 5c3484
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
Packit 5c3484
		      mp_limb_t k, int approx)
Packit 5c3484
{
Packit 5c3484
  mp_ptr qp, rp, sp, wp, scratch;
Packit 5c3484
  mp_size_t qn, rn, sn, wn, nl, bn;
Packit 5c3484
  mp_limb_t save, save2, cy, uh;
Packit 5c3484
  mp_bitcnt_t unb; /* number of significant bits of {up,un} */
Packit 5c3484
  mp_bitcnt_t xnb; /* number of significant bits of the result */
Packit 5c3484
  mp_bitcnt_t b, kk;
Packit 5c3484
  mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
Packit 5c3484
  int ni;
Packit 5c3484
  int perf_pow;
Packit 5c3484
  unsigned ulz, snb, c, logk;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
Packit 5c3484
  uh = up[un - 1];
Packit 5c3484
  count_leading_zeros (ulz, uh);
Packit 5c3484
  ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
Packit 5c3484
  unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
Packit 5c3484
  /* unb is the (truncated) logarithm of the input U in base 2*/
Packit 5c3484
Packit 5c3484
  if (unb < k) /* root is 1 */
Packit 5c3484
    {
Packit 5c3484
      rootp[0] = 1;
Packit 5c3484
      if (remp == NULL)
Packit 5c3484
	un -= (*up == CNST_LIMB (1)); /* Non-zero iif {up,un} > 1 */
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mpn_sub_1 (remp, up, un, CNST_LIMB (1));
Packit 5c3484
	  un -= (remp [un - 1] == 0);	/* There should be at most one zero limb,
Packit 5c3484
				   if we demand u to be normalized  */
Packit 5c3484
	}
Packit 5c3484
      return un;
Packit 5c3484
    }
Packit 5c3484
  /* if (unb - k < k/2 + k/16) // root is 2 */
Packit 5c3484
Packit 5c3484
  if (ulz == GMP_NUMB_BITS)
Packit 5c3484
    uh = up[un - 2];
Packit 5c3484
  else
Packit 5c3484
    uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
Packit 5c3484
  ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);
Packit 5c3484
Packit 5c3484
  xnb = logbased_root (rootp, uh, unb, k);
Packit 5c3484
  snb = LOGROOT_RETURNED_BITS - 1;
Packit 5c3484
  /* xnb+1 is the number of bits of the root R */
Packit 5c3484
  /* snb+1 is the number of bits of the current approximation S */
Packit 5c3484
Packit 5c3484
  kk = k * xnb;		/* number of truncated bits in the input */
Packit 5c3484
Packit 5c3484
  /* FIXME: Should we skip the next two loops when xnb <= snb ? */
Packit 5c3484
  for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk )
Packit 5c3484
    ;
Packit 5c3484
  /* logk = ceil(log(k)/log(2)) + 1 */
Packit 5c3484
Packit 5c3484
  /* xnb is the number of remaining bits to determine in the kth root */
Packit 5c3484
  for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
Packit 5c3484
    {
Packit 5c3484
      /* invariant: here we want xnb+1 total bits for the kth root */
Packit 5c3484
Packit 5c3484
      /* if c is the new value of xnb, this means that we'll go from a
Packit 5c3484
	 root of c+1 bits (say s') to a root of xnb+1 bits.
Packit 5c3484
	 It is proved in the book "Modern Computer Arithmetic" by Brent
Packit 5c3484
	 and Zimmermann, Chapter 1, that
Packit 5c3484
	 if s' >= k*beta, then at most one correction is necessary.
Packit 5c3484
	 Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
Packit 5c3484
	 c >= ceil((xnb + log2(k))/2). */
Packit 5c3484
      if (xnb > logk)
Packit 5c3484
	xnb = (xnb + logk) / 2;
Packit 5c3484
      else
Packit 5c3484
	--xnb;	/* add just one bit at a time */
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  *rootp >>= snb - xnb;
Packit 5c3484
  kk -= xnb;
Packit 5c3484
Packit 5c3484
  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
Packit 5c3484
  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
Packit 5c3484
     sizes[i] <= 2 * sizes[i+1].
Packit 5c3484
     Newton iteration will first compute sizes[ni-1] extra bits,
Packit 5c3484
     then sizes[ni-2], ..., then sizes[0] = b. */
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
  /* qp and wp need enough space to store S'^k where S' is an approximate
Packit 5c3484
     root. Since S' can be as large as S+2, the worst case is when S=2 and
Packit 5c3484
     S'=4. But then since we know the number of bits of S in advance, S'
Packit 5c3484
     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
Packit 5c3484
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
Packit 5c3484
     fits in un limbs, the number of extra limbs needed is bounded by
Packit 5c3484
     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
Packit 5c3484
  /* THINK: with the use of logbased_root, maybe the constant is
Packit 5c3484
     258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
Packit 5c3484
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
Packit 5c3484
  TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
Packit 5c3484
		     qp, un + EXTRA,  /* will contain quotient and remainder
Packit 5c3484
					 of R/(k*S^(k-1)), and S^k */
Packit 5c3484
		     wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
Packit 5c3484
					 and temporary for mpn_pow_1 */
Packit 5c3484
Packit 5c3484
  if (remp == NULL)
Packit 5c3484
    rp = scratch;	/* will contain the remainder */
Packit 5c3484
  else
Packit 5c3484
    rp = remp;
Packit 5c3484
  sp = rootp;
Packit 5c3484
Packit 5c3484
  sn = 1;		/* Initial approximation has one limb */
Packit 5c3484
Packit 5c3484
  for (b = xnb; ni != 0; --ni)
Packit 5c3484
    {
Packit 5c3484
      /* 1: loop invariant:
Packit 5c3484
	 {sp, sn} is the current approximation of the root, which has
Packit 5c3484
		  exactly 1 + sizes[ni] bits.
Packit 5c3484
	 {rp, rn} is the current remainder
Packit 5c3484
	 {wp, wn} = {sp, sn}^(k-1)
Packit 5c3484
	 kk = number of truncated bits of the input
Packit 5c3484
      */
Packit 5c3484
Packit 5c3484
      /* Since each iteration treats b bits from the root and thus k*b bits
Packit 5c3484
	 from the input, and we already considered b bits from the input,
Packit 5c3484
	 we now have to take another (k-1)*b bits from the input. */
Packit 5c3484
      kk -= (k - 1) * b; /* remaining input bits */
Packit 5c3484
      /* {rp, rn} = floor({up, un} / 2^kk) */
Packit 5c3484
      rn = un - kk / GMP_NUMB_BITS;
Packit 5c3484
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
Packit 5c3484
      rn -= rp[rn - 1] == 0;
Packit 5c3484
Packit 5c3484
      /* 9: current buffers: {sp,sn}, {rp,rn} */
Packit 5c3484
Packit 5c3484
      for (c = 0;; c++)
Packit 5c3484
	{
Packit 5c3484
	  /* Compute S^k in {qp,qn}. */
Packit 5c3484
	  /* W <- S^(k-1) for the next iteration,
Packit 5c3484
	     and S^k = W * S. */
Packit 5c3484
	  wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
Packit 5c3484
	  mpn_mul (qp, wp, wn, sp, sn);
Packit 5c3484
	  qn = wn + sn;
Packit 5c3484
	  qn -= qp[qn - 1] == 0;
Packit 5c3484
Packit 5c3484
	  perf_pow = 1;
Packit 5c3484
	  /* if S^k > floor(U/2^kk), the root approximation was too large */
Packit 5c3484
	  if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
Packit 5c3484
	    MPN_DECR_U (sp, sn, 1);
Packit 5c3484
	  else
Packit 5c3484
	    break;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
Packit 5c3484
Packit 5c3484
      /* sometimes two corrections are needed with logbased_root*/
Packit 5c3484
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
Packit 5c3484
      ASSERT_ALWAYS (rn >= qn);
Packit 5c3484
Packit 5c3484
      b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
Packit 5c3484
				      next iteration */
Packit 5c3484
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */
Packit 5c3484
Packit 5c3484
      kk = kk - b;
Packit 5c3484
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
Packit 5c3484
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
Packit 5c3484
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
Packit 5c3484
		 - floor(kk / GMP_NUMB_BITS)
Packit 5c3484
	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
Packit 5c3484
		  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
Packit 5c3484
	     = 2 + (b - 2) / GMP_NUMB_BITS
Packit 5c3484
	 thus since nl is an integer:
Packit 5c3484
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
Packit 5c3484
Packit 5c3484
      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
Packit 5c3484
Packit 5c3484
      /* R = R - Q = floor(U/2^kk) - S^k */
Packit 5c3484
      if (perf_pow != 0)
Packit 5c3484
	{
Packit 5c3484
	  mpn_sub (rp, rp, rn, qp, qn);
Packit 5c3484
	  MPN_NORMALIZE_NOT_ZERO (rp, rn);
Packit 5c3484
Packit 5c3484
	  /* first multiply the remainder by 2^b */
Packit 5c3484
	  MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
Packit 5c3484
	  rn = rn + bn;
Packit 5c3484
	  if (cy != 0)
Packit 5c3484
	    {
Packit 5c3484
	      rp[rn] = cy;
Packit 5c3484
	      rn++;
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  save = rp[bn];
Packit 5c3484
	  /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
Packit 5c3484
	  if (nl - 1 > bn)
Packit 5c3484
	    save2 = rp[bn + 1];
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  rn = bn;
Packit 5c3484
	  save2 = save = 0;
Packit 5c3484
	}
Packit 5c3484
      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
Packit 5c3484
Packit 5c3484
      /* Now insert bits [kk,kk+b-1] from the input U */
Packit 5c3484
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
Packit 5c3484
      /* set to zero high bits of rp[bn] */
Packit 5c3484
      rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
Packit 5c3484
      /* restore corresponding bits */
Packit 5c3484
      rp[bn] |= save;
Packit 5c3484
      if (nl - 1 > bn)
Packit 5c3484
	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
Packit 5c3484
			       they start by bit 0 in rp[0], so they use
Packit 5c3484
			       at most ceil(b/GMP_NUMB_BITS) limbs */
Packit 5c3484
      /* FIXME: Should we normalise {rp,rn} here ?*/
Packit 5c3484
Packit 5c3484
      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
Packit 5c3484
Packit 5c3484
      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
Packit 5c3484
      cy = mpn_mul_1 (wp, wp, wn, k);
Packit 5c3484
      wp[wn] = cy;
Packit 5c3484
      wn += cy != 0;
Packit 5c3484
Packit 5c3484
      /* 6: current buffers: {sp,sn}, {qp,qn} */
Packit 5c3484
Packit 5c3484
      /* multiply the root approximation by 2^b */
Packit 5c3484
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
Packit 5c3484
      sn = sn + b / GMP_NUMB_BITS;
Packit 5c3484
      if (cy != 0)
Packit 5c3484
	{
Packit 5c3484
	  sp[sn] = cy;
Packit 5c3484
	  sn++;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      save = sp[b / GMP_NUMB_BITS];
Packit 5c3484
Packit 5c3484
      /* Number of limbs used by b bits, when least significant bit is
Packit 5c3484
	 aligned to least limb */
Packit 5c3484
      bn = (b - 1) / GMP_NUMB_BITS + 1;
Packit 5c3484
Packit 5c3484
      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
Packit 5c3484
Packit 5c3484
      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
Packit 5c3484
      if (UNLIKELY (rn < wn))
Packit 5c3484
	{
Packit 5c3484
	  MPN_FILL (sp, bn, 0);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  qn = rn - wn; /* expected quotient size */
Packit 5c3484
	  if (qn <= bn) { /* Divide only if result is not too big. */
Packit 5c3484
	    mpn_div_q (qp, rp, rn, wp, wn, scratch);
Packit 5c3484
	    qn += qp[qn] != 0;
Packit 5c3484
	  }
Packit 5c3484
Packit 5c3484
      /* 5: current buffers: {sp,sn}, {qp,qn}.
Packit 5c3484
	 Note: {rp,rn} is not needed any more since we'll compute it from
Packit 5c3484
	 scratch at the end of the loop.
Packit 5c3484
       */
Packit 5c3484
Packit 5c3484
      /* the quotient should be smaller than 2^b, since the previous
Packit 5c3484
	 approximation was correctly rounded toward zero */
Packit 5c3484
	  if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
Packit 5c3484
			  qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
Packit 5c3484
	    {
Packit 5c3484
	      for (qn = 1; qn < bn; ++qn)
Packit 5c3484
		sp[qn - 1] = GMP_NUMB_MAX;
Packit 5c3484
	      sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
      /* 7: current buffers: {sp,sn}, {qp,qn} */
Packit 5c3484
Packit 5c3484
      /* Combine sB and q to form sB + q.  */
Packit 5c3484
	      MPN_COPY (sp, qp, qn);
Packit 5c3484
	      MPN_ZERO (sp + qn, bn - qn);
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      sp[b / GMP_NUMB_BITS] |= save;
Packit 5c3484
Packit 5c3484
      /* 8: current buffer: {sp,sn} */
Packit 5c3484
Packit 5c3484
    };
Packit 5c3484
Packit 5c3484
  /* otherwise we have rn > 0, thus the return value is ok */
Packit 5c3484
  if (!approx || sp[0] <= CNST_LIMB (1))
Packit 5c3484
    {
Packit 5c3484
      for (c = 0;; c++)
Packit 5c3484
	{
Packit 5c3484
	  /* Compute S^k in {qp,qn}. */
Packit 5c3484
	  /* Last iteration: we don't need W anymore. */
Packit 5c3484
	  /* mpn_pow_1 requires that both qp and wp have enough
Packit 5c3484
	     space to store the result {sp,sn}^k + 1 limb */
Packit 5c3484
	  qn = mpn_pow_1 (qp, sp, sn, k, wp);
Packit 5c3484
Packit 5c3484
	  perf_pow = 1;
Packit 5c3484
	  if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
Packit 5c3484
	    MPN_DECR_U (sp, sn, 1);
Packit 5c3484
	  else
Packit 5c3484
	    break;
Packit 5c3484
	};
Packit 5c3484
Packit 5c3484
      /* sometimes two corrections are needed with logbased_root*/
Packit 5c3484
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
Packit 5c3484
Packit 5c3484
      rn = perf_pow != 0;
Packit 5c3484
      if (rn != 0 && remp != NULL)
Packit 5c3484
	{
Packit 5c3484
	  mpn_sub (remp, up, un, qp, qn);
Packit 5c3484
	  rn = un;
Packit 5c3484
	  MPN_NORMALIZE_NOT_ZERO (remp, rn);
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
  return rn;
Packit 5c3484
}