Blame mpn/generic/broot.c

Packit 5c3484
/* mpn_broot -- Compute hensel sqrt
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Niels Möller
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
Packit 5c3484
   typical use will have e small. */
Packit 5c3484
static mp_limb_t
Packit 5c3484
powlimb (mp_limb_t a, mp_limb_t e)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t r = 1;
Packit 5c3484
  mp_limb_t s = a;
Packit 5c3484
Packit 5c3484
  for (r = 1, s = a; e > 0; e >>= 1, s *= s)
Packit 5c3484
    if (e & 1)
Packit 5c3484
      r *= s;
Packit 5c3484
Packit 5c3484
  return r;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
Packit 5c3484
Packit 5c3484
   Iterates
Packit 5c3484
Packit 5c3484
     r' <-- r - r * (a^{k-1} r^k - 1) / n
Packit 5c3484
Packit 5c3484
   If
Packit 5c3484
Packit 5c3484
     a^{k-1} r^k = 1 (mod 2^m),
Packit 5c3484
Packit 5c3484
   then
Packit 5c3484
Packit 5c3484
     a^{k-1} r'^k = 1 (mod 2^{2m}),
Packit 5c3484
Packit 5c3484
   Compute the update term as
Packit 5c3484
Packit 5c3484
     r' = r - (a^{k-1} r^{k+1} - r) / k
Packit 5c3484
Packit 5c3484
   where we still have cancellation of low limbs.
Packit 5c3484
Packit 5c3484
 */
Packit 5c3484
void
Packit 5c3484
mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
Packit 5c3484
{
Packit 5c3484
  mp_size_t sizes[GMP_LIMB_BITS * 2];
Packit 5c3484
  mp_ptr akm1, tp, rnp, ep;
Packit 5c3484
  mp_limb_t a0, r0, km1, kp1h, kinv;
Packit 5c3484
  mp_size_t rn;
Packit 5c3484
  unsigned i;
Packit 5c3484
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (ap[0] & 1);
Packit 5c3484
  ASSERT (k & 1);
Packit 5c3484
  ASSERT (k >= 3);
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  akm1 = TMP_ALLOC_LIMBS (4*n);
Packit 5c3484
  tp = akm1 + n;
Packit 5c3484
Packit 5c3484
  km1 = k-1;
Packit 5c3484
  /* FIXME: Could arrange the iteration so we don't need to compute
Packit 5c3484
     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
Packit 5c3484
     that we can use wraparound also for a*r, since the low half is
Packit 5c3484
     unchanged from the previous iteration. Or possibly mulmid. Also,
Packit 5c3484
     a r = a^{1/k}, so we get that value too, for free? */
Packit 5c3484
  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
Packit 5c3484
Packit 5c3484
  a0 = ap[0];
Packit 5c3484
  binvert_limb (kinv, k);
Packit 5c3484
Packit 5c3484
  /* 4 bits: a^{1/k - 1} (mod 16):
Packit 5c3484
Packit 5c3484
	a % 8
Packit 5c3484
	1 3 5 7
Packit 5c3484
   k%4 +-------
Packit 5c3484
     1 |1 1 1 1
Packit 5c3484
     3 |1 9 9 1
Packit 5c3484
  */
Packit 5c3484
  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
Packit 5c3484
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
Packit 5c3484
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
Packit 5c3484
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
Packit 5c3484
#if GMP_NUMB_BITS > 32
Packit 5c3484
  {
Packit 5c3484
    unsigned prec = 32;
Packit 5c3484
    do
Packit 5c3484
      {
Packit 5c3484
	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
Packit 5c3484
	prec *= 2;
Packit 5c3484
      }
Packit 5c3484
    while (prec < GMP_NUMB_BITS);
Packit 5c3484
  }
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  rp[0] = r0;
Packit 5c3484
  if (n == 1)
Packit 5c3484
    {
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
Packit 5c3484
  kp1h = k/2 + 1;
Packit 5c3484
Packit 5c3484
  /* FIXME: Special case for two limb iteration. */
Packit 5c3484
  rnp = TMP_ALLOC_LIMBS (2*n + 1);
Packit 5c3484
  ep = rnp + n;
Packit 5c3484
Packit 5c3484
  /* FIXME: Possible to this on the fly with some bit fiddling. */
Packit 5c3484
  for (i = 0; n > 1; n = (n + 1)/2)
Packit 5c3484
    sizes[i++] = n;
Packit 5c3484
Packit 5c3484
  rn = 1;
Packit 5c3484
Packit 5c3484
  while (i-- > 0)
Packit 5c3484
    {
Packit 5c3484
      /* Compute x^{k+1}. */
Packit 5c3484
      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
Packit 5c3484
			       final iteration. */
Packit 5c3484
      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
Packit 5c3484
Packit 5c3484
      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
Packit 5c3484
Packit 5c3484
      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
Packit 5c3484
      ASSERT (mpn_cmp (ep, rp, rn) == 0);
Packit 5c3484
Packit 5c3484
      ASSERT (sizes[i] <= 2*rn);
Packit 5c3484
      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
Packit 5c3484
      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
Packit 5c3484
      rn = sizes[i];
Packit 5c3484
    }
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
Packit 5c3484
void
Packit 5c3484
mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
Packit 5c3484
{
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (ap[0] & 1);
Packit 5c3484
  ASSERT (k & 1);
Packit 5c3484
Packit 5c3484
  if (k == 1)
Packit 5c3484
    {
Packit 5c3484
      MPN_COPY (rp, ap, n);
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
  tp = TMP_ALLOC_LIMBS (n);
Packit 5c3484
Packit 5c3484
  mpn_broot_invm1 (tp, ap, n, k);
Packit 5c3484
  mpn_mullo_n (rp, tp, ap, n);
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}