Blame mpn/generic/brootinv.c

Packit 5c3484
/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
Packit 5c3484
Packit 5c3484
Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
Packit 5c3484
   typical use will have e small. */
Packit 5c3484
static mp_limb_t
Packit 5c3484
powlimb (mp_limb_t a, mp_limb_t e)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t r;
Packit 5c3484
Packit 5c3484
  for (r = 1; e > 0; e >>= 1, a *= a)
Packit 5c3484
    if (e & 1)
Packit 5c3484
      r *= a;
Packit 5c3484
Packit 5c3484
  return r;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Compute r such that r^k * y = 1 (mod B^n).
Packit 5c3484
Packit 5c3484
   Iterates
Packit 5c3484
     r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
Packit 5c3484
   using Hensel lifting, each time doubling the number of known bits in r.
Packit 5c3484
Packit 5c3484
   Works just for odd k.  Else the Hensel lifting degenerates.
Packit 5c3484
Packit 5c3484
   FIXME:
Packit 5c3484
Packit 5c3484
     (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
Packit 5c3484
Packit 5c3484
     (2) Rewrite iteration as
Packit 5c3484
	   r' <-- r - k^{-1} r (r^k y - 1)
Packit 5c3484
	 and take advantage of the zero low part of r^k y - 1.
Packit 5c3484
Packit 5c3484
     (3) Use wrap-around trick.
Packit 5c3484
Packit 5c3484
     (4) Use a small table to get starting value.
Packit 5c3484
Packit 5c3484
   Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_ptr tp2, tp3;
Packit 5c3484
  mp_limb_t kinv, k2, r0, y0;
Packit 5c3484
  mp_size_t order[GMP_LIMB_BITS + 1];
Packit 5c3484
  int i, d;
Packit 5c3484
Packit 5c3484
  ASSERT (bn > 0);
Packit 5c3484
  ASSERT ((k & 1) != 0);
Packit 5c3484
Packit 5c3484
  tp2 = tp + bn;
Packit 5c3484
  tp3 = tp + 2 * bn;
Packit 5c3484
  k2 = k + 1;
Packit 5c3484
Packit 5c3484
  binvert_limb (kinv, k);
Packit 5c3484
Packit 5c3484
  /* 4-bit initial approximation:
Packit 5c3484
Packit 5c3484
   y%16 | 1  3  5  7  9 11 13 15,
Packit 5c3484
    k%4 +-------------------------+k2%4
Packit 5c3484
     1  | 1 11 13  7  9  3  5 15  |  2
Packit 5c3484
     3  | 1  3  5  7  9 11 13 15  |  0
Packit 5c3484
Packit 5c3484
  */
Packit 5c3484
  y0 = yp[0];
Packit 5c3484
Packit 5c3484
  r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 2) & 8);		/* 4 bits */
Packit 5c3484
  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f));		/* 8 bits */
Packit 5c3484
  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7fff));	/* 16 bits */
Packit 5c3484
#if GMP_NUMB_BITS > 16
Packit 5c3484
  {
Packit 5c3484
    unsigned prec = 16;
Packit 5c3484
    do
Packit 5c3484
      {
Packit 5c3484
	r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
Packit 5c3484
	prec *= 2;
Packit 5c3484
      }
Packit 5c3484
    while (prec < GMP_NUMB_BITS);
Packit 5c3484
  }
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  rp[0] = r0;
Packit 5c3484
  if (bn == 1)
Packit 5c3484
    return;
Packit 5c3484
Packit 5c3484
  /* This initialization doesn't matter for the result (any garbage is
Packit 5c3484
     cancelled in the iteration), but proper initialization makes
Packit 5c3484
     valgrind happier. */
Packit 5c3484
  MPN_ZERO (rp+1, bn-1);
Packit 5c3484
Packit 5c3484
  d = 0;
Packit 5c3484
  for (; bn > 1; bn = (bn + 1) >> 1)
Packit 5c3484
    order[d++] = bn;
Packit 5c3484
Packit 5c3484
  for (i = d - 1; i >= 0; i--)
Packit 5c3484
    {
Packit 5c3484
      bn = order[i];
Packit 5c3484
Packit 5c3484
      mpn_mul_1 (tp, rp, bn, k2);
Packit 5c3484
Packit 5c3484
      mpn_powlo (tp2, rp, &k2, 1, bn, tp3);
Packit 5c3484
      mpn_mullo_n (rp, yp, tp2, bn);
Packit 5c3484
Packit 5c3484
      mpn_sub_n (tp2, tp, rp, bn);
Packit 5c3484
      mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0);
Packit 5c3484
    }
Packit 5c3484
}