Blame mpn/generic/invertappr.c

Packit 5c3484
/* mpn_invertappr and helper functions.  Compute I such that
Packit 5c3484
   floor((B^{2n}-1)/U - 1 <= I + B^n <= floor((B^{2n}-1)/U.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Marco Bodrato.
Packit 5c3484
Packit 5c3484
   The algorithm used here was inspired by ApproximateReciprocal from "Modern
Packit 5c3484
   Computer Arithmetic", by Richard P. Brent and Paul Zimmermann.  Special
Packit 5c3484
   thanks to Paul Zimmermann for his very valuable suggestions on all the
Packit 5c3484
   theoretical aspects during the work on this code.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright (C) 2007, 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
/* FIXME: The iterative version splits the operand in two slightly unbalanced
Packit 5c3484
   parts, the use of log_2 (or counting the bits) underestimate the maximum
Packit 5c3484
   number of iterations.  */
Packit 5c3484
Packit 5c3484
#if TUNE_PROGRAM_BUILD
Packit 5c3484
#define NPOWS \
Packit 5c3484
 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
Packit 5c3484
#define MAYBE_dcpi1_divappr   1
Packit 5c3484
#else
Packit 5c3484
#define NPOWS \
Packit 5c3484
 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
Packit 5c3484
#define MAYBE_dcpi1_divappr \
Packit 5c3484
  (INV_NEWTON_THRESHOLD < DC_DIVAPPR_Q_THRESHOLD)
Packit 5c3484
#if (INV_NEWTON_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD) && \
Packit 5c3484
    (INV_APPR_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD)
Packit 5c3484
#undef  INV_MULMOD_BNM1_THRESHOLD
Packit 5c3484
#define INV_MULMOD_BNM1_THRESHOLD 0 /* always when Newton */
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* All the three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch), take
Packit 5c3484
   the strictly normalised value {dp,n} (i.e., most significant bit must be set)
Packit 5c3484
   as an input, and compute {ip,n}: the approximate reciprocal of {dp,n}.
Packit 5c3484
Packit 5c3484
   Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
Packit 5c3484
   following conditions are satisfied by the output:
Packit 5c3484
     0 <= e <= 1;
Packit 5c3484
     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
Packit 5c3484
   I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
Packit 5c3484
	e=1 means that the result _may_ be one less than expected.
Packit 5c3484
Packit 5c3484
   The _bc version returns e=1 most of the time.
Packit 5c3484
   The _ni version should return e=0 most of the time; only about 1% of
Packit 5c3484
   possible random input should give e=1.
Packit 5c3484
Packit 5c3484
   When the strict result is needed, i.e., e=0 in the relation above:
Packit 5c3484
     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
Packit 5c3484
   the function mpn_invert (ip, dp, n, scratch) should be used instead.  */
Packit 5c3484
Packit 5c3484
/* Maximum scratch needed by this branch (at xp): 2*n */
Packit 5c3484
static mp_limb_t
Packit 5c3484
mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr xp)
Packit 5c3484
{
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (ip, n, xp, mpn_invertappr_itch(n)));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (dp, n, xp, mpn_invertappr_itch(n)));
Packit 5c3484
Packit 5c3484
  /* Compute a base value of r limbs. */
Packit 5c3484
  if (n == 1)
Packit 5c3484
    invert_limb (*ip, *dp);
Packit 5c3484
  else {
Packit 5c3484
    mp_size_t i;
Packit 5c3484
Packit 5c3484
    /* n > 1 here */
Packit 5c3484
    i = n;
Packit 5c3484
    do
Packit 5c3484
      xp[--i] = GMP_NUMB_MAX;
Packit 5c3484
    while (i);
Packit 5c3484
    mpn_com (xp + n, dp, n);
Packit 5c3484
Packit 5c3484
    /* Now xp contains B^2n - {dp,n}*B^n - 1 */
Packit 5c3484
Packit 5c3484
    /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
Packit 5c3484
    if (n == 2) {
Packit 5c3484
      mpn_divrem_2 (ip, 0, xp, 4, dp);
Packit 5c3484
    } else {
Packit 5c3484
      gmp_pi1_t inv;
Packit 5c3484
      invert_pi1 (inv, dp[n-1], dp[n-2]);
Packit 5c3484
      if (! MAYBE_dcpi1_divappr
Packit 5c3484
	  || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
Packit 5c3484
	mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
Packit 5c3484
      else
Packit 5c3484
	mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv;;
Packit 5c3484
      MPN_DECR_U(ip, n, CNST_LIMB (1));
Packit 5c3484
      return 1;
Packit 5c3484
    }
Packit 5c3484
  }
Packit 5c3484
  return 0;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* mpn_ni_invertappr: computes the approximate reciprocal using Newton's
Packit 5c3484
   iterations (at least one).
Packit 5c3484
Packit 5c3484
   Inspired by Algorithm "ApproximateReciprocal", published in "Modern Computer
Packit 5c3484
   Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
Packit 5c3484
   in version 0.4 of the book.
Packit 5c3484
Packit 5c3484
   Some adaptations were introduced, to allow product mod B^m-1 and return the
Packit 5c3484
   value e.
Packit 5c3484
Packit 5c3484
   We introduced a correction in such a way that "the value of
Packit 5c3484
   B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
Packit 5c3484
   "2B^n-1").
Packit 5c3484
Packit 5c3484
   Maximum scratch needed by this branch <= 2*n, but have to fit 3*rn
Packit 5c3484
   in the scratch, i.e. 3*rn <= 2*n: we require n>4.
Packit 5c3484
Packit 5c3484
   We use a wrapped product modulo B^m-1.  NOTE: is there any normalisation
Packit 5c3484
   problem for the [0] class?  It shouldn't: we compute 2*|A*X_h - B^{n+h}| <
Packit 5c3484
   B^m-1.  We may get [0] if and only if we get AX_h = B^{n+h}.  This can
Packit 5c3484
   happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1 i.e., AX_h =
Packit 5c3484
   B^{n+h} - A, then we get into the "negative" branch, where X_h is not
Packit 5c3484
   incremented (because A < B^n).
Packit 5c3484
Packit 5c3484
   FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
Packit 5c3484
   is allocated apart.
Packit 5c3484
 */
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t cy;
Packit 5c3484
  mp_size_t rn, mn;
Packit 5c3484
  mp_size_t sizes[NPOWS], *sizp;
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
#define xp scratch
Packit 5c3484
Packit 5c3484
  ASSERT (n > 4);
Packit 5c3484
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
Packit 5c3484
Packit 5c3484
  /* Compute the computation precisions from highest to lowest, leaving the
Packit 5c3484
     base case size in 'rn'.  */
Packit 5c3484
  sizp = sizes;
Packit 5c3484
  rn = n;
Packit 5c3484
  do {
Packit 5c3484
    *sizp = rn;
Packit 5c3484
    rn = (rn >> 1) + 1;
Packit 5c3484
    ++sizp;
Packit 5c3484
  } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
Packit 5c3484
Packit 5c3484
  /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
Packit 5c3484
  dp += n;
Packit 5c3484
  ip += n;
Packit 5c3484
Packit 5c3484
  /* Compute a base value of rn limbs. */
Packit 5c3484
  mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      mn = mpn_mulmod_bnm1_next_size (n + 1);
Packit 5c3484
      tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
Packit 5c3484
    }
Packit 5c3484
  /* Use Newton's iterations to get the desired precision.*/
Packit 5c3484
Packit 5c3484
  while (1) {
Packit 5c3484
    n = *--sizp;
Packit 5c3484
    /*
Packit 5c3484
      v    n  v
Packit 5c3484
      +----+--+
Packit 5c3484
      ^ rn ^
Packit 5c3484
    */
Packit 5c3484
Packit 5c3484
    /* Compute i_jd . */
Packit 5c3484
    if (BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
Packit 5c3484
	|| ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
Packit 5c3484
      /* FIXME: We do only need {xp,n+1}*/
Packit 5c3484
      mpn_mul (xp, dp - n, n, ip - rn, rn);
Packit 5c3484
      mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
Packit 5c3484
      cy = CNST_LIMB(1); /* Remember we truncated, Mod B^(n+1) */
Packit 5c3484
      /* We computed (truncated) {xp,n+1} <- 1.{ip,rn} * 0.{dp,n} */
Packit 5c3484
    } else { /* Use B^mn-1 wraparound */
Packit 5c3484
      mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
Packit 5c3484
      /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
Packit 5c3484
      /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
Packit 5c3484
      /* Add dp*B^rn mod (B^mn-1) */
Packit 5c3484
      ASSERT (n >= mn - rn);
Packit 5c3484
      cy = mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
Packit 5c3484
      cy = mpn_add_nc (xp, xp, dp - (n - (mn - rn)), n - (mn - rn), cy);
Packit 5c3484
      /* Subtract B^{rn+n}, maybe only compensate the carry*/
Packit 5c3484
      xp[mn] = CNST_LIMB (1); /* set a limit for DECR_U */
Packit 5c3484
      MPN_DECR_U (xp + rn + n - mn, 2 * mn + 1 - rn - n, CNST_LIMB (1) - cy);
Packit 5c3484
      MPN_DECR_U (xp, mn, CNST_LIMB (1) - xp[mn]); /* if DECR_U eroded xp[mn] */
Packit 5c3484
      cy = CNST_LIMB(0); /* Remember we are working Mod B^mn-1 */
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
    if (xp[n] < CNST_LIMB (2)) { /* "positive" residue class */
Packit 5c3484
      cy = xp[n]; /* 0 <= cy <= 1 here. */
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh1_n
Packit 5c3484
      if (cy++) {
Packit 5c3484
	if (mpn_cmp (xp, dp - n, n) > 0) {
Packit 5c3484
	  mp_limb_t chk;
Packit 5c3484
	  chk = mpn_sublsh1_n (xp, xp, dp - n, n);
Packit 5c3484
	  ASSERT (chk == xp[n]);
Packit 5c3484
	  ++ cy;
Packit 5c3484
	} else
Packit 5c3484
	  ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
Packit 5c3484
      }
Packit 5c3484
#else /* no mpn_sublsh1_n*/
Packit 5c3484
      if (cy++ && !mpn_sub_n (xp, xp, dp - n, n)) {
Packit 5c3484
	ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
Packit 5c3484
	++cy;
Packit 5c3484
      }
Packit 5c3484
#endif
Packit 5c3484
      /* 1 <= cy <= 3 here. */
Packit 5c3484
#if HAVE_NATIVE_mpn_rsblsh1_n
Packit 5c3484
      if (mpn_cmp (xp, dp - n, n) > 0) {
Packit 5c3484
	ASSERT_NOCARRY (mpn_rsblsh1_n (xp + n, xp, dp - n, n));
Packit 5c3484
	++cy;
Packit 5c3484
      } else
Packit 5c3484
	ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
Packit 5c3484
#else /* no mpn_rsblsh1_n*/
Packit 5c3484
      if (mpn_cmp (xp, dp - n, n) > 0) {
Packit 5c3484
	ASSERT_NOCARRY (mpn_sub_n (xp, xp, dp - n, n));
Packit 5c3484
	++cy;
Packit 5c3484
      }
Packit 5c3484
      ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
Packit 5c3484
#endif
Packit 5c3484
      MPN_DECR_U(ip - rn, rn, cy); /* 1 <= cy <= 4 here. */
Packit 5c3484
    } else { /* "negative" residue class */
Packit 5c3484
      ASSERT (xp[n] >= GMP_NUMB_MAX - CNST_LIMB(1));
Packit 5c3484
      MPN_DECR_U(xp, n + 1, cy);
Packit 5c3484
      if (xp[n] != GMP_NUMB_MAX) {
Packit 5c3484
	MPN_INCR_U(ip - rn, rn, CNST_LIMB (1));
Packit 5c3484
	ASSERT_CARRY (mpn_add_n (xp, xp, dp - n, n));
Packit 5c3484
      }
Packit 5c3484
      mpn_com (xp + 2 * n - rn, xp + n - rn, rn);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
    /* Compute x_ju_j. FIXME:We need {xp+rn,rn}, mulhi? */
Packit 5c3484
    mpn_mul_n (xp, xp + 2 * n - rn, ip - rn, rn);
Packit 5c3484
    cy = mpn_add_n (xp + rn, xp + rn, xp + 2 * n - rn, 2 * rn - n);
Packit 5c3484
    cy = mpn_add_nc (ip - n, xp + 3 * rn - n, xp + n + rn, n - rn, cy);
Packit 5c3484
    MPN_INCR_U (ip - rn, rn, cy);
Packit 5c3484
    if (sizp == sizes) { /* Get out of the cycle */
Packit 5c3484
      /* Check for possible carry propagation from below. */
Packit 5c3484
      cy = xp[3 * rn - n - 1] > GMP_NUMB_MAX - CNST_LIMB (7); /* Be conservative. */
Packit 5c3484
      /*    cy = mpn_add_1 (xp + rn, xp + rn, 2*rn - n, 4); */
Packit 5c3484
      break;
Packit 5c3484
    }
Packit 5c3484
    rn = n;
Packit 5c3484
  }
Packit 5c3484
  TMP_FREE;
Packit 5c3484
Packit 5c3484
  return cy;
Packit 5c3484
#undef xp
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
Packit 5c3484
Packit 5c3484
  if (BELOW_THRESHOLD (n, INV_NEWTON_THRESHOLD))
Packit 5c3484
    return mpn_bc_invertappr (ip, dp, n, scratch);
Packit 5c3484
  else
Packit 5c3484
    return mpn_ni_invertappr (ip, dp, n, scratch);
Packit 5c3484
}