Blame mpn/generic/mulmod_bnm1.c

Packit 5c3484
/* mulmod_bnm1.c -- multiplication mod B^n-1.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
Packit 5c3484
   Marco Bodrato.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
Packit 5c3484
   mod B^rn - 1, and values are semi-normalised; zero is represented
Packit 5c3484
   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
Packit 5c3484
   tp==rp is allowed. */
Packit 5c3484
void
Packit 5c3484
mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
Packit 5c3484
		    mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t cy;
Packit 5c3484
Packit 5c3484
  ASSERT (0 < rn);
Packit 5c3484
Packit 5c3484
  mpn_mul_n (tp, ap, bp, rn);
Packit 5c3484
  cy = mpn_add_n (rp, tp, tp + rn, rn);
Packit 5c3484
  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
Packit 5c3484
   * be no overflow when adding in the carry. */
Packit 5c3484
  MPN_INCR_U (rp, rn, cy);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
Packit 5c3484
   semi-normalised representation, computation is mod B^rn + 1. Needs
Packit 5c3484
   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
Packit 5c3484
   Output is normalised. */
Packit 5c3484
static void
Packit 5c3484
mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
Packit 5c3484
		    mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t cy;
Packit 5c3484
Packit 5c3484
  ASSERT (0 < rn);
Packit 5c3484
Packit 5c3484
  mpn_mul_n (tp, ap, bp, rn + 1);
Packit 5c3484
  ASSERT (tp[2*rn+1] == 0);
Packit 5c3484
  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
Packit 5c3484
  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
Packit 5c3484
  rp[rn] = 0;
Packit 5c3484
  MPN_INCR_U (rp, rn+1, cy );
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
Packit 5c3484
 *
Packit 5c3484
 * The result is expected to be ZERO if and only if one of the operand
Packit 5c3484
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
Packit 5c3484
 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
Packit 5c3484
 * combine results and obtain a natural number when one knows in
Packit 5c3484
 * advance that the final value is less than (B^rn-1).
Packit 5c3484
 * Moreover it should not be a problem if mulmod_bnm1 is used to
Packit 5c3484
 * compute the full product with an+bn <= rn, because this condition
Packit 5c3484
 * implies (B^an-1)(B^bn-1) < (B^rn-1) .
Packit 5c3484
 *
Packit 5c3484
 * Requires 0 < bn <= an <= rn and an + bn > rn/2
Packit 5c3484
 * Scratch need: rn + (need for recursive call OR rn + 4). This gives
Packit 5c3484
 *
Packit 5c3484
 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
Packit 5c3484
 */
Packit 5c3484
void
Packit 5c3484
mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  ASSERT (0 < bn);
Packit 5c3484
  ASSERT (bn <= an);
Packit 5c3484
  ASSERT (an <= rn);
Packit 5c3484
Packit 5c3484
  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      if (UNLIKELY (bn < rn))
Packit 5c3484
	{
Packit 5c3484
	  if (UNLIKELY (an + bn <= rn))
Packit 5c3484
	    {
Packit 5c3484
	      mpn_mul (rp, ap, an, bp, bn);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      mp_limb_t cy;
Packit 5c3484
	      mpn_mul (tp, ap, an, bp, bn);
Packit 5c3484
	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
Packit 5c3484
	      MPN_INCR_U (rp, rn, cy);
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_size_t n;
Packit 5c3484
      mp_limb_t cy;
Packit 5c3484
      mp_limb_t hi;
Packit 5c3484
Packit 5c3484
      n = rn >> 1;
Packit 5c3484
Packit 5c3484
      /* We need at least an + bn >= n, to be able to fit one of the
Packit 5c3484
	 recursive products at rp. Requiring strict inequality makes
Packit 5c3484
	 the code slightly simpler. If desired, we could avoid this
Packit 5c3484
	 restriction by initially halving rn as long as rn is even and
Packit 5c3484
	 an + bn <= rn/2. */
Packit 5c3484
Packit 5c3484
      ASSERT (an + bn > n);
Packit 5c3484
Packit 5c3484
      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
Packit 5c3484
	 and crt together as
Packit 5c3484
Packit 5c3484
	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
Packit 5c3484
      */
Packit 5c3484
Packit 5c3484
#define a0 ap
Packit 5c3484
#define a1 (ap + n)
Packit 5c3484
#define b0 bp
Packit 5c3484
#define b1 (bp + n)
Packit 5c3484
Packit 5c3484
#define xp  tp	/* 2n + 2 */
Packit 5c3484
      /* am1  maybe in {xp, n} */
Packit 5c3484
      /* bm1  maybe in {xp + n, n} */
Packit 5c3484
#define sp1 (tp + 2*n + 2)
Packit 5c3484
      /* ap1  maybe in {sp1, n + 1} */
Packit 5c3484
      /* bp1  maybe in {sp1 + n + 1, n + 1} */
Packit 5c3484
Packit 5c3484
      {
Packit 5c3484
	mp_srcptr am1, bm1;
Packit 5c3484
	mp_size_t anm, bnm;
Packit 5c3484
	mp_ptr so;
Packit 5c3484
Packit 5c3484
	bm1 = b0;
Packit 5c3484
	bnm = bn;
Packit 5c3484
	if (LIKELY (an > n))
Packit 5c3484
	  {
Packit 5c3484
	    am1 = xp;
Packit 5c3484
	    cy = mpn_add (xp, a0, n, a1, an - n);
Packit 5c3484
	    MPN_INCR_U (xp, n, cy);
Packit 5c3484
	    anm = n;
Packit 5c3484
	    so = xp + n;
Packit 5c3484
	    if (LIKELY (bn > n))
Packit 5c3484
	      {
Packit 5c3484
		bm1 = so;
Packit 5c3484
		cy = mpn_add (so, b0, n, b1, bn - n);
Packit 5c3484
		MPN_INCR_U (so, n, cy);
Packit 5c3484
		bnm = n;
Packit 5c3484
		so += n;
Packit 5c3484
	      }
Packit 5c3484
	  }
Packit 5c3484
	else
Packit 5c3484
	  {
Packit 5c3484
	    so = xp;
Packit 5c3484
	    am1 = a0;
Packit 5c3484
	    anm = an;
Packit 5c3484
	  }
Packit 5c3484
Packit 5c3484
	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
Packit 5c3484
      }
Packit 5c3484
Packit 5c3484
      {
Packit 5c3484
	int       k;
Packit 5c3484
	mp_srcptr ap1, bp1;
Packit 5c3484
	mp_size_t anp, bnp;
Packit 5c3484
Packit 5c3484
	bp1 = b0;
Packit 5c3484
	bnp = bn;
Packit 5c3484
	if (LIKELY (an > n)) {
Packit 5c3484
	  ap1 = sp1;
Packit 5c3484
	  cy = mpn_sub (sp1, a0, n, a1, an - n);
Packit 5c3484
	  sp1[n] = 0;
Packit 5c3484
	  MPN_INCR_U (sp1, n + 1, cy);
Packit 5c3484
	  anp = n + ap1[n];
Packit 5c3484
	  if (LIKELY (bn > n)) {
Packit 5c3484
	    bp1 = sp1 + n + 1;
Packit 5c3484
	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
Packit 5c3484
	    sp1[2*n+1] = 0;
Packit 5c3484
	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
Packit 5c3484
	    bnp = n + bp1[n];
Packit 5c3484
	  }
Packit 5c3484
	} else {
Packit 5c3484
	  ap1 = a0;
Packit 5c3484
	  anp = an;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
Packit 5c3484
	  k=0;
Packit 5c3484
	else
Packit 5c3484
	  {
Packit 5c3484
	    int mask;
Packit 5c3484
	    k = mpn_fft_best_k (n, 0);
Packit 5c3484
	    mask = (1<
Packit 5c3484
	    while (n & mask) {k--; mask >>=1;};
Packit 5c3484
	  }
Packit 5c3484
	if (k >= FFT_FIRST_K)
Packit 5c3484
	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
Packit 5c3484
	else if (UNLIKELY (bp1 == b0))
Packit 5c3484
	  {
Packit 5c3484
	    ASSERT (anp + bnp <= 2*n+1);
Packit 5c3484
	    ASSERT (anp + bnp > n);
Packit 5c3484
	    ASSERT (anp >= bnp);
Packit 5c3484
	    mpn_mul (xp, ap1, anp, bp1, bnp);
Packit 5c3484
	    anp = anp + bnp - n;
Packit 5c3484
	    ASSERT (anp <= n || xp[2*n]==0);
Packit 5c3484
	    anp-= anp > n;
Packit 5c3484
	    cy = mpn_sub (xp, xp, n, xp + n, anp);
Packit 5c3484
	    xp[n] = 0;
Packit 5c3484
	    MPN_INCR_U (xp, n+1, cy);
Packit 5c3484
	  }
Packit 5c3484
	else
Packit 5c3484
	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
Packit 5c3484
      }
Packit 5c3484
Packit 5c3484
      /* Here the CRT recomposition begins.
Packit 5c3484
Packit 5c3484
	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
Packit 5c3484
	 Division by 2 is a bitwise rotation.
Packit 5c3484
Packit 5c3484
	 Assumes xp normalised mod (B^n+1).
Packit 5c3484
Packit 5c3484
	 The residue class [0] is represented by [B^n-1]; except when
Packit 5c3484
	 both input are ZERO.
Packit 5c3484
      */
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
Packit 5c3484
#if HAVE_NATIVE_mpn_rsh1add_nc
Packit 5c3484
      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
Packit 5c3484
      hi = cy << (GMP_NUMB_BITS - 1);
Packit 5c3484
      cy = 0;
Packit 5c3484
      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
Packit 5c3484
	 overflows, i.e. a further increment will not overflow again. */
Packit 5c3484
#else /* ! _nc */
Packit 5c3484
      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
Packit 5c3484
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
Packit 5c3484
      cy >>= 1;
Packit 5c3484
      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
Packit 5c3484
	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
Packit 5c3484
#endif
Packit 5c3484
#if GMP_NAIL_BITS == 0
Packit 5c3484
      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
Packit 5c3484
#else
Packit 5c3484
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
Packit 5c3484
      rp[n-1] ^= hi;
Packit 5c3484
#endif
Packit 5c3484
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
Packit 5c3484
#if HAVE_NATIVE_mpn_add_nc
Packit 5c3484
      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
Packit 5c3484
#else /* ! _nc */
Packit 5c3484
      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
Packit 5c3484
#endif
Packit 5c3484
      cy += (rp[0]&1;;
Packit 5c3484
      mpn_rshift(rp, rp, n, 1);
Packit 5c3484
      ASSERT (cy <= 2);
Packit 5c3484
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
Packit 5c3484
      cy >>= 1;
Packit 5c3484
      /* We can have cy != 0 only if hi = 0... */
Packit 5c3484
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
Packit 5c3484
      rp[n-1] |= hi;
Packit 5c3484
      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
Packit 5c3484
#endif
Packit 5c3484
      ASSERT (cy <= 1);
Packit 5c3484
      /* Next increment can not overflow, read the previous comments about cy. */
Packit 5c3484
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
Packit 5c3484
      MPN_INCR_U(rp, n, cy);
Packit 5c3484
Packit 5c3484
      /* Compute the highest half:
Packit 5c3484
	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
Packit 5c3484
       */
Packit 5c3484
      if (UNLIKELY (an + bn < rn))
Packit 5c3484
	{
Packit 5c3484
	  /* Note that in this case, the only way the result can equal
Packit 5c3484
	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
Packit 5c3484
	     then the output of both the recursive calls and this CRT
Packit 5c3484
	     reconstruction is zero, not B^{rn} - 1. Which is good,
Packit 5c3484
	     since the latter representation doesn't fit in the output
Packit 5c3484
	     area.*/
Packit 5c3484
	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
Packit 5c3484
Packit 5c3484
	  /* FIXME: This subtraction of the high parts is not really
Packit 5c3484
	     necessary, we do it to get the carry out, and for sanity
Packit 5c3484
	     checking. */
Packit 5c3484
	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
Packit 5c3484
				   xp + an + bn - n, rn - (an + bn), cy);
Packit 5c3484
	  ASSERT (an + bn == rn - 1 ||
Packit 5c3484
		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
Packit 5c3484
	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
Packit 5c3484
	  ASSERT (cy == (xp + an + bn - n)[0]);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
Packit 5c3484
	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
Packit 5c3484
	     DECR will affect _at most_ the lowest n limbs. */
Packit 5c3484
	  MPN_DECR_U (rp, 2*n, cy);
Packit 5c3484
	}
Packit 5c3484
#undef a0
Packit 5c3484
#undef a1
Packit 5c3484
#undef b0
Packit 5c3484
#undef b1
Packit 5c3484
#undef xp
Packit 5c3484
#undef sp1
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Maps a requested size n to the size this function selects for it.

   Below MULMOD_BNM1_THRESHOLD the size is returned unchanged.  In the
   next two ranges it is rounded up to a multiple of 2, then of 4.
   Beyond those, it is rounded up to a multiple of 8 while ceil(n/2) is
   below MUL_FFT_MODF_THRESHOLD; otherwise the result is twice the value
   mpn_fft_next_size gives for ceil(n/2).
   NOTE(review): presumably the result is meant to be used as the rn
   argument of mpn_mulmod_bnm1 -- confirm against the callers. */
mp_size_t
mpn_mulmod_bnm1_next_size (mp_size_t n)
{
  mp_size_t result;

  if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
    {
      result = n;
    }
  else if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    {
      /* Round up to an even size. */
      result = (n + 1) & (-2);
    }
  else if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    {
      /* Round up to a multiple of 4. */
      result = (n + 3) & (-4);
    }
  else
    {
      /* ceil (n / 2) */
      mp_size_t half = (n + 1) >> 1;

      if (BELOW_THRESHOLD (half, MUL_FFT_MODF_THRESHOLD))
	{
	  /* Round up to a multiple of 8. */
	  result = (n + 7) & (-8);
	}
      else
	{
	  result = 2 * mpn_fft_next_size (half, mpn_fft_best_k (half, 0));
	}
    }

  return result;
}