Blame mpn/generic/mode1o.c

Packit 5c3484
/* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
Packit 5c3484
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
Packit 5c3484
   FUTURE GNU MP RELEASES.
Packit 5c3484
Packit 5c3484
Copyright 2000-2004 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Calculate an r satisfying
Packit 5c3484
Packit 5c3484
           r*B^k + a - c == q*d
Packit 5c3484
Packit 5c3484
   where B=2^GMP_LIMB_BITS, a is {src,size}, k is either size or size-1
Packit 5c3484
   (the caller won't know which), and q is the quotient (discarded).  d must
Packit 5c3484
   be odd, c can be any limb value.
Packit 5c3484
Packit 5c3484
   If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
Packit 5c3484
Packit 5c3484
   This slightly strange function suits the initial Nx1 reduction for GCDs
Packit 5c3484
   or Jacobi symbols since the factors of 2 in B^k can be ignored, leaving
Packit 5c3484
   -r == a mod d (by passing c=0).  For a GCD the factor of -1 on r can be
Packit 5c3484
   ignored, or for the Jacobi symbol it can be accounted for.  The function
Packit 5c3484
   also suits divisibility and congruence testing since if r=0 (or r=d) is
Packit 5c3484
   obtained then a==c mod d.
Packit 5c3484
Packit 5c3484
Packit 5c3484
   r is a bit like the remainder returned by mpn_divexact_by3c, and is the
Packit 5c3484
   sort of remainder mpn_divexact_1 might return.  Like mpn_divexact_by3c, r
Packit 5c3484
   represents a borrow, since effectively quotient limbs are chosen so that
Packit 5c3484
   subtracting that multiple of d from src at each step will produce a zero
Packit 5c3484
   limb.
Packit 5c3484
Packit 5c3484
   A long calculation can be done piece by piece from low to high by passing
Packit 5c3484
   the return value from one part as the carry parameter to the next part.
Packit 5c3484
   The effective final k becomes anything between size and size-n, if n
Packit 5c3484
   pieces are used.
Packit 5c3484
Packit 5c3484
Packit 5c3484
   A similar sort of routine could be constructed based on adding multiples
Packit 5c3484
   of d at each limb, much like redc in mpz_powm does.  Subtracting however
Packit 5c3484
   has a small advantage that when subtracting to cancel out l there's never
Packit 5c3484
   a borrow into h, whereas using an addition would put a carry into h
Packit 5c3484
   depending whether l==0 or l!=0.
Packit 5c3484
Packit 5c3484
Packit 5c3484
   In terms of efficiency, this function is similar to a mul-by-inverse
Packit 5c3484
   mpn_mod_1.  Both are essentially two multiplies and are best suited to
Packit 5c3484
   CPUs with low latency multipliers (in comparison to a divide instruction
Packit 5c3484
   at least.)  But modexact has a few less supplementary operations, only
Packit 5c3484
   needs low part and high part multiplies, and has fewer working quantities
Packit 5c3484
   (helping CPUs with few registers).
Packit 5c3484
Packit 5c3484
Packit 5c3484
   In the main loop it will be noted that the new carry (call it r) is the
Packit 5c3484
   sum of the high product h and any borrow from l=s-c.  If c
Packit 5c3484
   have r
Packit 5c3484
   limb, so that q*d = B*h + l, where B=2^GMP_NUMB_BITS.  Now if h=d-1 then
Packit 5c3484
Packit 5c3484
       l = q*d - B*(d-1) <= (B-1)*d - B*(d-1) = B-d
Packit 5c3484
Packit 5c3484
   But if l=s-c produces a borrow when c<d, then l>=B-d+1 and hence will
Packit 5c3484
   never have h=d-1 and so r=h+borrow <= d-1.
Packit 5c3484
Packit 5c3484
   When c>=d, on the other hand, h=d-1 can certainly occur together with a
Packit 5c3484
   borrow, thereby giving only r<=d, as per the function definition above.
Packit 5c3484
Packit 5c3484
   As a design decision it's left to the caller to check for r=d if it might
Packit 5c3484
   be passing c>=d.  Several applications have c
Packit 5c3484
   test is often unnecessary, for example the GCDs or a plain divisibility
Packit 5c3484
   d|a test will pass c=0.
Packit 5c3484
Packit 5c3484
Packit 5c3484
   The special case for size==1 is so that it can be assumed c<=d in the
Packit 5c3484
   high<=divisor test at the end.  c<=d is only guaranteed after at least
Packit 5c3484
   one iteration of the main loop.  There's also a decent chance one % is
Packit 5c3484
   faster than a binvert_limb, though that will depend on the processor.
Packit 5c3484
Packit 5c3484
   A CPU specific implementation might want to omit the size==1 code or the
Packit 5c3484
   high
Packit 5c3484
   useful.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
Packit 5c3484
                     mp_limb_t orig_c)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t  s, h, l, inverse, dummy, dmul, ret;
Packit 5c3484
  mp_limb_t  c = orig_c;
Packit 5c3484
  mp_size_t  i;
Packit 5c3484
Packit 5c3484
  ASSERT (size >= 1);
Packit 5c3484
  ASSERT (d & 1);
Packit 5c3484
  ASSERT_MPN (src, size);
Packit 5c3484
  ASSERT_LIMB (d);
Packit 5c3484
  ASSERT_LIMB (c);
Packit 5c3484
Packit 5c3484
  if (size == 1)
Packit 5c3484
    {
Packit 5c3484
      s = src[0];
Packit 5c3484
      if (s > c)
Packit 5c3484
	{
Packit 5c3484
	  l = s-c;
Packit 5c3484
	  h = l % d;
Packit 5c3484
	  if (h != 0)
Packit 5c3484
	    h = d - h;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  l = c-s;
Packit 5c3484
	  h = l % d;
Packit 5c3484
	}
Packit 5c3484
      return h;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
Packit 5c3484
  binvert_limb (inverse, d);
Packit 5c3484
  dmul = d << GMP_NAIL_BITS;
Packit 5c3484
Packit 5c3484
  i = 0;
Packit 5c3484
  do
Packit 5c3484
    {
Packit 5c3484
      s = src[i];
Packit 5c3484
      SUBC_LIMB (c, l, s, c);
Packit 5c3484
      l = (l * inverse) & GMP_NUMB_MASK;
Packit 5c3484
      umul_ppmm (h, dummy, l, dmul);
Packit 5c3484
      c += h;
Packit 5c3484
    }
Packit 5c3484
  while (++i < size-1);
Packit 5c3484
Packit 5c3484
Packit 5c3484
  s = src[i];
Packit 5c3484
  if (s <= d)
Packit 5c3484
    {
Packit 5c3484
      /* With high<=d the final step can be a subtract and addback.  If c==0
Packit 5c3484
	 then the addback will restore to l>=0.  If c==d then will get l==d
Packit 5c3484
	 if s==0, but that's ok per the function definition.  */
Packit 5c3484
Packit 5c3484
      l = c - s;
Packit 5c3484
      if (c < s)
Packit 5c3484
	l += d;
Packit 5c3484
Packit 5c3484
      ret = l;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* Can't skip a divide, just do the loop code once more. */
Packit 5c3484
Packit 5c3484
      SUBC_LIMB (c, l, s, c);
Packit 5c3484
      l = (l * inverse) & GMP_NUMB_MASK;
Packit 5c3484
      umul_ppmm (h, dummy, l, dmul);
Packit 5c3484
      c += h;
Packit 5c3484
      ret = c;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  ASSERT (orig_c < d ? ret < d : ret <= d);
Packit 5c3484
  return ret;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
Packit 5c3484
#if 0
Packit 5c3484
Packit 5c3484
/* The following is an alternate form that might shave one cycle on a
Packit 5c3484
   superscalar processor since it takes c+=h off the dependent chain,
Packit 5c3484
   leaving just a low product, high product, and a subtract.
Packit 5c3484
Packit 5c3484
   This is for CPU specific implementations to consider.  A special case for
Packit 5c3484
   high
Packit 5c3484
Packit 5c3484
   Notice that c is only ever 0 or 1, since if s-c produces a borrow then
Packit 5c3484
   x=0xFF..FF and x-h cannot produce a borrow.  The c=(x>s) could become
Packit 5c3484
   c=(x==0xFF..FF) too, if that helped.  */
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t  s, x, y, inverse, dummy, dmul, c1, c2;
Packit 5c3484
  mp_limb_t  c = 0;
Packit 5c3484
  mp_size_t  i;
Packit 5c3484
Packit 5c3484
  ASSERT (size >= 1);
Packit 5c3484
  ASSERT (d & 1);
Packit 5c3484
Packit 5c3484
  binvert_limb (inverse, d);
Packit 5c3484
  dmul = d << GMP_NAIL_BITS;
Packit 5c3484
Packit 5c3484
  for (i = 0; i < size; i++)
Packit 5c3484
    {
Packit 5c3484
      ASSERT (c==0 || c==1);
Packit 5c3484
Packit 5c3484
      s = src[i];
Packit 5c3484
      SUBC_LIMB (c1, x, s, c);
Packit 5c3484
Packit 5c3484
      SUBC_LIMB (c2, y, x, h);
Packit 5c3484
      c = c1 + c2;
Packit 5c3484
Packit 5c3484
      y = (y * inverse) & GMP_NUMB_MASK;
Packit 5c3484
      umul_ppmm (h, dummy, y, dmul);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  h += c;
Packit 5c3484
  return h;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#endif