|
Packit |
5c3484 |
/* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
|
|
Packit |
5c3484 |
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
|
|
Packit |
5c3484 |
FUTURE GNU MP RELEASES.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 2000-2004 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Calculate an r satisfying
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
r*B^k + a - c == q*d
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
where B=2^GMP_LIMB_BITS, a is {src,size}, k is either size or size-1
|
|
Packit |
5c3484 |
(the caller won't know which), and q is the quotient (discarded). d must
|
|
Packit |
5c3484 |
be odd, c can be any limb value.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This slightly strange function suits the initial Nx1 reduction for GCDs
|
|
Packit |
5c3484 |
or Jacobi symbols since the factors of 2 in B^k can be ignored, leaving
|
|
Packit |
5c3484 |
-r == a mod d (by passing c=0). For a GCD the factor of -1 on r can be
|
|
Packit |
5c3484 |
ignored, or for the Jacobi symbol it can be accounted for. The function
|
|
Packit |
5c3484 |
also suits divisibility and congruence testing since if r=0 (or r=d) is
|
|
Packit |
5c3484 |
obtained then a==c mod d.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
r is a bit like the remainder returned by mpn_divexact_by3c, and is the
|
|
Packit |
5c3484 |
sort of remainder mpn_divexact_1 might return. Like mpn_divexact_by3c, r
|
|
Packit |
5c3484 |
represents a borrow, since effectively quotient limbs are chosen so that
|
|
Packit |
5c3484 |
subtracting that multiple of d from src at each step will produce a zero
|
|
Packit |
5c3484 |
limb.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
A long calculation can be done piece by piece from low to high by passing
|
|
Packit |
5c3484 |
the return value from one part as the carry parameter to the next part.
|
|
Packit |
5c3484 |
The effective final k becomes anything between size and size-n, if n
|
|
Packit |
5c3484 |
pieces are used.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
A similar sort of routine could be constructed based on adding multiples
|
|
Packit |
5c3484 |
of d at each limb, much like redc in mpz_powm does. Subtracting however
|
|
Packit |
5c3484 |
has a small advantage that when subtracting to cancel out l there's never
|
|
Packit |
5c3484 |
a borrow into h, whereas using an addition would put a carry into h
|
|
Packit |
5c3484 |
depending whether l==0 or l!=0.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
In terms of efficiency, this function is similar to a mul-by-inverse
|
|
Packit |
5c3484 |
mpn_mod_1. Both are essentially two multiplies and are best suited to
|
|
Packit |
5c3484 |
CPUs with low latency multipliers (in comparison to a divide instruction
|
|
Packit |
5c3484 |
at least.) But modexact has a few less supplementary operations, only
|
|
Packit |
5c3484 |
needs low part and high part multiplies, and has fewer working quantities
|
|
Packit |
5c3484 |
(helping CPUs with few registers).
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
In the main loop it will be noted that the new carry (call it r) is the
|
|
Packit |
5c3484 |
sum of the high product h and any borrow from l=s-c. If c
|
|
Packit |
5c3484 |
have r
|
|
Packit |
5c3484 |
limb, so that q*d = B*h + l, where B=2^GMP_NUMB_BITS. Now if h=d-1 then
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
l = q*d - B*(d-1) <= (B-1)*d - B*(d-1) = B-d
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
But if l=s-c produces a borrow when c<d, then l>=B-d+1 and hence will
|
|
Packit |
5c3484 |
never have h=d-1 and so r=h+borrow <= d-1.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
When c>=d, on the other hand, h=d-1 can certainly occur together with a
|
|
Packit |
5c3484 |
borrow, thereby giving only r<=d, as per the function definition above.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
As a design decision it's left to the caller to check for r=d if it might
|
|
Packit |
5c3484 |
be passing c>=d. Several applications have c
|
|
Packit |
5c3484 |
test is often unnecessary, for example the GCDs or a plain divisibility
|
|
Packit |
5c3484 |
d|a test will pass c=0.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The special case for size==1 is so that it can be assumed c<=d in the
|
|
Packit |
5c3484 |
high<=divisor test at the end. c<=d is only guaranteed after at least
|
|
Packit |
5c3484 |
one iteration of the main loop. There's also a decent chance one % is
|
|
Packit |
5c3484 |
faster than a binvert_limb, though that will depend on the processor.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
A CPU specific implementation might want to omit the size==1 code or the
|
|
Packit |
5c3484 |
high
|
|
Packit |
5c3484 |
useful. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
|
|
Packit |
5c3484 |
mp_limb_t orig_c)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t s, h, l, inverse, dummy, dmul, ret;
|
|
Packit |
5c3484 |
mp_limb_t c = orig_c;
|
|
Packit |
5c3484 |
mp_size_t i;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (size >= 1);
|
|
Packit |
5c3484 |
ASSERT (d & 1);
|
|
Packit |
5c3484 |
ASSERT_MPN (src, size);
|
|
Packit |
5c3484 |
ASSERT_LIMB (d);
|
|
Packit |
5c3484 |
ASSERT_LIMB (c);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (size == 1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s = src[0];
|
|
Packit |
5c3484 |
if (s > c)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
l = s-c;
|
|
Packit |
5c3484 |
h = l % d;
|
|
Packit |
5c3484 |
if (h != 0)
|
|
Packit |
5c3484 |
h = d - h;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
l = c-s;
|
|
Packit |
5c3484 |
h = l % d;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
return h;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
binvert_limb (inverse, d);
|
|
Packit |
5c3484 |
dmul = d << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
i = 0;
|
|
Packit |
5c3484 |
do
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s = src[i];
|
|
Packit |
5c3484 |
SUBC_LIMB (c, l, s, c);
|
|
Packit |
5c3484 |
l = (l * inverse) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
umul_ppmm (h, dummy, l, dmul);
|
|
Packit |
5c3484 |
c += h;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
while (++i < size-1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s = src[i];
|
|
Packit |
5c3484 |
if (s <= d)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* With high<=d the final step can be a subtract and addback. If c==0
|
|
Packit |
5c3484 |
then the addback will restore to l>=0. If c==d then will get l==d
|
|
Packit |
5c3484 |
if s==0, but that's ok per the function definition. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
l = c - s;
|
|
Packit |
5c3484 |
if (c < s)
|
|
Packit |
5c3484 |
l += d;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ret = l;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Can't skip a divide, just do the loop code once more. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
SUBC_LIMB (c, l, s, c);
|
|
Packit |
5c3484 |
l = (l * inverse) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
umul_ppmm (h, dummy, l, dmul);
|
|
Packit |
5c3484 |
c += h;
|
|
Packit |
5c3484 |
ret = c;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (orig_c < d ? ret < d : ret <= d);
|
|
Packit |
5c3484 |
return ret;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if 0
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* The following is an alternate form that might shave one cycle on a
|
|
Packit |
5c3484 |
superscalar processor since it takes c+=h off the dependent chain,
|
|
Packit |
5c3484 |
leaving just a low product, high product, and a subtract.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This is for CPU specific implementations to consider. A special case for
|
|
Packit |
5c3484 |
high
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Notice that c is only ever 0 or 1, since if s-c produces a borrow then
|
|
Packit |
5c3484 |
x=0xFF..FF and x-h cannot produce a borrow. The c=(x>s) could become
|
|
Packit |
5c3484 |
c=(x==0xFF..FF) too, if that helped. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t s, x, y, inverse, dummy, dmul, c1, c2;
|
|
Packit |
5c3484 |
mp_limb_t c = 0;
|
|
Packit |
5c3484 |
mp_size_t i;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (size >= 1);
|
|
Packit |
5c3484 |
ASSERT (d & 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
binvert_limb (inverse, d);
|
|
Packit |
5c3484 |
dmul = d << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (i = 0; i < size; i++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ASSERT (c==0 || c==1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s = src[i];
|
|
Packit |
5c3484 |
SUBC_LIMB (c1, x, s, c);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
SUBC_LIMB (c2, y, x, h);
|
|
Packit |
5c3484 |
c = c1 + c2;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
y = (y * inverse) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
umul_ppmm (h, dummy, y, dmul);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
h += c;
|
|
Packit |
5c3484 |
return h;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#endif
|