|
Packit |
5c3484 |
/* mpn_mod_1s_4p (ap, n, b, cps)
|
|
Packit |
5c3484 |
Divide (ap,,n) by b. Return the single-limb remainder.
|
|
Packit |
5c3484 |
Requires that b < B / 4.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Contributed to the GNU project by Torbjorn Granlund.
|
|
Packit |
5c3484 |
Based on a suggestion by Peter L. Montgomery.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
|
|
Packit |
5c3484 |
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
|
|
Packit |
5c3484 |
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 2008-2010 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t bi;
|
|
Packit |
5c3484 |
mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
|
|
Packit |
5c3484 |
int cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (b <= (~(mp_limb_t) 0) / 4);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
count_leading_zeros (cnt, b);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
b <<= cnt;
|
|
Packit |
5c3484 |
invert_limb (bi, b);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
cps[0] = bi;
|
|
Packit |
5c3484 |
cps[1] = cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
|
|
Packit |
5c3484 |
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
|
|
Packit |
5c3484 |
cps[2] = B1modb >> cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
|
|
Packit |
5c3484 |
cps[3] = B2modb >> cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
|
|
Packit |
5c3484 |
cps[4] = B3modb >> cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
|
|
Packit |
5c3484 |
cps[5] = B4modb >> cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
|
|
Packit |
5c3484 |
cps[6] = B5modb >> cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if WANT_ASSERT
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
int i;
|
|
Packit |
5c3484 |
b = cps[2];
|
|
Packit |
5c3484 |
for (i = 3; i <= 6; i++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
b += cps[i];
|
|
Packit |
5c3484 |
ASSERT (b >= cps[i]);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
|
|
Packit |
5c3484 |
mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
|
|
Packit |
5c3484 |
mp_size_t i;
|
|
Packit |
5c3484 |
int cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (n >= 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
B1modb = cps[2];
|
|
Packit |
5c3484 |
B2modb = cps[3];
|
|
Packit |
5c3484 |
B3modb = cps[4];
|
|
Packit |
5c3484 |
B4modb = cps[5];
|
|
Packit |
5c3484 |
B5modb = cps[6];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
switch (n & 3)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
case 0:
|
|
Packit |
5c3484 |
umul_ppmm (ph, pl, ap[n - 3], B1modb);
|
|
Packit |
5c3484 |
add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
|
|
Packit |
5c3484 |
umul_ppmm (ch, cl, ap[n - 2], B2modb);
|
|
Packit |
5c3484 |
add_ssaaaa (ph, pl, ph, pl, ch, cl);
|
|
Packit |
5c3484 |
umul_ppmm (rh, rl, ap[n - 1], B3modb);
|
|
Packit |
5c3484 |
add_ssaaaa (rh, rl, rh, rl, ph, pl);
|
|
Packit |
5c3484 |
n -= 4;
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
case 1:
|
|
Packit |
5c3484 |
rh = 0;
|
|
Packit |
5c3484 |
rl = ap[n - 1];
|
|
Packit |
5c3484 |
n -= 1;
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
case 2:
|
|
Packit |
5c3484 |
rh = ap[n - 1];
|
|
Packit |
5c3484 |
rl = ap[n - 2];
|
|
Packit |
5c3484 |
n -= 2;
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
case 3:
|
|
Packit |
5c3484 |
umul_ppmm (ph, pl, ap[n - 2], B1modb);
|
|
Packit |
5c3484 |
add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
|
|
Packit |
5c3484 |
umul_ppmm (rh, rl, ap[n - 1], B2modb);
|
|
Packit |
5c3484 |
add_ssaaaa (rh, rl, rh, rl, ph, pl);
|
|
Packit |
5c3484 |
n -= 3;
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (i = n - 4; i >= 0; i -= 4)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* rr = ap[i] < B
|
|
Packit |
5c3484 |
+ ap[i+1] * (B mod b) <= (B-1)(b-1)
|
|
Packit |
5c3484 |
+ ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
|
|
Packit |
5c3484 |
+ ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
|
|
Packit |
5c3484 |
+ LO(rr) * (B^4 mod b) <= (B-1)(b-1)
|
|
Packit |
5c3484 |
+ HI(rr) * (B^5 mod b) <= (B-1)(b-1)
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
umul_ppmm (ph, pl, ap[i + 1], B1modb);
|
|
Packit |
5c3484 |
add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (ch, cl, ap[i + 2], B2modb);
|
|
Packit |
5c3484 |
add_ssaaaa (ph, pl, ph, pl, ch, cl);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (ch, cl, ap[i + 3], B3modb);
|
|
Packit |
5c3484 |
add_ssaaaa (ph, pl, ph, pl, ch, cl);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (ch, cl, rl, B4modb);
|
|
Packit |
5c3484 |
add_ssaaaa (ph, pl, ph, pl, ch, cl);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (rh, rl, rh, B5modb);
|
|
Packit |
5c3484 |
add_ssaaaa (rh, rl, rh, rl, ph, pl);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (rh, cl, rh, B1modb);
|
|
Packit |
5c3484 |
add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
cnt = cps[1];
|
|
Packit |
5c3484 |
bi = cps[0];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
|
|
Packit |
5c3484 |
udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return r >> cnt;
|
|
Packit |
5c3484 |
}
|