Blame mpn/generic/mod_1_4.c

Packit 5c3484
/* mpn_mod_1s_4p (ap, n, b, cps)
Packit 5c3484
   Divide (ap,,n) by b.  Return the single-limb remainder.
Packit 5c3484
   Requires that b < B / 4.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
   Based on a suggestion by Peter L. Montgomery.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2008-2010 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t bi;
Packit 5c3484
  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
Packit 5c3484
  int cnt;
Packit 5c3484
Packit 5c3484
  ASSERT (b <= (~(mp_limb_t) 0) / 4);
Packit 5c3484
Packit 5c3484
  count_leading_zeros (cnt, b);
Packit 5c3484
Packit 5c3484
  b <<= cnt;
Packit 5c3484
  invert_limb (bi, b);
Packit 5c3484
Packit 5c3484
  cps[0] = bi;
Packit 5c3484
  cps[1] = cnt;
Packit 5c3484
Packit 5c3484
  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
Packit 5c3484
  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
Packit 5c3484
  cps[2] = B1modb >> cnt;
Packit 5c3484
Packit 5c3484
  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
Packit 5c3484
  cps[3] = B2modb >> cnt;
Packit 5c3484
Packit 5c3484
  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
Packit 5c3484
  cps[4] = B3modb >> cnt;
Packit 5c3484
Packit 5c3484
  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
Packit 5c3484
  cps[5] = B4modb >> cnt;
Packit 5c3484
Packit 5c3484
  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
Packit 5c3484
  cps[6] = B5modb >> cnt;
Packit 5c3484
Packit 5c3484
#if WANT_ASSERT
Packit 5c3484
  {
Packit 5c3484
    int i;
Packit 5c3484
    b = cps[2];
Packit 5c3484
    for (i = 3; i <= 6; i++)
Packit 5c3484
      {
Packit 5c3484
	b += cps[i];
Packit 5c3484
	ASSERT (b >= cps[i]);
Packit 5c3484
      }
Packit 5c3484
  }
Packit 5c3484
#endif
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
Packit 5c3484
{
Packit 5c3484
  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
Packit 5c3484
  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
Packit 5c3484
  mp_size_t i;
Packit 5c3484
  int cnt;
Packit 5c3484
Packit 5c3484
  ASSERT (n >= 1);
Packit 5c3484
Packit 5c3484
  B1modb = cps[2];
Packit 5c3484
  B2modb = cps[3];
Packit 5c3484
  B3modb = cps[4];
Packit 5c3484
  B4modb = cps[5];
Packit 5c3484
  B5modb = cps[6];
Packit 5c3484
Packit 5c3484
  switch (n & 3)
Packit 5c3484
    {
Packit 5c3484
    case 0:
Packit 5c3484
      umul_ppmm (ph, pl, ap[n - 3], B1modb);
Packit 5c3484
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
Packit 5c3484
      umul_ppmm (ch, cl, ap[n - 2], B2modb);
Packit 5c3484
      add_ssaaaa (ph, pl, ph, pl, ch, cl);
Packit 5c3484
      umul_ppmm (rh, rl, ap[n - 1], B3modb);
Packit 5c3484
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
Packit 5c3484
      n -= 4;
Packit 5c3484
      break;
Packit 5c3484
    case 1:
Packit 5c3484
      rh = 0;
Packit 5c3484
      rl = ap[n - 1];
Packit 5c3484
      n -= 1;
Packit 5c3484
      break;
Packit 5c3484
    case 2:
Packit 5c3484
      rh = ap[n - 1];
Packit 5c3484
      rl = ap[n - 2];
Packit 5c3484
      n -= 2;
Packit 5c3484
      break;
Packit 5c3484
    case 3:
Packit 5c3484
      umul_ppmm (ph, pl, ap[n - 2], B1modb);
Packit 5c3484
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
Packit 5c3484
      umul_ppmm (rh, rl, ap[n - 1], B2modb);
Packit 5c3484
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
Packit 5c3484
      n -= 3;
Packit 5c3484
      break;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  for (i = n - 4; i >= 0; i -= 4)
Packit 5c3484
    {
Packit 5c3484
      /* rr = ap[i]				< B
Packit 5c3484
	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
Packit 5c3484
	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
Packit 5c3484
	    + ap[i+3] * (B^3 mod b)		<= (B-1)(b-1)
Packit 5c3484
	    + LO(rr)  * (B^4 mod b)		<= (B-1)(b-1)
Packit 5c3484
	    + HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
Packit 5c3484
      */
Packit 5c3484
      umul_ppmm (ph, pl, ap[i + 1], B1modb);
Packit 5c3484
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
Packit 5c3484
Packit 5c3484
      umul_ppmm (ch, cl, ap[i + 2], B2modb);
Packit 5c3484
      add_ssaaaa (ph, pl, ph, pl, ch, cl);
Packit 5c3484
Packit 5c3484
      umul_ppmm (ch, cl, ap[i + 3], B3modb);
Packit 5c3484
      add_ssaaaa (ph, pl, ph, pl, ch, cl);
Packit 5c3484
Packit 5c3484
      umul_ppmm (ch, cl, rl, B4modb);
Packit 5c3484
      add_ssaaaa (ph, pl, ph, pl, ch, cl);
Packit 5c3484
Packit 5c3484
      umul_ppmm (rh, rl, rh, B5modb);
Packit 5c3484
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  umul_ppmm (rh, cl, rh, B1modb);
Packit 5c3484
  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
Packit 5c3484
Packit 5c3484
  cnt = cps[1];
Packit 5c3484
  bi = cps[0];
Packit 5c3484
Packit 5c3484
  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
Packit 5c3484
  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
Packit 5c3484
Packit 5c3484
  return r >> cnt;
Packit 5c3484
}