Blame mpn/generic/mod_1_1.c

Packit 5c3484
/* mpn_mod_1_1p (ap, n, b, cps)
   Divide (ap,,n) by b.  Return the single-limb remainder.

   Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
   Based on a suggestion by Peter L. Montgomery.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008-2011, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
/* Selects which of the two C implementations in this file is compiled
   (the "#if MOD_1_1P_METHOD == 1" and "#if MOD_1_1P_METHOD == 2" sections).
   It may be predefined by the build; otherwise method 1 is used.  */
#ifndef MOD_1_1P_METHOD
# define MOD_1_1P_METHOD 1    /* need to make sure this is 2 for asm testing */
#endif
Packit 5c3484
Packit 5c3484
/* Define some longlong.h-style macros, but for wider operations.
 * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
 * carry out, in the form of a mask. */
Packit 5c3484
Packit 5c3484
#if defined (__GNUC__) && ! defined (NO_ASM)
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
/* add_mssaaaa for 32-bit x86: (s1,s0) = (a1,a0) + (b1,b0), with the carry
   out delivered in m as a mask (0 or all ones).

   a1 and a0 are tied to the s1 and s0 output registers (constraints "1"
   and "%2"), so add/adc accumulate b0 and b1 in place.  The closing
   "sbb m, m" computes m - m - carry, turning the carry flag into the mask.
   s0 is earlyclobber ("=&r") because it is written by the first
   instruction, before the high-word inputs have been read.  */
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ (  "add	%6, %k2\n\t"					\
	     "adc	%4, %k1\n\t"					\
	     "sbb	%k0, %k0"					\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
/* add_mssaaaa for x86_64: (s1,s0) = (a1,a0) + (b1,b0), with the carry out
   delivered in m as a mask (0 or all ones).

   a1 and a0 are tied to the s1 and s0 output registers (constraints "1"
   and "%2"), so add/adc accumulate b0 and b1 in place.  The closing
   "sbb m, m" computes m - m - carry, turning the carry flag into the mask.
   s0 is earlyclobber ("=&r") because it is written by the first
   instruction, before the high-word inputs have been read.  */
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ (  "add	%6, %q2\n\t"					\
	     "adc	%4, %q1\n\t"					\
	     "sbb	%q0, %q0"					\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
#endif
Packit 5c3484
Packit 5c3484
#if defined (__sparc__) && W_TYPE_SIZE == 32
/* add_mssaaaa for 32-bit SPARC: (sh,sl) = (ah,al) + (bh,bl), with the
   carry out delivered in m as a mask.

   addcc adds the low words and sets the condition codes, addxcc adds the
   high words plus carry, and "subx %g0, %g0, m" computes 0 - 0 - carry,
   i.e. 0 or all ones.  The condition codes are declared clobbered via
   __CLOBBER_CC.  */
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
	     "addxcc	%r3, %4, %1\n\t"				\
	     "subx	%%g0, %%g0, %0"					\
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
	 __CLOBBER_CC)
#endif
Packit 5c3484
Packit 5c3484
#if defined (__sparc__) && W_TYPE_SIZE == 64
/* add_mssaaaa for 64-bit SPARC: (sh,sl) = (ah,al) + (bh,bl), with the
   carry out delivered in m as a mask (m is cleared, then set to -1 by
   movcs when the 64-bit carry condition %xcc is set).

   The generic variant passes the upper 32-bit halves of al and bl as two
   extra operands and adds them with addccc into %g0, discarding the sum.
   NOTE(review): presumably this re-creates the carry out of the low 64-bit
   word in the flag that the following addccc consumes -- confirm against
   the SPARC V9 manual.  */
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
	     "addccc	%r7, %8, %%g0\n\t"				\
	     "addccc	%r3, %4, %1\n\t"				\
	     "clr	%0\n\t"						\
	     "movcs	%%xcc, -1, %0"					\
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
	 __CLOBBER_CC)
#if __VIS__ >= 0x300
/* With VIS 3 or later, replace the definition above by one that uses
   addxccc for the high word, so the extra half-word addition is not
   needed.  */
#undef add_mssaaaa
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
	     "addxccc	%r3, %4, %1\n\t"				\
	     "clr	%0\n\t"						\
	     "movcs	%%xcc, -1, %0"					\
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
	 __CLOBBER_CC)
#endif
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
/* add_mssaaaa for PowerPC: (s1,s0) = (a1,a0) + (b1,b0), with the carry out
   delivered in m as a mask.

   addc/addic adds the low words and records the carry, adde adds the high
   words plus carry.  "subfe m, m, m" then yields 0 when the carry is set
   and all ones when it is clear, and the final nor inverts that into the
   required mask.

   This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
   processor running in 32-bit mode, since the carry flag then gets the 32-bit
   carry.  */
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
	     "adde	%1, %3, %4\n\t"					\
	     "subfe	%0, %0, %0\n\t"					\
	     "nor	%0, %0, %0"					\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
#endif
Packit 5c3484
Packit 5c3484
#if defined (__s390x__) && W_TYPE_SIZE == 64
/* add_mssaaaa for s390x: (s1,s0) = (a1,a0) + (b1,b0), with the carry out
   delivered in m as a mask.

   a1 and a0 are tied to the s1 and s0 output registers, so algr/alcgr
   accumulate b0 and b1 in place.  m is then built by loading 0, adding it
   to itself with carry (giving 0 or 1), and negating the result with lcgr
   (giving 0 or all ones).  The condition code is declared clobbered via
   __CLOBBER_CC.  */
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ (  "algr	%2, %6\n\t"					\
	     "alcgr	%1, %4\n\t"					\
	     "lghi	%0, 0\n\t"					\
	     "alcgr	%0, %0\n\t"					\
	     "lcgr	%0, %0"						\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
#endif
Packit 5c3484
Packit 5c3484
#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
/* add_mssaaaa for 32-bit ARM (not Thumb): (sh,sl) = (ah,al) + (bh,bl), with
   the carry out delivered in m as a mask.

   adds/adcs form the two-word sum and leave the carry in the flags; the
   conditional moves then set m to 0 when the carry is clear (movcc) or to
   -1 when it is set (movcs).  The flags are declared clobbered via
   __CLOBBER_CC.  */
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (  "adds	%2, %5, %6\n\t"					\
	     "adcs	%1, %3, %4\n\t"					\
	     "movcc	%0, #0\n\t"					\
	     "movcs	%0, #-1"					\
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
#endif
Packit 5c3484
#endif /* defined (__GNUC__) */
Packit 5c3484
Packit 5c3484
#ifndef add_mssaaaa
/* Portable fallback, used when no add_mssaaaa has been defined above.

   Computes the two-word sum (s1,s0) = (a1,a0) + (b1,b0) in UWtype
   arithmetic and stores the carry out of the high word in m as a mask:
   0 when there is no carry, all ones when there is.

   The carry out of the high word can arise in two ways, which cannot both
   happen for the same inputs: from a1 + b1 itself (__c1), or from adding
   the low-word carry __c0 to that partial sum (detected by the result
   being smaller than __c0).  Negating their sum turns 0/1 into the mask.

   The a0 and a1 arguments are evaluated more than once.  */
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  do {									\
    UWtype __s0, __s1, __c0, __c1;					\
    __s0 = (a0) + (b0);							\
    __s1 = (a1) + (b1);							\
    __c0 = __s0 < (a0);							\
    __c1 = __s1 < (a1);							\
    (s0) = __s0;							\
    __s1 = __s1 + __c0;							\
    (s1) = __s1;							\
    (m) = - (__c1 + (__s1 < __c0));					\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
#if MOD_1_1P_METHOD == 1
Packit 5c3484
void
Packit 5c3484
mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t bi;
Packit 5c3484
  mp_limb_t B1modb, B2modb;
Packit 5c3484
  int cnt;
Packit 5c3484
Packit 5c3484
  count_leading_zeros (cnt, b);
Packit 5c3484
Packit 5c3484
  b <<= cnt;
Packit 5c3484
  invert_limb (bi, b);
Packit 5c3484
Packit 5c3484
  cps[0] = bi;
Packit 5c3484
  cps[1] = cnt;
Packit 5c3484
Packit 5c3484
  B1modb = -b;
Packit 5c3484
  if (LIKELY (cnt != 0))
Packit 5c3484
    B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
Packit 5c3484
  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
Packit 5c3484
  cps[2] = B1modb >> cnt;
Packit 5c3484
Packit 5c3484
  /* In the normalized case, this can be simplified to
Packit 5c3484
   *
Packit 5c3484
   *   B2modb = - b * bi;
Packit 5c3484
   *   ASSERT (B2modb <= b);    // NB: equality iff b = B/2
Packit 5c3484
   */
Packit 5c3484
  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
Packit 5c3484
  cps[3] = B2modb >> cnt;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
Packit 5c3484
{
Packit 5c3484
  mp_limb_t rh, rl, bi, ph, pl, r;
Packit 5c3484
  mp_limb_t B1modb, B2modb;
Packit 5c3484
  mp_size_t i;
Packit 5c3484
  int cnt;
Packit 5c3484
  mp_limb_t mask;
Packit 5c3484
Packit 5c3484
  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
Packit 5c3484
Packit 5c3484
  B1modb = bmodb[2];
Packit 5c3484
  B2modb = bmodb[3];
Packit 5c3484
Packit 5c3484
  rl = ap[n - 1];
Packit 5c3484
  umul_ppmm (ph, pl, rl, B1modb);
Packit 5c3484
  add_ssaaaa (rh, rl, ph, pl, CNST_LIMB(0), ap[n - 2]);
Packit 5c3484
Packit 5c3484
  for (i = n - 3; i >= 0; i -= 1)
Packit 5c3484
    {
Packit 5c3484
      /* rr = ap[i]				< B
Packit 5c3484
	    + LO(rr)  * (B mod b)		<= (B-1)(b-1)
Packit 5c3484
	    + HI(rr)  * (B^2 mod b)		<= (B-1)(b-1)
Packit 5c3484
      */
Packit 5c3484
      umul_ppmm (ph, pl, rl, B1modb);
Packit 5c3484
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i]);
Packit 5c3484
Packit 5c3484
      umul_ppmm (rh, rl, rh, B2modb);
Packit 5c3484
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  cnt = bmodb[1];
Packit 5c3484
  bi = bmodb[0];
Packit 5c3484
Packit 5c3484
  if (LIKELY (cnt != 0))
Packit 5c3484
    rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
Packit 5c3484
Packit 5c3484
  mask = -(mp_limb_t) (rh >= b);
Packit 5c3484
  rh -= mask & b;
Packit 5c3484
Packit 5c3484
  udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
Packit 5c3484
Packit 5c3484
  return r >> cnt;
Packit 5c3484
}
Packit 5c3484
#endif /* MOD_1_1P_METHOD == 1 */
Packit 5c3484
Packit 5c3484
#if MOD_1_1P_METHOD == 2
Packit 5c3484
void
Packit 5c3484
mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t bi;
Packit 5c3484
  mp_limb_t B2modb;
Packit 5c3484
  int cnt;
Packit 5c3484
Packit 5c3484
  count_leading_zeros (cnt, b);
Packit 5c3484
Packit 5c3484
  b <<= cnt;
Packit 5c3484
  invert_limb (bi, b);
Packit 5c3484
Packit 5c3484
  cps[0] = bi;
Packit 5c3484
  cps[1] = cnt;
Packit 5c3484
Packit 5c3484
  if (LIKELY (cnt != 0))
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t B1modb = -b;
Packit 5c3484
      B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
Packit 5c3484
      ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
Packit 5c3484
      cps[2] = B1modb >> cnt;
Packit 5c3484
    }
Packit 5c3484
  B2modb = - b * bi;
Packit 5c3484
  ASSERT (B2modb <= b);    // NB: equality iff b = B/2
Packit 5c3484
  cps[3] = B2modb;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
Packit 5c3484
{
Packit 5c3484
  int cnt;
Packit 5c3484
  mp_limb_t bi, B1modb;
Packit 5c3484
  mp_limb_t r0, r1;
Packit 5c3484
  mp_limb_t r;
Packit 5c3484
Packit 5c3484
  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
Packit 5c3484
Packit 5c3484
  r0 = ap[n-2];
Packit 5c3484
  r1 = ap[n-1];
Packit 5c3484
Packit 5c3484
  if (n > 2)
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t B2modb, B2mb;
Packit 5c3484
      mp_limb_t p0, p1;
Packit 5c3484
      mp_limb_t r2;
Packit 5c3484
      mp_size_t j;
Packit 5c3484
Packit 5c3484
      B2modb = bmodb[3];
Packit 5c3484
      B2mb = B2modb - b;
Packit 5c3484
Packit 5c3484
      umul_ppmm (p1, p0, r1, B2modb);
Packit 5c3484
      add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);
Packit 5c3484
Packit 5c3484
      for (j = n-4; j >= 0; j--)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t cy;
Packit 5c3484
	  /* mp_limb_t t = r0 + B2mb; */
Packit 5c3484
	  umul_ppmm (p1, p0, r1, B2modb);
Packit 5c3484
Packit 5c3484
	  ADDC_LIMB (cy, r0, r0, r2 & B2modb);
Packit 5c3484
	  /* Alternative, for cmov: if (cy) r0 = t; */
Packit 5c3484
	  r0 -= (-cy) & b;
Packit 5c3484
	  add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      r1 -= (r2 & b);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  cnt = bmodb[1];
Packit 5c3484
Packit 5c3484
  if (LIKELY (cnt != 0))
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t t;
Packit 5c3484
      mp_limb_t B1modb = bmodb[2];
Packit 5c3484
Packit 5c3484
      umul_ppmm (r1, t, r1, B1modb);
Packit 5c3484
      r0 += t;
Packit 5c3484
      r1 += (r0 < t);
Packit 5c3484
Packit 5c3484
      /* Normalize */
Packit 5c3484
      r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
Packit 5c3484
      r0 <<= cnt;
Packit 5c3484
Packit 5c3484
      /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows that. */
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t mask = -(mp_limb_t) (r1 >= b);
Packit 5c3484
      r1 -= mask & b;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  bi = bmodb[0];
Packit 5c3484
Packit 5c3484
  udiv_rnnd_preinv (r, r1, r0, b, bi);
Packit 5c3484
  return r >> cnt;
Packit 5c3484
}
Packit 5c3484
#endif /* MOD_1_1P_METHOD == 2 */