Blame mpn/generic/div_qr_1n_pi1.c

Packit 5c3484
/* mpn_div_qr_1n_pi1
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Niels Möller
Packit 5c3484
Packit 5c3484
   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Packit 5c3484
Copyright 2013 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#if GMP_NAIL_BITS > 0
Packit 5c3484
#error Nail bits not supported
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#ifndef DIV_QR_1N_METHOD
Packit 5c3484
#define DIV_QR_1N_METHOD 2
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
Packit 5c3484
Packit 5c3484
#if defined (__GNUC__) && ! defined (NO_ASM)
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
Packit 5c3484
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
Packit 5c3484
  __asm__ (  "add	%6, %k2\n\t"					\
Packit 5c3484
	     "adc	%4, %k1\n\t"					\
Packit 5c3484
	     "sbb	%k0, %k0"					\
Packit 5c3484
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
Packit 5c3484
	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
Packit 5c3484
	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
Packit 5c3484
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
Packit 5c3484
  __asm__ (  "add	%6, %q2\n\t"					\
Packit 5c3484
	     "adc	%4, %q1\n\t"					\
Packit 5c3484
	     "sbb	%q0, %q0"					\
Packit 5c3484
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
Packit 5c3484
	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
Packit 5c3484
	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if defined (__sparc__) && W_TYPE_SIZE == 32
Packit 5c3484
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
Packit 5c3484
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
Packit 5c3484
	     "addxcc	%r3, %4, %1\n\t"				\
Packit 5c3484
	     "subx	%%g0, %%g0, %0"					\
Packit 5c3484
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
Packit 5c3484
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
Packit 5c3484
	 __CLOBBER_CC)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if defined (__sparc__) && W_TYPE_SIZE == 64
Packit 5c3484
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
Packit 5c3484
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
Packit 5c3484
	     "addccc	%r7, %8, %%g0\n\t"				\
Packit 5c3484
	     "addccc	%r3, %4, %1\n\t"				\
Packit 5c3484
	     "clr	%0\n\t"						\
Packit 5c3484
	     "movcs	%%xcc, -1, %0"					\
Packit 5c3484
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
Packit 5c3484
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
Packit 5c3484
	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
Packit 5c3484
	 __CLOBBER_CC)
Packit 5c3484
#if __VIS__ >= 0x300
Packit 5c3484
#undef add_mssaaaa
Packit 5c3484
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
Packit 5c3484
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
Packit 5c3484
	     "addxccc	%r3, %4, %1\n\t"				\
Packit 5c3484
	     "clr	%0\n\t"						\
Packit 5c3484
	     "movcs	%%xcc, -1, %0"					\
Packit 5c3484
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
Packit 5c3484
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
Packit 5c3484
	 __CLOBBER_CC)
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
Packit 5c3484
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
Packit 5c3484
   processor running in 32-bit mode, since the carry flag then gets the 32-bit
Packit 5c3484
   carry.  */
Packit 5c3484
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
Packit 5c3484
  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
Packit 5c3484
	     "adde	%1, %3, %4\n\t"					\
Packit 5c3484
	     "subfe	%0, %0, %0\n\t"					\
Packit 5c3484
	     "nor	%0, %0, %0"					\
Packit 5c3484
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
Packit 5c3484
	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if defined (__s390x__) && W_TYPE_SIZE == 64
Packit 5c3484
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
Packit 5c3484
  __asm__ (  "algr	%2, %6\n\t"					\
Packit 5c3484
	     "alcgr	%1, %4\n\t"					\
Packit 5c3484
	     "lghi	%0, 0\n\t"					\
Packit 5c3484
	     "alcgr	%0, %0\n\t"					\
Packit 5c3484
	     "lcgr	%0, %0"						\
Packit 5c3484
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
Packit 5c3484
	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
Packit 5c3484
	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
Packit 5c3484
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
Packit 5c3484
  __asm__ (  "adds	%2, %5, %6\n\t"					\
Packit 5c3484
	     "adcs	%1, %3, %4\n\t"					\
Packit 5c3484
	     "movcc	%0, #0\n\t"					\
Packit 5c3484
	     "movcs	%0, #-1"					\
Packit 5c3484
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
Packit 5c3484
	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
Packit 5c3484
#endif
Packit 5c3484
#endif /* defined (__GNUC__) */
Packit 5c3484
Packit 5c3484
#ifndef add_mssaaaa
Packit 5c3484
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
Packit 5c3484
  do {									\
Packit 5c3484
    UWtype __s0, __s1, __c0, __c1;					\
Packit 5c3484
    __s0 = (a0) + (b0);							\
Packit 5c3484
    __s1 = (a1) + (b1);							\
Packit 5c3484
    __c0 = __s0 < (a0);							\
Packit 5c3484
    __c1 = __s1 < (a1);							\
Packit 5c3484
    (s0) = __s0;							\
Packit 5c3484
    __s1 = __s1 + __c0;							\
Packit 5c3484
    (s1) = __s1;							\
Packit 5c3484
    (m) = - (__c1 + (__s1 < __c0));					\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if DIV_QR_1N_METHOD == 1
Packit 5c3484
Packit 5c3484
/* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}.
Packit 5c3484
   Requires that uh < d. */
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
Packit 5c3484
		   mp_limb_t d, mp_limb_t dinv)
Packit 5c3484
{
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (uh < d);
Packit 5c3484
  ASSERT (d & GMP_NUMB_HIGHBIT);
Packit 5c3484
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));
Packit 5c3484
Packit 5c3484
  do
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t q, ul;
Packit 5c3484
Packit 5c3484
      ul = up[--n];
Packit 5c3484
      udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv);
Packit 5c3484
      qp[n] = q;
Packit 5c3484
    }
Packit 5c3484
  while (n > 0);
Packit 5c3484
Packit 5c3484
  return uh;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#elif DIV_QR_1N_METHOD == 2
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
Packit 5c3484
		   mp_limb_t d, mp_limb_t dinv)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t B2;
Packit 5c3484
  mp_limb_t u0, u2;
Packit 5c3484
  mp_limb_t q0, q1;
Packit 5c3484
  mp_limb_t p0, p1;
Packit 5c3484
  mp_limb_t t;
Packit 5c3484
  mp_size_t j;
Packit 5c3484
Packit 5c3484
  ASSERT (d & GMP_LIMB_HIGHBIT);
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (u1 < d);
Packit 5c3484
Packit 5c3484
  if (n == 1)
Packit 5c3484
    {
Packit 5c3484
      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
Packit 5c3484
      return u1;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* FIXME: Could be precomputed */
Packit 5c3484
  B2 = -d*dinv;
Packit 5c3484
Packit 5c3484
  umul_ppmm (q1, q0, dinv, u1);
Packit 5c3484
  umul_ppmm (p1, p0, B2, u1);
Packit 5c3484
  q1 += u1;
Packit 5c3484
  ASSERT (q1 >= u1);
Packit 5c3484
  u0 = up[n-1];	/* Early read, to allow qp == up. */
Packit 5c3484
  qp[n-1] = q1;
Packit 5c3484
Packit 5c3484
  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
Packit 5c3484
Packit 5c3484
  /* FIXME: Keep q1 in a variable between iterations, to reduce number
Packit 5c3484
     of memory accesses. */
Packit 5c3484
  for (j = n-2; j-- > 0; )
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t q2, cy;
Packit 5c3484
Packit 5c3484
      /* Additions for the q update:
Packit 5c3484
       *	+-------+
Packit 5c3484
       *        |u1 * v |
Packit 5c3484
       *        +---+---+
Packit 5c3484
       *        | u1|
Packit 5c3484
       *    +---+---+
Packit 5c3484
       *    | 1 | v |  (conditional on u2)
Packit 5c3484
       *    +---+---+
Packit 5c3484
       *        | 1 |  (conditional on u0 + u2 B2 carry)
Packit 5c3484
       *        +---+
Packit 5c3484
       * +      | q0|
Packit 5c3484
       *   -+---+---+---+
Packit 5c3484
       *    | q2| q1| q0|
Packit 5c3484
       *    +---+---+---+
Packit 5c3484
      */
Packit 5c3484
      umul_ppmm (p1, t, u1, dinv);
Packit 5c3484
      add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
Packit 5c3484
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
Packit 5c3484
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
Packit 5c3484
      q0 = t;
Packit 5c3484
Packit 5c3484
      umul_ppmm (p1, p0, u1, B2);
Packit 5c3484
      ADDC_LIMB (cy, u0, u0, u2 & B2);
Packit 5c3484
      u0 -= (-cy) & d;
Packit 5c3484
Packit 5c3484
      /* Final q update */
Packit 5c3484
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
Packit 5c3484
      qp[j+1] = q1;
Packit 5c3484
      MPN_INCR_U (qp+j+2, n-j-2, q2);
Packit 5c3484
Packit 5c3484
      add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  q1 = (u2 > 0);
Packit 5c3484
  u1 -= (-q1) & d;
Packit 5c3484
Packit 5c3484
  t = (u1 >= d);
Packit 5c3484
  q1 += t;
Packit 5c3484
  u1 -= (-t) & d;
Packit 5c3484
Packit 5c3484
  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
Packit 5c3484
  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
Packit 5c3484
Packit 5c3484
  MPN_INCR_U (qp+1, n-1, q1);
Packit 5c3484
Packit 5c3484
  qp[0] = q0;
Packit 5c3484
  return u0;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#else
Packit 5c3484
#error Unknown DIV_QR_1N_METHOD
Packit 5c3484
#endif