|
Packit |
5c3484 |
/* mpn_div_qr_1n_pi1
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Contributed to the GNU project by Niels Möller
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
|
|
Packit |
5c3484 |
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
|
|
Packit |
5c3484 |
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 2013 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if GMP_NAIL_BITS > 0
|
|
Packit |
5c3484 |
#error Nail bits not supported
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#ifndef DIV_QR_1N_METHOD
|
|
Packit |
5c3484 |
#define DIV_QR_1N_METHOD 2
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if defined (__GNUC__) && ! defined (NO_ASM)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
|
|
Packit |
5c3484 |
__asm__ ( "add %6, %k2\n\t" \
|
|
Packit |
5c3484 |
"adc %4, %k1\n\t" \
|
|
Packit |
5c3484 |
"sbb %k0, %k0" \
|
|
Packit |
5c3484 |
: "=r" (m), "=r" (s1), "=&r" (s0) \
|
|
Packit |
5c3484 |
: "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
|
|
Packit |
5c3484 |
"%2" ((USItype)(a0)), "g" ((USItype)(b0)))
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
|
|
Packit |
5c3484 |
__asm__ ( "add %6, %q2\n\t" \
|
|
Packit |
5c3484 |
"adc %4, %q1\n\t" \
|
|
Packit |
5c3484 |
"sbb %q0, %q0" \
|
|
Packit |
5c3484 |
: "=r" (m), "=r" (s1), "=&r" (s0) \
|
|
Packit |
5c3484 |
: "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
|
|
Packit |
5c3484 |
"%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if defined (__sparc__) && W_TYPE_SIZE == 32
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
|
|
Packit |
5c3484 |
__asm__ ( "addcc %r5, %6, %2\n\t" \
|
|
Packit |
5c3484 |
"addxcc %r3, %4, %1\n\t" \
|
|
Packit |
5c3484 |
"subx %%g0, %%g0, %0" \
|
|
Packit |
5c3484 |
: "=r" (m), "=r" (sh), "=&r" (sl) \
|
|
Packit |
5c3484 |
: "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
|
|
Packit |
5c3484 |
__CLOBBER_CC)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if defined (__sparc__) && W_TYPE_SIZE == 64
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
|
|
Packit |
5c3484 |
__asm__ ( "addcc %r5, %6, %2\n\t" \
|
|
Packit |
5c3484 |
"addccc %r7, %8, %%g0\n\t" \
|
|
Packit |
5c3484 |
"addccc %r3, %4, %1\n\t" \
|
|
Packit |
5c3484 |
"clr %0\n\t" \
|
|
Packit |
5c3484 |
"movcs %%xcc, -1, %0" \
|
|
Packit |
5c3484 |
: "=r" (m), "=r" (sh), "=&r" (sl) \
|
|
Packit |
5c3484 |
: "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
|
|
Packit |
5c3484 |
"rJ" ((al) >> 32), "rI" ((bl) >> 32) \
|
|
Packit |
5c3484 |
__CLOBBER_CC)
|
|
Packit |
5c3484 |
#if __VIS__ >= 0x300
|
|
Packit |
5c3484 |
#undef add_mssaaaa
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
|
|
Packit |
5c3484 |
__asm__ ( "addcc %r5, %6, %2\n\t" \
|
|
Packit |
5c3484 |
"addxccc %r3, %4, %1\n\t" \
|
|
Packit |
5c3484 |
"clr %0\n\t" \
|
|
Packit |
5c3484 |
"movcs %%xcc, -1, %0" \
|
|
Packit |
5c3484 |
: "=r" (m), "=r" (sh), "=&r" (sl) \
|
|
Packit |
5c3484 |
: "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
|
|
Packit |
5c3484 |
__CLOBBER_CC)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
|
|
Packit |
5c3484 |
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
|
|
Packit |
5c3484 |
processor running in 32-bit mode, since the carry flag then gets the 32-bit
|
|
Packit |
5c3484 |
carry. */
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
|
|
Packit |
5c3484 |
__asm__ ( "add%I6c %2, %5, %6\n\t" \
|
|
Packit |
5c3484 |
"adde %1, %3, %4\n\t" \
|
|
Packit |
5c3484 |
"subfe %0, %0, %0\n\t" \
|
|
Packit |
5c3484 |
"nor %0, %0, %0" \
|
|
Packit |
5c3484 |
: "=r" (m), "=r" (s1), "=&r" (s0) \
|
|
Packit |
5c3484 |
: "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if defined (__s390x__) && W_TYPE_SIZE == 64
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
|
|
Packit |
5c3484 |
__asm__ ( "algr %2, %6\n\t" \
|
|
Packit |
5c3484 |
"alcgr %1, %4\n\t" \
|
|
Packit |
5c3484 |
"lghi %0, 0\n\t" \
|
|
Packit |
5c3484 |
"alcgr %0, %0\n\t" \
|
|
Packit |
5c3484 |
"lcgr %0, %0" \
|
|
Packit |
5c3484 |
: "=r" (m), "=r" (s1), "=&r" (s0) \
|
|
Packit |
5c3484 |
: "1" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
|
|
Packit |
5c3484 |
"%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
|
|
Packit |
5c3484 |
__asm__ ( "adds %2, %5, %6\n\t" \
|
|
Packit |
5c3484 |
"adcs %1, %3, %4\n\t" \
|
|
Packit |
5c3484 |
"movcc %0, #0\n\t" \
|
|
Packit |
5c3484 |
"movcs %0, #-1" \
|
|
Packit |
5c3484 |
: "=r" (m), "=r" (sh), "=&r" (sl) \
|
|
Packit |
5c3484 |
: "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
#endif /* defined (__GNUC__) */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#ifndef add_mssaaaa
|
|
Packit |
5c3484 |
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
UWtype __s0, __s1, __c0, __c1; \
|
|
Packit |
5c3484 |
__s0 = (a0) + (b0); \
|
|
Packit |
5c3484 |
__s1 = (a1) + (b1); \
|
|
Packit |
5c3484 |
__c0 = __s0 < (a0); \
|
|
Packit |
5c3484 |
__c1 = __s1 < (a1); \
|
|
Packit |
5c3484 |
(s0) = __s0; \
|
|
Packit |
5c3484 |
__s1 = __s1 + __c0; \
|
|
Packit |
5c3484 |
(s1) = __s1; \
|
|
Packit |
5c3484 |
(m) = - (__c1 + (__s1 < __c0)); \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if DIV_QR_1N_METHOD == 1
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}.
|
|
Packit |
5c3484 |
Requires that uh < d. */
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
|
|
Packit |
5c3484 |
mp_limb_t d, mp_limb_t dinv)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ASSERT (n > 0);
|
|
Packit |
5c3484 |
ASSERT (uh < d);
|
|
Packit |
5c3484 |
ASSERT (d & GMP_NUMB_HIGHBIT);
|
|
Packit |
5c3484 |
ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
do
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t q, ul;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ul = up[--n];
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv);
|
|
Packit |
5c3484 |
qp[n] = q;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
while (n > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return uh;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#elif DIV_QR_1N_METHOD == 2
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
|
|
Packit |
5c3484 |
mp_limb_t d, mp_limb_t dinv)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t B2;
|
|
Packit |
5c3484 |
mp_limb_t u0, u2;
|
|
Packit |
5c3484 |
mp_limb_t q0, q1;
|
|
Packit |
5c3484 |
mp_limb_t p0, p1;
|
|
Packit |
5c3484 |
mp_limb_t t;
|
|
Packit |
5c3484 |
mp_size_t j;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (d & GMP_LIMB_HIGHBIT);
|
|
Packit |
5c3484 |
ASSERT (n > 0);
|
|
Packit |
5c3484 |
ASSERT (u1 < d);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (n == 1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
|
|
Packit |
5c3484 |
return u1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* FIXME: Could be precomputed */
|
|
Packit |
5c3484 |
B2 = -d*dinv;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (q1, q0, dinv, u1);
|
|
Packit |
5c3484 |
umul_ppmm (p1, p0, B2, u1);
|
|
Packit |
5c3484 |
q1 += u1;
|
|
Packit |
5c3484 |
ASSERT (q1 >= u1);
|
|
Packit |
5c3484 |
u0 = up[n-1]; /* Early read, to allow qp == up. */
|
|
Packit |
5c3484 |
qp[n-1] = q1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* FIXME: Keep q1 in a variable between iterations, to reduce number
|
|
Packit |
5c3484 |
of memory accesses. */
|
|
Packit |
5c3484 |
for (j = n-2; j-- > 0; )
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t q2, cy;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Additions for the q update:
|
|
Packit |
5c3484 |
* +-------+
|
|
Packit |
5c3484 |
* |u1 * v |
|
|
Packit |
5c3484 |
* +---+---+
|
|
Packit |
5c3484 |
* | u1|
|
|
Packit |
5c3484 |
* +---+---+
|
|
Packit |
5c3484 |
* | 1 | v | (conditional on u2)
|
|
Packit |
5c3484 |
* +---+---+
|
|
Packit |
5c3484 |
* | 1 | (conditional on u0 + u2 B2 carry)
|
|
Packit |
5c3484 |
* +---+
|
|
Packit |
5c3484 |
* + | q0|
|
|
Packit |
5c3484 |
* -+---+---+---+
|
|
Packit |
5c3484 |
* | q2| q1| q0|
|
|
Packit |
5c3484 |
* +---+---+---+
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
umul_ppmm (p1, t, u1, dinv);
|
|
Packit |
5c3484 |
add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
|
|
Packit |
5c3484 |
add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
|
|
Packit |
5c3484 |
add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
|
|
Packit |
5c3484 |
q0 = t;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (p1, p0, u1, B2);
|
|
Packit |
5c3484 |
ADDC_LIMB (cy, u0, u0, u2 & B2);
|
|
Packit |
5c3484 |
u0 -= (-cy) & d;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Final q update */
|
|
Packit |
5c3484 |
add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
|
|
Packit |
5c3484 |
qp[j+1] = q1;
|
|
Packit |
5c3484 |
MPN_INCR_U (qp+j+2, n-j-2, q2);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
q1 = (u2 > 0);
|
|
Packit |
5c3484 |
u1 -= (-q1) & d;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t = (u1 >= d);
|
|
Packit |
5c3484 |
q1 += t;
|
|
Packit |
5c3484 |
u1 -= (-t) & d;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
|
|
Packit |
5c3484 |
add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
MPN_INCR_U (qp+1, n-1, q1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
qp[0] = q0;
|
|
Packit |
5c3484 |
return u0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
#error Unknown DIV_QR_1N_METHOD
|
|
Packit |
5c3484 |
#endif
|