/* mpn_div_qr_1n_pi2.

   THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS
   ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

/* ISSUES:

   * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?

   * Are there any problems with generating n quotient limbs in the q area?  It
     surely simplifies things.

   * Not yet adequately tested.
*/

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Define some longlong.h-style macros, but for wider operations.

   add_sssaaaa (s2, s1, s0, a1, a0, b1, b0) is like longlong.h's add_ssaaaa,
   but propagates the carry-out into an additional sum operand:

     {s1,s0} <- {a1,a0} + {b1,b0}     (two-limb sum, carry-out dropped)
     s2      <- s2 + carry-out of that two-limb sum

   s2 is therefore both an input and an output.  The function in this file
   passes the same variables for s1/s0 and a1/a0; every variant reads the
   a and b operands before the corresponding sum is stored.
*/
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)

#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"               \
           : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
           : "0"  ((USItype)(s2)),                                      \
             "1"  ((USItype)(a1)), "g" ((USItype)(b1)),                 \
             "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
#endif

#if defined (__amd64__) && W_TYPE_SIZE == 64
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"               \
           : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
           : "0"  ((UDItype)(s2)),                                      \
             "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),               \
             "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
#endif

#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
   processor running in 32-bit mode, since the carry flag then gets the 32-bit
   carry.

   The final "addze %0,%0" reads operand 0, so the incoming value of s2 must
   live in the register chosen for output 0.  The "0" matching constraint
   enforces that; a plain "r" input could be given a different register,
   leaving addze to add the carry to an uninitialised one.  */
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0"        \
           : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
           : "0"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
#endif

#endif /* __GNUC__ */

#ifndef add_sssaaaa
/* Portable fallback.  All of a0, a1, b0, b1 are consumed before s0 or s1 is
   stored, so the sums may overwrite the a operands.  Arguments are evaluated
   more than once; pass side-effect-free expressions.  */
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
  do {                                                                  \
    UWtype __s0, __s1, __c0, __c1;                                      \
    __s0 = (a0) + (b0);                                                 \
    __s1 = (a1) + (b1);                                                 \
    __c0 = __s0 < (a0);         /* carry out of the low limb */         \
    __c1 = __s1 < (a1);         /* carry out of a1 + b1 */              \
    (s0) = __s0;                                                        \
    __s1 = __s1 + __c0;                                                 \
    (s1) = __s1;                                                        \
    (s2) += __c1 + (__s1 < __c0); /* at most one of the two is set */   \
  } while (0)
#endif

struct precomp_div_1_pi2
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t dip[2];
|
|
Packit |
5c3484 |
mp_limb_t d;
|
|
Packit |
5c3484 |
int norm_cnt;
|
|
Packit |
5c3484 |
};
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
mpn_div_qr_1n_pi2 (mp_ptr qp,
|
|
Packit |
5c3484 |
mp_srcptr up, mp_size_t un,
|
|
Packit |
5c3484 |
struct precomp_div_1_pi2 *pd)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t most_significant_q_limb;
|
|
Packit |
5c3484 |
mp_size_t i;
|
|
Packit |
5c3484 |
mp_limb_t r, u2, u1, u0;
|
|
Packit |
5c3484 |
mp_limb_t d0, di1, di0;
|
|
Packit |
5c3484 |
mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
|
|
Packit |
5c3484 |
mp_limb_t cnd;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (un >= 2);
|
|
Packit |
5c3484 |
ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
|
|
Packit |
5c3484 |
ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
|
|
Packit |
5c3484 |
ASSERT_MPN (up, un);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define q3 q3a
|
|
Packit |
5c3484 |
#define q2 q2b
|
|
Packit |
5c3484 |
#define q1 q1b
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
up += un - 3;
|
|
Packit |
5c3484 |
r = up[2];
|
|
Packit |
5c3484 |
d0 = pd->d;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
most_significant_q_limb = (r >= d0);
|
|
Packit |
5c3484 |
r -= d0 & -most_significant_q_limb;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
qp += un - 3;
|
|
Packit |
5c3484 |
qp[2] = most_significant_q_limb;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
di1 = pd->dip[1];
|
|
Packit |
5c3484 |
di0 = pd->dip[0];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (i = un - 3; i >= 0; i -= 2)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
u2 = r;
|
|
Packit |
5c3484 |
u1 = up[1];
|
|
Packit |
5c3484 |
u0 = up[0];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Dividend in {r,u1,u0} */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (q1d,q0d, u1, di0);
|
|
Packit |
5c3484 |
umul_ppmm (q2b,q1b, u1, di1);
|
|
Packit |
5c3484 |
q2b++; /* cannot spill */
|
|
Packit |
5c3484 |
add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm (q2c,q1c, u2, di0);
|
|
Packit |
5c3484 |
add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
|
|
Packit |
5c3484 |
umul_ppmm (q3a,q2a, u2, di1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
q3 += r;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
r = u0 - q2 * d0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
cnd = (r >= q1);
|
|
Packit |
5c3484 |
r += d0 & -cnd;
|
|
Packit |
5c3484 |
sub_ddmmss (q3,q2, q3,q2, 0,cnd);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (UNLIKELY (r >= d0))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
r -= d0;
|
|
Packit |
5c3484 |
add_ssaaaa (q3,q2, q3,q2, 0,1);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
qp[0] = q2;
|
|
Packit |
5c3484 |
qp[1] = q3;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
up -= 2;
|
|
Packit |
5c3484 |
qp -= 2;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if ((un & 1) == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
u2 = r;
|
|
Packit |
5c3484 |
u1 = up[1];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
|
|
Packit |
5c3484 |
qp[1] = q3;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return r;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#undef q3
|
|
Packit |
5c3484 |
#undef q2
|
|
Packit |
5c3484 |
#undef q1
|
|
Packit |
5c3484 |
}
|