|
Packit |
5c3484 |
/* UltraSPARC 64 mpn_divexact_1 -- mpn by limb exact division.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
|
|
Packit |
5c3484 |
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
|
|
Packit |
5c3484 |
FUTURE GNU MP RELEASES.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "mpn/sparc64/sparc64.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/*                 64-bit divisor   32-bit divisor
                    cycles/limb      cycles/limb
                     (approx)         (approx)
   Ultrasparc 2i:      110               70
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* There are two key ideas here to reduce mulx's.  Firstly, when the divisor
   fits in 32 bits the high of q*d can be calculated without the two
   32x32->64 cross-products involving the high 32 bits of the divisor, that
   being zero of course.  Secondly, umul_ppmm_lowequal and
   umul_ppmm_half_lowequal save one mulx (each) knowing the low of q*d is
   equal to the input limb l.

   For size==1, a simple udivx is used.  This is faster than calculating an
   inverse.

   For a 32-bit divisor and small sizes, an attempt was made at a simple
   udivx loop (two per 64-bit limb), but it turned out to be slower than
   mul-by-inverse.  At size==2 the inverse is about 260 cycles total
   compared to a udivx at 291.  Perhaps the latter would suit when size==2
   but the high 32 bits of the second limb are zero (saving one udivx), but
   it doesn't seem worth a special case just for that.  */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t inverse, s, s_next, c, l, ls, q;
|
|
Packit |
5c3484 |
unsigned rshift, lshift;
|
|
Packit |
5c3484 |
mp_limb_t lshift_mask;
|
|
Packit |
5c3484 |
mp_limb_t divisor_h;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (size >= 1);
|
|
Packit |
5c3484 |
ASSERT (divisor != 0);
|
|
Packit |
5c3484 |
ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
|
|
Packit |
5c3484 |
ASSERT_MPN (src, size);
|
|
Packit |
5c3484 |
ASSERT_LIMB (divisor);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s = *src++; /* src low limb */
|
|
Packit |
5c3484 |
size--;
|
|
Packit |
5c3484 |
if (size == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
*dst = s / divisor;
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if ((divisor & 1) == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
count_trailing_zeros (rshift, divisor);
|
|
Packit |
5c3484 |
divisor >>= rshift;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
rshift = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
binvert_limb (inverse, divisor);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
lshift = 64 - rshift;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* lshift==64 means no shift, so must mask out other part in this case */
|
|
Packit |
5c3484 |
lshift_mask = (rshift == 0 ? 0 : MP_LIMB_T_MAX);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
c = 0;
|
|
Packit |
5c3484 |
divisor_h = HIGH32 (divisor);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (divisor_h == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* 32-bit divisor */
|
|
Packit |
5c3484 |
do
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s_next = *src++;
|
|
Packit |
5c3484 |
ls = (s >> rshift) | ((s_next << lshift) & lshift_mask);
|
|
Packit |
5c3484 |
s = s_next;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
SUBC_LIMB (c, l, ls, c);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
q = l * inverse;
|
|
Packit |
5c3484 |
*dst++ = q;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm_half_lowequal (l, q, divisor, l);
|
|
Packit |
5c3484 |
c += l;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
size--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
while (size != 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ls = s >> rshift;
|
|
Packit |
5c3484 |
l = ls - c;
|
|
Packit |
5c3484 |
q = l * inverse;
|
|
Packit |
5c3484 |
*dst = q;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* 64-bit divisor */
|
|
Packit |
5c3484 |
mp_limb_t divisor_l = LOW32 (divisor);
|
|
Packit |
5c3484 |
do
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s_next = *src++;
|
|
Packit |
5c3484 |
ls = (s >> rshift) | ((s_next << lshift) & lshift_mask);
|
|
Packit |
5c3484 |
s = s_next;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
SUBC_LIMB (c, l, ls, c);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
q = l * inverse;
|
|
Packit |
5c3484 |
*dst++ = q;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
umul_ppmm_lowequal (l, q, divisor, divisor_h, divisor_l, l);
|
|
Packit |
5c3484 |
c += l;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
size--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
while (size != 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ls = s >> rshift;
|
|
Packit |
5c3484 |
l = ls - c;
|
|
Packit |
5c3484 |
q = l * inverse;
|
|
Packit |
5c3484 |
*dst = q;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|