|
Packit |
5c3484 |
/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always; CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Default crossover sizes, used only when nothing has defined them earlier.
   mpn_divrem_1 compares the number of division steps against these with
   BELOW_THRESHOLD to choose between udiv_qrnnd and udiv_qrnnd_preinv; one
   threshold applies to normalized divisors, the other to unnormalized
   ones.  */
#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
   code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course.  And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to CPU
   specific implementations to consider, especially since EXTRACT isn't on
   the dependent chain.  */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
|
|
Packit |
5c3484 |
mp_srcptr up, mp_size_t un, mp_limb_t d)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t n;
|
|
Packit |
5c3484 |
mp_size_t i;
|
|
Packit |
5c3484 |
mp_limb_t n1, n0;
|
|
Packit |
5c3484 |
mp_limb_t r = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (qxn >= 0);
|
|
Packit |
5c3484 |
ASSERT (un >= 0);
|
|
Packit |
5c3484 |
ASSERT (d != 0);
|
|
Packit |
5c3484 |
/* FIXME: What's the correct overlap rule when qxn!=0? */
|
|
Packit |
5c3484 |
ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
n = un + qxn;
|
|
Packit |
5c3484 |
if (n == 0)
|
|
Packit |
5c3484 |
return 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
d <<= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
qp += (n - 1); /* Make qp point at most significant quotient limb */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if ((d & GMP_LIMB_HIGHBIT) != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (un != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* High quotient limb is 0 or 1, skip a divide step. */
|
|
Packit |
5c3484 |
mp_limb_t q;
|
|
Packit |
5c3484 |
r = up[un - 1] << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
q = (r >= d);
|
|
Packit |
5c3484 |
*qp-- = q;
|
|
Packit |
5c3484 |
r -= (d & -q);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
n--;
|
|
Packit |
5c3484 |
un--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
plain:
|
|
Packit |
5c3484 |
for (i = un - 1; i >= 0; i--)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n0 = up[i] << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
udiv_qrnnd (*qp, r, r, n0, d);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
for (i = qxn - 1; i >= 0; i--)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
return r;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Multiply-by-inverse, divisor already normalized. */
|
|
Packit |
5c3484 |
mp_limb_t dinv;
|
|
Packit |
5c3484 |
invert_limb (dinv, d);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (i = un - 1; i >= 0; i--)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n0 = up[i] << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
for (i = qxn - 1; i >= 0; i--)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
return r;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Most significant bit of divisor == 0. */
|
|
Packit |
5c3484 |
int cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Skip a division if high < divisor (high quotient 0). Testing here
|
|
Packit |
5c3484 |
before normalizing will still skip as often as possible. */
|
|
Packit |
5c3484 |
if (un != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n1 = up[un - 1] << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
if (n1 < d)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
r = n1 >> GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
*qp-- = 0;
|
|
Packit |
5c3484 |
n--;
|
|
Packit |
5c3484 |
if (n == 0)
|
|
Packit |
5c3484 |
return r;
|
|
Packit |
5c3484 |
un--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (! UDIV_NEEDS_NORMALIZATION
|
|
Packit |
5c3484 |
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
|
|
Packit |
5c3484 |
goto plain;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
count_leading_zeros (cnt, d);
|
|
Packit |
5c3484 |
d <<= cnt;
|
|
Packit |
5c3484 |
r <<= cnt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (UDIV_NEEDS_NORMALIZATION
|
|
Packit |
5c3484 |
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t nshift;
|
|
Packit |
5c3484 |
if (un != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n1 = up[un - 1] << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
r |= (n1 >> (GMP_LIMB_BITS - cnt));
|
|
Packit |
5c3484 |
for (i = un - 2; i >= 0; i--)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n0 = up[i] << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
|
|
Packit |
5c3484 |
udiv_qrnnd (*qp, r, r, nshift, d);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
n1 = n0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
udiv_qrnnd (*qp, r, r, n1 << cnt, d);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
for (i = qxn - 1; i >= 0; i--)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
return r >> cnt;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t dinv, nshift;
|
|
Packit |
5c3484 |
invert_limb (dinv, d);
|
|
Packit |
5c3484 |
if (un != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n1 = up[un - 1] << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
r |= (n1 >> (GMP_LIMB_BITS - cnt));
|
|
Packit |
5c3484 |
for (i = un - 2; i >= 0; i--)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n0 = up[i] << GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
n1 = n0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
for (i = qxn - 1; i >= 0; i--)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
|
|
Packit |
5c3484 |
r >>= GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
qp--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
return r >> cnt;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|