|
Packit |
5c3484 |
/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
|
|
Packit |
5c3484 |
the result in Q = {qp,nn-dn+1} expecting no remainder. Overlap allowed
|
|
Packit |
5c3484 |
between Q and N; all other overlap disallowed.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Contributed to the GNU project by Torbjorn Granlund.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
|
|
Packit |
5c3484 |
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
|
|
Packit |
5c3484 |
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if 1
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
mpn_divexact (mp_ptr qp,
|
|
Packit |
5c3484 |
mp_srcptr np, mp_size_t nn,
|
|
Packit |
5c3484 |
mp_srcptr dp, mp_size_t dn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
unsigned shift;
|
|
Packit |
5c3484 |
mp_size_t qn;
|
|
Packit |
5c3484 |
mp_ptr tp;
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (dn > 0);
|
|
Packit |
5c3484 |
ASSERT (nn >= dn);
|
|
Packit |
5c3484 |
ASSERT (dp[dn-1] > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while (dp[0] == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ASSERT (np[0] == 0);
|
|
Packit |
5c3484 |
dp++;
|
|
Packit |
5c3484 |
np++;
|
|
Packit |
5c3484 |
dn--;
|
|
Packit |
5c3484 |
nn--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (dn == 1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
qn = nn + 1 - dn;
|
|
Packit |
5c3484 |
count_trailing_zeros (shift, dp[0]);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (shift > 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_ptr wp;
|
|
Packit |
5c3484 |
mp_size_t ss;
|
|
Packit |
5c3484 |
ss = (dn > qn) ? qn + 1 : dn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tp = TMP_ALLOC_LIMBS (ss);
|
|
Packit |
5c3484 |
mpn_rshift (tp, dp, ss, shift);
|
|
Packit |
5c3484 |
dp = tp;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Since we have excluded dn == 1, we have nn > qn, and we need
|
|
Packit |
5c3484 |
to shift one limb beyond qn. */
|
|
Packit |
5c3484 |
wp = TMP_ALLOC_LIMBS (qn + 1);
|
|
Packit |
5c3484 |
mpn_rshift (wp, np, qn + 1, shift);
|
|
Packit |
5c3484 |
np = wp;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (dn > qn)
|
|
Packit |
5c3484 |
dn = qn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
|
|
Packit |
5c3484 |
mpn_bdiv_q (qp, np, qn, dp, dn, tp);
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* We use the Jebelean's bidirectional exact division algorithm. This is
|
|
Packit |
5c3484 |
somewhat naively implemented, with equal quotient parts done by 2-adic
|
|
Packit |
5c3484 |
division and truncating division. Since 2-adic division is faster, it
|
|
Packit |
5c3484 |
should be used for a larger chunk.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This code is horrendously ugly, in all sorts of ways.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* It was hacked without much care or thought, but with a testing program.
|
|
Packit |
5c3484 |
* It handles scratch space frivolously, and furthermore the itch function
|
|
Packit |
5c3484 |
is broken.
|
|
Packit |
5c3484 |
* Doesn't provide any measures to deal with mu_divappr_q's +3 error. We
|
|
Packit |
5c3484 |
have yet to provoke an error due to this, though.
|
|
Packit |
5c3484 |
* Algorithm selection leaves a lot to be desired. In particular, the choice
|
|
Packit |
5c3484 |
between DC and MU isn't a point, but we treat it like one.
|
|
Packit |
5c3484 |
* It makes the msb part 1 or 2 limbs larger than the lsb part, in spite of
|
|
Packit |
5c3484 |
that the latter is faster. We should at least reverse this, but perhaps
|
|
Packit |
5c3484 |
we should make the lsb part considerably larger. (How do we tune this?)
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_size_t
|
|
Packit |
5c3484 |
mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
return nn + dn; /* FIXME this is not right */
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
mpn_divexact (mp_ptr qp,
|
|
Packit |
5c3484 |
mp_srcptr np, mp_size_t nn,
|
|
Packit |
5c3484 |
mp_srcptr dp, mp_size_t dn,
|
|
Packit |
5c3484 |
mp_ptr scratch)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t qn;
|
|
Packit |
5c3484 |
mp_size_t nn0, qn0;
|
|
Packit |
5c3484 |
mp_size_t nn1, qn1;
|
|
Packit |
5c3484 |
mp_ptr tp;
|
|
Packit |
5c3484 |
mp_limb_t qml;
|
|
Packit |
5c3484 |
mp_limb_t qh;
|
|
Packit |
5c3484 |
int cnt;
|
|
Packit |
5c3484 |
mp_ptr xdp;
|
|
Packit |
5c3484 |
mp_limb_t di;
|
|
Packit |
5c3484 |
mp_limb_t cy;
|
|
Packit |
5c3484 |
gmp_pi1_t dinv;
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
qn = nn - dn + 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* For small divisors, and small quotients, don't use Jebelean's algorithm. */
|
|
Packit |
5c3484 |
if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
tp = scratch;
|
|
Packit |
5c3484 |
MPN_COPY (tp, np, qn);
|
|
Packit |
5c3484 |
binvert_limb (di, dp[0]); di = -di;
|
|
Packit |
5c3484 |
dn = MIN (dn, qn);
|
|
Packit |
5c3484 |
mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
qn0 = ((nn - dn) >> 1) + 1; /* low quotient size */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* If quotient is much larger than the divisor, the bidirectional algorithm
|
|
Packit |
5c3484 |
does not work as currently implemented. Fall back to plain bdiv. */
|
|
Packit |
5c3484 |
if (qn0 > dn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
tp = scratch;
|
|
Packit |
5c3484 |
MPN_COPY (tp, np, qn);
|
|
Packit |
5c3484 |
binvert_limb (di, dp[0]); di = -di;
|
|
Packit |
5c3484 |
dn = MIN (dn, qn);
|
|
Packit |
5c3484 |
mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
tp = scratch;
|
|
Packit |
5c3484 |
MPN_COPY (tp, np, qn);
|
|
Packit |
5c3484 |
binvert_limb (di, dp[0]); di = -di;
|
|
Packit |
5c3484 |
mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
nn0 = qn0 + qn0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
nn1 = nn0 - 1 + ((nn-dn) & 1);
|
|
Packit |
5c3484 |
qn1 = qn0;
|
|
Packit |
5c3484 |
if (LIKELY (qn0 != dn))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
nn1 = nn1 + 1;
|
|
Packit |
5c3484 |
qn1 = qn1 + 1;
|
|
Packit |
5c3484 |
if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* If the leading divisor limb == 1, i.e. has just one bit, we have
|
|
Packit |
5c3484 |
to include an extra limb in order to get the needed overlap. */
|
|
Packit |
5c3484 |
/* FIXME: Now with the mu_divappr_q function, we should really need
|
|
Packit |
5c3484 |
more overlap. That indicates one of two things: (1) The test code
|
|
Packit |
5c3484 |
is not good. (2) We actually overlap too much by default. */
|
|
Packit |
5c3484 |
nn1 = nn1 + 1;
|
|
Packit |
5c3484 |
qn1 = qn1 + 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tp = TMP_ALLOC_LIMBS (nn1 + 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
count_leading_zeros (cnt, dp[dn - 1]);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Normalize divisor, store into tmp area. */
|
|
Packit |
5c3484 |
if (cnt != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
xdp = TMP_ALLOC_LIMBS (qn1);
|
|
Packit |
5c3484 |
mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
xdp = (mp_ptr) dp + dn - qn1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Shift dividend according to the divisor normalization. */
|
|
Packit |
5c3484 |
/* FIXME: We compute too much here for XX_divappr_q, but these functions'
|
|
Packit |
5c3484 |
interfaces want a pointer to the imaginative least significant limb, not
|
|
Packit |
5c3484 |
to the least significant *used* limb. Of course, we could leave nn1-qn1
|
|
Packit |
5c3484 |
rubbish limbs in the low part, to save some time. */
|
|
Packit |
5c3484 |
if (cnt != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
|
|
Packit |
5c3484 |
if (cy != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
tp[nn1] = cy;
|
|
Packit |
5c3484 |
nn1++;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
|
|
Packit |
5c3484 |
mpn_sub_n right before is executed. */
|
|
Packit |
5c3484 |
MPN_COPY (tp, np + nn - nn1, nn1);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
|
|
Packit |
5c3484 |
if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* FIXME: mpn_mu_divappr_q doesn't handle qh != 0. Work around it with a
|
|
Packit |
5c3484 |
conditional subtraction here. */
|
|
Packit |
5c3484 |
qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
|
|
Packit |
5c3484 |
if (qh)
|
|
Packit |
5c3484 |
mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
|
|
Packit |
5c3484 |
mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
|
|
Packit |
5c3484 |
qp[qn0 - 1 + nn1 - qn1] = qh;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
qml = qp[qn0 - 1];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
binvert_limb (di, dp[0]); di = -di;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
MPN_COPY (tp, np, qn0);
|
|
Packit |
5c3484 |
mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
MPN_COPY (tp, np, qn0);
|
|
Packit |
5c3484 |
mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (qml < qp[qn0 - 1])
|
|
Packit |
5c3484 |
mpn_decr_u (qp + qn0, 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#endif
|