Blame mpn/generic/divexact.c

Packit 5c3484
/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
Packit 5c3484
   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
Packit 5c3484
   between Q and N; all other overlap disallowed.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#if 1
Packit 5c3484
void
Packit 5c3484
mpn_divexact (mp_ptr qp,
Packit 5c3484
	      mp_srcptr np, mp_size_t nn,
Packit 5c3484
	      mp_srcptr dp, mp_size_t dn)
Packit 5c3484
{
Packit 5c3484
  unsigned shift;
Packit 5c3484
  mp_size_t qn;
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT (dn > 0);
Packit 5c3484
  ASSERT (nn >= dn);
Packit 5c3484
  ASSERT (dp[dn-1] > 0);
Packit 5c3484
Packit 5c3484
  while (dp[0] == 0)
Packit 5c3484
    {
Packit 5c3484
      ASSERT (np[0] == 0);
Packit 5c3484
      dp++;
Packit 5c3484
      np++;
Packit 5c3484
      dn--;
Packit 5c3484
      nn--;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (dn == 1)
Packit 5c3484
    {
Packit 5c3484
      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  qn = nn + 1 - dn;
Packit 5c3484
  count_trailing_zeros (shift, dp[0]);
Packit 5c3484
Packit 5c3484
  if (shift > 0)
Packit 5c3484
    {
Packit 5c3484
      mp_ptr wp;
Packit 5c3484
      mp_size_t ss;
Packit 5c3484
      ss = (dn > qn) ? qn + 1 : dn;
Packit 5c3484
Packit 5c3484
      tp = TMP_ALLOC_LIMBS (ss);
Packit 5c3484
      mpn_rshift (tp, dp, ss, shift);
Packit 5c3484
      dp = tp;
Packit 5c3484
Packit 5c3484
      /* Since we have excluded dn == 1, we have nn > qn, and we need
Packit 5c3484
	 to shift one limb beyond qn. */
Packit 5c3484
      wp = TMP_ALLOC_LIMBS (qn + 1);
Packit 5c3484
      mpn_rshift (wp, np, qn + 1, shift);
Packit 5c3484
      np = wp;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (dn > qn)
Packit 5c3484
    dn = qn;
Packit 5c3484
Packit 5c3484
  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
Packit 5c3484
  mpn_bdiv_q (qp, np, qn, dp, dn, tp);
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#else
Packit 5c3484
Packit 5c3484
/* We use the Jebelean's bidirectional exact division algorithm.  This is
Packit 5c3484
   somewhat naively implemented, with equal quotient parts done by 2-adic
Packit 5c3484
   division and truncating division.  Since 2-adic division is faster, it
Packit 5c3484
   should be used for a larger chunk.
Packit 5c3484
Packit 5c3484
   This code is horrendously ugly, in all sorts of ways.
Packit 5c3484
Packit 5c3484
   * It was hacked without much care or thought, but with a testing program.
Packit 5c3484
   * It handles scratch space frivolously, and furthermore the itch function
Packit 5c3484
     is broken.
Packit 5c3484
   * Doesn't provide any measures to deal with mu_divappr_q's +3 error.  We
Packit 5c3484
     have yet to provoke an error due to this, though.
Packit 5c3484
   * Algorithm selection leaves a lot to be desired.  In particular, the choice
Packit 5c3484
     between DC and MU isn't a point, but we treat it like one.
Packit 5c3484
   * It makes the msb part 1 or 2 limbs larger than the lsb part, in spite of
Packit 5c3484
     that the latter is faster.  We should at least reverse this, but perhaps
Packit 5c3484
     we should make the lsb part considerably larger.  (How do we tune this?)
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
mp_size_t
Packit 5c3484
mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
Packit 5c3484
{
Packit 5c3484
  return nn + dn;		/* FIXME this is not right */
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_divexact (mp_ptr qp,
Packit 5c3484
	      mp_srcptr np, mp_size_t nn,
Packit 5c3484
	      mp_srcptr dp, mp_size_t dn,
Packit 5c3484
	      mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_size_t qn;
Packit 5c3484
  mp_size_t nn0, qn0;
Packit 5c3484
  mp_size_t nn1, qn1;
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
  mp_limb_t qml;
Packit 5c3484
  mp_limb_t qh;
Packit 5c3484
  int cnt;
Packit 5c3484
  mp_ptr xdp;
Packit 5c3484
  mp_limb_t di;
Packit 5c3484
  mp_limb_t cy;
Packit 5c3484
  gmp_pi1_t dinv;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  qn = nn - dn + 1;
Packit 5c3484
Packit 5c3484
  /* For small divisors, and small quotients, don't use Jebelean's algorithm. */
Packit 5c3484
  if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
Packit 5c3484
    {
Packit 5c3484
      tp = scratch;
Packit 5c3484
      MPN_COPY (tp, np, qn);
Packit 5c3484
      binvert_limb (di, dp[0]);  di = -di;
Packit 5c3484
      dn = MIN (dn, qn);
Packit 5c3484
      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  qn0 = ((nn - dn) >> 1) + 1;	/* low quotient size */
Packit 5c3484
Packit 5c3484
  /* If quotient is much larger than the divisor, the bidirectional algorithm
Packit 5c3484
     does not work as currently implemented.  Fall back to plain bdiv.  */
Packit 5c3484
  if (qn0 > dn)
Packit 5c3484
    {
Packit 5c3484
      if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  tp = scratch;
Packit 5c3484
	  MPN_COPY (tp, np, qn);
Packit 5c3484
	  binvert_limb (di, dp[0]);  di = -di;
Packit 5c3484
	  dn = MIN (dn, qn);
Packit 5c3484
	  mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
Packit 5c3484
	}
Packit 5c3484
      else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  tp = scratch;
Packit 5c3484
	  MPN_COPY (tp, np, qn);
Packit 5c3484
	  binvert_limb (di, dp[0]);  di = -di;
Packit 5c3484
	  mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
Packit 5c3484
	}
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  nn0 = qn0 + qn0;
Packit 5c3484
Packit 5c3484
  nn1 = nn0 - 1 + ((nn-dn) & 1);
Packit 5c3484
  qn1 = qn0;
Packit 5c3484
  if (LIKELY (qn0 != dn))
Packit 5c3484
    {
Packit 5c3484
      nn1 = nn1 + 1;
Packit 5c3484
      qn1 = qn1 + 1;
Packit 5c3484
      if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
Packit 5c3484
	{
Packit 5c3484
	  /* If the leading divisor limb == 1, i.e. has just one bit, we have
Packit 5c3484
	     to include an extra limb in order to get the needed overlap.  */
Packit 5c3484
	  /* FIXME: Now with the mu_divappr_q function, we should really need
Packit 5c3484
	     more overlap. That indicates one of two things: (1) The test code
Packit 5c3484
	     is not good. (2) We actually overlap too much by default.  */
Packit 5c3484
	  nn1 = nn1 + 1;
Packit 5c3484
	  qn1 = qn1 + 1;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  tp = TMP_ALLOC_LIMBS (nn1 + 1);
Packit 5c3484
Packit 5c3484
  count_leading_zeros (cnt, dp[dn - 1]);
Packit 5c3484
Packit 5c3484
  /* Normalize divisor, store into tmp area.  */
Packit 5c3484
  if (cnt != 0)
Packit 5c3484
    {
Packit 5c3484
      xdp = TMP_ALLOC_LIMBS (qn1);
Packit 5c3484
      mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      xdp = (mp_ptr) dp + dn - qn1;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* Shift dividend according to the divisor normalization.  */
Packit 5c3484
  /* FIXME: We compute too much here for XX_divappr_q, but these functions'
Packit 5c3484
     interfaces want a pointer to the imaginative least significant limb, not
Packit 5c3484
     to the least significant *used* limb.  Of course, we could leave nn1-qn1
Packit 5c3484
     rubbish limbs in the low part, to save some time.  */
Packit 5c3484
  if (cnt != 0)
Packit 5c3484
    {
Packit 5c3484
      cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
Packit 5c3484
      if (cy != 0)
Packit 5c3484
	{
Packit 5c3484
	  tp[nn1] = cy;
Packit 5c3484
	  nn1++;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
Packit 5c3484
	 mpn_sub_n right before is executed.  */
Packit 5c3484
      MPN_COPY (tp, np + nn - nn1, nn1);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
Packit 5c3484
  if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
Packit 5c3484
    }
Packit 5c3484
  else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with a
Packit 5c3484
	 conditional subtraction here.  */
Packit 5c3484
      qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
Packit 5c3484
      if (qh)
Packit 5c3484
	mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
Packit 5c3484
      mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
Packit 5c3484
      qp[qn0 - 1 + nn1 - qn1] = qh;
Packit 5c3484
    }
Packit 5c3484
  qml = qp[qn0 - 1];
Packit 5c3484
Packit 5c3484
  binvert_limb (di, dp[0]);  di = -di;
Packit 5c3484
Packit 5c3484
  if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      MPN_COPY (tp, np, qn0);
Packit 5c3484
      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
Packit 5c3484
    }
Packit 5c3484
  else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      MPN_COPY (tp, np, qn0);
Packit 5c3484
      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (qml < qp[qn0 - 1])
Packit 5c3484
    mpn_decr_u (qp + qn0, 1);
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}
Packit 5c3484
#endif