Blame mpn/generic/div_q.c

Packit 5c3484
/* mpn_div_q -- division for arbitrary size operands.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
Packit 5c3484
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2009, 2010 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Compute Q = N/D with truncation.
Packit 5c3484
     N = {np,nn}
Packit 5c3484
     D = {dp,dn}
Packit 5c3484
     Q = {qp,nn-dn+1}
Packit 5c3484
     T = {scratch,nn+1} is scratch space
Packit 5c3484
   N and D are both untouched by the computation.
Packit 5c3484
   N and T may overlap; pass the same space if N is irrelevant after the call,
Packit 5c3484
   but note that tp needs an extra limb.
Packit 5c3484
Packit 5c3484
   Operand requirements:
Packit 5c3484
     N >= D > 0
Packit 5c3484
     dp[dn-1] != 0
Packit 5c3484
     No overlap between the N, D, and Q areas.
Packit 5c3484
Packit 5c3484
   This division function does not clobber its input operands, since it is
Packit 5c3484
   intended to support average-O(qn) division, and for that to be effective, it
Packit 5c3484
   cannot put requirements on callers to copy a O(nn) operand.
Packit 5c3484
Packit 5c3484
   If a caller does not care about the value of {np,nn+1} after calling this
Packit 5c3484
   function, it should pass np also for the scratch argument.  This function
Packit 5c3484
   will then save some time and space by avoiding allocation and copying.
Packit 5c3484
   (FIXME: Is this a good design?  We only really save any copying for
Packit 5c3484
   already-normalised divisors, which should be rare.  It also prevents us from
Packit 5c3484
   reasonably asking for all scratch space we need.)
Packit 5c3484
Packit 5c3484
   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
Packit 5c3484
   the most significant quotient limb?  Look at the 4 main code blocks below
Packit 5c3484
   (consisting of an outer if-else where each arm contains an if-else). It is
Packit 5c3484
   tricky for the first code block, since the mpn_*_div_q calls will typically
Packit 5c3484
   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
Packit 5c3484
   we generate the most significant quotient limb here, before calling
Packit 5c3484
   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
Packit 5c3484
   critical division case (the SB sub-case in particular) copying is not a good
Packit 5c3484
   idea.
Packit 5c3484
Packit 5c3484
   It might make sense to split the if-else parts of the (qn + FUDGE
Packit 5c3484
   >= dn) blocks into separate functions, since we could promise quite
Packit 5c3484
   different things to callers in these two cases.  The 'then' case
Packit 5c3484
   benefits from np=scratch, and it could perhaps even tolerate qp=np,
Packit 5c3484
   saving some headache for many callers.
Packit 5c3484
Packit 5c3484
   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
Packit 5c3484
   operands, we do not reuse the huge scratch for adjustments.  This can be a
Packit 5c3484
   serious waste of memory for the largest operands.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
/* FUDGE determines when to try getting an approximate quotient from the upper
Packit 5c3484
   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
Packit 5c3484
   for the code to be correct.  */
Packit 5c3484
#define FUDGE 5			/* FIXME: tune this */
Packit 5c3484
Packit 5c3484
#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
Packit 5c3484
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
Packit 5c3484
#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
Packit 5c3484
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
Packit 5c3484
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_div_q (mp_ptr qp,
Packit 5c3484
	   mp_srcptr np, mp_size_t nn,
Packit 5c3484
	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_ptr new_dp, new_np, tp, rp;
Packit 5c3484
  mp_limb_t cy, dh, qh;
Packit 5c3484
  mp_size_t new_nn, qn;
Packit 5c3484
  gmp_pi1_t dinv;
Packit 5c3484
  int cnt;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  ASSERT (nn >= dn);
Packit 5c3484
  ASSERT (dn > 0);
Packit 5c3484
  ASSERT (dp[dn - 1] != 0);
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
Packit 5c3484
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
Packit 5c3484
Packit 5c3484
  ASSERT_ALWAYS (FUDGE >= 2);
Packit 5c3484
Packit 5c3484
  if (dn == 1)
Packit 5c3484
    {
Packit 5c3484
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */
Packit 5c3484
Packit 5c3484
  if (qn + FUDGE >= dn)
Packit 5c3484
    {
Packit 5c3484
      /* |________________________|
Packit 5c3484
                          |_______|  */
Packit 5c3484
      new_np = scratch;
Packit 5c3484
Packit 5c3484
      dh = dp[dn - 1];
Packit 5c3484
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
Packit 5c3484
	{
Packit 5c3484
	  count_leading_zeros (cnt, dh);
Packit 5c3484
Packit 5c3484
	  cy = mpn_lshift (new_np, np, nn, cnt);
Packit 5c3484
	  new_np[nn] = cy;
Packit 5c3484
	  new_nn = nn + (cy != 0);
Packit 5c3484
Packit 5c3484
	  new_dp = TMP_ALLOC_LIMBS (dn);
Packit 5c3484
	  mpn_lshift (new_dp, dp, dn, cnt);
Packit 5c3484
Packit 5c3484
	  if (dn == 2)
Packit 5c3484
	    {
Packit 5c3484
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
Packit 5c3484
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
Packit 5c3484
	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
Packit 5c3484
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
Packit 5c3484
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
Packit 5c3484
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
Packit 5c3484
	    {
Packit 5c3484
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
Packit 5c3484
	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
Packit 5c3484
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
Packit 5c3484
	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
Packit 5c3484
	    }
Packit 5c3484
	  if (cy == 0)
Packit 5c3484
	    qp[qn - 1] = qh;
Packit 5c3484
	  else if (UNLIKELY (qh != 0))
Packit 5c3484
	    {
Packit 5c3484
	      /* This happens only when the quotient is close to B^n and
Packit 5c3484
		 mpn_*_divappr_q returned B^n.  */
Packit 5c3484
	      mp_size_t i, n;
Packit 5c3484
	      n = new_nn - dn;
Packit 5c3484
	      for (i = 0; i < n; i++)
Packit 5c3484
		qp[i] = GMP_NUMB_MAX;
Packit 5c3484
	      qh = 0;		/* currently ignored */
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else  /* divisor is already normalised */
Packit 5c3484
	{
Packit 5c3484
	  if (new_np != np)
Packit 5c3484
	    MPN_COPY (new_np, np, nn);
Packit 5c3484
Packit 5c3484
	  if (dn == 2)
Packit 5c3484
	    {
Packit 5c3484
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
Packit 5c3484
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
	      invert_pi1 (dinv, dh, dp[dn - 2]);
Packit 5c3484
	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
Packit 5c3484
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
Packit 5c3484
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
Packit 5c3484
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
Packit 5c3484
	    {
Packit 5c3484
	      invert_pi1 (dinv, dh, dp[dn - 2]);
Packit 5c3484
	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
Packit 5c3484
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
Packit 5c3484
	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
Packit 5c3484
	    }
Packit 5c3484
	  qp[nn - dn] = qh;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* |________________________|
Packit 5c3484
                |_________________|  */
Packit 5c3484
      tp = TMP_ALLOC_LIMBS (qn + 1);
Packit 5c3484
Packit 5c3484
      new_np = scratch;
Packit 5c3484
      new_nn = 2 * qn + 1;
Packit 5c3484
      if (new_np == np)
Packit 5c3484
	/* We need {np,nn} to remain untouched until the final adjustment, so
Packit 5c3484
	   we need to allocate separate space for new_np.  */
Packit 5c3484
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);
Packit 5c3484
Packit 5c3484
Packit 5c3484
      dh = dp[dn - 1];
Packit 5c3484
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
Packit 5c3484
	{
Packit 5c3484
	  count_leading_zeros (cnt, dh);
Packit 5c3484
Packit 5c3484
	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
Packit 5c3484
	  new_np[new_nn] = cy;
Packit 5c3484
Packit 5c3484
	  new_nn += (cy != 0);
Packit 5c3484
Packit 5c3484
	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
Packit 5c3484
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
Packit 5c3484
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
Packit 5c3484
Packit 5c3484
	  if (qn + 1 == 2)
Packit 5c3484
	    {
Packit 5c3484
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
Packit 5c3484
	    {
Packit 5c3484
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
Packit 5c3484
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
Packit 5c3484
	    {
Packit 5c3484
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
Packit 5c3484
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
Packit 5c3484
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
Packit 5c3484
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
Packit 5c3484
	    }
Packit 5c3484
	  if (cy == 0)
Packit 5c3484
	    tp[qn] = qh;
Packit 5c3484
	  else if (UNLIKELY (qh != 0))
Packit 5c3484
	    {
Packit 5c3484
	      /* This happens only when the quotient is close to B^n and
Packit 5c3484
		 mpn_*_divappr_q returned B^n.  */
Packit 5c3484
	      mp_size_t i, n;
Packit 5c3484
	      n = new_nn - (qn + 1);
Packit 5c3484
	      for (i = 0; i < n; i++)
Packit 5c3484
		tp[i] = GMP_NUMB_MAX;
Packit 5c3484
	      qh = 0;		/* currently ignored */
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else  /* divisor is already normalised */
Packit 5c3484
	{
Packit 5c3484
	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
Packit 5c3484
Packit 5c3484
	  new_dp = (mp_ptr) dp + dn - (qn + 1);
Packit 5c3484
Packit 5c3484
	  if (qn == 2 - 1)
Packit 5c3484
	    {
Packit 5c3484
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
Packit 5c3484
	    {
Packit 5c3484
	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
Packit 5c3484
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
Packit 5c3484
	    {
Packit 5c3484
	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
Packit 5c3484
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
Packit 5c3484
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
Packit 5c3484
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
Packit 5c3484
	    }
Packit 5c3484
	  tp[qn] = qh;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      MPN_COPY (qp, tp + 1, qn);
Packit 5c3484
      if (tp[0] <= 4)
Packit 5c3484
        {
Packit 5c3484
	  mp_size_t rn;
Packit 5c3484
Packit 5c3484
          rp = TMP_ALLOC_LIMBS (dn + qn);
Packit 5c3484
          mpn_mul (rp, dp, dn, tp + 1, qn);
Packit 5c3484
	  rn = dn + qn;
Packit 5c3484
	  rn -= rp[rn - 1] == 0;
Packit 5c3484
Packit 5c3484
          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
Packit 5c3484
            mpn_decr_u (qp, 1);
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}