Blame mpn/generic/dcpi1_divappr_q.c

/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
   quotient.  The quotient returned is either correct, or one too large.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
Packit 5c3484
		       gmp_pi1_t *dinv, mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_size_t lo, hi;
Packit 5c3484
  mp_limb_t cy, qh, ql;
Packit 5c3484
Packit 5c3484
  lo = n >> 1;			/* floor(n/2) */
Packit 5c3484
  hi = n - lo;			/* ceil(n/2) */
Packit 5c3484
Packit 5c3484
  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
Packit 5c3484
    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
Packit 5c3484
  else
Packit 5c3484
    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
Packit 5c3484
Packit 5c3484
  mpn_mul (tp, qp + lo, hi, dp, lo);
Packit 5c3484
Packit 5c3484
  cy = mpn_sub_n (np + lo, np + lo, tp, n);
Packit 5c3484
  if (qh != 0)
Packit 5c3484
    cy += mpn_sub_n (np + n, np + n, dp, lo);
Packit 5c3484
Packit 5c3484
  while (cy != 0)
Packit 5c3484
    {
Packit 5c3484
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
Packit 5c3484
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
Packit 5c3484
    ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
Packit 5c3484
  else
Packit 5c3484
    ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
Packit 5c3484
Packit 5c3484
  if (UNLIKELY (ql != 0))
Packit 5c3484
    {
Packit 5c3484
      mp_size_t i;
Packit 5c3484
      for (i = 0; i < lo; i++)
Packit 5c3484
	qp[i] = GMP_NUMB_MASK;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return qh;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
Packit 5c3484
		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
Packit 5c3484
{
Packit 5c3484
  mp_size_t qn;
Packit 5c3484
  mp_limb_t qh, cy, qsave;
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  ASSERT (dn >= 6);
Packit 5c3484
  ASSERT (nn > dn);
Packit 5c3484
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
Packit 5c3484
Packit 5c3484
  qn = nn - dn;
Packit 5c3484
  qp += qn;
Packit 5c3484
  np += nn;
Packit 5c3484
  dp += dn;
Packit 5c3484
Packit 5c3484
  if (qn >= dn)
Packit 5c3484
    {
Packit 5c3484
      qn++;			/* pretend we'll need an extra limb */
Packit 5c3484
      /* Reduce qn mod dn without division, optimizing small operations.  */
Packit 5c3484
      do
Packit 5c3484
	qn -= dn;
Packit 5c3484
      while (qn > dn);
Packit 5c3484
Packit 5c3484
      qp -= qn;			/* point at low limb of next quotient block */
Packit 5c3484
      np -= qn;			/* point in the middle of partial remainder */
Packit 5c3484
Packit 5c3484
      tp = TMP_SALLOC_LIMBS (dn);
Packit 5c3484
Packit 5c3484
      /* Perform the typically smaller block first.  */
Packit 5c3484
      if (qn == 1)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t q, n2, n1, n0, d1, d0;
Packit 5c3484
Packit 5c3484
	  /* Handle qh up front, for simplicity. */
Packit 5c3484
	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
Packit 5c3484
	  if (qh)
Packit 5c3484
	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
Packit 5c3484
Packit 5c3484
	  /* A single iteration of schoolbook: One 3/2 division,
Packit 5c3484
	     followed by the bignum update and adjustment. */
Packit 5c3484
	  n2 = np[0];
Packit 5c3484
	  n1 = np[-1];
Packit 5c3484
	  n0 = np[-2];
Packit 5c3484
	  d1 = dp[-1];
Packit 5c3484
	  d0 = dp[-2];
Packit 5c3484
Packit 5c3484
	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
Packit 5c3484
Packit 5c3484
	  if (UNLIKELY (n2 == d1) && n1 == d0)
Packit 5c3484
	    {
Packit 5c3484
	      q = GMP_NUMB_MASK;
Packit 5c3484
	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
Packit 5c3484
	      ASSERT (cy == n2);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
Packit 5c3484
Packit 5c3484
	      if (dn > 2)
Packit 5c3484
		{
Packit 5c3484
		  mp_limb_t cy, cy1;
Packit 5c3484
		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
Packit 5c3484
Packit 5c3484
		  cy1 = n0 < cy;
Packit 5c3484
		  n0 = (n0 - cy) & GMP_NUMB_MASK;
Packit 5c3484
		  cy = n1 < cy1;
Packit 5c3484
		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
Packit 5c3484
		  np[-2] = n0;
Packit 5c3484
Packit 5c3484
		  if (UNLIKELY (cy != 0))
Packit 5c3484
		    {
Packit 5c3484
		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
Packit 5c3484
		      qh -= (q == 0);
Packit 5c3484
		      q = (q - 1) & GMP_NUMB_MASK;
Packit 5c3484
		    }
Packit 5c3484
		}
Packit 5c3484
	      else
Packit 5c3484
		np[-2] = n0;
Packit 5c3484
Packit 5c3484
	      np[-1] = n1;
Packit 5c3484
	    }
Packit 5c3484
	  qp[0] = q;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  if (qn == 2)
Packit 5c3484
	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
Packit 5c3484
	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
Packit 5c3484
	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
Packit 5c3484
	  else
Packit 5c3484
	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
Packit 5c3484
Packit 5c3484
	  if (qn != dn)
Packit 5c3484
	    {
Packit 5c3484
	      if (qn > dn - qn)
Packit 5c3484
		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
Packit 5c3484
	      else
Packit 5c3484
		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
Packit 5c3484
Packit 5c3484
	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
Packit 5c3484
	      if (qh != 0)
Packit 5c3484
		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
Packit 5c3484
Packit 5c3484
	      while (cy != 0)
Packit 5c3484
		{
Packit 5c3484
		  qh -= mpn_sub_1 (qp, qp, qn, 1);
Packit 5c3484
		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      qn = nn - dn - qn + 1;
Packit 5c3484
      while (qn > dn)
Packit 5c3484
	{
Packit 5c3484
	  qp -= dn;
Packit 5c3484
	  np -= dn;
Packit 5c3484
	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
Packit 5c3484
	  qn -= dn;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Since we pretended we'd need an extra quotient limb before, we now
Packit 5c3484
	 have made sure the code above left just dn-1=qn quotient limbs to
Packit 5c3484
	 develop.  Develop that plus a guard limb. */
Packit 5c3484
      qn--;
Packit 5c3484
      qp -= qn;
Packit 5c3484
      np -= dn;
Packit 5c3484
      qsave = qp[qn];
Packit 5c3484
      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
Packit 5c3484
      MPN_COPY_INCR (qp, qp + 1, qn);
Packit 5c3484
      qp[qn] = qsave;
Packit 5c3484
    }
Packit 5c3484
  else    /* (qn < dn) */
Packit 5c3484
    {
Packit 5c3484
      mp_ptr q2p;
Packit 5c3484
#if 0				/* not possible since we demand nn > dn */
Packit 5c3484
      if (qn == 0)
Packit 5c3484
	{
Packit 5c3484
	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
Packit 5c3484
	  if (qh)
Packit 5c3484
	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
Packit 5c3484
	  TMP_FREE;
Packit 5c3484
	  return qh;
Packit 5c3484
	}
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
      qp -= qn;			/* point at low limb of next quotient block */
Packit 5c3484
      np -= qn;			/* point in the middle of partial remainder */
Packit 5c3484
Packit 5c3484
      q2p = TMP_SALLOC_LIMBS (qn + 1);
Packit 5c3484
      /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
Packit 5c3484
	 callers not to be silly?  */
Packit 5c3484
      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
Packit 5c3484
				    dp - (qn + 1), qn + 1, dinv->inv32);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* It is tempting to use qp for recursive scratch and put quotient in
Packit 5c3484
	     tp, but the recursive scratch needs one limb too many.  */
Packit 5c3484
	  tp = TMP_SALLOC_LIMBS (qn + 1);
Packit 5c3484
	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
Packit 5c3484
	}
Packit 5c3484
      MPN_COPY (qp, q2p + 1, qn);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
  return qh;
Packit 5c3484
}