Blame mpn/generic/mu_div_q.c

Packit 5c3484
/* mpn_mu_div_q.

   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/
Packit 5c3484
Packit 5c3484
/*
  Things to work on:

  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
     probably close to optimal, except when mpn_mu_divappr_q fails.

  2. We used to fall back to mpn_mu_div_qr when we detect a possible
     mpn_mu_divappr_q rounding problem, now we multiply and compare.
     Unfortunately, since mpn_mu_divappr_q does not return the partial
     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
     solve that.

  3. The allocations done here should be made from the scratch area, which
     then would need to be amended.
*/
Packit 5c3484
Packit 5c3484
#include <stdlib.h>		/* for NULL */
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_mu_div_q (mp_ptr qp,
Packit 5c3484
	      mp_srcptr np, mp_size_t nn,
Packit 5c3484
	      mp_srcptr dp, mp_size_t dn,
Packit 5c3484
	      mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_ptr tp, rp;
Packit 5c3484
  mp_size_t qn;
Packit 5c3484
  mp_limb_t cy, qh;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  qn = nn - dn;
Packit 5c3484
Packit 5c3484
  tp = TMP_BALLOC_LIMBS (qn + 1);
Packit 5c3484
Packit 5c3484
  if (qn >= dn)			/* nn >= 2*dn + 1 */
Packit 5c3484
    {
Packit 5c3484
       /* |_______________________|   dividend
Packit 5c3484
			 |________|   divisor  */
Packit 5c3484
Packit 5c3484
      rp = TMP_BALLOC_LIMBS (nn + 1);
Packit 5c3484
      MPN_COPY (rp + 1, np, nn);
Packit 5c3484
      rp[0] = 0;
Packit 5c3484
Packit 5c3484
      qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
Packit 5c3484
      if (qh != 0)
Packit 5c3484
	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
Packit 5c3484
Packit 5c3484
      cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
Packit 5c3484
Packit 5c3484
      if (UNLIKELY (cy != 0))
Packit 5c3484
	{
Packit 5c3484
	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
Packit 5c3484
	     canonically reduced, replace the returned value of B^(qn-dn)+eps
Packit 5c3484
	     by the largest possible value.  */
Packit 5c3484
	  mp_size_t i;
Packit 5c3484
	  for (i = 0; i < qn + 1; i++)
Packit 5c3484
	    tp[i] = GMP_NUMB_MAX;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
Packit 5c3484
	 smaller than the max error, we cannot trust the quotient.  */
Packit 5c3484
      if (tp[0] > 4)
Packit 5c3484
	{
Packit 5c3484
	  MPN_COPY (qp, tp + 1, qn);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t cy;
Packit 5c3484
	  mp_ptr pp;
Packit 5c3484
Packit 5c3484
	  pp = rp;
Packit 5c3484
	  mpn_mul (pp, tp + 1, qn, dp, dn);
Packit 5c3484
Packit 5c3484
	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
Packit 5c3484
Packit 5c3484
	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
Packit 5c3484
	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
Packit 5c3484
	  else /* Same as above */
Packit 5c3484
	    MPN_COPY (qp, tp + 1, qn);
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
       /* |_______________________|   dividend
Packit 5c3484
		 |________________|   divisor  */
Packit 5c3484
Packit 5c3484
      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
Packit 5c3484
	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
Packit 5c3484
	 the most significant dn-1 limbs will actually be read, but it is not
Packit 5c3484
	 pretty.  */
Packit 5c3484
Packit 5c3484
      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
Packit 5c3484
			     dp + dn - (qn + 1), qn + 1, scratch);
Packit 5c3484
Packit 5c3484
      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
Packit 5c3484
         error from the divisor truncation.  */
Packit 5c3484
      if (tp[0] > 6)
Packit 5c3484
	{
Packit 5c3484
	  MPN_COPY (qp, tp + 1, qn);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t cy;
Packit 5c3484
Packit 5c3484
	  /* FIXME: a shorter product should be enough; we may use already
Packit 5c3484
	     allocated space... */
Packit 5c3484
	  rp = TMP_BALLOC_LIMBS (nn);
Packit 5c3484
	  mpn_mul (rp, dp, dn, tp + 1, qn);
Packit 5c3484
Packit 5c3484
	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
Packit 5c3484
Packit 5c3484
	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
Packit 5c3484
	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
Packit 5c3484
	  else /* Same as above */
Packit 5c3484
	    MPN_COPY (qp, tp + 1, qn);
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
  return qh;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Return the scratch requirement for mpn_mu_div_q on operands of nn and dn
   limbs.  This is the value mpn_mu_divappr_q_itch reports for the numerator
   and divisor sizes that mpn_mu_div_q passes to mpn_mu_divappr_q in the
   corresponding case; mua_k is forwarded unchanged.  */
mp_size_t
mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
  mp_size_t qn = nn - dn;

  /* Long quotient: the full dividend, extended by one limb, is divided.  */
  if (qn >= dn)
    return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);

  /* Short quotient: only the top 2*qn+2 dividend limbs and the top qn+1
     divisor limbs take part.  */
  return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
}