Blame mpn/generic/mu_bdiv_qr.c

Packit 5c3484
/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
Packit 5c3484
   where qn = nn-dn, storing the result in {qp,qn}.  Overlap allowed between Q
Packit 5c3484
   and N; all other overlap disallowed.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2005-2007, 2009, 2010, 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
/*
Packit 5c3484
   The idea of the algorithm used herein is to compute a smaller inverted value
Packit 5c3484
   than used in the standard Barrett algorithm, and thus save time in the
Packit 5c3484
   Newton iterations, and pay just a small price when using the inverted value
Packit 5c3484
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* N = {np,nn}
Packit 5c3484
   D = {dp,dn}
Packit 5c3484
Packit 5c3484
   Requirements: N >= D
Packit 5c3484
		 D >= 1
Packit 5c3484
		 D odd
Packit 5c3484
		 dn >= 2
Packit 5c3484
		 nn >= 2
Packit 5c3484
		 scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).
Packit 5c3484
Packit 5c3484
   Write quotient to Q = {qp,nn-dn}.
Packit 5c3484
Packit 5c3484
   FIXME: When iterating, perhaps do the small step before loop, not after.
Packit 5c3484
   FIXME: Try to avoid the scalar divisions when computing inverse size.
Packit 5c3484
   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
Packit 5c3484
	  particular, when dn==in, tp and rp could use the same space.
Packit 5c3484
*/
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_mu_bdiv_qr (mp_ptr qp,
Packit 5c3484
		mp_ptr rp,
Packit 5c3484
		mp_srcptr np, mp_size_t nn,
Packit 5c3484
		mp_srcptr dp, mp_size_t dn,
Packit 5c3484
		mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_size_t qn;
Packit 5c3484
  mp_size_t in;
Packit 5c3484
  mp_limb_t cy, c0;
Packit 5c3484
  mp_size_t tn, wn;
Packit 5c3484
Packit 5c3484
  qn = nn - dn;
Packit 5c3484
Packit 5c3484
  ASSERT (dn >= 2);
Packit 5c3484
  ASSERT (qn >= 2);
Packit 5c3484
Packit 5c3484
  if (qn > dn)
Packit 5c3484
    {
Packit 5c3484
      mp_size_t b;
Packit 5c3484
Packit 5c3484
      /* |_______________________|   dividend
Packit 5c3484
			|________|   divisor  */
Packit 5c3484
Packit 5c3484
#define ip           scratch		/* in */
Packit 5c3484
#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
Packit 5c3484
#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
Packit 5c3484
Packit 5c3484
      /* Compute an inverse size that is a nice partition of the quotient.  */
Packit 5c3484
      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
Packit 5c3484
      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
Packit 5c3484
Packit 5c3484
      /* Some notes on allocation:
Packit 5c3484
Packit 5c3484
	 When in = dn, R dies when mpn_mullo returns, if in < dn the low in
Packit 5c3484
	 limbs of R dies at that point.  We could save memory by letting T live
Packit 5c3484
	 just under R, and let the upper part of T expand into R. These changes
Packit 5c3484
	 should reduce itch to perhaps 3dn.
Packit 5c3484
       */
Packit 5c3484
Packit 5c3484
      mpn_binvert (ip, dp, in, tp);
Packit 5c3484
Packit 5c3484
      MPN_COPY (rp, np, dn);
Packit 5c3484
      np += dn;
Packit 5c3484
      cy = 0;
Packit 5c3484
Packit 5c3484
      while (qn > in)
Packit 5c3484
	{
Packit 5c3484
	  mpn_mullo_n (qp, rp, ip, in);
Packit 5c3484
Packit 5c3484
	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
Packit 5c3484
	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      tn = mpn_mulmod_bnm1_next_size (dn);
Packit 5c3484
	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
Packit 5c3484
	      wn = dn + in - tn;		/* number of wrapped limbs */
Packit 5c3484
	      if (wn > 0)
Packit 5c3484
		{
Packit 5c3484
		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
Packit 5c3484
		  mpn_decr_u (tp + wn, c0);
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  qp += in;
Packit 5c3484
	  qn -= in;
Packit 5c3484
Packit 5c3484
	  if (dn != in)
Packit 5c3484
	    {
Packit 5c3484
	      /* Subtract tp[dn-1...in] from partial remainder.  */
Packit 5c3484
	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
Packit 5c3484
	      if (cy == 2)
Packit 5c3484
		{
Packit 5c3484
		  mpn_incr_u (tp + dn, 1);
Packit 5c3484
		  cy = 1;
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
	  /* Subtract tp[dn+in-1...dn] from dividend.  */
Packit 5c3484
	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
Packit 5c3484
	  np += in;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Generate last qn limbs.  */
Packit 5c3484
      mpn_mullo_n (qp, rp, ip, qn);
Packit 5c3484
Packit 5c3484
      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
Packit 5c3484
	mpn_mul (tp, dp, dn, qp, qn);		/* mulhi, need tp[qn+in-1...in] */
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  tn = mpn_mulmod_bnm1_next_size (dn);
Packit 5c3484
	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
Packit 5c3484
	  wn = dn + qn - tn;			/* number of wrapped limbs */
Packit 5c3484
	  if (wn > 0)
Packit 5c3484
	    {
Packit 5c3484
	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
Packit 5c3484
	      mpn_decr_u (tp + wn, c0);
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      if (dn != qn)
Packit 5c3484
	{
Packit 5c3484
	  cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
Packit 5c3484
	  if (cy == 2)
Packit 5c3484
	    {
Packit 5c3484
	      mpn_incr_u (tp + dn, 1);
Packit 5c3484
	      cy = 1;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);
Packit 5c3484
Packit 5c3484
#undef ip
Packit 5c3484
#undef tp
Packit 5c3484
#undef scratch_out
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* |_______________________|   dividend
Packit 5c3484
		|________________|   divisor  */
Packit 5c3484
Packit 5c3484
#define ip           scratch		/* in */
Packit 5c3484
#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
Packit 5c3484
#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
Packit 5c3484
Packit 5c3484
      /* Compute half-sized inverse.  */
Packit 5c3484
      in = qn - (qn >> 1);
Packit 5c3484
Packit 5c3484
      mpn_binvert (ip, dp, in, tp);
Packit 5c3484
Packit 5c3484
      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
Packit 5c3484
Packit 5c3484
      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
Packit 5c3484
	mpn_mul (tp, dp, dn, qp, in);		/* mulhigh */
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  tn = mpn_mulmod_bnm1_next_size (dn);
Packit 5c3484
	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
Packit 5c3484
	  wn = dn + in - tn;			/* number of wrapped limbs */
Packit 5c3484
	  if (wn > 0)
Packit 5c3484
	    {
Packit 5c3484
	      c0 = mpn_sub_n (tp + tn, tp, np, wn);
Packit 5c3484
	      mpn_decr_u (tp + wn, c0);
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      qp += in;
Packit 5c3484
      qn -= in;
Packit 5c3484
Packit 5c3484
      cy = mpn_sub_n (rp, np + in, tp + in, dn);
Packit 5c3484
      mpn_mullo_n (qp, rp, ip, qn);		/* high qn quotient limbs */
Packit 5c3484
Packit 5c3484
      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
Packit 5c3484
	mpn_mul (tp, dp, dn, qp, qn);		/* mulhigh */
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  tn = mpn_mulmod_bnm1_next_size (dn);
Packit 5c3484
	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
Packit 5c3484
	  wn = dn + qn - tn;			/* number of wrapped limbs */
Packit 5c3484
	  if (wn > 0)
Packit 5c3484
	    {
Packit 5c3484
	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
Packit 5c3484
	      mpn_decr_u (tp + wn, c0);
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
Packit 5c3484
      if (cy == 2)
Packit 5c3484
	{
Packit 5c3484
	  mpn_incr_u (tp + dn, 1);
Packit 5c3484
	  cy = 1;
Packit 5c3484
	}
Packit 5c3484
      return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
Packit 5c3484
Packit 5c3484
#undef ip
Packit 5c3484
#undef tp
Packit 5c3484
#undef scratch_out
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Return the number of scratch limbs mpn_mu_bdiv_qr needs for an nn limb
   dividend and a dn limb divisor.

   The inverse size `in' is chosen exactly as in mpn_mu_bdiv_qr.  The result
   is `in' limbs for the inverse itself, plus the larger of
     - the product area (dn+in limbs for a plain multiply, or
       mpn_mulmod_bnm1_next_size (dn) limbs plus mpn_mulmod_bnm1 scratch), and
     - the scratch needed by mpn_binvert for an `in' limb inverse.  */
mp_size_t
mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
{
  mp_size_t qn, in;
  mp_size_t prod_size, prod_scratch, mul_itch, inv_itch;

  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);

  qn = nn - dn;

  if (qn <= dn)
    {
      /* Half-sized inverse.  */
      in = qn - (qn >> 1);
    }
  else
    {
      mp_size_t nblocks;

      nblocks = (qn - 1) / dn + 1;	/* ceil(qn/dn) */
      in = (qn - 1) / nblocks + 1;	/* ceil(qn/nblocks) */
    }

  if (! BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
    {
      prod_size = mpn_mulmod_bnm1_next_size (dn);
      prod_scratch = mpn_mulmod_bnm1_itch (prod_size, dn, in);
    }
  else
    {
      /* Plain multiplication: full product, no extra scratch.  */
      prod_size = dn + in;
      prod_scratch = 0;
    }

  inv_itch = mpn_binvert_itch (in);
  mul_itch = prod_size + prod_scratch;

  return in + (mul_itch > inv_itch ? mul_itch : inv_itch);
}