Blame mpn/generic/sbpi1_div_q.c

Packit 5c3484
/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
Packit 5c3484
   division algorithm.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
Packit 5c3484
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2007, 2009 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
/* Compute the quotient of {np, nn} divided by {dp, dn}.

   The qn = nn - dn low quotient limbs are written to {qp, qn}; they are
   produced most significant limb first, stored with *--qp starting from
   qp + qn.  The return value is the extra high quotient limb qh, which is
   initially computed as 0 or 1 and is reduced by the borrow of the final
   quotient decrement on two of the early-exit paths.

   {np, nn} is used as working storage and is overwritten.

   Preconditions (checked with ASSERT): dn > 2, nn >= dn, and the most
   significant divisor limb has GMP_NUMB_HIGHBIT set.

   dinv is passed unchanged to udiv_qr_3by2 together with the two most
   significant divisor limbs; presumably it is the precomputed 3/2 inverse
   of those two limbs -- confirm against the caller.

   Outline of the code below:
     1. If qn + 1 < dn, only the top qn + 1 divisor limbs take part in the
	main loops.
     2. qh is set by comparing the top dividend limbs with that divisor.
     3. A first loop develops quotient limbs against all divisor limbs in
	use; a second loop drops one low divisor limb per step.
     4. If the top remainder limb n1 ends up smaller than dn (and flag is
	still set), the products ignored by steps 1 and 3 are subtracted
	after the fact, and the quotient is decremented by one as soon as
	that makes the remainder go negative.

   NOTE(review): the "Packit 5c3484" lines interleaved with the code look
   like residue from a blame/annotate view rather than C source; they are
   left untouched here but need stripping before this compiles.  */
mp_limb_t
Packit 5c3484
mpn_sbpi1_div_q (mp_ptr qp,
Packit 5c3484
		 mp_ptr np, mp_size_t nn,
Packit 5c3484
		 mp_srcptr dp, mp_size_t dn,
Packit 5c3484
		 mp_limb_t dinv)
Packit 5c3484
{
Packit 5c3484
  /* High quotient limb; this is the value returned.  */
  mp_limb_t qh;
Packit 5c3484
  mp_size_t qn, i;
Packit 5c3484
  /* n1 mirrors the top limb of the running partial remainder; n0 receives
     the next limb from udiv_qr_3by2.  */
  mp_limb_t n1, n0;
Packit 5c3484
  /* The two most significant divisor limbs.  */
  mp_limb_t d1, d0;
Packit 5c3484
  mp_limb_t cy, cy1;
Packit 5c3484
  mp_limb_t q;
Packit 5c3484
  /* All-ones mask while the ordinary comparisons are in force; see where it
     is cleared in the second loop for the effect of flag == 0.  */
  mp_limb_t flag;
Packit 5c3484
Packit 5c3484
  /* The original operands are kept because dn, dp and np are modified by the
     main loops and are needed again in the correction phase at the end.  */
  mp_size_t dn_orig = dn;
Packit 5c3484
  mp_srcptr dp_orig = dp;
Packit 5c3484
  mp_ptr np_orig = np;
Packit 5c3484
Packit 5c3484
  ASSERT (dn > 2);
Packit 5c3484
  ASSERT (nn >= dn);
Packit 5c3484
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
Packit 5c3484
Packit 5c3484
  /* np now points just past the most significant dividend limb.  */
  np += nn;
Packit 5c3484
Packit 5c3484
  qn = nn - dn;
Packit 5c3484
  /* When the quotient is shorter than the divisor, keep only the top qn + 1
     divisor limbs for the main loops.  The ignored low limbs are accounted
     for in the correction phase at the end.  */
  if (qn + 1 < dn)
Packit 5c3484
    {
Packit 5c3484
      dp += dn - (qn + 1);
Packit 5c3484
      dn = qn + 1;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* qh is 1 when the top dn dividend limbs are >= the divisor limbs in use,
     in which case the divisor is subtracted once from those limbs.  */
  qh = mpn_cmp (np - dn, dp, dn) >= 0;
Packit 5c3484
  if (qh != 0)
Packit 5c3484
    mpn_sub_n (np - dn, np - dn, dp, dn);
Packit 5c3484
Packit 5c3484
  /* Quotient limbs are stored downwards from here with *--qp; after all qn
     stores qp is back at the least significant quotient limb.  */
  qp += qn;
Packit 5c3484
Packit 5c3484
  dn -= 2;			/* offset dn by 2 for main division loops,
Packit 5c3484
				   saving two iterations in mpn_submul_1.  */
Packit 5c3484
  d1 = dp[dn + 1];
Packit 5c3484
  d0 = dp[dn + 0];
Packit 5c3484
Packit 5c3484
  /* From here np[1] is the top limb of the partial remainder.  */
  np -= 2;
Packit 5c3484
Packit 5c3484
  n1 = np[1];
Packit 5c3484
Packit 5c3484
  /* First loop: one quotient limb per iteration, using all dn + 2 divisor
     limbs currently in use.  It runs zero times when the divisor was
     truncated above (then qn - (dn + 2) is -1).  */
  for (i = qn - (dn + 2); i >= 0; i--)
Packit 5c3484
    {
Packit 5c3484
      np--;
Packit 5c3484
      /* Top two remainder limbs equal the top two divisor limbs: take the
	 largest possible quotient limb and subtract q times the whole
	 divisor in one go.  (Presumably udiv_qr_3by2 cannot be used in this
	 case -- confirm against its definition.)  */
      if (UNLIKELY (n1 == d1) && np[1] == d0)
Packit 5c3484
	{
Packit 5c3484
	  q = GMP_NUMB_MASK;
Packit 5c3484
	  mpn_submul_1 (np - dn, dp, dn + 2, q);
Packit 5c3484
	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* Quotient limb estimate from the top three remainder limbs and
	     the top two divisor limbs; n1, n0 receive the two-limb result
	     of that step.  */
	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
Packit 5c3484
Packit 5c3484
	  /* Subtract q times the remaining low dn divisor limbs.  */
	  cy = mpn_submul_1 (np - dn, dp, dn, q);
Packit 5c3484
Packit 5c3484
	  /* Propagate the borrow limb through n0 and then n1; afterwards cy
	     is the borrow out of n1.  */
	  cy1 = n0 < cy;
Packit 5c3484
	  n0 = (n0 - cy) & GMP_NUMB_MASK;
Packit 5c3484
	  cy = n1 < cy1;
Packit 5c3484
	  n1 -= cy1;
Packit 5c3484
	  /* np[0] has to be stored before the add-back below, which covers
	     dn + 1 limbs ending at np[0].  */
	  np[0] = n0;
Packit 5c3484
Packit 5c3484
	  /* A borrow out of n1 means q was one too large: add the divisor
	     back (low dn + 1 limbs via mpn_add_n, d1 plus carry into n1)
	     and decrement q.  */
	  if (UNLIKELY (cy != 0))
Packit 5c3484
	    {
Packit 5c3484
	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
Packit 5c3484
	      q--;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      *--qp = q;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  flag = ~CNST_LIMB(0);
Packit 5c3484
Packit 5c3484
  /* dn is negative here only when a single divisor limb is in use, i.e. when
     qn == 0; then there are no quotient limbs to develop.  */
  if (dn >= 0)
Packit 5c3484
    {
Packit 5c3484
      /* Second loop: like the first, but the divisor loses its lowest limb
	 after every step (dn--, dp++ at the bottom), so the test on n1 is
	 ">=" rather than an equality test.  */
      for (i = dn; i > 0; i--)
Packit 5c3484
	{
Packit 5c3484
	  np--;
Packit 5c3484
	  if (UNLIKELY (n1 >= (d1 & flag)))
Packit 5c3484
	    {
Packit 5c3484
	      q = GMP_NUMB_MASK;
Packit 5c3484
	      /* The returned borrow applies to the limb just above the
		 subtracted range, which is the limb n1 mirrors.  */
	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
Packit 5c3484
Packit 5c3484
	      if (UNLIKELY (n1 != cy))
Packit 5c3484
		{
Packit 5c3484
		  /* n1 < cy: the subtraction went negative, so use q - 1
		     and add the divisor back.  */
		  if (n1 < (cy & flag))
Packit 5c3484
		    {
Packit 5c3484
		      q--;
Packit 5c3484
		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
Packit 5c3484
		    }
Packit 5c3484
		  else
Packit 5c3484
		    /* n1 > cy even with the largest quotient limb.  With
		       flag cleared every later "n1 >= (d1 & flag)" test is
		       true and every "n1 < (cy & flag)" test is false, so
		       all remaining quotient limbs become GMP_NUMB_MASK
		       without add-back, and the correction phase after the
		       loops is skipped.  */
		    flag = 0;
Packit 5c3484
		}
Packit 5c3484
	      n1 = np[1];
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      /* Same step as the else branch of the first loop.  */
	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
Packit 5c3484
Packit 5c3484
	      cy = mpn_submul_1 (np - dn, dp, dn, q);
Packit 5c3484
Packit 5c3484
	      cy1 = n0 < cy;
Packit 5c3484
	      n0 = (n0 - cy) & GMP_NUMB_MASK;
Packit 5c3484
	      cy = n1 < cy1;
Packit 5c3484
	      n1 -= cy1;
Packit 5c3484
	      np[0] = n0;
Packit 5c3484
Packit 5c3484
	      if (UNLIKELY (cy != 0))
Packit 5c3484
		{
Packit 5c3484
		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
Packit 5c3484
		  q--;
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  *--qp = q;
Packit 5c3484
Packit 5c3484
	  /* Truncate operands.  */
Packit 5c3484
	  dn--;
Packit 5c3484
	  dp++;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Last quotient limb: dn is 0 here, so the divisor in use is just the
	 two limbs dp[1], dp[0] and the work is done on np[1], np[0].  */
      np--;
Packit 5c3484
      if (UNLIKELY (n1 >= (d1 & flag)))
Packit 5c3484
	{
Packit 5c3484
	  q = GMP_NUMB_MASK;
Packit 5c3484
	  cy = mpn_submul_1 (np, dp, 2, q);
Packit 5c3484
Packit 5c3484
	  if (UNLIKELY (n1 != cy))
Packit 5c3484
	    {
Packit 5c3484
	      if (n1 < (cy & flag))
Packit 5c3484
		{
Packit 5c3484
		  q--;
Packit 5c3484
		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
Packit 5c3484
		}
Packit 5c3484
	      else
Packit 5c3484
		flag = 0;
Packit 5c3484
	    }
Packit 5c3484
	  n1 = np[1];
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
Packit 5c3484
Packit 5c3484
	  /* No lower divisor limbs are left, so the two limbs from
	     udiv_qr_3by2 are stored as they are.  */
	  np[0] = n0;
Packit 5c3484
	  np[1] = n1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      *--qp = q;
Packit 5c3484
    }
Packit 5c3484
  ASSERT_ALWAYS (np[1] == n1);
Packit 5c3484
  /* After this, np[-1] is the limb n1 mirrors and np[-2] the limb below.  */
  np += 2;
Packit 5c3484
Packit 5c3484
Packit 5c3484
  dn = dn_orig;
Packit 5c3484
  /* Correction phase.  It is entered only when flag is still all-ones and the
     top remainder limb n1 is smaller than the original divisor limb
     count.  */
  if (UNLIKELY (n1 < (dn & flag)))
Packit 5c3484
    {
Packit 5c3484
      /* This q shadows the function-level q.  x starts as n1 and is
	 decremented once per borrow; a borrow arriving when x == 0 means the
	 remainder went negative, so the quotient is decremented by one and
	 the function returns.  */
      mp_limb_t q, x;
Packit 5c3484
Packit 5c3484
      /* The quotient may be too large if the remainder is small.  Recompute
Packit 5c3484
	 for above ignored operand parts, until the remainder spills.
Packit 5c3484
Packit 5c3484
	 FIXME: The quality of this code isn't the same as the code above.
Packit 5c3484
	 1. We don't compute things in an optimal order, high-to-low, in order
Packit 5c3484
	    to terminate as quickly as possible.
Packit 5c3484
	 2. We mess with pointers and sizes, adding and subtracting and
Packit 5c3484
	    adjusting to get things right.  It surely could be streamlined.
Packit 5c3484
	 3. The only termination criteria are that we determine that the
Packit 5c3484
	    quotient needs to be adjusted, or that we have recomputed
Packit 5c3484
	    everything.  We should stop when the remainder is so large
Packit 5c3484
	    that no additional subtracting could make it spill.
Packit 5c3484
	 4. If nothing else, we should not do two loops of submul_1 over the
Packit 5c3484
	    data, instead handle both the triangularization and chopping at
Packit 5c3484
	    once.  */
Packit 5c3484
Packit 5c3484
      x = n1;
Packit 5c3484
Packit 5c3484
      /* dn equals dn_orig here, which the ASSERT at the top requires to be
	 greater than 2.  */
      if (dn > 2)
Packit 5c3484
	{
Packit 5c3484
	  /* Compensate for triangularization.  */
Packit 5c3484
	  mp_limb_t y;
Packit 5c3484
Packit 5c3484
	  /* Redo the same divisor truncation as at the top of the
	     function.  */
	  dp = dp_orig;
Packit 5c3484
	  if (qn + 1 < dn)
Packit 5c3484
	    {
Packit 5c3484
	      dp += dn - (qn + 1);
Packit 5c3484
	      dn = qn + 1;
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  /* y is a working copy of np[-2], the limb that receives the
	     borrows from the subtractions below.  */
	  y = np[-2];
Packit 5c3484
Packit 5c3484
	  for (i = dn - 3; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      q = qp[i];
Packit 5c3484
	      /* Subtract qp[i] times the low dn - i - 2 divisor limbs; the
		 subtracted range ends just below np[-2].  */
	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
Packit 5c3484
Packit 5c3484
	      /* y - cy will wrap, so a borrow is owed to x.  */
	      if (y < cy)
Packit 5c3484
		{
Packit 5c3484
		  if (x == 0)
Packit 5c3484
		    {
Packit 5c3484
		      cy = mpn_sub_1 (qp, qp, qn, 1);
Packit 5c3484
		      ASSERT_ALWAYS (cy == 0);
Packit 5c3484
		      return qh - cy;
Packit 5c3484
		    }
Packit 5c3484
		  x--;
Packit 5c3484
		}
Packit 5c3484
	      y -= cy;
Packit 5c3484
	    }
Packit 5c3484
	  np[-2] = y;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      dn = dn_orig;
Packit 5c3484
      if (qn + 1 < dn)
Packit 5c3484
	{
Packit 5c3484
	  /* Compensate for ignored dividend and divisor tails.  */
Packit 5c3484
Packit 5c3484
	  dp = dp_orig;
Packit 5c3484
	  np = np_orig;
Packit 5c3484
Packit 5c3484
	  /* The initial mpn_sub_n for qh only covered the top qn + 1 divisor
	     limbs; subtract the ignored low dn - (qn + 1) limbs now.  */
	  if (qh != 0)
Packit 5c3484
	    {
Packit 5c3484
	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
Packit 5c3484
	      if (cy != 0)
Packit 5c3484
		{
Packit 5c3484
		  if (x == 0)
Packit 5c3484
		    {
Packit 5c3484
		      /* When qn == 0 there are no low quotient limbs to
			 decrement; cy then still holds the non-zero borrow
			 from mpn_sub_n and is taken from qh directly.  */
		      if (qn != 0)
Packit 5c3484
			cy = mpn_sub_1 (qp, qp, qn, 1);
Packit 5c3484
		      return qh - cy;
Packit 5c3484
		    }
Packit 5c3484
		  x--;
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  if (qn == 0)
Packit 5c3484
	    return qh;
Packit 5c3484
Packit 5c3484
	  /* For every ignored divisor limb dp[i], subtract {qp, qn} * dp[i]
	     at np + i and propagate the borrow limb through the
	     dn - qn - i - 1 limbs above.  */
	  for (i = dn - qn - 2; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
Packit 5c3484
	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
Packit 5c3484
	      if (cy != 0)
Packit 5c3484
		{
Packit 5c3484
		  if (x == 0)
Packit 5c3484
		    {
Packit 5c3484
		      /* NOTE(review): unlike the two exits above, the
			 borrow from this mpn_sub_1 is not subtracted from
			 qh.  Presumably it is always zero here, since a
			 borrow above implies {qp, qn} is non-zero --
			 confirm.  */
		      cy = mpn_sub_1 (qp, qp, qn, 1);
Packit 5c3484
		      return qh;
Packit 5c3484
		    }
Packit 5c3484
		  x--;
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return qh;
Packit 5c3484
}