Blame mpn/generic/divrem_1.c

/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

/* Each default below applies only when the threshold has not already been
   defined before this point; a value of 0 means no quotient size is ever
   "below threshold", so the preinv code is always selected.  */
#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif

/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
   code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course.  And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to CPU
   specific implementations to consider, especially since EXTRACT isn't on
   the dependent chain.  */

mp_limb_t
Packit 5c3484
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
Packit 5c3484
	      mp_srcptr up, mp_size_t un, mp_limb_t d)
Packit 5c3484
{
Packit 5c3484
  mp_size_t  n;
Packit 5c3484
  mp_size_t  i;
Packit 5c3484
  mp_limb_t  n1, n0;
Packit 5c3484
  mp_limb_t  r = 0;
Packit 5c3484
Packit 5c3484
  ASSERT (qxn >= 0);
Packit 5c3484
  ASSERT (un >= 0);
Packit 5c3484
  ASSERT (d != 0);
Packit 5c3484
  /* FIXME: What's the correct overlap rule when qxn!=0? */
Packit 5c3484
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
Packit 5c3484
Packit 5c3484
  n = un + qxn;
Packit 5c3484
  if (n == 0)
Packit 5c3484
    return 0;
Packit 5c3484
Packit 5c3484
  d <<= GMP_NAIL_BITS;
Packit 5c3484
Packit 5c3484
  qp += (n - 1);   /* Make qp point at most significant quotient limb */
Packit 5c3484
Packit 5c3484
  if ((d & GMP_LIMB_HIGHBIT) != 0)
Packit 5c3484
    {
Packit 5c3484
      if (un != 0)
Packit 5c3484
	{
Packit 5c3484
	  /* High quotient limb is 0 or 1, skip a divide step. */
Packit 5c3484
	  mp_limb_t q;
Packit 5c3484
	  r = up[un - 1] << GMP_NAIL_BITS;
Packit 5c3484
	  q = (r >= d);
Packit 5c3484
	  *qp-- = q;
Packit 5c3484
	  r -= (d & -q);
Packit 5c3484
	  r >>= GMP_NAIL_BITS;
Packit 5c3484
	  n--;
Packit 5c3484
	  un--;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	plain:
Packit 5c3484
	  for (i = un - 1; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      n0 = up[i] << GMP_NAIL_BITS;
Packit 5c3484
	      udiv_qrnnd (*qp, r, r, n0, d);
Packit 5c3484
	      r >>= GMP_NAIL_BITS;
Packit 5c3484
	      qp--;
Packit 5c3484
	    }
Packit 5c3484
	  for (i = qxn - 1; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
Packit 5c3484
	      r >>= GMP_NAIL_BITS;
Packit 5c3484
	      qp--;
Packit 5c3484
	    }
Packit 5c3484
	  return r;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* Multiply-by-inverse, divisor already normalized. */
Packit 5c3484
	  mp_limb_t dinv;
Packit 5c3484
	  invert_limb (dinv, d);
Packit 5c3484
Packit 5c3484
	  for (i = un - 1; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      n0 = up[i] << GMP_NAIL_BITS;
Packit 5c3484
	      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
Packit 5c3484
	      r >>= GMP_NAIL_BITS;
Packit 5c3484
	      qp--;
Packit 5c3484
	    }
Packit 5c3484
	  for (i = qxn - 1; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
Packit 5c3484
	      r >>= GMP_NAIL_BITS;
Packit 5c3484
	      qp--;
Packit 5c3484
	    }
Packit 5c3484
	  return r;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* Most significant bit of divisor == 0.  */
Packit 5c3484
      int cnt;
Packit 5c3484
Packit 5c3484
      /* Skip a division if high < divisor (high quotient 0).  Testing here
Packit 5c3484
	 before normalizing will still skip as often as possible.  */
Packit 5c3484
      if (un != 0)
Packit 5c3484
	{
Packit 5c3484
	  n1 = up[un - 1] << GMP_NAIL_BITS;
Packit 5c3484
	  if (n1 < d)
Packit 5c3484
	    {
Packit 5c3484
	      r = n1 >> GMP_NAIL_BITS;
Packit 5c3484
	      *qp-- = 0;
Packit 5c3484
	      n--;
Packit 5c3484
	      if (n == 0)
Packit 5c3484
		return r;
Packit 5c3484
	      un--;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      if (! UDIV_NEEDS_NORMALIZATION
Packit 5c3484
	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
Packit 5c3484
	goto plain;
Packit 5c3484
Packit 5c3484
      count_leading_zeros (cnt, d);
Packit 5c3484
      d <<= cnt;
Packit 5c3484
      r <<= cnt;
Packit 5c3484
Packit 5c3484
      if (UDIV_NEEDS_NORMALIZATION
Packit 5c3484
	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t nshift;
Packit 5c3484
	  if (un != 0)
Packit 5c3484
	    {
Packit 5c3484
	      n1 = up[un - 1] << GMP_NAIL_BITS;
Packit 5c3484
	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
Packit 5c3484
	      for (i = un - 2; i >= 0; i--)
Packit 5c3484
		{
Packit 5c3484
		  n0 = up[i] << GMP_NAIL_BITS;
Packit 5c3484
		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
Packit 5c3484
		  udiv_qrnnd (*qp, r, r, nshift, d);
Packit 5c3484
		  r >>= GMP_NAIL_BITS;
Packit 5c3484
		  qp--;
Packit 5c3484
		  n1 = n0;
Packit 5c3484
		}
Packit 5c3484
	      udiv_qrnnd (*qp, r, r, n1 << cnt, d);
Packit 5c3484
	      r >>= GMP_NAIL_BITS;
Packit 5c3484
	      qp--;
Packit 5c3484
	    }
Packit 5c3484
	  for (i = qxn - 1; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
Packit 5c3484
	      r >>= GMP_NAIL_BITS;
Packit 5c3484
	      qp--;
Packit 5c3484
	    }
Packit 5c3484
	  return r >> cnt;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t  dinv, nshift;
Packit 5c3484
	  invert_limb (dinv, d);
Packit 5c3484
	  if (un != 0)
Packit 5c3484
	    {
Packit 5c3484
	      n1 = up[un - 1] << GMP_NAIL_BITS;
Packit 5c3484
	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
Packit 5c3484
	      for (i = un - 2; i >= 0; i--)
Packit 5c3484
		{
Packit 5c3484
		  n0 = up[i] << GMP_NAIL_BITS;
Packit 5c3484
		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
Packit 5c3484
		  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
Packit 5c3484
		  r >>= GMP_NAIL_BITS;
Packit 5c3484
		  qp--;
Packit 5c3484
		  n1 = n0;
Packit 5c3484
		}
Packit 5c3484
	      udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
Packit 5c3484
	      r >>= GMP_NAIL_BITS;
Packit 5c3484
	      qp--;
Packit 5c3484
	    }
Packit 5c3484
	  for (i = qxn - 1; i >= 0; i--)
Packit 5c3484
	    {
Packit 5c3484
	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
Packit 5c3484
	      r >>= GMP_NAIL_BITS;
Packit 5c3484
	      qp--;
Packit 5c3484
	    }
Packit 5c3484
	  return r >> cnt;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
}