Blame mpn/generic/pre_divrem_1.c

Packit 5c3484
/* mpn_preinv_divrem_1 -- mpn by limb division with pre-inverted divisor.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
Packit 5c3484
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
Packit 5c3484
   FUTURE GNU MP RELEASES.
Packit 5c3484
Packit 5c3484
Copyright 2000-2003 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Don't bloat a shared library with unused code. */
Packit 5c3484
#if USE_PREINV_DIVREM_1
Packit 5c3484
Packit 5c3484
/* Same test here for skipping one divide step as in mpn_divrem_1.
Packit 5c3484
Packit 5c3484
   The main reason for a separate shift==0 case is that not all CPUs give
Packit 5c3484
   zero for "n0 >> GMP_LIMB_BITS" which would arise in the general case
Packit 5c3484
   code used on shift==0.  shift==0 is also reasonably common in mp_bases
Packit 5c3484
   big_base, for instance base==10 on a 64-bit limb.
Packit 5c3484
Packit 5c3484
   Under shift!=0 it would be possible to call mpn_lshift to adjust the
Packit 5c3484
   dividend all in one go (into the quotient space say), rather than
Packit 5c3484
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
Packit 5c3484
   than what the compiler can generate for EXTRACT.  But this is left to CPU
Packit 5c3484
   specific implementations to consider, especially since EXTRACT isn't on
Packit 5c3484
   the dependent chain.
Packit 5c3484
Packit 5c3484
   If size==0 then the result is simply xsize limbs of zeros, but nothing
Packit 5c3484
   special is done for that, since it wouldn't be a usual call, and
Packit 5c3484
   certainly never arises from mpn_get_str which is our main caller.  */
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t xsize,
Packit 5c3484
		     mp_srcptr ap, mp_size_t size, mp_limb_t d_unnorm,
Packit 5c3484
		     mp_limb_t dinv, int shift)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t  ahigh, qhigh, r;
Packit 5c3484
  mp_size_t  i;
Packit 5c3484
  mp_limb_t  n1, n0;
Packit 5c3484
  mp_limb_t  d;
Packit 5c3484
Packit 5c3484
  ASSERT (xsize >= 0);
Packit 5c3484
  ASSERT (size >= 1);
Packit 5c3484
  ASSERT (d_unnorm != 0);
Packit 5c3484
#if WANT_ASSERT
Packit 5c3484
  {
Packit 5c3484
    int        want_shift;
Packit 5c3484
    mp_limb_t  want_dinv;
Packit 5c3484
    count_leading_zeros (want_shift, d_unnorm);
Packit 5c3484
    ASSERT (shift == want_shift);
Packit 5c3484
    invert_limb (want_dinv, d_unnorm << shift);
Packit 5c3484
    ASSERT (dinv == want_dinv);
Packit 5c3484
  }
Packit 5c3484
#endif
Packit 5c3484
  /* FIXME: What's the correct overlap rule when xsize!=0? */
Packit 5c3484
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+xsize, ap, size));
Packit 5c3484
Packit 5c3484
  ahigh = ap[size-1];
Packit 5c3484
  d = d_unnorm << shift;
Packit 5c3484
  qp += (size + xsize - 1);   /* dest high limb */
Packit 5c3484
Packit 5c3484
  if (shift == 0)
Packit 5c3484
    {
Packit 5c3484
      /* High quotient limb is 0 or 1, and skip a divide step. */
Packit 5c3484
      r = ahigh;
Packit 5c3484
      qhigh = (r >= d);
Packit 5c3484
      r = (qhigh ? r-d : r);
Packit 5c3484
      *qp-- = qhigh;
Packit 5c3484
      size--;
Packit 5c3484
Packit 5c3484
      for (i = size-1; i >= 0; i--)
Packit 5c3484
	{
Packit 5c3484
	  n0 = ap[i];
Packit 5c3484
	  udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
Packit 5c3484
	  qp--;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      r = 0;
Packit 5c3484
      if (ahigh < d_unnorm)
Packit 5c3484
	{
Packit 5c3484
	  r = ahigh << shift;
Packit 5c3484
	  *qp-- = 0;
Packit 5c3484
	  size--;
Packit 5c3484
	  if (size == 0)
Packit 5c3484
	    goto done_integer;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      n1 = ap[size-1];
Packit 5c3484
      r |= n1 >> (GMP_LIMB_BITS - shift);
Packit 5c3484
Packit 5c3484
      for (i = size-2; i >= 0; i--)
Packit 5c3484
	{
Packit 5c3484
	  ASSERT (r < d);
Packit 5c3484
	  n0 = ap[i];
Packit 5c3484
	  udiv_qrnnd_preinv (*qp, r, r,
Packit 5c3484
			     ((n1 << shift) | (n0 >> (GMP_LIMB_BITS - shift))),
Packit 5c3484
			     d, dinv);
Packit 5c3484
	  qp--;
Packit 5c3484
	  n1 = n0;
Packit 5c3484
	}
Packit 5c3484
      udiv_qrnnd_preinv (*qp, r, r, n1 << shift, d, dinv);
Packit 5c3484
      qp--;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
 done_integer:
Packit 5c3484
  for (i = 0; i < xsize; i++)
Packit 5c3484
    {
Packit 5c3484
      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
Packit 5c3484
      qp--;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return r >> shift;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#endif /* USE_PREINV_DIVREM_1 */