Blame mpn/sparc64/divrem_1.c

Packit 5c3484
/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
Packit 5c3484
Packit 5c3484
Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
Packit 5c3484
Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#include "mpn/sparc64/sparc64.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/*                   64-bit divisor       32-bit divisor
Packit 5c3484
                       cycles/limb          cycles/limb
Packit 5c3484
                        (approx)             (approx)
Packit 5c3484
                   integer  fraction    integer  fraction
Packit 5c3484
   Ultrasparc 2i:    160      160          122      96
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* 32-bit divisors are treated in special case code.  This requires 4 mulx
Packit 5c3484
   per limb instead of 8 in the general case.
Packit 5c3484
Packit 5c3484
   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
Packit 5c3484
   addressing, to get the two halves of each limb read in the correct order.
Packit 5c3484
   This is kept in an adj variable.  Doing that measures about 4 c/l faster
Packit 5c3484
   than just writing HALF_ENDIAN_ADJ(i) in the integer loop.  The latter
Packit 5c3484
   shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
Packit 5c3484
   (on gcc 3.2.1 at least).  The fraction loop doesn't seem affected, but we
Packit 5c3484
   still use a variable since that ought to work out best.  */
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
Packit 5c3484
              mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
Packit 5c3484
{
Packit 5c3484
  mp_size_t  total_size_limbs;
Packit 5c3484
  mp_size_t  i;
Packit 5c3484
Packit 5c3484
  ASSERT (xsize_limbs >= 0);
Packit 5c3484
  ASSERT (size_limbs >= 0);
Packit 5c3484
  ASSERT (d_limb != 0);
Packit 5c3484
  /* FIXME: What's the correct overlap rule when xsize!=0? */
Packit 5c3484
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
Packit 5c3484
                                  ap_limbptr, size_limbs));
Packit 5c3484
Packit 5c3484
  total_size_limbs = size_limbs + xsize_limbs;
Packit 5c3484
  if (UNLIKELY (total_size_limbs == 0))
Packit 5c3484
    return 0;
Packit 5c3484
Packit 5c3484
  /* udivx is good for total_size==1, and no need to bother checking
Packit 5c3484
     limb
Packit 5c3484
  if (UNLIKELY (total_size_limbs == 1))
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t  a, q;
Packit 5c3484
      a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
Packit 5c3484
      q = a / d_limb;
Packit 5c3484
      qp_limbptr[0] = q;
Packit 5c3484
      return a - q*d_limb;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (d_limb <= CNST_LIMB(0xFFFFFFFF))
Packit 5c3484
    {
Packit 5c3484
      mp_size_t  size, xsize, total_size, adj;
Packit 5c3484
      unsigned   *qp, n1, n0, q, r, nshift, norm_rmask;
Packit 5c3484
      mp_limb_t  dinv_limb;
Packit 5c3484
      const unsigned *ap;
Packit 5c3484
      int        norm, norm_rshift;
Packit 5c3484
Packit 5c3484
      size = 2 * size_limbs;
Packit 5c3484
      xsize = 2 * xsize_limbs;
Packit 5c3484
      total_size = size + xsize;
Packit 5c3484
Packit 5c3484
      ap = (unsigned *) ap_limbptr;
Packit 5c3484
      qp = (unsigned *) qp_limbptr;
Packit 5c3484
Packit 5c3484
      qp += xsize;
Packit 5c3484
      r = 0;        /* initial remainder */
Packit 5c3484
Packit 5c3484
      if (LIKELY (size != 0))
Packit 5c3484
        {
Packit 5c3484
          n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];
Packit 5c3484
Packit 5c3484
          /* If the length of the source is uniformly distributed, then
Packit 5c3484
             there's a 50% chance of the high 32-bits being zero, which we
Packit 5c3484
             can skip.  */
Packit 5c3484
          if (n1 == 0)
Packit 5c3484
            {
Packit 5c3484
              n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
Packit 5c3484
              total_size--;
Packit 5c3484
              size--;
Packit 5c3484
              ASSERT (size > 0);  /* because always even */
Packit 5c3484
              qp[size + HALF_ENDIAN_ADJ(1)] = 0;
Packit 5c3484
            }
Packit 5c3484
Packit 5c3484
          /* Skip a division if high < divisor (high quotient 0).  Testing
Packit 5c3484
             here before before normalizing will still skip as often as
Packit 5c3484
             possible.  */
Packit 5c3484
          if (n1 < d_limb)
Packit 5c3484
            {
Packit 5c3484
              r = n1;
Packit 5c3484
              size--;
Packit 5c3484
              qp[size + HALF_ENDIAN_ADJ(size)] = 0;
Packit 5c3484
              total_size--;
Packit 5c3484
              if (total_size == 0)
Packit 5c3484
                return r;
Packit 5c3484
            }
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      count_leading_zeros_32 (norm, d_limb);
Packit 5c3484
      norm -= 32;
Packit 5c3484
      d_limb <<= norm;
Packit 5c3484
      r <<= norm;
Packit 5c3484
Packit 5c3484
      norm_rshift = 32 - norm;
Packit 5c3484
      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
Packit 5c3484
Packit 5c3484
      invert_half_limb (dinv_limb, d_limb);
Packit 5c3484
Packit 5c3484
      if (LIKELY (size != 0))
Packit 5c3484
        {
Packit 5c3484
          i = size - 1;
Packit 5c3484
          adj = HALF_ENDIAN_ADJ (i);
Packit 5c3484
          n1 = ap[i + adj];
Packit 5c3484
          adj = -adj;
Packit 5c3484
          r |= ((n1 >> norm_rshift) & norm_rmask);
Packit 5c3484
          for ( ; i > 0; i--)
Packit 5c3484
            {
Packit 5c3484
              n0 = ap[i-1 + adj];
Packit 5c3484
              adj = -adj;
Packit 5c3484
              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
Packit 5c3484
              udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
Packit 5c3484
              qp[i + adj] = q;
Packit 5c3484
              n1 = n0;
Packit 5c3484
            }
Packit 5c3484
          nshift = n1 << norm;
Packit 5c3484
          udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
Packit 5c3484
          qp[0 + HALF_ENDIAN_ADJ(0)] = q;
Packit 5c3484
        }
Packit 5c3484
      qp -= xsize;
Packit 5c3484
      adj = HALF_ENDIAN_ADJ (0);
Packit 5c3484
      for (i = xsize-1; i >= 0; i--)
Packit 5c3484
        {
Packit 5c3484
          udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
Packit 5c3484
          adj = -adj;
Packit 5c3484
          qp[i + adj] = q;
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      return r >> norm;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_srcptr  ap;
Packit 5c3484
      mp_ptr     qp;
Packit 5c3484
      mp_size_t  size, xsize, total_size;
Packit 5c3484
      mp_limb_t  d, n1, n0, q, r, dinv, nshift, norm_rmask;
Packit 5c3484
      int        norm, norm_rshift;
Packit 5c3484
Packit 5c3484
      ap = ap_limbptr;
Packit 5c3484
      qp = qp_limbptr;
Packit 5c3484
      size = size_limbs;
Packit 5c3484
      xsize = xsize_limbs;
Packit 5c3484
      total_size = total_size_limbs;
Packit 5c3484
      d = d_limb;
Packit 5c3484
Packit 5c3484
      qp += total_size;   /* above high limb */
Packit 5c3484
      r = 0;              /* initial remainder */
Packit 5c3484
Packit 5c3484
      if (LIKELY (size != 0))
Packit 5c3484
        {
Packit 5c3484
          /* Skip a division if high < divisor (high quotient 0).  Testing
Packit 5c3484
             here before before normalizing will still skip as often as
Packit 5c3484
             possible.  */
Packit 5c3484
          n1 = ap[size-1];
Packit 5c3484
          if (n1 < d)
Packit 5c3484
            {
Packit 5c3484
              r = n1;
Packit 5c3484
              *--qp = 0;
Packit 5c3484
              total_size--;
Packit 5c3484
              if (total_size == 0)
Packit 5c3484
                return r;
Packit 5c3484
              size--;
Packit 5c3484
            }
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      count_leading_zeros (norm, d);
Packit 5c3484
      d <<= norm;
Packit 5c3484
      r <<= norm;
Packit 5c3484
Packit 5c3484
      norm_rshift = GMP_LIMB_BITS - norm;
Packit 5c3484
      norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));
Packit 5c3484
Packit 5c3484
      invert_limb (dinv, d);
Packit 5c3484
Packit 5c3484
      if (LIKELY (size != 0))
Packit 5c3484
        {
Packit 5c3484
          n1 = ap[size-1];
Packit 5c3484
          r |= ((n1 >> norm_rshift) & norm_rmask);
Packit 5c3484
          for (i = size-2; i >= 0; i--)
Packit 5c3484
            {
Packit 5c3484
              n0 = ap[i];
Packit 5c3484
              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
Packit 5c3484
              udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
Packit 5c3484
              *--qp = q;
Packit 5c3484
              n1 = n0;
Packit 5c3484
            }
Packit 5c3484
          nshift = n1 << norm;
Packit 5c3484
          udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
Packit 5c3484
          *--qp = q;
Packit 5c3484
        }
Packit 5c3484
      for (i = 0; i < xsize; i++)
Packit 5c3484
        {
Packit 5c3484
          udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
Packit 5c3484
          *--qp = q;
Packit 5c3484
        }
Packit 5c3484
      return r >> norm;
Packit 5c3484
    }
Packit 5c3484
}