/* UltraSPARC 64 support macros.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/* LOW32(x) masks x down to its low 32 bits; HIGH32(x) shifts x right by 32,
   leaving whatever was above bit 31.  Both expand x exactly once.  */
#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)


/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
Packit 5c3484
   Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
Packit 5c3484
   effect of swapping the two halves in this case.  */
Packit 5c3484
#if HAVE_LIMB_BIG_ENDIAN
Packit 5c3484
#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_LIMB_LITTLE_ENDIAN
Packit 5c3484
#define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
Packit 5c3484
#endif
Packit 5c3484
#ifndef HALF_ENDIAN_ADJ
Packit 5c3484
Error, error, unknown limb endianness;
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
   of that product is equal to l.  dh and dl are the 32-bit halves of d.

   |-----high----||----low-----|
   +------+------+
   |             |                 ph = qh * dh
   +------+------+
          +------+------+
          |             |          pm1 = ql * dh
          +------+------+
          +------+------+
          |             |          pm2 = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that the low 64 bits are equal to l means that LOW(pm1) + LOW(pm2)
   + HIGH(pl) == HIGH(l) (mod 2^32).  The only thing we need from those
   product parts is whether they produce a carry into the high.

   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
   HIGH(l).  pl is never actually calculated.

   h is an lvalue receiving the result.  The other arguments are expanded
   more than once, so they should be free of side effects.  */

#define umul_ppmm_lowequal(h, q, d, dh, dl, l)                  \
  do {                                                          \
    mp_limb_t  _ql, _qh, _ph, _pm1, _pm2, _pm_l;                \
    ASSERT ((dh) == HIGH32 (d));                                \
    ASSERT ((dl) == LOW32 (d));                                 \
    ASSERT ((q) * (d) == (l));                                  \
                                                                \
    _ql = LOW32 (q);                                            \
    _qh = HIGH32 (q);                                           \
                                                                \
    _pm1 = _ql * (dh);                                          \
    _pm2 = _qh * (dl);                                          \
    _ph  = _qh * (dh);                                          \
                                                                \
    /* low halves of the two middle products, with carry */     \
    _pm_l = LOW32 (_pm1) + LOW32 (_pm2);                        \
                                                                \
    /* last term: carry out of LOW(pm_l) + HIGH(pl) */          \
    (h) = _ph + HIGH32 (_pm1) + HIGH32 (_pm2)                   \
      + HIGH32 (_pm_l) + ((_pm_l << 32) > (l));                 \
                                                                \
    ASSERT_HIGH_PRODUCT (h, q, d);                              \
  } while (0)


/* Set h to the high of q*d, assuming the low limb of that product is equal
   to l, and that d fits in 32-bits.

   |-----high----||----low-----|
          +------+------+
          |             |          pm = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
   need to calculate pl to determine this.

   h is an lvalue receiving the result.  q, d and l are expanded more than
   once, so they should be free of side effects.  */

#define umul_ppmm_half_lowequal(h, q, d, l)             \
  do {                                                  \
    mp_limb_t  _pm;                                     \
    ASSERT ((q) * (d) == (l));                          \
    ASSERT (HIGH32 (d) == 0);                           \
                                                        \
    _pm = HIGH32 (q) * (d);                             \
    /* HIGH(pm) plus the carry out of LOW(pm)+HIGH(pl) */ \
    (h) = HIGH32 (_pm) + ((_pm << 32) > (l));           \
    ASSERT_HIGH_PRODUCT (h, q, d);                      \
  } while (0)


/* Check that h is the high limb of x*y.

   With WANT_ASSERT set, the product is recomputed with umul_ppmm (expected
   to be provided elsewhere, it is not defined in this file) and its high
   limb is compared against h.  Otherwise the macro expands to an empty
   statement and none of the arguments are evaluated.  */
#if WANT_ASSERT
#define ASSERT_HIGH_PRODUCT(h, x, y)            \
  do {                                          \
    mp_limb_t  _want_h, _dummy;                 \
    umul_ppmm (_want_h, _dummy, x, y);          \
    ASSERT ((h) == _want_h);                    \
  } while (0)
#else
#define ASSERT_HIGH_PRODUCT(h, x, y)            \
  do { } while (0)
#endif


/* Multiply u and v, where v < 2^32, setting w1 to the high word and w0 to
   the low word of the product.

   Only the low half of v is used, so two half-word products suffice:
   x0 = ul*vl and x2 = uh*vl, with x2 weighted by 2^(W_TYPE_SIZE/2).
   u and v are each evaluated once.  */
#define umul_ppmm_s(w1, w0, u, v)                                       \
  do {                                                                  \
    UWtype __x0, __x2;                                                  \
    UWtype __ul, __vl, __uh;                                            \
    UWtype __u = (u), __v = (v);                                        \
                                                                        \
    __ul = __ll_lowpart (__u);                                          \
    __uh = __ll_highpart (__u);                                         \
    __vl = __ll_lowpart (__v);                                          \
                                                                        \
    __x0 = __ul * __vl;                                                 \
    __x2 = __uh * __vl;                                                 \
                                                                        \
    /* x2 + HIGH(x0) cannot overflow a word */                          \
    (w1) = (__x2 + (__x0 >> (W_TYPE_SIZE / 2))) >> (W_TYPE_SIZE / 2);   \
    (w0) = (__x2 << (W_TYPE_SIZE / 2)) + __x0;                          \
  } while (0)

/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
   The count returned will be in the range 32 to 63.
   This is the 32-bit generic C count_leading_zeros from longlong.h.

   x must be non-zero and at most 0xFFFFFFFF; it is evaluated once.  __a is
   a shift of 1, 9, 17 or 25 chosen from the size of x, and the remaining
   bits index __clz_tab (a lookup table expected to be provided elsewhere,
   it is not defined in this file).  */
#define count_leading_zeros_32(count, x)                                      \
  do {                                                                        \
    mp_limb_t  __xr = (x);                                                    \
    unsigned   __a;                                                           \
    ASSERT (__xr != 0);                                                       \
    ASSERT (__xr <= CNST_LIMB(0xFFFFFFFF));                                   \
    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
      : (__xr < ((UWtype) 1 << 24)  ? 16 + 1 : 24 + 1);                       \
                                                                              \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
  } while (0)


/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
   32 bits and is normalized (high bit set).  Here b = 2^32.

   d is evaluated once.  The numerator is formed in a full limb: negating d
   and shifting left by 32 gives b*(b-d) modulo 2^64.  */
#define invert_half_limb(inv, d)                        \
  do {                                                  \
    mp_limb_t  _d = (d);                                \
    mp_limb_t  _n;                                      \
    ASSERT (_d <= 0xFFFFFFFF);                          \
    ASSERT (_d & 0x80000000);                           \
    _n = ((-_d) << 32) - 1;           /* b*(b-d)-1 */   \
    (inv) = (mp_limb_t) (unsigned) (_n / _d);           \
  } while (0)


/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
   q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
   dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.

   d_limb must be normalized (bit 31 set) and nh must be less than d_limb.
   d_limb and dinv_limb are captured once into limb-sized locals, so the
   products below are always formed at full limb width.  nh and nl are read
   before q and r are written, so r may be the same variable as nh.  */

#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
  do {                                                                  \
    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
    mp_limb_t  _n, _x;                                                  \
    mp_limb_t  _d = (d_limb), _dinv = (dinv_limb);                      \
    ASSERT (_d <= 0xFFFFFFFF);                                          \
    ASSERT (_dinv <= 0xFFFFFFFF);                                       \
    ASSERT (_d & 0x80000000);                                           \
    ASSERT ((nh) < _d);                                                 \
    _n10 = (nl);                                                        \
    _n2 = (nh);                                                         \
    _n1 = -(_n10 >> 31);                      /* all ones if nl bit 31 set */ \
    _nadj = _n10 + (_n1 & _d);                                          \
    _x = _dinv * (_n2 - _n1) + _nadj;                                   \
    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
    _x = _n + _d * _q11n;                     /* n-q1*d-d */            \
    _xh = HIGH32 (_x) - _d;                   /* high(n-q1*d-d) */      \
    ASSERT (_xh == 0 || _xh == ~0u);                                    \
    _r = _x + (_d & _xh);                     /* addback */             \
    _q = _xh - _q11n;                         /* q1+1-addback */        \
    ASSERT (_r < _d);                                                   \
    ASSERT (_d * _q + _r == _n);                                        \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
