Blame longlong.h

Packit 5c3484
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
Packit 5c3484
Packit 5c3484
Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2016 Free Software
Packit 5c3484
Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
/* You have to define the following before including this file:
Packit 5c3484
Packit 5c3484
   UWtype -- An unsigned type, default type for operations (typically a "word")
Packit 5c3484
   UHWtype -- An unsigned type, at least half the size of UWtype
Packit 5c3484
   UDWtype -- An unsigned type, at least twice as large as UWtype
Packit 5c3484
   W_TYPE_SIZE -- size in bits of UWtype
Packit 5c3484
Packit 5c3484
   SItype, USItype -- Signed and unsigned 32 bit types
Packit 5c3484
   DItype, UDItype -- Signed and unsigned 64 bit types
Packit 5c3484
Packit 5c3484
   On a 32 bit machine UWtype should typically be USItype;
Packit 5c3484
   on a 64 bit machine, UWtype should typically be UDItype.
Packit 5c3484
Packit 5c3484
   Optionally, define:
Packit 5c3484
Packit 5c3484
   LONGLONG_STANDALONE -- Avoid code that needs machine-dependent support files
Packit 5c3484
   NO_ASM -- Disable inline asm
Packit 5c3484
Packit 5c3484
Packit 5c3484
   CAUTION!  Using this version of longlong.h outside of GMP is not safe.  You
Packit 5c3484
   need to include gmp.h and gmp-impl.h, or certain things might not work as
Packit 5c3484
   expected.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
/* Half-word helpers.  They expand in terms of W_TYPE_SIZE and UWtype, which
   the includer must have defined (see the top of this file).

   __BITS4           a quarter of W_TYPE_SIZE.
   __ll_B            (UWtype) 1 shifted left by half of W_TYPE_SIZE.
   __ll_lowpart(t)   T converted to UWtype and masked with __ll_B - 1.
   __ll_highpart(t)  T converted to UWtype and shifted right by half of
                     W_TYPE_SIZE.  */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
Packit 5c3484
Packit 5c3484
/* This is used to make sure no undesirable sharing between different libraries
Packit 5c3484
   that use this file takes place.  */
Packit 5c3484
/* Default symbol-name wrapper: __MPN(x) pastes a "__" prefix onto X.  An
   includer that has already defined __MPN keeps its own definition.  */
#ifndef __MPN
#define __MPN(x) __##x
#endif
Packit 5c3484
Packit 5c3484
/* Define auxiliary asm macros.
Packit 5c3484
Packit 5c3484
   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
Packit 5c3484
   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
Packit 5c3484
   word product in HIGH_PROD and LOW_PROD.
Packit 5c3484
Packit 5c3484
   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
Packit 5c3484
   UDWtype product.  This is just a variant of umul_ppmm.
Packit 5c3484
Packit 5c3484
   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
Packit 5c3484
   denominator) divides a UDWtype, composed by the UWtype integers
Packit 5c3484
   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
Packit 5c3484
   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
Packit 5c3484
   than DENOMINATOR for correct operation.  If, in addition, the most
Packit 5c3484
   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
Packit 5c3484
   UDIV_NEEDS_NORMALIZATION is defined to 1.
Packit 5c3484
Packit 5c3484
   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
Packit 5c3484
   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
Packit 5c3484
   is rounded towards 0.
Packit 5c3484
Packit 5c3484
   5) count_leading_zeros(count, x) counts the number of zero-bits from the
Packit 5c3484
   msb to the first non-zero bit in the UWtype X.  This is the number of
Packit 5c3484
   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
Packit 5c3484
   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
Packit 5c3484
Packit 5c3484
   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
Packit 5c3484
   from the least significant end.
Packit 5c3484
Packit 5c3484
   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
Packit 5c3484
   high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
Packit 5c3484
   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
Packit 5c3484
   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
Packit 5c3484
   (i.e. carry out) is not stored anywhere, and is lost.
Packit 5c3484
Packit 5c3484
   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
Packit 5c3484
   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
Packit 5c3484
   composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
Packit 5c3484
   LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
Packit 5c3484
   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
Packit 5c3484
   and is lost.
Packit 5c3484
Packit 5c3484
   If any of these macros are left undefined for a particular CPU,
Packit 5c3484
   C macros are used.
Packit 5c3484
Packit 5c3484
Packit 5c3484
   Notes:
Packit 5c3484
Packit 5c3484
   For add_ssaaaa the two high and two low addends can both commute, but
Packit 5c3484
   unfortunately gcc only supports one "%" commutative in each asm block.
Packit 5c3484
   This has always been so but is only documented in recent versions
Packit 5c3484
   (eg. pre-release 3.3).  Having two or more "%"s can cause an internal
Packit 5c3484
   compiler error in certain rare circumstances.
Packit 5c3484
Packit 5c3484
   Apparently it was only the last "%" that was ever actually respected, so
Packit 5c3484
   the code has been updated to leave just that.  Clearly there's a free
Packit 5c3484
   choice whether high or low should get it, if there's a reason to favour
Packit 5c3484
   one over the other.  Also obviously when the constraints on the two
Packit 5c3484
   operands are identical there's no benefit to the reloader in any "%" at
Packit 5c3484
   all.
Packit 5c3484
Packit 5c3484
   */
Packit 5c3484
Packit 5c3484
/* The CPUs come in alphabetical order below.
Packit 5c3484
Packit 5c3484
   Please add support for more CPUs here, or improve the current support
Packit 5c3484
   for the CPUs below!  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc
Packit 5c3484
   3.4 __builtin_clzl or __builtin_clzll, according to our limb size.
Packit 5c3484
   Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or
Packit 5c3484
   __builtin_ctzll.
Packit 5c3484
Packit 5c3484
   These builtins are only used when we check what code comes out, on some
Packit 5c3484
   chips they're merely libgcc calls, where we will instead want an inline
Packit 5c3484
   in that case (either asm or generic C).
Packit 5c3484
Packit 5c3484
   These builtins are better than an asm block of the same insn, since an
Packit 5c3484
   asm block doesn't give gcc any information about scheduling or resource
Packit 5c3484
   usage.  We keep an asm block for use on prior versions of gcc though.
Packit 5c3484
Packit 5c3484
   For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but
Packit 5c3484
   it's not used (for count_leading_zeros) because it generally gives extra
Packit 5c3484
   code to ensure the result is 0 when the input is 0, which we don't need
Packit 5c3484
   or want.  */
Packit 5c3484
Packit 5c3484
/* count_leading_zeros_gcc_clz (count, x): store the result of gcc's clz
   builtin applied to X into COUNT.  __builtin_clzll is used when
   _LONG_LONG_LIMB is defined, __builtin_clzl otherwise.  X must be non-zero;
   this is checked with ASSERT, which must be supplied by the includer.  */
#ifdef _LONG_LONG_LIMB
#define count_leading_zeros_gcc_clz(count,x)	\
  do {						\
    ASSERT ((x) != 0);				\
    (count) = __builtin_clzll (x);		\
  } while (0)
#else
#define count_leading_zeros_gcc_clz(count,x)	\
  do {						\
    ASSERT ((x) != 0);				\
    (count) = __builtin_clzl (x);		\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
/* count_trailing_zeros_gcc_ctz (count, x): store the result of gcc's ctz
   builtin applied to X into COUNT.  __builtin_ctzll is used when
   _LONG_LONG_LIMB is defined, __builtin_ctzl otherwise.  X must be non-zero;
   this is checked with ASSERT, which must be supplied by the includer.  */
#ifdef _LONG_LONG_LIMB
#define count_trailing_zeros_gcc_ctz(count,x)	\
  do {						\
    ASSERT ((x) != 0);				\
    (count) = __builtin_ctzll (x);		\
  } while (0)
#else
#define count_trailing_zeros_gcc_ctz(count,x)	\
  do {						\
    ASSERT ((x) != 0);				\
    (count) = __builtin_ctzl (x);		\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* FIXME: The macros using external routines like __MPN(count_leading_zeros)
Packit 5c3484
   don't need to be under !NO_ASM */
Packit 5c3484
#if ! defined (NO_ASM)
Packit 5c3484
Packit 5c3484
#if defined (__alpha) && W_TYPE_SIZE == 64
Packit 5c3484
/* Most alpha-based machines, except Cray systems. */
Packit 5c3484
/* Alpha umul_ppmm (ph, pl, m0, m1): PH and PL receive the high and low words
   of the product M0 * M1.  Three spellings are provided:
     - gcc satisfying __GMP_GNUC_PREREQ (3,3): __builtin_alpha_umulh;
     - other gcc: an inline "umulh" asm;
     - non-gcc compilers: __UMULH from <machine/builtins.h>.
   In each case the operands are copied into locals first, so M0 and M1 are
   evaluated once, and the low word is a plain C multiplication.  */
#if defined (__GNUC__)
#if __GMP_GNUC_PREREQ (3,3)
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    (ph) = __builtin_alpha_umulh (__m0, __m1);				\
    (pl) = __m0 * __m1;							\
  } while (0)
#else
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("umulh %r1,%2,%0"						\
	     : "=r" (ph)						\
	     : "%rJ" (__m0), "rI" (__m1));				\
    (pl) = __m0 * __m1;							\
  } while (0)
#endif
#define UMUL_TIME 18
#else /* ! __GNUC__ */
#include <machine/builtins.h>
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    (ph) = __UMULH (__m0, __m1);					\
    (pl) = __m0 * __m1;							\
  } while (0)
#endif
Packit 5c3484
/* Alpha udiv_qrnnd (q, r, n1, n0, d): implemented by computing an inverse
   of D with __MPN(invert_limb) and handing it to udiv_qrnnd_preinv.  Both
   helpers are defined outside this file, so the macro is only provided when
   LONGLONG_STANDALONE is not defined.  */
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di;							\
    __di = __MPN(invert_limb) (d);					\
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);				\
  } while (0)
#define UDIV_PREINV_ALWAYS  1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
Packit 5c3484
Packit 5c3484
/* clz_tab is required in all configurations, since mpn/alpha/cntlz.asm
Packit 5c3484
   always goes into libgmp.so, even when not actually used.  */
Packit 5c3484
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB

/* With gcc and HAVE_HOST_CPU_alpha_CIX set, count_leading_zeros and
   count_trailing_zeros are single "ctlz" / "cttz" instructions taking X in a
   register and writing COUNT.  */
#if defined (__GNUC__) && HAVE_HOST_CPU_alpha_CIX
#define count_leading_zeros(COUNT,X) \
  __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X))
#define count_trailing_zeros(COUNT,X) \
  __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
#endif /* clz/ctz using cix */
Packit 5c3484
Packit 5c3484
#if ! defined (count_leading_zeros)				\
  && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE)
/* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0.
   "$31" is written explicitly in the asm, since an "r" constraint won't
   select reg 31.  There seems no need to worry about "r31" syntax for cray,
   since gcc itself (pre-release 3.4) emits just $31 in various places.

   The __asm__ spelling is used, like the other asm blocks in this file, so
   the macro also works when gcc is not treating plain "asm" as a keyword
   (for instance under -ansi or -std=c99).  */
#define ALPHA_CMPBGE_0(dst, src)					\
  do { __asm__ ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0)
/* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts
   them, locating the highest non-zero byte.  A second __clz_tab lookup
   counts the leading zero bits in that byte, giving the result.  X is
   copied into a local first, so it is evaluated only once.  */
#define count_leading_zeros(count, x)					\
  do {									\
    UWtype  __clz__b, __clz__c, __clz__x = (x);				\
    ALPHA_CMPBGE_0 (__clz__b,  __clz__x);	    /* zero bytes */	\
    __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F];  /* 8 to 1 byte */	\
    __clz__b = __clz__b * 8 - 7;		    /* 57 to 1 shift */ \
    __clz__x >>= __clz__b;						\
    __clz__c = __clz_tab [__clz__x];		    /* 8 to 1 bit */	\
    __clz__b = 65 - __clz__b;						\
    (count) = __clz__b - __clz__c;					\
  } while (0)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#endif /* clz using cmpbge */
Packit 5c3484
Packit 5c3484
#if ! defined (count_leading_zeros) && ! defined (LONGLONG_STANDALONE)
Packit 5c3484
#if HAVE_ATTRIBUTE_CONST
Packit 5c3484
long __MPN(count_leading_zeros) (UDItype) __attribute__ ((const));
Packit 5c3484
#else
Packit 5c3484
long __MPN(count_leading_zeros) (UDItype);
Packit 5c3484
#endif
Packit 5c3484
#define count_leading_zeros(count, x) \
Packit 5c3484
  ((count) = __MPN(count_leading_zeros) (x))
Packit 5c3484
#endif /* clz using mpn */
Packit 5c3484
#endif /* __alpha */
Packit 5c3484
Packit 5c3484
#if defined (__AVR) && W_TYPE_SIZE == 8
Packit 5c3484
/* umul_ppmm (ph, pl, m0, m1) for 8-bit words: form the product of M0 and M1
   in an unsigned short, then store its bits above bit 7 in PH and assign the
   whole value to PL (which keeps the low byte when PL is an 8-bit object).  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    unsigned short __p = (unsigned short) (m0) * (m1);			\
    (ph) = __p >> 8;							\
    (pl) = __p;								\
  } while (0)
Packit 5c3484
#endif /* AVR */
Packit 5c3484
Packit 5c3484
/* Cray with 64-bit words.  count_leading_zeros uses the _leadz intrinsic
   from <intrinsics.h>.  On _CRAYIEEE systems umul_ppmm takes its high word
   from _int_mult_upper and its low word from a plain C multiplication, and
   (unless LONGLONG_STANDALONE is defined) udiv_qrnnd goes through
   __MPN(invert_limb) and udiv_qrnnd_preinv, both defined outside this
   file.  */
#if defined (_CRAY) && W_TYPE_SIZE == 64
#include <intrinsics.h>
#define UDIV_PREINV_ALWAYS  1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 220
long __MPN(count_leading_zeros) (UDItype);
#define count_leading_zeros(count, x) \
  ((count) = _leadz ((UWtype) (x)))
#if defined (_CRAYIEEE)		/* I.e., Cray T90/ieee, T3D, and T3E */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    (ph) = _int_mult_upper (__m0, __m1);				\
    (pl) = __m0 * __m1;							\
  } while (0)
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di;							\
    __di = __MPN(invert_limb) (d);					\
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);				\
  } while (0)
#endif /* LONGLONG_STANDALONE */
#endif /* _CRAYIEEE */
#endif /* _CRAY */
Packit 5c3484
Packit 5c3484
#if defined (__ia64) && W_TYPE_SIZE == 64
Packit 5c3484
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
Packit 5c3484
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
Packit 5c3484
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
Packit 5c3484
   register, which takes an extra cycle.  */
Packit 5c3484
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (AH,AL) - (BH,BL) -> (SH,SL), written with an explicit branch on the
   borrow condition AL < BL.  The low difference is held in a temporary and
   stored to SL last, so SL may be the same object as AL or BL.  AL and BL
   are each expanded twice, so they should be free of side effects.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)      \
  do {						\
    UWtype __x;					\
    __x = (al) - (bl);				\
    if ((al) < (bl))				\
      (sh) = (ah) - (bh) - 1;			\
    else					\
      (sh) = (ah) - (bh);			\
    (sl) = __x;					\
  } while (0)
Packit 5c3484
/* ia64 definitions for gcc (excluding the Intel compiler, which also
   defines __GNUC__): umul_ppmm, count_leading_zeros and
   count_trailing_zeros as inline asm.  */
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
/* Do both product parts in assembly, since that gives better code with
   all gcc versions.  Some callers will just use the upper part, and in
   that situation we waste an instruction, but not any cycles.  */
#define umul_ppmm(ph, pl, m0, m1) \
    __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"		\
	     : "=&f" (ph), "=f" (pl)					\
	     : "f" (m0), "f" (m1))
#define UMUL_TIME 14
/* X is copied into _x, so it is evaluated only once.  After the two asm
   steps the remaining value is narrowed with C shifts, accumulating the
   bit index in _c; COUNT is W_TYPE_SIZE - 1 - _c.  */
#define count_leading_zeros(count, x) \
  do {									\
    UWtype _x = (x), _y, _a, _c;					\
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));		\
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));		\
    _c = (_a - 1) << 3;							\
    _x >>= _c;								\
    if (_x >= 1 << 4)							\
      _x >>= 4, _c += 4;						\
    if (_x >= 1 << 2)							\
      _x >>= 2, _c += 2;						\
    _c += _x >> 1;							\
    (count) =  W_TYPE_SIZE - 1 - _c;					\
  } while (0)
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
   based, and we don't need a special case for x==0 here */
#define count_trailing_zeros(count, x)					\
  do {									\
    UWtype __ctz_x = (x);						\
    __asm__ ("popcnt %0 = %1"						\
	     : "=r" (count)						\
	     : "r" ((__ctz_x-1) & ~__ctz_x));				\
  } while (0)
#endif
Packit 5c3484
/* ia64 umul_ppmm for the Intel compiler: the high word comes from the
   _m64_xmahu intrinsic (declared in <ia64intrin.h>) with a zero addend, the
   low word from a plain C multiplication.  M0 and M1 are copied into locals
   so each is evaluated once; PH and PL are parenthesized so that any lvalue
   expression can be passed.  */
#if defined (__INTEL_COMPILER)
#include <ia64intrin.h>
#define umul_ppmm(ph, pl, m0, m1)					\
  do {									\
    UWtype __m0 = (m0), __m1 = (m1);					\
    (ph) = _m64_xmahu (__m0, __m1, 0);					\
    (pl) = __m0 * __m1;							\
  } while (0)
#endif
Packit 5c3484
/* ia64 udiv_qrnnd (q, r, n1, n0, d): compute an inverse of D with
   __MPN(invert_limb) and pass it to udiv_qrnnd_preinv.  Both helpers are
   defined outside this file, hence the LONGLONG_STANDALONE guard.
   UDIV_TIME is defined regardless of the guard.  */
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di;							\
    __di = __MPN(invert_limb) (d);					\
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);				\
  } while (0)
#define UDIV_PREINV_ALWAYS  1
#define UDIV_NEEDS_NORMALIZATION 1
#endif
#define UDIV_TIME 220
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
#if defined (__GNUC__)
Packit 5c3484
Packit 5c3484
/* We sometimes need to clobber "cc" with gcc2, but that would not be
Packit 5c3484
   understood by gcc1.  Use cpp to avoid major code duplication.  */
Packit 5c3484
/* __CLOBBER_CC supplies a complete clobber section (": "cc"") for an asm
   that has none of its own; __AND_CLOBBER_CC appends "cc" to an existing
   clobber list.  Both expand to nothing when __GNUC__ < 2.  */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */
Packit 5c3484
Packit 5c3484
/* AMD 29000 with 32-bit words: add_ssaaaa, sub_ddmmss, umul_ppmm,
   udiv_qrnnd and count_leading_zeros as inline asm.  umul_ppmm copies its
   operands into locals and issues separate asm statements for the low and
   the high word of the product.  */
#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3"				\
	   : "=r" (sh), "=&r" (sl)					\
	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3"				\
	   : "=r" (sh), "=&r" (sl)					\
	   : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
#define umul_ppmm(xh, xl, m0, m1) \
  do {									\
    USItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("multiplu %0,%1,%2"					\
	     : "=r" (xl)						\
	     : "r" (__m0), "r" (__m1));					\
    __asm__ ("multmu %0,%1,%2"						\
	     : "=r" (xh)						\
	     : "r" (__m0), "r" (__m1));					\
  } while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("dividu %0,%3,%4"						\
	   : "=r" (q), "=q" (r)						\
	   : "1" (n1), "r" (n0), "r" (d))
#define count_leading_zeros(count, x) \
    __asm__ ("clz %0,%1"						\
	     : "=r" (count)						\
	     : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __a29k__ */
Packit 5c3484
Packit 5c3484
/* ARC: two-word add and subtract as an "add.f"/"adc" and "sub.f"/"sbc"
   instruction pair.  All inputs are cast to USItype.  The low result uses
   an earlyclobber ("=&r") since it is written before the high inputs are
   read.  */
#if defined (__arc__)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
	   : "=r" (sh),							\
	     "=&r" (sl)							\
	   : "r"  ((USItype) (ah)),					\
	     "rICal" ((USItype) (bh)),					\
	     "%r" ((USItype) (al)),					\
	     "rICal" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
	   : "=r" (sh),							\
	     "=&r" (sl)							\
	   : "r" ((USItype) (ah)),					\
	     "rICal" ((USItype) (bh)),					\
	     "r" ((USItype) (al)),					\
	     "rICal" ((USItype) (bl)))
#endif
Packit 5c3484
Packit 5c3484
#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
Packit 5c3484
    && W_TYPE_SIZE == 32
Packit 5c3484
/* ARM add_ssaaaa: "adds" on the low words followed by "adc" on the high
   words.  The flags are modified, so "cc" is declared clobbered through
   __CLOBBER_CC.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
	   : "=r" (sh), "=&r" (sl)					\
	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
Packit 5c3484
/* ARM sub_ddmmss.  __builtin_constant_p is used to pick, per word, between
   the normal (subs/sbc) and reversed (rsbs/rsc) subtract instructions, so
   that whichever operand is a compile-time constant lands in the position
   carrying the "rI" constraint.  Every variant computes the same
   (AH,AL) - (BH,BL) and clobbers the flags.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    if (__builtin_constant_p (al))					\
      {									\
	if (__builtin_constant_p (ah))					\
	  __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2"		\
		   : "=r" (sh), "=&r" (sl)				\
		   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
	else								\
	  __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3"		\
		   : "=r" (sh), "=&r" (sl)				\
		   : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
      }									\
    else if (__builtin_constant_p (ah))					\
      {									\
	if (__builtin_constant_p (bl))					\
	  __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2"		\
		   : "=r" (sh), "=&r" (sl)				\
		   : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
	else								\
	  __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2"		\
		   : "=r" (sh), "=&r" (sl)				\
		   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
      }									\
    else if (__builtin_constant_p (bl))					\
      {									\
	if (__builtin_constant_p (bh))					\
	  __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"		\
		   : "=r" (sh), "=&r" (sl)				\
		   : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
	else								\
	  __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2"		\
		   : "=r" (sh), "=&r" (sl)				\
		   : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
      }									\
    else /* only bh might be a constant */				\
      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
    } while (0)
Packit 5c3484
/* ARM multiply and divide.
   For __ARM_ARCH_2__, __ARM_ARCH_2A__ and __ARM_ARCH_3__, umul_ppmm builds
   the two-word product from four "mul" instructions on 16-bit halves, and
   udiv_qrnnd calls the external routine __MPN(udiv_qrnnd).
   Otherwise umul_ppmm / smul_ppmm are single "umull" / "smull"
   instructions, and udiv_qrnnd goes through __MPN(invert_limb) and
   udiv_qrnnd_preinv.  The udiv_qrnnd definitions depend on code outside
   this file and are omitted under LONGLONG_STANDALONE.  */
#if defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_2A__) \
    || defined (__ARM_ARCH_3__)
#define umul_ppmm(xh, xl, a, b)						\
  do {									\
    register USItype __t0, __t1, __t2;					\
    __asm__ ("%@ Inlined umul_ppmm\n"					\
	   "	mov	%2, %5, lsr #16\n"				\
	   "	mov	%0, %6, lsr #16\n"				\
	   "	bic	%3, %5, %2, lsl #16\n"				\
	   "	bic	%4, %6, %0, lsl #16\n"				\
	   "	mul	%1, %3, %4\n"					\
	   "	mul	%4, %2, %4\n"					\
	   "	mul	%3, %0, %3\n"					\
	   "	mul	%0, %2, %0\n"					\
	   "	adds	%3, %4, %3\n"					\
	   "	addcs	%0, %0, #65536\n"				\
	   "	adds	%1, %1, %3, lsl #16\n"				\
	   "	adc	%0, %0, %3, lsr #16"				\
	   : "=&r" ((USItype) (xh)), "=r" ((USItype) (xl)),		\
	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
	   : "r" ((USItype) (a)), "r" ((USItype) (b)) __CLOBBER_CC);	\
  } while (0)
#define UMUL_TIME 20
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __r;							\
    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));			\
    (r) = __r;								\
  } while (0)
extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
#define UDIV_TIME 200
#endif /* LONGLONG_STANDALONE */
#else /* ARMv4 or newer */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#define UMUL_TIME 5
#define smul_ppmm(xh, xl, a, b) \
  __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di;							\
    __di = __MPN(invert_limb) (d);					\
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);				\
  } while (0)
#define UDIV_PREINV_ALWAYS  1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 70
#endif /* LONGLONG_STANDALONE */
#endif /* defined(__ARM_ARCH_2__) ... */
Packit 5c3484
/* ARM bit counts are forwarded to the gcc-builtin based helpers
   count_leading_zeros_gcc_clz / count_trailing_zeros_gcc_ctz.  */
#define count_leading_zeros(count, x)  count_leading_zeros_gcc_clz(count, x)
#define count_trailing_zeros(count, x)  count_trailing_zeros_gcc_ctz(count, x)
#define COUNT_LEADING_ZEROS_0 32
Packit 5c3484
#endif /* __arm__ */
Packit 5c3484
Packit 5c3484
/* AArch64 with 64-bit words.  add_ssaaaa and sub_ddmmss are "adds"/"adc"
   and "subs"/"sbc" pairs that clobber the flags.  umul_ppmm takes the high
   word from "umulh" and the low word from a plain C multiplication, with
   both operands copied into locals so each is evaluated once.  Bit counts
   are forwarded to the gcc-builtin based helpers.  */
#if defined (__aarch64__) && W_TYPE_SIZE == 64
/* FIXME: Extend the immediate range for the low word by using both
   ADDS and SUBS, since they set carry in the same way.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"			\
	   : "=r" (sh), "=&r" (sl)					\
	   : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),		\
	     "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"			\
	   : "=r,r" (sh), "=&r,&r" (sl)					\
	   : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),		\
	     "r,Z"   ((UDItype)(al)), "rI,r"  ((UDItype)(bl)) __CLOBBER_CC)
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("umulh\t%0, %1, %2" : "=r" (ph) : "r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
#define count_leading_zeros(count, x)  count_leading_zeros_gcc_clz(count, x)
#define count_trailing_zeros(count, x)  count_trailing_zeros_gcc_ctz(count, x)
#define COUNT_LEADING_ZEROS_0 64
#endif /* __aarch64__ */
Packit 5c3484
Packit 5c3484
/* Clipper with 32-bit words.  The multiply instructions ("mulwux" unsigned,
   "mulwx" signed) write a double-word result.  umul_ppmm and smul_ppmm
   receive it through a union and split it into halves, with the low half
   declared first in the struct.  __umulsidi3 yields the double-word value
   directly.  All three are gcc statement expressions.  */
#if defined (__clipper__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll;						\
	   struct {USItype __l, __h;} __i;				\
	  } __x;							\
  __asm__ ("mulwux %2,%0"						\
	   : "=r" (__x.__ll)						\
	   : "%0" ((USItype)(u)), "r" ((USItype)(v)));			\
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define smul_ppmm(w1, w0, u, v) \
  ({union {DItype __ll;							\
	   struct {SItype __l, __h;} __i;				\
	  } __x;							\
  __asm__ ("mulwx %2,%0"						\
	   : "=r" (__x.__ll)						\
	   : "%0" ((SItype)(u)), "r" ((SItype)(v)));			\
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define __umulsidi3(u, v) \
  ({UDItype __w;							\
    __asm__ ("mulwux %2,%0"						\
	     : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v)));	\
    __w; })
#endif /* __clipper__ */
Packit 5c3484
Packit 5c3484
/* Fujitsu vector computers.  */
Packit 5c3484
/* __uxp__ with 32-bit words: umul_ppmm ("mult.lu") and smul_ppmm ("mult.l")
   write a double-word product into a union and then split it, with the
   high half declared first in the struct.  */
#if defined (__uxp__) && W_TYPE_SIZE == 32
#define umul_ppmm(ph, pl, u, v) \
  do {									\
    union {UDItype __ll;						\
	   struct {USItype __h, __l;} __i;				\
	  } __x;							\
    __asm__ ("mult.lu %1,%2,%0"	: "=r" (__x.__ll) : "%r" (u), "rK" (v));\
    (ph) = __x.__i.__h;							\
    (pl) = __x.__i.__l;							\
  } while (0)
#define smul_ppmm(ph, pl, u, v) \
  do {									\
    union {UDItype __ll;						\
	   struct {USItype __h, __l;} __i;				\
	  } __x;							\
    __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));	\
    (ph) = __x.__i.__h;							\
    (pl) = __x.__i.__l;							\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
/* Gmicro with 32-bit words: add_ssaaaa, sub_ddmmss, umul_ppmm, udiv_qrnnd
   and count_leading_zeros as inline asm.  The instructions are
   two-operand, so inputs are tied to the outputs with matching
   constraints ("0", "1").  */
#if defined (__gmicro__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.w %5,%1\n\taddx %3,%0"					\
	   : "=g" (sh), "=&g" (sl)					\
	   : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),			\
	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.w %5,%1\n\tsubx %3,%0"					\
	   : "=g" (sh), "=&g" (sl)					\
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)),			\
	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("mulx %3,%0,%1"						\
	   : "=g" (ph), "=r" (pl)					\
	   : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
#define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("divx %4,%0,%1"						\
	   : "=g" (q), "=r" (r)						\
	   : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
#define count_leading_zeros(count, x) \
  __asm__ ("bsch/1 %1,%0"						\
	   : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
#endif
Packit 5c3484
Packit 5c3484
#if defined (__hppa) && W_TYPE_SIZE == 32
Packit 5c3484
/* add_ssaaaa (hppa, 32-bit): `add' of bl and al into sl, then `addc' of ah
   and bh into sh.  bl may be an immediate ("rI"), in which case %I5 selects
   the matching opcode suffix.  sl is earlyclobber because it is written
   before ah/bh are read.
   NOTE(review): "M" with the %r modifier presumably lets a literal zero be
   printed as %r0 -- confirm against GCC's pa constraints.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("add%I5 %5,%r4,%1\n\taddc %r2,%r3,%0"			\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl))
Packit 5c3484
/* sub_ddmmss (hppa, 32-bit): `sub' of al and bl into sl, then `subb' of ah
   and bh into sh.  Here it is al (operand 4) that may be an immediate
   ("rI", with %I4 choosing the opcode suffix); bl must be "rM".  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("sub%I4 %4,%r5,%1\n\tsubb %r2,%r3,%0"			\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl))
Packit 5c3484
#if defined (_PA_RISC1_1)
Packit 5c3484
/* umul_ppmm (PA-RISC 1.1): one `xmpyu' with all three operands constrained
   to "f" registers.  The UDItype result is read back through a union whose
   first USItype member (__h) goes to wh and second (__l) to wl.  */
#define umul_ppmm(wh, wl, u, v) \
Packit 5c3484
  do {									\
Packit 5c3484
    union {UDItype __ll;						\
Packit 5c3484
	   struct {USItype __h, __l;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
    __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v));	\
Packit 5c3484
    (wh) = __x.__i.__h;							\
Packit 5c3484
    (wl) = __x.__i.__l;							\
Packit 5c3484
  } while (0)
Packit 5c3484
#define UMUL_TIME 8
Packit 5c3484
#define UDIV_TIME 60
Packit 5c3484
#else
Packit 5c3484
#define UMUL_TIME 40
Packit 5c3484
#define UDIV_TIME 80
Packit 5c3484
#endif
Packit 5c3484
/* count_leading_zeros (hppa, 32-bit): branch-free binary search.
   count (%0) starts at 1.  For widths 16, 8, 4 and 2 the upper part of the
   value still under consideration is tested: if it is non-zero the value is
   shifted down and the following `ldo' is skipped, otherwise the width is
   added to count.  Finally bit 1 of what remains is extracted and
   subtracted from count.  __tmp (%1) is a scratch register that starts out
   holding x via the "1" tie, so x itself is not modified.  */
#define count_leading_zeros(count, x) \
Packit 5c3484
  do {									\
Packit 5c3484
    USItype __tmp;							\
Packit 5c3484
    __asm__ (								\
Packit 5c3484
       "ldi		1,%0\n"						\
Packit 5c3484
"	extru,=		%1,15,16,%%r0	; Bits 31..16 zero?\n"		\
Packit 5c3484
"	extru,tr	%1,15,16,%1	; No.  Shift down, skip add.\n"	\
Packit 5c3484
"	ldo		16(%0),%0	; Yes.  Perform add.\n"		\
Packit 5c3484
"	extru,=		%1,23,8,%%r0	; Bits 15..8 zero?\n"		\
Packit 5c3484
"	extru,tr	%1,23,8,%1	; No.  Shift down, skip add.\n"	\
Packit 5c3484
"	ldo		8(%0),%0	; Yes.  Perform add.\n"		\
Packit 5c3484
"	extru,=		%1,27,4,%%r0	; Bits 7..4 zero?\n"		\
Packit 5c3484
"	extru,tr	%1,27,4,%1	; No.  Shift down, skip add.\n"	\
Packit 5c3484
"	ldo		4(%0),%0	; Yes.  Perform add.\n"		\
Packit 5c3484
"	extru,=		%1,29,2,%%r0	; Bits 3..2 zero?\n"		\
Packit 5c3484
"	extru,tr	%1,29,2,%1	; No.  Shift down, skip add.\n"	\
Packit 5c3484
"	ldo		2(%0),%0	; Yes.  Perform add.\n"		\
Packit 5c3484
"	extru		%1,30,1,%1	; Extract bit 1.\n"		\
Packit 5c3484
"	sub		%0,%1,%0	; Subtract it.\n"		\
Packit 5c3484
	: "=r" (count), "=r" (__tmp) : "1" (x));			\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif /* hppa */
Packit 5c3484
Packit 5c3484
/* These macros are for ABI=2.0w.  In ABI=2.0n they can't be used, since GCC
Packit 5c3484
   (3.2) puts longlong into two adjacent 32-bit registers.  Presumably this
Packit 5c3484
   is just a case of no direct support for 2.0n but treating it like 1.0. */
Packit 5c3484
#if defined (__hppa) && W_TYPE_SIZE == 64 && ! defined (_LONG_LONG_LIMB)
Packit 5c3484
/* add_ssaaaa (hppa, 64-bit limbs): same operand layout as the 32-bit hppa
   version, but the high-word instruction is `add,dc' instead of `addc'.
   bl may be an immediate ("rI", opcode suffix chosen by %I5).  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("add%I5 %5,%r4,%1\n\tadd,dc %r2,%r3,%0"			\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl))
Packit 5c3484
/* sub_ddmmss (hppa, 64-bit limbs): same operand layout as the 32-bit hppa
   version, but the high-word instruction is `sub,db' instead of `subb'.
   al may be an immediate ("rI", opcode suffix chosen by %I4).  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("sub%I4 %4,%r5,%1\n\tsub,db %r2,%r3,%0"			\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl))
Packit 5c3484
#endif /* hppa */
Packit 5c3484
Packit 5c3484
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
Packit 5c3484
#if defined (__zarch__) || defined (HAVE_HOST_CPU_s390_zarch)
Packit 5c3484
/* add_ssaaaa (s390, 32-bit limbs, zarch): `alr' adds bl into sl (tied to
   al), then `alcr' adds bh into sh (tied to ah).  __CLOBBER_CC is appended
   to the operand lists.  The commented-out branch inside the body is a
   disabled variant for compile-time-constant bl using `alfi'; only the
   register form is active.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
Packit 5c3484
  do {									\
Packit 5c3484
/*  if (__builtin_constant_p (bl))					\
Packit 5c3484
      __asm__ ("alfi\t%1,%o5\n\talcr\t%0,%3"				\
Packit 5c3484
	       : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	       : "0"  (ah), "r" (bh), "%1" (al), "n" (bl) __CLOBBER_CC);\
Packit 5c3484
    else								\
Packit 5c3484
*/    __asm__ ("alr\t%1,%5\n\talcr\t%0,%3"				\
Packit 5c3484
	       : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	       : "0"  (ah), "r" (bh), "%1" (al), "r" (bl)__CLOBBER_CC);	\
Packit 5c3484
  } while (0)
Packit 5c3484
/* sub_ddmmss (s390, 32-bit limbs, zarch): `slr' subtracts bl from sl (tied
   to al), then `slbr' subtracts bh from sh (tied to ah).  __CLOBBER_CC is
   appended to the operand lists.  The commented-out branch is a disabled
   variant for constant bl using `slfi'.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
Packit 5c3484
  do {									\
Packit 5c3484
/*  if (__builtin_constant_p (bl))					\
Packit 5c3484
      __asm__ ("slfi\t%1,%o5\n\tslbr\t%0,%3"				\
Packit 5c3484
	       : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	       : "0" (ah), "r" (bh), "1" (al), "n" (bl) __CLOBBER_CC);	\
Packit 5c3484
    else								\
Packit 5c3484
*/    __asm__ ("slr\t%1,%5\n\tslbr\t%0,%3"				\
Packit 5c3484
	       : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	       : "0" (ah), "r" (bh), "1" (al), "r" (bl) __CLOBBER_CC);	\
Packit 5c3484
  } while (0)
Packit 5c3484
#if __GMP_GNUC_PREREQ (4,5)
Packit 5c3484
/* umul_ppmm (s390, gcc >= 4.5): no asm here.  Both factors are widened to
   UDItype, multiplied in C, and the product is read back through a union:
   the first USItype member is stored to xh and the second to xl.  */
#define umul_ppmm(xh, xl, m0, m1)					\
  do {									\
    union {								\
      UDItype __full;							\
      struct {USItype __first, __second;} __w;				\
    } __prod;								\
    __prod.__full = (UDItype) (m0) * (UDItype) (m1);			\
    (xh) = __prod.__w.__first;						\
    (xl) = __prod.__w.__second;						\
  } while (0)
Packit 5c3484
#else
Packit 5c3484
#if 0
Packit 5c3484
/* FIXME: this fails if gcc knows about the 64-bit registers.  Use only
Packit 5c3484
   with a new enough processor pretending we have 32-bit registers.  */
Packit 5c3484
/* umul_ppmm (s390, disabled by the enclosing #if 0): one `mlr' whose single
   UDItype output is tied to m0 ("%0", commutative with m1).  The result is
   split through a union: __h to xh, __l to xl.  */
#define umul_ppmm(xh, xl, m0, m1)					\
Packit 5c3484
  do {									\
Packit 5c3484
    union {UDItype __ll;						\
Packit 5c3484
	   struct {USItype __h, __l;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
    __asm__ ("mlr\t%0,%2"						\
Packit 5c3484
	     : "=r" (__x.__ll)						\
Packit 5c3484
	     : "%0" (m0), "r" (m1));					\
Packit 5c3484
    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
Packit 5c3484
  } while (0)
Packit 5c3484
#else
Packit 5c3484
/* umul_ppmm (s390, pre-4.5 gcc): one `mlr' on explicit register variables.
   __r0 and __r1 are pinned to registers "0" and "1"; __r1 is loaded with m0
   before the asm, m1 is operand 3.  After the asm __r0 is stored to xh and
   __r1 to xl.  */
#define umul_ppmm(xh, xl, m0, m1)					\
Packit 5c3484
  do {									\
Packit 5c3484
  /* When we have 64-bit regs and gcc is aware of that, we cannot simply use
Packit 5c3484
     DImode for the product, since that would be allocated to a single 64-bit
Packit 5c3484
     register, whereas mlr uses the low 32-bits of an even-odd register pair.
Packit 5c3484
  */									\
Packit 5c3484
    register USItype __r0 __asm__ ("0");				\
Packit 5c3484
    register USItype __r1 __asm__ ("1") = (m0);				\
Packit 5c3484
    __asm__ ("mlr\t%0,%3"						\
Packit 5c3484
	     : "=r" (__r0), "=r" (__r1)					\
Packit 5c3484
	     : "r" (__r1), "r" (m1));					\
Packit 5c3484
    (xh) = __r0; (xl) = __r1;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif /* if 0 */
Packit 5c3484
#endif
Packit 5c3484
#if 0
Packit 5c3484
/* FIXME: this fails if gcc knows about the 64-bit registers.  Use only
Packit 5c3484
   with a new enough processor pretending we have 32-bit registers.  */
Packit 5c3484
/* udiv_qrnnd (s390, disabled by the enclosing #if 0): n1 and n0 are packed
   into a UDItype through a union (__h = n1, __l = n0), one `dlr' is issued
   with that value as both input and output, and afterwards __l is stored to
   q and __h to r.  */
#define udiv_qrnnd(q, r, n1, n0, d)					\
Packit 5c3484
  do {									\
Packit 5c3484
    union {UDItype __ll;						\
Packit 5c3484
	   struct {USItype __h, __l;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
    __x.__i.__h = n1; __x.__i.__l = n0;					\
Packit 5c3484
    __asm__ ("dlr\t%0,%2"						\
Packit 5c3484
	     : "=r" (__x.__ll)						\
Packit 5c3484
	     : "0" (__x.__ll), "r" (d));				\
Packit 5c3484
    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
Packit 5c3484
  } while (0)
Packit 5c3484
#else
Packit 5c3484
/* udiv_qrnnd (s390, active variant): one `dlr' on explicit register
   variables.  __r0 (register "0") is loaded with n1 and __r1 (register "1")
   with n0; d is operand 4.  After the asm __r1 is stored to q and __r0
   to r.  */
#define udiv_qrnnd(q, r, n1, n0, d)					\
Packit 5c3484
  do {									\
Packit 5c3484
    register USItype __r0 __asm__ ("0") = (n1);				\
Packit 5c3484
    register USItype __r1 __asm__ ("1") = (n0);				\
Packit 5c3484
    __asm__ ("dlr\t%0,%4"						\
Packit 5c3484
	     : "=r" (__r0), "=r" (__r1)					\
Packit 5c3484
	     : "r" (__r0), "r" (__r1), "r" (d));			\
Packit 5c3484
    (q) = __r1; (r) = __r0;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif /* if 0 */
Packit 5c3484
#else /* if __zarch__ */
Packit 5c3484
/* FIXME: this fails if gcc knows about the 64-bit registers.  */
Packit 5c3484
/* smul_ppmm (s390 without zarch): one `mr' whose DItype output is tied to
   m0 ("%0", commutative with m1).  The result is split through a union:
   __h is stored to xh and __l to xl.  */
#define smul_ppmm(xh, xl, m0, m1)					\
Packit 5c3484
  do {									\
Packit 5c3484
    union {DItype __ll;							\
Packit 5c3484
	   struct {USItype __h, __l;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
    __asm__ ("mr\t%0,%2"						\
Packit 5c3484
	     : "=r" (__x.__ll)						\
Packit 5c3484
	     : "%0" (m0), "r" (m1));					\
Packit 5c3484
    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
Packit 5c3484
  } while (0)
Packit 5c3484
/* FIXME: this fails if gcc knows about the 64-bit registers.  */
Packit 5c3484
/* sdiv_qrnnd (s390 without zarch): n1 and n0 are packed into a DItype
   through a union (__h = n1, __l = n0), one `dr' is issued with that value
   as both input and output, and afterwards __l is stored to q and __h
   to r.  */
#define sdiv_qrnnd(q, r, n1, n0, d)					\
Packit 5c3484
  do {									\
Packit 5c3484
    union {DItype __ll;							\
Packit 5c3484
	   struct {USItype __h, __l;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
    __x.__i.__h = n1; __x.__i.__l = n0;					\
Packit 5c3484
    __asm__ ("dr\t%0,%2"						\
Packit 5c3484
	     : "=r" (__x.__ll)						\
Packit 5c3484
	     : "0" (__x.__ll), "r" (d));				\
Packit 5c3484
    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif /* if __zarch__ */
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if defined (__s390x__) && W_TYPE_SIZE == 64
Packit 5c3484
/* We need to cast operands with register constraints, otherwise their types
Packit 5c3484
   will be assumed to be SImode by gcc.  For these machines, such operations
Packit 5c3484
   will insert a value into the low 32 bits, and leave the high 32 bits with
Packit 5c3484
   garbage.  */
Packit 5c3484
/* add_ssaaaa (s390x, 64-bit limbs): `algr' adds bl into sl (tied to al),
   then `alcgr' adds bh into sh (tied to ah).  Every input is cast to
   UDItype and __CLOBBER_CC is appended to the operand lists.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
Packit 5c3484
  do {									\
Packit 5c3484
    __asm__ ("algr\t%1,%5\n\talcgr\t%0,%3"				\
Packit 5c3484
	       : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	       : "0"  ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
Packit 5c3484
		 "%1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC); \
Packit 5c3484
  } while (0)
Packit 5c3484
/* sub_ddmmss (s390x, 64-bit limbs): `slgr' subtracts bl from sl (tied to
   al), then `slbgr' subtracts bh from sh (tied to ah).  Every input is cast
   to UDItype and __CLOBBER_CC is appended to the operand lists.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
Packit 5c3484
  do {									\
Packit 5c3484
    __asm__ ("slgr\t%1,%5\n\tslbgr\t%0,%3"				\
Packit 5c3484
	     : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	     : "0" ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
Packit 5c3484
	       "1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC);	\
Packit 5c3484
  } while (0)
Packit 5c3484
/* umul_ppmm (s390x): one `mlgr' whose TImode output is tied to m0 ("%0",
   commutative with m1).  The result is split through a union of the TImode
   integer with two UDItype members: __h is stored to xh, __l to xl.  */
#define umul_ppmm(xh, xl, m0, m1)					\
Packit 5c3484
  do {									\
Packit 5c3484
    union {unsigned int __attribute__ ((mode(TI))) __ll;		\
Packit 5c3484
	   struct {UDItype __h, __l;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
    __asm__ ("mlgr\t%0,%2"						\
Packit 5c3484
	     : "=r" (__x.__ll)						\
Packit 5c3484
	     : "%0" ((UDItype)(m0)), "r" ((UDItype)(m1)));		\
Packit 5c3484
    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
Packit 5c3484
  } while (0)
Packit 5c3484
/* udiv_qrnnd (s390x): n1 and n0 are packed into a TImode integer through a
   union (__h = n1, __l = n0), one `dlgr' is issued with that value as both
   input and output and d (cast to UDItype) as the other operand, and
   afterwards __l is stored to q and __h to r.  */
#define udiv_qrnnd(q, r, n1, n0, d)					\
Packit 5c3484
  do {									\
Packit 5c3484
    union {unsigned int __attribute__ ((mode(TI))) __ll;		\
Packit 5c3484
	   struct {UDItype __h, __l;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
    __x.__i.__h = n1; __x.__i.__l = n0;					\
Packit 5c3484
    __asm__ ("dlgr\t%0,%2"						\
Packit 5c3484
	     : "=r" (__x.__ll)						\
Packit 5c3484
	     : "0" (__x.__ll), "r" ((UDItype)(d)));			\
Packit 5c3484
    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
Packit 5c3484
  } while (0)
Packit 5c3484
#if 0 /* FIXME: Enable for z10 (?) */
Packit 5c3484
/* count_leading_zeros (s390x, disabled by the enclosing #if 0): one `flogr'
   writing a TImode result; only the first UDItype member (__h) of the
   overlaid struct is stored to cnt.  __CLOBBER_CC is appended to the
   operand lists.  */
#define count_leading_zeros(cnt, x)					\
Packit 5c3484
  do {									\
Packit 5c3484
    union {unsigned int __attribute__ ((mode(TI))) __ll;		\
Packit 5c3484
	   struct {UDItype __h, __l;} __i;				\
Packit 5c3484
	  } __clr_cnt;							\
Packit 5c3484
    __asm__ ("flogr\t%0,%1"						\
Packit 5c3484
	     : "=r" (__clr_cnt.__ll)					\
Packit 5c3484
	     : "r" (x) __CLOBBER_CC);					\
Packit 5c3484
    (cnt) = __clr_cnt.__i.__h;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* On x86 and x86_64, every asm implicitly clobbers "flags" and "fpsr",
Packit 5c3484
   so we don't need __CLOBBER_CC.  */
Packit 5c3484
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
Packit 5c3484
/* add_ssaaaa (x86, 32-bit): `addl' adds bl into sl (tied to al), then
   `adcl' adds bh plus the carry into sh (tied to ah).  The %k modifier
   prints the outputs as 32-bit registers.  sl is earlyclobber because it is
   written before bh is read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("addl %5,%k1\n\tadcl %3,%k0"					\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),			\
Packit 5c3484
	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
Packit 5c3484
/* sub_ddmmss (x86, 32-bit): `subl' subtracts bl from sl (tied to al), then
   `sbbl' subtracts bh plus the borrow from sh (tied to ah).  The %k
   modifier prints the outputs as 32-bit registers.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("subl %5,%k1\n\tsbbl %3,%k0"					\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)),			\
Packit 5c3484
	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
Packit 5c3484
/* umul_ppmm (x86, 32-bit): one `mull'.  u is placed in the "a" register
   (tied to w0, commutative with v); v is a register or memory operand.
   w0 is read from the "a" register and w1 from the "d" register.  */
#define umul_ppmm(w1, w0, u, v) \
Packit 5c3484
  __asm__ ("mull %3"							\
Packit 5c3484
	   : "=a" (w0), "=d" (w1)					\
Packit 5c3484
	   : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
Packit 5c3484
/* udiv_qrnnd (x86, 32-bit): one `divl'.  n0 is placed in the "a" register
   and n1 in the "d" register through the "0"/"1" ties; dx is a register or
   memory operand.  q is read from "a" and r from "d".  */
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
Packit 5c3484
  __asm__ ("divl %4"		     /* stringification in K&R C */	\
Packit 5c3484
	   : "=a" (q), "=d" (r)						\
Packit 5c3484
	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_i586 || HAVE_HOST_CPU_pentium || HAVE_HOST_CPU_pentiummmx
Packit 5c3484
/* Pentium bsrl takes between 10 and 72 cycles depending where the most
Packit 5c3484
   significant 1 bit is, hence the use of the following alternatives.  bsfl
Packit 5c3484
   is slow too, between 18 and 42 depending where the least significant 1
Packit 5c3484
   bit is, so let the generic count_trailing_zeros below make use of the
Packit 5c3484
   count_leading_zeros here too.  */
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_pentiummmx && ! defined (LONGLONG_STANDALONE)
Packit 5c3484
/* The following should be a fixed 14 or 15 cycles, but possibly plus an L1
Packit 5c3484
   cache miss reading from __clz_tab.  For P55 it's favoured over the float
Packit 5c3484
   below so as to avoid mixing MMX and x87, since the penalty for switching
Packit 5c3484
   between the two is about 100 cycles.
Packit 5c3484
Packit 5c3484
   The asm block sets __shift to -3 if the high 24 bits are clear, -2 for
Packit 5c3484
   16, -1 for 8, or 0 otherwise.  This could be written equivalently as
Packit 5c3484
   follows, but as of gcc 2.95.2 it results in conditional jumps.
Packit 5c3484
Packit 5c3484
       __shift = -(__n < 0x1000000);
Packit 5c3484
       __shift -= (__n < 0x10000);
Packit 5c3484
       __shift -= (__n < 0x100);
Packit 5c3484
Packit 5c3484
   The middle two sbbl and cmpl's pair, and with luck something gcc
Packit 5c3484
   generates might pair with the first cmpl and the last sbbl.  The "32+1"
Packit 5c3484
   constant could be folded into __clz_tab[], but it doesn't seem worth
Packit 5c3484
   making a different table just for that.  */
Packit 5c3484
Packit 5c3484
/* count_leading_zeros (Pentium MMX, table based).  The cmpl/sbbl sequence
   accumulates a small non-positive value in __shift from three comparisons
   of __n against 0x1000000, 0x10000 and 0x100.  It is then scaled to a bit
   position (__shift*8 + 24 + 1), used to shift __n down, and the remaining
   bits are looked up in __clz_tab.  n is evaluated once, into __n.  */
#define count_leading_zeros(c,n)					\
Packit 5c3484
  do {									\
Packit 5c3484
    USItype  __n = (n);							\
Packit 5c3484
    USItype  __shift;							\
Packit 5c3484
    __asm__ ("cmpl  $0x1000000, %1\n"					\
Packit 5c3484
	     "sbbl  %0, %0\n"						\
Packit 5c3484
	     "cmpl  $0x10000, %1\n"					\
Packit 5c3484
	     "sbbl  $0, %0\n"						\
Packit 5c3484
	     "cmpl  $0x100, %1\n"					\
Packit 5c3484
	     "sbbl  $0, %0\n"						\
Packit 5c3484
	     : "=&r" (__shift) : "r"  (__n));				\
Packit 5c3484
    __shift = __shift*8 + 24 + 1;					\
Packit 5c3484
    (c) = 32 + 1 - __shift - __clz_tab[__n >> __shift];			\
Packit 5c3484
  } while (0)
Packit 5c3484
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
Packit 5c3484
#define COUNT_LEADING_ZEROS_0   31   /* n==0 indistinguishable from n==1 */
Packit 5c3484
Packit 5c3484
#else /* ! pentiummmx || LONGLONG_STANDALONE */
Packit 5c3484
/* The following should be a fixed 14 cycles or so.  Some scheduling
Packit 5c3484
   opportunities should be available between the float load/store too.  This
Packit 5c3484
   sort of code is used in gcc 3 for __builtin_ffs (with "n&-n") and is
Packit 5c3484
   apparently suggested by the Intel optimizing manual (don't know exactly
Packit 5c3484
   where).  gcc 2.95 or up will be best for this, so the "double" is
Packit 5c3484
   correctly aligned on the stack.  */
Packit 5c3484
/* count_leading_zeros (Pentium, float based): convert n (as a UWtype) to a
   double, then read the second `unsigned' word of its storage and shift
   away the low 20 bits; the result is subtracted from 0x3FF + 31 and stored
   to c.  n must be non-zero (checked with ASSERT).  */
#define count_leading_zeros(c,n)					\
  do {									\
    union {								\
      double    __as_double;						\
      unsigned  __word[2];						\
    } __cvt;								\
    ASSERT ((n) != 0);							\
    __cvt.__as_double = (UWtype) (n);					\
    (c) = (0x3FF + 31) - (__cvt.__word[1] >> 20);			\
  } while (0)
Packit 5c3484
#define COUNT_LEADING_ZEROS_0   (0x3FF + 31)
Packit 5c3484
#endif /* pentiummmx */
Packit 5c3484
Packit 5c3484
#else /* ! pentium */
Packit 5c3484
Packit 5c3484
#if __GMP_GNUC_PREREQ (3,4)  /* using bsrl */
Packit 5c3484
#define count_leading_zeros(count,x)  count_leading_zeros_gcc_clz(count,x)
Packit 5c3484
#endif /* gcc clz */
Packit 5c3484
Packit 5c3484
/* On P6, gcc prior to 3.0 generates a partial register stall for
Packit 5c3484
   __cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former
Packit 5c3484
   being 1 code byte smaller.  "31-__cbtmp" is a workaround, probably at the
Packit 5c3484
   cost of one extra instruction.  Do this for "i386" too, since that means
Packit 5c3484
   generic x86.  */
Packit 5c3484
#if ! defined (count_leading_zeros) && __GNUC__ < 3			\
Packit 5c3484
  && (HAVE_HOST_CPU_i386						\
Packit 5c3484
      || HAVE_HOST_CPU_i686						\
Packit 5c3484
      || HAVE_HOST_CPU_pentiumpro					\
Packit 5c3484
      || HAVE_HOST_CPU_pentium2						\
Packit 5c3484
      || HAVE_HOST_CPU_pentium3)
Packit 5c3484
/* count_leading_zeros (x86, gcc < 3 on the CPUs listed in the #if): one
   `bsrl' into the temporary __cbtmp, then count = 31 - __cbtmp.  The
   subtraction form is used instead of an xor with 31.  x must be non-zero
   (checked with ASSERT).  */
#define count_leading_zeros(count, x)					\
Packit 5c3484
  do {									\
Packit 5c3484
    USItype __cbtmp;							\
Packit 5c3484
    ASSERT ((x) != 0);							\
Packit 5c3484
    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x)));	\
Packit 5c3484
    (count) = 31 - __cbtmp;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif /* gcc<3 asm bsrl */
Packit 5c3484
Packit 5c3484
#ifndef count_leading_zeros
Packit 5c3484
/* count_leading_zeros (x86, fallback when nothing above defined it): one
   `bsrl' into the temporary __cbtmp, then count = __cbtmp ^ 31.  x must be
   non-zero (checked with ASSERT).  */
#define count_leading_zeros(count, x)					\
Packit 5c3484
  do {									\
Packit 5c3484
    USItype __cbtmp;							\
Packit 5c3484
    ASSERT ((x) != 0);							\
Packit 5c3484
    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x)));	\
Packit 5c3484
    (count) = __cbtmp ^ 31;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif /* asm bsrl */
Packit 5c3484
Packit 5c3484
#if __GMP_GNUC_PREREQ (3,4)  /* using bsfl */
Packit 5c3484
#define count_trailing_zeros(count,x)  count_trailing_zeros_gcc_ctz(count,x)
Packit 5c3484
#endif /* gcc ctz */
Packit 5c3484
Packit 5c3484
#ifndef count_trailing_zeros
Packit 5c3484
/* count_trailing_zeros (x86, fallback when nothing above defined it): one
   `bsfl' writing straight into count; the %k modifier prints the output as
   a 32-bit register.  x must be non-zero (checked with ASSERT).  */
#define count_trailing_zeros(count, x)					\
Packit 5c3484
  do {									\
Packit 5c3484
    ASSERT ((x) != 0);							\
Packit 5c3484
    __asm__ ("bsfl %1,%k0" : "=r" (count) : "rm" ((USItype)(x)));	\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif /* asm bsfl */
Packit 5c3484
Packit 5c3484
#endif /* ! pentium */
Packit 5c3484
Packit 5c3484
#ifndef UMUL_TIME
Packit 5c3484
#define UMUL_TIME 10
Packit 5c3484
#endif
Packit 5c3484
#ifndef UDIV_TIME
Packit 5c3484
#define UDIV_TIME 40
Packit 5c3484
#endif
Packit 5c3484
#endif /* 80x86 */
Packit 5c3484
Packit 5c3484
#if defined (__amd64__) && W_TYPE_SIZE == 64
Packit 5c3484
/* add_ssaaaa (amd64): `addq' adds bl into sl (tied to al), then `adcq' adds
   bh plus the carry into sh (tied to ah).  The %q modifier prints the
   outputs as 64-bit registers; bh and bl may be a register, memory, or an
   "e" constant.  sl is earlyclobber because it is written before bh is
   read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("addq %5,%q1\n\tadcq %3,%q0"					\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "0"  ((UDItype)(ah)), "rme" ((UDItype)(bh)),		\
Packit 5c3484
	     "%1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
Packit 5c3484
/* sub_ddmmss (amd64): `subq' subtracts bl from sl (tied to al), then `sbbq'
   subtracts bh plus the borrow from sh (tied to ah).  The %q modifier
   prints the outputs as 64-bit registers.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("subq %5,%q1\n\tsbbq %3,%q0"					\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)),		\
Packit 5c3484
	     "1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
Packit 5c3484
/* umul_ppmm (amd64): one `mulq'.  u is placed in the "a" register (tied to
   w0, commutative with v); v is a register or memory operand.  w0 is read
   from the "a" register and w1 from the "d" register.  */
#define umul_ppmm(w1, w0, u, v) \
Packit 5c3484
  __asm__ ("mulq %3"							\
Packit 5c3484
	   : "=a" (w0), "=d" (w1)					\
Packit 5c3484
	   : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
Packit 5c3484
/* udiv_qrnnd (amd64): one `divq'.  n0 is placed in the "a" register and n1
   in the "d" register through the "0"/"1" ties; dx is a register or memory
   operand.  q is read from "a" and r from "d".  */
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
Packit 5c3484
  __asm__ ("divq %4"		     /* stringification in K&R C */	\
Packit 5c3484
	   : "=a" (q), "=d" (r)						\
Packit 5c3484
	   : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
Packit 5c3484
/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
Packit 5c3484
/* count_leading_zeros (amd64): one `bsrq' into the UDItype temporary
   __cbtmp, then count = __cbtmp ^ 63.  x must be non-zero (checked with
   ASSERT).  */
#define count_leading_zeros(count, x)					\
Packit 5c3484
  do {									\
Packit 5c3484
    UDItype __cbtmp;							\
Packit 5c3484
    ASSERT ((x) != 0);							\
Packit 5c3484
    __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x)));	\
Packit 5c3484
    (count) = __cbtmp ^ 63;						\
Packit 5c3484
  } while (0)
Packit 5c3484
/* bsfq destination must be a 64-bit register, "%q0" forces this in case
Packit 5c3484
   count is only an int. */
Packit 5c3484
/* count_trailing_zeros (amd64): one `bsfq' writing straight into count,
   printed as a 64-bit register via %q.  x must be non-zero (checked with
   ASSERT).  */
#define count_trailing_zeros(count, x)					\
Packit 5c3484
  do {									\
Packit 5c3484
    ASSERT ((x) != 0);							\
Packit 5c3484
    __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif /* __amd64__ */
Packit 5c3484
Packit 5c3484
#if defined (__i860__) && W_TYPE_SIZE == 32
Packit 5c3484
/* rshift_rhlc (r, h, l, c) for i860: emits `shr %3,r0,r0' followed by
   `shrd %1,%2,%0', with c as operand 3, h as operand 1, l as operand 2 and
   r as the register output.

   The ':' that introduces the output operand list was missing, so "=r" was
   string-concatenated onto the asm template and the statement could not
   compile; it is restored here.
   NOTE(review): `\;' inside the template is kept exactly as it was; C
   compilers treat it as an unknown escape -- confirm the separator the
   i860 assembler expects.  */
#define rshift_rhlc(r,h,l,c) \
  __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0"				\
	   : "=r" (r) : "r" (h), "r" (l), "rn" (c))
Packit 5c3484
#endif /* i860 */
Packit 5c3484
Packit 5c3484
#if defined (__i960__) && W_TYPE_SIZE == 32
Packit 5c3484
/* add_ssaaaa (i960): three instructions -- `cmpo 1,0', then `addc' of bl and
   al into sl, then `addc' of bh and ah into sh.  sl is earlyclobber because
   it is written before ah/bh are read.
   NOTE(review): the leading cmpo presumably puts the condition code into a
   known no-carry state before the first addc -- confirm.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0"			\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
Packit 5c3484
/* sub_ddmmss (i960): three instructions -- `cmpo 0,0', then `subc' of bl and
   al into sl, then `subc' of bh and ah into sh.
   NOTE(review): the leading cmpo presumably puts the condition code into
   the state subc needs for "no borrow" -- confirm.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0"			\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
Packit 5c3484
/* umul_ppmm (i960): one `emul' on u and v, written as a GCC statement
   expression.  The UDItype result is split through a union whose members
   are declared low word first (__l, __h): __h is stored to w1 and __l
   to w0.  */
#define umul_ppmm(w1, w0, u, v) \
Packit 5c3484
  ({union {UDItype __ll;						\
Packit 5c3484
	   struct {USItype __l, __h;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
  __asm__ ("emul %2,%1,%0"						\
Packit 5c3484
	   : "=d" (__x.__ll) : "%dI" (u), "dI" (v));			\
Packit 5c3484
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
Packit 5c3484
/* __umulsidi3 (i960): statement expression that issues one `emul' on u and
   v and yields the UDItype result __w as its value.  */
#define __umulsidi3(u, v) \
Packit 5c3484
  ({UDItype __w;							\
Packit 5c3484
    __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v));	\
Packit 5c3484
    __w; })
Packit 5c3484
/* udiv_qrnnd (q, r, nh, nl, d) for i960: nh and nl are packed into the
   UDItype __nn (members declared low word first), one `ediv' divides it by
   d, and the UDItype result __rq is unpacked with its low word stored to r
   and its high word to q.

   Two defects are repaired here: __rq was used without ever being
   declared, and the template contained the placeholders `%d' and `%n'
   rather than operand numbers.  The operands are now numbered the same way
   as the neighbouring `emul %2,%1,%0' macros: %2 is d, %1 is the packed
   numerator, %0 is the result.
   NOTE(review): operand order follows ediv src1(divisor), src2(dividend),
   dst -- confirm against the i960 manual.  */
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {									\
    union {UDItype __ll;						\
	   struct {USItype __l, __h;} __i;				\
	  } __nn, __rq;							\
    __nn.__i.__h = (nh); __nn.__i.__l = (nl);				\
    __asm__ ("ediv %2,%1,%0"						\
	   : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d));		\
    (r) = __rq.__i.__l; (q) = __rq.__i.__h;				\
  } while (0)
Packit 5c3484
/* count_leading_zeros (i960): one `scanbit' on x into the temporary
   __cbtmp, then count = __cbtmp ^ 31.  */
#define count_leading_zeros(count, x) \
Packit 5c3484
  do {									\
Packit 5c3484
    USItype __cbtmp;							\
Packit 5c3484
    __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x));		\
Packit 5c3484
    (count) = __cbtmp ^ 31;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
Packit 5c3484
#if defined (__i960mx)		/* what is the proper symbol to test??? */
Packit 5c3484
/* rshift_rhlc (r, h, l, c) for i960mx: h and l are packed into the UDItype
   __nn (members declared low word first) and one `shre' is issued with c
   as operand 2, __nn as operand 1 and r as the output.

   The body opened with `do {' but ended with a bare `}', so any use
   followed by `;' was a syntax error; the closing `while (0)' is
   restored.  */
#define rshift_rhlc(r,h,l,c) \
  do {									\
    union {UDItype __ll;						\
	   struct {USItype __l, __h;} __i;				\
	  } __nn;							\
    __nn.__i.__h = (h); __nn.__i.__l = (l);				\
    __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c));	\
  } while (0)
Packit 5c3484
#endif /* i960mx */
Packit 5c3484
#endif /* i960 */
Packit 5c3484
Packit 5c3484
#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
Packit 5c3484
     || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
Packit 5c3484
     || defined (__mc5307__)) && W_TYPE_SIZE == 32
Packit 5c3484
/* add_ssaaaa (m68k): `add%.l' adds bl into sl (tied to al), then `addx%.l'
   adds bh into sh (tied to ah).  sh, sl and bh are constrained to "d"
   registers; bl may be any general operand.  sl is earlyclobber because it
   is written before bh is read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"				\
Packit 5c3484
	   : "=d" (sh), "=&d" (sl)					\
Packit 5c3484
	   : "0"  ((USItype)(ah)), "d" ((USItype)(bh)),			\
Packit 5c3484
	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
Packit 5c3484
/* sub_ddmmss (m68k): `sub%.l' subtracts bl from sl (tied to al), then
   `subx%.l' subtracts bh from sh (tied to ah).  sh, sl and bh are
   constrained to "d" registers; bl may be any general operand.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"				\
Packit 5c3484
	   : "=d" (sh), "=&d" (sl)					\
Packit 5c3484
	   : "0" ((USItype)(ah)), "d" ((USItype)(bh)),			\
Packit 5c3484
	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
Packit 5c3484
/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
Packit 5c3484
#if defined (__mc68020__) || defined(mc68020) \
Packit 5c3484
     || defined (__mc68030__) || defined (mc68030) \
Packit 5c3484
     || defined (__mc68040__) || defined (mc68040) \
Packit 5c3484
     || defined (__mcpu32__) || defined (mcpu32) \
Packit 5c3484
     || defined (__NeXT__)
Packit 5c3484
/* umul_ppmm (68020 and later, CPU32): one `mulu%.l %3,%1:%0'.  w0 shares
   its "d" register with u (the "%0" tie, commutative with v); w1 is a
   second "d" register output named as the other half of the %1:%0 pair.  */
#define umul_ppmm(w1, w0, u, v) \
Packit 5c3484
  __asm__ ("mulu%.l %3,%1:%0"						\
Packit 5c3484
	   : "=d" (w0), "=d" (w1)					\
Packit 5c3484
	   : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
Packit 5c3484
#define UMUL_TIME 45
Packit 5c3484
/* udiv_qrnnd (68020 and later, CPU32): one `divu%.l %4,%1:%0'.  q shares
   its "d" register with n0 (tie "0") and r shares its register with n1
   (tie "1"); d is operand 4.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
Packit 5c3484
  __asm__ ("divu%.l %4,%1:%0"						\
Packit 5c3484
	   : "=d" (q), "=d" (r)						\
Packit 5c3484
	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
Packit 5c3484
#define UDIV_TIME 90
Packit 5c3484
/* sdiv_qrnnd (68020 and later, CPU32): same operand wiring as the
   udiv_qrnnd for this target, but the instruction is `divs%.l'.  The inputs
   are still cast to USItype before being handed to the asm.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
Packit 5c3484
  __asm__ ("divs%.l %4,%1:%0"						\
Packit 5c3484
	   : "=d" (q), "=d" (r)						\
Packit 5c3484
	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
Packit 5c3484
#else /* for other 68k family members use 16x16->32 multiplication */
Packit 5c3484
/* umul_ppmm (68000/68010 and other family members without a 32x32->64
   multiply): builds the product from four `mulu%.w' multiplications of the
   16-bit halves of a and b.  The two cross products are added together; if
   that add carries, 0x10000 is added to the high result.  The sum is then
   split with swap/clr%.w and added into the result with add%.l / addx%.l.
   Operands: %0 = xh, %1 = xl, %2 = __umul_tmp1 (starts as a via the "%2"
   tie), %3 = __umul_tmp2 (scratch), %5 = b.  xh, xl and __umul_tmp2 are
   earlyclobber.  */
#define umul_ppmm(xh, xl, a, b) \
Packit 5c3484
  do { USItype __umul_tmp1, __umul_tmp2;				\
Packit 5c3484
	__asm__ ("| Inlined umul_ppmm\n"				\
Packit 5c3484
"	move%.l	%5,%3\n"						\
Packit 5c3484
"	move%.l	%2,%0\n"						\
Packit 5c3484
"	move%.w	%3,%1\n"						\
Packit 5c3484
"	swap	%3\n"							\
Packit 5c3484
"	swap	%0\n"							\
Packit 5c3484
"	mulu%.w	%2,%1\n"						\
Packit 5c3484
"	mulu%.w	%3,%0\n"						\
Packit 5c3484
"	mulu%.w	%2,%3\n"						\
Packit 5c3484
"	swap	%2\n"							\
Packit 5c3484
"	mulu%.w	%5,%2\n"						\
Packit 5c3484
"	add%.l	%3,%2\n"						\
Packit 5c3484
"	jcc	1f\n"							\
Packit 5c3484
"	add%.l	%#0x10000,%0\n"						\
Packit 5c3484
"1:	move%.l	%2,%3\n"						\
Packit 5c3484
"	clr%.w	%2\n"							\
Packit 5c3484
"	swap	%2\n"							\
Packit 5c3484
"	swap	%3\n"							\
Packit 5c3484
"	clr%.w	%3\n"							\
Packit 5c3484
"	add%.l	%3,%1\n"						\
Packit 5c3484
"	addx%.l	%2,%0\n"						\
Packit 5c3484
"	| End inlined umul_ppmm"					\
Packit 5c3484
	      : "=&d" (xh), "=&d" (xl),					\
Packit 5c3484
		"=d" (__umul_tmp1), "=&d" (__umul_tmp2)			\
Packit 5c3484
	      : "%2" ((USItype)(a)), "d" ((USItype)(b)));		\
Packit 5c3484
  } while (0)
Packit 5c3484
#define UMUL_TIME 100
Packit 5c3484
#define UDIV_TIME 400
Packit 5c3484
#endif /* not mc68020 */
Packit 5c3484
/* The '020, '030, '040 and '060 have bitfield insns.
Packit 5c3484
   GCC 3.4 defines __mc68020__ when in CPU32 mode, check for __mcpu32__ to
Packit 5c3484
   exclude bfffo on that chip (bitfield insns not available).  */
Packit 5c3484
#if (defined (__mc68020__) || defined (mc68020)    \
Packit 5c3484
     || defined (__mc68030__) || defined (mc68030) \
Packit 5c3484
     || defined (__mc68040__) || defined (mc68040) \
Packit 5c3484
     || defined (__mc68060__) || defined (mc68060) \
Packit 5c3484
     || defined (__NeXT__))			   \
Packit 5c3484
  && ! defined (__mcpu32__)
Packit 5c3484
/* count_leading_zeros (68020/030/040/060, not CPU32): one `bfffo' on x.
   Both the bit-field offset and width are printed from operand 2, the
   constant 0, via %b2.
   NOTE(review): a width field of 0 presumably denotes the full 32 bits --
   confirm against the bfffo encoding.  */
#define count_leading_zeros(count, x) \
Packit 5c3484
  __asm__ ("bfffo %1{%b2:%b2},%0"					\
Packit 5c3484
	   : "=d" (count)						\
Packit 5c3484
	   : "od" ((USItype) (x)), "n" (0))
Packit 5c3484
#define COUNT_LEADING_ZEROS_0 32
Packit 5c3484
#endif
Packit 5c3484
#endif /* mc68000 */
Packit 5c3484
Packit 5c3484
#if defined (__m88000__) && W_TYPE_SIZE == 32
Packit 5c3484
/* add_ssaaaa (m88000): `addu.co' of al and bl into sl, then `addu.ci' of ah
   and bh into sh.  All inputs are "rJ" and printed with %r.  sl is
   earlyclobber because it is written before ah/bh are read.
   NOTE(review): .co/.ci are presumably carry-out / carry-in -- confirm.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"			\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
Packit 5c3484
/* sub_ddmmss (m88000): `subu.co' of al and bl into sl, then `subu.ci' of ah
   and bh into sh.  All inputs are "rJ" and printed with %r.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
Packit 5c3484
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"			\
Packit 5c3484
	   : "=r" (sh), "=&r" (sl)					\
Packit 5c3484
	   : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
Packit 5c3484
/* count_leading_zeros (m88000): one `ff1' on x into the temporary __cbtmp,
   then count = __cbtmp ^ 31.  */
#define count_leading_zeros(count, x) \
Packit 5c3484
  do {									\
Packit 5c3484
    USItype __cbtmp;							\
Packit 5c3484
    __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x));			\
Packit 5c3484
    (count) = __cbtmp ^ 31;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#define COUNT_LEADING_ZEROS_0 63 /* sic */
Packit 5c3484
#if defined (__m88110__)
Packit 5c3484
/* umul_ppmm (m88110): one `mulu.d' on u and v.  The UDItype result is split
   through a union: the first USItype member (__h) is stored to wh and the
   second (__l) to wl.  */
#define umul_ppmm(wh, wl, u, v) \
Packit 5c3484
  do {									\
Packit 5c3484
    union {UDItype __ll;						\
Packit 5c3484
	   struct {USItype __h, __l;} __i;				\
Packit 5c3484
	  } __x;							\
Packit 5c3484
    __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v));	\
Packit 5c3484
    (wh) = __x.__i.__h;							\
Packit 5c3484
    (wl) = __x.__i.__l;							\
Packit 5c3484
  } while (0)
Packit 5c3484
#define udiv_qrnnd(q, r, n1, n0, d) \
Packit 5c3484
  ({union {UDItype __ll;						\
Packit 5c3484
	   struct {USItype __h, __l;} __i;				\
Packit 5c3484
	  } __x, __q;							\
Packit 5c3484
  __x.__i.__h = (n1); __x.__i.__l = (n0);				\
Packit 5c3484
  __asm__ ("divu.d %0,%1,%2"						\
Packit 5c3484
	   : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d));		\
Packit 5c3484
  (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
Packit 5c3484
#define UMUL_TIME 5
Packit 5c3484
#define UDIV_TIME 25
Packit 5c3484
#else
Packit 5c3484
#define UMUL_TIME 17
Packit 5c3484
#define UDIV_TIME 150
Packit 5c3484
#endif /* __m88110__ */
Packit 5c3484
#endif /* __m88000__ */
Packit 5c3484
Packit 5c3484
#if defined (__mips) && W_TYPE_SIZE == 32
Packit 5c3484
#if __GMP_GNUC_PREREQ (4,4)
Packit 5c3484
/* umul_ppmm (mips, 32-bit limbs, gcc >= 4.4): plain C.  u is widened to
   UDItype and multiplied by v; bits 32 and up of the product are assigned
   to w1 and the product itself (truncated by the assignment) to w0.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    UDItype __prod = (UDItype)(u);					\
    __prod *= (v);							\
    (w1) = __prod >> 32;						\
    (w0) = __prod;							\
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7) && !defined (__clang__)
Packit 5c3484
/* umul_ppmm (mips, 32-bit limbs, gcc >= 2.7 and not clang): one `multu' on
   u and v.  w0 and w1 are received through the "l" and "h" constraints
   rather than via explicit move instructions.  */
#define umul_ppmm(w1, w0, u, v) \
Packit 5c3484
  __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
Packit 5c3484
#endif
Packit 5c3484
#if !defined (umul_ppmm)
Packit 5c3484
/* umul_ppmm (mips, 32-bit limbs, last-resort variant): `multu' on u and v,
   then `mflo' into w0 and `mfhi' into w1, all inside one asm.  */
#define umul_ppmm(w1, w0, u, v) \
Packit 5c3484
  __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1"				\
Packit 5c3484
	   : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
Packit 5c3484
#endif
Packit 5c3484
#define UMUL_TIME 10
Packit 5c3484
#define UDIV_TIME 100
Packit 5c3484
#endif /* __mips */
Packit 5c3484
Packit 5c3484
#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
Packit 5c3484
#if __GMP_GNUC_PREREQ (4,4)
Packit 5c3484
/* umul_ppmm (mips64, 64-bit limbs, gcc >= 4.4): plain C.  u is widened to a
   local unsigned TImode integer type and multiplied by v; bits 64 and up of
   the product are assigned to w1 and the product itself (truncated by the
   assignment) to w0.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    typedef unsigned int __umul_UTItype __attribute__((mode(TI)));	\
    __umul_UTItype __prod = (__umul_UTItype)(u);			\
    __prod *= (v);							\
    (w1) = __prod >> 64;						\
    (w0) = __prod;							\
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7) && !defined (__clang__)
Packit 5c3484
/* umul_ppmm (mips64, 64-bit limbs, gcc >= 2.7 and not clang): one `dmultu'
   on u and v (both cast to UDItype).  w0 and w1 are received through the
   "l" and "h" constraints rather than via explicit move instructions.  */
#define umul_ppmm(w1, w0, u, v) \
Packit 5c3484
  __asm__ ("dmultu %2,%3"						\
Packit 5c3484
	   : "=l" (w0), "=h" (w1)					\
Packit 5c3484
	   : "d" ((UDItype)(u)), "d" ((UDItype)(v)))
Packit 5c3484
#endif
Packit 5c3484
#if !defined (umul_ppmm)
Packit 5c3484
#define umul_ppmm(w1, w0, u, v) \
Packit 5c3484
  __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1"				\
Packit 5c3484
	   : "=d" (w0), "=d" (w1)					\
Packit 5c3484
	   : "d" ((UDItype)(u)), "d" ((UDItype)(v)))
Packit 5c3484
#endif
Packit 5c3484
#define UMUL_TIME 20
Packit 5c3484
#define UDIV_TIME 140
Packit 5c3484
#endif /* __mips */
Packit 5c3484
Packit 5c3484
/* MMIX, 64-bit words.  A single MULU yields both product words: w0 is the
   instruction's destination operand and w1 is bound via the "z"
   constraint.  */
#if defined (__mmix__) && W_TYPE_SIZE == 64
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("MULU %0,%2,%3"						\
	   : "=r" (w0), "=z" (w1)					\
	   : "r" (u), "r" (v))
#endif
Packit 5c3484
Packit 5c3484
#if defined (__ns32000__) && W_TYPE_SIZE == 32

/* (w1,w0) = u * v.  The product is produced by "meid" as a UDItype, which
   is overlaid with a pair of USItype halves (__l first, then __h).  */
#define umul_ppmm(w1, w0, u, v) \
  ({									\
    union								\
      {									\
	UDItype __ll;							\
	struct {USItype __l, __h;} __i;					\
      } __x;								\
    __asm__ ("meid %2,%0"						\
	     : "=g" (__x.__ll)						\
	     : "%0" ((USItype)(u)), "g" ((USItype)(v)));		\
    (w1) = __x.__i.__h;							\
    (w0) = __x.__i.__l;							\
  })

/* Same multiply, but yielding the whole UDItype product as the value of
   the expression.  */
#define __umulsidi3(u, v) \
  ({									\
    UDItype __w;							\
    __asm__ ("meid %2,%0"						\
	     : "=g" (__w)						\
	     : "%0" ((USItype)(u)), "g" ((USItype)(v)));		\
    __w;								\
  })

/* Divide (n1,n0) by d with "deid", which works in place on the double
   word: afterwards the low half is taken as r and the high half as q.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({									\
    union								\
      {									\
	UDItype __ll;							\
	struct {USItype __l, __h;} __i;					\
      } __x;								\
    __x.__i.__h = (n1);							\
    __x.__i.__l = (n0);							\
    __asm__ ("deid %2,%0"						\
	     : "=g" (__x.__ll)						\
	     : "0" (__x.__ll), "g" ((USItype)(d)));			\
    (r) = __x.__i.__l;							\
    (q) = __x.__i.__h;							\
  })

/* "ffsd" with the output operand preloaded with 0 (matching constraint).  */
#define count_trailing_zeros(count,x) \
  do {									\
    __asm__ ("ffsd	%2,%0"						\
	     : "=r" (count)						\
	     : "0" ((USItype) 0),					\
	       "r" ((USItype) (x)));					\
  } while (0)
#endif /* __ns32000__ */
Packit 5c3484
Packit 5c3484
/* In the past we had a block of various #defines tested
Packit 5c3484
       _ARCH_PPC    - AIX
Packit 5c3484
       _ARCH_PWR    - AIX
Packit 5c3484
       __powerpc__  - gcc
Packit 5c3484
       __POWERPC__  - BEOS
Packit 5c3484
       __ppc__      - Darwin
Packit 5c3484
       PPC          - old gcc, GNU/Linux, SysV
Packit 5c3484
   The plain PPC test was not good for vxWorks, since PPC is defined on all
Packit 5c3484
   CPUs there (eg. m68k too), as a constant one is expected to compare
Packit 5c3484
   CPU_FAMILY against.
Packit 5c3484
Packit 5c3484
   At any rate, this was pretty unattractive and a bit fragile.  The use of
Packit 5c3484
   HAVE_HOST_CPU_FAMILY is designed to cut through it all and be sure of
Packit 5c3484
   getting the desired effect.
Packit 5c3484
Packit 5c3484
   ENHANCE-ME: We should test _IBMR2 here when we add assembly support for
Packit 5c3484
   the system vendor compilers.  (Is that vendor compilers with inline asm,
Packit 5c3484
   or what?)  */
Packit 5c3484
Packit 5c3484
#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc)	\
Packit 5c3484
  && W_TYPE_SIZE == 32
Packit 5c3484
/* (sh,sl) = (ah,al) + (bh,bl).  The low words are always added with a
   carry-setting add (the %I modifier selects the immediate form when bl
   satisfies "I").  The high-word instruction depends on what is known
   about bh at compile time.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    if (__builtin_constant_p (bh) && (bh) == 0)				\
      /* bh is 0: sh = ah + carry.  */					\
      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (ah), "%r" (al), "rI" (bl));			\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
      /* bh is all ones: sh = ah + carry - 1.  */			\
      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (ah), "%r" (al), "rI" (bl));			\
    else								\
      /* General case: sh = ah + bh + carry.  */			\
      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
  } while (0)
Packit 5c3484
/* (sh,sl) = (ah,al) - (bh,bl).  The low words are subtracted with a
   carry-setting subf (the %I modifier selects the immediate form when al
   satisfies "I").  The high-word instruction is specialised when ah or bh
   is a compile-time 0 or all-ones constant.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    if (__builtin_constant_p (ah) && (ah) == 0)				\
      /* ah is 0.  */							\
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (bh), "rI" (al), "r" (bl));			\
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
      /* ah is all ones.  */						\
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (bh), "rI" (al), "r" (bl));			\
    else if (__builtin_constant_p (bh) && (bh) == 0)			\
      /* bh is 0.  */							\
      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (ah), "rI" (al), "r" (bl));			\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
      /* bh is all ones.  */						\
      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (ah), "rI" (al), "r" (bl));			\
    else								\
      /* General case.  */						\
      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
  } while (0)
Packit 5c3484
/* Leading-zero count via cntlzw.  COUNT_LEADING_ZEROS_0 records the result
   obtained for x == 0.  */
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzw %0,%1"						\
	   : "=r" (count)						\
	   : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
Packit 5c3484
#if HAVE_HOST_CPU_FAMILY_powerpc
Packit 5c3484
#if __GMP_GNUC_PREREQ (4,4)
/* (w1,w0) = u * v, written as a plain C widening multiply: the product is
   formed in a UDItype and then split into its two 32-bit words.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    UDItype __prod = (UDItype) (u) * (v);				\
    (w1) = __prod >> 32;						\
    (w0) = __prod;							\
  } while (0)
#endif
Packit 5c3484
#if !defined (umul_ppmm)
/* (ph,pl) = m0 * m1 (unsigned).  The high word comes from mulhwu and the
   low word from an ordinary C multiply.

   The operands are copied into USItype locals first and only the locals are
   used afterwards, so that each macro argument is evaluated exactly once
   and the asm sees 32-bit values whatever type the caller passed.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    USItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
#endif
Packit 5c3484
#define UMUL_TIME 15
/* (ph,pl) = m0 * m1 (signed).  The high word comes from mulhw and the low
   word from an ordinary C multiply.

   The operands are copied into SItype locals first and only the locals are
   used afterwards, so that each macro argument is evaluated exactly once
   and the asm sees 32-bit values whatever type the caller passed.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {									\
    SItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
Packit 5c3484
#else
Packit 5c3484
#define UMUL_TIME 8
/* Signed multiply: "mul" writes xh to a general register, and xl is bound
   through the "q" constraint.  */
#define smul_ppmm(xh, xl, m0, m1) \
  __asm__ ("mul %0,%2,%3"						\
	   : "=r" (xh), "=q" (xl)					\
	   : "r" (m0), "r" (m1))
#define SMUL_TIME 4
/* Signed divide: nl is passed in the same location as the r output (the
   "1" matching constraint), nh and d in general registers.  */
#define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %0,%2,%4"						\
	   : "=r" (q), "=q" (r)						\
	   : "r" (nh), "1" (nl), "r" (d))
#define UDIV_TIME 100
Packit 5c3484
#endif
Packit 5c3484
#endif /* 32-bit POWER architecture variants.  */
Packit 5c3484
Packit 5c3484
/* We should test _IBMR2 here when we add assembly support for the system
Packit 5c3484
   vendor compilers.  */
Packit 5c3484
#if HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64
Packit 5c3484
#if !defined (_LONG_LONG_LIMB)
Packit 5c3484
/* _LONG_LONG_LIMB is ABI=mode32 where adde operates on 32-bit values.  So
Packit 5c3484
   use adde etc only when not _LONG_LONG_LIMB.  */
Packit 5c3484
/* (sh,sl) = (ah,al) + (bh,bl) on 64-bit words.  The low words are always
   added with a carry-setting add (the %I modifier selects the immediate
   form when bl satisfies "I").  The high-word instruction depends on what
   is known about bh at compile time.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    if (__builtin_constant_p (bh) && (bh) == 0)				\
      /* bh is 0: sh = ah + carry.  */					\
      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r"  ((UDItype)(ah)),					\
		 "%r" ((UDItype)(al)),					\
		 "rI" ((UDItype)(bl)));					\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
      /* bh is all ones: sh = ah + carry - 1.  */			\
      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r"  ((UDItype)(ah)),					\
		 "%r" ((UDItype)(al)),					\
		 "rI" ((UDItype)(bl)));					\
    else								\
      /* General case: sh = ah + bh + carry.  */			\
      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
	       : "=r" (sh), "=&r" (sl)					\
	       : "r"  ((UDItype)(ah)),					\
		 "r"  ((UDItype)(bh)),					\
		 "%r" ((UDItype)(al)),					\
		 "rI" ((UDItype)(bl)));					\
  } while (0)
Packit 5c3484
/* (sh,sl) = (ah,al) - (bh,bl) on 64-bit words.

   If bl is a compile-time constant in the range tested below, the low word
   is computed by adding -bl with addic; otherwise subfc (or its immediate
   form, selected by %I) is used.  In both halves the high-word instruction
   is specialised when ah or bh is a compile-time 0 or all-ones constant.

   The range test parenthesizes bl so that an expression argument (for
   example "x & 0xff") is compared as a whole.

   We use "*rI" for the constant operand here, since with just "I", gcc barfs.
   This might seem strange, but gcc folds away the dead code late.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    if (__builtin_constant_p (bl)					\
	&& (bl) > -0x8000 && (bl) <= 0x8000) {				\
	if (__builtin_constant_p (ah) && (ah) == 0)			\
	  __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   :                       "r" ((UDItype)(bh)),		\
		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
	  __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   :                       "r" ((UDItype)(bh)),		\
		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
	else if (__builtin_constant_p (bh) && (bh) == 0)		\
	  __asm__ ("addic %1,%3,%4\n\taddme %0,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   : "r"  ((UDItype)(ah)),				\
		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	\
	  __asm__ ("addic %1,%3,%4\n\taddze %0,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   : "r"  ((UDItype)(ah)),				\
		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
	else								\
	  __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   : "r"  ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
    } else {								\
	if (__builtin_constant_p (ah) && (ah) == 0)			\
	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   :                       "r" ((UDItype)(bh)),		\
		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   :                       "r" ((UDItype)(bh)),		\
		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
	else if (__builtin_constant_p (bh) && (bh) == 0)		\
	  __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   : "r"  ((UDItype)(ah)),				\
		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	\
	  __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
		   : "=r" (sh), "=&r" (sl)				\
		   : "r"  ((UDItype)(ah)),				\
		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
	else								\
	  __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"		\
		   : "=r" (sh), "=&r" (sl)				\
		   : "r"  ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
    }									\
  } while (0)
Packit 5c3484
#endif /* ! _LONG_LONG_LIMB */
Packit 5c3484
/* Leading-zero count via cntlzd.  COUNT_LEADING_ZEROS_0 records the result
   obtained for x == 0.  */
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzd %0,%1"						\
	   : "=r" (count)						\
	   : "r" (x))
#define COUNT_LEADING_ZEROS_0 64
Packit 5c3484
#if 0 && __GMP_GNUC_PREREQ (4,4) /* Disable, this results in libcalls! */
/* Plain C form through a TImode unsigned integer; kept for reference but
   compiled out by the "0 &&" above.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
    __ll_UTItype __prod = (__ll_UTItype) (u) * (v);			\
    (w1) = __prod >> 64;						\
    (w0) = __prod;							\
  } while (0)
#endif

#ifndef umul_ppmm
/* (ph,pl) = m0 * m1 (unsigned).  The operands are copied to UDItype locals
   once; mulhdu gives the high word and a C multiply gives the low word.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhdu %0,%1,%2"						\
	     : "=r" (ph)						\
	     : "%r" (__m0), "r" (__m1));				\
    (pl) = __m0 * __m1;							\
  } while (0)
#endif
#define UMUL_TIME 15
Packit 5c3484
/* (ph,pl) = m0 * m1 (signed).  The operands are copied to DItype locals
   once; mulhd gives the high word and a C multiply gives the low word.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {									\
    DItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhd %0,%1,%2"						\
	     : "=r" (ph)						\
	     : "%r" (__m0), "r" (__m1));				\
    (pl) = __m0 * __m1;							\
  } while (0)
#define SMUL_TIME 14  /* ??? */
#define UDIV_TIME 120 /* ??? */
Packit 5c3484
#endif /* 64-bit PowerPC.  */
Packit 5c3484
Packit 5c3484
#if defined (__pyr__) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): add the low words, then add the high words
   with carry.  ah and al are tied to the output operands.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addw %5,%1\n\taddwc %3,%0"					\
	   : "=r" (sh), "=&r" (sl)					\
	   : "0"  ((USItype)(ah)),					\
	     "g"  ((USItype)(bh)),					\
	     "%1" ((USItype)(al)),					\
	     "g"  ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): subtract the low words, then subtract the
   high words with borrow.  ah and al are tied to the output operands.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subw %5,%1\n\tsubwb %3,%0"					\
	   : "=r" (sh), "=&r" (sl)					\
	   : "0" ((USItype)(ah)),					\
	     "g" ((USItype)(bh)),					\
	     "1" ((USItype)(al)),					\
	     "g" ((USItype)(bl)))

/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.
   The UDItype result is overlaid with two USItype halves, __h first.  */
#define umul_ppmm(w1, w0, u, v) \
  ({									\
    union								\
      {									\
	UDItype __ll;							\
	struct {USItype __h, __l;} __i;					\
      } __x;								\
    __asm__ ("movw %1,%R0\n\tuemul %2,%0"				\
	     : "=&r" (__x.__ll)						\
	     : "g" ((USItype) (u)), "g" ((USItype)(v)));		\
    (w1) = __x.__i.__h;							\
    (w0) = __x.__i.__l;							\
  })
#endif /* __pyr__ */
Packit 5c3484
Packit 5c3484
#if defined (__ibm032__) /* RT/ROMP */  && W_TYPE_SIZE == 32
Packit 5c3484
/* (sh,sl) = (ah,al) + (bh,bl): "a" on the low words, then "ae" on the high
   words.  ah and al are tied to the output operands.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3"					\
	   : "=r" (sh), "=&r" (sl)					\
	   : "0"  ((USItype)(ah)),					\
	     "r"  ((USItype)(bh)),					\
	     "%1" ((USItype)(al)),					\
	     "r"  ((USItype)(bl)))
/* (sh,sl) = (ah,al) - (bh,bl): "s" on the low words, then "se" on the high
   words.  ah and al are tied to the output operands.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3"					\
	   : "=r" (sh), "=&r" (sl)					\
	   : "0" ((USItype)(ah)),					\
	     "r" ((USItype)(bh)),					\
	     "1" ((USItype)(al)),					\
	     "r" ((USItype)(bl)))
Packit 5c3484
#define smul_ppmm(ph, pl, m0, m1) \
Packit 5c3484
  __asm__ (								\
Packit 5c3484
       "s	r2,r2\n"						\
Packit 5c3484
"	mts r10,%2\n"							\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	m	r2,%3\n"						\
Packit 5c3484
"	cas	%0,r2,r0\n"						\
Packit 5c3484
"	mfs	r10,%1"							\
Packit 5c3484
	   : "=r" (ph), "=r" (pl)					\
Packit 5c3484
	   : "%r" ((USItype)(m0)), "r" ((USItype)(m1))			\
Packit 5c3484
	   : "r2")
Packit 5c3484
#define UMUL_TIME 20
Packit 5c3484
#define UDIV_TIME 200
Packit 5c3484
/* count = number of leading zero bits in the 32-bit value x.

   The clz instruction is applied to one 16-bit half: to the upper half
   (shifted down) when x >= 0x10000, otherwise to x itself with 16 added to
   the result.

   x is first copied into a USItype local so that the argument is evaluated
   only once and the range test is always done on the unsigned 32-bit value,
   even if the caller passes a signed expression.  */
#define count_leading_zeros(count, x) \
  do {									\
    USItype __clz_x = (x);						\
    if (__clz_x >= 0x10000)						\
      __asm__ ("clz	%0,%1"						\
	       : "=r" (count) : "r" (__clz_x >> 16));			\
    else								\
      {									\
	__asm__ ("clz	%0,%1"						\
		 : "=r" (count) : "r" (__clz_x));			\
	(count) += 16;							\
      }									\
  } while (0)
Packit 5c3484
#endif /* RT/ROMP */
Packit 5c3484
Packit 5c3484
/* SH2/SH3/SH4: dmulu.l leaves the product in macl/mach, which are then
   copied to w0 and w1 with sts; both are listed as clobbered.  */
#if (defined (__SH2__) || defined (__SH3__) || defined (__SH4__)) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0"		\
	   : "=r" (w1), "=r" (w0)					\
	   : "r" (u), "r" (v)						\
	   : "macl", "mach")
#define UMUL_TIME 5
#endif
Packit 5c3484
Packit 5c3484
#if defined (__sparc__) && W_TYPE_SIZE == 32
Packit 5c3484
/* (sh,sl) = (ah,al) + (bh,bl): addcc on the low words, addx on the high
   words.  The condition codes are declared clobbered via __CLOBBER_CC.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"				\
	   : "=r" (sh), "=&r" (sl)					\
	   : "rJ" (ah), "rI" (bh),					\
	     "%rJ" (al), "rI" (bl)					\
	   __CLOBBER_CC)
/* (sh,sl) = (ah,al) - (bh,bl): subcc on the low words, subx on the high
   words.  The condition codes are declared clobbered via __CLOBBER_CC.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"				\
	   : "=r" (sh), "=&r" (sl)					\
	   : "rJ" (ah), "rI" (bh),					\
	     "rJ" (al), "rI" (bl)					\
	   __CLOBBER_CC)
Packit 5c3484
/* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h
Packit 5c3484
   doesn't define anything to indicate that to us, it only sets __sparcv8. */
Packit 5c3484
#if defined (__sparc_v9__) || defined (__sparcv9)
Packit 5c3484
/* Perhaps we should use floating-point operations here?  */
#if 0
/* Triggers a bug making mpz/tests/t-gcd.c fail.
   Perhaps we simply need to explicitly zero-extend the inputs?  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0"		\
	   : "=r" (w1), "=r" (w0)					\
	   : "r" (u), "r" (v)						\
	   : "g1")
#else
/* Use v8 umul until above bug is fixed.  The low word is umul's
   destination; the high word is read from %y.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
	   : "=r" (w1), "=r" (w0)					\
	   : "r" (u), "r" (v))
#endif
Packit 5c3484
/* Use a plain v8 divide for v9.

   n1 is written to %y and udiv then divides by d with n0 as the low word,
   giving the quotient.  The remainder is reconstructed in C as
   n0 - q * d.

   n0 and d are copied into locals so that each macro argument is evaluated
   exactly once.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do {									\
    USItype __q;							\
    USItype __udiv_n0 = (n0), __udiv_d = (d);				\
    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
	     : "=r" (__q)						\
	     : "r" (n1), "r" (__udiv_n0), "r" (__udiv_d));		\
    (r) = __udiv_n0 - __q * __udiv_d;					\
    (q) = __q;								\
  } while (0)
Packit 5c3484
#else
Packit 5c3484
#if defined (__sparc_v8__)   /* gcc normal */				\
Packit 5c3484
  || defined (__sparcv8)     /* gcc solaris */				\
Packit 5c3484
  || HAVE_HOST_CPU_supersparc
Packit 5c3484
/* Don't match immediate range because, 1) it is not often useful,
   2) the 'I' flag thinks of the range as a 13 bit signed interval,
   while we want to match a 13 bit interval, sign extended to 32 bits,
   but INTERPRETED AS UNSIGNED.

   The low product word is umul's destination; the high word is read back
   from %y.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
	   : "=r" (w1), "=r" (w0)					\
	   : "r" (u), "r" (v))
#define UMUL_TIME 5
Packit 5c3484
Packit 5c3484
#if HAVE_HOST_CPU_supersparc
#define UDIV_TIME 60		/* SuperSPARC timing */
#else
/* Don't use this on SuperSPARC because its udiv only handles 53 bit
   dividends and will trap to the kernel for the rest.

   n1 is written to %y and udiv then divides by d with n0 as the low word,
   giving the quotient.  The remainder is reconstructed in C as
   n0 - q * d.

   n0 and d are copied into locals so that each macro argument is evaluated
   exactly once.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do {									\
    USItype __q;							\
    USItype __udiv_n0 = (n0), __udiv_d = (d);				\
    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
	     : "=r" (__q)						\
	     : "r" (n1), "r" (__udiv_n0), "r" (__udiv_d));		\
    (r) = __udiv_n0 - __q * __udiv_d;					\
    (q) = __q;								\
  } while (0)
#define UDIV_TIME 25
#endif /* HAVE_HOST_CPU_supersparc */
Packit 5c3484
Packit 5c3484
#else /* ! __sparc_v8__ */
Packit 5c3484
#if defined (__sparclite__)
Packit 5c3484
/* This has hardware multiply but not divide.  It also has two additional
   instructions scan (ffs from high bit) and divscc.  */

/* (w1,w0) = u * v: the low word is umul's destination, the high word is
   read back from %y.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
	   : "=r" (w1), "=r" (w0)					\
	   : "r" (u), "r" (v))
#define UMUL_TIME 5

/* Divide (n1,n0) by d with a chain of 32 divscc steps.  n1 is written to
   %y first; %g1 carries the intermediate value between steps (and is
   listed as clobbered); the last step writes q.  r is then read from %y,
   and d is added to it by the annulled-branch sequence when the "bl"
   condition holds.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n"					\
"	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
"	tst	%%g0\n"							\
"	divscc	%3,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%%g1\n"						\
"	divscc	%%g1,%4,%0\n"						\
"	rd	%%y,%1\n"						\
"	bl,a 1f\n"							\
"	add	%1,%4,%1\n"						\
"1:	! End of inline udiv_qrnnd"					\
	   : "=r" (q),							\
	     "=r" (r)							\
	   : "r" (n1),							\
	     "r" (n0),							\
	     "rI" (d)							\
	   : "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37

/* Leading-zero count via the sparclite scan instruction.  */
#define count_leading_zeros(count, x) \
  __asm__ ("scan %1,1,%0"						\
	   : "=r" (count)						\
	   : "r" (x))
/* Early sparclites return 63 for an argument of 0, but they warn that future
   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
   undefined.  */
Packit 5c3484
#endif /* __sparclite__ */
Packit 5c3484
#endif /* __sparc_v8__ */
Packit 5c3484
#endif /* __sparc_v9__ */
Packit 5c3484
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
#if !defined (umul_ppmm)
/* (w1,w0) = u * v without a hardware multiply: u is written to %y and the
   product is developed with 32 mulscc steps on v plus a final step with 0.
   %g1 accumulates the high part and %g2 holds a correction term computed
   from the sign of v; both are listed as clobbered.  The instruction order
   at the start is significant (see the "Don't move" notes).  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("! Inlined umul_ppmm\n"					\
"	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n" \
"	sra	%3,31,%%g2	! Don't move this insn\n"		\
"	and	%2,%%g2,%%g2	! Don't move this insn\n"		\
"	andcc	%%g0,0,%%g1	! Don't move this insn\n"		\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,%3,%%g1\n"						\
"	mulscc	%%g1,0,%%g1\n"						\
"	add	%%g1,%%g2,%0\n"						\
"	rd	%%y,%1"							\
	   : "=r" (w1),							\
	     "=r" (w0)							\
	   : "%rI" (u),							\
	     "r" (v)							\
	   : "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39		/* 39 instructions */
#endif
Packit 5c3484
#ifndef udiv_qrnnd
Packit 5c3484
#ifndef LONGLONG_STANDALONE
Packit 5c3484
#define udiv_qrnnd(q, r, n1, n0, d) \
Packit 5c3484
  do { UWtype __r;							\
Packit 5c3484
    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));			\
Packit 5c3484
    (r) = __r;								\
Packit 5c3484
  } while (0)
Packit 5c3484
extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
Packit 5c3484
#ifndef UDIV_TIME
Packit 5c3484
#define UDIV_TIME 140
Packit 5c3484
#endif
Packit 5c3484
#endif /* LONGLONG_STANDALONE */
Packit 5c3484
#endif /* udiv_qrnnd */
Packit 5c3484
#endif /* __sparc__ */
Packit 5c3484
Packit 5c3484
#if defined (__sparc__) && W_TYPE_SIZE == 64
Packit 5c3484
/* (sh,sl) = (ah,al) + (bh,bl) on 64-bit words.  The first addcc produces
   sl; a second add of the upper halves of al and bl (operands 6 and 7,
   result discarded to %g0) is made with the carry from the first, and its
   carry is what addc then folds into the high word.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ (								\
       "addcc	%r4,%5,%1\n"						\
      "	addccc	%r6,%7,%%g0\n"						\
      "	addc	%r2,%3,%0"						\
       : "=r" (sh), "=&r" (sl)						\
       : "rJ"  ((UDItype)(ah)),						\
	 "rI"  ((UDItype)(bh)),						\
	 "%rJ" ((UDItype)(al)),						\
	 "rI"  ((UDItype)(bl)),						\
	 "%rJ" ((UDItype)(al) >> 32),					\
	 "rI"  ((UDItype)(bl) >> 32)					\
	   __CLOBBER_CC)
/* (sh,sl) = (ah,al) - (bh,bl) on 64-bit words, using the same three-step
   scheme as the addition: subcc for sl, subccc on the upper halves of al
   and bl (result discarded to %g0), then subc for the high word.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ (								\
       "subcc	%r4,%5,%1\n"						\
      "	subccc	%r6,%7,%%g0\n"						\
      "	subc	%r2,%3,%0"						\
       : "=r" (sh), "=&r" (sl)						\
       : "rJ" ((UDItype)(ah)),						\
	 "rI" ((UDItype)(bh)),						\
	 "rJ" ((UDItype)(al)),						\
	 "rI" ((UDItype)(bl)),						\
	 "rJ" ((UDItype)(al) >> 32),					\
	 "rI" ((UDItype)(bl) >> 32)					\
	   __CLOBBER_CC)
Packit 5c3484
#if __VIS__ >= 0x300
/* With __VIS__ >= 0x300, replace add_ssaaaa by a two-instruction form
   using addxc, and add umul_ppmm and count_leading_zeros.  */
#undef add_ssaaaa
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ (								\
       "addcc	%r4, %5, %1\n"						\
      "	addxc	%r2, %r3, %0"						\
	  : "=r" (sh), "=&r" (sl)					\
       : "rJ"  ((UDItype)(ah)),						\
	 "rJ"  ((UDItype)(bh)),						\
	 "%rJ" ((UDItype)(al)),						\
	 "rI"  ((UDItype)(bl))						\
	 __CLOBBER_CC)
/* (ph,pl) = m0 * m1: the low word is a C multiply of the UDItype copies,
   the high word comes from umulxhi.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    (pl) = __m0 * __m1;							\
    __asm__ ("umulxhi\t%2, %1, %0"					\
	     : "=r" (ph)						\
	     : "%r" (__m0), "r" (__m1));				\
  } while (0)
/* Leading-zero count via the lzd instruction.  */
#define count_leading_zeros(count, x) \
  __asm__ ("lzd\t%1,%0"							\
	   : "=r" (count)						\
	   : "r" (x))
/* Needed by count_leading_zeros_32 in sparc64.h.  */
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if (defined (__vax) || defined (__vax__)) && W_TYPE_SIZE == 32
Packit 5c3484
/* (sh,sl) = (ah,al) + (bh,bl): addl2 on the low words, adwc on the high
   words.  ah and al are tied to the output operands.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl2 %5,%1\n\tadwc %3,%0"					\
	   : "=g" (sh), "=&g" (sl)					\
	   : "0"  ((USItype)(ah)),					\
	     "g"  ((USItype)(bh)),					\
	     "%1" ((USItype)(al)),					\
	     "g"  ((USItype)(bl)))
/* (sh,sl) = (ah,al) - (bh,bl): subl2 on the low words, sbwc on the high
   words.  ah and al are tied to the output operands.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"					\
	   : "=g" (sh), "=&g" (sl)					\
	   : "0" ((USItype)(ah)),					\
	     "g" ((USItype)(bh)),					\
	     "1" ((USItype)(al)),					\
	     "g" ((USItype)(bl)))
/* (xh,xl) = m0 * m1 using emul with a zero addend.  The UDItype result is
   overlaid with two USItype halves, __l first.  */
#define smul_ppmm(xh, xl, m0, m1) \
  do {									\
    union								\
      {									\
	UDItype __ll;							\
	struct {USItype __l, __h;} __i;					\
      } __x;								\
    USItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("emul %1,%2,$0,%0"						\
	     : "=g" (__x.__ll)						\
	     : "g" (__m0), "g" (__m1));					\
    (xh) = __x.__i.__h;							\
    (xl) = __x.__i.__l;							\
  } while (0)
/* Divide (n1,n0) by d using ediv.  The dividend is assembled in a DItype
   overlaid with two SItype halves, __l first; q and r are written directly
   by the instruction.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do {									\
    union								\
      {									\
	DItype __ll;							\
	struct {SItype __l, __h;} __i;					\
      } __x;								\
    __x.__i.__h = (n1);							\
    __x.__i.__l = (n0);							\
    __asm__ ("ediv %3,%2,%0,%1"						\
	     : "=g" (q), "=g" (r)					\
	     : "g" (__x.__ll), "g" (d));				\
  } while (0)
#if 0
/* FIXME: This instruction appears to be unimplemented on some systems (vax
   8800 maybe). */
#define count_trailing_zeros(count,x)					\
  do {									\
    __asm__ ("ffs 0, 31, %1, %0"					\
	     : "=g" (count)						\
	     : "g" ((USItype) (x)));					\
  } while (0)
#endif
Packit 5c3484
#endif /* vax */
Packit 5c3484
Packit 5c3484
#if defined (__z8000__) && W_TYPE_SIZE == 16
Packit 5c3484
/* (sh,sl) = (ah,al) + (bh,bl) on 16-bit words: add on the low words, adc on
   the high words.  ah and al are tied to the output operands.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add	%H1,%H5\n\tadc	%H0,%H3"				\
	   : "=r" (sh), "=&r" (sl)					\
	   : "0"   ((unsigned int)(ah)),				\
	     "r"   ((unsigned int)(bh)),				\
	     "%1"  ((unsigned int)(al)),				\
	     "rQR" ((unsigned int)(bl)))
/* (sh,sl) = (ah,al) - (bh,bl) on 16-bit words: sub on the low words, sbc on
   the high words.  ah and al are tied to the output operands.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub	%H1,%H5\n\tsbc	%H0,%H3"				\
	   : "=r" (sh), "=&r" (sl)					\
	   : "0"   ((unsigned int)(ah)),				\
	     "r"   ((unsigned int)(bh)),				\
	     "1"   ((unsigned int)(al)),				\
	     "rQR" ((unsigned int)(bl)))
Packit 5c3484
/* (xh,xl) = m0 * m1 (unsigned) on 16-bit words.

   The product comes from "mult"; xh is then adjusted by adding, for each
   operand whose top bit is set, the other operand.  NOTE(review): this is
   the usual signed-to-unsigned high-word correction, which presumes mult is
   a signed multiply -- confirm against the Z8000 manual.

   The operands are copied into unsigned int locals and only the locals are
   used afterwards (including as the asm inputs), so each macro argument is
   evaluated exactly once.  */
#define umul_ppmm(xh, xl, m0, m1) \
  do {									\
    union {long int __ll;						\
	   struct {unsigned int __h, __l;} __i;				\
	  } __x;							\
    unsigned int __m0 = (m0), __m1 = (m1);				\
    __asm__ ("mult	%S0,%H3"					\
	     : "=r" (__x.__i.__h), "=r" (__x.__i.__l)			\
	     : "%1" (__m0), "rQR" (__m1));				\
    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
    (xh) += ((((signed int) __m0 >> 15) & __m1)				\
	     + (((signed int) __m1 >> 15) & __m0));			\
  } while (0)
Packit 5c3484
#endif /* __z8000__ */
Packit 5c3484
Packit 5c3484
#endif /* __GNUC__ */
Packit 5c3484
Packit 5c3484
#endif /* NO_ASM */
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* FIXME: "sidi" here is highly doubtful, should sometimes be "diti".  */
/* When no umul_ppmm exists yet but a __umulsidi3 macro does, build umul_ppmm
   from it: the UDWtype product is split into its high word (ph) and low
   word (pl).  */
#if !defined (umul_ppmm) && defined (__umulsidi3)
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDWtype __ll = __umulsidi3 (m0, m1);				\
    (ph) = (UWtype) (__ll >> W_TYPE_SIZE);				\
    (pl) = (UWtype) __ll;						\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
/* Double-word product of two words, expressed through umul_ppmm.  This is a
   GNU statement expression whose value is (high << W_TYPE_SIZE) | low as a
   UDWtype.  */
#if !defined (__umulsidi3)
#define __umulsidi3(u, v) \
  ({UWtype __hi, __lo;							\
    umul_ppmm (__hi, __lo, u, v);					\
    ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Linkage specifier placed after "extern" in this header's function
   declarations: "C" when compiled as C++, nothing when compiled as C.  */
#ifdef __cplusplus
#define __longlong_h_C "C"
#else
#define __longlong_h_C
#endif
Packit 5c3484
Packit 5c3484
/* Use mpn_umul_ppmm or mpn_udiv_qrnnd functions, if they exist.  The "_r"
Packit 5c3484
   forms have "reversed" arguments, meaning the pointer is last, which
Packit 5c3484
   sometimes allows better parameter passing, in particular on 64-bit
Packit 5c3484
   hppa. */
Packit 5c3484
Packit 5c3484
#define mpn_umul_ppmm  __MPN(umul_ppmm)
Packit 5c3484
extern __longlong_h_C UWtype mpn_umul_ppmm (UWtype *, UWtype, UWtype);
Packit 5c3484
Packit 5c3484
#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm  \
Packit 5c3484
  && ! defined (LONGLONG_STANDALONE)
Packit 5c3484
#define umul_ppmm(wh, wl, u, v)						\
Packit 5c3484
  do {									\
Packit 5c3484
    UWtype __umul_ppmm__p0;						\
Packit 5c3484
    (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v));\
Packit 5c3484
    (wl) = __umul_ppmm__p0;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#define mpn_umul_ppmm_r  __MPN(umul_ppmm_r)
Packit 5c3484
extern __longlong_h_C UWtype mpn_umul_ppmm_r (UWtype, UWtype, UWtype *);
Packit 5c3484
Packit 5c3484
#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r	\
Packit 5c3484
  && ! defined (LONGLONG_STANDALONE)
Packit 5c3484
#define umul_ppmm(wh, wl, u, v)						\
Packit 5c3484
  do {									\
Packit 5c3484
    UWtype __umul_p0;							\
Packit 5c3484
    (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_p0);	\
Packit 5c3484
    (wl) = __umul_p0;							\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#define mpn_udiv_qrnnd  __MPN(udiv_qrnnd)
Packit 5c3484
extern __longlong_h_C UWtype mpn_udiv_qrnnd (UWtype *, UWtype, UWtype, UWtype);
Packit 5c3484
Packit 5c3484
#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd	\
Packit 5c3484
  && ! defined (LONGLONG_STANDALONE)
Packit 5c3484
#define udiv_qrnnd(q, r, n1, n0, d)					\
Packit 5c3484
  do {									\
Packit 5c3484
    UWtype __udiv_qrnnd_r;						\
Packit 5c3484
    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd_r,				\
Packit 5c3484
			  (UWtype) (n1), (UWtype) (n0), (UWtype) d);	\
Packit 5c3484
    (r) = __udiv_qrnnd_r;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#define mpn_udiv_qrnnd_r  __MPN(udiv_qrnnd_r)
Packit 5c3484
extern __longlong_h_C UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *);
Packit 5c3484
Packit 5c3484
#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r	\
Packit 5c3484
  && ! defined (LONGLONG_STANDALONE)
Packit 5c3484
#define udiv_qrnnd(q, r, n1, n0, d)					\
Packit 5c3484
  do {									\
Packit 5c3484
    UWtype __udiv_qrnnd_r;						\
Packit 5c3484
    (q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) d,	\
Packit 5c3484
			    &__udiv_qrnnd_r);				\
Packit 5c3484
    (r) = __udiv_qrnnd_r;						\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* If this machine has no inline assembler, use C macros.  */

/* (sh,sl) = (ah,al) + (bh,bl).  The low words are added first; a carry is
   detected by the wrapped low sum being smaller than al and is added into
   the high word.  Any carry out of the high word is not reported.
   Note that al is evaluated twice.  */
#if !defined (add_ssaaaa)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __x;								\
    __x = (al) + (bl);							\
    (sh) = (ah) + (bh) + (__x < (al));					\
    (sl) = __x;								\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
/* (sh,sl) = (ah,al) - (bh,bl).  The low words are subtracted first; a
   borrow is taken from the high word when al < bl.  Any borrow out of the
   high word is not reported.  Note that al and bl are each evaluated
   twice.  */
#if !defined (sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __x;								\
    __x = (al) - (bl);							\
    (sh) = (ah) - (bh) - ((al) < (bl));					\
    (sl) = __x;								\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
   smul_ppmm.  The high word delivered by smul_ppmm is corrected by adding
   v when the top bit of u is set and u when the top bit of v is set; the
   low word is used as delivered.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v)						\
  do {									\
    UWtype __w1;							\
    UWtype __xm0 = (u), __xm1 = (v);					\
    smul_ppmm (__w1, w0, __xm0, __xm1);					\
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)		\
		+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);		\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
/* If we still don't have umul_ppmm, define it using plain C.

   (w1,w0) = u * v, computed from the four half-word partial products.
   u and v are each evaluated once.

   For reference, when this code is used for squaring (ie. u and v identical
   expressions), gcc recognises __x1 and __x2 are the same and generates 3
   multiplies, not 4.  The subsequent additions could be optimized a bit,
   but the only place GMP currently uses such a square is mpn_sqr_basecase,
   and chips obliged to use this generic C umul will have plenty of worse
   performance problems than a couple of extra instructions on the diagonal
   of sqr_basecase.  */

#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v)						\
  do {									\
    UWtype __x0, __x1, __x2, __x3;					\
    UHWtype __ul, __vl, __uh, __vh;					\
    UWtype __u = (u), __v = (v);					\
									\
    __ul = __ll_lowpart (__u);						\
    __uh = __ll_highpart (__u);						\
    __vl = __ll_lowpart (__v);						\
    __vh = __ll_highpart (__v);						\
									\
    __x0 = (UWtype) __ul * __vl;					\
    __x1 = (UWtype) __ul * __vh;					\
    __x2 = (UWtype) __uh * __vl;					\
    __x3 = (UWtype) __uh * __vh;					\
									\
    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
    __x1 += __x2;		/* but this indeed can */		\
    if (__x1 < __x2)		/* did we get it? */			\
      __x3 += __ll_B;		/* yes, add it in the proper pos. */	\
									\
    (w1) = __x3 + __ll_highpart (__x1);					\
    (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0);		\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
   exist in one form or another).  The high word delivered by umul_ppmm is
   corrected by subtracting v when the top bit of u is set and u when the
   top bit of v is set; the low word is used as delivered.  */
#if !defined (smul_ppmm)
#define smul_ppmm(w1, w0, u, v)						\
  do {									\
    UWtype __w1;							\
    UWtype __xm0 = (u), __xm1 = (v);					\
    umul_ppmm (__w1, w0, __xm0, __xm1);					\
    (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)		\
		- (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);		\
  } while (0)
#endif
Packit 5c3484
Packit 5c3484
/* Define this unconditionally, so it can be used for debugging.

   Plain C division of the two-word value (n1,n0) by d, giving quotient q
   and remainder r.  The quotient is produced one half-word at a time: each
   half is first estimated by dividing by the high half of d and then
   decremented at most twice.  Requires d != 0 and n1 < d (both asserted).
   The arguments d, n1 and n0 are evaluated several times.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {									\
    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;			\
									\
    ASSERT ((d) != 0);							\
    ASSERT ((n1) < (d));						\
									\
    __d1 = __ll_highpart (d);						\
    __d0 = __ll_lowpart (d);						\
									\
    /* High half of the quotient.  */					\
    __q1 = (n1) / __d1;							\
    __r1 = (n1) - __q1 * __d1;						\
    __m = __q1 * __d0;							\
    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
    if (__r1 < __m)							\
      {									\
	__q1--, __r1 += (d);						\
	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
	  if (__r1 < __m)						\
	    __q1--, __r1 += (d);					\
      }									\
    __r1 -= __m;							\
									\
    /* Low half of the quotient, same scheme on the partial remainder.  */\
    __q0 = __r1 / __d1;							\
    __r0 = __r1  - __q0 * __d1;						\
    __m = __q0 * __d0;							\
    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
    if (__r0 < __m)							\
      {									\
	__q0--, __r0 += (d);						\
	if (__r0 >= (d))						\
	  if (__r0 < __m)						\
	    __q0--, __r0 += (d);					\
      }									\
    __r0 -= __m;							\
									\
    (q) = __q1 * __ll_B | __q0;						\
    (r) = __r0;								\
  } while (0)
Packit 5c3484
Packit 5c3484
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  The function's return
   value becomes the quotient (q) and the word stored through the pointer
   becomes the remainder (r).  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) \
  && ! defined (LONGLONG_STANDALONE)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {									\
    UWtype __r;								\
    (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d);				\
    (r) = __r;								\
  } while (0)
__GMP_DECLSPEC UWtype __MPN(udiv_w_sdiv) (UWtype *, UWtype, UWtype, UWtype);
#endif
Packit 5c3484
Packit 5c3484
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   UDIV_NEEDS_NORMALIZATION is set to 1 together with this fallback.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
Packit 5c3484
Packit 5c3484
/* Plain C count_leading_zeros using the __clz_tab lookup table.

   A shift amount __a is chosen first: by direct comparisons against
   multiples of __BITS4 when W_TYPE_SIZE is 32, otherwise by scanning bytes
   from the most significant end.  The result is then derived from __a and
   the table entry for x >> __a.  x is evaluated once.  */
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do {									\
    UWtype __xr = (x);							\
    UWtype __a;								\
									\
    if (W_TYPE_SIZE == 32)						\
      {									\
	__a = __xr < ((UWtype) 1 << 2*__BITS4)				\
	  ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1)		\
	  : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1		\
	  : 3*__BITS4 + 1);						\
      }									\
    else								\
      {									\
	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
	  if (((__xr >> __a) & 0xff) != 0)				\
	    break;							\
	++__a;								\
      }									\
									\
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];		\
  } while (0)
/* This version gives a well-defined value for zero. */
#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#define COUNT_LEADING_ZEROS_SLOW
#endif
Packit 5c3484
Packit 5c3484
/* clz_tab needed by mpn/x86/pentium/mod_1.asm in a fat binary */
#if HAVE_HOST_CPU_FAMILY_x86 && WANT_FAT_BINARY
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#endif

/* Declare the lookup table only when something has asked for it.  */
#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
extern const unsigned char __GMP_DECLSPEC __clz_tab[129];
#endif
Packit 5c3484
Packit 5c3484
#if !defined (count_trailing_zeros)
#if !defined (COUNT_LEADING_ZEROS_SLOW)
/* Define count_trailing_zeros using an asm count_leading_zeros.  The lowest
   set bit is isolated with x & -x and its position is derived from the
   leading-zero count of that value.  x must be non-zero (asserted) and is
   evaluated once.  */
#define count_trailing_zeros(count, x)					\
  do {									\
    UWtype __ctz_x = (x);						\
    UWtype __ctz_c;							\
    ASSERT (__ctz_x != 0);						\
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);			\
    (count) = W_TYPE_SIZE - 1 - __ctz_c;				\
  } while (0)
#else
/* Define count_trailing_zeros in plain C, assuming small counts are common.
   We use clz_tab without ado, since the C count_leading_zeros above will have
   pulled it in.  The low byte is tried first; otherwise x is shifted right
   a byte at a time until a non-zero byte is found.  */
#define count_trailing_zeros(count, x)					\
  do {									\
    UWtype __ctz_x = (x);						\
    int __ctz_c;							\
									\
    if (LIKELY ((__ctz_x & 0xff) != 0))					\
      (count) = __clz_tab[__ctz_x & -__ctz_x] - 2;			\
    else								\
      {									\
	for (__ctz_c = 8 - 2; __ctz_c < W_TYPE_SIZE - 2; __ctz_c += 8)	\
	  {								\
	    __ctz_x >>= 8;						\
	    if (LIKELY ((__ctz_x & 0xff) != 0))				\
	      break;							\
	  }								\
									\
	(count) = __ctz_c + __clz_tab[__ctz_x & -__ctz_x];		\
      }									\
  } while (0)
#endif
#endif
Packit 5c3484
Packit 5c3484
/* Unless set earlier, udiv_qrnnd is taken not to need normalization.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif

/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and
   that hence the latter should always be used.  */
#if !defined (UDIV_PREINV_ALWAYS)
#define UDIV_PREINV_ALWAYS 0
#endif

/* Give defaults for UMUL_TIME and UDIV_TIME.  UDIV_TIME defaults to
   whatever UMUL_TIME is.  */
#if !defined (UMUL_TIME)
#define UMUL_TIME 1
#endif

#if !defined (UDIV_TIME)
#define UDIV_TIME UMUL_TIME
#endif