Blame libcelt/ecintrin.h

Packit 664db3
/*Some common macros for potential platform-specific optimization.*/
Packit 664db3
#include <math.h>
Packit 664db3
#include <limits.h>
Packit 664db3
#if !defined(_ecintrin_H)
Packit 664db3
# define _ecintrin_H (1)
Packit 664db3
Packit 664db3
/*Some specific platforms may have optimized intrinsic or inline assembly
Packit 664db3
   versions of these functions which can substantially improve performance.
Packit 664db3
  We define macros for them to allow easy incorporation of these non-ANSI
Packit 664db3
   features.*/
Packit 664db3
Packit 664db3
/*Note that we do not provide a macro for abs(), because it is provided as a
Packit 664db3
   library function, which we assume is translated into an intrinsic to avoid
Packit 664db3
   the function call overhead and then implemented in the smartest way for the
Packit 664db3
   target platform.
Packit 664db3
  With modern gcc (4.x), this is true: it uses cmov instructions if the
Packit 664db3
   architecture supports it and branchless bit-twiddling if it does not (the
Packit 664db3
   speed difference between the two approaches is not measurable).
Packit 664db3
  Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150)
Packit 664db3
   by Sun Microsystems, despite prior art dating back to at least 1996:
Packit 664db3
   http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT
Packit 664db3
  On gcc 3.x, however, our assumption is not true, as abs() is translated to a
Packit 664db3
   conditional jump, which is horrible on deeply pipelined architectures (e.g.,
Packit 664db3
   all consumer architectures for the past decade or more) when the sign cannot
Packit 664db3
   be reliably predicted.*/
Packit 664db3
Packit 664db3
/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
Packit 664db3
   given an appropriate architecture, but the branchless bit-twiddling versions
Packit 664db3
   are just as fast, and do not require any special target architecture.
Packit 664db3
  Earlier gcc versions (3.x) compiled both versions to the same assembly
Packit 664db3
   instructions, because of the way they represented ((_b)>(_a)) internally.*/
Packit 664db3
/*Branchless maximum: the mask is all ones exactly when _a<_b, and in that
   case the difference (_a)-(_b) is taken back off of _a, leaving _b.
  Each argument is expanded more than once.*/
#define EC_MAXI(_a,_b)      ((_a)-(((_a)-(_b))&(0-((_a)<(_b)))))
Packit 664db3
/*Branchless minimum: the mask is all ones exactly when _a>_b, and in that
   case the difference (_b)-(_a) is added onto _a, leaving _b.
  Each argument is expanded more than once.*/
#define EC_MINI(_a,_b)      ((_a)+(((_b)-(_a))&(0-((_a)>(_b)))))
Packit 664db3
/*This has a chance of compiling branchless, and is just as fast as the
Packit 664db3
   bit-twiddling method, which is slightly less portable, since it relies on a
Packit 664db3
   sign-extended rightshift, which is not guaranteed by ANSI (but present on
Packit 664db3
   every relevant platform).*/
Packit 664db3
/*Yields 1 for a positive argument, -1 for a negative one, and 0 for zero, as
   the difference of two comparison results.*/
#define EC_SIGNI(_a)        ((0<(_a))-(0>(_a)))
Packit 664db3
/*Slightly more portable than relying on a sign-extended right-shift (which is
Packit 664db3
   not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both)
Packit 664db3
   compile it into the right-shift anyway.*/
Packit 664db3
/*Yields -1 (all bits set) when the argument is negative and 0 otherwise.*/
#define EC_SIGNMASK(_a)     (0-(0>(_a)))
Packit 664db3
/*Clamps an integer into the given range.
Packit 664db3
  If _a>_c, then the lower bound _a is respected over the upper bound _c (this
Packit 664db3
   behavior is required to meet our documented API behavior).
Packit 664db3
  _a: The lower bound.
Packit 664db3
  _b: The value to clamp.
Packit 664db3
  _c: The upper bound.*/
Packit 664db3
/*The value is first capped at _c and the result is then raised to at least
   _a, which is why the lower bound wins when the two bounds cross.*/
#define EC_CLAMPI(_a,_b,_c) (EC_MAXI((_a),EC_MINI((_b),(_c))))
Packit 664db3
Packit 664db3
Packit 664db3
/*Count leading zeros.
  This macro should only be used for implementing ec_ilog(), if it is defined.
  All other code should use EC_ILOG() instead.
  EC_CLZ(_x) maps onto the gcc builtin for the first of unsigned/unsigned long
   that is at least 32 bits wide, and EC_CLZ0 is the width in bits of that
   type.
  Neither macro is defined unless __GNUC_PREREQ exists and __GNUC_PREREQ(3,4)
   holds (NOTE(review): __GNUC_PREREQ is presumably supplied by the C library
   headers pulled in above; confirm for non-glibc targets).*/
#ifdef __GNUC_PREREQ
# if __GNUC_PREREQ(3,4)
/*EC_CLZ0 is cast to int and fully parenthesized: a bare sizeof expression has
   type size_t, which would make EC_ILOG() unsigned, and an unparenthesized
   product would mis-associate in expressions such as n/EC_CLZ0.*/
#  if INT_MAX>=2147483647
#   define EC_CLZ0 ((int)sizeof(unsigned)*CHAR_BIT)
#   define EC_CLZ(_x) (__builtin_clz(_x))
#  elif LONG_MAX>=2147483647L
#   define EC_CLZ0 ((int)sizeof(unsigned long)*CHAR_BIT)
#   define EC_CLZ(_x) (__builtin_clzl(_x))
#  endif
# endif
#endif

/*EC_ILOG(_x) picks the integer-log implementation for this build.
  When EC_CLZ is available it is the operand width minus the number of leading
   zeros, i.e., the 1-based position of the highest set bit of _x, as an int.*/
#if defined(EC_CLZ)
/*Note that __builtin_clz is not defined when _x==0, according to the gcc
   documentation (and that of the BSR instruction that implements it on x86).
  The majority of the time we can never pass it zero.
  When we need to, it can be special cased.*/
# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x))
#elif defined(ENABLE_TI_DSPLIB)
/*NOTE(review): _lnorm() is presumably made available through dsplib.h;
   confirm its behavior for _x==0 against the TI documentation.*/
# include "dsplib.h"
# define EC_ILOG(_x) (31-_lnorm(_x))
#else
/*No intrinsic available: defer to the ec_ilog() function.*/
# define EC_ILOG(_x) (ec_ilog(_x))
#endif
Packit 664db3
Packit 664db3
/*Count leading zeros of a 64-bit value.
  EC_CLZ64(_x) maps onto the gcc builtin for the first of unsigned, unsigned
   long, or unsigned long long that is at least 64 bits wide, and EC_CLZ64_0
   is the width in bits of that type.
  Neither macro is defined unless __GNUC_PREREQ exists and __GNUC_PREREQ(3,4)
   holds (NOTE(review): __GNUC_PREREQ is presumably supplied by the C library
   headers pulled in above; confirm for non-glibc targets).*/
#ifdef __GNUC_PREREQ
# if __GNUC_PREREQ(3,4)
/*EC_CLZ64_0 is cast to int and fully parenthesized: a bare sizeof expression
   has type size_t, which would make EC_ILOG64() unsigned, and an
   unparenthesized product would mis-associate in expressions such as
   n/EC_CLZ64_0.*/
#  if INT_MAX>=9223372036854775807
#   define EC_CLZ64_0 ((int)sizeof(unsigned)*CHAR_BIT)
#   define EC_CLZ64(_x) (__builtin_clz(_x))
#  elif LONG_MAX>=9223372036854775807L
#   define EC_CLZ64_0 ((int)sizeof(unsigned long)*CHAR_BIT)
#   define EC_CLZ64(_x) (__builtin_clzl(_x))
#  elif LLONG_MAX>=9223372036854775807LL
#   define EC_CLZ64_0 ((int)sizeof(unsigned long long)*CHAR_BIT)
#   define EC_CLZ64(_x) (__builtin_clzll(_x))
#  endif
# endif
#endif

/*EC_ILOG64(_x) picks the 64-bit integer-log implementation for this build.
  When EC_CLZ64 is available it is the operand width minus the number of
   leading zeros, i.e., the 1-based position of the highest set bit of _x, as
   an int.*/
#if defined(EC_CLZ64)
/*Note that __builtin_clz is not defined when _x==0, according to the gcc
   documentation (and that of the BSR instruction that implements it on x86).
  The majority of the time we can never pass it zero.
  When we need to, it can be special cased.*/
# define EC_ILOG64(_x) (EC_CLZ64_0-EC_CLZ64(_x))
#else
/*No intrinsic available: defer to the ec_ilog64() function.*/
# define EC_ILOG64(_x) (ec_ilog64(_x))
#endif
Packit 664db3
Packit 664db3
#endif