Blame tune/modlinv.c

Packit 5c3484
/* Alternate implementations of binvert_limb to compare speeds. */
Packit 5c3484
Packit 5c3484
/*
Packit 5c3484
Copyright 2000, 2002 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include <stdio.h>
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
#include "speed.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Like the standard version in gmp-impl.h, but with the expressions using a
Packit 5c3484
   "1-" form.  This has the same number of steps, but "1-" is on the
Packit 5c3484
   dependent chain, whereas the "2*" in the standard version isn't.
Packit 5c3484
   Depending on the CPU this should be the same or a touch slower.  */
Packit 5c3484
Packit 5c3484
#if GMP_LIMB_BITS <= 32
Packit 5c3484
#define binvert_limb_mul1(inv,n)                                \
Packit 5c3484
  do {                                                          \
Packit 5c3484
    mp_limb_t  __n = (n);                                       \
Packit 5c3484
    mp_limb_t  __inv;                                           \
Packit 5c3484
    ASSERT ((__n & 1) == 1);                                    \
Packit 5c3484
    __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
Packit 5c3484
    __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
Packit 5c3484
    __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
Packit 5c3484
    ASSERT (__inv * __n == 1);                                  \
Packit 5c3484
    (inv) = __inv;                                              \
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
Packit 5c3484
#define binvert_limb_mul1(inv,n)                                \
Packit 5c3484
  do {                                                          \
Packit 5c3484
    mp_limb_t  __n = (n);                                       \
Packit 5c3484
    mp_limb_t  __inv;                                           \
Packit 5c3484
    ASSERT ((__n & 1) == 1);                                    \
Packit 5c3484
    __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
Packit 5c3484
    __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
Packit 5c3484
    __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
Packit 5c3484
    __inv = (1 - __n * __inv) * __inv + __inv;  /* 64 */        \
Packit 5c3484
    ASSERT (__inv * __n == 1);                                  \
Packit 5c3484
    (inv) = __inv;                                              \
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* The loop based version used in GMP 3.0 and earlier.  Usually slower than
Packit 5c3484
   multiplying, due to the number of steps that must be performed.  Much
Packit 5c3484
   slower when the processor has a good multiply.  */
Packit 5c3484
Packit 5c3484
#define binvert_limb_loop(inv,n)                \
Packit 5c3484
  do {                                          \
Packit 5c3484
    mp_limb_t  __v = (n);                       \
Packit 5c3484
    mp_limb_t  __v_orig = __v;                  \
Packit 5c3484
    mp_limb_t  __make_zero = 1;                 \
Packit 5c3484
    mp_limb_t  __two_i = 1;                     \
Packit 5c3484
    mp_limb_t  __v_inv = 0;                     \
Packit 5c3484
                                                \
Packit 5c3484
    ASSERT ((__v & 1) == 1);                    \
Packit 5c3484
                                                \
Packit 5c3484
    do                                          \
Packit 5c3484
      {                                         \
Packit 5c3484
        while ((__two_i & __make_zero) == 0)    \
Packit 5c3484
          __two_i <<= 1, __v <<= 1;             \
Packit 5c3484
        __v_inv += __two_i;                     \
Packit 5c3484
        __make_zero -= __v;                     \
Packit 5c3484
      }                                         \
Packit 5c3484
    while (__make_zero);                        \
Packit 5c3484
                                                \
Packit 5c3484
    ASSERT (__v_orig * __v_inv == 1);           \
Packit 5c3484
    (inv) = __v_inv;                            \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Another loop based version with conditionals, but doing a fixed number of
Packit 5c3484
   steps. */
Packit 5c3484
Packit 5c3484
#define binvert_limb_cond(inv,n)                \
Packit 5c3484
  do {                                          \
Packit 5c3484
    mp_limb_t  __n = (n);                       \
Packit 5c3484
    mp_limb_t  __rem = (1 - __n) >> 1;          \
Packit 5c3484
    mp_limb_t  __inv = GMP_LIMB_HIGHBIT;        \
Packit 5c3484
    int        __count;                         \
Packit 5c3484
                                                \
Packit 5c3484
    ASSERT ((__n & 1) == 1);                    \
Packit 5c3484
                                                \
Packit 5c3484
    __count = GMP_LIMB_BITS-1;               \
Packit 5c3484
    do                                          \
Packit 5c3484
      {                                         \
Packit 5c3484
        __inv >>= 1;                            \
Packit 5c3484
        if (__rem & 1)                          \
Packit 5c3484
          {                                     \
Packit 5c3484
            __inv |= GMP_LIMB_HIGHBIT;          \
Packit 5c3484
            __rem -= __n;                       \
Packit 5c3484
          }                                     \
Packit 5c3484
        __rem >>= 1;                            \
Packit 5c3484
      }                                         \
Packit 5c3484
    while (-- __count);                         \
Packit 5c3484
                                                \
Packit 5c3484
    ASSERT (__inv * __n == 1);                  \
Packit 5c3484
    (inv) = __inv;                              \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Another loop based bitwise version, but purely arithmetic, no
Packit 5c3484
   conditionals. */
Packit 5c3484
Packit 5c3484
#define binvert_limb_arith(inv,n)                                       \
Packit 5c3484
  do {                                                                  \
Packit 5c3484
    mp_limb_t  __n = (n);                                               \
Packit 5c3484
    mp_limb_t  __rem = (1 - __n) >> 1;                                  \
Packit 5c3484
    mp_limb_t  __inv = GMP_LIMB_HIGHBIT;                                \
Packit 5c3484
    mp_limb_t  __lowbit;                                                \
Packit 5c3484
    int        __count;                                                 \
Packit 5c3484
                                                                        \
Packit 5c3484
    ASSERT ((__n & 1) == 1);                                            \
Packit 5c3484
                                                                        \
Packit 5c3484
    __count = GMP_LIMB_BITS-1;                                       \
Packit 5c3484
    do                                                                  \
Packit 5c3484
      {                                                                 \
Packit 5c3484
        __lowbit = __rem & 1;                                           \
Packit 5c3484
        __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1));      \
Packit 5c3484
        __rem = (__rem - (__n & -__lowbit)) >> 1;                       \
Packit 5c3484
      }                                                                 \
Packit 5c3484
    while (-- __count);                                                 \
Packit 5c3484
                                                                        \
Packit 5c3484
    ASSERT (__inv * __n == 1);                                          \
Packit 5c3484
    (inv) = __inv;                                                      \
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
double
Packit 5c3484
speed_binvert_limb_mul1 (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
Packit 5c3484
}
Packit 5c3484
double
Packit 5c3484
speed_binvert_limb_loop (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
Packit 5c3484
}
Packit 5c3484
double
Packit 5c3484
speed_binvert_limb_cond (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
Packit 5c3484
}
Packit 5c3484
double
Packit 5c3484
speed_binvert_limb_arith (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
Packit 5c3484
}