|
Packit |
5c3484 |
/* Alternate implementations of binvert_limb to compare speeds. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/*
|
|
Packit |
5c3484 |
Copyright 2000, 2002 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include <stdio.h>
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
#include "speed.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Like the standard version in gmp-impl.h, but with the expressions using a
|
|
Packit |
5c3484 |
"1-" form. This has the same number of steps, but "1-" is on the
|
|
Packit |
5c3484 |
dependent chain, whereas the "2*" in the standard version isn't.
|
|
Packit |
5c3484 |
Depending on the CPU this should be the same or a touch slower. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if GMP_LIMB_BITS <= 32
|
|
Packit |
5c3484 |
#define binvert_limb_mul1(inv,n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_limb_t __n = (n); \
|
|
Packit |
5c3484 |
mp_limb_t __inv; \
|
|
Packit |
5c3484 |
ASSERT ((__n & 1) == 1); \
|
|
Packit |
5c3484 |
__inv = binvert_limb_table[(__n&0xFF)/2]; /* 8 */ \
|
|
Packit |
5c3484 |
__inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \
|
|
Packit |
5c3484 |
__inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \
|
|
Packit |
5c3484 |
ASSERT (__inv * __n == 1); \
|
|
Packit |
5c3484 |
(inv) = __inv; \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
|
|
Packit |
5c3484 |
#define binvert_limb_mul1(inv,n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_limb_t __n = (n); \
|
|
Packit |
5c3484 |
mp_limb_t __inv; \
|
|
Packit |
5c3484 |
ASSERT ((__n & 1) == 1); \
|
|
Packit |
5c3484 |
__inv = binvert_limb_table[(__n&0xFF)/2]; /* 8 */ \
|
|
Packit |
5c3484 |
__inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \
|
|
Packit |
5c3484 |
__inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \
|
|
Packit |
5c3484 |
__inv = (1 - __n * __inv) * __inv + __inv; /* 64 */ \
|
|
Packit |
5c3484 |
ASSERT (__inv * __n == 1); \
|
|
Packit |
5c3484 |
(inv) = __inv; \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* The loop based version used in GMP 3.0 and earlier. Usually slower than
|
|
Packit |
5c3484 |
multiplying, due to the number of steps that must be performed. Much
|
|
Packit |
5c3484 |
slower when the processor has a good multiply. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define binvert_limb_loop(inv,n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_limb_t __v = (n); \
|
|
Packit |
5c3484 |
mp_limb_t __v_orig = __v; \
|
|
Packit |
5c3484 |
mp_limb_t __make_zero = 1; \
|
|
Packit |
5c3484 |
mp_limb_t __two_i = 1; \
|
|
Packit |
5c3484 |
mp_limb_t __v_inv = 0; \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
ASSERT ((__v & 1) == 1); \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
do \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
while ((__two_i & __make_zero) == 0) \
|
|
Packit |
5c3484 |
__two_i <<= 1, __v <<= 1; \
|
|
Packit |
5c3484 |
__v_inv += __two_i; \
|
|
Packit |
5c3484 |
__make_zero -= __v; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
while (__make_zero); \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
ASSERT (__v_orig * __v_inv == 1); \
|
|
Packit |
5c3484 |
(inv) = __v_inv; \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Another loop based version with conditionals, but doing a fixed number of
|
|
Packit |
5c3484 |
steps. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define binvert_limb_cond(inv,n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_limb_t __n = (n); \
|
|
Packit |
5c3484 |
mp_limb_t __rem = (1 - __n) >> 1; \
|
|
Packit |
5c3484 |
mp_limb_t __inv = GMP_LIMB_HIGHBIT; \
|
|
Packit |
5c3484 |
int __count; \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
ASSERT ((__n & 1) == 1); \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
__count = GMP_LIMB_BITS-1; \
|
|
Packit |
5c3484 |
do \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
__inv >>= 1; \
|
|
Packit |
5c3484 |
if (__rem & 1) \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
__inv |= GMP_LIMB_HIGHBIT; \
|
|
Packit |
5c3484 |
__rem -= __n; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
__rem >>= 1; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
while (-- __count); \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
ASSERT (__inv * __n == 1); \
|
|
Packit |
5c3484 |
(inv) = __inv; \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Another loop based bitwise version, but purely arithmetic, no
|
|
Packit |
5c3484 |
conditionals. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define binvert_limb_arith(inv,n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_limb_t __n = (n); \
|
|
Packit |
5c3484 |
mp_limb_t __rem = (1 - __n) >> 1; \
|
|
Packit |
5c3484 |
mp_limb_t __inv = GMP_LIMB_HIGHBIT; \
|
|
Packit |
5c3484 |
mp_limb_t __lowbit; \
|
|
Packit |
5c3484 |
int __count; \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
ASSERT ((__n & 1) == 1); \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
__count = GMP_LIMB_BITS-1; \
|
|
Packit |
5c3484 |
do \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
__lowbit = __rem & 1; \
|
|
Packit |
5c3484 |
__inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1)); \
|
|
Packit |
5c3484 |
__rem = (__rem - (__n & -__lowbit)) >> 1; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
while (-- __count); \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
ASSERT (__inv * __n == 1); \
|
|
Packit |
5c3484 |
(inv) = __inv; \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_binvert_limb_mul1 (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_binvert_limb_loop (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_binvert_limb_cond (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_binvert_limb_arith (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
|
|
Packit |
5c3484 |
}
|