|
Packit |
5c3484 |
/* Create tuned thresholds for various algorithms.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Usage: tuneup [-t] [-t] [-p precision]
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
-t turns on some diagnostic traces, a second -t turns on more traces.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Notes:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The code here isn't a vision of loveliness, mainly because it's subject
|
|
Packit |
5c3484 |
to ongoing changes according to new things wanting to be tuned, and
|
|
Packit |
5c3484 |
practical requirements of systems tested.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Sometimes running the program twice produces slightly different results.
|
|
Packit |
5c3484 |
This is probably because there's so little separating algorithms near
|
|
Packit |
5c3484 |
their crossover, and on that basis it should make little or no difference
|
|
Packit |
5c3484 |
to the final speed of the relevant routines, but nothing has been done to
|
|
Packit |
5c3484 |
check that carefully.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Algorithm:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The thresholds are determined as follows. A crossover may not be a
|
|
Packit |
5c3484 |
single size but rather a range where it oscillates between method A and
|
|
Packit |
5c3484 |
method B faster. If the threshold is set making B used where A is faster
|
|
Packit |
5c3484 |
(or vice versa) that's bad. Badness is the percentage time lost and
|
|
Packit |
5c3484 |
total badness is the sum of this over all sizes measured. The threshold
|
|
Packit |
5c3484 |
is set to minimize total badness.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Suppose, as sizes increase, method B becomes faster than method A. The
|
|
Packit |
5c3484 |
effect of the rule is that, as you look at increasing sizes, isolated
|
|
Packit |
5c3484 |
points where B is faster are ignored, but when it's consistently faster,
|
|
Packit |
5c3484 |
or faster on balance, then the threshold is set there. The same result
|
|
Packit |
5c3484 |
is obtained thinking in the other direction of A becoming faster at
|
|
Packit |
5c3484 |
smaller sizes.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
In practice the thresholds tend to be chosen to bring on the next
|
|
Packit |
5c3484 |
algorithm fairly quickly.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This rule is attractive because it's got a basis in reason and is fairly
|
|
Packit |
5c3484 |
easy to implement, but no work has been done to actually compare it in
|
|
Packit |
5c3484 |
absolute terms to other possibilities.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Implementation:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
In a normal library build the thresholds are constants. To tune them
|
|
Packit |
5c3484 |
selected objects are recompiled with the thresholds as global variables
|
|
Packit |
5c3484 |
instead. #define TUNE_PROGRAM_BUILD does this, with help from code at
|
|
Packit |
5c3484 |
the end of gmp-impl.h, and rules in tune/Makefile.am.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
MUL_TOOM22_THRESHOLD for example uses a recompiled mpn_mul_n. The
|
|
Packit |
5c3484 |
threshold is set to "size+1" to avoid karatsuba, or to "size" to use one
|
|
Packit |
5c3484 |
level, but recurse into the basecase.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
MUL_TOOM33_THRESHOLD makes use of the tuned MUL_TOOM22_THRESHOLD value.
|
|
Packit |
5c3484 |
Other routines in turn will make use of both of those. Naturally the
|
|
Packit |
5c3484 |
dependencies must be tuned first.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
In a couple of cases, like DIVEXACT_1_THRESHOLD, there's no recompiling,
|
|
Packit |
5c3484 |
just a threshold based on comparing two routines (mpn_divrem_1 and
|
|
Packit |
5c3484 |
mpn_divexact_1), and no further use of the value determined.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Flags like USE_PREINV_MOD_1 or JACOBI_BASE_METHOD are even simpler, being
|
|
Packit |
5c3484 |
just comparisons between certain routines on representative data.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Shortcuts are applied when native (assembler) versions of routines exist.
|
|
Packit |
5c3484 |
For instance a native mpn_sqr_basecase is assumed to be always faster
|
|
Packit |
5c3484 |
than mpn_mul_basecase, with no measuring.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
No attempt is made to tune within assembler routines, for instance
|
|
Packit |
5c3484 |
DIVREM_1_NORM_THRESHOLD. An assembler mpn_divrem_1 is expected to be
|
|
Packit |
5c3484 |
written and tuned all by hand. Assembler routines that might have hard
|
|
Packit |
5c3484 |
limits are recompiled though, to make them accept a bigger range of sizes
|
|
Packit |
5c3484 |
than normal, eg. mpn_sqr_basecase to compare against mpn_toom2_sqr.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Limitations:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The FFTs aren't subject to the same badness rule as the other thresholds,
|
|
Packit |
5c3484 |
so each k is probably being brought on a touch early. This isn't likely
|
|
Packit |
5c3484 |
to make a difference, and the simpler probing means fewer tests.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define TUNE_PROGRAM_BUILD 1 /* for gmp-impl.h */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "config.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include <math.h>
|
|
Packit |
5c3484 |
#include <stdio.h>
|
|
Packit |
5c3484 |
#include <stdlib.h>
|
|
Packit |
5c3484 |
#include <time.h>
|
|
Packit |
5c3484 |
#if HAVE_UNISTD_H
|
|
Packit |
5c3484 |
#include <unistd.h>
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "tests.h"
|
|
Packit |
5c3484 |
#include "speed.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if !HAVE_DECL_OPTARG
|
|
Packit |
5c3484 |
extern char *optarg;
|
|
Packit |
5c3484 |
extern int optind, opterr;
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define DEFAULT_MAX_SIZE 1000 /* limbs */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if WANT_FFT
|
|
Packit |
5c3484 |
mp_size_t option_fft_max_size = 50000; /* limbs */
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
mp_size_t option_fft_max_size = 0;
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
int option_trace = 0;
|
|
Packit |
5c3484 |
int option_fft_trace = 0;
|
|
Packit |
5c3484 |
struct speed_params s;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
struct dat_t {
|
|
Packit |
5c3484 |
mp_size_t size;
|
|
Packit |
5c3484 |
double d;
|
|
Packit |
5c3484 |
} *dat = NULL;
|
|
Packit |
5c3484 |
int ndat = 0;
|
|
Packit |
5c3484 |
int allocdat = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* This is not defined if mpn_sqr_basecase doesn't declare a limit. In that
|
|
Packit |
5c3484 |
case use zero here, which for params.max_size means no limit. */
|
|
Packit |
5c3484 |
#ifndef TUNE_SQR_TOOM2_MAX
|
|
Packit |
5c3484 |
#define TUNE_SQR_TOOM2_MAX 0
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_size_t mul_toom22_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mul_toom33_threshold = MUL_TOOM33_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
mp_size_t mul_toom44_threshold = MUL_TOOM44_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
mp_size_t mul_toom6h_threshold = MUL_TOOM6H_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
mp_size_t mul_toom8h_threshold = MUL_TOOM8H_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
mp_size_t mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mul_toom43_to_toom54_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mul_fft_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mul_fft_modf_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t sqr_basecase_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t sqr_toom2_threshold
|
|
Packit |
5c3484 |
= (TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX);
|
|
Packit |
5c3484 |
mp_size_t sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
mp_size_t sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
mp_size_t sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
mp_size_t sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
mp_size_t sqr_fft_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mullo_dc_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mullo_mul_n_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t sqrlo_basecase_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t sqrlo_dc_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t sqrlo_sqr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mulmid_toom42_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t sqrmod_bnm1_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t div_qr_2_pi2_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t dc_div_qr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t dc_divappr_q_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mu_div_qr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mu_divappr_q_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mupi_div_qr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mu_div_q_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t dc_bdiv_qr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t dc_bdiv_q_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mu_bdiv_qr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mu_bdiv_q_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t inv_mulmod_bnm1_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t inv_newton_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t inv_appr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t binv_newton_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t redc_1_to_redc_2_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t redc_1_to_redc_n_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t hgcd_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t hgcd_reduce_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t gcd_dc_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t gcdext_dc_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
int div_qr_1n_pi1_method = 0;
|
|
Packit |
5c3484 |
mp_size_t div_qr_1_norm_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t div_qr_1_unnorm_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t divrem_1_norm_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t divrem_1_unnorm_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mod_1_norm_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mod_1_unnorm_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
int mod_1_1p_method = 0;
|
|
Packit |
5c3484 |
mp_size_t mod_1n_to_mod_1_1_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mod_1u_to_mod_1_1_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t mod_1_2_to_mod_1_4_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t preinv_mod_1_to_mod_1_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t divrem_2_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t get_str_dc_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t get_str_precompute_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t set_str_dc_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t set_str_precompute_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t fac_odd_threshold = 0;
|
|
Packit |
5c3484 |
mp_size_t fac_dsc_threshold = FAC_DSC_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_size_t fft_modf_sqr_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
mp_size_t fft_modf_mul_threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
struct param_t {
|
|
Packit |
5c3484 |
const char *name;
|
|
Packit |
5c3484 |
speed_function_t function;
|
|
Packit |
5c3484 |
speed_function_t function2;
|
|
Packit |
5c3484 |
double step_factor; /* how much to step relatively */
|
|
Packit |
5c3484 |
int step; /* how much to step absolutely */
|
|
Packit |
5c3484 |
double function_fudge; /* multiplier for "function" speeds */
|
|
Packit |
5c3484 |
int stop_since_change;
|
|
Packit |
5c3484 |
double stop_factor;
|
|
Packit |
5c3484 |
mp_size_t min_size;
|
|
Packit |
5c3484 |
int min_is_always;
|
|
Packit |
5c3484 |
mp_size_t max_size;
|
|
Packit |
5c3484 |
mp_size_t check_size;
|
|
Packit |
5c3484 |
mp_size_t size_extra;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define DATA_HIGH_LT_R 1
|
|
Packit |
5c3484 |
#define DATA_HIGH_GE_R 2
|
|
Packit |
5c3484 |
int data_high;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
int noprint;
|
|
Packit |
5c3484 |
};
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Configuration macros that are simply absent when false.  That is fine
   for "#if", but they are tested in ordinary C "if" statements here, so
   each missing one is given an explicit zero value.  */
#if ! defined (UDIV_PREINV_ALWAYS)
#define UDIV_PREINV_ALWAYS  0
#endif
#if ! defined (HAVE_NATIVE_mpn_divexact_1)
#define HAVE_NATIVE_mpn_divexact_1  0
#endif
#if ! defined (HAVE_NATIVE_mpn_div_qr_1n_pi1)
#define HAVE_NATIVE_mpn_div_qr_1n_pi1  0
#endif
#if ! defined (HAVE_NATIVE_mpn_divrem_1)
#define HAVE_NATIVE_mpn_divrem_1  0
#endif
#if ! defined (HAVE_NATIVE_mpn_divrem_2)
#define HAVE_NATIVE_mpn_divrem_2  0
#endif
#if ! defined (HAVE_NATIVE_mpn_mod_1)
#define HAVE_NATIVE_mpn_mod_1  0
#endif
#if ! defined (HAVE_NATIVE_mpn_mod_1_1p)
#define HAVE_NATIVE_mpn_mod_1_1p  0
#endif
#if ! defined (HAVE_NATIVE_mpn_modexact_1_odd)
#define HAVE_NATIVE_mpn_modexact_1_odd  0
#endif
#if ! defined (HAVE_NATIVE_mpn_preinv_divrem_1)
#define HAVE_NATIVE_mpn_preinv_divrem_1  0
#endif
#if ! defined (HAVE_NATIVE_mpn_preinv_mod_1)
#define HAVE_NATIVE_mpn_preinv_mod_1  0
#endif
#if ! defined (HAVE_NATIVE_mpn_sqr_basecase)
#define HAVE_NATIVE_mpn_sqr_basecase  0
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Largest of three values, formed by nesting two uses of MAX.  */
#define MAX3(a,b,c) MAX (MAX (a, b), c)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
randlimb_norm (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t n;
|
|
Packit |
5c3484 |
mpn_random (&n, 1);
|
|
Packit |
5c3484 |
n |= GMP_NUMB_HIGHBIT;
|
|
Packit |
5c3484 |
return n;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Mask selecting the low GMP_NUMB_BITS/2 bits of a limb.  */
#define GMP_NUMB_HALFMASK ((CNST_LIMB(1) << (GMP_NUMB_BITS/2)) - 1)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
randlimb_half (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t n;
|
|
Packit |
5c3484 |
mpn_random (&n, 1);
|
|
Packit |
5c3484 |
n &= GMP_NUMB_HALFMASK;
|
|
Packit |
5c3484 |
n += (n==0);
|
|
Packit |
5c3484 |
return n;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Add an entry to the end of the dat[] array, reallocing to make it bigger
|
|
Packit |
5c3484 |
if necessary. */
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
add_dat (mp_size_t size, double d)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
#define ALLOCDAT_STEP 500
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT_ALWAYS (ndat <= allocdat);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (ndat == allocdat)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
dat = (struct dat_t *) __gmp_allocate_or_reallocate
|
|
Packit |
5c3484 |
(dat, allocdat * sizeof(dat[0]),
|
|
Packit |
5c3484 |
(allocdat+ALLOCDAT_STEP) * sizeof(dat[0]));
|
|
Packit |
5c3484 |
allocdat += ALLOCDAT_STEP;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dat[ndat].size = size;
|
|
Packit |
5c3484 |
dat[ndat].d = d;
|
|
Packit |
5c3484 |
ndat++;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Return the threshold size based on the data accumulated. */
|
|
Packit |
5c3484 |
mp_size_t
|
|
Packit |
5c3484 |
analyze_dat (int final)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
double x, min_x;
|
|
Packit |
5c3484 |
int j, min_j;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* If the threshold is set at dat[0].size, any positive values are bad. */
|
|
Packit |
5c3484 |
x = 0.0;
|
|
Packit |
5c3484 |
for (j = 0; j < ndat; j++)
|
|
Packit |
5c3484 |
if (dat[j].d > 0.0)
|
|
Packit |
5c3484 |
x += dat[j].d;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace >= 2 && final)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("\n");
|
|
Packit |
5c3484 |
printf ("x is the sum of the badness from setting thresh at given size\n");
|
|
Packit |
5c3484 |
printf (" (minimum x is sought)\n");
|
|
Packit |
5c3484 |
printf ("size=%ld first x=%.4f\n", (long) dat[j].size, x);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
min_x = x;
|
|
Packit |
5c3484 |
min_j = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* When stepping to the next dat[j].size, positive values are no longer
|
|
Packit |
5c3484 |
bad (so subtracted), negative values become bad (so add the absolute
|
|
Packit |
5c3484 |
value, meaning subtract). */
|
|
Packit |
5c3484 |
for (j = 0; j < ndat; x -= dat[j].d, j++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (option_trace >= 2 && final)
|
|
Packit |
5c3484 |
printf ("size=%ld x=%.4f\n", (long) dat[j].size, x);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (x < min_x)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
min_x = x;
|
|
Packit |
5c3484 |
min_j = j;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return min_j;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Measuring for recompiled mpn/generic/div_qr_1.c,
|
|
Packit |
5c3484 |
* mpn/generic/divrem_1.c, mpn/generic/mod_1.c and mpz/fac_ui.c */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t mpn_div_qr_1_tune (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if defined (__cplusplus)
|
|
Packit |
5c3484 |
extern "C" {
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t mpn_divrem_1_tune (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
|
|
Packit |
5c3484 |
mp_limb_t mpn_mod_1_tune (mp_srcptr, mp_size_t, mp_limb_t);
|
|
Packit |
5c3484 |
void mpz_fac_ui_tune (mpz_ptr, unsigned long);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if defined (__cplusplus)
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_mpn_mod_1_tune (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_tune);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_mpn_divrem_1_tune (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_mpz_fac_ui_tune (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_mpn_div_qr_1_tune (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1_tune);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
tuneup_measure (speed_function_t fun,
|
|
Packit |
5c3484 |
const struct param_t *param,
|
|
Packit |
5c3484 |
struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t dummy;
|
|
Packit |
5c3484 |
double t;
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (! param)
|
|
Packit |
5c3484 |
param = &dummy;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s->size += param->size_extra;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
SPEED_TMP_ALLOC_LIMBS (s->xp, s->size, 0);
|
|
Packit |
5c3484 |
SPEED_TMP_ALLOC_LIMBS (s->yp, s->size, 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mpn_random (s->xp, s->size);
|
|
Packit |
5c3484 |
mpn_random (s->yp, s->size);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
switch (param->data_high) {
|
|
Packit |
5c3484 |
case DATA_HIGH_LT_R:
|
|
Packit |
5c3484 |
s->xp[s->size-1] %= s->r;
|
|
Packit |
5c3484 |
s->yp[s->size-1] %= s->r;
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
case DATA_HIGH_GE_R:
|
|
Packit |
5c3484 |
s->xp[s->size-1] |= s->r;
|
|
Packit |
5c3484 |
s->yp[s->size-1] |= s->r;
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t = speed_measure (fun, s);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s->size -= param->size_extra;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return t;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define PRINT_WIDTH 31
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
print_define_start (const char *name)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("#define %-*s ", PRINT_WIDTH, name);
|
|
Packit |
5c3484 |
if (option_trace)
|
|
Packit |
5c3484 |
printf ("...\n");
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
print_define_end_remark (const char *name, mp_size_t value, const char *remark)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (option_trace)
|
|
Packit |
5c3484 |
printf ("#define %-*s ", PRINT_WIDTH, name);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (value == MP_SIZE_T_MAX)
|
|
Packit |
5c3484 |
printf ("MP_SIZE_T_MAX");
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
printf ("%5ld", (long) value);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (remark != NULL)
|
|
Packit |
5c3484 |
printf (" /* %s */", remark);
|
|
Packit |
5c3484 |
printf ("\n");
|
|
Packit |
5c3484 |
fflush (stdout);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
print_define_end (const char *name, mp_size_t value)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
const char *remark;
|
|
Packit |
5c3484 |
if (value == MP_SIZE_T_MAX)
|
|
Packit |
5c3484 |
remark = "never";
|
|
Packit |
5c3484 |
else if (value == 0)
|
|
Packit |
5c3484 |
remark = "always";
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
remark = NULL;
|
|
Packit |
5c3484 |
print_define_end_remark (name, value, remark);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
print_define (const char *name, mp_size_t value)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_start (name);
|
|
Packit |
5c3484 |
print_define_end (name, value);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
print_define_remark (const char *name, mp_size_t value, const char *remark)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_start (name);
|
|
Packit |
5c3484 |
print_define_end_remark (name, value, remark);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
one (mp_size_t *threshold, struct param_t *param)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
int since_positive, since_thresh_change;
|
|
Packit |
5c3484 |
int thresh_idx, new_thresh_idx;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define DEFAULT(x,n) do { if (! (x)) (x) = (n); } while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
DEFAULT (param->function_fudge, 1.0);
|
|
Packit |
5c3484 |
DEFAULT (param->function2, param->function);
|
|
Packit |
5c3484 |
DEFAULT (param->step_factor, 0.01); /* small steps by default */
|
|
Packit |
5c3484 |
DEFAULT (param->step, 1); /* small steps by default */
|
|
Packit |
5c3484 |
DEFAULT (param->stop_since_change, 80);
|
|
Packit |
5c3484 |
DEFAULT (param->stop_factor, 1.2);
|
|
Packit |
5c3484 |
DEFAULT (param->min_size, 10);
|
|
Packit |
5c3484 |
DEFAULT (param->max_size, DEFAULT_MAX_SIZE);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (param->check_size != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
double t1, t2;
|
|
Packit |
5c3484 |
s.size = param->check_size;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
*threshold = s.size+1;
|
|
Packit |
5c3484 |
t1 = tuneup_measure (param->function, param, &s);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
*threshold = s.size;
|
|
Packit |
5c3484 |
t2 = tuneup_measure (param->function2, param, &s);
|
|
Packit |
5c3484 |
if (t1 == -1.0 || t2 == -1.0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("Oops, can't run both functions at size %ld\n",
|
|
Packit |
5c3484 |
(long) s.size);
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
t1 *= param->function_fudge;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* ask that t2 is at least 4% below t1 */
|
|
Packit |
5c3484 |
if (t1 < t2*1.04)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (option_trace)
|
|
Packit |
5c3484 |
printf ("function2 never enough faster: t1=%.9f t2=%.9f\n", t1, t2);
|
|
Packit |
5c3484 |
*threshold = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
if (! param->noprint)
|
|
Packit |
5c3484 |
print_define (param->name, *threshold);
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace >= 2)
|
|
Packit |
5c3484 |
printf ("function2 enough faster at size=%ld: t1=%.9f t2=%.9f\n",
|
|
Packit |
5c3484 |
(long) s.size, t1, t2);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (! param->noprint || option_trace)
|
|
Packit |
5c3484 |
print_define_start (param->name);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ndat = 0;
|
|
Packit |
5c3484 |
since_positive = 0;
|
|
Packit |
5c3484 |
since_thresh_change = 0;
|
|
Packit |
5c3484 |
thresh_idx = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace >= 2)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf (" algorithm-A algorithm-B ratio possible\n");
|
|
Packit |
5c3484 |
printf (" (seconds) (seconds) diff thresh\n");
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (s.size = param->min_size;
|
|
Packit |
5c3484 |
s.size < param->max_size;
|
|
Packit |
5c3484 |
s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), param->step))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
double ti, tiplus1, d;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/*
|
|
Packit |
5c3484 |
FIXME: check minimum size requirements are met, possibly by just
|
|
Packit |
5c3484 |
checking for the -1 returns from the speed functions.
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* using method A at this size */
|
|
Packit |
5c3484 |
*threshold = s.size+1;
|
|
Packit |
5c3484 |
ti = tuneup_measure (param->function, param, &s);
|
|
Packit |
5c3484 |
if (ti == -1.0)
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
ti *= param->function_fudge;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* using method B at this size */
|
|
Packit |
5c3484 |
*threshold = s.size;
|
|
Packit |
5c3484 |
tiplus1 = tuneup_measure (param->function2, param, &s);
|
|
Packit |
5c3484 |
if (tiplus1 == -1.0)
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Calculate the fraction by which the one or the other routine is
|
|
Packit |
5c3484 |
slower. */
|
|
Packit |
5c3484 |
if (tiplus1 >= ti)
|
|
Packit |
5c3484 |
d = (tiplus1 - ti) / tiplus1; /* negative */
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
d = (tiplus1 - ti) / ti; /* positive */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
add_dat (s.size, d);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
new_thresh_idx = analyze_dat (0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace >= 2)
|
|
Packit |
5c3484 |
printf ("size=%ld %.9f %.9f % .4f %c %ld\n",
|
|
Packit |
5c3484 |
(long) s.size, ti, tiplus1, d,
|
|
Packit |
5c3484 |
ti > tiplus1 ? '#' : ' ',
|
|
Packit |
5c3484 |
(long) dat[new_thresh_idx].size);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Stop if the last time method i was faster was more than a
|
|
Packit |
5c3484 |
certain number of measurements ago. */
|
|
Packit |
5c3484 |
#define STOP_SINCE_POSITIVE 200
|
|
Packit |
5c3484 |
if (d >= 0)
|
|
Packit |
5c3484 |
since_positive = 0;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
if (++since_positive > STOP_SINCE_POSITIVE)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("stopped due to since_positive (%d)\n",
|
|
Packit |
5c3484 |
STOP_SINCE_POSITIVE);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Stop if method A has become slower by a certain factor. */
|
|
Packit |
5c3484 |
if (ti >= tiplus1 * param->stop_factor)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("stopped due to ti >= tiplus1 * factor (%.1f)\n",
|
|
Packit |
5c3484 |
param->stop_factor);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Stop if the threshold implied hasn't changed in a certain
|
|
Packit |
5c3484 |
number of measurements. (It's this condition that usually
|
|
Packit |
5c3484 |
stops the loop.) */
|
|
Packit |
5c3484 |
if (thresh_idx != new_thresh_idx)
|
|
Packit |
5c3484 |
since_thresh_change = 0, thresh_idx = new_thresh_idx;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
if (++since_thresh_change > param->stop_since_change)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("stopped due to since_thresh_change (%d)\n",
|
|
Packit |
5c3484 |
param->stop_since_change);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Stop if the threshold implied is more than a certain number of
|
|
Packit |
5c3484 |
measurements ago. */
|
|
Packit |
5c3484 |
#define STOP_SINCE_AFTER 500
|
|
Packit |
5c3484 |
if (ndat - thresh_idx > STOP_SINCE_AFTER)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("stopped due to ndat - thresh_idx > amount (%d)\n",
|
|
Packit |
5c3484 |
STOP_SINCE_AFTER);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Stop when the size limit is reached before the end of the
|
|
Packit |
5c3484 |
crossover, but only show this as an error for >= the default max
|
|
Packit |
5c3484 |
size. FIXME: Maybe should make it a param choice whether this is
|
|
Packit |
5c3484 |
an error. */
|
|
Packit |
5c3484 |
if (s.size >= param->max_size && param->max_size >= DEFAULT_MAX_SIZE)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
fprintf (stderr, "%s\n", param->name);
|
|
Packit |
5c3484 |
fprintf (stderr, "sizes %ld to %ld total %d measurements\n",
|
|
Packit |
5c3484 |
(long) dat[0].size, (long) dat[ndat-1].size, ndat);
|
|
Packit |
5c3484 |
fprintf (stderr, " max size reached before end of crossover\n");
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("sizes %ld to %ld total %d measurements\n",
|
|
Packit |
5c3484 |
(long) dat[0].size, (long) dat[ndat-1].size, ndat);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
*threshold = dat[analyze_dat (1)].size;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (param->min_is_always)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (*threshold == param->min_size)
|
|
Packit |
5c3484 |
*threshold = 0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (! param->noprint || option_trace)
|
|
Packit |
5c3484 |
print_define_end (param->name, *threshold);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Special probing for the fft thresholds. The size restrictions on the
|
|
Packit |
5c3484 |
FFTs mean the graph of time vs size has a step effect. See this for
|
|
Packit |
5c3484 |
example using
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
./speed -s 4096-16384 -t 128 -P foo mpn_mul_fft.8 mpn_mul_fft.9
|
|
Packit |
5c3484 |
gnuplot foo.gnuplot
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The current approach is to compare routines at the midpoint of relevant
|
|
Packit |
5c3484 |
steps. Arguably a more sophisticated system of threshold data is wanted
|
|
Packit |
5c3484 |
if this step effect remains. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
struct fft_param_t {
|
|
Packit |
5c3484 |
const char *table_name;
|
|
Packit |
5c3484 |
const char *threshold_name;
|
|
Packit |
5c3484 |
const char *modf_threshold_name;
|
|
Packit |
5c3484 |
mp_size_t *p_threshold;
|
|
Packit |
5c3484 |
mp_size_t *p_modf_threshold;
|
|
Packit |
5c3484 |
mp_size_t first_size;
|
|
Packit |
5c3484 |
mp_size_t max_size;
|
|
Packit |
5c3484 |
speed_function_t function;
|
|
Packit |
5c3484 |
speed_function_t mul_modf_function;
|
|
Packit |
5c3484 |
speed_function_t mul_function;
|
|
Packit |
5c3484 |
mp_size_t sqr;
|
|
Packit |
5c3484 |
};
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* mpn_mul_fft requires pl a multiple of 2^k limbs, but with
|
|
Packit |
5c3484 |
N=pl*BIT_PER_MP_LIMB it internally also pads out so N/2^k is a multiple
|
|
Packit |
5c3484 |
of 2^(k-1) bits. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_size_t
|
|
Packit |
5c3484 |
fft_step_size (int k)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t step;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
step = MAX ((mp_size_t) 1 << (k-1), GMP_LIMB_BITS) / GMP_LIMB_BITS;
|
|
Packit |
5c3484 |
step *= (mp_size_t) 1 << k;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (step <= 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("Can't handle k=%d\n", k);
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return step;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_size_t
|
|
Packit |
5c3484 |
fft_next_size (mp_size_t pl, int k)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t m = fft_step_size (k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* printf ("[k=%d %ld] %ld ->", k, m, pl); */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (pl == 0 || (pl & (m-1)) != 0)
|
|
Packit |
5c3484 |
pl = (pl | (m-1)) + 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* printf (" %ld\n", pl); */
|
|
Packit |
5c3484 |
return pl;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define NMAX_DEFAULT 1000000
/* Upper and lower bounds on the number of timing repetitions used per
   size by fftmes()/cached_measure(); MAX_REPS also sizes the sample
   buffer in cached_measure().  */
#define MAX_REPS 25
#define MIN_REPS 5
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Return lcm (A, 2^K).  Up to K factors of two are stripped from A, and
   the remaining value is then shifted left by the original K.  */
static inline size_t
mpn_mul_fft_lcm (size_t a, unsigned int k)
{
  unsigned int l = k;   /* remember the original exponent */

  while (a % 2 == 0 && k > 0)
    {
      a >>= 1;
      k--;
    }
  return a << l;
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_size_t
|
|
Packit |
5c3484 |
fftfill (mp_size_t pl, int k, int sqr)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t maxLK;
|
|
Packit |
5c3484 |
mp_bitcnt_t N, Nprime, nprime, M;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
N = pl * GMP_NUMB_BITS;
|
|
Packit |
5c3484 |
M = N >> k;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
|
|
Packit |
5c3484 |
nprime = Nprime / GMP_NUMB_BITS;
|
|
Packit |
5c3484 |
if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
size_t K2;
|
|
Packit |
5c3484 |
for (;;)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
K2 = 1L << mpn_fft_best_k (nprime, sqr);
|
|
Packit |
5c3484 |
if ((nprime & (K2 - 1)) == 0)
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
nprime = (nprime + K2 - 1) & -K2;
|
|
Packit |
5c3484 |
Nprime = nprime * GMP_LIMB_BITS;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
ASSERT_ALWAYS (nprime < pl);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return Nprime;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* qsort() comparison callback ordering doubles ascending.  AP and BP point
   to the two doubles; returns -1, 0 or 1.  Values that compare neither
   less nor greater (equal values, or a NaN operand) yield 0.  */
static int
compare_double (const void *ap, const void *bp)
{
  double a = * (const double *) ap;
  double b = * (const double *) bp;

  if (a < b)
    return -1;
  else if (a > b)
    return 1;
  else
    return 0;
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
median (double *times, int n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
qsort (times, n, sizeof (double), compare_double);
|
|
Packit |
5c3484 |
return times[n/2];
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define FFT_CACHE_SIZE 25
|
|
Packit |
5c3484 |
typedef struct fft_cache
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t n;
|
|
Packit |
5c3484 |
double time;
|
|
Packit |
5c3484 |
} fft_cache_t;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
fft_cache_t fft_cache[FFT_CACHE_SIZE];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
cached_measure (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, int k,
|
|
Packit |
5c3484 |
int n_measurements)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
int i;
|
|
Packit |
5c3484 |
double t, ttab[MAX_REPS];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (fft_cache[k].n == n)
|
|
Packit |
5c3484 |
return fft_cache[k].time;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (i = 0; i < n_measurements; i++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
speed_starttime ();
|
|
Packit |
5c3484 |
mpn_mul_fft (rp, n, ap, n, bp, n, k);
|
|
Packit |
5c3484 |
ttab[i] = speed_endtime ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t = median (ttab, n_measurements);
|
|
Packit |
5c3484 |
fft_cache[k].n = n;
|
|
Packit |
5c3484 |
fft_cache[k].time = t;
|
|
Packit |
5c3484 |
return t;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Store the pair (nval, kval) at fft_tab[idx] and write an all-ones
   sentinel entry right after it.  Expects a variable named fft_tab to be
   in scope at the point of use.  idx is evaluated several times, so it
   must not have side effects.  */
#define INSERT_FFTTAB(idx, nval, kval)                                  \
  do {                                                                  \
    fft_tab[(idx)].n = (nval);                                          \
    fft_tab[(idx)].k = (kval);                                          \
    fft_tab[(idx)+1].n = (1 << 27) - 1; /* sentinel, 27b wide field */  \
    fft_tab[(idx)+1].k = (1 <<  5) - 1;                                 \
  } while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
int
|
|
Packit |
5c3484 |
fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t n, n1, prev_n1;
|
|
Packit |
5c3484 |
int k, best_k, last_best_k, kmax;
|
|
Packit |
5c3484 |
int eff, prev_eff;
|
|
Packit |
5c3484 |
double t0, t1;
|
|
Packit |
5c3484 |
int n_measurements;
|
|
Packit |
5c3484 |
mp_limb_t *ap, *bp, *rp;
|
|
Packit |
5c3484 |
mp_size_t alloc;
|
|
Packit |
5c3484 |
struct fft_table_nk *fft_tab;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
fft_tab = mpn_fft_table3[p->sqr];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (k = 0; k < FFT_CACHE_SIZE; k++)
|
|
Packit |
5c3484 |
fft_cache[k].n = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (nmin < (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
nmin = (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (print)
|
|
Packit |
5c3484 |
printf ("#define %s%*s", p->table_name, 38, "");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (idx == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
INSERT_FFTTAB (0, nmin, initial_k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (print)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("\\\n { ");
|
|
Packit |
5c3484 |
printf ("{%7u,%2u}", fft_tab[0].n, fft_tab[0].k);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
idx = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ap = (mp_ptr) malloc (sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
if (p->sqr)
|
|
Packit |
5c3484 |
bp = ap;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
bp = (mp_ptr) malloc (sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
rp = (mp_ptr) malloc (sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
alloc = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Round n to comply to initial k value */
|
|
Packit |
5c3484 |
n = (nmin + ((1ul << initial_k) - 1)) & (MP_SIZE_T_MAX << initial_k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
n_measurements = (18 - initial_k) | 1;
|
|
Packit |
5c3484 |
n_measurements = MAX (n_measurements, MIN_REPS);
|
|
Packit |
5c3484 |
n_measurements = MIN (n_measurements, MAX_REPS);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
last_best_k = initial_k;
|
|
Packit |
5c3484 |
best_k = initial_k;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while (n < nmax)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
int start_k, end_k;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Assume the current best k is best until we hit its next FFT step. */
|
|
Packit |
5c3484 |
t0 = 99999;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
prev_n1 = n + 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
start_k = MAX (4, best_k - 4);
|
|
Packit |
5c3484 |
end_k = MIN (24, best_k + 4);
|
|
Packit |
5c3484 |
for (k = start_k; k <= end_k; k++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n1 = mpn_fft_next_size (prev_n1, k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
eff = 200 * (n1 * GMP_NUMB_BITS >> k) / fftfill (n1, k, p->sqr);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (eff < 70) /* avoid measuring too slow fft:s */
|
|
Packit |
5c3484 |
continue;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (n1 > alloc)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
alloc = n1;
|
|
Packit |
5c3484 |
if (p->sqr)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ap = (mp_ptr) realloc (ap, sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
rp = (mp_ptr) realloc (rp, sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
ap = bp = (mp_ptr) realloc (ap, alloc * sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
mpn_random (ap, alloc);
|
|
Packit |
5c3484 |
rp = (mp_ptr) realloc (rp, alloc * sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ap = (mp_ptr) realloc (ap, sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
bp = (mp_ptr) realloc (bp, sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
rp = (mp_ptr) realloc (rp, sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
ap = (mp_ptr) realloc (ap, alloc * sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
mpn_random (ap, alloc);
|
|
Packit |
5c3484 |
bp = (mp_ptr) realloc (bp, alloc * sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
mpn_random (bp, alloc);
|
|
Packit |
5c3484 |
rp = (mp_ptr) realloc (rp, alloc * sizeof (mp_limb_t));
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t1 = cached_measure (rp, ap, bp, n1, k, n_measurements);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (t1 * n_measurements > 0.3)
|
|
Packit |
5c3484 |
n_measurements -= 2;
|
|
Packit |
5c3484 |
n_measurements = MAX (n_measurements, MIN_REPS);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (t1 < t0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
best_k = k;
|
|
Packit |
5c3484 |
t0 = t1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
n1 = mpn_fft_next_size (prev_n1, best_k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (last_best_k != best_k)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ASSERT_ALWAYS ((prev_n1 & ((1ul << last_best_k) - 1)) == 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (idx >= FFT_TABLE3_SIZE)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (print)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf (", ");
|
|
Packit |
5c3484 |
if (idx % 4 == 0)
|
|
Packit |
5c3484 |
printf ("\\\n ");
|
|
Packit |
5c3484 |
printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace >= 2)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("{%lu,%u}\n", prev_n1, best_k);
|
|
Packit |
5c3484 |
fflush (stdout);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
last_best_k = best_k;
|
|
Packit |
5c3484 |
idx++;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (;;)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
prev_n1 = n1;
|
|
Packit |
5c3484 |
prev_eff = fftfill (prev_n1, best_k, p->sqr);
|
|
Packit |
5c3484 |
n1 = mpn_fft_next_size (prev_n1 + 1, best_k);
|
|
Packit |
5c3484 |
eff = fftfill (n1, best_k, p->sqr);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (eff != prev_eff)
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
n = prev_n1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
kmax = sizeof (mp_size_t) * 4; /* GMP_MP_SIZE_T_BITS / 2 */
|
|
Packit |
5c3484 |
kmax = MIN (kmax, 25-1);
|
|
Packit |
5c3484 |
for (k = last_best_k + 1; k <= kmax; k++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (idx >= FFT_TABLE3_SIZE)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (print)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf (", ");
|
|
Packit |
5c3484 |
if (idx % 4 == 0)
|
|
Packit |
5c3484 |
printf ("\\\n ");
|
|
Packit |
5c3484 |
printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
idx++;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (print)
|
|
Packit |
5c3484 |
printf (" }\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
free (ap);
|
|
Packit |
5c3484 |
if (! p->sqr)
|
|
Packit |
5c3484 |
free (bp);
|
|
Packit |
5c3484 |
free (rp);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return idx;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
fft (struct fft_param_t *p)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t size;
|
|
Packit |
5c3484 |
int k, idx, initial_k;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/*** Generate MUL_FFT_MODF_THRESHOLD / SQR_FFT_MODF_THRESHOLD ***/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if 1
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Use plain one() mechanism, for some reasonable initial values of k. The
|
|
Packit |
5c3484 |
advantage is that we don't depend on mpn_fft_table3, which can therefore
|
|
Packit |
5c3484 |
leave it completely uninitialized. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
mp_size_t thres, best_thres;
|
|
Packit |
5c3484 |
int best_k;
|
|
Packit |
5c3484 |
char buf[20];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
best_thres = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
best_k = -1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (k = 5; k <= 7; k++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = p->modf_threshold_name;
|
|
Packit |
5c3484 |
param.min_size = 100;
|
|
Packit |
5c3484 |
param.max_size = 2000;
|
|
Packit |
5c3484 |
param.function = p->mul_function;
|
|
Packit |
5c3484 |
param.step_factor = 0.0;
|
|
Packit |
5c3484 |
param.step = 4;
|
|
Packit |
5c3484 |
param.function2 = p->mul_modf_function;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
s.r = k;
|
|
Packit |
5c3484 |
one (&thres, ¶m;;
|
|
Packit |
5c3484 |
if (thres < best_thres)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
best_thres = thres;
|
|
Packit |
5c3484 |
best_k = k;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
*(p->p_modf_threshold) = best_thres;
|
|
Packit |
5c3484 |
sprintf (buf, "k = %d", best_k);
|
|
Packit |
5c3484 |
print_define_remark (p->modf_threshold_name, best_thres, buf);
|
|
Packit |
5c3484 |
initial_k = best_k;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
size = p->first_size;
|
|
Packit |
5c3484 |
for (;;)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
double tk, tm;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
size = mpn_fft_next_size (size+1, mpn_fft_best_k (size+1, p->sqr));
|
|
Packit |
5c3484 |
k = mpn_fft_best_k (size, p->sqr);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (size >= p->max_size)
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s.size = size + fft_step_size (k) / 2;
|
|
Packit |
5c3484 |
s.r = k;
|
|
Packit |
5c3484 |
tk = tuneup_measure (p->mul_modf_function, NULL, &s);
|
|
Packit |
5c3484 |
if (tk == -1.0)
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tm = tuneup_measure (p->mul_function, NULL, &s);
|
|
Packit |
5c3484 |
if (tm == -1.0)
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace >= 2)
|
|
Packit |
5c3484 |
printf ("at %ld size=%ld k=%d %.9f size=%ld modf %.9f\n",
|
|
Packit |
5c3484 |
(long) size,
|
|
Packit |
5c3484 |
(long) size + fft_step_size (k) / 2, k, tk,
|
|
Packit |
5c3484 |
(long) s.size, tm);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (tk < tm)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
*p->p_modf_threshold = s.size;
|
|
Packit |
5c3484 |
print_define (p->modf_threshold_name, *p->p_modf_threshold);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
initial_k = ?;
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/*** Generate MUL_FFT_TABLE3 / SQR_FFT_TABLE3 ***/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
idx = fftmes (*p->p_modf_threshold, p->max_size, initial_k, p, 0, 1);
|
|
Packit |
5c3484 |
printf ("#define %s_SIZE %d\n", p->table_name, idx);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/*** Generate MUL_FFT_THRESHOLD / SQR_FFT_THRESHOLD ***/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
size = 2 * *p->p_modf_threshold; /* OK? */
|
|
Packit |
5c3484 |
for (;;)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
double tk, tm;
|
|
Packit |
5c3484 |
mp_size_t mulmod_size, mul_size;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (size >= p->max_size)
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mulmod_size = mpn_mulmod_bnm1_next_size (2 * (size + 1)) / 2;
|
|
Packit |
5c3484 |
mul_size = (size + mulmod_size) / 2; /* middle of step */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s.size = mulmod_size;
|
|
Packit |
5c3484 |
tk = tuneup_measure (p->function, NULL, &s);
|
|
Packit |
5c3484 |
if (tk == -1.0)
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s.size = mul_size;
|
|
Packit |
5c3484 |
tm = tuneup_measure (p->mul_function, NULL, &s);
|
|
Packit |
5c3484 |
if (tm == -1.0)
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace >= 2)
|
|
Packit |
5c3484 |
printf ("at %ld size=%ld %.9f size=%ld mul %.9f\n",
|
|
Packit |
5c3484 |
(long) size,
|
|
Packit |
5c3484 |
(long) mulmod_size, tk,
|
|
Packit |
5c3484 |
(long) mul_size, tm);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
size = mulmod_size;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (tk < tm)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
*p->p_threshold = s.size;
|
|
Packit |
5c3484 |
print_define (p->threshold_name, *p->p_threshold);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
|
|
Packit |
5c3484 |
giving wrong results. */
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_mul_n (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
mp_size_t next_toom_start;
|
|
Packit |
5c3484 |
int something_changed;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_mul_n;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM22_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MAX (4, MPN_TOOM22_MUL_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&mul_toom22_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Threshold sequence loop. Disable functions that would be used in a very
|
|
Packit |
5c3484 |
narrow range, re-measuring things when that happens. */
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
while (something_changed)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
something_changed = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
next_toom_start = mul_toom22_threshold;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mul_toom33_threshold != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM33_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MAX (next_toom_start, MPN_TOOM33_MUL_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&mul_toom33_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (next_toom_start * 1.05 >= mul_toom33_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mul_toom33_threshold = 0;
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
next_toom_start = MAX (next_toom_start, mul_toom33_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mul_toom44_threshold != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM44_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MAX (next_toom_start, MPN_TOOM44_MUL_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&mul_toom44_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (next_toom_start * 1.05 >= mul_toom44_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mul_toom44_threshold = 0;
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
next_toom_start = MAX (next_toom_start, mul_toom44_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mul_toom6h_threshold != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM6H_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MAX (next_toom_start, MPN_TOOM6H_MUL_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&mul_toom6h_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (next_toom_start * 1.05 >= mul_toom6h_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mul_toom6h_threshold = 0;
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
next_toom_start = MAX (next_toom_start, mul_toom6h_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mul_toom8h_threshold != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM8H_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MAX (next_toom_start, MPN_TOOM8H_MUL_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&mul_toom8h_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (next_toom_start * 1.05 >= mul_toom8h_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mul_toom8h_threshold = 0;
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM33_THRESHOLD", MUL_TOOM33_THRESHOLD);
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM44_THRESHOLD", MUL_TOOM44_THRESHOLD);
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM6H_THRESHOLD", MUL_TOOM6H_THRESHOLD);
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM8H_THRESHOLD", MUL_TOOM8H_THRESHOLD);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* disabled until tuned */
|
|
Packit |
5c3484 |
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_mul (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
mp_size_t thres;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_toom32_for_toom43_mul;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_toom43_for_toom32_mul;
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MPN_TOOM43_MUL_MINSIZE * 24 / 17;
|
|
Packit |
5c3484 |
one (&thres, ¶m;;
|
|
Packit |
5c3484 |
mul_toom32_to_toom43_threshold = thres * 17 / 24;
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_toom32_for_toom53_mul;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_toom53_for_toom32_mul;
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MPN_TOOM53_MUL_MINSIZE * 30 / 19;
|
|
Packit |
5c3484 |
one (&thres, ¶m;;
|
|
Packit |
5c3484 |
mul_toom32_to_toom53_threshold = thres * 19 / 30;
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_toom42_for_toom53_mul;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_toom53_for_toom42_mul;
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MPN_TOOM53_MUL_MINSIZE * 20 / 11;
|
|
Packit |
5c3484 |
one (&thres, ¶m;;
|
|
Packit |
5c3484 |
mul_toom42_to_toom53_threshold = thres * 11 / 20;
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_toom42_mul;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_toom63_mul;
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MPN_TOOM63_MUL_MINSIZE * 2;
|
|
Packit |
5c3484 |
one (&thres, ¶m;;
|
|
Packit |
5c3484 |
mul_toom42_to_toom63_threshold = thres / 2;
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Use ratio 5/6 when measuring, the middle of the range 2/3 to 1. */
|
|
Packit |
5c3484 |
param.function = speed_mpn_toom43_for_toom54_mul;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_toom54_for_toom43_mul;
|
|
Packit |
5c3484 |
param.name = "MUL_TOOM43_TO_TOOM54_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MPN_TOOM54_MUL_MINSIZE * 6 / 5;
|
|
Packit |
5c3484 |
one (&thres, ¶m;;
|
|
Packit |
5c3484 |
mul_toom43_to_toom54_threshold = thres * 5 / 6;
|
|
Packit |
5c3484 |
print_define ("MUL_TOOM43_TO_TOOM54_THRESHOLD", mul_toom43_to_toom54_threshold);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_mullo (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_mullo_n;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "MULLO_BASECASE_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 1;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.max_size = MULLO_BASECASE_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
param.stop_factor = 1.5;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
one (&mullo_basecase_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "MULLO_DC_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 8;
|
|
Packit |
5c3484 |
param.min_is_always = 0;
|
|
Packit |
5c3484 |
param.max_size = 1000;
|
|
Packit |
5c3484 |
one (&mullo_dc_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mullo_basecase_threshold >= mullo_dc_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define ("MULLO_BASECASE_THRESHOLD", mullo_dc_threshold);
|
|
Packit |
5c3484 |
print_define_remark ("MULLO_DC_THRESHOLD", 0, "never mpn_mullo_basecase");
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define ("MULLO_BASECASE_THRESHOLD", mullo_basecase_threshold);
|
|
Packit |
5c3484 |
print_define ("MULLO_DC_THRESHOLD", mullo_dc_threshold);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (WANT_FFT && mul_fft_threshold < MP_SIZE_T_MAX / 2)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "MULLO_MUL_N_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = mullo_dc_threshold;
|
|
Packit |
5c3484 |
param.max_size = 2 * mul_fft_threshold;
|
|
Packit |
5c3484 |
param.noprint = 0;
|
|
Packit |
5c3484 |
param.step_factor = 0.03;
|
|
Packit |
5c3484 |
one (&mullo_mul_n_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
print_define_remark ("MULLO_MUL_N_THRESHOLD", MP_SIZE_T_MAX,
|
|
Packit |
5c3484 |
"without FFT use mullo forever");
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_sqrlo (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_sqrlo;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "SQRLO_BASECASE_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 1;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.max_size = SQRLO_BASECASE_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
param.stop_factor = 1.5;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
one (&sqrlo_basecase_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "SQRLO_DC_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 8;
|
|
Packit |
5c3484 |
param.min_is_always = 0;
|
|
Packit |
5c3484 |
param.max_size = SQRLO_DC_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&sqrlo_dc_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (sqrlo_basecase_threshold >= sqrlo_dc_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_dc_threshold);
|
|
Packit |
5c3484 |
print_define_remark ("SQRLO_DC_THRESHOLD", 0, "never mpn_sqrlo_basecase");
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_basecase_threshold);
|
|
Packit |
5c3484 |
print_define ("SQRLO_DC_THRESHOLD", sqrlo_dc_threshold);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (WANT_FFT && sqr_fft_threshold < MP_SIZE_T_MAX / 2)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "SQRLO_SQR_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = sqrlo_dc_threshold;
|
|
Packit |
5c3484 |
param.max_size = 2 * sqr_fft_threshold;
|
|
Packit |
5c3484 |
param.noprint = 0;
|
|
Packit |
5c3484 |
param.step_factor = 0.03;
|
|
Packit |
5c3484 |
one (&sqrlo_sqr_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
print_define_remark ("SQRLO_SQR_THRESHOLD", MP_SIZE_T_MAX,
|
|
Packit |
5c3484 |
"without FFT use sqrlo forever");
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_mulmid (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "MULMID_TOOM42_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_mulmid_n;
|
|
Packit |
5c3484 |
param.min_size = 4;
|
|
Packit |
5c3484 |
param.max_size = 100;
|
|
Packit |
5c3484 |
one (&mulmid_toom42_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_mulmod_bnm1 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "MULMOD_BNM1_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_mulmod_bnm1;
|
|
Packit |
5c3484 |
param.min_size = 4;
|
|
Packit |
5c3484 |
param.max_size = 100;
|
|
Packit |
5c3484 |
one (&mulmod_bnm1_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_sqrmod_bnm1 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "SQRMOD_BNM1_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_sqrmod_bnm1;
|
|
Packit |
5c3484 |
param.min_size = 4;
|
|
Packit |
5c3484 |
param.max_size = 100;
|
|
Packit |
5c3484 |
one (&sqrmod_bnm1_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Start the basecase from 3, since 1 is a special case, and if mul_basecase
|
|
Packit |
5c3484 |
is faster only at size==2 then we don't want to bother with extra code
|
|
Packit |
5c3484 |
just for that. Start karatsuba from 4 same as MUL above. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_sqr (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* disabled until tuned */
|
|
Packit |
5c3484 |
SQR_FFT_THRESHOLD = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (HAVE_NATIVE_mpn_sqr_basecase)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("SQR_BASECASE_THRESHOLD", 0, "always (native)");
|
|
Packit |
5c3484 |
sqr_basecase_threshold = 0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "SQR_BASECASE_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_sqr;
|
|
Packit |
5c3484 |
param.min_size = 3;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.max_size = TUNE_SQR_TOOM2_MAX;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
one (&sqr_basecase_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "SQR_TOOM2_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_sqr;
|
|
Packit |
5c3484 |
param.min_size = MAX (4, MPN_TOOM2_SQR_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = TUNE_SQR_TOOM2_MAX;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
one (&sqr_toom2_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (! HAVE_NATIVE_mpn_sqr_basecase
|
|
Packit |
5c3484 |
&& sqr_toom2_threshold < sqr_basecase_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Karatsuba becomes faster than mul_basecase before
|
|
Packit |
5c3484 |
sqr_basecase does. Arrange for the expression
|
|
Packit |
5c3484 |
"BELOW_THRESHOLD (un, SQR_TOOM2_THRESHOLD))" which
|
|
Packit |
5c3484 |
selects mpn_sqr_basecase in mpn_sqr to be false, by setting
|
|
Packit |
5c3484 |
SQR_TOOM2_THRESHOLD to zero, making
|
|
Packit |
5c3484 |
SQR_BASECASE_THRESHOLD the toom2 threshold. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
sqr_basecase_threshold = SQR_TOOM2_THRESHOLD;
|
|
Packit |
5c3484 |
SQR_TOOM2_THRESHOLD = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define_remark ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold,
|
|
Packit |
5c3484 |
"toom2");
|
|
Packit |
5c3484 |
print_define_remark ("SQR_TOOM2_THRESHOLD",SQR_TOOM2_THRESHOLD,
|
|
Packit |
5c3484 |
"never sqr_basecase");
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (! HAVE_NATIVE_mpn_sqr_basecase)
|
|
Packit |
5c3484 |
print_define ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold);
|
|
Packit |
5c3484 |
print_define ("SQR_TOOM2_THRESHOLD", SQR_TOOM2_THRESHOLD);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
mp_size_t next_toom_start;
|
|
Packit |
5c3484 |
int something_changed;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_sqr;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Threshold sequence loop. Disable functions that would be used in a very
|
|
Packit |
5c3484 |
narrow range, re-measuring things when that happens. */
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
while (something_changed)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
something_changed = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
next_toom_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
param.name = "SQR_TOOM3_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = MAX (next_toom_start, MPN_TOOM3_SQR_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&sqr_toom3_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
next_toom_start = MAX (next_toom_start, sqr_toom3_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (sqr_toom4_threshold != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "SQR_TOOM4_THRESHOLD";
|
|
Packit |
5c3484 |
sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
param.min_size = MAX (next_toom_start, MPN_TOOM4_SQR_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&sqr_toom4_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (next_toom_start * 1.05 >= sqr_toom4_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
sqr_toom4_threshold = 0;
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
next_toom_start = MAX (next_toom_start, sqr_toom4_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (sqr_toom6_threshold != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "SQR_TOOM6_THRESHOLD";
|
|
Packit |
5c3484 |
sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
param.min_size = MAX (next_toom_start, MPN_TOOM6_SQR_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&sqr_toom6_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (next_toom_start * 1.05 >= sqr_toom6_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
sqr_toom6_threshold = 0;
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
next_toom_start = MAX (next_toom_start, sqr_toom6_threshold);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (sqr_toom8_threshold != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.name = "SQR_TOOM8_THRESHOLD";
|
|
Packit |
5c3484 |
sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
param.min_size = MAX (next_toom_start, MPN_TOOM8_SQR_MINSIZE);
|
|
Packit |
5c3484 |
param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
|
|
Packit |
5c3484 |
one (&sqr_toom8_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (next_toom_start * 1.05 >= sqr_toom8_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
sqr_toom8_threshold = 0;
|
|
Packit |
5c3484 |
something_changed = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define ("SQR_TOOM3_THRESHOLD", SQR_TOOM3_THRESHOLD);
|
|
Packit |
5c3484 |
print_define ("SQR_TOOM4_THRESHOLD", SQR_TOOM4_THRESHOLD);
|
|
Packit |
5c3484 |
print_define ("SQR_TOOM6_THRESHOLD", SQR_TOOM6_THRESHOLD);
|
|
Packit |
5c3484 |
print_define ("SQR_TOOM8_THRESHOLD", SQR_TOOM8_THRESHOLD);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_dc_div (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s.r = 0; /* clear to make speed function do 2n/n */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DC_DIV_QR_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_sbpi1_div_qr;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_dcpi1_div_qr;
|
|
Packit |
5c3484 |
param.min_size = 6;
|
|
Packit |
5c3484 |
one (&dc_div_qr_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DC_DIVAPPR_Q_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_sbpi1_divappr_q;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_dcpi1_divappr_q;
|
|
Packit |
5c3484 |
param.min_size = 6;
|
|
Packit |
5c3484 |
one (&dc_divappr_q_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
static double
|
|
Packit |
5c3484 |
speed_mpn_sbordcpi1_div_qr (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (s->size < DC_DIV_QR_THRESHOLD)
|
|
Packit |
5c3484 |
return speed_mpn_sbpi1_div_qr (s);
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
return speed_mpn_dcpi1_div_qr (s);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_mu_div (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s.r = 0; /* clear to make speed function do 2n/n */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "MU_DIV_QR_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_dcpi1_div_qr;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mu_div_qr;
|
|
Packit |
5c3484 |
param.min_size = mul_toom22_threshold;
|
|
Packit |
5c3484 |
param.max_size = 5000;
|
|
Packit |
5c3484 |
param.step_factor = 0.02;
|
|
Packit |
5c3484 |
one (&mu_div_qr_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "MU_DIVAPPR_Q_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_dcpi1_divappr_q;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mu_divappr_q;
|
|
Packit |
5c3484 |
param.min_size = mul_toom22_threshold;
|
|
Packit |
5c3484 |
param.max_size = 5000;
|
|
Packit |
5c3484 |
param.step_factor = 0.02;
|
|
Packit |
5c3484 |
one (&mu_divappr_q_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "MUPI_DIV_QR_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_sbordcpi1_div_qr;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mupi_div_qr;
|
|
Packit |
5c3484 |
param.min_size = 6;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.max_size = 1000;
|
|
Packit |
5c3484 |
param.step_factor = 0.02;
|
|
Packit |
5c3484 |
one (&mupi_div_qr_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_dc_bdiv (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s.r = 0; /* clear to make speed function do 2n/n*/
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DC_BDIV_QR_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_sbpi1_bdiv_qr;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_dcpi1_bdiv_qr;
|
|
Packit |
5c3484 |
param.min_size = 4;
|
|
Packit |
5c3484 |
one (&dc_bdiv_qr_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DC_BDIV_Q_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_sbpi1_bdiv_q;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_dcpi1_bdiv_q;
|
|
Packit |
5c3484 |
param.min_size = 4;
|
|
Packit |
5c3484 |
one (&dc_bdiv_q_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_mu_bdiv (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s.r = 0; /* clear to make speed function do 2n/n*/
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "MU_BDIV_QR_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_dcpi1_bdiv_qr;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mu_bdiv_qr;
|
|
Packit |
5c3484 |
param.min_size = dc_bdiv_qr_threshold;
|
|
Packit |
5c3484 |
param.max_size = 5000;
|
|
Packit |
5c3484 |
param.step_factor = 0.02;
|
|
Packit |
5c3484 |
one (&mu_bdiv_qr_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "MU_BDIV_Q_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_dcpi1_bdiv_q;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mu_bdiv_q;
|
|
Packit |
5c3484 |
param.min_size = dc_bdiv_q_threshold;
|
|
Packit |
5c3484 |
param.max_size = 5000;
|
|
Packit |
5c3484 |
param.step_factor = 0.02;
|
|
Packit |
5c3484 |
one (&mu_bdiv_q_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_invertappr (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_ni_invertappr;
|
|
Packit |
5c3484 |
param.name = "INV_MULMOD_BNM1_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 5;
|
|
Packit |
5c3484 |
one (&inv_mulmod_bnm1_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_invertappr;
|
|
Packit |
5c3484 |
param.name = "INV_NEWTON_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 5;
|
|
Packit |
5c3484 |
one (&inv_newton_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_invert (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_invert;
|
|
Packit |
5c3484 |
param.name = "INV_APPR_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 5;
|
|
Packit |
5c3484 |
one (&inv_appr_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_binvert (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_binvert;
|
|
Packit |
5c3484 |
param.name = "BINV_NEWTON_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 8; /* pointless with smaller operands */
|
|
Packit |
5c3484 |
one (&binv_newton_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_redc (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
#define TUNE_REDC_2_MAX 100
|
|
Packit |
5c3484 |
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
|
|
Packit |
5c3484 |
#define WANT_REDC_2 1
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if WANT_REDC_2
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "REDC_1_TO_REDC_2_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_redc_1;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_redc_2;
|
|
Packit |
5c3484 |
param.min_size = 1;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.max_size = TUNE_REDC_2_MAX;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
param.stop_factor = 1.5;
|
|
Packit |
5c3484 |
one (&redc_1_to_redc_2_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "REDC_2_TO_REDC_N_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_redc_2;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_redc_n;
|
|
Packit |
5c3484 |
param.min_size = 16;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
one (&redc_2_to_redc_n_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
redc_2_to_redc_n_threshold = 0; /* disable redc_2 */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Never use redc2, measure redc_1 -> redc_n cutoff, store result as
|
|
Packit |
5c3484 |
REDC_1_TO_REDC_2_THRESHOLD. */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "REDC_1_TO_REDC_2_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_redc_1;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_redc_n;
|
|
Packit |
5c3484 |
param.min_size = 16;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
one (&redc_1_to_redc_2_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
|
|
Packit |
5c3484 |
print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "REDC_1_TO_REDC_N_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_redc_1;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_redc_n;
|
|
Packit |
5c3484 |
param.min_size = 16;
|
|
Packit |
5c3484 |
one (&redc_1_to_redc_n_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_matrix22_mul (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "MATRIX22_STRASSEN_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_matrix22_mul;
|
|
Packit |
5c3484 |
param.min_size = 2;
|
|
Packit |
5c3484 |
one (&matrix22_strassen_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_hgcd (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "HGCD_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_hgcd;
|
|
Packit |
5c3484 |
/* We seem to get strange results for small sizes */
|
|
Packit |
5c3484 |
param.min_size = 30;
|
|
Packit |
5c3484 |
one (&hgcd_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_hgcd_appr (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "HGCD_APPR_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_hgcd_appr;
|
|
Packit |
5c3484 |
/* We seem to get strange results for small sizes */
|
|
Packit |
5c3484 |
param.min_size = 50;
|
|
Packit |
5c3484 |
param.stop_since_change = 150;
|
|
Packit |
5c3484 |
one (&hgcd_appr_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_hgcd_reduce (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "HGCD_REDUCE_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_hgcd_reduce;
|
|
Packit |
5c3484 |
param.min_size = 30;
|
|
Packit |
5c3484 |
param.max_size = 7000;
|
|
Packit |
5c3484 |
param.step_factor = 0.04;
|
|
Packit |
5c3484 |
one (&hgcd_reduce_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_gcd_dc (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "GCD_DC_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_gcd;
|
|
Packit |
5c3484 |
param.min_size = hgcd_threshold;
|
|
Packit |
5c3484 |
param.max_size = 3000;
|
|
Packit |
5c3484 |
param.step_factor = 0.02;
|
|
Packit |
5c3484 |
one (&gcd_dc_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_gcdext_dc (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "GCDEXT_DC_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_gcdext;
|
|
Packit |
5c3484 |
param.min_size = hgcd_threshold;
|
|
Packit |
5c3484 |
param.max_size = 3000;
|
|
Packit |
5c3484 |
param.step_factor = 0.02;
|
|
Packit |
5c3484 |
one (&gcdext_dc_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* In tune_powm_sec we compute the table used by the win_size function. The
|
|
Packit |
5c3484 |
cutoff points are in exponent bits, disregarding other operand sizes. It is
|
|
Packit |
5c3484 |
not possible to use the one framework since it currently uses a granularity
|
|
Packit |
5c3484 |
of full limbs.
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* This win_size replaces the variant in the powm code, allowing us to
|
|
Packit |
5c3484 |
control k in the k-ary algorithms. */
|
|
Packit |
5c3484 |
int winsize;
|
|
Packit |
5c3484 |
int
|
|
Packit |
5c3484 |
win_size (mp_bitcnt_t eb)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
return winsize;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_powm_sec (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t n;
|
|
Packit |
5c3484 |
int k, i;
|
|
Packit |
5c3484 |
mp_size_t itch;
|
|
Packit |
5c3484 |
mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff;
|
|
Packit |
5c3484 |
const int n_max = 3000 / GMP_NUMB_BITS;
|
|
Packit |
5c3484 |
const int n_measurements = 5;
|
|
Packit |
5c3484 |
mp_ptr rp, bp, ep, mp, tp;
|
|
Packit |
5c3484 |
double ttab[n_measurements], tk, tkp1;
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
possible_nbits_cutoff = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
k = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
winsize = 10; /* the itch function needs this */
|
|
Packit |
5c3484 |
itch = mpn_sec_powm_itch (n_max, n_max * GMP_NUMB_BITS, n_max);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
rp = TMP_ALLOC_LIMBS (n_max);
|
|
Packit |
5c3484 |
bp = TMP_ALLOC_LIMBS (n_max);
|
|
Packit |
5c3484 |
ep = TMP_ALLOC_LIMBS (n_max);
|
|
Packit |
5c3484 |
mp = TMP_ALLOC_LIMBS (n_max);
|
|
Packit |
5c3484 |
tp = TMP_ALLOC_LIMBS (itch);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mpn_random (bp, n_max);
|
|
Packit |
5c3484 |
mpn_random (mp, n_max);
|
|
Packit |
5c3484 |
mp[0] |= 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* How about taking the M operand size into account?
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming
|
|
Packit |
5c3484 |
B = O(M)).
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Using k-ary and no sliding window, the precomputation will need time
|
|
Packit |
5c3484 |
O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) +
|
|
Packit |
5c3484 |
O(log(E)/k*M(N)), for the squarings, multiplications, respectively.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
An operation R=powm_sec(B,E,N) will take time like powm.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the
|
|
Packit |
5c3484 |
main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) +
|
|
Packit |
5c3484 |
O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full
|
|
Packit |
5c3484 |
table reads, respectively. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
printf ("#define POWM_SEC_TABLE ");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* For nbits == 1, we should always use k == 1, so no need to tune
|
|
Packit |
5c3484 |
that. Starting with nbits == 2 also ensure that nbits always is
|
|
Packit |
5c3484 |
larger than the windowsize k+1. */
|
|
Packit |
5c3484 |
for (nbits = 2; nbits <= n_max * GMP_NUMB_BITS; )
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n = (nbits - 1) / GMP_NUMB_BITS + 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Generate E such that sliding-window for k and k+1 works equally
|
|
Packit |
5c3484 |
well/poorly (but sliding is not used in powm_sec, of course). */
|
|
Packit |
5c3484 |
for (i = 0; i < n; i++)
|
|
Packit |
5c3484 |
ep[i] = ~CNST_LIMB(0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
winsize = k;
|
|
Packit |
5c3484 |
for (i = 0; i < n_measurements; i++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
speed_starttime ();
|
|
Packit |
5c3484 |
mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
|
|
Packit |
5c3484 |
ttab[i] = speed_endtime ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
tk = median (ttab, n_measurements);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
winsize = k + 1;
|
|
Packit |
5c3484 |
speed_starttime ();
|
|
Packit |
5c3484 |
for (i = 0; i < n_measurements; i++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
speed_starttime ();
|
|
Packit |
5c3484 |
mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
|
|
Packit |
5c3484 |
ttab[i] = speed_endtime ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
tkp1 = median (ttab, n_measurements);
|
|
Packit |
5c3484 |
/*
|
|
Packit |
5c3484 |
printf ("testing: %ld, %d", nbits, k, ep[n-1]);
|
|
Packit |
5c3484 |
printf (" %10.5f %10.5f\n", tk, tkp1);
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
if (tkp1 < tk)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
if (possible_nbits_cutoff)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Two consecutive sizes indicate k increase, obey. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Must always have x[k] >= k */
|
|
Packit |
5c3484 |
ASSERT_ALWAYS (possible_nbits_cutoff >= k);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (k > 1)
|
|
Packit |
5c3484 |
printf (",");
|
|
Packit |
5c3484 |
printf ("%ld", (long) possible_nbits_cutoff);
|
|
Packit |
5c3484 |
k++;
|
|
Packit |
5c3484 |
possible_nbits_cutoff = 0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* One measurement indicate k increase, save nbits for further
|
|
Packit |
5c3484 |
consideration. */
|
|
Packit |
5c3484 |
/* The new larger k gets used for sizes > the cutoff
|
|
Packit |
5c3484 |
value, hence the cutoff should be one less than the
|
|
Packit |
5c3484 |
smallest size where it gives a speedup. */
|
|
Packit |
5c3484 |
possible_nbits_cutoff = nbits - 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
possible_nbits_cutoff = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
nbits_next = nbits * 65 / 64;
|
|
Packit |
5c3484 |
nbits = nbits_next + (nbits_next == nbits);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
printf ("\n");
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* size_extra==1 reflects the fact that with high<divisor one division is
   always skipped.  Forcing high<divisor while testing ensures consistency
   while stepping through sizes, ie. that size-1 divides will be done each
   time.

   min_size==2 and min_is_always are used so that if plain division is only
   better at size==1 then don't bother including that code just for that
   case, instead go with preinv always and get a size saving.  */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define DIV_1_PARAMS \
|
|
Packit |
5c3484 |
param.check_size = 256; \
|
|
Packit |
5c3484 |
param.min_size = 2; \
|
|
Packit |
5c3484 |
param.min_is_always = 1; \
|
|
Packit |
5c3484 |
param.data_high = DATA_HIGH_LT_R; \
|
|
Packit |
5c3484 |
param.size_extra = 1; \
|
|
Packit |
5c3484 |
param.stop_factor = 2.0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Speed routine selected for timing mpn_divrem_1.  tune_divrem_1 points
   this at speed_mpn_divrem_1 or speed_mpn_divrem_1_tune; tune_divexact_1
   asserts it is non-NULL and then uses it as a comparison function.  */
double (*tuned_speed_mpn_divrem_1) (struct speed_params *);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_divrem_1 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* plain version by default */
|
|
Packit |
5c3484 |
tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* No support for tuning native assembler code, do that by hand and put
|
|
Packit |
5c3484 |
the results in the .asm file, there's no need for such thresholds to
|
|
Packit |
5c3484 |
appear in gmp-mparam.h. */
|
|
Packit |
5c3484 |
if (HAVE_NATIVE_mpn_divrem_1)
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (GMP_NAIL_BITS != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("DIVREM_1_NORM_THRESHOLD", MP_SIZE_T_MAX,
|
|
Packit |
5c3484 |
"no preinv with nails");
|
|
Packit |
5c3484 |
print_define_remark ("DIVREM_1_UNNORM_THRESHOLD", MP_SIZE_T_MAX,
|
|
Packit |
5c3484 |
"no preinv with nails");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (UDIV_PREINV_ALWAYS)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("DIVREM_1_NORM_THRESHOLD", 0L, "preinv always");
|
|
Packit |
5c3484 |
print_define ("DIVREM_1_UNNORM_THRESHOLD", 0L);
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1_tune;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Tune for the integer part of mpn_divrem_1. This will very possibly be
|
|
Packit |
5c3484 |
a bit out for the fractional part, but that's too bad, the integer part
|
|
Packit |
5c3484 |
is more important. */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DIVREM_1_NORM_THRESHOLD";
|
|
Packit |
5c3484 |
DIV_1_PARAMS;
|
|
Packit |
5c3484 |
s.r = randlimb_norm ();
|
|
Packit |
5c3484 |
param.function = speed_mpn_divrem_1_tune;
|
|
Packit |
5c3484 |
one (&divrem_1_norm_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DIVREM_1_UNNORM_THRESHOLD";
|
|
Packit |
5c3484 |
DIV_1_PARAMS;
|
|
Packit |
5c3484 |
s.r = randlimb_half ();
|
|
Packit |
5c3484 |
param.function = speed_mpn_divrem_1_tune;
|
|
Packit |
5c3484 |
one (&divrem_1_unnorm_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_div_qr_1 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
double t1, t2;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (!HAVE_NATIVE_mpn_div_qr_1n_pi1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
double t1, t2;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s.size = 10;
|
|
Packit |
5c3484 |
s.r = randlimb_norm ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t1 = tuneup_measure (speed_mpn_div_qr_1n_pi1_1, ¶m, &s);
|
|
Packit |
5c3484 |
t2 = tuneup_measure (speed_mpn_div_qr_1n_pi1_2, ¶m, &s);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (t1 == -1.0 || t2 == -1.0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("Oops, can't measure all mpn_div_qr_1n_pi1 methods at %ld\n",
|
|
Packit |
5c3484 |
(long) s.size);
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
div_qr_1n_pi1_method = (t1 < t2) ? 1 : 2;
|
|
Packit |
5c3484 |
print_define ("DIV_QR_1N_PI1_METHOD", div_qr_1n_pi1_method);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DIV_QR_1_NORM_THRESHOLD";
|
|
Packit |
5c3484 |
DIV_1_PARAMS;
|
|
Packit |
5c3484 |
param.min_size = 1;
|
|
Packit |
5c3484 |
param.min_is_always = 0;
|
|
Packit |
5c3484 |
s.r = randlimb_norm ();
|
|
Packit |
5c3484 |
param.function = speed_mpn_div_qr_1_tune;
|
|
Packit |
5c3484 |
one (&div_qr_1_norm_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DIV_QR_1_UNNORM_THRESHOLD";
|
|
Packit |
5c3484 |
DIV_1_PARAMS;
|
|
Packit |
5c3484 |
param.min_size = 1;
|
|
Packit |
5c3484 |
param.min_is_always = 0;
|
|
Packit |
5c3484 |
s.r = randlimb_half();
|
|
Packit |
5c3484 |
param.function = speed_mpn_div_qr_1_tune;
|
|
Packit |
5c3484 |
one (&div_qr_1_unnorm_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_mod_1 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* No support for tuning native assembler code, do that by hand and put
|
|
Packit |
5c3484 |
the results in the .asm file, there's no need for such thresholds to
|
|
Packit |
5c3484 |
appear in gmp-mparam.h. */
|
|
Packit |
5c3484 |
if (HAVE_NATIVE_mpn_mod_1)
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (GMP_NAIL_BITS != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("MOD_1_NORM_THRESHOLD", MP_SIZE_T_MAX,
|
|
Packit |
5c3484 |
"no preinv with nails");
|
|
Packit |
5c3484 |
print_define_remark ("MOD_1_UNNORM_THRESHOLD", MP_SIZE_T_MAX,
|
|
Packit |
5c3484 |
"no preinv with nails");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (!HAVE_NATIVE_mpn_mod_1_1p)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
double t1, t2;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s.size = 10;
|
|
Packit |
5c3484 |
s.r = randlimb_half ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t1 = tuneup_measure (speed_mpn_mod_1_1_1, ¶m, &s);
|
|
Packit |
5c3484 |
t2 = tuneup_measure (speed_mpn_mod_1_1_2, ¶m, &s);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (t1 == -1.0 || t2 == -1.0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("Oops, can't measure all mpn_mod_1_1 methods at %ld\n",
|
|
Packit |
5c3484 |
(long) s.size);
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
mod_1_1p_method = (t1 < t2) ? 1 : 2;
|
|
Packit |
5c3484 |
print_define ("MOD_1_1P_METHOD", mod_1_1p_method);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (UDIV_PREINV_ALWAYS)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define ("MOD_1_NORM_THRESHOLD", 0L);
|
|
Packit |
5c3484 |
print_define ("MOD_1_UNNORM_THRESHOLD", 0L);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "MOD_1_NORM_THRESHOLD";
|
|
Packit |
5c3484 |
DIV_1_PARAMS;
|
|
Packit |
5c3484 |
s.r = randlimb_norm ();
|
|
Packit |
5c3484 |
param.function = speed_mpn_mod_1_tune;
|
|
Packit |
5c3484 |
one (&mod_1_norm_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "MOD_1_UNNORM_THRESHOLD";
|
|
Packit |
5c3484 |
DIV_1_PARAMS;
|
|
Packit |
5c3484 |
s.r = randlimb_half ();
|
|
Packit |
5c3484 |
param.function = speed_mpn_mod_1_tune;
|
|
Packit |
5c3484 |
one (&mod_1_unnorm_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.check_size = 256;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s.r = randlimb_norm ();
|
|
Packit |
5c3484 |
param.function = speed_mpn_mod_1_tune;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "MOD_1N_TO_MOD_1_1_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 2;
|
|
Packit |
5c3484 |
one (&mod_1n_to_mod_1_1_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.check_size = 256;
|
|
Packit |
5c3484 |
s.r = randlimb_half ();
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_mod_1_1;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mod_1_2;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.name = "MOD_1_1_TO_MOD_1_2_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 2;
|
|
Packit |
5c3484 |
one (&mod_1_1_to_mod_1_2_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_mod_1_2;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mod_1_4;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.name = "MOD_1_2_TO_MOD_1_4_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 1;
|
|
Packit |
5c3484 |
one (&mod_1_2_to_mod_1_4_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mod_1_1_to_mod_1_2_threshold >= mod_1_2_to_mod_1_4_threshold)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Never use mod_1_2, measure mod_1_1 -> mod_1_4 */
|
|
Packit |
5c3484 |
mod_1_2_to_mod_1_4_threshold = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_mod_1_1;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mod_1_4;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.name = "MOD_1_1_TO_MOD_1_4_THRESHOLD fake";
|
|
Packit |
5c3484 |
param.min_size = 2;
|
|
Packit |
5c3484 |
one (&mod_1_1_to_mod_1_2_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpn_mod_1_tune;
|
|
Packit |
5c3484 |
param.function2 = NULL;
|
|
Packit |
5c3484 |
param.name = "MOD_1U_TO_MOD_1_1_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 2;
|
|
Packit |
5c3484 |
param.min_is_always = 0;
|
|
Packit |
5c3484 |
one (&mod_1u_to_mod_1_1_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mod_1u_to_mod_1_1_threshold >= mod_1_1_to_mod_1_2_threshold)
|
|
Packit |
5c3484 |
mod_1_1_to_mod_1_2_threshold = 0;
|
|
Packit |
5c3484 |
if (mod_1u_to_mod_1_1_threshold >= mod_1_2_to_mod_1_4_threshold)
|
|
Packit |
5c3484 |
mod_1_2_to_mod_1_4_threshold = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define_remark ("MOD_1U_TO_MOD_1_1_THRESHOLD", mod_1u_to_mod_1_1_threshold, NULL);
|
|
Packit |
5c3484 |
print_define_remark ("MOD_1_1_TO_MOD_1_2_THRESHOLD", mod_1_1_to_mod_1_2_threshold,
|
|
Packit |
5c3484 |
mod_1_1_to_mod_1_2_threshold == 0 ? "never mpn_mod_1_1p" : NULL);
|
|
Packit |
5c3484 |
print_define_remark ("MOD_1_2_TO_MOD_1_4_THRESHOLD", mod_1_2_to_mod_1_4_threshold,
|
|
Packit |
5c3484 |
mod_1_2_to_mod_1_4_threshold == 0 ? "never mpn_mod_1s_2p" : NULL);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.check_size = 256;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "PREINV_MOD_1_TO_MOD_1_THRESHOLD";
|
|
Packit |
5c3484 |
s.r = randlimb_norm ();
|
|
Packit |
5c3484 |
param.function = speed_mpn_preinv_mod_1;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mod_1_tune;
|
|
Packit |
5c3484 |
param.min_size = 1;
|
|
Packit |
5c3484 |
one (&preinv_mod_1_to_mod_1_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* A non-zero DIVREM_1_UNNORM_THRESHOLD (or DIVREM_1_NORM_THRESHOLD) would
|
|
Packit |
5c3484 |
imply that udiv_qrnnd_preinv is worth using, but it seems most
|
|
Packit |
5c3484 |
straightforward to compare mpn_preinv_divrem_1 and mpn_divrem_1_div
|
|
Packit |
5c3484 |
directly. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_preinv_divrem_1 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
speed_function_t divrem_1;
|
|
Packit |
5c3484 |
const char *divrem_1_name;
|
|
Packit |
5c3484 |
double t1, t2;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (GMP_NAIL_BITS != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("USE_PREINV_DIVREM_1", 0, "no preinv with nails");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Any native version of mpn_preinv_divrem_1 is assumed to exist because
|
|
Packit |
5c3484 |
it's faster than mpn_divrem_1. */
|
|
Packit |
5c3484 |
if (HAVE_NATIVE_mpn_preinv_divrem_1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("USE_PREINV_DIVREM_1", 1, "native");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* If udiv_qrnnd_preinv is the only division method then of course
|
|
Packit |
5c3484 |
mpn_preinv_divrem_1 should be used. */
|
|
Packit |
5c3484 |
if (UDIV_PREINV_ALWAYS)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("USE_PREINV_DIVREM_1", 1, "preinv always");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* If we've got an assembler version of mpn_divrem_1, then compare against
|
|
Packit |
5c3484 |
that, not the mpn_divrem_1_div generic C. */
|
|
Packit |
5c3484 |
if (HAVE_NATIVE_mpn_divrem_1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
divrem_1 = speed_mpn_divrem_1;
|
|
Packit |
5c3484 |
divrem_1_name = "mpn_divrem_1";
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
divrem_1 = speed_mpn_divrem_1_div;
|
|
Packit |
5c3484 |
divrem_1_name = "mpn_divrem_1_div";
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.data_high = DATA_HIGH_LT_R; /* allow skip one division */
|
|
Packit |
5c3484 |
s.size = 200; /* generous but not too big */
|
|
Packit |
5c3484 |
/* Divisor, nonzero. Unnormalized so as to exercise the shift!=0 case,
|
|
Packit |
5c3484 |
since in general that's probably most common, though in fact for a
|
|
Packit |
5c3484 |
64-bit limb mp_bases[10].big_base is normalized. */
|
|
Packit |
5c3484 |
s.r = urandom() & (GMP_NUMB_MASK >> 4);
|
|
Packit |
5c3484 |
if (s.r == 0) s.r = 123;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t1 = tuneup_measure (speed_mpn_preinv_divrem_1, ¶m, &s);
|
|
Packit |
5c3484 |
t2 = tuneup_measure (divrem_1, ¶m, &s);
|
|
Packit |
5c3484 |
if (t1 == -1.0 || t2 == -1.0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("Oops, can't measure mpn_preinv_divrem_1 and %s at %ld\n",
|
|
Packit |
5c3484 |
divrem_1_name, (long) s.size);
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("size=%ld, mpn_preinv_divrem_1 %.9f, %s %.9f\n",
|
|
Packit |
5c3484 |
(long) s.size, t1, divrem_1_name, t2);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define_remark ("USE_PREINV_DIVREM_1", (mp_size_t) (t1 < t2), NULL);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_divrem_2 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* No support for tuning native assembler code, do that by hand and put
|
|
Packit |
5c3484 |
the results in the .asm file, and there's no need for such thresholds
|
|
Packit |
5c3484 |
to appear in gmp-mparam.h. */
|
|
Packit |
5c3484 |
if (HAVE_NATIVE_mpn_divrem_2)
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (GMP_NAIL_BITS != 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("DIVREM_2_THRESHOLD", MP_SIZE_T_MAX,
|
|
Packit |
5c3484 |
"no preinv with nails");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (UDIV_PREINV_ALWAYS)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("DIVREM_2_THRESHOLD", 0L, "preinv always");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Tune for the integer part of mpn_divrem_2. This will very possibly be
|
|
Packit |
5c3484 |
a bit out for the fractional part, but that's too bad, the integer part
|
|
Packit |
5c3484 |
is more important.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
min_size must be >=2 since nsize>=2 is required, but is set to 4 to save
|
|
Packit |
5c3484 |
code space if plain division is better only at size==2 or size==3. */
|
|
Packit |
5c3484 |
param.name = "DIVREM_2_THRESHOLD";
|
|
Packit |
5c3484 |
param.check_size = 256;
|
|
Packit |
5c3484 |
param.min_size = 4;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
param.size_extra = 2; /* does qsize==nsize-2 divisions */
|
|
Packit |
5c3484 |
param.stop_factor = 2.0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s.r = randlimb_norm ();
|
|
Packit |
5c3484 |
param.function = speed_mpn_divrem_2;
|
|
Packit |
5c3484 |
one (&divrem_2_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_div_qr_2 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "DIV_QR_2_PI2_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_div_qr_2n;
|
|
Packit |
5c3484 |
param.check_size = 500;
|
|
Packit |
5c3484 |
param.min_size = 4;
|
|
Packit |
5c3484 |
one (&div_qr_2_pi2_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
|
|
Packit |
5c3484 |
tune for that. Its speed can differ on odd or even divisor, so take an
|
|
Packit |
5c3484 |
average threshold for the two.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mpn_divrem_1 can vary with high
|
|
Packit |
5c3484 |
might not vary that way, but don't test this since high
|
|
Packit |
5c3484 |
expected to occur often with small divisors. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_divexact_1 (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
mp_size_t thresh[2], average;
|
|
Packit |
5c3484 |
int low, i;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Any native mpn_divexact_1 is assumed to incorporate all the speed of a
|
|
Packit |
5c3484 |
full mpn_divrem_1. */
|
|
Packit |
5c3484 |
if (HAVE_NATIVE_mpn_divexact_1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("DIVEXACT_1_THRESHOLD", 0, "always (native)");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT_ALWAYS (tuned_speed_mpn_divrem_1 != NULL);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "DIVEXACT_1_THRESHOLD";
|
|
Packit |
5c3484 |
param.data_high = DATA_HIGH_GE_R;
|
|
Packit |
5c3484 |
param.check_size = 256;
|
|
Packit |
5c3484 |
param.min_size = 2;
|
|
Packit |
5c3484 |
param.stop_factor = 1.5;
|
|
Packit |
5c3484 |
param.function = tuned_speed_mpn_divrem_1;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_divexact_1;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define_start (param.name);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
for (low = 0; low <= 1; low++)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s.r = randlimb_half();
|
|
Packit |
5c3484 |
if (low == 0)
|
|
Packit |
5c3484 |
s.r |= 1;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
s.r &= ~CNST_LIMB(7);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
one (&thresh[low], ¶m;;
|
|
Packit |
5c3484 |
if (option_trace)
|
|
Packit |
5c3484 |
printf ("low=%d thresh %ld\n", low, (long) thresh[low]);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (thresh[low] == MP_SIZE_T_MAX)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
average = MP_SIZE_T_MAX;
|
|
Packit |
5c3484 |
goto divexact_1_done;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_trace)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("average of:");
|
|
Packit |
5c3484 |
for (i = 0; i < numberof(thresh); i++)
|
|
Packit |
5c3484 |
printf (" %ld", (long) thresh[i]);
|
|
Packit |
5c3484 |
printf ("\n");
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
average = 0;
|
|
Packit |
5c3484 |
for (i = 0; i < numberof(thresh); i++)
|
|
Packit |
5c3484 |
average += thresh[i];
|
|
Packit |
5c3484 |
average /= numberof(thresh);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* If divexact turns out to be better as early as 3 limbs, then use it
|
|
Packit |
5c3484 |
always, so as to reduce code size and conditional jumps. */
|
|
Packit |
5c3484 |
if (average <= 3)
|
|
Packit |
5c3484 |
average = 0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
divexact_1_done:
|
|
Packit |
5c3484 |
print_define_end (param.name, average);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* The generic mpn_modexact_1_odd skips a divide step if high
|
|
Packit |
5c3484 |
same as mpn_mod_1, but this might not be true of an assembler
|
|
Packit |
5c3484 |
implementation. The threshold used is an average based on data where a
|
|
Packit |
5c3484 |
divide can be skipped and where it can't.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
If modexact turns out to be better as early as 3 limbs, then use it
|
|
Packit |
5c3484 |
always, so as to reduce code size and conditional jumps. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_modexact_1_odd (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
mp_size_t thresh_lt, thresh_ge, average;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if 0
|
|
Packit |
5c3484 |
/* Any native mpn_modexact_1_odd is assumed to incorporate all the speed
|
|
Packit |
5c3484 |
of a full mpn_mod_1. */
|
|
Packit |
5c3484 |
if (HAVE_NATIVE_mpn_modexact_1_odd)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
print_define_remark ("BMOD_1_TO_MOD_1_THRESHOLD", MP_SIZE_T_MAX, "always bmod_1");
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "BMOD_1_TO_MOD_1_THRESHOLD";
|
|
Packit |
5c3484 |
param.check_size = 256;
|
|
Packit |
5c3484 |
param.min_size = 2;
|
|
Packit |
5c3484 |
param.stop_factor = 1.5;
|
|
Packit |
5c3484 |
param.function = speed_mpn_modexact_1c_odd;
|
|
Packit |
5c3484 |
param.function2 = speed_mpn_mod_1_tune;
|
|
Packit |
5c3484 |
param.noprint = 1;
|
|
Packit |
5c3484 |
s.r = randlimb_half () | 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define_start (param.name);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.data_high = DATA_HIGH_LT_R;
|
|
Packit |
5c3484 |
one (&thresh_lt, ¶m;;
|
|
Packit |
5c3484 |
if (option_trace)
|
|
Packit |
5c3484 |
printf ("lt thresh %ld\n", (long) thresh_lt);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
average = thresh_lt;
|
|
Packit |
5c3484 |
if (thresh_lt != MP_SIZE_T_MAX)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
param.data_high = DATA_HIGH_GE_R;
|
|
Packit |
5c3484 |
one (&thresh_ge, ¶m;;
|
|
Packit |
5c3484 |
if (option_trace)
|
|
Packit |
5c3484 |
printf ("ge thresh %ld\n", (long) thresh_ge);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (thresh_ge != MP_SIZE_T_MAX)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
average = (thresh_ge + thresh_lt) / 2;
|
|
Packit |
5c3484 |
if (thresh_ge <= 3)
|
|
Packit |
5c3484 |
average = 0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define_end (param.name, average);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_jacobi_base (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
double t1, t2, t3, t4;
|
|
Packit |
5c3484 |
int method;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
s.size = GMP_LIMB_BITS * 3 / 4;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t1 = tuneup_measure (speed_mpn_jacobi_base_1, ¶m, &s);
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("size=%ld, mpn_jacobi_base_1 %.9f\n", (long) s.size, t1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t2 = tuneup_measure (speed_mpn_jacobi_base_2, ¶m, &s);
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("size=%ld, mpn_jacobi_base_2 %.9f\n", (long) s.size, t2);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t3 = tuneup_measure (speed_mpn_jacobi_base_3, ¶m, &s);
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("size=%ld, mpn_jacobi_base_3 %.9f\n", (long) s.size, t3);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t4 = tuneup_measure (speed_mpn_jacobi_base_4, ¶m, &s);
|
|
Packit |
5c3484 |
if (option_trace >= 1)
|
|
Packit |
5c3484 |
printf ("size=%ld, mpn_jacobi_base_4 %.9f\n", (long) s.size, t4);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0 || t4 == -1.0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
printf ("Oops, can't measure all mpn_jacobi_base methods at %ld\n",
|
|
Packit |
5c3484 |
(long) s.size);
|
|
Packit |
5c3484 |
abort ();
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (t1 < t2 && t1 < t3 && t1 < t4)
|
|
Packit |
5c3484 |
method = 1;
|
|
Packit |
5c3484 |
else if (t2 < t3 && t2 < t4)
|
|
Packit |
5c3484 |
method = 2;
|
|
Packit |
5c3484 |
else if (t3 < t4)
|
|
Packit |
5c3484 |
method = 3;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
method = 4;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
print_define ("JACOBI_BASE_METHOD", method);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_get_str (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Tune for decimal, it being most common. Some rough testing suggests
|
|
Packit |
5c3484 |
other bases are different, but not by very much. */
|
|
Packit |
5c3484 |
s.r = 10;
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
GET_STR_PRECOMPUTE_THRESHOLD = 0;
|
|
Packit |
5c3484 |
param.name = "GET_STR_DC_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_get_str;
|
|
Packit |
5c3484 |
param.min_size = 4;
|
|
Packit |
5c3484 |
param.max_size = GET_STR_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
one (&get_str_dc_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.name = "GET_STR_PRECOMPUTE_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_get_str;
|
|
Packit |
5c3484 |
param.min_size = GET_STR_DC_THRESHOLD;
|
|
Packit |
5c3484 |
param.max_size = GET_STR_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
one (&get_str_precompute_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
double
|
|
Packit |
5c3484 |
speed_mpn_pre_set_str (struct speed_params *s)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
unsigned char *str;
|
|
Packit |
5c3484 |
mp_ptr wp;
|
|
Packit |
5c3484 |
mp_size_t wn;
|
|
Packit |
5c3484 |
unsigned i;
|
|
Packit |
5c3484 |
int base;
|
|
Packit |
5c3484 |
double t;
|
|
Packit |
5c3484 |
mp_ptr powtab_mem, tp;
|
|
Packit |
5c3484 |
powers_t powtab[GMP_LIMB_BITS];
|
|
Packit |
5c3484 |
mp_size_t un;
|
|
Packit |
5c3484 |
int chars_per_limb;
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
SPEED_RESTRICT_COND (s->size >= 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
base = s->r == 0 ? 10 : s->r;
|
|
Packit |
5c3484 |
SPEED_RESTRICT_COND (base >= 2 && base <= 256);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
str = (unsigned char *) TMP_ALLOC (s->size);
|
|
Packit |
5c3484 |
for (i = 0; i < s->size; i++)
|
|
Packit |
5c3484 |
str[i] = s->xp[i] % base;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);
|
|
Packit |
5c3484 |
SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* use this during development to check wn is big enough */
|
|
Packit |
5c3484 |
/*
|
|
Packit |
5c3484 |
ASSERT_ALWAYS (mpn_set_str (wp, str, s->size, base) <= wn);
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
speed_operand_src (s, (mp_ptr) str, s->size/GMP_LIMB_BYTES);
|
|
Packit |
5c3484 |
speed_operand_dst (s, wp, wn);
|
|
Packit |
5c3484 |
speed_cache_fill (s);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
chars_per_limb = mp_bases[base].chars_per_limb;
|
|
Packit |
5c3484 |
un = s->size / chars_per_limb + 1;
|
|
Packit |
5c3484 |
powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));
|
|
Packit |
5c3484 |
mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);
|
|
Packit |
5c3484 |
tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
speed_starttime ();
|
|
Packit |
5c3484 |
i = s->reps;
|
|
Packit |
5c3484 |
do
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_pre_set_str (wp, str, s->size, powtab, tp);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
while (--i != 0);
|
|
Packit |
5c3484 |
t = speed_endtime ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return t;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_set_str (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
s.r = 10; /* decimal */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
SET_STR_PRECOMPUTE_THRESHOLD = 0;
|
|
Packit |
5c3484 |
param.step_factor = 0.01;
|
|
Packit |
5c3484 |
param.name = "SET_STR_DC_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_pre_set_str;
|
|
Packit |
5c3484 |
param.min_size = 100;
|
|
Packit |
5c3484 |
param.max_size = 50000;
|
|
Packit |
5c3484 |
one (&set_str_dc_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
param.step_factor = 0.02;
|
|
Packit |
5c3484 |
param.name = "SET_STR_PRECOMPUTE_THRESHOLD";
|
|
Packit |
5c3484 |
param.function = speed_mpn_set_str;
|
|
Packit |
5c3484 |
param.min_size = SET_STR_DC_THRESHOLD;
|
|
Packit |
5c3484 |
param.max_size = 100000;
|
|
Packit |
5c3484 |
one (&set_str_precompute_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_fft_mul (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct fft_param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_fft_max_size == 0)
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.table_name = "MUL_FFT_TABLE3";
|
|
Packit |
5c3484 |
param.threshold_name = "MUL_FFT_THRESHOLD";
|
|
Packit |
5c3484 |
param.p_threshold = &mul_fft_threshold;
|
|
Packit |
5c3484 |
param.modf_threshold_name = "MUL_FFT_MODF_THRESHOLD";
|
|
Packit |
5c3484 |
param.p_modf_threshold = &mul_fft_modf_threshold;
|
|
Packit |
5c3484 |
param.first_size = MUL_TOOM33_THRESHOLD / 2;
|
|
Packit |
5c3484 |
param.max_size = option_fft_max_size;
|
|
Packit |
5c3484 |
param.function = speed_mpn_fft_mul;
|
|
Packit |
5c3484 |
param.mul_modf_function = speed_mpn_mul_fft;
|
|
Packit |
5c3484 |
param.mul_function = speed_mpn_mul_n;
|
|
Packit |
5c3484 |
param.sqr = 0;
|
|
Packit |
5c3484 |
fft (¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_fft_sqr (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct fft_param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (option_fft_max_size == 0)
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.table_name = "SQR_FFT_TABLE3";
|
|
Packit |
5c3484 |
param.threshold_name = "SQR_FFT_THRESHOLD";
|
|
Packit |
5c3484 |
param.p_threshold = &sqr_fft_threshold;
|
|
Packit |
5c3484 |
param.modf_threshold_name = "SQR_FFT_MODF_THRESHOLD";
|
|
Packit |
5c3484 |
param.p_modf_threshold = &sqr_fft_modf_threshold;
|
|
Packit |
5c3484 |
param.first_size = SQR_TOOM3_THRESHOLD / 2;
|
|
Packit |
5c3484 |
param.max_size = option_fft_max_size;
|
|
Packit |
5c3484 |
param.function = speed_mpn_fft_sqr;
|
|
Packit |
5c3484 |
param.mul_modf_function = speed_mpn_mul_fft_sqr;
|
|
Packit |
5c3484 |
param.mul_function = speed_mpn_sqr;
|
|
Packit |
5c3484 |
param.sqr = 1;
|
|
Packit |
5c3484 |
fft (¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
tune_fac_ui (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
static struct param_t param;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.function = speed_mpz_fac_ui_tune;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "FAC_DSC_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 70;
|
|
Packit |
5c3484 |
param.max_size = FAC_DSC_THRESHOLD_LIMIT;
|
|
Packit |
5c3484 |
one (&fac_dsc_threshold, ¶m;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
param.name = "FAC_ODD_THRESHOLD";
|
|
Packit |
5c3484 |
param.min_size = 22;
|
|
Packit |
5c3484 |
param.stop_factor = 1.7;
|
|
Packit |
5c3484 |
param.min_is_always = 1;
|
|
Packit |
5c3484 |
one (&fac_odd_threshold, ¶m;;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
all (void)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
time_t start_time, end_time;
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
SPEED_TMP_ALLOC_LIMBS (s.xp_block, SPEED_BLOCK_SIZE, 0);
|
|
Packit |
5c3484 |
SPEED_TMP_ALLOC_LIMBS (s.yp_block, SPEED_BLOCK_SIZE, 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mpn_random (s.xp_block, SPEED_BLOCK_SIZE);
|
|
Packit |
5c3484 |
mpn_random (s.yp_block, SPEED_BLOCK_SIZE);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
fprintf (stderr, "Parameters for %s\n", GMP_MPARAM_H_SUGGEST);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
speed_time_init ();
|
|
Packit |
5c3484 |
fprintf (stderr, "Using: %s\n", speed_time_string);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
fprintf (stderr, "speed_precision %d", speed_precision);
|
|
Packit |
5c3484 |
if (speed_unittime == 1.0)
|
|
Packit |
5c3484 |
fprintf (stderr, ", speed_unittime 1 cycle");
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
fprintf (stderr, ", speed_unittime %.2e secs", speed_unittime);
|
|
Packit |
5c3484 |
if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
|
|
Packit |
5c3484 |
fprintf (stderr, ", CPU freq unknown\n");
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
fprintf (stderr, ", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
fprintf (stderr, "DEFAULT_MAX_SIZE %d, fft_max_size %ld\n",
|
|
Packit |
5c3484 |
DEFAULT_MAX_SIZE, (long) option_fft_max_size);
|
|
Packit |
5c3484 |
fprintf (stderr, "\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
time (&start_time);
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
struct tm *tp;
|
|
Packit |
5c3484 |
tp = localtime (&start_time);
|
|
Packit |
5c3484 |
printf ("/* Generated by tuneup.c, %d-%02d-%02d, ",
|
|
Packit |
5c3484 |
tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#ifdef __GNUC__
|
|
Packit |
5c3484 |
/* gcc sub-minor version doesn't seem to come through as a define */
|
|
Packit |
5c3484 |
printf ("gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
|
|
Packit |
5c3484 |
#define PRINTED_COMPILER
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
#if defined (__SUNPRO_C)
|
|
Packit |
5c3484 |
printf ("Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
|
|
Packit |
5c3484 |
#define PRINTED_COMPILER
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
#if ! defined (__GNUC__) && defined (__sgi) && defined (_COMPILER_VERSION)
|
|
Packit |
5c3484 |
/* gcc defines __sgi and _COMPILER_VERSION on irix 6, avoid that */
|
|
Packit |
5c3484 |
printf ("MIPSpro C %d.%d.%d */\n",
|
|
Packit |
5c3484 |
_COMPILER_VERSION / 100,
|
|
Packit |
5c3484 |
_COMPILER_VERSION / 10 % 10,
|
|
Packit |
5c3484 |
_COMPILER_VERSION % 10);
|
|
Packit |
5c3484 |
#define PRINTED_COMPILER
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
#if defined (__DECC) && defined (__DECC_VER)
|
|
Packit |
5c3484 |
printf ("DEC C %d */\n", __DECC_VER);
|
|
Packit |
5c3484 |
#define PRINTED_COMPILER
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
#if ! defined (PRINTED_COMPILER)
|
|
Packit |
5c3484 |
printf ("system compiler */\n");
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
printf ("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_divrem_1 ();
|
|
Packit |
5c3484 |
tune_mod_1 ();
|
|
Packit |
5c3484 |
tune_preinv_divrem_1 ();
|
|
Packit |
5c3484 |
tune_div_qr_1 ();
|
|
Packit |
5c3484 |
#if 0
|
|
Packit |
5c3484 |
tune_divrem_2 ();
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
tune_div_qr_2 ();
|
|
Packit |
5c3484 |
tune_divexact_1 ();
|
|
Packit |
5c3484 |
tune_modexact_1_odd ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_mul_n ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_mul ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_sqr ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_mulmid ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_mulmod_bnm1 ();
|
|
Packit |
5c3484 |
tune_sqrmod_bnm1 ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_fft_mul ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_fft_sqr ();
|
|
Packit |
5c3484 |
printf ("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_mullo ();
|
|
Packit |
5c3484 |
tune_sqrlo ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_dc_div ();
|
|
Packit |
5c3484 |
tune_dc_bdiv ();
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
tune_invertappr ();
|
|
Packit |
5c3484 |
tune_invert ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_binvert ();
|
|
Packit |
5c3484 |
tune_redc ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_mu_div ();
|
|
Packit |
5c3484 |
tune_mu_bdiv ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_powm_sec ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_get_str ();
|
|
Packit |
5c3484 |
tune_set_str ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_fac_ui ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tune_matrix22_mul ();
|
|
Packit |
5c3484 |
tune_hgcd ();
|
|
Packit |
5c3484 |
tune_hgcd_appr ();
|
|
Packit |
5c3484 |
tune_hgcd_reduce();
|
|
Packit |
5c3484 |
tune_gcd_dc ();
|
|
Packit |
5c3484 |
tune_gcdext_dc ();
|
|
Packit |
5c3484 |
tune_jacobi_base ();
|
|
Packit |
5c3484 |
printf("\n");
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
time (&end_time);
|
|
Packit |
5c3484 |
printf ("/* Tuneup completed successfully, took %ld seconds */\n",
|
|
Packit |
5c3484 |
(long) (end_time - start_time));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
int
|
|
Packit |
5c3484 |
main (int argc, char *argv[])
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
int opt;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Unbuffered so if output is redirected to a file it isn't lost if the
|
|
Packit |
5c3484 |
program is killed part way through. */
|
|
Packit |
5c3484 |
setbuf (stdout, NULL);
|
|
Packit |
5c3484 |
setbuf (stderr, NULL);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while ((opt = getopt(argc, argv, "f:o:p:t")) != EOF)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
switch (opt) {
|
|
Packit |
5c3484 |
case 'f':
|
|
Packit |
5c3484 |
if (optarg[0] == 't')
|
|
Packit |
5c3484 |
option_fft_trace = 2;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
option_fft_max_size = atol (optarg);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
case 'o':
|
|
Packit |
5c3484 |
speed_option_set (optarg);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
case 'p':
|
|
Packit |
5c3484 |
speed_precision = atoi (optarg);
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
case 't':
|
|
Packit |
5c3484 |
option_trace++;
|
|
Packit |
5c3484 |
break;
|
|
Packit |
5c3484 |
case '?':
|
|
Packit |
5c3484 |
exit(1);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
all ();
|
|
Packit |
5c3484 |
exit (0);
|
|
Packit |
5c3484 |
}
|