Blame tune/tuneup.c

Packit 5c3484
/* Create tuned thresholds for various algorithms.
Packit 5c3484
Packit 5c3484
Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Usage: tuneup [-t] [-t] [-p precision]
Packit 5c3484
Packit 5c3484
   -t turns on some diagnostic traces, a second -t turns on more traces.
Packit 5c3484
Packit 5c3484
   Notes:
Packit 5c3484
Packit 5c3484
   The code here isn't a vision of loveliness, mainly because it's subject
Packit 5c3484
   to ongoing changes according to new things wanting to be tuned, and
Packit 5c3484
   practical requirements of systems tested.
Packit 5c3484
Packit 5c3484
   Sometimes running the program twice produces slightly different results.
Packit 5c3484
   This is probably because there's so little separating algorithms near
Packit 5c3484
   their crossover, and on that basis it should make little or no difference
Packit 5c3484
   to the final speed of the relevant routines, but nothing has been done to
Packit 5c3484
   check that carefully.
Packit 5c3484
Packit 5c3484
   Algorithm:
Packit 5c3484
Packit 5c3484
   The thresholds are determined as follows.  A crossover may not be a
Packit 5c3484
   single size but rather a range where it oscillates between method A or
Packit 5c3484
   method B faster.  If the threshold is set making B used where A is faster
Packit 5c3484
   (or vice versa) that's bad.  Badness is the percentage time lost and
Packit 5c3484
   total badness is the sum of this over all sizes measured.  The threshold
Packit 5c3484
   is set to minimize total badness.
Packit 5c3484
Packit 5c3484
   Suppose, as sizes increase, method B becomes faster than method A.  The
Packit 5c3484
   effect of the rule is that, as you look at increasing sizes, isolated
Packit 5c3484
   points where B is faster are ignored, but when it's consistently faster,
Packit 5c3484
   or faster on balance, then the threshold is set there.  The same result
Packit 5c3484
   is obtained thinking in the other direction of A becoming faster at
Packit 5c3484
   smaller sizes.
Packit 5c3484
Packit 5c3484
   In practice the thresholds tend to be chosen to bring on the next
Packit 5c3484
   algorithm fairly quickly.
Packit 5c3484
Packit 5c3484
   This rule is attractive because it's got a basis in reason and is fairly
Packit 5c3484
   easy to implement, but no work has been done to actually compare it in
Packit 5c3484
   absolute terms to other possibilities.
Packit 5c3484
Packit 5c3484
   Implementation:
Packit 5c3484
Packit 5c3484
   In a normal library build the thresholds are constants.  To tune them
Packit 5c3484
   selected objects are recompiled with the thresholds as global variables
Packit 5c3484
   instead.  #define TUNE_PROGRAM_BUILD does this, with help from code at
Packit 5c3484
   the end of gmp-impl.h, and rules in tune/Makefile.am.
Packit 5c3484
Packit 5c3484
   MUL_TOOM22_THRESHOLD for example uses a recompiled mpn_mul_n.  The
Packit 5c3484
   threshold is set to "size+1" to avoid karatsuba, or to "size" to use one
Packit 5c3484
   level, but recurse into the basecase.
Packit 5c3484
Packit 5c3484
   MUL_TOOM33_THRESHOLD makes use of the tuned MUL_TOOM22_THRESHOLD value.
Packit 5c3484
   Other routines in turn will make use of both of those.  Naturally the
Packit 5c3484
   dependencies must be tuned first.
Packit 5c3484
Packit 5c3484
   In a couple of cases, like DIVEXACT_1_THRESHOLD, there's no recompiling,
Packit 5c3484
   just a threshold based on comparing two routines (mpn_divrem_1 and
Packit 5c3484
   mpn_divexact_1), and no further use of the value determined.
Packit 5c3484
Packit 5c3484
   Flags like USE_PREINV_MOD_1 or JACOBI_BASE_METHOD are even simpler, being
Packit 5c3484
   just comparisons between certain routines on representative data.
Packit 5c3484
Packit 5c3484
   Shortcuts are applied when native (assembler) versions of routines exist.
Packit 5c3484
   For instance a native mpn_sqr_basecase is assumed to be always faster
Packit 5c3484
   than mpn_mul_basecase, with no measuring.
Packit 5c3484
Packit 5c3484
   No attempt is made to tune within assembler routines, for instance
Packit 5c3484
   DIVREM_1_NORM_THRESHOLD.  An assembler mpn_divrem_1 is expected to be
Packit 5c3484
   written and tuned all by hand.  Assembler routines that might have hard
Packit 5c3484
   limits are recompiled though, to make them accept a bigger range of sizes
Packit 5c3484
   than normal, eg. mpn_sqr_basecase to compare against mpn_toom2_sqr.
Packit 5c3484
Packit 5c3484
   Limitations:
Packit 5c3484
Packit 5c3484
   The FFTs aren't subject to the same badness rule as the other thresholds,
Packit 5c3484
   so each k is probably being brought on a touch early.  This isn't likely
Packit 5c3484
   to make a difference, and the simpler probing means fewer tests.
Packit 5c3484
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
#define TUNE_PROGRAM_BUILD  1   /* for gmp-impl.h */
Packit 5c3484
Packit 5c3484
#include "config.h"
Packit 5c3484
Packit 5c3484
#include <math.h>
Packit 5c3484
#include <stdio.h>
Packit 5c3484
#include <stdlib.h>
Packit 5c3484
#include <time.h>
Packit 5c3484
#if HAVE_UNISTD_H
Packit 5c3484
#include <unistd.h>
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#include "tests.h"
Packit 5c3484
#include "speed.h"
Packit 5c3484
Packit 5c3484
#if !HAVE_DECL_OPTARG
Packit 5c3484
extern char *optarg;
Packit 5c3484
extern int optind, opterr;
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Value one() substitutes for param_t.max_size when that field is left
   zero.  */
#define DEFAULT_MAX_SIZE   1000  /* limbs */
Packit 5c3484
Packit 5c3484
#if WANT_FFT
Packit 5c3484
mp_size_t  option_fft_max_size = 50000;  /* limbs */
Packit 5c3484
#else
Packit 5c3484
mp_size_t  option_fft_max_size = 0;
Packit 5c3484
#endif
Packit 5c3484
/* Trace level.  Code in this file tests for non-zero, >= 1 and >= 2; see
   the -t option in the usage notes at the top of the file.  */
int        option_trace = 0;
Packit 5c3484
/* NOTE(review): presumably the trace level for the FFT probing; confirm
   against where it is read.  */
int        option_fft_trace = 0;
Packit 5c3484
/* Shared measurement parameters; one() sets s.size for each size it
   times and passes &s to tuneup_measure().  */
struct speed_params  s;
Packit 5c3484
Packit 5c3484
/* Measurements collected by one() for the threshold currently being tuned.
   Each entry holds the operand size measured and d, the fractional time
   difference between the two methods at that size (> 0 when the second
   method was slower, < 0 when it was faster).  dat[] is grown by add_dat();
   ndat is the number of entries in use and allocdat the number allocated.  */
struct dat_t {
Packit 5c3484
  mp_size_t  size;
Packit 5c3484
  double     d;
Packit 5c3484
} *dat = NULL;
Packit 5c3484
int  ndat = 0;
Packit 5c3484
int  allocdat = 0;
Packit 5c3484
Packit 5c3484
/* This is not defined if mpn_sqr_basecase doesn't declare a limit.  In that
Packit 5c3484
   case use zero here, which for params.max_size means no limit.  */
Packit 5c3484
#ifndef TUNE_SQR_TOOM2_MAX
Packit 5c3484
#define TUNE_SQR_TOOM2_MAX  0
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* The thresholds being tuned, held in global variables rather than
   compile-time constants (see "Implementation" in the notes at the top of
   this file).  Most start at MP_SIZE_T_MAX, the value print_define_end()
   reports as "never"; some start at a *_LIMIT or other fixed value.  */
mp_size_t  mul_toom22_threshold         = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mul_toom33_threshold         = MUL_TOOM33_THRESHOLD_LIMIT;
Packit 5c3484
mp_size_t  mul_toom44_threshold         = MUL_TOOM44_THRESHOLD_LIMIT;
Packit 5c3484
mp_size_t  mul_toom6h_threshold         = MUL_TOOM6H_THRESHOLD_LIMIT;
Packit 5c3484
mp_size_t  mul_toom8h_threshold         = MUL_TOOM8H_THRESHOLD_LIMIT;
Packit 5c3484
mp_size_t  mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mul_toom43_to_toom54_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mul_fft_threshold            = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mul_fft_modf_threshold       = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  sqr_basecase_threshold       = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  sqr_toom2_threshold
Packit 5c3484
  = (TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX);
Packit 5c3484
mp_size_t  sqr_toom3_threshold          = SQR_TOOM3_THRESHOLD_LIMIT;
Packit 5c3484
mp_size_t  sqr_toom4_threshold          = SQR_TOOM4_THRESHOLD_LIMIT;
Packit 5c3484
mp_size_t  sqr_toom6_threshold          = SQR_TOOM6_THRESHOLD_LIMIT;
Packit 5c3484
mp_size_t  sqr_toom8_threshold          = SQR_TOOM8_THRESHOLD_LIMIT;
Packit 5c3484
mp_size_t  sqr_fft_threshold            = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  sqr_fft_modf_threshold       = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mullo_basecase_threshold     = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mullo_dc_threshold           = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mullo_mul_n_threshold        = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  sqrlo_basecase_threshold     = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  sqrlo_dc_threshold           = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  sqrlo_sqr_threshold          = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mulmid_toom42_threshold      = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mulmod_bnm1_threshold        = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  sqrmod_bnm1_threshold        = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  div_qr_2_pi2_threshold       = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  dc_div_qr_threshold          = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  dc_divappr_q_threshold       = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mu_div_qr_threshold          = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mu_divappr_q_threshold       = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mupi_div_qr_threshold        = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mu_div_q_threshold           = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  dc_bdiv_qr_threshold         = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  dc_bdiv_q_threshold          = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mu_bdiv_qr_threshold         = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mu_bdiv_q_threshold          = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  inv_mulmod_bnm1_threshold    = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  inv_newton_threshold         = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  inv_appr_threshold           = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  binv_newton_threshold        = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  redc_1_to_redc_2_threshold   = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  redc_1_to_redc_n_threshold   = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  redc_2_to_redc_n_threshold   = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  matrix22_strassen_threshold  = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  hgcd_threshold               = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  hgcd_appr_threshold          = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  hgcd_reduce_threshold        = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  gcd_dc_threshold             = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  gcdext_dc_threshold          = MP_SIZE_T_MAX;
Packit 5c3484
int	   div_qr_1n_pi1_method		= 0;
Packit 5c3484
mp_size_t  div_qr_1_norm_threshold      = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  div_qr_1_unnorm_threshold    = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  divrem_1_norm_threshold      = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  divrem_1_unnorm_threshold    = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mod_1_norm_threshold         = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mod_1_unnorm_threshold       = MP_SIZE_T_MAX;
Packit 5c3484
int	   mod_1_1p_method		= 0;
Packit 5c3484
mp_size_t  mod_1n_to_mod_1_1_threshold  = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mod_1u_to_mod_1_1_threshold  = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  mod_1_2_to_mod_1_4_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  preinv_mod_1_to_mod_1_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  divrem_2_threshold           = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  get_str_dc_threshold         = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  get_str_precompute_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  set_str_dc_threshold         = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  set_str_precompute_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  fac_odd_threshold            = 0;
Packit 5c3484
mp_size_t  fac_dsc_threshold            = FAC_DSC_THRESHOLD_LIMIT;
Packit 5c3484
Packit 5c3484
mp_size_t  fft_modf_sqr_threshold = MP_SIZE_T_MAX;
Packit 5c3484
mp_size_t  fft_modf_mul_threshold = MP_SIZE_T_MAX;
Packit 5c3484
Packit 5c3484
/* Description of one threshold search, consumed by one() and
   tuneup_measure().  Fields left zero are given defaults by one(); the
   defaults quoted below are the ones applied there.  */
struct param_t {
Packit 5c3484
  /* Macro name printed in the "#define" line.  */
  const char        *name;
Packit 5c3484
  /* Timed with the threshold set to size+1 ("method A").  */
  speed_function_t  function;
Packit 5c3484
  /* Timed with the threshold set to size ("method B"); defaults to
     "function".  */
  speed_function_t  function2;
Packit 5c3484
  /* Default 0.01.  The next size is size + MAX (floor (size*step_factor),
     step).  */
  double            step_factor;    /* how much to step relatively */
Packit 5c3484
  /* Default 1.  */
  int               step;           /* how much to step absolutely */
Packit 5c3484
  /* Default 1.0.  */
  double            function_fudge; /* multiplier for "function" speeds */
Packit 5c3484
  /* Default 80.  Stop once the implied threshold has been unchanged for
     more than this many measurements.  */
  int               stop_since_change;
Packit 5c3484
  /* Default 1.2.  Stop once "function" takes at least this many times as
     long as "function2".  */
  double            stop_factor;
Packit 5c3484
  /* Default 10.  First size measured.  */
  mp_size_t         min_size;
Packit 5c3484
  /* If set, a resulting threshold equal to min_size is reported as 0.  */
  int               min_is_always;
Packit 5c3484
  /* Default DEFAULT_MAX_SIZE.  Sizes measured are strictly below this.  */
  mp_size_t         max_size;
Packit 5c3484
  /* If non-zero, both functions are first compared at this size, and the
     threshold is set to MP_SIZE_T_MAX without further measuring unless
     "function2" is at least 4% faster there.  */
  mp_size_t         check_size;
Packit 5c3484
  /* Added to s->size for the duration of each measurement.  */
  mp_size_t         size_extra;
Packit 5c3484
Packit 5c3484
/* Values for data_high: tuneup_measure() reduces the top limb of each
   operand modulo s->r (LT_R) or ORs s->r into it (GE_R).  Any other value
   leaves the random data untouched.  */
#define DATA_HIGH_LT_R  1
Packit 5c3484
#define DATA_HIGH_GE_R  2
Packit 5c3484
  int               data_high;
Packit 5c3484
Packit 5c3484
  /* If set, one() prints the "#define" line only when tracing is on.  */
  int               noprint;
Packit 5c3484
};
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* These are normally undefined when false, which suits "#if" fine.
Packit 5c3484
   But give them zero values so they can be used in plain C "if"s.  */
Packit 5c3484
#ifndef UDIV_PREINV_ALWAYS
Packit 5c3484
#define UDIV_PREINV_ALWAYS 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_divexact_1
Packit 5c3484
#define HAVE_NATIVE_mpn_divexact_1 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_div_qr_1n_pi1
Packit 5c3484
#define HAVE_NATIVE_mpn_div_qr_1n_pi1 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_divrem_1
Packit 5c3484
#define HAVE_NATIVE_mpn_divrem_1 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_divrem_2
Packit 5c3484
#define HAVE_NATIVE_mpn_divrem_2 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_mod_1
Packit 5c3484
#define HAVE_NATIVE_mpn_mod_1 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_mod_1_1p
Packit 5c3484
#define HAVE_NATIVE_mpn_mod_1_1p 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_modexact_1_odd
Packit 5c3484
#define HAVE_NATIVE_mpn_modexact_1_odd 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_preinv_divrem_1
Packit 5c3484
#define HAVE_NATIVE_mpn_preinv_divrem_1 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_preinv_mod_1
Packit 5c3484
#define HAVE_NATIVE_mpn_preinv_mod_1 0
Packit 5c3484
#endif
Packit 5c3484
#ifndef HAVE_NATIVE_mpn_sqr_basecase
Packit 5c3484
#define HAVE_NATIVE_mpn_sqr_basecase 0
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
#define MAX3(a,b,c)  MAX (MAX (a, b), c)
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
randlimb_norm (void)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t  n;
Packit 5c3484
  mpn_random (&n, 1);
Packit 5c3484
  n |= GMP_NUMB_HIGHBIT;
Packit 5c3484
  return n;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#define GMP_NUMB_HALFMASK  ((CNST_LIMB(1) << (GMP_NUMB_BITS/2)) - 1)
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
randlimb_half (void)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t  n;
Packit 5c3484
  mpn_random (&n, 1);
Packit 5c3484
  n &= GMP_NUMB_HALFMASK;
Packit 5c3484
  n += (n==0);
Packit 5c3484
  return n;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Add an entry to the end of the dat[] array, reallocing to make it bigger
Packit 5c3484
   if necessary.  */
Packit 5c3484
void
Packit 5c3484
add_dat (mp_size_t size, double d)
Packit 5c3484
{
Packit 5c3484
#define ALLOCDAT_STEP  500
Packit 5c3484
Packit 5c3484
  ASSERT_ALWAYS (ndat <= allocdat);
Packit 5c3484
Packit 5c3484
  if (ndat == allocdat)
Packit 5c3484
    {
Packit 5c3484
      dat = (struct dat_t *) __gmp_allocate_or_reallocate
Packit 5c3484
        (dat, allocdat * sizeof(dat[0]),
Packit 5c3484
         (allocdat+ALLOCDAT_STEP) * sizeof(dat[0]));
Packit 5c3484
      allocdat += ALLOCDAT_STEP;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  dat[ndat].size = size;
Packit 5c3484
  dat[ndat].d = d;
Packit 5c3484
  ndat++;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Return the threshold size based on the data accumulated. */
Packit 5c3484
mp_size_t
Packit 5c3484
analyze_dat (int final)
Packit 5c3484
{
Packit 5c3484
  double  x, min_x;
Packit 5c3484
  int     j, min_j;
Packit 5c3484
Packit 5c3484
  /* If the threshold is set at dat[0].size, any positive values are bad. */
Packit 5c3484
  x = 0.0;
Packit 5c3484
  for (j = 0; j < ndat; j++)
Packit 5c3484
    if (dat[j].d > 0.0)
Packit 5c3484
      x += dat[j].d;
Packit 5c3484
Packit 5c3484
  if (option_trace >= 2 && final)
Packit 5c3484
    {
Packit 5c3484
      printf ("\n");
Packit 5c3484
      printf ("x is the sum of the badness from setting thresh at given size\n");
Packit 5c3484
      printf ("  (minimum x is sought)\n");
Packit 5c3484
      printf ("size=%ld  first x=%.4f\n", (long) dat[j].size, x);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  min_x = x;
Packit 5c3484
  min_j = 0;
Packit 5c3484
Packit 5c3484
Packit 5c3484
  /* When stepping to the next dat[j].size, positive values are no longer
Packit 5c3484
     bad (so subtracted), negative values become bad (so add the absolute
Packit 5c3484
     value, meaning subtract). */
Packit 5c3484
  for (j = 0; j < ndat; x -= dat[j].d, j++)
Packit 5c3484
    {
Packit 5c3484
      if (option_trace >= 2 && final)
Packit 5c3484
        printf ("size=%ld  x=%.4f\n", (long) dat[j].size, x);
Packit 5c3484
Packit 5c3484
      if (x < min_x)
Packit 5c3484
        {
Packit 5c3484
          min_x = x;
Packit 5c3484
          min_j = j;
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return min_j;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Measuring for recompiled mpn/generic/div_qr_1.c,
Packit 5c3484
 * mpn/generic/divrem_1.c, mpn/generic/mod_1.c and mpz/fac_ui.c */
Packit 5c3484
Packit 5c3484
mp_limb_t mpn_div_qr_1_tune (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t);
Packit 5c3484
Packit 5c3484
#if defined (__cplusplus)
Packit 5c3484
extern "C" {
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
mp_limb_t mpn_divrem_1_tune (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
Packit 5c3484
mp_limb_t mpn_mod_1_tune (mp_srcptr, mp_size_t, mp_limb_t);
Packit 5c3484
void mpz_fac_ui_tune (mpz_ptr, unsigned long);
Packit 5c3484
Packit 5c3484
#if defined (__cplusplus)
Packit 5c3484
}
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Speed routine for the recompiled mpn_mod_1_tune.  The body, including the
   returned time, comes entirely from the SPEED_ROUTINE_MPN_MOD_1 macro
   (presumably from speed.h -- not defined in this file).  */
double
Packit 5c3484
speed_mpn_mod_1_tune (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_tune);
Packit 5c3484
}
Packit 5c3484
/* Speed routine for the recompiled mpn_divrem_1_tune.  The body, including
   the returned time, comes entirely from the SPEED_ROUTINE_MPN_DIVREM_1
   macro (presumably from speed.h -- not defined in this file).  */
double
Packit 5c3484
speed_mpn_divrem_1_tune (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
Packit 5c3484
}
Packit 5c3484
/* Speed routine for the recompiled mpz_fac_ui_tune.  The body, including
   the returned time, comes entirely from the SPEED_ROUTINE_MPZ_FAC_UI macro
   (presumably from speed.h -- not defined in this file).  */
double
Packit 5c3484
speed_mpz_fac_ui_tune (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune);
Packit 5c3484
}
Packit 5c3484
/* Speed routine for the recompiled mpn_div_qr_1_tune.  The body, including
   the returned time, comes entirely from the SPEED_ROUTINE_MPN_DIV_QR_1
   macro (presumably from speed.h -- not defined in this file).  */
double
Packit 5c3484
speed_mpn_div_qr_1_tune (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1_tune);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
double
Packit 5c3484
tuneup_measure (speed_function_t fun,
Packit 5c3484
                const struct param_t *param,
Packit 5c3484
                struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  dummy;
Packit 5c3484
  double   t;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  if (! param)
Packit 5c3484
    param = &dummy;
Packit 5c3484
Packit 5c3484
  s->size += param->size_extra;
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
  SPEED_TMP_ALLOC_LIMBS (s->xp, s->size, 0);
Packit 5c3484
  SPEED_TMP_ALLOC_LIMBS (s->yp, s->size, 0);
Packit 5c3484
Packit 5c3484
  mpn_random (s->xp, s->size);
Packit 5c3484
  mpn_random (s->yp, s->size);
Packit 5c3484
Packit 5c3484
  switch (param->data_high) {
Packit 5c3484
  case DATA_HIGH_LT_R:
Packit 5c3484
    s->xp[s->size-1] %= s->r;
Packit 5c3484
    s->yp[s->size-1] %= s->r;
Packit 5c3484
    break;
Packit 5c3484
  case DATA_HIGH_GE_R:
Packit 5c3484
    s->xp[s->size-1] |= s->r;
Packit 5c3484
    s->yp[s->size-1] |= s->r;
Packit 5c3484
    break;
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
  t = speed_measure (fun, s);
Packit 5c3484
Packit 5c3484
  s->size -= param->size_extra;
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
  return t;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
#define PRINT_WIDTH  31
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
print_define_start (const char *name)
Packit 5c3484
{
Packit 5c3484
  printf ("#define %-*s  ", PRINT_WIDTH, name);
Packit 5c3484
  if (option_trace)
Packit 5c3484
    printf ("...\n");
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
print_define_end_remark (const char *name, mp_size_t value, const char *remark)
Packit 5c3484
{
Packit 5c3484
  if (option_trace)
Packit 5c3484
    printf ("#define %-*s  ", PRINT_WIDTH, name);
Packit 5c3484
Packit 5c3484
  if (value == MP_SIZE_T_MAX)
Packit 5c3484
    printf ("MP_SIZE_T_MAX");
Packit 5c3484
  else
Packit 5c3484
    printf ("%5ld", (long) value);
Packit 5c3484
Packit 5c3484
  if (remark != NULL)
Packit 5c3484
    printf ("  /* %s */", remark);
Packit 5c3484
  printf ("\n");
Packit 5c3484
  fflush (stdout);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
print_define_end (const char *name, mp_size_t value)
Packit 5c3484
{
Packit 5c3484
  const char  *remark;
Packit 5c3484
  if (value == MP_SIZE_T_MAX)
Packit 5c3484
    remark = "never";
Packit 5c3484
  else if (value == 0)
Packit 5c3484
    remark = "always";
Packit 5c3484
  else
Packit 5c3484
    remark = NULL;
Packit 5c3484
  print_define_end_remark (name, value, remark);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Print a complete result line for "name" with "value": the start from
   print_define_start followed by the end from print_define_end, which picks
   the remark from the value.  */
void
Packit 5c3484
print_define (const char *name, mp_size_t value)
Packit 5c3484
{
Packit 5c3484
  print_define_start (name);
Packit 5c3484
  print_define_end (name, value);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Print a complete result line for "name" with "value", using the
   caller-supplied remark (which may be NULL for none) instead of one chosen
   from the value.  */
void
Packit 5c3484
print_define_remark (const char *name, mp_size_t value, const char *remark)
Packit 5c3484
{
Packit 5c3484
  print_define_start (name);
Packit 5c3484
  print_define_end_remark (name, value, remark);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
one (mp_size_t *threshold, struct param_t *param)
Packit 5c3484
{
Packit 5c3484
  int  since_positive, since_thresh_change;
Packit 5c3484
  int  thresh_idx, new_thresh_idx;
Packit 5c3484
Packit 5c3484
#define DEFAULT(x,n)  do { if (! (x))  (x) = (n); } while (0)
Packit 5c3484
Packit 5c3484
  DEFAULT (param->function_fudge, 1.0);
Packit 5c3484
  DEFAULT (param->function2, param->function);
Packit 5c3484
  DEFAULT (param->step_factor, 0.01);  /* small steps by default */
Packit 5c3484
  DEFAULT (param->step, 1);            /* small steps by default */
Packit 5c3484
  DEFAULT (param->stop_since_change, 80);
Packit 5c3484
  DEFAULT (param->stop_factor, 1.2);
Packit 5c3484
  DEFAULT (param->min_size, 10);
Packit 5c3484
  DEFAULT (param->max_size, DEFAULT_MAX_SIZE);
Packit 5c3484
Packit 5c3484
  if (param->check_size != 0)
Packit 5c3484
    {
Packit 5c3484
      double   t1, t2;
Packit 5c3484
      s.size = param->check_size;
Packit 5c3484
Packit 5c3484
      *threshold = s.size+1;
Packit 5c3484
      t1 = tuneup_measure (param->function, param, &s);
Packit 5c3484
Packit 5c3484
      *threshold = s.size;
Packit 5c3484
      t2 = tuneup_measure (param->function2, param, &s);
Packit 5c3484
      if (t1 == -1.0 || t2 == -1.0)
Packit 5c3484
        {
Packit 5c3484
          printf ("Oops, can't run both functions at size %ld\n",
Packit 5c3484
                  (long) s.size);
Packit 5c3484
          abort ();
Packit 5c3484
        }
Packit 5c3484
      t1 *= param->function_fudge;
Packit 5c3484
Packit 5c3484
      /* ask that t2 is at least 4% below t1 */
Packit 5c3484
      if (t1 < t2*1.04)
Packit 5c3484
        {
Packit 5c3484
          if (option_trace)
Packit 5c3484
            printf ("function2 never enough faster: t1=%.9f t2=%.9f\n", t1, t2);
Packit 5c3484
          *threshold = MP_SIZE_T_MAX;
Packit 5c3484
          if (! param->noprint)
Packit 5c3484
            print_define (param->name, *threshold);
Packit 5c3484
          return;
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      if (option_trace >= 2)
Packit 5c3484
        printf ("function2 enough faster at size=%ld: t1=%.9f t2=%.9f\n",
Packit 5c3484
                (long) s.size, t1, t2);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (! param->noprint || option_trace)
Packit 5c3484
    print_define_start (param->name);
Packit 5c3484
Packit 5c3484
  ndat = 0;
Packit 5c3484
  since_positive = 0;
Packit 5c3484
  since_thresh_change = 0;
Packit 5c3484
  thresh_idx = 0;
Packit 5c3484
Packit 5c3484
  if (option_trace >= 2)
Packit 5c3484
    {
Packit 5c3484
      printf ("             algorithm-A  algorithm-B   ratio  possible\n");
Packit 5c3484
      printf ("              (seconds)    (seconds)    diff    thresh\n");
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  for (s.size = param->min_size;
Packit 5c3484
       s.size < param->max_size;
Packit 5c3484
       s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), param->step))
Packit 5c3484
    {
Packit 5c3484
      double   ti, tiplus1, d;
Packit 5c3484
Packit 5c3484
      /*
Packit 5c3484
        FIXME: check minimum size requirements are met, possibly by just
Packit 5c3484
        checking for the -1 returns from the speed functions.
Packit 5c3484
      */
Packit 5c3484
Packit 5c3484
      /* using method A at this size */
Packit 5c3484
      *threshold = s.size+1;
Packit 5c3484
      ti = tuneup_measure (param->function, param, &s);
Packit 5c3484
      if (ti == -1.0)
Packit 5c3484
        abort ();
Packit 5c3484
      ti *= param->function_fudge;
Packit 5c3484
Packit 5c3484
      /* using method B at this size */
Packit 5c3484
      *threshold = s.size;
Packit 5c3484
      tiplus1 = tuneup_measure (param->function2, param, &s);
Packit 5c3484
      if (tiplus1 == -1.0)
Packit 5c3484
        abort ();
Packit 5c3484
Packit 5c3484
      /* Calculate the fraction by which the one or the other routine is
Packit 5c3484
         slower.  */
Packit 5c3484
      if (tiplus1 >= ti)
Packit 5c3484
        d = (tiplus1 - ti) / tiplus1;  /* negative */
Packit 5c3484
      else
Packit 5c3484
        d = (tiplus1 - ti) / ti;       /* positive */
Packit 5c3484
Packit 5c3484
      add_dat (s.size, d);
Packit 5c3484
Packit 5c3484
      new_thresh_idx = analyze_dat (0);
Packit 5c3484
Packit 5c3484
      if (option_trace >= 2)
Packit 5c3484
        printf ("size=%ld  %.9f  %.9f  % .4f %c  %ld\n",
Packit 5c3484
                (long) s.size, ti, tiplus1, d,
Packit 5c3484
                ti > tiplus1 ? '#' : ' ',
Packit 5c3484
                (long) dat[new_thresh_idx].size);
Packit 5c3484
Packit 5c3484
      /* Stop if the last time method i was faster was more than a
Packit 5c3484
         certain number of measurements ago.  */
Packit 5c3484
#define STOP_SINCE_POSITIVE  200
Packit 5c3484
      if (d >= 0)
Packit 5c3484
        since_positive = 0;
Packit 5c3484
      else
Packit 5c3484
        if (++since_positive > STOP_SINCE_POSITIVE)
Packit 5c3484
          {
Packit 5c3484
            if (option_trace >= 1)
Packit 5c3484
              printf ("stopped due to since_positive (%d)\n",
Packit 5c3484
                      STOP_SINCE_POSITIVE);
Packit 5c3484
            break;
Packit 5c3484
          }
Packit 5c3484
Packit 5c3484
      /* Stop if method A has become slower by a certain factor. */
Packit 5c3484
      if (ti >= tiplus1 * param->stop_factor)
Packit 5c3484
        {
Packit 5c3484
          if (option_trace >= 1)
Packit 5c3484
            printf ("stopped due to ti >= tiplus1 * factor (%.1f)\n",
Packit 5c3484
                    param->stop_factor);
Packit 5c3484
          break;
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      /* Stop if the threshold implied hasn't changed in a certain
Packit 5c3484
         number of measurements.  (It's this condition that usually
Packit 5c3484
         stops the loop.) */
Packit 5c3484
      if (thresh_idx != new_thresh_idx)
Packit 5c3484
        since_thresh_change = 0, thresh_idx = new_thresh_idx;
Packit 5c3484
      else
Packit 5c3484
        if (++since_thresh_change > param->stop_since_change)
Packit 5c3484
          {
Packit 5c3484
            if (option_trace >= 1)
Packit 5c3484
              printf ("stopped due to since_thresh_change (%d)\n",
Packit 5c3484
                      param->stop_since_change);
Packit 5c3484
            break;
Packit 5c3484
          }
Packit 5c3484
Packit 5c3484
      /* Stop if the threshold implied is more than a certain number of
Packit 5c3484
         measurements ago.  */
Packit 5c3484
#define STOP_SINCE_AFTER   500
Packit 5c3484
      if (ndat - thresh_idx > STOP_SINCE_AFTER)
Packit 5c3484
        {
Packit 5c3484
          if (option_trace >= 1)
Packit 5c3484
            printf ("stopped due to ndat - thresh_idx > amount (%d)\n",
Packit 5c3484
                    STOP_SINCE_AFTER);
Packit 5c3484
          break;
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      /* Stop when the size limit is reached before the end of the
Packit 5c3484
         crossover, but only show this as an error for >= the default max
Packit 5c3484
         size.  FIXME: Maybe should make it a param choice whether this is
Packit 5c3484
         an error.  */
Packit 5c3484
      if (s.size >= param->max_size && param->max_size >= DEFAULT_MAX_SIZE)
Packit 5c3484
        {
Packit 5c3484
          fprintf (stderr, "%s\n", param->name);
Packit 5c3484
          fprintf (stderr, "sizes %ld to %ld total %d measurements\n",
Packit 5c3484
                   (long) dat[0].size, (long) dat[ndat-1].size, ndat);
Packit 5c3484
          fprintf (stderr, "    max size reached before end of crossover\n");
Packit 5c3484
          break;
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (option_trace >= 1)
Packit 5c3484
    printf ("sizes %ld to %ld total %d measurements\n",
Packit 5c3484
            (long) dat[0].size, (long) dat[ndat-1].size, ndat);
Packit 5c3484
Packit 5c3484
  *threshold = dat[analyze_dat (1)].size;
Packit 5c3484
Packit 5c3484
  if (param->min_is_always)
Packit 5c3484
    {
Packit 5c3484
      if (*threshold == param->min_size)
Packit 5c3484
        *threshold = 0;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (! param->noprint || option_trace)
Packit 5c3484
    print_define_end (param->name, *threshold);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Special probing for the fft thresholds.  The size restrictions on the
Packit 5c3484
   FFTs mean the graph of time vs size has a step effect.  See this for
Packit 5c3484
   example using
Packit 5c3484
Packit 5c3484
       ./speed -s 4096-16384 -t 128 -P foo mpn_mul_fft.8 mpn_mul_fft.9
Packit 5c3484
       gnuplot foo.gnuplot
Packit 5c3484
Packit 5c3484
   The current approach is to compare routines at the midpoint of relevant
Packit 5c3484
   steps.  Arguably a more sophisticated system of threshold data is wanted
Packit 5c3484
   if this step effect remains. */
Packit 5c3484
Packit 5c3484
struct fft_param_t {
Packit 5c3484
  const char        *table_name;
Packit 5c3484
  const char        *threshold_name;
Packit 5c3484
  const char        *modf_threshold_name;
Packit 5c3484
  mp_size_t         *p_threshold;
Packit 5c3484
  mp_size_t         *p_modf_threshold;
Packit 5c3484
  mp_size_t         first_size;
Packit 5c3484
  mp_size_t         max_size;
Packit 5c3484
  speed_function_t  function;
Packit 5c3484
  speed_function_t  mul_modf_function;
Packit 5c3484
  speed_function_t  mul_function;
Packit 5c3484
  mp_size_t         sqr;
Packit 5c3484
};
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* mpn_mul_fft requires pl a multiple of 2^k limbs, but with
Packit 5c3484
   N=pl*BIT_PER_MP_LIMB it internally also pads out so N/2^k is a multiple
Packit 5c3484
   of 2^(k-1) bits. */
Packit 5c3484
Packit 5c3484
mp_size_t
Packit 5c3484
fft_step_size (int k)
Packit 5c3484
{
Packit 5c3484
  mp_size_t  step;
Packit 5c3484
Packit 5c3484
  step = MAX ((mp_size_t) 1 << (k-1), GMP_LIMB_BITS) / GMP_LIMB_BITS;
Packit 5c3484
  step *= (mp_size_t) 1 << k;
Packit 5c3484
Packit 5c3484
  if (step <= 0)
Packit 5c3484
    {
Packit 5c3484
      printf ("Can't handle k=%d\n", k);
Packit 5c3484
      abort ();
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return step;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Round PL up to a multiple of the FFT step size for K (see
   fft_step_size).  A PL that is already a non-zero multiple is returned
   unchanged; PL == 0 yields one full step.

   The rounding uses bit masks, so it relies on the step being a power of
   two -- NOTE(review): that holds when GMP_LIMB_BITS is a power of two;
   confirm for any other configuration.  */
mp_size_t
fft_next_size (mp_size_t pl, int k)
{
  mp_size_t  m = fft_step_size (k);

  if (pl == 0 || (pl & (m-1)) != 0)
    pl = (pl | (m-1)) + 1;

  return pl;
}
Packit 5c3484
Packit 5c3484
/* NMAX_DEFAULT is not referenced in this part of the file.  */
#define NMAX_DEFAULT 1000000
/* Upper and lower limits on the number of timing repetitions taken per
   (size, k) point by fftmes(); MAX_REPS also sizes the sample buffer in
   cached_measure().  */
#define MAX_REPS 25
#define MIN_REPS 5
Packit 5c3484
Packit 5c3484
/* Return the least common multiple of A and 2^K.

   Up to K factors of two are stripped from A (those it shares with 2^K)
   and the remainder is multiplied by 2^K.  A == 0 gives 0.  The result is
   computed in size_t and wraps silently if it does not fit.  */
static inline size_t
mpn_mul_fft_lcm (size_t a, unsigned int k)
{
  const unsigned int shift = k;

  /* Cancel the powers of two that A and 2^K have in common.  */
  while (k > 0 && (a & 1) == 0)
    {
      a >>= 1;
      k--;
    }

  return a << shift;
}
Packit 5c3484
Packit 5c3484
/* Compute the coefficient size, in bits, associated with an FFT of PL limbs
   split into 2^K pieces.  SQR selects the squaring (non-zero) or
   multiplication (zero) thresholds and k table.

   N is the operand size in bits and M = N / 2^K the bits per piece.  Nprime
   is the smallest multiple of maxLK = lcm (GMP_NUMB_BITS, 2^K) strictly
   greater than 2*M + K + 2.  When the corresponding limb count nprime
   reaches the MODF threshold, nprime is further rounded up until it is a
   multiple of 2^k' where k' = mpn_fft_best_k (nprime, sqr), and Nprime is
   recomputed from it.

   NOTE(review): this looks like it mirrors the size computation inside
   mpn_mul_fft -- confirm against that routine.

   fftmes() uses the result both as the denominator of its efficiency
   estimate and to detect where the time-vs-size graph steps.

   Aborts (ASSERT_ALWAYS) unless nprime < PL.  */
mp_size_t
fftfill (mp_size_t pl, int k, int sqr)
{
  mp_size_t maxLK;
  mp_bitcnt_t N, Nprime, nprime, M;

  N = pl * GMP_NUMB_BITS;
  M = N >> k;

  maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k);

  /* (1 + x / maxLK) * maxLK is the next multiple of maxLK above x.  */
  Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
  nprime = Nprime / GMP_NUMB_BITS;
  if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
    {
      size_t K2;
      /* Rounding nprime up can change the best k for it, so repeat until
         nprime is a multiple of 2^(its own best k).  */
      for (;;)
        {
          K2 = 1L << mpn_fft_best_k (nprime, sqr);
          if ((nprime & (K2 - 1)) == 0)
            break;
          nprime = (nprime + K2 - 1) & -K2;
          Nprime = nprime * GMP_LIMB_BITS;
        }
    }
  ASSERT_ALWAYS (nprime < pl);

  return Nprime;
}
Packit 5c3484
Packit 5c3484
/* qsort() comparison callback ordering doubles in ascending order.  */
static int
compare_double (const void *ap, const void *bp)
{
  const double a = * (const double *) ap;
  const double b = * (const double *) bp;

  if (a < b)
    return -1;
  if (a > b)
    return 1;
  return 0;
}

/* Sort TIMES[0..N-1] in place (ascending) and return element N/2: the
   median for odd N, the upper of the two middle values for even N.
   N must be at least 1.  */
double
median (double *times, int n)
{
  qsort (times, n, sizeof (double), compare_double);
  return times[n/2];
}
Packit 5c3484
Packit 5c3484
#define FFT_CACHE_SIZE 25
Packit 5c3484
typedef struct fft_cache
Packit 5c3484
{
Packit 5c3484
  mp_size_t n;
Packit 5c3484
  double time;
Packit 5c3484
} fft_cache_t;
Packit 5c3484
Packit 5c3484
fft_cache_t fft_cache[FFT_CACHE_SIZE];
Packit 5c3484
Packit 5c3484
double
Packit 5c3484
cached_measure (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, int k,
Packit 5c3484
		int n_measurements)
Packit 5c3484
{
Packit 5c3484
  int i;
Packit 5c3484
  double t, ttab[MAX_REPS];
Packit 5c3484
Packit 5c3484
  if (fft_cache[k].n == n)
Packit 5c3484
    return fft_cache[k].time;
Packit 5c3484
Packit 5c3484
  for (i = 0; i < n_measurements; i++)
Packit 5c3484
    {
Packit 5c3484
      speed_starttime ();
Packit 5c3484
      mpn_mul_fft (rp, n, ap, n, bp, n, k);
Packit 5c3484
      ttab[i] = speed_endtime ();
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  t = median (ttab, n_measurements);
Packit 5c3484
  fft_cache[k].n = n;
Packit 5c3484
  fft_cache[k].time = t;
Packit 5c3484
  return t;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Store the entry {NVAL, KVAL} at fft_tab[IDX] and write an all-ones
   end-of-table sentinel into the following slot, fft_tab[IDX+1].

   Expects an array named `fft_tab' with fields `n' and `k' to be in scope
   at the point of use.  IDX is expanded several times, so it must be free
   of side effects.  Arguments are parenthesised so that an expression such
   as `c ? 1 : 0' can safely be passed as IDX.  */
#define INSERT_FFTTAB(idx, nval, kval)                                  \
  do {                                                                  \
    fft_tab[(idx)].n = (nval);                                          \
    fft_tab[(idx)].k = (kval);                                          \
    fft_tab[(idx)+1].n = (1 << 27) - 1; /* sentinel, 27b wide field */  \
    fft_tab[(idx)+1].k = (1 <<  5) - 1;                                 \
  } while (0)
Packit 5c3484
Packit 5c3484
int
Packit 5c3484
fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print)
Packit 5c3484
{
Packit 5c3484
  mp_size_t n, n1, prev_n1;
Packit 5c3484
  int k, best_k, last_best_k, kmax;
Packit 5c3484
  int eff, prev_eff;
Packit 5c3484
  double t0, t1;
Packit 5c3484
  int n_measurements;
Packit 5c3484
  mp_limb_t *ap, *bp, *rp;
Packit 5c3484
  mp_size_t alloc;
Packit 5c3484
  struct fft_table_nk *fft_tab;
Packit 5c3484
Packit 5c3484
  fft_tab = mpn_fft_table3[p->sqr];
Packit 5c3484
Packit 5c3484
  for (k = 0; k < FFT_CACHE_SIZE; k++)
Packit 5c3484
    fft_cache[k].n = 0;
Packit 5c3484
Packit 5c3484
  if (nmin < (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      nmin = (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (print)
Packit 5c3484
    printf ("#define %s%*s", p->table_name, 38, "");
Packit 5c3484
Packit 5c3484
  if (idx == 0)
Packit 5c3484
    {
Packit 5c3484
      INSERT_FFTTAB (0, nmin, initial_k);
Packit 5c3484
Packit 5c3484
      if (print)
Packit 5c3484
	{
Packit 5c3484
	  printf ("\\\n  { ");
Packit 5c3484
	  printf ("{%7u,%2u}", fft_tab[0].n, fft_tab[0].k);
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      idx = 1;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  ap = (mp_ptr) malloc (sizeof (mp_limb_t));
Packit 5c3484
  if (p->sqr)
Packit 5c3484
    bp = ap;
Packit 5c3484
  else
Packit 5c3484
    bp = (mp_ptr) malloc (sizeof (mp_limb_t));
Packit 5c3484
  rp = (mp_ptr) malloc (sizeof (mp_limb_t));
Packit 5c3484
  alloc = 1;
Packit 5c3484
Packit 5c3484
  /* Round n to comply to initial k value */
Packit 5c3484
  n = (nmin + ((1ul << initial_k) - 1)) & (MP_SIZE_T_MAX << initial_k);
Packit 5c3484
Packit 5c3484
  n_measurements = (18 - initial_k) | 1;
Packit 5c3484
  n_measurements = MAX (n_measurements, MIN_REPS);
Packit 5c3484
  n_measurements = MIN (n_measurements, MAX_REPS);
Packit 5c3484
Packit 5c3484
  last_best_k = initial_k;
Packit 5c3484
  best_k = initial_k;
Packit 5c3484
Packit 5c3484
  while (n < nmax)
Packit 5c3484
    {
Packit 5c3484
      int start_k, end_k;
Packit 5c3484
Packit 5c3484
      /* Assume the current best k is best until we hit its next FFT step.  */
Packit 5c3484
      t0 = 99999;
Packit 5c3484
Packit 5c3484
      prev_n1 = n + 1;
Packit 5c3484
Packit 5c3484
      start_k = MAX (4, best_k - 4);
Packit 5c3484
      end_k = MIN (24, best_k + 4);
Packit 5c3484
      for (k = start_k; k <= end_k; k++)
Packit 5c3484
	{
Packit 5c3484
          n1 = mpn_fft_next_size (prev_n1, k);
Packit 5c3484
Packit 5c3484
	  eff = 200 * (n1 * GMP_NUMB_BITS >> k) / fftfill (n1, k, p->sqr);
Packit 5c3484
Packit 5c3484
	  if (eff < 70)		/* avoid measuring too slow fft:s */
Packit 5c3484
	    continue;
Packit 5c3484
Packit 5c3484
	  if (n1 > alloc)
Packit 5c3484
	    {
Packit 5c3484
	      alloc = n1;
Packit 5c3484
	      if (p->sqr)
Packit 5c3484
		{
Packit 5c3484
		  ap = (mp_ptr) realloc (ap, sizeof (mp_limb_t));
Packit 5c3484
		  rp = (mp_ptr) realloc (rp, sizeof (mp_limb_t));
Packit 5c3484
		  ap = bp = (mp_ptr) realloc (ap, alloc * sizeof (mp_limb_t));
Packit 5c3484
		  mpn_random (ap, alloc);
Packit 5c3484
		  rp = (mp_ptr) realloc (rp, alloc * sizeof (mp_limb_t));
Packit 5c3484
		}
Packit 5c3484
	      else
Packit 5c3484
		{
Packit 5c3484
		  ap = (mp_ptr) realloc (ap, sizeof (mp_limb_t));
Packit 5c3484
		  bp = (mp_ptr) realloc (bp, sizeof (mp_limb_t));
Packit 5c3484
		  rp = (mp_ptr) realloc (rp, sizeof (mp_limb_t));
Packit 5c3484
		  ap = (mp_ptr) realloc (ap, alloc * sizeof (mp_limb_t));
Packit 5c3484
		  mpn_random (ap, alloc);
Packit 5c3484
		  bp = (mp_ptr) realloc (bp, alloc * sizeof (mp_limb_t));
Packit 5c3484
		  mpn_random (bp, alloc);
Packit 5c3484
		  rp = (mp_ptr) realloc (rp, alloc * sizeof (mp_limb_t));
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  t1 = cached_measure (rp, ap, bp, n1, k, n_measurements);
Packit 5c3484
Packit 5c3484
	  if (t1 * n_measurements > 0.3)
Packit 5c3484
	    n_measurements -= 2;
Packit 5c3484
	  n_measurements = MAX (n_measurements, MIN_REPS);
Packit 5c3484
Packit 5c3484
	  if (t1 < t0)
Packit 5c3484
	    {
Packit 5c3484
	      best_k = k;
Packit 5c3484
	      t0 = t1;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      n1 = mpn_fft_next_size (prev_n1, best_k);
Packit 5c3484
Packit 5c3484
      if (last_best_k != best_k)
Packit 5c3484
	{
Packit 5c3484
	  ASSERT_ALWAYS ((prev_n1 & ((1ul << last_best_k) - 1)) == 1);
Packit 5c3484
Packit 5c3484
	  if (idx >= FFT_TABLE3_SIZE)
Packit 5c3484
	    {
Packit 5c3484
	      printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
Packit 5c3484
	      abort ();
Packit 5c3484
	    }
Packit 5c3484
	  INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k);
Packit 5c3484
Packit 5c3484
	  if (print)
Packit 5c3484
	    {
Packit 5c3484
	      printf (", ");
Packit 5c3484
	      if (idx % 4 == 0)
Packit 5c3484
		printf ("\\\n    ");
Packit 5c3484
	      printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  if (option_trace >= 2)
Packit 5c3484
	    {
Packit 5c3484
	      printf ("{%lu,%u}\n", prev_n1, best_k);
Packit 5c3484
	      fflush (stdout);
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  last_best_k = best_k;
Packit 5c3484
	  idx++;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      for (;;)
Packit 5c3484
	{
Packit 5c3484
	  prev_n1 = n1;
Packit 5c3484
	  prev_eff = fftfill (prev_n1, best_k, p->sqr);
Packit 5c3484
	  n1 = mpn_fft_next_size (prev_n1 + 1, best_k);
Packit 5c3484
	  eff = fftfill (n1, best_k, p->sqr);
Packit 5c3484
Packit 5c3484
	  if (eff != prev_eff)
Packit 5c3484
	    break;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      n = prev_n1;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  kmax = sizeof (mp_size_t) * 4;	/* GMP_MP_SIZE_T_BITS / 2 */
Packit 5c3484
  kmax = MIN (kmax, 25-1);
Packit 5c3484
  for (k = last_best_k + 1; k <= kmax; k++)
Packit 5c3484
    {
Packit 5c3484
      if (idx >= FFT_TABLE3_SIZE)
Packit 5c3484
	{
Packit 5c3484
	  printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
Packit 5c3484
	  abort ();
Packit 5c3484
	}
Packit 5c3484
      INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k);
Packit 5c3484
Packit 5c3484
      if (print)
Packit 5c3484
	{
Packit 5c3484
	  printf (", ");
Packit 5c3484
	  if (idx % 4 == 0)
Packit 5c3484
	    printf ("\\\n    ");
Packit 5c3484
	  printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      idx++;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (print)
Packit 5c3484
    printf (" }\n");
Packit 5c3484
Packit 5c3484
  free (ap);
Packit 5c3484
  if (! p->sqr)
Packit 5c3484
    free (bp);
Packit 5c3484
  free (rp);
Packit 5c3484
Packit 5c3484
  return idx;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
fft (struct fft_param_t *p)
Packit 5c3484
{
Packit 5c3484
  mp_size_t  size;
Packit 5c3484
  int        k, idx, initial_k;
Packit 5c3484
Packit 5c3484
  /*** Generate MUL_FFT_MODF_THRESHOLD / SQR_FFT_MODF_THRESHOLD ***/
Packit 5c3484
Packit 5c3484
#if 1
Packit 5c3484
  {
Packit 5c3484
    /* Use plain one() mechanism, for some reasonable initial values of k.  The
Packit 5c3484
       advantage is that we don't depend on mpn_fft_table3, which can therefore
Packit 5c3484
       leave it completely uninitialized.  */
Packit 5c3484
Packit 5c3484
    static struct param_t param;
Packit 5c3484
    mp_size_t thres, best_thres;
Packit 5c3484
    int best_k;
Packit 5c3484
    char buf[20];
Packit 5c3484
Packit 5c3484
    best_thres = MP_SIZE_T_MAX;
Packit 5c3484
    best_k = -1;
Packit 5c3484
Packit 5c3484
    for (k = 5; k <= 7; k++)
Packit 5c3484
      {
Packit 5c3484
	param.name = p->modf_threshold_name;
Packit 5c3484
	param.min_size = 100;
Packit 5c3484
	param.max_size = 2000;
Packit 5c3484
	param.function  = p->mul_function;
Packit 5c3484
	param.step_factor = 0.0;
Packit 5c3484
	param.step = 4;
Packit 5c3484
	param.function2 = p->mul_modf_function;
Packit 5c3484
	param.noprint = 1;
Packit 5c3484
	s.r = k;
Packit 5c3484
	one (&thres, ¶m;;
Packit 5c3484
	if (thres < best_thres)
Packit 5c3484
	  {
Packit 5c3484
	    best_thres = thres;
Packit 5c3484
	    best_k = k;
Packit 5c3484
	  }
Packit 5c3484
      }
Packit 5c3484
Packit 5c3484
    *(p->p_modf_threshold) = best_thres;
Packit 5c3484
    sprintf (buf, "k = %d", best_k);
Packit 5c3484
    print_define_remark (p->modf_threshold_name, best_thres, buf);
Packit 5c3484
    initial_k = best_k;
Packit 5c3484
  }
Packit 5c3484
#else
Packit 5c3484
  size = p->first_size;
Packit 5c3484
  for (;;)
Packit 5c3484
    {
Packit 5c3484
      double  tk, tm;
Packit 5c3484
Packit 5c3484
      size = mpn_fft_next_size (size+1, mpn_fft_best_k (size+1, p->sqr));
Packit 5c3484
      k = mpn_fft_best_k (size, p->sqr);
Packit 5c3484
Packit 5c3484
      if (size >= p->max_size)
Packit 5c3484
        break;
Packit 5c3484
Packit 5c3484
      s.size = size + fft_step_size (k) / 2;
Packit 5c3484
      s.r = k;
Packit 5c3484
      tk = tuneup_measure (p->mul_modf_function, NULL, &s);
Packit 5c3484
      if (tk == -1.0)
Packit 5c3484
        abort ();
Packit 5c3484
Packit 5c3484
      tm = tuneup_measure (p->mul_function, NULL, &s);
Packit 5c3484
      if (tm == -1.0)
Packit 5c3484
        abort ();
Packit 5c3484
Packit 5c3484
      if (option_trace >= 2)
Packit 5c3484
        printf ("at %ld   size=%ld  k=%d  %.9f   size=%ld modf %.9f\n",
Packit 5c3484
                (long) size,
Packit 5c3484
                (long) size + fft_step_size (k) / 2, k, tk,
Packit 5c3484
                (long) s.size, tm);
Packit 5c3484
Packit 5c3484
      if (tk < tm)
Packit 5c3484
        {
Packit 5c3484
	  *p->p_modf_threshold = s.size;
Packit 5c3484
	  print_define (p->modf_threshold_name, *p->p_modf_threshold);
Packit 5c3484
	  break;
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
  initial_k = ?;
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  /*** Generate MUL_FFT_TABLE3 / SQR_FFT_TABLE3 ***/
Packit 5c3484
Packit 5c3484
  idx = fftmes (*p->p_modf_threshold, p->max_size, initial_k, p, 0, 1);
Packit 5c3484
  printf ("#define %s_SIZE %d\n", p->table_name, idx);
Packit 5c3484
Packit 5c3484
  /*** Generate MUL_FFT_THRESHOLD / SQR_FFT_THRESHOLD ***/
Packit 5c3484
Packit 5c3484
  size = 2 * *p->p_modf_threshold;	/* OK? */
Packit 5c3484
  for (;;)
Packit 5c3484
    {
Packit 5c3484
      double  tk, tm;
Packit 5c3484
      mp_size_t mulmod_size, mul_size;;
Packit 5c3484
Packit 5c3484
      if (size >= p->max_size)
Packit 5c3484
        break;
Packit 5c3484
Packit 5c3484
      mulmod_size = mpn_mulmod_bnm1_next_size (2 * (size + 1)) / 2;
Packit 5c3484
      mul_size = (size + mulmod_size) / 2;	/* middle of step */
Packit 5c3484
Packit 5c3484
      s.size = mulmod_size;
Packit 5c3484
      tk = tuneup_measure (p->function, NULL, &s);
Packit 5c3484
      if (tk == -1.0)
Packit 5c3484
        abort ();
Packit 5c3484
Packit 5c3484
      s.size = mul_size;
Packit 5c3484
      tm = tuneup_measure (p->mul_function, NULL, &s);
Packit 5c3484
      if (tm == -1.0)
Packit 5c3484
        abort ();
Packit 5c3484
Packit 5c3484
      if (option_trace >= 2)
Packit 5c3484
        printf ("at %ld   size=%ld  %.9f   size=%ld mul %.9f\n",
Packit 5c3484
                (long) size,
Packit 5c3484
                (long) mulmod_size, tk,
Packit 5c3484
                (long) mul_size, tm);
Packit 5c3484
Packit 5c3484
      size = mulmod_size;
Packit 5c3484
Packit 5c3484
      if (tk < tm)
Packit 5c3484
        {
Packit 5c3484
	  *p->p_threshold = s.size;
Packit 5c3484
	  print_define (p->threshold_name, *p->p_threshold);
Packit 5c3484
	  break;
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
Packit 5c3484
   giving wrong results.  */
Packit 5c3484
void
Packit 5c3484
tune_mul_n (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  mp_size_t next_toom_start;
Packit 5c3484
  int something_changed;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_mul_n;
Packit 5c3484
Packit 5c3484
  param.name = "MUL_TOOM22_THRESHOLD";
Packit 5c3484
  param.min_size = MAX (4, MPN_TOOM22_MUL_MINSIZE);
Packit 5c3484
  param.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1;
Packit 5c3484
  one (&mul_toom22_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
  param.noprint = 1;
Packit 5c3484
Packit 5c3484
  /* Threshold sequence loop.  Disable functions that would be used in a very
Packit 5c3484
     narrow range, re-measuring things when that happens.  */
Packit 5c3484
  something_changed = 1;
Packit 5c3484
  while (something_changed)
Packit 5c3484
    {
Packit 5c3484
      something_changed = 0;
Packit 5c3484
Packit 5c3484
	next_toom_start = mul_toom22_threshold;
Packit 5c3484
Packit 5c3484
	if (mul_toom33_threshold != 0)
Packit 5c3484
	  {
Packit 5c3484
	    param.name = "MUL_TOOM33_THRESHOLD";
Packit 5c3484
	    param.min_size = MAX (next_toom_start, MPN_TOOM33_MUL_MINSIZE);
Packit 5c3484
	    param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
Packit 5c3484
	    one (&mul_toom33_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
	    if (next_toom_start * 1.05 >= mul_toom33_threshold)
Packit 5c3484
	      {
Packit 5c3484
		mul_toom33_threshold = 0;
Packit 5c3484
		something_changed = 1;
Packit 5c3484
	      }
Packit 5c3484
	  }
Packit 5c3484
Packit 5c3484
	next_toom_start = MAX (next_toom_start, mul_toom33_threshold);
Packit 5c3484
Packit 5c3484
	if (mul_toom44_threshold != 0)
Packit 5c3484
	  {
Packit 5c3484
	    param.name = "MUL_TOOM44_THRESHOLD";
Packit 5c3484
	    param.min_size = MAX (next_toom_start, MPN_TOOM44_MUL_MINSIZE);
Packit 5c3484
	    param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
Packit 5c3484
	    one (&mul_toom44_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
	    if (next_toom_start * 1.05 >= mul_toom44_threshold)
Packit 5c3484
	      {
Packit 5c3484
		mul_toom44_threshold = 0;
Packit 5c3484
		something_changed = 1;
Packit 5c3484
	      }
Packit 5c3484
	  }
Packit 5c3484
Packit 5c3484
	next_toom_start = MAX (next_toom_start, mul_toom44_threshold);
Packit 5c3484
Packit 5c3484
	if (mul_toom6h_threshold != 0)
Packit 5c3484
	  {
Packit 5c3484
	    param.name = "MUL_TOOM6H_THRESHOLD";
Packit 5c3484
	    param.min_size = MAX (next_toom_start, MPN_TOOM6H_MUL_MINSIZE);
Packit 5c3484
	    param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
Packit 5c3484
	    one (&mul_toom6h_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
	    if (next_toom_start * 1.05 >= mul_toom6h_threshold)
Packit 5c3484
	      {
Packit 5c3484
		mul_toom6h_threshold = 0;
Packit 5c3484
		something_changed = 1;
Packit 5c3484
	      }
Packit 5c3484
	  }
Packit 5c3484
Packit 5c3484
	next_toom_start = MAX (next_toom_start, mul_toom6h_threshold);
Packit 5c3484
Packit 5c3484
	if (mul_toom8h_threshold != 0)
Packit 5c3484
	  {
Packit 5c3484
	    param.name = "MUL_TOOM8H_THRESHOLD";
Packit 5c3484
	    param.min_size = MAX (next_toom_start, MPN_TOOM8H_MUL_MINSIZE);
Packit 5c3484
	    param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
Packit 5c3484
	    one (&mul_toom8h_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
	    if (next_toom_start * 1.05 >= mul_toom8h_threshold)
Packit 5c3484
	      {
Packit 5c3484
		mul_toom8h_threshold = 0;
Packit 5c3484
		something_changed = 1;
Packit 5c3484
	      }
Packit 5c3484
	  }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
    print_define ("MUL_TOOM33_THRESHOLD", MUL_TOOM33_THRESHOLD);
Packit 5c3484
    print_define ("MUL_TOOM44_THRESHOLD", MUL_TOOM44_THRESHOLD);
Packit 5c3484
    print_define ("MUL_TOOM6H_THRESHOLD", MUL_TOOM6H_THRESHOLD);
Packit 5c3484
    print_define ("MUL_TOOM8H_THRESHOLD", MUL_TOOM8H_THRESHOLD);
Packit 5c3484
Packit 5c3484
  /* disabled until tuned */
Packit 5c3484
  MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_mul (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  mp_size_t thres;
Packit 5c3484
Packit 5c3484
  param.noprint = 1;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_toom32_for_toom43_mul;
Packit 5c3484
  param.function2 = speed_mpn_toom43_for_toom32_mul;
Packit 5c3484
  param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
Packit 5c3484
  param.min_size = MPN_TOOM43_MUL_MINSIZE * 24 / 17;
Packit 5c3484
  one (&thres, ¶m;;
Packit 5c3484
  mul_toom32_to_toom43_threshold = thres * 17 / 24;
Packit 5c3484
  print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold);
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_toom32_for_toom53_mul;
Packit 5c3484
  param.function2 = speed_mpn_toom53_for_toom32_mul;
Packit 5c3484
  param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
Packit 5c3484
  param.min_size = MPN_TOOM53_MUL_MINSIZE * 30 / 19;
Packit 5c3484
  one (&thres, ¶m;;
Packit 5c3484
  mul_toom32_to_toom53_threshold = thres * 19 / 30;
Packit 5c3484
  print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold);
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_toom42_for_toom53_mul;
Packit 5c3484
  param.function2 = speed_mpn_toom53_for_toom42_mul;
Packit 5c3484
  param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
Packit 5c3484
  param.min_size = MPN_TOOM53_MUL_MINSIZE * 20 / 11;
Packit 5c3484
  one (&thres, ¶m;;
Packit 5c3484
  mul_toom42_to_toom53_threshold = thres * 11 / 20;
Packit 5c3484
  print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold);
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_toom42_mul;
Packit 5c3484
  param.function2 = speed_mpn_toom63_mul;
Packit 5c3484
  param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
Packit 5c3484
  param.min_size = MPN_TOOM63_MUL_MINSIZE * 2;
Packit 5c3484
  one (&thres, ¶m;;
Packit 5c3484
  mul_toom42_to_toom63_threshold = thres / 2;
Packit 5c3484
  print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold);
Packit 5c3484
Packit 5c3484
  /* Use ratio 5/6 when measuring, the middle of the range 2/3 to 1. */
Packit 5c3484
  param.function = speed_mpn_toom43_for_toom54_mul;
Packit 5c3484
  param.function2 = speed_mpn_toom54_for_toom43_mul;
Packit 5c3484
  param.name = "MUL_TOOM43_TO_TOOM54_THRESHOLD";
Packit 5c3484
  param.min_size = MPN_TOOM54_MUL_MINSIZE * 6 / 5;
Packit 5c3484
  one (&thres, ¶m;;
Packit 5c3484
  mul_toom43_to_toom54_threshold = thres * 5 / 6;
Packit 5c3484
  print_define ("MUL_TOOM43_TO_TOOM54_THRESHOLD", mul_toom43_to_toom54_threshold);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_mullo (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_mullo_n;
Packit 5c3484
Packit 5c3484
  param.name = "MULLO_BASECASE_THRESHOLD";
Packit 5c3484
  param.min_size = 1;
Packit 5c3484
  param.min_is_always = 1;
Packit 5c3484
  param.max_size = MULLO_BASECASE_THRESHOLD_LIMIT-1;
Packit 5c3484
  param.stop_factor = 1.5;
Packit 5c3484
  param.noprint = 1;
Packit 5c3484
  one (&mullo_basecase_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
  param.name = "MULLO_DC_THRESHOLD";
Packit 5c3484
  param.min_size = 8;
Packit 5c3484
  param.min_is_always = 0;
Packit 5c3484
  param.max_size = 1000;
Packit 5c3484
  one (&mullo_dc_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
  if (mullo_basecase_threshold >= mullo_dc_threshold)
Packit 5c3484
    {
Packit 5c3484
      print_define ("MULLO_BASECASE_THRESHOLD", mullo_dc_threshold);
Packit 5c3484
      print_define_remark ("MULLO_DC_THRESHOLD", 0, "never mpn_mullo_basecase");
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      print_define ("MULLO_BASECASE_THRESHOLD", mullo_basecase_threshold);
Packit 5c3484
      print_define ("MULLO_DC_THRESHOLD", mullo_dc_threshold);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (WANT_FFT && mul_fft_threshold < MP_SIZE_T_MAX / 2)
Packit 5c3484
    {
Packit 5c3484
      param.name = "MULLO_MUL_N_THRESHOLD";
Packit 5c3484
      param.min_size = mullo_dc_threshold;
Packit 5c3484
      param.max_size = 2 * mul_fft_threshold;
Packit 5c3484
      param.noprint = 0;
Packit 5c3484
      param.step_factor = 0.03;
Packit 5c3484
      one (&mullo_mul_n_threshold, ¶m;;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    print_define_remark ("MULLO_MUL_N_THRESHOLD", MP_SIZE_T_MAX,
Packit 5c3484
			 "without FFT use mullo forever");
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_sqrlo (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_sqrlo;
Packit 5c3484
Packit 5c3484
  param.name = "SQRLO_BASECASE_THRESHOLD";
Packit 5c3484
  param.min_size = 1;
Packit 5c3484
  param.min_is_always = 1;
Packit 5c3484
  param.max_size = SQRLO_BASECASE_THRESHOLD_LIMIT-1;
Packit 5c3484
  param.stop_factor = 1.5;
Packit 5c3484
  param.noprint = 1;
Packit 5c3484
  one (&sqrlo_basecase_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
  param.name = "SQRLO_DC_THRESHOLD";
Packit 5c3484
  param.min_size = 8;
Packit 5c3484
  param.min_is_always = 0;
Packit 5c3484
  param.max_size = SQRLO_DC_THRESHOLD_LIMIT-1;
Packit 5c3484
  one (&sqrlo_dc_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
  if (sqrlo_basecase_threshold >= sqrlo_dc_threshold)
Packit 5c3484
    {
Packit 5c3484
      print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_dc_threshold);
Packit 5c3484
      print_define_remark ("SQRLO_DC_THRESHOLD", 0, "never mpn_sqrlo_basecase");
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_basecase_threshold);
Packit 5c3484
      print_define ("SQRLO_DC_THRESHOLD", sqrlo_dc_threshold);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (WANT_FFT && sqr_fft_threshold < MP_SIZE_T_MAX / 2)
Packit 5c3484
    {
Packit 5c3484
      param.name = "SQRLO_SQR_THRESHOLD";
Packit 5c3484
      param.min_size = sqrlo_dc_threshold;
Packit 5c3484
      param.max_size = 2 * sqr_fft_threshold;
Packit 5c3484
      param.noprint = 0;
Packit 5c3484
      param.step_factor = 0.03;
Packit 5c3484
      one (&sqrlo_sqr_threshold, ¶m;;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    print_define_remark ("SQRLO_SQR_THRESHOLD", MP_SIZE_T_MAX,
Packit 5c3484
			 "without FFT use sqrlo forever");
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_mulmid (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.name = "MULMID_TOOM42_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_mulmid_n;
Packit 5c3484
  param.min_size = 4;
Packit 5c3484
  param.max_size = 100;
Packit 5c3484
  one (&mulmid_toom42_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_mulmod_bnm1 (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.name = "MULMOD_BNM1_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_mulmod_bnm1;
Packit 5c3484
  param.min_size = 4;
Packit 5c3484
  param.max_size = 100;
Packit 5c3484
  one (&mulmod_bnm1_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_sqrmod_bnm1 (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.name = "SQRMOD_BNM1_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_sqrmod_bnm1;
Packit 5c3484
  param.min_size = 4;
Packit 5c3484
  param.max_size = 100;
Packit 5c3484
  one (&sqrmod_bnm1_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Start the basecase from 3, since 1 is a special case, and if mul_basecase
Packit 5c3484
   is faster only at size==2 then we don't want to bother with extra code
Packit 5c3484
   just for that.  Start karatsuba from 4 same as MUL above.  */
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_sqr (void)
Packit 5c3484
{
Packit 5c3484
  /* disabled until tuned */
Packit 5c3484
  SQR_FFT_THRESHOLD = MP_SIZE_T_MAX;
Packit 5c3484
Packit 5c3484
  if (HAVE_NATIVE_mpn_sqr_basecase)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("SQR_BASECASE_THRESHOLD", 0, "always (native)");
Packit 5c3484
      sqr_basecase_threshold = 0;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      static struct param_t  param;
Packit 5c3484
      param.name = "SQR_BASECASE_THRESHOLD";
Packit 5c3484
      param.function = speed_mpn_sqr;
Packit 5c3484
      param.min_size = 3;
Packit 5c3484
      param.min_is_always = 1;
Packit 5c3484
      param.max_size = TUNE_SQR_TOOM2_MAX;
Packit 5c3484
      param.noprint = 1;
Packit 5c3484
      one (&sqr_basecase_threshold, ¶m;;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "SQR_TOOM2_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_sqr;
Packit 5c3484
    param.min_size = MAX (4, MPN_TOOM2_SQR_MINSIZE);
Packit 5c3484
    param.max_size = TUNE_SQR_TOOM2_MAX;
Packit 5c3484
    param.noprint = 1;
Packit 5c3484
    one (&sqr_toom2_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
    if (! HAVE_NATIVE_mpn_sqr_basecase
Packit 5c3484
        && sqr_toom2_threshold < sqr_basecase_threshold)
Packit 5c3484
      {
Packit 5c3484
        /* Karatsuba becomes faster than mul_basecase before
Packit 5c3484
           sqr_basecase does.  Arrange for the expression
Packit 5c3484
           "BELOW_THRESHOLD (un, SQR_TOOM2_THRESHOLD))" which
Packit 5c3484
           selects mpn_sqr_basecase in mpn_sqr to be false, by setting
Packit 5c3484
           SQR_TOOM2_THRESHOLD to zero, making
Packit 5c3484
           SQR_BASECASE_THRESHOLD the toom2 threshold.  */
Packit 5c3484
Packit 5c3484
        sqr_basecase_threshold = SQR_TOOM2_THRESHOLD;
Packit 5c3484
        SQR_TOOM2_THRESHOLD = 0;
Packit 5c3484
Packit 5c3484
        print_define_remark ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold,
Packit 5c3484
                             "toom2");
Packit 5c3484
        print_define_remark ("SQR_TOOM2_THRESHOLD",SQR_TOOM2_THRESHOLD,
Packit 5c3484
                             "never sqr_basecase");
Packit 5c3484
      }
Packit 5c3484
    else
Packit 5c3484
      {
Packit 5c3484
        if (! HAVE_NATIVE_mpn_sqr_basecase)
Packit 5c3484
          print_define ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold);
Packit 5c3484
        print_define ("SQR_TOOM2_THRESHOLD", SQR_TOOM2_THRESHOLD);
Packit 5c3484
      }
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    mp_size_t next_toom_start;
Packit 5c3484
    int something_changed;
Packit 5c3484
Packit 5c3484
    param.function = speed_mpn_sqr;
Packit 5c3484
    param.noprint = 1;
Packit 5c3484
Packit 5c3484
  /* Threshold sequence loop.  Disable functions that would be used in a very
Packit 5c3484
     narrow range, re-measuring things when that happens.  */
Packit 5c3484
    something_changed = 1;
Packit 5c3484
    while (something_changed)
Packit 5c3484
      {
Packit 5c3484
	something_changed = 0;
Packit 5c3484
Packit 5c3484
	next_toom_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
Packit 5c3484
Packit 5c3484
	sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
Packit 5c3484
	param.name = "SQR_TOOM3_THRESHOLD";
Packit 5c3484
	param.min_size = MAX (next_toom_start, MPN_TOOM3_SQR_MINSIZE);
Packit 5c3484
	param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
Packit 5c3484
	one (&sqr_toom3_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
	next_toom_start = MAX (next_toom_start, sqr_toom3_threshold);
Packit 5c3484
Packit 5c3484
	if (sqr_toom4_threshold != 0)
Packit 5c3484
	  {
Packit 5c3484
	    param.name = "SQR_TOOM4_THRESHOLD";
Packit 5c3484
	    sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
Packit 5c3484
	    param.min_size = MAX (next_toom_start, MPN_TOOM4_SQR_MINSIZE);
Packit 5c3484
	    param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
Packit 5c3484
	    one (&sqr_toom4_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
	    if (next_toom_start * 1.05 >= sqr_toom4_threshold)
Packit 5c3484
	      {
Packit 5c3484
		sqr_toom4_threshold = 0;
Packit 5c3484
		something_changed = 1;
Packit 5c3484
	      }
Packit 5c3484
	  }
Packit 5c3484
Packit 5c3484
	next_toom_start = MAX (next_toom_start, sqr_toom4_threshold);
Packit 5c3484
Packit 5c3484
	if (sqr_toom6_threshold != 0)
Packit 5c3484
	  {
Packit 5c3484
	    param.name = "SQR_TOOM6_THRESHOLD";
Packit 5c3484
	    sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
Packit 5c3484
	    param.min_size = MAX (next_toom_start, MPN_TOOM6_SQR_MINSIZE);
Packit 5c3484
	    param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
Packit 5c3484
	    one (&sqr_toom6_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
	    if (next_toom_start * 1.05 >= sqr_toom6_threshold)
Packit 5c3484
	      {
Packit 5c3484
		sqr_toom6_threshold = 0;
Packit 5c3484
		something_changed = 1;
Packit 5c3484
	      }
Packit 5c3484
	  }
Packit 5c3484
Packit 5c3484
	next_toom_start = MAX (next_toom_start, sqr_toom6_threshold);
Packit 5c3484
Packit 5c3484
	if (sqr_toom8_threshold != 0)
Packit 5c3484
	  {
Packit 5c3484
	    param.name = "SQR_TOOM8_THRESHOLD";
Packit 5c3484
	    sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
Packit 5c3484
	    param.min_size = MAX (next_toom_start, MPN_TOOM8_SQR_MINSIZE);
Packit 5c3484
	    param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
Packit 5c3484
	    one (&sqr_toom8_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
	    if (next_toom_start * 1.05 >= sqr_toom8_threshold)
Packit 5c3484
	      {
Packit 5c3484
		sqr_toom8_threshold = 0;
Packit 5c3484
		something_changed = 1;
Packit 5c3484
	      }
Packit 5c3484
	  }
Packit 5c3484
      }
Packit 5c3484
Packit 5c3484
    print_define ("SQR_TOOM3_THRESHOLD", SQR_TOOM3_THRESHOLD);
Packit 5c3484
    print_define ("SQR_TOOM4_THRESHOLD", SQR_TOOM4_THRESHOLD);
Packit 5c3484
    print_define ("SQR_TOOM6_THRESHOLD", SQR_TOOM6_THRESHOLD);
Packit 5c3484
    print_define ("SQR_TOOM8_THRESHOLD", SQR_TOOM8_THRESHOLD);
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_dc_div (void)
Packit 5c3484
{
Packit 5c3484
  s.r = 0;		/* clear to make speed function do 2n/n */
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "DC_DIV_QR_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_sbpi1_div_qr;
Packit 5c3484
    param.function2 = speed_mpn_dcpi1_div_qr;
Packit 5c3484
    param.min_size = 6;
Packit 5c3484
    one (&dc_div_qr_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "DC_DIVAPPR_Q_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_sbpi1_divappr_q;
Packit 5c3484
    param.function2 = speed_mpn_dcpi1_divappr_q;
Packit 5c3484
    param.min_size = 6;
Packit 5c3484
    one (&dc_divappr_q_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
static double
Packit 5c3484
speed_mpn_sbordcpi1_div_qr (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  if (s->size < DC_DIV_QR_THRESHOLD)
Packit 5c3484
    return speed_mpn_sbpi1_div_qr (s);
Packit 5c3484
  else
Packit 5c3484
    return speed_mpn_dcpi1_div_qr (s);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_mu_div (void)
Packit 5c3484
{
Packit 5c3484
  s.r = 0;		/* clear to make speed function do 2n/n */
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "MU_DIV_QR_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_dcpi1_div_qr;
Packit 5c3484
    param.function2 = speed_mpn_mu_div_qr;
Packit 5c3484
    param.min_size = mul_toom22_threshold;
Packit 5c3484
    param.max_size = 5000;
Packit 5c3484
    param.step_factor = 0.02;
Packit 5c3484
    one (&mu_div_qr_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "MU_DIVAPPR_Q_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_dcpi1_divappr_q;
Packit 5c3484
    param.function2 = speed_mpn_mu_divappr_q;
Packit 5c3484
    param.min_size = mul_toom22_threshold;
Packit 5c3484
    param.max_size = 5000;
Packit 5c3484
    param.step_factor = 0.02;
Packit 5c3484
    one (&mu_divappr_q_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "MUPI_DIV_QR_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_sbordcpi1_div_qr;
Packit 5c3484
    param.function2 = speed_mpn_mupi_div_qr;
Packit 5c3484
    param.min_size = 6;
Packit 5c3484
    param.min_is_always = 1;
Packit 5c3484
    param.max_size = 1000;
Packit 5c3484
    param.step_factor = 0.02;
Packit 5c3484
    one (&mupi_div_qr_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_dc_bdiv (void)
Packit 5c3484
{
Packit 5c3484
  s.r = 0;		/* clear to make speed function do 2n/n*/
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "DC_BDIV_QR_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_sbpi1_bdiv_qr;
Packit 5c3484
    param.function2 = speed_mpn_dcpi1_bdiv_qr;
Packit 5c3484
    param.min_size = 4;
Packit 5c3484
    one (&dc_bdiv_qr_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "DC_BDIV_Q_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_sbpi1_bdiv_q;
Packit 5c3484
    param.function2 = speed_mpn_dcpi1_bdiv_q;
Packit 5c3484
    param.min_size = 4;
Packit 5c3484
    one (&dc_bdiv_q_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_mu_bdiv (void)
Packit 5c3484
{
Packit 5c3484
  s.r = 0;		/* clear to make speed function do 2n/n*/
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "MU_BDIV_QR_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_dcpi1_bdiv_qr;
Packit 5c3484
    param.function2 = speed_mpn_mu_bdiv_qr;
Packit 5c3484
    param.min_size = dc_bdiv_qr_threshold;
Packit 5c3484
    param.max_size = 5000;
Packit 5c3484
    param.step_factor = 0.02;
Packit 5c3484
    one (&mu_bdiv_qr_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "MU_BDIV_Q_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_dcpi1_bdiv_q;
Packit 5c3484
    param.function2 = speed_mpn_mu_bdiv_q;
Packit 5c3484
    param.min_size = dc_bdiv_q_threshold;
Packit 5c3484
    param.max_size = 5000;
Packit 5c3484
    param.step_factor = 0.02;
Packit 5c3484
    one (&mu_bdiv_q_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_invertappr (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_ni_invertappr;
Packit 5c3484
  param.name = "INV_MULMOD_BNM1_THRESHOLD";
Packit 5c3484
  param.min_size = 5;
Packit 5c3484
  one (&inv_mulmod_bnm1_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_invertappr;
Packit 5c3484
  param.name = "INV_NEWTON_THRESHOLD";
Packit 5c3484
  param.min_size = 5;
Packit 5c3484
  one (&inv_newton_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_invert (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_invert;
Packit 5c3484
  param.name = "INV_APPR_THRESHOLD";
Packit 5c3484
  param.min_size = 5;
Packit 5c3484
  one (&inv_appr_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_binvert (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpn_binvert;
Packit 5c3484
  param.name = "BINV_NEWTON_THRESHOLD";
Packit 5c3484
  param.min_size = 8;		/* pointless with smaller operands */
Packit 5c3484
  one (&binv_newton_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_redc (void)
Packit 5c3484
{
Packit 5c3484
#define TUNE_REDC_2_MAX 100
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
Packit 5c3484
#define WANT_REDC_2 1
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "REDC_1_TO_REDC_2_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_redc_1;
Packit 5c3484
    param.function2 = speed_mpn_redc_2;
Packit 5c3484
    param.min_size = 1;
Packit 5c3484
    param.min_is_always = 1;
Packit 5c3484
    param.max_size = TUNE_REDC_2_MAX;
Packit 5c3484
    param.noprint = 1;
Packit 5c3484
    param.stop_factor = 1.5;
Packit 5c3484
    one (&redc_1_to_redc_2_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "REDC_2_TO_REDC_N_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_redc_2;
Packit 5c3484
    param.function2 = speed_mpn_redc_n;
Packit 5c3484
    param.min_size = 16;
Packit 5c3484
    param.noprint = 1;
Packit 5c3484
    one (&redc_2_to_redc_n_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold)
Packit 5c3484
    {
Packit 5c3484
      redc_2_to_redc_n_threshold = 0;	/* disable redc_2 */
Packit 5c3484
Packit 5c3484
      /* Never use redc2, measure redc_1 -> redc_n cutoff, store result as
Packit 5c3484
	 REDC_1_TO_REDC_2_THRESHOLD.  */
Packit 5c3484
      {
Packit 5c3484
	static struct param_t  param;
Packit 5c3484
	param.name = "REDC_1_TO_REDC_2_THRESHOLD";
Packit 5c3484
	param.function = speed_mpn_redc_1;
Packit 5c3484
	param.function2 = speed_mpn_redc_n;
Packit 5c3484
	param.min_size = 16;
Packit 5c3484
	param.noprint = 1;
Packit 5c3484
	one (&redc_1_to_redc_2_threshold, ¶m;;
Packit 5c3484
      }
Packit 5c3484
    }
Packit 5c3484
  print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
Packit 5c3484
  print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
Packit 5c3484
#else
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "REDC_1_TO_REDC_N_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_redc_1;
Packit 5c3484
    param.function2 = speed_mpn_redc_n;
Packit 5c3484
    param.min_size = 16;
Packit 5c3484
    one (&redc_1_to_redc_n_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
#endif
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_matrix22_mul (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  param.name = "MATRIX22_STRASSEN_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_matrix22_mul;
Packit 5c3484
  param.min_size = 2;
Packit 5c3484
  one (&matrix22_strassen_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_hgcd (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  param.name = "HGCD_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_hgcd;
Packit 5c3484
  /* We seem to get strange results for small sizes */
Packit 5c3484
  param.min_size = 30;
Packit 5c3484
  one (&hgcd_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_hgcd_appr (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  param.name = "HGCD_APPR_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_hgcd_appr;
Packit 5c3484
  /* We seem to get strange results for small sizes */
Packit 5c3484
  param.min_size = 50;
Packit 5c3484
  param.stop_since_change = 150;
Packit 5c3484
  one (&hgcd_appr_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_hgcd_reduce (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  param.name = "HGCD_REDUCE_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_hgcd_reduce;
Packit 5c3484
  param.min_size = 30;
Packit 5c3484
  param.max_size = 7000;
Packit 5c3484
  param.step_factor = 0.04;
Packit 5c3484
  one (&hgcd_reduce_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_gcd_dc (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  param.name = "GCD_DC_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_gcd;
Packit 5c3484
  param.min_size = hgcd_threshold;
Packit 5c3484
  param.max_size = 3000;
Packit 5c3484
  param.step_factor = 0.02;
Packit 5c3484
  one (&gcd_dc_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_gcdext_dc (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  param.name = "GCDEXT_DC_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_gcdext;
Packit 5c3484
  param.min_size = hgcd_threshold;
Packit 5c3484
  param.max_size = 3000;
Packit 5c3484
  param.step_factor = 0.02;
Packit 5c3484
  one (&gcdext_dc_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* In tune_powm_sec we compute the table used by the win_size function.  The
Packit 5c3484
   cutoff points are in exponent bits, disregarding other operand sizes.  It is
Packit 5c3484
   not possible to use the one framework since it currently uses a granularity
Packit 5c3484
   of full limbs.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
/* This win_size replaces the variant in the powm code, allowing us to
Packit 5c3484
   control k in the k-ary algorithms.  */
Packit 5c3484
int winsize;
Packit 5c3484
int
Packit 5c3484
win_size (mp_bitcnt_t eb)
Packit 5c3484
{
Packit 5c3484
  return winsize;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_powm_sec (void)
Packit 5c3484
{
Packit 5c3484
  mp_size_t n;
Packit 5c3484
  int k, i;
Packit 5c3484
  mp_size_t itch;
Packit 5c3484
  mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff;
Packit 5c3484
  const int n_max = 3000 / GMP_NUMB_BITS;
Packit 5c3484
  const int n_measurements = 5;
Packit 5c3484
  mp_ptr rp, bp, ep, mp, tp;
Packit 5c3484
  double ttab[n_measurements], tk, tkp1;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  possible_nbits_cutoff = 0;
Packit 5c3484
Packit 5c3484
  k = 1;
Packit 5c3484
Packit 5c3484
  winsize = 10;			/* the itch function needs this */
Packit 5c3484
  itch = mpn_sec_powm_itch (n_max, n_max * GMP_NUMB_BITS, n_max);
Packit 5c3484
Packit 5c3484
  rp = TMP_ALLOC_LIMBS (n_max);
Packit 5c3484
  bp = TMP_ALLOC_LIMBS (n_max);
Packit 5c3484
  ep = TMP_ALLOC_LIMBS (n_max);
Packit 5c3484
  mp = TMP_ALLOC_LIMBS (n_max);
Packit 5c3484
  tp = TMP_ALLOC_LIMBS (itch);
Packit 5c3484
Packit 5c3484
  mpn_random (bp, n_max);
Packit 5c3484
  mpn_random (mp, n_max);
Packit 5c3484
  mp[0] |= 1;
Packit 5c3484
Packit 5c3484
/* How about taking the M operand size into account?
Packit 5c3484
Packit 5c3484
   An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming
Packit 5c3484
   B = O(M)).
Packit 5c3484
Packit 5c3484
   Using k-ary and no sliding window, the precomputation will need time
Packit 5c3484
   O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) +
Packit 5c3484
   O(log(E)/k*M(N)), for the squarings, multiplications, respectively.
Packit 5c3484
Packit 5c3484
   An operation R=powm_sec(B,E,N) will take time like powm.
Packit 5c3484
Packit 5c3484
   Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the
Packit 5c3484
   main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) +
Packit 5c3484
   O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full
Packit 5c3484
   table reads, respectively.  */
Packit 5c3484
Packit 5c3484
  printf ("#define POWM_SEC_TABLE  ");
Packit 5c3484
Packit 5c3484
  /* For nbits == 1, we should always use k == 1, so no need to tune
Packit 5c3484
     that. Starting with nbits == 2 also ensure that nbits always is
Packit 5c3484
     larger than the windowsize k+1. */
Packit 5c3484
  for (nbits = 2; nbits <= n_max * GMP_NUMB_BITS; )
Packit 5c3484
    {
Packit 5c3484
      n = (nbits - 1) / GMP_NUMB_BITS + 1;
Packit 5c3484
Packit 5c3484
      /* Generate E such that sliding-window for k and k+1 works equally
Packit 5c3484
	 well/poorly (but sliding is not used in powm_sec, of course). */
Packit 5c3484
      for (i = 0; i < n; i++)
Packit 5c3484
	ep[i] = ~CNST_LIMB(0);
Packit 5c3484
Packit 5c3484
      winsize = k;
Packit 5c3484
      for (i = 0; i < n_measurements; i++)
Packit 5c3484
	{
Packit 5c3484
	  speed_starttime ();
Packit 5c3484
	  mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
Packit 5c3484
	  ttab[i] = speed_endtime ();
Packit 5c3484
	}
Packit 5c3484
      tk = median (ttab, n_measurements);
Packit 5c3484
Packit 5c3484
      winsize = k + 1;
Packit 5c3484
      speed_starttime ();
Packit 5c3484
      for (i = 0; i < n_measurements; i++)
Packit 5c3484
	{
Packit 5c3484
	  speed_starttime ();
Packit 5c3484
	  mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
Packit 5c3484
	  ttab[i] = speed_endtime ();
Packit 5c3484
	}
Packit 5c3484
      tkp1 = median (ttab, n_measurements);
Packit 5c3484
/*
Packit 5c3484
      printf ("testing: %ld, %d", nbits, k, ep[n-1]);
Packit 5c3484
      printf ("   %10.5f  %10.5f\n", tk, tkp1);
Packit 5c3484
*/
Packit 5c3484
      if (tkp1 < tk)
Packit 5c3484
	{
Packit 5c3484
	  if (possible_nbits_cutoff)
Packit 5c3484
	    {
Packit 5c3484
	      /* Two consecutive sizes indicate k increase, obey.  */
Packit 5c3484
Packit 5c3484
	      /* Must always have x[k] >= k */
Packit 5c3484
	      ASSERT_ALWAYS (possible_nbits_cutoff >= k);
Packit 5c3484
Packit 5c3484
	      if (k > 1)
Packit 5c3484
		printf (",");
Packit 5c3484
	      printf ("%ld", (long) possible_nbits_cutoff);
Packit 5c3484
	      k++;
Packit 5c3484
	      possible_nbits_cutoff = 0;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      /* One measurement indicate k increase, save nbits for further
Packit 5c3484
		 consideration.  */
Packit 5c3484
	      /* The new larger k gets used for sizes > the cutoff
Packit 5c3484
		 value, hence the cutoff should be one less than the
Packit 5c3484
		 smallest size where it gives a speedup. */
Packit 5c3484
	      possible_nbits_cutoff = nbits - 1;
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	possible_nbits_cutoff = 0;
Packit 5c3484
Packit 5c3484
      nbits_next = nbits * 65 / 64;
Packit 5c3484
      nbits = nbits_next + (nbits_next == nbits);
Packit 5c3484
    }
Packit 5c3484
  printf ("\n");
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* size_extra==1 reflects the fact that with high<divisor one division is
   always skipped.  Forcing high<divisor while testing ensures consistency
   while stepping through sizes, ie. that size-1 divides will be done each
   time.

   min_size==2 and min_is_always are used so that if plain division is only
   better at size==1 then don't bother including that code just for that
   case, instead go with preinv always and get a size saving.  */
Packit 5c3484
Packit 5c3484
/* Common param_t settings for the single-limb division thresholds.  Expands
   to a sequence of assignments on a variable named "param" that must be in
   scope at the point of use.  */
#define DIV_1_PARAMS                    \
  param.check_size = 256;               \
  param.min_size = 2;                   \
  param.min_is_always = 1;              \
  param.data_high = DATA_HIGH_LT_R;     \
  param.size_extra = 1;                 \
  param.stop_factor = 2.0;
Packit 5c3484
Packit 5c3484
Packit 5c3484
double (*tuned_speed_mpn_divrem_1) (struct speed_params *);
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_divrem_1 (void)
Packit 5c3484
{
Packit 5c3484
  /* plain version by default */
Packit 5c3484
  tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1;
Packit 5c3484
Packit 5c3484
  /* No support for tuning native assembler code, do that by hand and put
Packit 5c3484
     the results in the .asm file, there's no need for such thresholds to
Packit 5c3484
     appear in gmp-mparam.h.  */
Packit 5c3484
  if (HAVE_NATIVE_mpn_divrem_1)
Packit 5c3484
    return;
Packit 5c3484
Packit 5c3484
  if (GMP_NAIL_BITS != 0)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("DIVREM_1_NORM_THRESHOLD", MP_SIZE_T_MAX,
Packit 5c3484
                           "no preinv with nails");
Packit 5c3484
      print_define_remark ("DIVREM_1_UNNORM_THRESHOLD", MP_SIZE_T_MAX,
Packit 5c3484
                           "no preinv with nails");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (UDIV_PREINV_ALWAYS)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("DIVREM_1_NORM_THRESHOLD", 0L, "preinv always");
Packit 5c3484
      print_define ("DIVREM_1_UNNORM_THRESHOLD", 0L);
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1_tune;
Packit 5c3484
Packit 5c3484
  /* Tune for the integer part of mpn_divrem_1.  This will very possibly be
Packit 5c3484
     a bit out for the fractional part, but that's too bad, the integer part
Packit 5c3484
     is more important. */
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "DIVREM_1_NORM_THRESHOLD";
Packit 5c3484
    DIV_1_PARAMS;
Packit 5c3484
    s.r = randlimb_norm ();
Packit 5c3484
    param.function = speed_mpn_divrem_1_tune;
Packit 5c3484
    one (&divrem_1_norm_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "DIVREM_1_UNNORM_THRESHOLD";
Packit 5c3484
    DIV_1_PARAMS;
Packit 5c3484
    s.r = randlimb_half ();
Packit 5c3484
    param.function = speed_mpn_divrem_1_tune;
Packit 5c3484
    one (&divrem_1_unnorm_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_div_qr_1 (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  double            t1, t2;
Packit 5c3484
Packit 5c3484
  if (!HAVE_NATIVE_mpn_div_qr_1n_pi1)
Packit 5c3484
    {
Packit 5c3484
      static struct param_t  param;
Packit 5c3484
      double   t1, t2;
Packit 5c3484
Packit 5c3484
      s.size = 10;
Packit 5c3484
      s.r = randlimb_norm ();
Packit 5c3484
Packit 5c3484
      t1 = tuneup_measure (speed_mpn_div_qr_1n_pi1_1, &param, &s);
Packit 5c3484
      t2 = tuneup_measure (speed_mpn_div_qr_1n_pi1_2, &param, &s);
Packit 5c3484
Packit 5c3484
      if (t1 == -1.0 || t2 == -1.0)
Packit 5c3484
	{
Packit 5c3484
	  printf ("Oops, can't measure all mpn_div_qr_1n_pi1 methods at %ld\n",
Packit 5c3484
		  (long) s.size);
Packit 5c3484
	  abort ();
Packit 5c3484
	}
Packit 5c3484
      div_qr_1n_pi1_method = (t1 < t2) ? 1 : 2;
Packit 5c3484
      print_define ("DIV_QR_1N_PI1_METHOD", div_qr_1n_pi1_method);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "DIV_QR_1_NORM_THRESHOLD";
Packit 5c3484
    DIV_1_PARAMS;
Packit 5c3484
    param.min_size = 1;
Packit 5c3484
    param.min_is_always = 0;
Packit 5c3484
    s.r = randlimb_norm ();
Packit 5c3484
    param.function = speed_mpn_div_qr_1_tune;
Packit 5c3484
    one (&div_qr_1_norm_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "DIV_QR_1_UNNORM_THRESHOLD";
Packit 5c3484
    DIV_1_PARAMS;
Packit 5c3484
    param.min_size = 1;
Packit 5c3484
    param.min_is_always = 0;
Packit 5c3484
    s.r = randlimb_half();
Packit 5c3484
    param.function = speed_mpn_div_qr_1_tune;
Packit 5c3484
    one (&div_qr_1_unnorm_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_mod_1 (void)
Packit 5c3484
{
Packit 5c3484
  /* No support for tuning native assembler code, do that by hand and put
Packit 5c3484
     the results in the .asm file, there's no need for such thresholds to
Packit 5c3484
     appear in gmp-mparam.h.  */
Packit 5c3484
  if (HAVE_NATIVE_mpn_mod_1)
Packit 5c3484
    return;
Packit 5c3484
Packit 5c3484
  if (GMP_NAIL_BITS != 0)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("MOD_1_NORM_THRESHOLD", MP_SIZE_T_MAX,
Packit 5c3484
                           "no preinv with nails");
Packit 5c3484
      print_define_remark ("MOD_1_UNNORM_THRESHOLD", MP_SIZE_T_MAX,
Packit 5c3484
                           "no preinv with nails");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (!HAVE_NATIVE_mpn_mod_1_1p)
Packit 5c3484
    {
Packit 5c3484
      static struct param_t  param;
Packit 5c3484
      double   t1, t2;
Packit 5c3484
Packit 5c3484
      s.size = 10;
Packit 5c3484
      s.r = randlimb_half ();
Packit 5c3484
Packit 5c3484
      t1 = tuneup_measure (speed_mpn_mod_1_1_1, &param, &s);
Packit 5c3484
      t2 = tuneup_measure (speed_mpn_mod_1_1_2, &param, &s);
Packit 5c3484
Packit 5c3484
      if (t1 == -1.0 || t2 == -1.0)
Packit 5c3484
	{
Packit 5c3484
	  printf ("Oops, can't measure all mpn_mod_1_1 methods at %ld\n",
Packit 5c3484
		  (long) s.size);
Packit 5c3484
	  abort ();
Packit 5c3484
	}
Packit 5c3484
      mod_1_1p_method = (t1 < t2) ? 1 : 2;
Packit 5c3484
      print_define ("MOD_1_1P_METHOD", mod_1_1p_method);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (UDIV_PREINV_ALWAYS)
Packit 5c3484
    {
Packit 5c3484
      print_define ("MOD_1_NORM_THRESHOLD", 0L);
Packit 5c3484
      print_define ("MOD_1_UNNORM_THRESHOLD", 0L);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      {
Packit 5c3484
	static struct param_t  param;
Packit 5c3484
	param.name = "MOD_1_NORM_THRESHOLD";
Packit 5c3484
	DIV_1_PARAMS;
Packit 5c3484
	s.r = randlimb_norm ();
Packit 5c3484
	param.function = speed_mpn_mod_1_tune;
Packit 5c3484
	one (&mod_1_norm_threshold, ¶m;;
Packit 5c3484
      }
Packit 5c3484
      {
Packit 5c3484
	static struct param_t  param;
Packit 5c3484
	param.name = "MOD_1_UNNORM_THRESHOLD";
Packit 5c3484
	DIV_1_PARAMS;
Packit 5c3484
	s.r = randlimb_half ();
Packit 5c3484
	param.function = speed_mpn_mod_1_tune;
Packit 5c3484
	one (&mod_1_unnorm_threshold, ¶m;;
Packit 5c3484
      }
Packit 5c3484
    }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
Packit 5c3484
    param.check_size = 256;
Packit 5c3484
Packit 5c3484
    s.r = randlimb_norm ();
Packit 5c3484
    param.function = speed_mpn_mod_1_tune;
Packit 5c3484
Packit 5c3484
    param.name = "MOD_1N_TO_MOD_1_1_THRESHOLD";
Packit 5c3484
    param.min_size = 2;
Packit 5c3484
    one (&mod_1n_to_mod_1_1_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
Packit 5c3484
    param.check_size = 256;
Packit 5c3484
    s.r = randlimb_half ();
Packit 5c3484
    param.noprint = 1;
Packit 5c3484
Packit 5c3484
    param.function = speed_mpn_mod_1_1;
Packit 5c3484
    param.function2 = speed_mpn_mod_1_2;
Packit 5c3484
    param.min_is_always = 1;
Packit 5c3484
    param.name = "MOD_1_1_TO_MOD_1_2_THRESHOLD";
Packit 5c3484
    param.min_size = 2;
Packit 5c3484
    one (&mod_1_1_to_mod_1_2_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
    param.function = speed_mpn_mod_1_2;
Packit 5c3484
    param.function2 = speed_mpn_mod_1_4;
Packit 5c3484
    param.min_is_always = 1;
Packit 5c3484
    param.name = "MOD_1_2_TO_MOD_1_4_THRESHOLD";
Packit 5c3484
    param.min_size = 1;
Packit 5c3484
    one (&mod_1_2_to_mod_1_4_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
    if (mod_1_1_to_mod_1_2_threshold >= mod_1_2_to_mod_1_4_threshold)
Packit 5c3484
      {
Packit 5c3484
	/* Never use mod_1_2, measure mod_1_1 -> mod_1_4 */
Packit 5c3484
	mod_1_2_to_mod_1_4_threshold = 0;
Packit 5c3484
Packit 5c3484
	param.function = speed_mpn_mod_1_1;
Packit 5c3484
	param.function2 = speed_mpn_mod_1_4;
Packit 5c3484
	param.min_is_always = 1;
Packit 5c3484
	param.name = "MOD_1_1_TO_MOD_1_4_THRESHOLD fake";
Packit 5c3484
	param.min_size = 2;
Packit 5c3484
	one (&mod_1_1_to_mod_1_2_threshold, ¶m;;
Packit 5c3484
      }
Packit 5c3484
Packit 5c3484
    param.function = speed_mpn_mod_1_tune;
Packit 5c3484
    param.function2 = NULL;
Packit 5c3484
    param.name = "MOD_1U_TO_MOD_1_1_THRESHOLD";
Packit 5c3484
    param.min_size = 2;
Packit 5c3484
    param.min_is_always = 0;
Packit 5c3484
    one (&mod_1u_to_mod_1_1_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
    if (mod_1u_to_mod_1_1_threshold >= mod_1_1_to_mod_1_2_threshold)
Packit 5c3484
      mod_1_1_to_mod_1_2_threshold = 0;
Packit 5c3484
    if (mod_1u_to_mod_1_1_threshold >= mod_1_2_to_mod_1_4_threshold)
Packit 5c3484
      mod_1_2_to_mod_1_4_threshold = 0;
Packit 5c3484
Packit 5c3484
    print_define_remark ("MOD_1U_TO_MOD_1_1_THRESHOLD", mod_1u_to_mod_1_1_threshold, NULL);
Packit 5c3484
    print_define_remark ("MOD_1_1_TO_MOD_1_2_THRESHOLD", mod_1_1_to_mod_1_2_threshold,
Packit 5c3484
			 mod_1_1_to_mod_1_2_threshold == 0 ? "never mpn_mod_1_1p" : NULL);
Packit 5c3484
    print_define_remark ("MOD_1_2_TO_MOD_1_4_THRESHOLD", mod_1_2_to_mod_1_4_threshold,
Packit 5c3484
			 mod_1_2_to_mod_1_4_threshold == 0 ? "never mpn_mod_1s_2p" : NULL);
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
Packit 5c3484
    param.check_size = 256;
Packit 5c3484
Packit 5c3484
    param.name = "PREINV_MOD_1_TO_MOD_1_THRESHOLD";
Packit 5c3484
    s.r = randlimb_norm ();
Packit 5c3484
    param.function = speed_mpn_preinv_mod_1;
Packit 5c3484
    param.function2 = speed_mpn_mod_1_tune;
Packit 5c3484
    param.min_size = 1;
Packit 5c3484
    one (&preinv_mod_1_to_mod_1_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* A non-zero DIVREM_1_UNNORM_THRESHOLD (or DIVREM_1_NORM_THRESHOLD) would
Packit 5c3484
   imply that udiv_qrnnd_preinv is worth using, but it seems most
Packit 5c3484
   straightforward to compare mpn_preinv_divrem_1 and mpn_divrem_1_div
Packit 5c3484
   directly.  */
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_preinv_divrem_1 (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  speed_function_t  divrem_1;
Packit 5c3484
  const char        *divrem_1_name;
Packit 5c3484
  double            t1, t2;
Packit 5c3484
Packit 5c3484
  if (GMP_NAIL_BITS != 0)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("USE_PREINV_DIVREM_1", 0, "no preinv with nails");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* Any native version of mpn_preinv_divrem_1 is assumed to exist because
Packit 5c3484
     it's faster than mpn_divrem_1.  */
Packit 5c3484
  if (HAVE_NATIVE_mpn_preinv_divrem_1)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("USE_PREINV_DIVREM_1", 1, "native");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* If udiv_qrnnd_preinv is the only division method then of course
Packit 5c3484
     mpn_preinv_divrem_1 should be used.  */
Packit 5c3484
  if (UDIV_PREINV_ALWAYS)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("USE_PREINV_DIVREM_1", 1, "preinv always");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* If we've got an assembler version of mpn_divrem_1, then compare against
Packit 5c3484
     that, not the mpn_divrem_1_div generic C.  */
Packit 5c3484
  if (HAVE_NATIVE_mpn_divrem_1)
Packit 5c3484
    {
Packit 5c3484
      divrem_1 = speed_mpn_divrem_1;
Packit 5c3484
      divrem_1_name = "mpn_divrem_1";
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      divrem_1 = speed_mpn_divrem_1_div;
Packit 5c3484
      divrem_1_name = "mpn_divrem_1_div";
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  param.data_high = DATA_HIGH_LT_R; /* allow skip one division */
Packit 5c3484
  s.size = 200;                     /* generous but not too big */
Packit 5c3484
  /* Divisor, nonzero.  Unnormalized so as to exercise the shift!=0 case,
Packit 5c3484
     since in general that's probably most common, though in fact for a
Packit 5c3484
     64-bit limb mp_bases[10].big_base is normalized.  */
Packit 5c3484
  s.r = urandom() & (GMP_NUMB_MASK >> 4);
Packit 5c3484
  if (s.r == 0) s.r = 123;
Packit 5c3484
Packit 5c3484
  t1 = tuneup_measure (speed_mpn_preinv_divrem_1, &param, &s);
Packit 5c3484
  t2 = tuneup_measure (divrem_1, &param, &s);
Packit 5c3484
  if (t1 == -1.0 || t2 == -1.0)
Packit 5c3484
    {
Packit 5c3484
      printf ("Oops, can't measure mpn_preinv_divrem_1 and %s at %ld\n",
Packit 5c3484
              divrem_1_name, (long) s.size);
Packit 5c3484
      abort ();
Packit 5c3484
    }
Packit 5c3484
  if (option_trace >= 1)
Packit 5c3484
    printf ("size=%ld, mpn_preinv_divrem_1 %.9f, %s %.9f\n",
Packit 5c3484
            (long) s.size, t1, divrem_1_name, t2);
Packit 5c3484
Packit 5c3484
  print_define_remark ("USE_PREINV_DIVREM_1", (mp_size_t) (t1 < t2), NULL);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_divrem_2 (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  /* No support for tuning native assembler code, do that by hand and put
Packit 5c3484
     the results in the .asm file, and there's no need for such thresholds
Packit 5c3484
     to appear in gmp-mparam.h.  */
Packit 5c3484
  if (HAVE_NATIVE_mpn_divrem_2)
Packit 5c3484
    return;
Packit 5c3484
Packit 5c3484
  if (GMP_NAIL_BITS != 0)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("DIVREM_2_THRESHOLD", MP_SIZE_T_MAX,
Packit 5c3484
                           "no preinv with nails");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (UDIV_PREINV_ALWAYS)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("DIVREM_2_THRESHOLD", 0L, "preinv always");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* Tune for the integer part of mpn_divrem_2.  This will very possibly be
Packit 5c3484
     a bit out for the fractional part, but that's too bad, the integer part
Packit 5c3484
     is more important.
Packit 5c3484
Packit 5c3484
     min_size must be >=2 since nsize>=2 is required, but is set to 4 to save
Packit 5c3484
     code space if plain division is better only at size==2 or size==3. */
Packit 5c3484
  param.name = "DIVREM_2_THRESHOLD";
Packit 5c3484
  param.check_size = 256;
Packit 5c3484
  param.min_size = 4;
Packit 5c3484
  param.min_is_always = 1;
Packit 5c3484
  param.size_extra = 2;      /* does qsize==nsize-2 divisions */
Packit 5c3484
  param.stop_factor = 2.0;
Packit 5c3484
Packit 5c3484
  s.r = randlimb_norm ();
Packit 5c3484
  param.function = speed_mpn_divrem_2;
Packit 5c3484
  one (&divrem_2_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_div_qr_2 (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  param.name = "DIV_QR_2_PI2_THRESHOLD";
Packit 5c3484
  param.function = speed_mpn_div_qr_2n;
Packit 5c3484
  param.check_size = 500;
Packit 5c3484
  param.min_size = 4;
Packit 5c3484
  one (&div_qr_2_pi2_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
   tune for that.  Its speed can differ on odd or even divisor, so take an
   average threshold for the two.

   mpn_divrem_1 can vary with high<divisor or not, whereas mpn_divexact_1
   might not vary that way, but don't test this since high<divisor isn't
   expected to occur often with small divisors.  */
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_divexact_1 (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  mp_size_t  thresh[2], average;
Packit 5c3484
  int        low, i;
Packit 5c3484
Packit 5c3484
  /* Any native mpn_divexact_1 is assumed to incorporate all the speed of a
Packit 5c3484
     full mpn_divrem_1.  */
Packit 5c3484
  if (HAVE_NATIVE_mpn_divexact_1)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("DIVEXACT_1_THRESHOLD", 0, "always (native)");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  ASSERT_ALWAYS (tuned_speed_mpn_divrem_1 != NULL);
Packit 5c3484
Packit 5c3484
  param.name = "DIVEXACT_1_THRESHOLD";
Packit 5c3484
  param.data_high = DATA_HIGH_GE_R;
Packit 5c3484
  param.check_size = 256;
Packit 5c3484
  param.min_size = 2;
Packit 5c3484
  param.stop_factor = 1.5;
Packit 5c3484
  param.function  = tuned_speed_mpn_divrem_1;
Packit 5c3484
  param.function2 = speed_mpn_divexact_1;
Packit 5c3484
  param.noprint = 1;
Packit 5c3484
Packit 5c3484
  print_define_start (param.name);
Packit 5c3484
Packit 5c3484
  for (low = 0; low <= 1; low++)
Packit 5c3484
    {
Packit 5c3484
      s.r = randlimb_half();
Packit 5c3484
      if (low == 0)
Packit 5c3484
        s.r |= 1;
Packit 5c3484
      else
Packit 5c3484
        s.r &= ~CNST_LIMB(7);
Packit 5c3484
Packit 5c3484
      one (&thresh[low], ¶m;;
Packit 5c3484
      if (option_trace)
Packit 5c3484
        printf ("low=%d thresh %ld\n", low, (long) thresh[low]);
Packit 5c3484
Packit 5c3484
      if (thresh[low] == MP_SIZE_T_MAX)
Packit 5c3484
        {
Packit 5c3484
          average = MP_SIZE_T_MAX;
Packit 5c3484
          goto divexact_1_done;
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (option_trace)
Packit 5c3484
    {
Packit 5c3484
      printf ("average of:");
Packit 5c3484
      for (i = 0; i < numberof(thresh); i++)
Packit 5c3484
        printf (" %ld", (long) thresh[i]);
Packit 5c3484
      printf ("\n");
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  average = 0;
Packit 5c3484
  for (i = 0; i < numberof(thresh); i++)
Packit 5c3484
    average += thresh[i];
Packit 5c3484
  average /= numberof(thresh);
Packit 5c3484
Packit 5c3484
  /* If divexact turns out to be better as early as 3 limbs, then use it
Packit 5c3484
     always, so as to reduce code size and conditional jumps.  */
Packit 5c3484
  if (average <= 3)
Packit 5c3484
    average = 0;
Packit 5c3484
Packit 5c3484
 divexact_1_done:
Packit 5c3484
  print_define_end (param.name, average);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* The generic mpn_modexact_1_odd skips a divide step if high<divisor, the
   same as mpn_mod_1, but this might not be true of an assembler
   implementation.  The threshold used is an average based on data where a
   divide can be skipped and where it can't.

   If modexact turns out to be better as early as 3 limbs, then use it
   always, so as to reduce code size and conditional jumps.  */
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_modexact_1_odd (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  mp_size_t  thresh_lt, thresh_ge, average;
Packit 5c3484
Packit 5c3484
#if 0
Packit 5c3484
  /* Any native mpn_modexact_1_odd is assumed to incorporate all the speed
Packit 5c3484
     of a full mpn_mod_1.  */
Packit 5c3484
  if (HAVE_NATIVE_mpn_modexact_1_odd)
Packit 5c3484
    {
Packit 5c3484
      print_define_remark ("BMOD_1_TO_MOD_1_THRESHOLD", MP_SIZE_T_MAX, "always bmod_1");
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  param.name = "BMOD_1_TO_MOD_1_THRESHOLD";
Packit 5c3484
  param.check_size = 256;
Packit 5c3484
  param.min_size = 2;
Packit 5c3484
  param.stop_factor = 1.5;
Packit 5c3484
  param.function  = speed_mpn_modexact_1c_odd;
Packit 5c3484
  param.function2 = speed_mpn_mod_1_tune;
Packit 5c3484
  param.noprint = 1;
Packit 5c3484
  s.r = randlimb_half () | 1;
Packit 5c3484
Packit 5c3484
  print_define_start (param.name);
Packit 5c3484
Packit 5c3484
  param.data_high = DATA_HIGH_LT_R;
Packit 5c3484
  one (&thresh_lt, ¶m;;
Packit 5c3484
  if (option_trace)
Packit 5c3484
    printf ("lt thresh %ld\n", (long) thresh_lt);
Packit 5c3484
Packit 5c3484
  average = thresh_lt;
Packit 5c3484
  if (thresh_lt != MP_SIZE_T_MAX)
Packit 5c3484
    {
Packit 5c3484
      param.data_high = DATA_HIGH_GE_R;
Packit 5c3484
      one (&thresh_ge, ¶m;;
Packit 5c3484
      if (option_trace)
Packit 5c3484
        printf ("ge thresh %ld\n", (long) thresh_ge);
Packit 5c3484
Packit 5c3484
      if (thresh_ge != MP_SIZE_T_MAX)
Packit 5c3484
        {
Packit 5c3484
          average = (thresh_ge + thresh_lt) / 2;
Packit 5c3484
          if (thresh_ge <= 3)
Packit 5c3484
            average = 0;
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  print_define_end (param.name, average);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_jacobi_base (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
  double   t1, t2, t3, t4;
Packit 5c3484
  int      method;
Packit 5c3484
Packit 5c3484
  s.size = GMP_LIMB_BITS * 3 / 4;
Packit 5c3484
Packit 5c3484
  t1 = tuneup_measure (speed_mpn_jacobi_base_1, &param, &s);
Packit 5c3484
  if (option_trace >= 1)
Packit 5c3484
    printf ("size=%ld, mpn_jacobi_base_1 %.9f\n", (long) s.size, t1);
Packit 5c3484
Packit 5c3484
  t2 = tuneup_measure (speed_mpn_jacobi_base_2, &param, &s);
Packit 5c3484
  if (option_trace >= 1)
Packit 5c3484
    printf ("size=%ld, mpn_jacobi_base_2 %.9f\n", (long) s.size, t2);
Packit 5c3484
Packit 5c3484
  t3 = tuneup_measure (speed_mpn_jacobi_base_3, &param, &s);
Packit 5c3484
  if (option_trace >= 1)
Packit 5c3484
    printf ("size=%ld, mpn_jacobi_base_3 %.9f\n", (long) s.size, t3);
Packit 5c3484
Packit 5c3484
  t4 = tuneup_measure (speed_mpn_jacobi_base_4, &param, &s);
Packit 5c3484
  if (option_trace >= 1)
Packit 5c3484
    printf ("size=%ld, mpn_jacobi_base_4 %.9f\n", (long) s.size, t4);
Packit 5c3484
Packit 5c3484
  if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0 || t4 == -1.0)
Packit 5c3484
    {
Packit 5c3484
      printf ("Oops, can't measure all mpn_jacobi_base methods at %ld\n",
Packit 5c3484
              (long) s.size);
Packit 5c3484
      abort ();
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (t1 < t2 && t1 < t3 && t1 < t4)
Packit 5c3484
    method = 1;
Packit 5c3484
  else if (t2 < t3 && t2 < t4)
Packit 5c3484
    method = 2;
Packit 5c3484
  else if (t3 < t4)
Packit 5c3484
    method = 3;
Packit 5c3484
  else
Packit 5c3484
    method = 4;
Packit 5c3484
Packit 5c3484
  print_define ("JACOBI_BASE_METHOD", method);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_get_str (void)
Packit 5c3484
{
Packit 5c3484
  /* Tune for decimal, it being most common.  Some rough testing suggests
Packit 5c3484
     other bases are different, but not by very much.  */
Packit 5c3484
  s.r = 10;
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    GET_STR_PRECOMPUTE_THRESHOLD = 0;
Packit 5c3484
    param.name = "GET_STR_DC_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_get_str;
Packit 5c3484
    param.min_size = 4;
Packit 5c3484
    param.max_size = GET_STR_THRESHOLD_LIMIT;
Packit 5c3484
    one (&get_str_dc_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.name = "GET_STR_PRECOMPUTE_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_get_str;
Packit 5c3484
    param.min_size = GET_STR_DC_THRESHOLD;
Packit 5c3484
    param.max_size = GET_STR_THRESHOLD_LIMIT;
Packit 5c3484
    one (&get_str_precompute_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
double
Packit 5c3484
speed_mpn_pre_set_str (struct speed_params *s)
Packit 5c3484
{
Packit 5c3484
  unsigned char *str;
Packit 5c3484
  mp_ptr     wp;
Packit 5c3484
  mp_size_t  wn;
Packit 5c3484
  unsigned   i;
Packit 5c3484
  int        base;
Packit 5c3484
  double     t;
Packit 5c3484
  mp_ptr powtab_mem, tp;
Packit 5c3484
  powers_t powtab[GMP_LIMB_BITS];
Packit 5c3484
  mp_size_t un;
Packit 5c3484
  int chars_per_limb;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  SPEED_RESTRICT_COND (s->size >= 1);
Packit 5c3484
Packit 5c3484
  base = s->r == 0 ? 10 : s->r;
Packit 5c3484
  SPEED_RESTRICT_COND (base >= 2 && base <= 256);
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  str = (unsigned char *) TMP_ALLOC (s->size);
Packit 5c3484
  for (i = 0; i < s->size; i++)
Packit 5c3484
    str[i] = s->xp[i] % base;
Packit 5c3484
Packit 5c3484
  LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);
Packit 5c3484
  SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
Packit 5c3484
Packit 5c3484
  /* use this during development to check wn is big enough */
Packit 5c3484
  /*
Packit 5c3484
  ASSERT_ALWAYS (mpn_set_str (wp, str, s->size, base) <= wn);
Packit 5c3484
  */
Packit 5c3484
Packit 5c3484
  speed_operand_src (s, (mp_ptr) str, s->size/GMP_LIMB_BYTES);
Packit 5c3484
  speed_operand_dst (s, wp, wn);
Packit 5c3484
  speed_cache_fill (s);
Packit 5c3484
Packit 5c3484
  chars_per_limb = mp_bases[base].chars_per_limb;
Packit 5c3484
  un = s->size / chars_per_limb + 1;
Packit 5c3484
  powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));
Packit 5c3484
  mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);
Packit 5c3484
  tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
Packit 5c3484
Packit 5c3484
  speed_starttime ();
Packit 5c3484
  i = s->reps;
Packit 5c3484
  do
Packit 5c3484
    {
Packit 5c3484
      mpn_pre_set_str (wp, str, s->size, powtab, tp);
Packit 5c3484
    }
Packit 5c3484
  while (--i != 0);
Packit 5c3484
  t = speed_endtime ();
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
  return t;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_set_str (void)
Packit 5c3484
{
Packit 5c3484
  s.r = 10;  /* decimal */
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    SET_STR_PRECOMPUTE_THRESHOLD = 0;
Packit 5c3484
    param.step_factor = 0.01;
Packit 5c3484
    param.name = "SET_STR_DC_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_pre_set_str;
Packit 5c3484
    param.min_size = 100;
Packit 5c3484
    param.max_size = 50000;
Packit 5c3484
    one (&set_str_dc_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
  {
Packit 5c3484
    static struct param_t  param;
Packit 5c3484
    param.step_factor = 0.02;
Packit 5c3484
    param.name = "SET_STR_PRECOMPUTE_THRESHOLD";
Packit 5c3484
    param.function = speed_mpn_set_str;
Packit 5c3484
    param.min_size = SET_STR_DC_THRESHOLD;
Packit 5c3484
    param.max_size = 100000;
Packit 5c3484
    one (&set_str_precompute_threshold, ¶m;;
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_fft_mul (void)
Packit 5c3484
{
Packit 5c3484
  static struct fft_param_t  param;
Packit 5c3484
Packit 5c3484
  if (option_fft_max_size == 0)
Packit 5c3484
    return;
Packit 5c3484
Packit 5c3484
  param.table_name          = "MUL_FFT_TABLE3";
Packit 5c3484
  param.threshold_name      = "MUL_FFT_THRESHOLD";
Packit 5c3484
  param.p_threshold         = &mul_fft_threshold;
Packit 5c3484
  param.modf_threshold_name = "MUL_FFT_MODF_THRESHOLD";
Packit 5c3484
  param.p_modf_threshold    = &mul_fft_modf_threshold;
Packit 5c3484
  param.first_size          = MUL_TOOM33_THRESHOLD / 2;
Packit 5c3484
  param.max_size            = option_fft_max_size;
Packit 5c3484
  param.function            = speed_mpn_fft_mul;
Packit 5c3484
  param.mul_modf_function   = speed_mpn_mul_fft;
Packit 5c3484
  param.mul_function        = speed_mpn_mul_n;
Packit 5c3484
  param.sqr = 0;
Packit 5c3484
  fft (¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_fft_sqr (void)
Packit 5c3484
{
Packit 5c3484
  static struct fft_param_t  param;
Packit 5c3484
Packit 5c3484
  if (option_fft_max_size == 0)
Packit 5c3484
    return;
Packit 5c3484
Packit 5c3484
  param.table_name          = "SQR_FFT_TABLE3";
Packit 5c3484
  param.threshold_name      = "SQR_FFT_THRESHOLD";
Packit 5c3484
  param.p_threshold         = &sqr_fft_threshold;
Packit 5c3484
  param.modf_threshold_name = "SQR_FFT_MODF_THRESHOLD";
Packit 5c3484
  param.p_modf_threshold    = &sqr_fft_modf_threshold;
Packit 5c3484
  param.first_size          = SQR_TOOM3_THRESHOLD / 2;
Packit 5c3484
  param.max_size            = option_fft_max_size;
Packit 5c3484
  param.function            = speed_mpn_fft_sqr;
Packit 5c3484
  param.mul_modf_function   = speed_mpn_mul_fft_sqr;
Packit 5c3484
  param.mul_function        = speed_mpn_sqr;
Packit 5c3484
  param.sqr = 1;
Packit 5c3484
  fft (¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
tune_fac_ui (void)
Packit 5c3484
{
Packit 5c3484
  static struct param_t  param;
Packit 5c3484
Packit 5c3484
  param.function = speed_mpz_fac_ui_tune;
Packit 5c3484
Packit 5c3484
  param.name = "FAC_DSC_THRESHOLD";
Packit 5c3484
  param.min_size = 70;
Packit 5c3484
  param.max_size = FAC_DSC_THRESHOLD_LIMIT;
Packit 5c3484
  one (&fac_dsc_threshold, ¶m;;
Packit 5c3484
Packit 5c3484
  param.name = "FAC_ODD_THRESHOLD";
Packit 5c3484
  param.min_size = 22;
Packit 5c3484
  param.stop_factor = 1.7;
Packit 5c3484
  param.min_is_always = 1;
Packit 5c3484
  one (&fac_odd_threshold, ¶m;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
all (void)
Packit 5c3484
{
Packit 5c3484
  time_t  start_time, end_time;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
  SPEED_TMP_ALLOC_LIMBS (s.xp_block, SPEED_BLOCK_SIZE, 0);
Packit 5c3484
  SPEED_TMP_ALLOC_LIMBS (s.yp_block, SPEED_BLOCK_SIZE, 0);
Packit 5c3484
Packit 5c3484
  mpn_random (s.xp_block, SPEED_BLOCK_SIZE);
Packit 5c3484
  mpn_random (s.yp_block, SPEED_BLOCK_SIZE);
Packit 5c3484
Packit 5c3484
  fprintf (stderr, "Parameters for %s\n", GMP_MPARAM_H_SUGGEST);
Packit 5c3484
Packit 5c3484
  speed_time_init ();
Packit 5c3484
  fprintf (stderr, "Using: %s\n", speed_time_string);
Packit 5c3484
Packit 5c3484
  fprintf (stderr, "speed_precision %d", speed_precision);
Packit 5c3484
  if (speed_unittime == 1.0)
Packit 5c3484
    fprintf (stderr, ", speed_unittime 1 cycle");
Packit 5c3484
  else
Packit 5c3484
    fprintf (stderr, ", speed_unittime %.2e secs", speed_unittime);
Packit 5c3484
  if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
Packit 5c3484
    fprintf (stderr, ", CPU freq unknown\n");
Packit 5c3484
  else
Packit 5c3484
    fprintf (stderr, ", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
Packit 5c3484
Packit 5c3484
  fprintf (stderr, "DEFAULT_MAX_SIZE %d, fft_max_size %ld\n",
Packit 5c3484
           DEFAULT_MAX_SIZE, (long) option_fft_max_size);
Packit 5c3484
  fprintf (stderr, "\n");
Packit 5c3484
Packit 5c3484
  time (&start_time);
Packit 5c3484
  {
Packit 5c3484
    struct tm  *tp;
Packit 5c3484
    tp = localtime (&start_time);
Packit 5c3484
    printf ("/* Generated by tuneup.c, %d-%02d-%02d, ",
Packit 5c3484
            tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
Packit 5c3484
Packit 5c3484
#ifdef __GNUC__
Packit 5c3484
    /* gcc sub-minor version doesn't seem to come through as a define */
Packit 5c3484
    printf ("gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
Packit 5c3484
#define PRINTED_COMPILER
Packit 5c3484
#endif
Packit 5c3484
#if defined (__SUNPRO_C)
Packit 5c3484
    printf ("Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
Packit 5c3484
#define PRINTED_COMPILER
Packit 5c3484
#endif
Packit 5c3484
#if ! defined (__GNUC__) && defined (__sgi) && defined (_COMPILER_VERSION)
Packit 5c3484
    /* gcc defines __sgi and _COMPILER_VERSION on irix 6, avoid that */
Packit 5c3484
    printf ("MIPSpro C %d.%d.%d */\n",
Packit 5c3484
	    _COMPILER_VERSION / 100,
Packit 5c3484
	    _COMPILER_VERSION / 10 % 10,
Packit 5c3484
	    _COMPILER_VERSION % 10);
Packit 5c3484
#define PRINTED_COMPILER
Packit 5c3484
#endif
Packit 5c3484
#if defined (__DECC) && defined (__DECC_VER)
Packit 5c3484
    printf ("DEC C %d */\n", __DECC_VER);
Packit 5c3484
#define PRINTED_COMPILER
Packit 5c3484
#endif
Packit 5c3484
#if ! defined (PRINTED_COMPILER)
Packit 5c3484
    printf ("system compiler */\n");
Packit 5c3484
#endif
Packit 5c3484
  }
Packit 5c3484
  printf ("\n");
Packit 5c3484
Packit 5c3484
  tune_divrem_1 ();
Packit 5c3484
  tune_mod_1 ();
Packit 5c3484
  tune_preinv_divrem_1 ();
Packit 5c3484
  tune_div_qr_1 ();
Packit 5c3484
#if 0
Packit 5c3484
  tune_divrem_2 ();
Packit 5c3484
#endif
Packit 5c3484
  tune_div_qr_2 ();
Packit 5c3484
  tune_divexact_1 ();
Packit 5c3484
  tune_modexact_1_odd ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_mul_n ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_mul ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_sqr ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_mulmid ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_mulmod_bnm1 ();
Packit 5c3484
  tune_sqrmod_bnm1 ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_fft_mul ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_fft_sqr ();
Packit 5c3484
  printf ("\n");
Packit 5c3484
Packit 5c3484
  tune_mullo ();
Packit 5c3484
  tune_sqrlo ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_dc_div ();
Packit 5c3484
  tune_dc_bdiv ();
Packit 5c3484
Packit 5c3484
  printf("\n");
Packit 5c3484
  tune_invertappr ();
Packit 5c3484
  tune_invert ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_binvert ();
Packit 5c3484
  tune_redc ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_mu_div ();
Packit 5c3484
  tune_mu_bdiv ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_powm_sec ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_get_str ();
Packit 5c3484
  tune_set_str ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_fac_ui ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  tune_matrix22_mul ();
Packit 5c3484
  tune_hgcd ();
Packit 5c3484
  tune_hgcd_appr ();
Packit 5c3484
  tune_hgcd_reduce();
Packit 5c3484
  tune_gcd_dc ();
Packit 5c3484
  tune_gcdext_dc ();
Packit 5c3484
  tune_jacobi_base ();
Packit 5c3484
  printf("\n");
Packit 5c3484
Packit 5c3484
  time (&end_time);
Packit 5c3484
  printf ("/* Tuneup completed successfully, took %ld seconds */\n",
Packit 5c3484
          (long) (end_time - start_time));
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
int
Packit 5c3484
main (int argc, char *argv[])
Packit 5c3484
{
Packit 5c3484
  int  opt;
Packit 5c3484
Packit 5c3484
  /* Unbuffered so if output is redirected to a file it isn't lost if the
Packit 5c3484
     program is killed part way through.  */
Packit 5c3484
  setbuf (stdout, NULL);
Packit 5c3484
  setbuf (stderr, NULL);
Packit 5c3484
Packit 5c3484
  while ((opt = getopt(argc, argv, "f:o:p:t")) != EOF)
Packit 5c3484
    {
Packit 5c3484
      switch (opt) {
Packit 5c3484
      case 'f':
Packit 5c3484
        if (optarg[0] == 't')
Packit 5c3484
          option_fft_trace = 2;
Packit 5c3484
        else
Packit 5c3484
          option_fft_max_size = atol (optarg);
Packit 5c3484
        break;
Packit 5c3484
      case 'o':
Packit 5c3484
        speed_option_set (optarg);
Packit 5c3484
        break;
Packit 5c3484
      case 'p':
Packit 5c3484
        speed_precision = atoi (optarg);
Packit 5c3484
        break;
Packit 5c3484
      case 't':
Packit 5c3484
        option_trace++;
Packit 5c3484
        break;
Packit 5c3484
      case '?':
Packit 5c3484
        exit(1);
Packit 5c3484
      }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  all ();
Packit 5c3484
  exit (0);
Packit 5c3484
}