Blame tune/speed.c

Packit 5c3484
/* Speed measuring program.
Packit 5c3484
Packit 5c3484
Copyright 1999-2003, 2005, 2006, 2008-2015 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
/* The usage message is in the code below; run with no arguments to print it.
Packit 5c3484
   See README for interesting applications.
Packit 5c3484
Packit 5c3484
   To add a new routine foo(), create a speed_foo() function in the style of
Packit 5c3484
   the existing ones and add an entry in the routine[] array.  Put FLAG_R if
Packit 5c3484
   speed_foo() wants an "r" parameter.
Packit 5c3484
Packit 5c3484
   The routines don't have help messages or descriptions, but most have
Packit 5c3484
   suggestive names.  See the source code for full details.
Packit 5c3484
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
#include "config.h"
Packit 5c3484
Packit 5c3484
#include <limits.h>
Packit 5c3484
#include <stdio.h>
Packit 5c3484
#include <stdlib.h>
Packit 5c3484
#include <string.h>
Packit 5c3484
Packit 5c3484
#if HAVE_UNISTD_H
Packit 5c3484
#include <unistd.h>  /* for getpid, R_OK */
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if TIME_WITH_SYS_TIME
Packit 5c3484
# include <sys/time.h>  /* for struct timeval */
Packit 5c3484
# include <time.h>
Packit 5c3484
#else
Packit 5c3484
# if HAVE_SYS_TIME_H
Packit 5c3484
#  include <sys/time.h>
Packit 5c3484
# else
Packit 5c3484
#  include <time.h>
Packit 5c3484
# endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_SYS_RESOURCE_H
Packit 5c3484
#include <sys/resource.h>  /* for getrusage() */
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"  /* for the benefit of speed-many.c */
Packit 5c3484
#include "tests.h"
Packit 5c3484
#include "speed.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Fallback declarations of optarg, optind and opterr, used only when
   HAVE_DECL_OPTARG is not set by config.h.
   NOTE(review): these are presumably the standard getopt() globals that
   <unistd.h> would normally declare -- confirm against configure.  */
#if !HAVE_DECL_OPTARG
Packit 5c3484
extern char *optarg;
Packit 5c3484
extern int optind, opterr;
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Fallback for systems without strtoul(): route the call through strtol()
   and convert the result to unsigned long.  The whole expansion and each
   argument are parenthesized so the macro behaves like a single primary
   expression wherever a real function call could appear (for instance as
   the operand of sizeof or of a postfix operator).  */
#if !HAVE_STRTOUL
#define strtoul(p,e,b)  ((unsigned long) strtol ((p), (e), (b)))
#endif
Packit 5c3484
Packit 5c3484
/* Optional build-time hooks: when SPEED_EXTRA_PROTOS or SPEED_EXTRA_PROTOS2
   is defined, its expansion is emitted here at file scope.
   NOTE(review): presumably these supply prototypes for the entries that
   SPEED_EXTRA_ROUTINES / SPEED_EXTRA_ROUTINES2 add to routine[] -- confirm
   against the build files that define them.  */
#ifdef SPEED_EXTRA_PROTOS
Packit 5c3484
SPEED_EXTRA_PROTOS
Packit 5c3484
#endif
Packit 5c3484
#ifdef SPEED_EXTRA_PROTOS2
Packit 5c3484
SPEED_EXTRA_PROTOS2
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* GMP_NUMB_0xAA is a limb whose bytes are all 0xAA, masked with
   GMP_NUMB_MASK.  data_fill() stores it in every limb for DATA_AAS.
   It is only defined for GMP_LIMB_BITS of 32 or 64; any other limb size
   leaves the macro undefined.  */
#if GMP_LIMB_BITS == 32
Packit 5c3484
#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
Packit 5c3484
#endif
Packit 5c3484
#if GMP_LIMB_BITS == 64
Packit 5c3484
#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Values for option_cmp, which starts out as CMP_ABSOLUTE.
   NOTE(review): the precise meaning of each comparison mode is decided by
   the option parsing and output code, which is not part of this excerpt.  */
#define CMP_ABSOLUTE     1
Packit 5c3484
#define CMP_RATIO        2
Packit 5c3484
#define CMP_DIFFERENCE   3
Packit 5c3484
#define CMP_DIFFPREV     4
Packit 5c3484
int  option_cmp = CMP_ABSOLUTE;
Packit 5c3484
Packit 5c3484
/* Values for option_unit, which starts out as UNIT_SECONDS.  */
#define UNIT_SECONDS        1
Packit 5c3484
#define UNIT_CYCLES         2
Packit 5c3484
#define UNIT_CYCLESPERLIMB  3
Packit 5c3484
int  option_unit = UNIT_SECONDS;
Packit 5c3484
Packit 5c3484
/* Values for option_data, consumed by data_fill():
     DATA_RANDOM   mpn_random
     DATA_RANDOM2  mpn_random2
     DATA_ZEROS    all limbs zero
     DATA_AAS      every limb GMP_NUMB_0xAA
     DATA_FFS      every limb GMP_NUMB_MAX
     DATA_2FD      every limb GMP_NUMB_MAX, then 2 subtracted from limb 0
   The default is DATA_RANDOM.  */
#define DATA_RANDOM   1
Packit 5c3484
#define DATA_RANDOM2  2
Packit 5c3484
#define DATA_ZEROS    3
Packit 5c3484
#define DATA_AAS      4
Packit 5c3484
#define DATA_FFS      5
Packit 5c3484
#define DATA_2FD      6
Packit 5c3484
int  option_data = DATA_RANDOM;
Packit 5c3484
Packit 5c3484
/* Remaining global option state, shown here with its start-up defaults.  */

/* Selects the form of SIZE_TO_DIVISOR(n): 1 gives n*n, 2 gives n*(n+1)/2,
   anything else gives n.  */
int        option_square = 0;
Packit 5c3484
double     option_factor = 0.0;
Packit 5c3484
mp_size_t  option_step = 1;
Packit 5c3484
int        option_gnuplot = 0;
Packit 5c3484
char      *option_gnuplot_basename;
Packit 5c3484
/* A start/end pair of sizes.
   NOTE(review): size_array presumably holds the size ranges requested by
   the user, with size_num entries in use out of size_allocnum allocated --
   confirm against the code that fills it.  */
struct size_array_t {
Packit 5c3484
  mp_size_t start, end;
Packit 5c3484
} *size_array = NULL;
Packit 5c3484
mp_size_t  size_num = 0;
Packit 5c3484
mp_size_t  size_allocnum = 0;
Packit 5c3484
int        option_resource_usage = 0;
Packit 5c3484
long       option_seed = 123456789;
Packit 5c3484
Packit 5c3484
/* Global measurement parameters.  run_one() stores the operand buffers in
   sp.xp and sp.yp (or NULL when no routine wants data).  */
struct speed_params  sp;
Packit 5c3484
Packit 5c3484
#define COLUMN_WIDTH  13  /* for the free-form output */
Packit 5c3484
Packit 5c3484
/* Bit flags stored in routine_t.flag; they may be OR'ed together.  */
#define FLAG_R            (1<<0)  /* require ".r" */
Packit 5c3484
#define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
Packit 5c3484
#define FLAG_RSIZE        (1<<2)
Packit 5c3484
#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
Packit 5c3484
Packit 5c3484
/* Table of the routines that can be measured.  Each entry gives the
   routine's name as a string, the speed_xxx() function that run_one() hands
   to speed_measure(), and an optional OR of FLAG_xxx bits.  Entries that
   leave the flag out get 0 by the usual initializer rules.  Some entries
   are only compiled in when the matching HAVE_NATIVE_xxx (or similar)
   condition holds.  */
const struct routine_t {
Packit 5c3484
  /* constants */
Packit 5c3484
  const char        *name;
Packit 5c3484
  speed_function_t  fun;
Packit 5c3484
  int               flag;
Packit 5c3484
} routine[] = {
Packit 5c3484
Packit 5c3484
  { "noop",              speed_noop                 },
Packit 5c3484
  { "noop_wxs",          speed_noop_wxs             },
Packit 5c3484
  { "noop_wxys",         speed_noop_wxys            },
Packit 5c3484
Packit 5c3484
  { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_add_1",         speed_mpn_add_1,     FLAG_R },
Packit 5c3484
  { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R },
Packit 5c3484
  { "mpn_sub_1",         speed_mpn_sub_1,     FLAG_R },
Packit 5c3484
  { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R },
Packit 5c3484
Packit 5c3484
  { "mpn_add_err1_n",    speed_mpn_add_err1_n    },
Packit 5c3484
  { "mpn_add_err2_n",    speed_mpn_add_err2_n    },
Packit 5c3484
  { "mpn_add_err3_n",    speed_mpn_add_err3_n    },
Packit 5c3484
  { "mpn_sub_err1_n",    speed_mpn_sub_err1_n    },
Packit 5c3484
  { "mpn_sub_err2_n",    speed_mpn_sub_err2_n    },
Packit 5c3484
  { "mpn_sub_err3_n",    speed_mpn_sub_err3_n    },
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_add_n_sub_n
Packit 5c3484
  { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
Packit 5c3484
  { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_2
Packit 5c3484
  { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_3
Packit 5c3484
  { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_4
Packit 5c3484
  { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_5
Packit 5c3484
  { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_6
Packit 5c3484
  { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_7
Packit 5c3484
  { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_8
Packit 5c3484
  { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
  { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
Packit 5c3484
  { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
  { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_3
Packit 5c3484
  { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_4
Packit 5c3484
  { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_5
Packit 5c3484
  { "mpn_mul_5",         speed_mpn_mul_5,     FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_6
Packit 5c3484
  { "mpn_mul_6",         speed_mpn_mul_6,     FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
Packit 5c3484
  { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
Packit 5c3484
#if HAVE_NATIVE_mpn_divrem_1c
Packit 5c3484
  { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
Packit 5c3484
  { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
Packit 5c3484
#endif
Packit 5c3484
  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
Packit 5c3484
#if HAVE_NATIVE_mpn_mod_1c
Packit 5c3484
  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
Packit 5c3484
#endif
Packit 5c3484
  { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
Packit 5c3484
  { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
Packit 5c3484
  { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },
Packit 5c3484
Packit 5c3484
  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R },
Packit 5c3484
  { "mpn_mod_1_1_1",     speed_mpn_mod_1_1_1,     FLAG_R },
Packit 5c3484
  { "mpn_mod_1_1_2",     speed_mpn_mod_1_1_2,     FLAG_R },
Packit 5c3484
  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R },
Packit 5c3484
  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R },
Packit 5c3484
  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R },
Packit 5c3484
Packit 5c3484
  { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
Packit 5c3484
  { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
Packit 5c3484
  { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
Packit 5c3484
  { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
Packit 5c3484
  { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
Packit 5c3484
  { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },
Packit 5c3484
Packit 5c3484
  { "mpn_divrem_2",      speed_mpn_divrem_2,        },
Packit 5c3484
  { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
Packit 5c3484
  { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },
Packit 5c3484
Packit 5c3484
  { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R  },
Packit 5c3484
  { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R  },
Packit 5c3484
  { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R  },
Packit 5c3484
  { "mpn_div_qr_1",      speed_mpn_div_qr_1,      FLAG_R },
Packit 5c3484
Packit 5c3484
  { "mpn_div_qr_2n",     speed_mpn_div_qr_2n,       },
Packit 5c3484
  { "mpn_div_qr_2u",     speed_mpn_div_qr_2u,       },
Packit 5c3484
Packit 5c3484
  { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
Packit 5c3484
  { "mpn_divexact_by3",  speed_mpn_divexact_by3          },
Packit 5c3484
Packit 5c3484
  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R },
Packit 5c3484
  { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_modexact_1_odd
Packit 5c3484
  { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
Packit 5c3484
#endif
Packit 5c3484
  { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
Packit 5c3484
Packit 5c3484
#if GMP_NUMB_BITS % 4 == 0
Packit 5c3484
  { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
Packit 5c3484
  { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
Packit 5c3484
  { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
Packit 5c3484
Packit 5c3484
  { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_com",           speed_mpn_com              },
Packit 5c3484
  { "mpn_neg",           speed_mpn_neg              },
Packit 5c3484
Packit 5c3484
  { "mpn_popcount",      speed_mpn_popcount         },
Packit 5c3484
  { "mpn_hamdist",       speed_mpn_hamdist          },
Packit 5c3484
Packit 5c3484
  { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },
Packit 5c3484
Packit 5c3484
  { "mpn_hgcd",          speed_mpn_hgcd             },
Packit 5c3484
  { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
Packit 5c3484
  { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
Packit 5c3484
  { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
Packit 5c3484
Packit 5c3484
  { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
Packit 5c3484
  { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
Packit 5c3484
  { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },
Packit 5c3484
Packit 5c3484
  { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "mpn_gcd",           speed_mpn_gcd                    },
Packit 5c3484
Packit 5c3484
  { "mpn_gcdext",            speed_mpn_gcdext            },
Packit 5c3484
  { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
Packit 5c3484
  { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
Packit 5c3484
  { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
Packit 5c3484
  { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
Packit 5c3484
#if 0
Packit 5c3484
  { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
Packit 5c3484
#endif
Packit 5c3484
  { "mpz_jacobi",        speed_mpz_jacobi           },
Packit 5c3484
  { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
Packit 5c3484
  { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
Packit 5c3484
  { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
Packit 5c3484
  { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
Packit 5c3484
  { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },
Packit 5c3484
Packit 5c3484
  { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
Packit 5c3484
#if HAVE_NATIVE_mpn_sqr_diagonal
Packit 5c3484
  { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
Packit 5c3484
  { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  { "mpn_mul_n",         speed_mpn_mul_n            },
Packit 5c3484
  { "mpn_sqr",           speed_mpn_sqr              },
Packit 5c3484
Packit 5c3484
  { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
Packit 5c3484
  { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
Packit 5c3484
  { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
Packit 5c3484
  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
Packit 5c3484
  { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
Packit 5c3484
  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
Packit 5c3484
  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
Packit 5c3484
  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
Packit 5c3484
  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
Packit 5c3484
  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
Packit 5c3484
  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
Packit 5c3484
  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
Packit 5c3484
  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
Packit 5c3484
  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
Packit 5c3484
  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
Packit 5c3484
  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
Packit 5c3484
#if WANT_OLD_FFT_FULL
Packit 5c3484
  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
Packit 5c3484
  { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
Packit 5c3484
#endif
Packit 5c3484
  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "mpn_sqrlo",          speed_mpn_sqrlo           },
Packit 5c3484
  { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase  },
Packit 5c3484
  { "mpn_mullo_n",        speed_mpn_mullo_n         },
Packit 5c3484
  { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },
Packit 5c3484
Packit 5c3484
  { "mpn_mulmid_basecase",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_toom42_mulmid",    speed_mpn_toom42_mulmid },
Packit 5c3484
  { "mpn_mulmid_n",         speed_mpn_mulmid_n },
Packit 5c3484
  { "mpn_mulmid",           speed_mpn_mulmid, FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
Packit 5c3484
  { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
Packit 5c3484
  { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
Packit 5c3484
  { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },
Packit 5c3484
Packit 5c3484
  { "mpn_invert",              speed_mpn_invert              },
Packit 5c3484
  { "mpn_invertappr",          speed_mpn_invertappr          },
Packit 5c3484
  { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
Packit 5c3484
  { "mpn_binvert",             speed_mpn_binvert             },
Packit 5c3484
  { "mpn_sec_invert",          speed_mpn_sec_invert          },
Packit 5c3484
Packit 5c3484
  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
Packit 5c3484
  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
Packit 5c3484
  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
Packit 5c3484
  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
Packit 5c3484
  { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
Packit 5c3484
  { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},
Packit 5c3484
Packit 5c3484
  { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
Packit 5c3484
  { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
Packit 5c3484
  { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
Packit 5c3484
  { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },
Packit 5c3484
Packit 5c3484
  { "mpn_broot",               speed_mpn_broot,    FLAG_R },
Packit 5c3484
  { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
Packit 5c3484
  { "mpn_brootinv",            speed_mpn_brootinv, FLAG_R },
Packit 5c3484
Packit 5c3484
  { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
Packit 5c3484
  { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },
Packit 5c3484
  { "mpn_sqrt",          speed_mpn_sqrt             },
Packit 5c3484
  { "mpn_root",          speed_mpn_root, FLAG_R     },
Packit 5c3484
Packit 5c3484
  { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
Packit 5c3484
  { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
Packit 5c3484
  { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
Packit 5c3484
  { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
Packit 5c3484
  { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },
Packit 5c3484
Packit 5c3484
  { "mpz_add",           speed_mpz_add              },
Packit 5c3484
  { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpz_bin_ui",        speed_mpz_bin_ui,   FLAG_NODATA | FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
Packit 5c3484
  { "mpz_2fac_ui",       speed_mpz_2fac_ui,  FLAG_NODATA   },
Packit 5c3484
  { "mpz_powm",          speed_mpz_powm             },
Packit 5c3484
  { "mpz_powm_mod",      speed_mpz_powm_mod         },
Packit 5c3484
  { "mpz_powm_redc",     speed_mpz_powm_redc        },
Packit 5c3484
  { "mpz_powm_sec",      speed_mpz_powm_sec        },
Packit 5c3484
  { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "mpz_mod",           speed_mpz_mod              },
Packit 5c3484
  { "mpn_redc_1",        speed_mpn_redc_1           },
Packit 5c3484
  { "mpn_redc_2",        speed_mpn_redc_2           },
Packit 5c3484
  { "mpn_redc_n",        speed_mpn_redc_n           },
Packit 5c3484
Packit 5c3484
  { "MPN_COPY",          speed_MPN_COPY             },
Packit 5c3484
  { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
Packit 5c3484
  { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
Packit 5c3484
  { "memcpy",            speed_memcpy               },
Packit 5c3484
#if HAVE_NATIVE_mpn_copyi
Packit 5c3484
  { "mpn_copyi",         speed_mpn_copyi            },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_copyd
Packit 5c3484
  { "mpn_copyd",         speed_mpn_copyd            },
Packit 5c3484
#endif
Packit 5c3484
  { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL },
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh1_n == 1
Packit 5c3484
  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh1_n == 1
Packit 5c3484
  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
Packit 5c3484
  { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
Packit 5c3484
  { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
Packit 5c3484
  { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_rsblsh1_n == 1
Packit 5c3484
  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh2_n == 1
Packit 5c3484
  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh2_n == 1
Packit 5c3484
  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
Packit 5c3484
  { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
Packit 5c3484
  { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
Packit 5c3484
  { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_rsblsh2_n == 1
Packit 5c3484
  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh_n
Packit 5c3484
  { "mpn_addlsh_n",     speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh_n
Packit 5c3484
  { "mpn_sublsh_n",     speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh_n_ip1
Packit 5c3484
  { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh_n_ip2
Packit 5c3484
  { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh_n_ip1
Packit 5c3484
  { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1    },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_rsblsh_n
Packit 5c3484
  { "mpn_rsblsh_n",     speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_rsh1add_n
Packit 5c3484
  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_rsh1sub_n
Packit 5c3484
  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  { "mpn_cnd_add_n",     speed_mpn_cnd_add_n, FLAG_R_OPTIONAL },
Packit 5c3484
  { "mpn_cnd_sub_n",     speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "MPN_ZERO",          speed_MPN_ZERO             },
Packit 5c3484
Packit 5c3484
  { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
Packit 5c3484
  { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
Packit 5c3484
  { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
Packit 5c3484
  { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
Packit 5c3484
  { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },
Packit 5c3484
Packit 5c3484
  { "malloc_free",                  speed_malloc_free                  },
Packit 5c3484
  { "malloc_realloc_free",          speed_malloc_realloc_free          },
Packit 5c3484
  { "gmp_allocate_free",            speed_gmp_allocate_free            },
Packit 5c3484
  { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
Packit 5c3484
  { "mpz_init_clear",               speed_mpz_init_clear               },
Packit 5c3484
  { "mpq_init_clear",               speed_mpq_init_clear               },
Packit 5c3484
  { "mpf_init_clear",               speed_mpf_init_clear               },
Packit 5c3484
  { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },
Packit 5c3484
Packit 5c3484
  { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
Packit 5c3484
#if HAVE_NATIVE_mpn_umul_ppmm
Packit 5c3484
  { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_umul_ppmm_r
Packit 5c3484
  { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
Packit 5c3484
  { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
Packit 5c3484
  { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
Packit 5c3484
#if HAVE_NATIVE_mpn_udiv_qrnnd
Packit 5c3484
  { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
Packit 5c3484
  { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
Packit 5c3484
#endif
Packit 5c3484
  { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
Packit 5c3484
  { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },
Packit 5c3484
Packit 5c3484
  { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
Packit 5c3484
  { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
Packit 5c3484
  { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },
Packit 5c3484
Packit 5c3484
#ifdef SPEED_EXTRA_ROUTINES
Packit 5c3484
  SPEED_EXTRA_ROUTINES
Packit 5c3484
#endif
Packit 5c3484
#ifdef SPEED_EXTRA_ROUTINES2
Packit 5c3484
  SPEED_EXTRA_ROUTINES2
Packit 5c3484
#endif
Packit 5c3484
};
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* One routine selected for measurement.  run_one() walks
   choice[0 .. num_choices-1].  */
struct choice_t {
Packit 5c3484
  /* the routine[] entry being measured */
  const struct routine_t  *p;
Packit 5c3484
  /* copied into the speed_params "r" field before each measurement */
  mp_limb_t               r;
Packit 5c3484
  double                  scale;
Packit 5c3484
  /* result of speed_measure() for the current size */
  double                  time;
Packit 5c3484
  /* non-zero when speed_measure() returned -1.0 */
  int                     no_time;
Packit 5c3484
  /* NOTE(review): presumably the time from the previous size, for
     CMP_DIFFPREV -- confirm against the rest of run_one() */
  double                  prev_time;
Packit 5c3484
  const char              *name;
Packit 5c3484
};
Packit 5c3484
struct choice_t  *choice;
Packit 5c3484
int  num_choices = 0;
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
data_fill (mp_ptr ptr, mp_size_t size)
Packit 5c3484
{
Packit 5c3484
  switch (option_data) {
Packit 5c3484
  case DATA_RANDOM:
Packit 5c3484
    mpn_random (ptr, size);
Packit 5c3484
    break;
Packit 5c3484
  case DATA_RANDOM2:
Packit 5c3484
    mpn_random2 (ptr, size);
Packit 5c3484
    break;
Packit 5c3484
  case DATA_ZEROS:
Packit 5c3484
    MPN_ZERO (ptr, size);
Packit 5c3484
    break;
Packit 5c3484
  case DATA_AAS:
Packit 5c3484
    MPN_FILL (ptr, size, GMP_NUMB_0xAA);
Packit 5c3484
    break;
Packit 5c3484
  case DATA_FFS:
Packit 5c3484
    MPN_FILL (ptr, size, GMP_NUMB_MAX);
Packit 5c3484
    break;
Packit 5c3484
  case DATA_2FD:
Packit 5c3484
    MPN_FILL (ptr, size, GMP_NUMB_MAX);
Packit 5c3484
    ptr[0] -= 2;
Packit 5c3484
    break;
Packit 5c3484
  default:
Packit 5c3484
    abort();
Packit 5c3484
    /*NOTREACHED*/
Packit 5c3484
  }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* The code here handling the various combinations of output options isn't
Packit 5c3484
   too attractive, but it works and is fairly clean.  */
Packit 5c3484
Packit 5c3484
/* Divisor used when converting a time to cycles per limb: n*n when
   option_square is 1, n*(n+1)/2 when it is 2, and plain n otherwise.  */
#define SIZE_TO_DIVISOR(n)                      \
  (option_square == 1                           \
   ? (n) * (n)                                  \
   : (option_square == 2                        \
      ? (n) * ((n) + 1) / 2                     \
      : (n)))
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
Packit 5c3484
{
Packit 5c3484
  const char  *first_open_fastest, *first_open_notfastest, *first_close;
Packit 5c3484
  int         i, fastest, want_data;
Packit 5c3484
  double      fastest_time;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  /* allocate data, unless all routines are NODATA */
Packit 5c3484
  want_data = 0;
Packit 5c3484
  for (i = 0; i < num_choices; i++)
Packit 5c3484
    want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
Packit 5c3484
Packit 5c3484
  if (want_data)
Packit 5c3484
    {
Packit 5c3484
      SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
Packit 5c3484
      SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);
Packit 5c3484
Packit 5c3484
      data_fill (s->xp, s->size);
Packit 5c3484
      data_fill (s->yp, s->size);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      sp.xp = NULL;
Packit 5c3484
      sp.yp = NULL;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
Packit 5c3484
    {
Packit 5c3484
      first_open_fastest = "(#";
Packit 5c3484
      first_open_notfastest = " (";
Packit 5c3484
      first_close = ")";
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      first_open_fastest = "#";
Packit 5c3484
      first_open_notfastest = " ";
Packit 5c3484
      first_close = "";
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  fastest = -1;
Packit 5c3484
  fastest_time = -1.0;
Packit 5c3484
  for (i = 0; i < num_choices; i++)
Packit 5c3484
    {
Packit 5c3484
      s->r = choice[i].r;
Packit 5c3484
      choice[i].time = speed_measure (choice[i].p->fun, s);
Packit 5c3484
      choice[i].no_time = (choice[i].time == -1.0);
Packit 5c3484
      if (! choice[i].no_time)
Packit 5c3484
        choice[i].time *= choice[i].scale;
Packit 5c3484
Packit 5c3484
      /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
Packit 5c3484
         is before any differences.  */
Packit 5c3484
      {
Packit 5c3484
        double     t;
Packit 5c3484
        t = choice[i].time;
Packit 5c3484
        if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
Packit 5c3484
          {
Packit 5c3484
            if (choice[i].prev_time == -1.0)
Packit 5c3484
              choice[i].no_time = 1;
Packit 5c3484
            else
Packit 5c3484
              choice[i].time = choice[i].time - choice[i].prev_time;
Packit 5c3484
          }
Packit 5c3484
        choice[i].prev_time = t;
Packit 5c3484
      }
Packit 5c3484
Packit 5c3484
      if (choice[i].no_time)
Packit 5c3484
        continue;
Packit 5c3484
Packit 5c3484
      /* Look for the fastest after CMP_DIFFPREV has been applied, but
Packit 5c3484
         before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
Packit 5c3484
         if there's more than one routine.  */
Packit 5c3484
      if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
Packit 5c3484
        {
Packit 5c3484
          fastest = i;
Packit 5c3484
          fastest_time = choice[i].time;
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      if (option_cmp == CMP_DIFFPREV)
Packit 5c3484
        {
Packit 5c3484
          /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
Packit 5c3484
          if (option_unit == UNIT_CYCLES)
Packit 5c3484
            choice[i].time /= speed_cycletime;
Packit 5c3484
          else if (option_unit == UNIT_CYCLESPERLIMB)
Packit 5c3484
            {
Packit 5c3484
              if (prev_size == -1)
Packit 5c3484
                choice[i].time /= speed_cycletime;
Packit 5c3484
              else
Packit 5c3484
                choice[i].time /=  (speed_cycletime
Packit 5c3484
                                    * (SIZE_TO_DIVISOR(s->size)
Packit 5c3484
                                       - SIZE_TO_DIVISOR(prev_size)));
Packit 5c3484
            }
Packit 5c3484
        }
Packit 5c3484
      else
Packit 5c3484
        {
Packit 5c3484
          if (option_unit == UNIT_CYCLES)
Packit 5c3484
            choice[i].time /= speed_cycletime;
Packit 5c3484
          else if (option_unit == UNIT_CYCLESPERLIMB)
Packit 5c3484
            choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
Packit 5c3484
Packit 5c3484
          if (option_cmp == CMP_RATIO && i > 0)
Packit 5c3484
            {
Packit 5c3484
              /* A ratio isn't affected by the units chosen. */
Packit 5c3484
              if (choice[0].no_time || choice[0].time == 0.0)
Packit 5c3484
                choice[i].no_time = 1;
Packit 5c3484
              else
Packit 5c3484
                choice[i].time /= choice[0].time;
Packit 5c3484
            }
Packit 5c3484
          else if (option_cmp == CMP_DIFFERENCE && i > 0)
Packit 5c3484
            {
Packit 5c3484
              if (choice[0].no_time)
Packit 5c3484
                {
Packit 5c3484
                  choice[i].no_time = 1;
Packit 5c3484
                  continue;
Packit 5c3484
                }
Packit 5c3484
              choice[i].time -= choice[0].time;
Packit 5c3484
            }
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (option_gnuplot)
Packit 5c3484
    {
Packit 5c3484
      /* In CMP_DIFFPREV, don't print anything for the first size, start
Packit 5c3484
         with the second where an actual difference is available.
Packit 5c3484
Packit 5c3484
         In CMP_RATIO, print the first column as 1.0.
Packit 5c3484
Packit 5c3484
         The 9 decimals printed is much more than the expected precision of
Packit 5c3484
         the measurements actually. */
Packit 5c3484
Packit 5c3484
      if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
Packit 5c3484
        {
Packit 5c3484
          fprintf (fp, "%-6ld ", s->size);
Packit 5c3484
          for (i = 0; i < num_choices; i++)
Packit 5c3484
            fprintf (fp, "  %.9e",
Packit 5c3484
                     choice[i].no_time ? 0.0
Packit 5c3484
                     : (option_cmp == CMP_RATIO && i == 0) ? 1.0
Packit 5c3484
                     : choice[i].time);
Packit 5c3484
          fprintf (fp, "\n");
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      fprintf (fp, "%-6ld ", s->size);
Packit 5c3484
      for (i = 0; i < num_choices; i++)
Packit 5c3484
        {
Packit 5c3484
          char  buf[128];
Packit 5c3484
          int   decimals;
Packit 5c3484
Packit 5c3484
          if (choice[i].no_time)
Packit 5c3484
            {
Packit 5c3484
              fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
Packit 5c3484
            }
Packit 5c3484
          else
Packit 5c3484
            {if (option_unit == UNIT_CYCLESPERLIMB
Packit 5c3484
                 || (option_cmp == CMP_RATIO && i > 0))
Packit 5c3484
                decimals = 4;
Packit 5c3484
              else if (option_unit == UNIT_CYCLES)
Packit 5c3484
                decimals = 2;
Packit 5c3484
              else
Packit 5c3484
                decimals = 9;
Packit 5c3484
Packit 5c3484
              sprintf (buf, "%s%.*f%s",
Packit 5c3484
                       i == fastest ? first_open_fastest : first_open_notfastest,
Packit 5c3484
                       decimals, choice[i].time, first_close);
Packit 5c3484
              fprintf (fp, " %*s", COLUMN_WIDTH, buf);
Packit 5c3484
            }
Packit 5c3484
        }
Packit 5c3484
      fprintf (fp, "\n");
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
run_all (FILE *fp)
Packit 5c3484
{
Packit 5c3484
  mp_size_t  prev_size;
Packit 5c3484
  int        i;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
  SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
Packit 5c3484
  SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
Packit 5c3484
Packit 5c3484
  data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
Packit 5c3484
  data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
Packit 5c3484
Packit 5c3484
  for (i = 0; i < size_num; i++)
Packit 5c3484
    {
Packit 5c3484
      sp.size = size_array[i].start;
Packit 5c3484
      prev_size = -1;
Packit 5c3484
      for (;;)
Packit 5c3484
        {
Packit 5c3484
          mp_size_t  step;
Packit 5c3484
Packit 5c3484
          if (option_data == DATA_2FD && sp.size >= 2)
Packit 5c3484
            sp.xp[sp.size-1] = 2;
Packit 5c3484
Packit 5c3484
          run_one (fp, &sp, prev_size);
Packit 5c3484
          prev_size = sp.size;
Packit 5c3484
Packit 5c3484
          if (option_data == DATA_2FD && sp.size >= 2)
Packit 5c3484
            sp.xp[sp.size-1] = MP_LIMB_T_MAX;
Packit 5c3484
Packit 5c3484
          if (option_factor != 0.0)
Packit 5c3484
            {
Packit 5c3484
              step = (mp_size_t) (sp.size * option_factor - sp.size);
Packit 5c3484
              if (step < 1)
Packit 5c3484
                step = 1;
Packit 5c3484
            }
Packit 5c3484
          else
Packit 5c3484
            step = 1;
Packit 5c3484
          if (step < option_step)
Packit 5c3484
            step = option_step;
Packit 5c3484
Packit 5c3484
          sp.size += step;
Packit 5c3484
          if (sp.size > size_array[i].end)
Packit 5c3484
            break;
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_FREE;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Open filename for writing and return the stream.  If it cannot be opened,
   print a message to stderr and exit with status 1.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *out = fopen (filename, "w");

  if (out == NULL)
    {
      fprintf (stderr, "Cannot create %s\n", filename);
      exit (1);
    }

  return out;
}
Packit 5c3484
Packit 5c3484
/* Close fp, which was being written as filename.  If the stream had its
   error indicator set, or the close itself fails, print a message to stderr
   and exit with status 1.  */
void
fclose_written (FILE *fp, const char *filename)
{
  /* the error indicator has to be sampled before the stream is closed */
  int  write_failed = ferror (fp);
  int  close_failed = fclose (fp);

  if (write_failed == 0 && close_failed == 0)
    return;

  fprintf (stderr, "Error writing %s\n", filename);
  exit (1);
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
run_gnuplot (int argc, char *argv[])
Packit 5c3484
{
Packit 5c3484
  char  *plot_filename;
Packit 5c3484
  char  *data_filename;
Packit 5c3484
  FILE  *fp;
Packit 5c3484
  int   i;
Packit 5c3484
Packit 5c3484
  plot_filename = (char *) (*__gmp_allocate_func)
Packit 5c3484
    (strlen (option_gnuplot_basename) + 20);
Packit 5c3484
  data_filename = (char *) (*__gmp_allocate_func)
Packit 5c3484
    (strlen (option_gnuplot_basename) + 20);
Packit 5c3484
Packit 5c3484
  sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
Packit 5c3484
  sprintf (data_filename, "%s.data",    option_gnuplot_basename);
Packit 5c3484
Packit 5c3484
  fp = fopen_for_write (plot_filename);
Packit 5c3484
Packit 5c3484
  fprintf (fp, "# Generated with:\n");
Packit 5c3484
  fprintf (fp, "#");
Packit 5c3484
  for (i = 0; i < argc; i++)
Packit 5c3484
    fprintf (fp, " %s", argv[i]);
Packit 5c3484
  fprintf (fp, "\n");
Packit 5c3484
  fprintf (fp, "\n");
Packit 5c3484
Packit 5c3484
  fprintf (fp, "reset\n");
Packit 5c3484
Packit 5c3484
  /* Putting the key at the top left is usually good, and you can change it
Packit 5c3484
     interactively if it's not. */
Packit 5c3484
  fprintf (fp, "set key left\n");
Packit 5c3484
Packit 5c3484
  /* designed to make it possible to see crossovers easily */
Packit 5c3484
  fprintf (fp, "set style data lines\n");
Packit 5c3484
Packit 5c3484
  fprintf (fp, "plot ");
Packit 5c3484
  for (i = 0; i < num_choices; i++)
Packit 5c3484
    {
Packit 5c3484
      fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
Packit 5c3484
      fprintf (fp, " title \"%s\"", choice[i].name);
Packit 5c3484
Packit 5c3484
      if (i != num_choices-1)
Packit 5c3484
        fprintf (fp, ", \\");
Packit 5c3484
      fprintf (fp, "\n");
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  fprintf (fp, "load \"-\"\n");
Packit 5c3484
  fclose_written (fp, plot_filename);
Packit 5c3484
Packit 5c3484
  fp = fopen_for_write (data_filename);
Packit 5c3484
Packit 5c3484
  /* Unbuffered so you can see where the program was up to if it crashes or
Packit 5c3484
     you kill it. */
Packit 5c3484
  setbuf (fp, NULL);
Packit 5c3484
Packit 5c3484
  run_all (fp);
Packit 5c3484
  fclose_written (fp, data_filename);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* A limb whose n least significant bits are ones and whose other bits are
   zero.  n == 0 and n == GMP_LIMB_BITS are picked out so that neither goes
   through the shift.  */

#define LIMB_ONES(n)                            \
  ((n) == 0 ? CNST_LIMB(0)                      \
   : (n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX       \
   : (CNST_LIMB(1) << (n)) - 1)
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
r_string (const char *s)
Packit 5c3484
{
Packit 5c3484
  const char  *s_orig = s;
Packit 5c3484
  long        n;
Packit 5c3484
Packit 5c3484
  if (strcmp (s, "aas") == 0)
Packit 5c3484
    return GMP_NUMB_0xAA;
Packit 5c3484
Packit 5c3484
  {
Packit 5c3484
    mpz_t      z;
Packit 5c3484
    mp_limb_t  l;
Packit 5c3484
    int        set, siz;
Packit 5c3484
Packit 5c3484
    mpz_init (z);
Packit 5c3484
    set = mpz_set_str (z, s, 0);
Packit 5c3484
    siz = SIZ(z);
Packit 5c3484
    l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
Packit 5c3484
    mpz_clear (z);
Packit 5c3484
    if (set == 0)
Packit 5c3484
      {
Packit 5c3484
        if (siz > 1 || siz < -1)
Packit 5c3484
          printf ("Warning, r parameter %s truncated to %d bits\n",
Packit 5c3484
                  s_orig, GMP_LIMB_BITS);
Packit 5c3484
        return l;
Packit 5c3484
      }
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
Packit 5c3484
    n = strtoul (s+2, (char **) &s, 16);
Packit 5c3484
  else
Packit 5c3484
    n = strtol (s, (char **) &s, 10);
Packit 5c3484
Packit 5c3484
  if (strcmp (s, "bits") == 0)
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t  l;
Packit 5c3484
      if (n > GMP_LIMB_BITS)
Packit 5c3484
        {
Packit 5c3484
          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
Packit 5c3484
                   n, GMP_LIMB_BITS);
Packit 5c3484
          exit (1);
Packit 5c3484
        }
Packit 5c3484
      mpn_random (&l, 1);
Packit 5c3484
      return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
Packit 5c3484
    }
Packit 5c3484
  else  if (strcmp (s, "ones") == 0)
Packit 5c3484
    {
Packit 5c3484
      if (n > GMP_LIMB_BITS)
Packit 5c3484
        {
Packit 5c3484
          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
Packit 5c3484
                   n, GMP_LIMB_BITS);
Packit 5c3484
          exit (1);
Packit 5c3484
        }
Packit 5c3484
      return LIMB_ONES (n);
Packit 5c3484
    }
Packit 5c3484
  else if (*s != '\0')
Packit 5c3484
    {
Packit 5c3484
      fprintf (stderr, "invalid r parameter: %s\n", s_orig);
Packit 5c3484
      exit (1);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return n;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
routine_find (struct choice_t *c, const char *s_orig)
Packit 5c3484
{
Packit 5c3484
  const char  *s;
Packit 5c3484
  int     i;
Packit 5c3484
  size_t  nlen;
Packit 5c3484
Packit 5c3484
  c->name = s_orig;
Packit 5c3484
  s = strchr (s_orig, '*');
Packit 5c3484
  if (s != NULL)
Packit 5c3484
    {
Packit 5c3484
      c->scale = atof(s_orig);
Packit 5c3484
      s++;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      c->scale = 1.0;
Packit 5c3484
      s = s_orig;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  for (i = 0; i < numberof (routine); i++)
Packit 5c3484
    {
Packit 5c3484
      nlen = strlen (routine[i].name);
Packit 5c3484
      if (memcmp (s, routine[i].name, nlen) != 0)
Packit 5c3484
        continue;
Packit 5c3484
Packit 5c3484
      if (s[nlen] == '.')
Packit 5c3484
        {
Packit 5c3484
          /* match, with a .r parameter */
Packit 5c3484
Packit 5c3484
          if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
Packit 5c3484
            {
Packit 5c3484
              fprintf (stderr,
Packit 5c3484
                       "Choice %s bad: doesn't take a \".<r>\" parameter\n",
Packit 5c3484
                       s_orig);
Packit 5c3484
              exit (1);
Packit 5c3484
            }
Packit 5c3484
Packit 5c3484
          c->p = &routine[i];
Packit 5c3484
          c->r = r_string (s + nlen + 1);
Packit 5c3484
          return;
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      if (s[nlen] == '\0')
Packit 5c3484
        {
Packit 5c3484
          /* match, with no parameter */
Packit 5c3484
Packit 5c3484
          if (routine[i].flag & FLAG_R)
Packit 5c3484
            {
Packit 5c3484
              fprintf (stderr,
Packit 5c3484
                       "Choice %s bad: needs a \".<r>\" parameter\n",
Packit 5c3484
                       s_orig);
Packit 5c3484
              exit (1);
Packit 5c3484
            }
Packit 5c3484
Packit 5c3484
          c->p = &routine[i];
Packit 5c3484
          c->r = 0;
Packit 5c3484
          return;
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  fprintf (stderr, "Choice %s unrecognised\n", s_orig);
Packit 5c3484
  exit (1);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
usage (void)
Packit 5c3484
{
Packit 5c3484
  int  i;
Packit 5c3484
Packit 5c3484
  speed_time_init ();
Packit 5c3484
Packit 5c3484
  printf ("Usage: speed [-options] -s size <routine>...\n");
Packit 5c3484
  printf ("Measure the speed of some routines.\n");
Packit 5c3484
  printf ("Times are in seconds, accuracy is shown.\n");
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("   -p num     set precision as number of time units each routine must run\n");
Packit 5c3484
  printf ("   -s size[-end][,size[-end]]...   sizes to measure\n");
Packit 5c3484
  printf ("              single sizes or ranges, sep with comma or use multiple -s\n");
Packit 5c3484
  printf ("   -t step    step through sizes by given amount\n");
Packit 5c3484
  printf ("   -f factor  step through sizes by given factor (eg. 1.05)\n");
Packit 5c3484
  printf ("   -r         show times as ratios of the first routine\n");
Packit 5c3484
  printf ("   -d         show times as difference from the first routine\n");
Packit 5c3484
  printf ("   -D         show times as difference from previous size shown\n");
Packit 5c3484
  printf ("   -c         show times in CPU cycles\n");
Packit 5c3484
  printf ("   -C         show times in cycles per limb\n");
Packit 5c3484
  printf ("   -u         print resource usage (memory) at end\n");
Packit 5c3484
  printf ("   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n");
Packit 5c3484
  printf ("   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
Packit 5c3484
  printf ("   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n");
Packit 5c3484
  printf ("   -o addrs   print addresses of data blocks\n");
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
Packit 5c3484
  printf ("is greater.\n");
Packit 5c3484
  printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
Packit 5c3484
  printf ("size and the previous size.\n");
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
Packit 5c3484
  printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
Packit 5c3484
  printf ("a log/log plot).\n");
Packit 5c3484
  printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
Packit 5c3484
  printf ("when viewing more than one routine, it means same axis scales for all data).\n");
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("The available routines are as follows.\n");
Packit 5c3484
  printf ("\n");
Packit 5c3484
Packit 5c3484
  for (i = 0; i < numberof (routine); i++)
Packit 5c3484
    {
Packit 5c3484
      if (routine[i].flag & FLAG_R)
Packit 5c3484
        printf ("\t%s.r\n", routine[i].name);
Packit 5c3484
      else if (routine[i].flag & FLAG_R_OPTIONAL)
Packit 5c3484
        printf ("\t%s (optional .r)\n", routine[i].name);
Packit 5c3484
      else
Packit 5c3484
        printf ("\t%s\n", routine[i].name);
Packit 5c3484
    }
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
Packit 5c3484
  printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
Packit 5c3484
  printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
Packit 5c3484
  printf ("The fastest routine at each size is marked with a # (free form output only).\n");
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("%s", speed_time_string);
Packit 5c3484
  printf ("\n");
Packit 5c3484
  printf ("Gnuplot home page http://www.gnuplot.info/\n");
Packit 5c3484
  printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
check_align_option (const char *name, mp_size_t align)
Packit 5c3484
{
Packit 5c3484
  if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)
Packit 5c3484
    {
Packit 5c3484
      fprintf (stderr, "Alignment request out of range: %s %ld\n",
Packit 5c3484
               name, (long) align);
Packit 5c3484
      fprintf (stderr, "  should be 0 to %d (limbs), inclusive\n",
Packit 5c3484
               SPEED_TMP_ALLOC_ADJUST_MASK);
Packit 5c3484
      exit (1);
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
int
Packit 5c3484
main (int argc, char *argv[])
Packit 5c3484
{
Packit 5c3484
  int  i;
Packit 5c3484
  int  opt;
Packit 5c3484
Packit 5c3484
  /* Unbuffered so output goes straight out when directed to a pipe or file
Packit 5c3484
     and isn't lost on killing the program half way.  */
Packit 5c3484
  setbuf (stdout, NULL);
Packit 5c3484
Packit 5c3484
  for (;;)
Packit 5c3484
    {
Packit 5c3484
      opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
Packit 5c3484
      if (opt == EOF)
Packit 5c3484
        break;
Packit 5c3484
Packit 5c3484
      switch (opt) {
Packit 5c3484
      case 'a':
Packit 5c3484
        if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
Packit 5c3484
        else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
Packit 5c3484
        else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
Packit 5c3484
        else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
Packit 5c3484
        else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
Packit 5c3484
        else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
Packit 5c3484
        else
Packit 5c3484
          {
Packit 5c3484
            fprintf (stderr, "unrecognised data option: %s\n", optarg);
Packit 5c3484
            exit (1);
Packit 5c3484
          }
Packit 5c3484
        break;
Packit 5c3484
      case 'C':
Packit 5c3484
        if (option_unit  != UNIT_SECONDS) goto bad_unit;
Packit 5c3484
        option_unit = UNIT_CYCLESPERLIMB;
Packit 5c3484
        break;
Packit 5c3484
      case 'c':
Packit 5c3484
        if (option_unit != UNIT_SECONDS)
Packit 5c3484
          {
Packit 5c3484
          bad_unit:
Packit 5c3484
            fprintf (stderr, "cannot use more than one of -c, -C\n");
Packit 5c3484
            exit (1);
Packit 5c3484
          }
Packit 5c3484
        option_unit = UNIT_CYCLES;
Packit 5c3484
        break;
Packit 5c3484
      case 'D':
Packit 5c3484
        if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
Packit 5c3484
        option_cmp = CMP_DIFFPREV;
Packit 5c3484
        break;
Packit 5c3484
      case 'd':
Packit 5c3484
        if (option_cmp != CMP_ABSOLUTE)
Packit 5c3484
          {
Packit 5c3484
          bad_cmp:
Packit 5c3484
            fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
Packit 5c3484
            exit (1);
Packit 5c3484
          }
Packit 5c3484
        option_cmp = CMP_DIFFERENCE;
Packit 5c3484
        break;
Packit 5c3484
      case 'E':
Packit 5c3484
        option_square = 1;
Packit 5c3484
        break;
Packit 5c3484
      case 'F':
Packit 5c3484
        option_square = 2;
Packit 5c3484
        break;
Packit 5c3484
      case 'f':
Packit 5c3484
        option_factor = atof (optarg);
Packit 5c3484
        if (option_factor <= 1.0)
Packit 5c3484
          {
Packit 5c3484
            fprintf (stderr, "-f factor must be > 1.0\n");
Packit 5c3484
            exit (1);
Packit 5c3484
          }
Packit 5c3484
        break;
Packit 5c3484
      case 'o':
Packit 5c3484
        speed_option_set (optarg);
Packit 5c3484
        break;
Packit 5c3484
      case 'P':
Packit 5c3484
        option_gnuplot = 1;
Packit 5c3484
        option_gnuplot_basename = optarg;
Packit 5c3484
        break;
Packit 5c3484
      case 'p':
Packit 5c3484
        speed_precision = atoi (optarg);
Packit 5c3484
        break;
Packit 5c3484
      case 'R':
Packit 5c3484
        option_seed = time (NULL);
Packit 5c3484
        break;
Packit 5c3484
      case 'r':
Packit 5c3484
        if (option_cmp != CMP_ABSOLUTE)
Packit 5c3484
          goto bad_cmp;
Packit 5c3484
        option_cmp = CMP_RATIO;
Packit 5c3484
        break;
Packit 5c3484
      case 's':
Packit 5c3484
        {
Packit 5c3484
          char  *s;
Packit 5c3484
          for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
Packit 5c3484
            {
Packit 5c3484
              if (size_num == size_allocnum)
Packit 5c3484
                {
Packit 5c3484
                  size_array = (struct size_array_t *)
Packit 5c3484
                    __gmp_allocate_or_reallocate
Packit 5c3484
                    (size_array,
Packit 5c3484
                     size_allocnum * sizeof(size_array[0]),
Packit 5c3484
                     (size_allocnum+10) * sizeof(size_array[0]));
Packit 5c3484
                  size_allocnum += 10;
Packit 5c3484
                }
Packit 5c3484
              if (sscanf (s, "%ld-%ld",
Packit 5c3484
                          &size_array[size_num].start,
Packit 5c3484
                          &size_array[size_num].end) != 2)
Packit 5c3484
                {
Packit 5c3484
                  size_array[size_num].start = size_array[size_num].end
Packit 5c3484
                    = atol (s);
Packit 5c3484
                }
Packit 5c3484
Packit 5c3484
              if (size_array[size_num].start < 0
Packit 5c3484
                  || size_array[size_num].end < 0
Packit 5c3484
                  || size_array[size_num].start > size_array[size_num].end)
Packit 5c3484
                {
Packit 5c3484
                  fprintf (stderr, "invalid size parameter: %s\n", s);
Packit 5c3484
                  exit (1);
Packit 5c3484
                }
Packit 5c3484
Packit 5c3484
              size_num++;
Packit 5c3484
            }
Packit 5c3484
        }
Packit 5c3484
        break;
Packit 5c3484
      case 't':
Packit 5c3484
        option_step = atol (optarg);
Packit 5c3484
        if (option_step < 1)
Packit 5c3484
          {
Packit 5c3484
            fprintf (stderr, "-t step must be >= 1\n");
Packit 5c3484
            exit (1);
Packit 5c3484
          }
Packit 5c3484
        break;
Packit 5c3484
      case 'u':
Packit 5c3484
        option_resource_usage = 1;
Packit 5c3484
        break;
Packit 5c3484
      case 'z':
Packit 5c3484
        sp.cache = 1;
Packit 5c3484
        break;
Packit 5c3484
      case 'x':
Packit 5c3484
        sp.align_xp = atol (optarg);
Packit 5c3484
        check_align_option ("-x", sp.align_xp);
Packit 5c3484
        break;
Packit 5c3484
      case 'y':
Packit 5c3484
        sp.align_yp = atol (optarg);
Packit 5c3484
        check_align_option ("-y", sp.align_yp);
Packit 5c3484
        break;
Packit 5c3484
      case 'w':
Packit 5c3484
        sp.align_wp = atol (optarg);
Packit 5c3484
        check_align_option ("-w", sp.align_wp);
Packit 5c3484
        break;
Packit 5c3484
      case 'W':
Packit 5c3484
        sp.align_wp2 = atol (optarg);
Packit 5c3484
        check_align_option ("-W", sp.align_wp2);
Packit 5c3484
        break;
Packit 5c3484
      case '?':
Packit 5c3484
        exit(1);
Packit 5c3484
      }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (optind >= argc)
Packit 5c3484
    {
Packit 5c3484
      usage ();
Packit 5c3484
      exit (1);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (size_num == 0)
Packit 5c3484
    {
Packit 5c3484
      fprintf (stderr, "-s <size> must be specified\n");
Packit 5c3484
      exit (1);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  gmp_randinit_default (__gmp_rands);
Packit 5c3484
  __gmp_rands_initialized = 1;
Packit 5c3484
  gmp_randseed_ui (__gmp_rands, option_seed);
Packit 5c3484
Packit 5c3484
  choice = (struct choice_t *) (*__gmp_allocate_func)
Packit 5c3484
    ((argc - optind) * sizeof(choice[0]));
Packit 5c3484
  for ( ; optind < argc; optind++)
Packit 5c3484
    {
Packit 5c3484
      struct choice_t  c;
Packit 5c3484
      routine_find (&c, argv[optind]);
Packit 5c3484
      choice[num_choices] = c;
Packit 5c3484
      num_choices++;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
Packit 5c3484
      num_choices < 2)
Packit 5c3484
    {
Packit 5c3484
      fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  speed_time_init ();
Packit 5c3484
  if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
Packit 5c3484
    speed_cycletime_need_cycles ();
Packit 5c3484
  else
Packit 5c3484
    speed_cycletime_need_seconds ();
Packit 5c3484
Packit 5c3484
  if (option_gnuplot)
Packit 5c3484
    {
Packit 5c3484
      run_gnuplot (argc, argv);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      if (option_unit == UNIT_SECONDS)
Packit 5c3484
        printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
Packit 5c3484
      else
Packit 5c3484
        printf ("overhead %.2f cycles",
Packit 5c3484
                speed_measure (speed_noop, NULL) / speed_cycletime);
Packit 5c3484
      printf (", precision %d units of %.2e secs",
Packit 5c3484
              speed_precision, speed_unittime);
Packit 5c3484
Packit 5c3484
      if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
Packit 5c3484
        printf (", CPU freq unknown\n");
Packit 5c3484
      else
Packit 5c3484
        printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
Packit 5c3484
Packit 5c3484
      printf ("       ");
Packit 5c3484
      for (i = 0; i < num_choices; i++)
Packit 5c3484
        printf (" %*s", COLUMN_WIDTH, choice[i].name);
Packit 5c3484
      printf ("\n");
Packit 5c3484
Packit 5c3484
      run_all (stdout);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (option_resource_usage)
Packit 5c3484
    {
Packit 5c3484
#if HAVE_GETRUSAGE
Packit 5c3484
      {
Packit 5c3484
        /* This doesn't give data sizes on linux 2.0.x, only utime. */
Packit 5c3484
        struct rusage  r;
Packit 5c3484
        if (getrusage (RUSAGE_SELF, &r) != 0)
Packit 5c3484
          perror ("getrusage");
Packit 5c3484
        else
Packit 5c3484
          printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
Packit 5c3484
                  r.ru_utime.tv_sec, r.ru_utime.tv_usec,
Packit 5c3484
                  r.ru_idrss, r.ru_isrss, r.ru_ixrss);
Packit 5c3484
      }
Packit 5c3484
#else
Packit 5c3484
      printf ("getrusage() not available\n");
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
      /* Linux kernel. */
Packit 5c3484
      {
Packit 5c3484
        char  buf[128];
Packit 5c3484
        sprintf (buf, "/proc/%d/status", getpid());
Packit 5c3484
        if (access (buf, R_OK) == 0)
Packit 5c3484
          {
Packit 5c3484
            sprintf (buf, "cat /proc/%d/status", getpid());
Packit 5c3484
            system (buf);
Packit 5c3484
          }
Packit 5c3484
Packit 5c3484
      }
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return 0;
Packit 5c3484
}