Blame tune/speed.h

/* Header for speed and threshold things.

Copyright 1999-2003, 2005, 2006, 2008-2015 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#ifndef __SPEED_H__
Packit 5c3484
#define __SPEED_H__
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Pad {ptr,oldsize} with zero limbs (at the most significant end) to make it
   newsize limbs long.  newsize >= oldsize is asserted.  Each argument is
   evaluated more than once, so avoid side effects in them.  */
#define MPN_ZERO_EXTEND(ptr, oldsize, newsize)		\
  do {							\
    ASSERT ((newsize) >= (oldsize));			\
    MPN_ZERO ((ptr)+(oldsize), (newsize)-(oldsize));	\
  } while (0)
Packit 5c3484
Packit 5c3484
/* A mask of the least significant n bits, for 0 <= n <= GMP_LIMB_BITS.
   Shifting an mp_limb_t by its full width is undefined in C (and in
   particular 1<<32 doesn't give zero on x86 family CPUs), hence the
   separate case for n == GMP_LIMB_BITS.  n is evaluated twice.  */
#define MP_LIMB_T_LOWBITMASK(n)	\
  ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Allocate `bytes' of TMP_ALLOC space and pass the result through
   align_pointer (defined elsewhere) to get `align'-byte alignment.
   align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice.
   align-1 extra bytes are requested to leave room for the adjustment.  */
#define TMP_ALLOC_ALIGNED(bytes, align)	\
  align_pointer (TMP_ALLOC ((bytes) + (align)-1), (align))

/* As TMP_ALLOC_ALIGNED, but the size is given in limbs and the result is
   cast to mp_ptr.  align is still in bytes.  */
#define TMP_ALLOC_LIMBS_ALIGNED(limbs, align)	\
  ((mp_ptr) TMP_ALLOC_ALIGNED ((limbs)*sizeof(mp_limb_t), align))
Packit 5c3484
Packit 5c3484
/* CACHE_LINE_SIZE is our default alignment for speed operands, and the
   limit on what s->align_xp etc can then request for off-alignment.  Maybe
   this should be an option of some sort, but in any case here are some line
   sizes,

       bytes
	 32   pentium
	 64   athlon
	 64   itanium-2 L1
	128   itanium-2 L2
*/
#define CACHE_LINE_SIZE   64 /* bytes */

/* Number of limbs per cache line, minus one.  Usable as a bit mask on a
   limb offset provided CACHE_LINE_SIZE/GMP_LIMB_BYTES is a power of 2.  */
#define SPEED_TMP_ALLOC_ADJUST_MASK  (CACHE_LINE_SIZE/GMP_LIMB_BYTES - 1)
Packit 5c3484
Packit 5c3484
/* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb
   alignment: the address of ptr, counted in limbs, is congruent to `align'
   modulo SPEED_TMP_ALLOC_ADJUST_MASK+1.  SPEED_TMP_ALLOC_ADJUST_MASK extra
   limbs are allocated to leave room for the adjustment.  */
#define SPEED_TMP_ALLOC_LIMBS(ptr, limbs, align)			\
  do {									\
    mp_ptr     __ptr;							\
    mp_size_t  __ptr_align, __ptr_add;					\
									\
    ASSERT ((CACHE_LINE_SIZE % GMP_LIMB_BYTES) == 0);			\
    __ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK);	\
    /* Address of the block in limb units.  Computed from the integer	\
       value of the pointer, since subtracting a null pointer from it	\
       is undefined behaviour.  */					\
    __ptr_align = (mp_size_t) ((size_t) __ptr / sizeof (mp_limb_t));	\
    __ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK;	\
    (ptr) = __ptr + __ptr_add;						\
  } while (0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* This is the size for s->xp_block and s->yp_block, used in certain
   routines that want to run across many different data values and use
   s->size for a different purpose, eg. SPEED_ROUTINE_MPN_GCD_1.

   With 4-byte limbs, 512 means 2kbytes of data for each of xp_block and
   yp_block, making 4k total; with 8-byte limbs it is 4kbytes each, making
   8k total.  Either should fit easily in any L1 data cache. */

#define SPEED_BLOCK_SIZE   512 /* limbs */
Packit 5c3484
Packit 5c3484
Packit 5c3484
extern double  speed_unittime;
Packit 5c3484
extern double  speed_cycletime;
Packit 5c3484
extern int     speed_precision;
Packit 5c3484
extern char    speed_time_string[];
Packit 5c3484
void speed_time_init (void);
Packit 5c3484
void speed_cycletime_fail (const char *str);
Packit 5c3484
void speed_cycletime_init (void);
Packit 5c3484
void speed_cycletime_need_cycles (void);
Packit 5c3484
void speed_cycletime_need_seconds (void);
Packit 5c3484
void speed_starttime (void);
Packit 5c3484
double speed_endtime (void);
Packit 5c3484
Packit 5c3484
Packit 5c3484
struct speed_params {
Packit 5c3484
  unsigned   reps;	/* how many times to run the routine */
Packit 5c3484
  mp_ptr     xp;	/* first argument */
Packit 5c3484
  mp_ptr     yp;	/* second argument */
Packit 5c3484
  mp_size_t  size;	/* size of both arguments */
Packit 5c3484
  mp_limb_t  r;		/* user supplied parameter */
Packit 5c3484
  mp_size_t  align_xp;	/* alignment of xp */
Packit 5c3484
  mp_size_t  align_yp;	/* alignment of yp */
Packit 5c3484
  mp_size_t  align_wp;	/* intended alignment of wp */
Packit 5c3484
  mp_size_t  align_wp2; /* intended alignment of wp2 */
Packit 5c3484
  mp_ptr     xp_block;	/* first special SPEED_BLOCK_SIZE block */
Packit 5c3484
  mp_ptr     yp_block;	/* second special SPEED_BLOCK_SIZE block */
Packit 5c3484
Packit 5c3484
  double     time_divisor; /* optionally set by the speed routine */
Packit 5c3484
Packit 5c3484
  /* used by the cache priming things */
Packit 5c3484
  int	     cache;
Packit 5c3484
  unsigned   src_num, dst_num;
Packit 5c3484
  struct {
Packit 5c3484
    mp_ptr    ptr;
Packit 5c3484
    mp_size_t size;
Packit 5c3484
  } src[5], dst[4];
Packit 5c3484
};
Packit 5c3484
Packit 5c3484
typedef double (*speed_function_t) (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_measure (speed_function_t fun, struct speed_params *);
Packit 5c3484
Packit 5c3484
/* Prototypes for speed measuring routines */
Packit 5c3484
Packit 5c3484
double speed_back_to_back (struct speed_params *);
Packit 5c3484
double speed_count_leading_zeros (struct speed_params *);
Packit 5c3484
double speed_count_trailing_zeros (struct speed_params *);
Packit 5c3484
double speed_find_a (struct speed_params *);
Packit 5c3484
double speed_gmp_allocate_free (struct speed_params *);
Packit 5c3484
double speed_gmp_allocate_reallocate_free (struct speed_params *);
Packit 5c3484
double speed_invert_limb (struct speed_params *);
Packit 5c3484
double speed_malloc_free (struct speed_params *);
Packit 5c3484
double speed_malloc_realloc_free (struct speed_params *);
Packit 5c3484
double speed_memcpy (struct speed_params *);
Packit 5c3484
double speed_binvert_limb (struct speed_params *);
Packit 5c3484
double speed_binvert_limb_mul1 (struct speed_params *);
Packit 5c3484
double speed_binvert_limb_loop (struct speed_params *);
Packit 5c3484
double speed_binvert_limb_cond (struct speed_params *);
Packit 5c3484
double speed_binvert_limb_arith (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_mpf_init_clear (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_mpn_add_n (struct speed_params *);
Packit 5c3484
double speed_mpn_add_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_add_1_inplace (struct speed_params *);
Packit 5c3484
double speed_mpn_add_err1_n (struct speed_params *);
Packit 5c3484
double speed_mpn_add_err2_n (struct speed_params *);
Packit 5c3484
double speed_mpn_add_err3_n (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh_n (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh1_n (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh2_n (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh_n_ip1 (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh1_n_ip1 (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh2_n_ip1 (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh_n_ip2 (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh1_n_ip2 (struct speed_params *);
Packit 5c3484
double speed_mpn_addlsh2_n_ip2 (struct speed_params *);
Packit 5c3484
double speed_mpn_add_n_sub_n (struct speed_params *);
Packit 5c3484
double speed_mpn_and_n (struct speed_params *);
Packit 5c3484
double speed_mpn_andn_n (struct speed_params *);
Packit 5c3484
double speed_mpn_addmul_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_addmul_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_addmul_3 (struct speed_params *);
Packit 5c3484
double speed_mpn_addmul_4 (struct speed_params *);
Packit 5c3484
double speed_mpn_addmul_5 (struct speed_params *);
Packit 5c3484
double speed_mpn_addmul_6 (struct speed_params *);
Packit 5c3484
double speed_mpn_addmul_7 (struct speed_params *);
Packit 5c3484
double speed_mpn_addmul_8 (struct speed_params *);
Packit 5c3484
double speed_mpn_cnd_add_n (struct speed_params *);
Packit 5c3484
double speed_mpn_cnd_sub_n (struct speed_params *);
Packit 5c3484
double speed_mpn_com (struct speed_params *);
Packit 5c3484
double speed_mpn_neg (struct speed_params *);
Packit 5c3484
double speed_mpn_copyd (struct speed_params *);
Packit 5c3484
double speed_mpn_copyi (struct speed_params *);
Packit 5c3484
double speed_MPN_COPY (struct speed_params *);
Packit 5c3484
double speed_MPN_COPY_DECR (struct speed_params *);
Packit 5c3484
double speed_MPN_COPY_INCR (struct speed_params *);
Packit 5c3484
double speed_mpn_sec_tabselect (struct speed_params *);
Packit 5c3484
double speed_mpn_divexact_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_divexact_by3 (struct speed_params *);
Packit 5c3484
double speed_mpn_bdiv_q_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_pi1_bdiv_q_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_bdiv_dbm1c (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_1f (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_1c (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_1cf (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_1_div (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_1f_div (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_1_inv (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_1f_inv (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_2_div (struct speed_params *);
Packit 5c3484
double speed_mpn_divrem_2_inv (struct speed_params *);
Packit 5c3484
double speed_mpn_div_qr_1n_pi1 (struct speed_params *);
Packit 5c3484
double speed_mpn_div_qr_1n_pi1_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_div_qr_1n_pi1_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_div_qr_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_div_qr_2n (struct speed_params *);
Packit 5c3484
double speed_mpn_div_qr_2u (struct speed_params *);
Packit 5c3484
double speed_mpn_fib2_ui (struct speed_params *);
Packit 5c3484
double speed_mpn_matrix22_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_hgcd (struct speed_params *);
Packit 5c3484
double speed_mpn_hgcd_lehmer (struct speed_params *);
Packit 5c3484
double speed_mpn_hgcd_appr (struct speed_params *);
Packit 5c3484
double speed_mpn_hgcd_appr_lehmer (struct speed_params *);
Packit 5c3484
double speed_mpn_hgcd_reduce (struct speed_params *);
Packit 5c3484
double speed_mpn_hgcd_reduce_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_hgcd_reduce_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_gcd (struct speed_params *);
Packit 5c3484
double speed_mpn_gcd_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_gcd_1N (struct speed_params *);
Packit 5c3484
double speed_mpn_gcdext (struct speed_params *);
Packit 5c3484
double speed_mpn_gcdext_double (struct speed_params *);
Packit 5c3484
double speed_mpn_gcdext_one_double (struct speed_params *);
Packit 5c3484
double speed_mpn_gcdext_one_single (struct speed_params *);
Packit 5c3484
double speed_mpn_gcdext_single (struct speed_params *);
Packit 5c3484
double speed_mpn_get_str (struct speed_params *);
Packit 5c3484
double speed_mpn_hamdist (struct speed_params *);
Packit 5c3484
double speed_mpn_ior_n (struct speed_params *);
Packit 5c3484
double speed_mpn_iorn_n (struct speed_params *);
Packit 5c3484
double speed_mpn_jacobi_base (struct speed_params *);
Packit 5c3484
double speed_mpn_jacobi_base_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_jacobi_base_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_jacobi_base_3 (struct speed_params *);
Packit 5c3484
double speed_mpn_jacobi_base_4 (struct speed_params *);
Packit 5c3484
double speed_mpn_lshift (struct speed_params *);
Packit 5c3484
double speed_mpn_lshiftc (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1c (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1_div (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1_inv (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1_1_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1_1_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1_3 (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_1_4 (struct speed_params *);
Packit 5c3484
double speed_mpn_mod_34lsub1 (struct speed_params *);
Packit 5c3484
double speed_mpn_modexact_1_odd (struct speed_params *);
Packit 5c3484
double speed_mpn_modexact_1c_odd (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_1_inplace (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_3 (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_4 (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_5 (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_6 (struct speed_params *);
Packit 5c3484
double speed_mpn_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_basecase (struct speed_params *);
Packit 5c3484
double speed_mpn_mulmid (struct speed_params *);
Packit 5c3484
double speed_mpn_mulmid_basecase (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_fft (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_fft_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_fft_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_fft_sqr (struct speed_params *);
Packit 5c3484
#if WANT_OLD_FFT_FULL
Packit 5c3484
double speed_mpn_mul_fft_full (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_fft_full_sqr (struct speed_params *);
Packit 5c3484
#endif
Packit 5c3484
double speed_mpn_nussbaumer_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_nussbaumer_mul_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_n (struct speed_params *);
Packit 5c3484
double speed_mpn_mul_n_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_mulmid_n (struct speed_params *);
Packit 5c3484
double speed_mpn_sqrlo (struct speed_params *);
Packit 5c3484
double speed_mpn_sqrlo_basecase (struct speed_params *);
Packit 5c3484
double speed_mpn_mullo_n (struct speed_params *);
Packit 5c3484
double speed_mpn_mullo_basecase (struct speed_params *);
Packit 5c3484
double speed_mpn_nand_n (struct speed_params *);
Packit 5c3484
double speed_mpn_nior_n (struct speed_params *);
Packit 5c3484
double speed_mpn_popcount (struct speed_params *);
Packit 5c3484
double speed_mpn_preinv_divrem_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_preinv_divrem_1f (struct speed_params *);
Packit 5c3484
double speed_mpn_preinv_mod_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_sbpi1_div_qr (struct speed_params *);
Packit 5c3484
double speed_mpn_dcpi1_div_qr (struct speed_params *);
Packit 5c3484
double speed_mpn_sbpi1_divappr_q (struct speed_params *);
Packit 5c3484
double speed_mpn_dcpi1_divappr_q (struct speed_params *);
Packit 5c3484
double speed_mpn_mu_div_qr (struct speed_params *);
Packit 5c3484
double speed_mpn_mu_divappr_q (struct speed_params *);
Packit 5c3484
double speed_mpn_mupi_div_qr (struct speed_params *);
Packit 5c3484
double speed_mpn_mu_div_q (struct speed_params *);
Packit 5c3484
double speed_mpn_sbpi1_bdiv_qr (struct speed_params *);
Packit 5c3484
double speed_mpn_dcpi1_bdiv_qr (struct speed_params *);
Packit 5c3484
double speed_mpn_sbpi1_bdiv_q (struct speed_params *);
Packit 5c3484
double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
Packit 5c3484
double speed_mpn_mu_bdiv_q (struct speed_params *);
Packit 5c3484
double speed_mpn_mu_bdiv_qr (struct speed_params *);
Packit 5c3484
double speed_mpn_broot (struct speed_params *);
Packit 5c3484
double speed_mpn_broot_invm1 (struct speed_params *);
Packit 5c3484
double speed_mpn_brootinv (struct speed_params *);
Packit 5c3484
double speed_mpn_invert (struct speed_params *);
Packit 5c3484
double speed_mpn_invertappr (struct speed_params *);
Packit 5c3484
double speed_mpn_ni_invertappr (struct speed_params *);
Packit 5c3484
double speed_mpn_sec_invert (struct speed_params *s);
Packit 5c3484
double speed_mpn_binvert (struct speed_params *);
Packit 5c3484
double speed_mpn_redc_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_redc_2 (struct speed_params *);
Packit 5c3484
double speed_mpn_redc_n (struct speed_params *);
Packit 5c3484
double speed_mpn_rsblsh_n (struct speed_params *);
Packit 5c3484
double speed_mpn_rsblsh1_n (struct speed_params *);
Packit 5c3484
double speed_mpn_rsblsh2_n (struct speed_params *);
Packit 5c3484
double speed_mpn_rsh1add_n (struct speed_params *);
Packit 5c3484
double speed_mpn_rsh1sub_n (struct speed_params *);
Packit 5c3484
double speed_mpn_rshift (struct speed_params *);
Packit 5c3484
double speed_mpn_sb_divrem_m3 (struct speed_params *);
Packit 5c3484
double speed_mpn_sb_divrem_m3_div (struct speed_params *);
Packit 5c3484
double speed_mpn_sb_divrem_m3_inv (struct speed_params *);
Packit 5c3484
double speed_mpn_set_str (struct speed_params *);
Packit 5c3484
double speed_mpn_bc_set_str (struct speed_params *);
Packit 5c3484
double speed_mpn_dc_set_str (struct speed_params *);
Packit 5c3484
double speed_mpn_set_str_pre (struct speed_params *);
Packit 5c3484
double speed_mpn_sqr_basecase (struct speed_params *);
Packit 5c3484
double speed_mpn_sqr_diag_addlsh1 (struct speed_params *);
Packit 5c3484
double speed_mpn_sqr_diagonal (struct speed_params *);
Packit 5c3484
double speed_mpn_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_sqrtrem (struct speed_params *);
Packit 5c3484
double speed_mpn_rootrem (struct speed_params *);
Packit 5c3484
double speed_mpn_sqrt (struct speed_params *);
Packit 5c3484
double speed_mpn_root (struct speed_params *);
Packit 5c3484
double speed_mpn_sub_n (struct speed_params *);
Packit 5c3484
double speed_mpn_sub_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_sub_1_inplace (struct speed_params *);
Packit 5c3484
double speed_mpn_sub_err1_n (struct speed_params *);
Packit 5c3484
double speed_mpn_sub_err2_n (struct speed_params *);
Packit 5c3484
double speed_mpn_sub_err3_n (struct speed_params *);
Packit 5c3484
double speed_mpn_sublsh_n (struct speed_params *);
Packit 5c3484
double speed_mpn_sublsh1_n (struct speed_params *);
Packit 5c3484
double speed_mpn_sublsh2_n (struct speed_params *);
Packit 5c3484
double speed_mpn_sublsh_n_ip1 (struct speed_params *);
Packit 5c3484
double speed_mpn_sublsh1_n_ip1 (struct speed_params *);
Packit 5c3484
double speed_mpn_sublsh2_n_ip1 (struct speed_params *);
Packit 5c3484
double speed_mpn_submul_1 (struct speed_params *);
Packit 5c3484
double speed_mpn_toom2_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_toom3_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_toom4_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_toom6_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_toom8_sqr (struct speed_params *);
Packit 5c3484
double speed_mpn_toom22_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom33_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom44_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom6h_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom8h_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom32_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom42_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom43_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom63_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom32_for_toom43_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom43_for_toom32_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom32_for_toom53_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom53_for_toom32_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom42_for_toom53_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom53_for_toom42_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom43_for_toom54_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom54_for_toom43_mul (struct speed_params *);
Packit 5c3484
double speed_mpn_toom42_mulmid (struct speed_params *);
Packit 5c3484
double speed_mpn_mulmod_bnm1 (struct speed_params *);
Packit 5c3484
double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
Packit 5c3484
double speed_mpn_mulmod_bnm1_rounded (struct speed_params *);
Packit 5c3484
double speed_mpn_sqrmod_bnm1 (struct speed_params *);
Packit 5c3484
double speed_mpn_udiv_qrnnd (struct speed_params *);
Packit 5c3484
double speed_mpn_udiv_qrnnd_r (struct speed_params *);
Packit 5c3484
double speed_mpn_umul_ppmm (struct speed_params *);
Packit 5c3484
double speed_mpn_umul_ppmm_r (struct speed_params *);
Packit 5c3484
double speed_mpn_xnor_n (struct speed_params *);
Packit 5c3484
double speed_mpn_xor_n (struct speed_params *);
Packit 5c3484
double speed_MPN_ZERO (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_mpq_init_clear (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_mpz_add (struct speed_params *);
Packit 5c3484
double speed_mpz_bin_uiui (struct speed_params *);
Packit 5c3484
double speed_mpz_bin_ui (struct speed_params *);
Packit 5c3484
double speed_mpz_fac_ui (struct speed_params *);
Packit 5c3484
double speed_mpz_2fac_ui (struct speed_params *);
Packit 5c3484
double speed_mpz_fib_ui (struct speed_params *);
Packit 5c3484
double speed_mpz_fib2_ui (struct speed_params *);
Packit 5c3484
double speed_mpz_init_clear (struct speed_params *);
Packit 5c3484
double speed_mpz_init_realloc_clear (struct speed_params *);
Packit 5c3484
double speed_mpz_jacobi (struct speed_params *);
Packit 5c3484
double speed_mpz_lucnum_ui (struct speed_params *);
Packit 5c3484
double speed_mpz_lucnum2_ui (struct speed_params *);
Packit 5c3484
double speed_mpz_mod (struct speed_params *);
Packit 5c3484
double speed_mpz_powm (struct speed_params *);
Packit 5c3484
double speed_mpz_powm_mod (struct speed_params *);
Packit 5c3484
double speed_mpz_powm_redc (struct speed_params *);
Packit 5c3484
double speed_mpz_powm_sec (struct speed_params *);
Packit 5c3484
double speed_mpz_powm_ui (struct speed_params *);
Packit 5c3484
double speed_mpz_urandomb (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_gmp_randseed (struct speed_params *);
Packit 5c3484
double speed_gmp_randseed_ui (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_noop (struct speed_params *);
Packit 5c3484
double speed_noop_wxs (struct speed_params *);
Packit 5c3484
double speed_noop_wxys (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_operator_div (struct speed_params *);
Packit 5c3484
double speed_operator_mod (struct speed_params *);
Packit 5c3484
Packit 5c3484
double speed_udiv_qrnnd (struct speed_params *);
Packit 5c3484
double speed_udiv_qrnnd_preinv1 (struct speed_params *);
Packit 5c3484
double speed_udiv_qrnnd_preinv2 (struct speed_params *);
Packit 5c3484
double speed_udiv_qrnnd_preinv3 (struct speed_params *);
Packit 5c3484
double speed_udiv_qrnnd_c (struct speed_params *);
Packit 5c3484
double speed_umul_ppmm (struct speed_params *);
Packit 5c3484
Packit 5c3484
/* Prototypes for other routines */
Packit 5c3484
Packit 5c3484
#if defined (__cplusplus)
Packit 5c3484
extern "C" {
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* low 32-bits in p[0], high 32-bits in p[1] */
Packit 5c3484
void speed_cyclecounter (unsigned p[2]);
Packit 5c3484
Packit 5c3484
#if defined (__cplusplus)
Packit 5c3484
}
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
void mftb_function (unsigned p[2]);
Packit 5c3484
Packit 5c3484
double speed_cyclecounter_diff (const unsigned [2], const unsigned [2]);
Packit 5c3484
int gettimeofday_microseconds_p (void);
Packit 5c3484
int getrusage_microseconds_p (void);
Packit 5c3484
int cycles_works_p (void);
Packit 5c3484
long clk_tck (void);
Packit 5c3484
double freq_measure (const char *, double (*)(void));
Packit 5c3484
Packit 5c3484
int double_cmp_ptr (const double *, const double *);
Packit 5c3484
void pentium_wbinvd (void);
Packit 5c3484
typedef int (*qsort_function_t) (const void *, const void *);
Packit 5c3484
Packit 5c3484
void noop (void);
Packit 5c3484
void noop_1 (mp_limb_t);
Packit 5c3484
void noop_wxs (mp_ptr, mp_srcptr, mp_size_t);
Packit 5c3484
void noop_wxys (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
Packit 5c3484
void mpn_cache_fill (mp_srcptr, mp_size_t);
Packit 5c3484
void mpn_cache_fill_dummy (mp_limb_t);
Packit 5c3484
void speed_cache_fill (struct speed_params *);
Packit 5c3484
void speed_operand_src (struct speed_params *, mp_ptr, mp_size_t);
Packit 5c3484
void speed_operand_dst (struct speed_params *, mp_ptr, mp_size_t);
Packit 5c3484
Packit 5c3484
extern int  speed_option_addrs;
Packit 5c3484
extern int  speed_option_verbose;
Packit 5c3484
extern int  speed_option_cycles_broken;
Packit 5c3484
void speed_option_set (const char *);
Packit 5c3484
Packit 5c3484
mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
Packit 5c3484
mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
Packit 5c3484
Packit 5c3484
mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
Packit 5c3484
mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
Packit 5c3484
mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
Packit 5c3484
mp_limb_t mpn_divrem_2_inv (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
Packit 5c3484
Packit 5c3484
int mpn_jacobi_base_1 (mp_limb_t, mp_limb_t, int);
Packit 5c3484
int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int);
Packit 5c3484
int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int);
Packit 5c3484
int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int);
Packit 5c3484
Packit 5c3484
mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
Packit 5c3484
mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
Packit 5c3484
Packit 5c3484
mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]);
Packit 5c3484
mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]);
Packit 5c3484
Packit 5c3484
void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
Packit 5c3484
void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
Packit 5c3484
Packit 5c3484
mp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
Packit 5c3484
mp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
Packit 5c3484
mp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
Packit 5c3484
mp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
Packit 5c3484
mp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
Packit 5c3484
mp_size_t mpn_hgcd_lehmer_itch (mp_size_t);
Packit 5c3484
Packit 5c3484
mp_size_t mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
Packit 5c3484
mp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t);
Packit 5c3484
Packit 5c3484
mp_size_t mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
Packit 5c3484
mp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t);
Packit 5c3484
Packit 5c3484
mp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
Packit 5c3484
mp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t);
Packit 5c3484
Packit 5c3484
mp_limb_t mpn_sb_divrem_mn_div (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
Packit 5c3484
mp_limb_t mpn_sb_divrem_mn_inv (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
Packit 5c3484
Packit 5c3484
mp_size_t mpn_set_str_basecase (mp_ptr, const unsigned char *, size_t, int);
Packit 5c3484
void mpn_pre_set_str (mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr);
Packit 5c3484
Packit 5c3484
void mpz_powm_mod (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
Packit 5c3484
void mpz_powm_redc (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
Packit 5c3484
Packit 5c3484
int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* "get" is called repeatedly until it ticks over, just in case on a fast
Packit 5c3484
   processor it takes less than a microsecond, though this is probably
Packit 5c3484
   unlikely if it's a system call.
Packit 5c3484
Packit 5c3484
   speed_cyclecounter is called on the same side of the "get" for the start
Packit 5c3484
   and end measurements.  It doesn't matter how long it takes from the "get"
Packit 5c3484
   sample to the cycles sample, since that period will cancel out in the
Packit 5c3484
   difference calculation (assuming it's the same each time).
Packit 5c3484
Packit 5c3484
   Letting the test run for more than a process time slice is probably only
Packit 5c3484
   going to reduce accuracy, especially for getrusage when the cycle counter
Packit 5c3484
   is real time, or for gettimeofday if the cycle counter is in fact process
Packit 5c3484
   time.  Use CLK_TCK/2 as a reasonable stop.
Packit 5c3484
Packit 5c3484
   It'd be desirable to be quite accurate here.  The default speed_precision
Packit 5c3484
   for a cycle counter is 10000 cycles, so to mix that with getrusage or
Packit 5c3484
   gettimeofday the frequency should be at least that accurate.  But running
Packit 5c3484
   measurements for 10000 microseconds (or more) is too long.  Be satisfied
Packit 5c3484
   with just a half clock tick (5000 microseconds usually).  */
Packit 5c3484
Packit 5c3484
/* One frequency measurement.  "get" fills in a clock sample of the given
   "type", "getc" fills in a cycle counter sample, and "sec"/"usec" extract
   the seconds and microseconds parts of a clock sample.  "name" is only used
   in the verbose trace.  The expansion finishes by returning, from the
   enclosing function, the elapsed time in seconds divided by the cycle
   counter difference.  */
#define FREQ_MEASURE_ONE(name, type, get, getc, sec, usec)		\
  do {									\
    type        st1, st, et1, et;					\
    unsigned    sc[2], ec[2];						\
    long        dt;							\
    double      dc, cyc;						\
    const long  half_tick = (1000000L / clk_tck()) / 2;			\
									\
    /* wait for the clock to tick over, then sample the cycles */	\
    get (st1);								\
    do {								\
      get (st);								\
    } while (usec(st) == usec(st1) && sec(st) == sec(st1));		\
    getc (sc);								\
									\
    /* keep taking end samples the same way until at least half a	\
       clock tick has elapsed */					\
    do									\
      {									\
	get (et1);							\
	do {								\
	  get (et);							\
	} while (usec(et) == usec(et1) && sec(et) == sec(et1));		\
	getc (ec);							\
									\
	dc = speed_cyclecounter_diff (ec, sc);				\
									\
	/* allow secs to cancel before multiplying */			\
	dt = sec(et) - sec(st);						\
	dt = dt * 1000000L + (usec(et) - usec(st));			\
      }									\
    while (dt < half_tick);						\
									\
    /* dt is in microseconds, hence the 1e-6 */				\
    cyc = dt * 1e-6 / dc;						\
									\
    if (speed_option_verbose >= 2)					\
      printf ("freq_measure_%s_one() dc=%.6g dt=%ld cyc=%.6g\n",	\
	      name, dc, dt, cyc);					\
									\
    return cyc;								\
									\
  } while (0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* The measuring routines use these big macros to save duplication for
Packit 5c3484
   similar forms.  They also get used for some automatically generated
Packit 5c3484
   measuring of new implementations of functions.
Packit 5c3484
Packit 5c3484
   Having something like SPEED_ROUTINE_BINARY_N as a subroutine accepting a
Packit 5c3484
   function pointer is considered undesirable since it's not the way a
Packit 5c3484
   normal application will be calling, and some processors might do
Packit 5c3484
   different things with an indirect call, like not branch predicting, or
Packit 5c3484
   doing a full pipe flush.  At least some of the "functions" measured are
Packit 5c3484
   actually macros too.
Packit 5c3484
Packit 5c3484
   The net effect is to bloat the object code, possibly in a big way, but
Packit 5c3484
   only what's being measured is being run, so that doesn't matter.
Packit 5c3484
Packit 5c3484
   The loop forms don't try to cope with __GMP_ATTRIBUTE_PURE or
Packit 5c3484
   ATTRIBUTE_CONST on the called functions.  Adding a cast to a non-pure
Packit 5c3484
   function pointer doesn't work in gcc 3.2.  Using an actual non-pure
Packit 5c3484
   function pointer variable works, but stands a real risk of a
Packit 5c3484
   non-optimizing compiler generating unnecessary overheads in the call.
Packit 5c3484
   Currently the best idea is not to use those attributes for a timing
Packit 5c3484
   program build.  __GMP_NO_ATTRIBUTE_CONST_PURE will tell gmp.h and
Packit 5c3484
   gmp-impl.h to omit them from routines there.  */
Packit 5c3484
Packit 5c3484
/* Make the enclosing speed routine return -1.0 when COND does not hold.
   The expansion is a bare "if" which already ends in a semicolon, so use it
   only as a statement of its own, never as the unbraced arm of an if/else.  */
#define SPEED_RESTRICT_COND(cond)   if (!(cond)) return -1.0;
Packit 5c3484
Packit 5c3484
/* For mpn_copy or similar. */
Packit 5c3484
#define SPEED_ROUTINE_MPN_COPY_CALL(call)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 0);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
#define SPEED_ROUTINE_MPN_COPY(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_TABSELECT(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    xp, wp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 0);					\
Packit 5c3484
									\
Packit 5c3484
    if (s->r == 0)							\
Packit 5c3484
      s->r = s->size;	/* default to a quadratic shape */		\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (xp, s->size * s->r, s->align_xp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, xp, s->size * s->r);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, xp, s->size, s->r, (s->r) / 2);			\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime () / s->r;					\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_COPYC(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 0);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, s->xp, s->size, 0);					\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* s->size is still in limbs, and it's limbs which are copied, but
Packit 5c3484
   "function" takes a size in bytes not limbs.  */
Packit 5c3484
#define SPEED_ROUTINE_MPN_COPY_BYTES(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 0);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, s->xp, s->size * GMP_LIMB_BYTES);		\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* For mpn_add_n, mpn_sub_n, or similar. */
Packit 5c3484
#define SPEED_ROUTINE_MPN_BINARY_N_CALL(call)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr     wp;							\
Packit 5c3484
    mp_ptr     xp, yp;							\
Packit 5c3484
    unsigned   i;							\
Packit 5c3484
    double     t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    xp = s->xp;								\
Packit 5c3484
    yp = s->yp;								\
Packit 5c3484
									\
Packit 5c3484
    if (s->r == 0)	;						\
Packit 5c3484
    else if (s->r == 1) { xp = wp;	    }				\
Packit 5c3484
    else if (s->r == 2) {	   yp = wp; }				\
Packit 5c3484
    else if (s->r == 3) { xp = wp; yp = wp; }				\
Packit 5c3484
    else if (s->r == 4) {     yp = xp;	    }				\
Packit 5c3484
    else		{						\
Packit 5c3484
      TMP_FREE;								\
Packit 5c3484
      return -1.0;							\
Packit 5c3484
    }									\
Packit 5c3484
									\
Packit 5c3484
    /* initialize wp if operand overlap */				\
Packit 5c3484
    if (xp == wp || yp == wp)						\
Packit 5c3484
      MPN_COPY (wp, s->xp, s->size);					\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, xp, s->size);					\
Packit 5c3484
    speed_operand_src (s, yp, s->size);					\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* For mpn_aors_errK_n, where 1 <= K <= 3. */
Packit 5c3484
#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K)			\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr     wp;							\
Packit 5c3484
    mp_ptr     xp, yp;							\
Packit 5c3484
    mp_ptr     zp[K];							\
Packit 5c3484
    mp_limb_t  ep[2*K];							\
Packit 5c3484
    unsigned   i;							\
Packit 5c3484
    double     t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    /* (don't have a mechanism to specify zp alignments) */		\
Packit 5c3484
    for (i = 0; i < K; i++)						\
Packit 5c3484
      SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0);			\
Packit 5c3484
									\
Packit 5c3484
    xp = s->xp;								\
Packit 5c3484
    yp = s->yp;								\
Packit 5c3484
									\
Packit 5c3484
    if (s->r == 0)	;						\
Packit 5c3484
    else if (s->r == 1) { xp = wp;	    }				\
Packit 5c3484
    else if (s->r == 2) {	   yp = wp; }				\
Packit 5c3484
    else if (s->r == 3) { xp = wp; yp = wp; }				\
Packit 5c3484
    else if (s->r == 4) {     yp = xp;	    }				\
Packit 5c3484
    else		{						\
Packit 5c3484
      TMP_FREE;								\
Packit 5c3484
      return -1.0;							\
Packit 5c3484
    }									\
Packit 5c3484
									\
Packit 5c3484
    /* initialize wp if operand overlap */				\
Packit 5c3484
    if (xp == wp || yp == wp)						\
Packit 5c3484
      MPN_COPY (wp, s->xp, s->size);					\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, xp, s->size);					\
Packit 5c3484
    speed_operand_src (s, yp, s->size);					\
Packit 5c3484
    for (i = 0; i < K; i++)						\
Packit 5c3484
      speed_operand_src (s, zp[i], s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function)			\
Packit 5c3484
  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], s->size, 0), 1)
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function)			\
Packit 5c3484
  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], s->size, 0), 2)
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function)			\
Packit 5c3484
  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], zp[2], s->size, 0), 3)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* For combined add-and-subtract routines such as mpn_add_n_sub_n, which
   write two results (ap and sp) from the xp,yp operands. */
Packit 5c3484
#define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr     ap, sp;							\
Packit 5c3484
    mp_ptr     xp, yp;							\
Packit 5c3484
    unsigned   i;							\
Packit 5c3484
    double     t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (ap, s->size, s->align_wp);			\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (sp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    xp = s->xp;								\
Packit 5c3484
    yp = s->yp;								\
Packit 5c3484
									\
Packit 5c3484
    if ((s->r & 1) != 0) { xp = ap; }					\
Packit 5c3484
    if ((s->r & 2) != 0) { yp = ap; }					\
Packit 5c3484
    if ((s->r & 4) != 0) { xp = sp; }					\
Packit 5c3484
    if ((s->r & 8) != 0) { yp = sp; }					\
Packit 5c3484
    if ((s->r & 3) == 3  ||  (s->r & 12) == 12)				\
Packit 5c3484
      {									\
Packit 5c3484
	TMP_FREE;							\
Packit 5c3484
	return -1.0;							\
Packit 5c3484
      }									\
Packit 5c3484
									\
Packit 5c3484
    /* initialize ap if operand overlap */				\
Packit 5c3484
    if (xp == ap || yp == ap)						\
Packit 5c3484
      MPN_COPY (ap, s->xp, s->size);					\
Packit 5c3484
    /* initialize sp if operand overlap */				\
Packit 5c3484
    if (xp == sp || yp == sp)						\
Packit 5c3484
      MPN_COPY (sp, s->xp, s->size);					\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, xp, s->size);					\
Packit 5c3484
    speed_operand_src (s, yp, s->size);					\
Packit 5c3484
    speed_operand_dst (s, ap, s->size);					\
Packit 5c3484
    speed_operand_dst (s, sp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_BINARY_N(function)				\
Packit 5c3484
   SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_BINARY_NC(function)				\
Packit 5c3484
   SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size, 0))
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* For mpn_lshift, mpn_rshift, mpn_mul_1, with r, or similar. */
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_1_CALL(call)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_1(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_1C(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0))
Packit 5c3484
Packit 5c3484
/* FIXME: wp is uninitialized here, should start it off from xp */
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_1_INPLACE(function)			\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, wp, s->size, s->r))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_DIVEXACT_1(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_BDIV_Q_1(function)				\
Packit 5c3484
    SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL(call)			\
Packit 5c3484
  {									\
Packit 5c3484
    unsigned   shift;							\
Packit 5c3484
    mp_limb_t  dinv;							\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size > 0);					\
Packit 5c3484
    SPEED_RESTRICT_COND (s->r != 0);					\
Packit 5c3484
									\
Packit 5c3484
    count_trailing_zeros (shift, s->r);					\
Packit 5c3484
    binvert_limb (dinv, s->r >> shift);					\
Packit 5c3484
									\
Packit 5c3484
    SPEED_ROUTINE_MPN_UNARY_1_CALL (call);				\
Packit 5c3484
  }
Packit 5c3484
#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1(function)			\
Packit 5c3484
  SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL					\
Packit 5c3484
  ((*function) (wp, s->xp, s->size, s->r, dinv, shift))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_BDIV_DBM1C(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_DIVREM_1(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0, s->xp, s->size, s->r))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_DIVREM_1C(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0, s->xp, s->size, s->r, 0))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_DIVREM_1F(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0, s->r))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_DIVREM_1CF(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0, s->r, 0))
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Setup shared by the preinv divrem_1 measurements.  The divisor is s->r,
   which must be non-zero: "shift" is set by count_leading_zeros on it and
   "dinv" by invert_limb on s->r << shift.  Both are in scope for "call",
   and the timing itself is done by SPEED_ROUTINE_MPN_UNARY_1_CALL.

   The closing brace deliberately has no line continuation, so the macro
   ends there whatever text follows it.  */
#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL(call)			\
  {									\
    unsigned   shift;							\
    mp_limb_t  dinv;							\
									\
    SPEED_RESTRICT_COND (s->size >= 0);					\
    SPEED_RESTRICT_COND (s->r != 0);					\
									\
    count_leading_zeros (shift, s->r);					\
    invert_limb (dinv, s->r << shift);					\
									\
    SPEED_ROUTINE_MPN_UNARY_1_CALL (call);				\
  }
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1(function)			\
Packit 5c3484
  SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL				\
Packit 5c3484
  ((*function) (wp, 0, s->xp, s->size, s->r, dinv, shift))
Packit 5c3484
Packit 5c3484
/* s->size limbs worth of fraction part */
Packit 5c3484
#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1F(function)			\
Packit 5c3484
  SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL				\
Packit 5c3484
  ((*function) (wp, s->size, s->xp, 0, s->r, dinv, shift))
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* s->r is duplicated to form the multiplier, defaulting to
Packit 5c3484
   MP_BASES_BIG_BASE_10.  Not sure if that's particularly useful, but at
Packit 5c3484
   least it provides some control.  */
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_N(function,N)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr     wp;							\
Packit 5c3484
    mp_size_t  wn;							\
Packit 5c3484
    unsigned   i;							\
Packit 5c3484
    double     t;							\
Packit 5c3484
    mp_limb_t  yp[N];							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= N);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    wn = s->size + N-1;							\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);			\
Packit 5c3484
    for (i = 0; i < N; i++)						\
Packit 5c3484
      yp[i] = (s->r != 0 ? s->r : MP_BASES_BIG_BASE_10);		\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_src (s, yp, (mp_size_t) N);				\
Packit 5c3484
    speed_operand_dst (s, wp, wn);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, s->xp, s->size, yp);				\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_2(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_N (function, 2)
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_3(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_N (function, 3)
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_4(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_N (function, 4)
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_5(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_N (function, 5)
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_6(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_N (function, 6)
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_7(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_N (function, 7)
Packit 5c3484
#define SPEED_ROUTINE_MPN_UNARY_8(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_UNARY_N (function, 8)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* For mpn_mul, mpn_mul_basecase: xsize=s->size, ysize=r (ysize=s->size when
   r is 0, and ysize=-r-s->size when r is negative).  */
Packit 5c3484
#define SPEED_ROUTINE_MPN_MUL(function)					\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp;							\
Packit 5c3484
    mp_size_t size1;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    size1 = (s->r == 0 ? s->size : s->r);				\
Packit 5c3484
    if (size1 < 0) size1 = -size1 - s->size;				\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (size1 >= 1);					\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= size1);				\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp);		\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_src (s, s->yp, size1);				\
Packit 5c3484
    speed_operand_dst (s, wp, size1 + s->size);				\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, s->xp, s->size, s->yp, size1);			\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_MUL_N_CALL(call)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_src (s, s->yp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, 2*s->size);				\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time "function (wp, s->xp, s->yp, s->size)" using the operands set up by
   SPEED_ROUTINE_MPN_MUL_N_CALL.  The expansion is a complete braced block,
   so it carries no trailing semicolon.  */
#define SPEED_ROUTINE_MPN_MUL_N(function)				\
  SPEED_ROUTINE_MPN_MUL_N_CALL (function (wp, s->xp, s->yp, s->size))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_MULLO_N_CALL(call)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_src (s, s->yp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time "function (wp, s->xp, s->yp, s->size)" using the operands set up by
   SPEED_ROUTINE_MPN_MULLO_N_CALL.  The expansion is a complete braced
   block, so it carries no trailing semicolon.  */
#define SPEED_ROUTINE_MPN_MULLO_N(function)				\
  SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size))
Packit 5c3484
Packit 5c3484
/* Time "function (wp, s->xp, s->yp, s->size)" using the operands set up by
   SPEED_ROUTINE_MPN_MULLO_N_CALL.  The expansion is a complete braced
   block, so it carries no trailing semicolon.  */
#define SPEED_ROUTINE_MPN_MULLO_BASECASE(function)			\
  SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_SQRLO(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, s->xp, s->size);					\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r, ysize=s->size. */
Packit 5c3484
#define SPEED_ROUTINE_MPN_MULMID(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp, xp;							\
Packit 5c3484
    mp_size_t size1;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    size1 = (s->r == 0 ? (2 * s->size - 1) : s->r);			\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
    SPEED_RESTRICT_COND (size1 >= s->size);				\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);	\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, xp, size1);					\
Packit 5c3484
    speed_operand_src (s, s->yp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, size1 - s->size + 3);			\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, xp, size1, s->yp, s->size);				\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_MULMID_N(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp, xp;							\
Packit 5c3484
    mp_size_t size1;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    size1 = 2 * s->size - 1;						\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);	\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, xp, size1);					\
Packit 5c3484
    speed_operand_src (s, s->yp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, size1 - s->size + 3);			\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, xp, s->yp, s->size);				\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Function body measuring FUNCTION (wp, xp, s->yp, s->size, scratch).
   xp is a temporary of 2*s->size-1 limbs, wp a temporary of s->size+2
   limbs, and scratch holds mpn_toom42_mulmid_itch (s->size) limbs,
   allocated with an alignment argument of 0.  Sizes below 1 are
   rejected.  */
#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function) \
  { \
    double    elapsed; \
    unsigned  remaining; \
    mp_size_t xsize, wsize, scratch_limbs; \
    mp_ptr    wp, xp, scratch; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    xsize = 2 * s->size - 1; \
    wsize = xsize - s->size + 3; \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, wsize, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (xp, xsize, s->align_xp); \
    scratch_limbs = mpn_toom42_mulmid_itch (s->size); \
    SPEED_TMP_ALLOC_LIMBS (scratch, scratch_limbs, 0); \
 \
    speed_operand_src (s, xp, xsize); \
    speed_operand_src (s, s->yp, s->size); \
    speed_operand_dst (s, wp, wsize); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    remaining = s->reps; \
    do { \
      function (wp, xp, s->yp, s->size, scratch); \
    } while (--remaining != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
Packit 5c3484
/* Function body measuring CALL, executed s->reps times.  CALL is pasted
   verbatim into the loop, so it may refer to the locals declared here:
   wp (2*s->size limbs) and tp (itch limbs, where itch is
   mpn_mulmod_bnm1_itch with all three arguments equal to s->size).
   s->xp and s->yp are passed to speed_operand_src as s->size limbs each.
   Sizes below 1 are rejected.  */
#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call) \
  { \
    mp_size_t itch; \
    mp_ptr    wp, tp; \
    double    t; \
    unsigned  i; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    itch = mpn_mulmod_bnm1_itch (s->size, s->size, s->size); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, s->yp, s->size); \
    speed_operand_dst (s, wp, 2 * s->size); \
    speed_operand_dst (s, tp, itch); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      call; \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
/* Function body measuring
   FUNCTION (wp, rn, s->xp, s->size, s->yp, s->size, tp)
   where rn = mpn_mulmod_bnm1_next_size (s->size).  wp has rn limbs and tp
   has mpn_mulmod_bnm1_itch (rn, rn, rn) limbs.  Sizes below 1 are
   rejected.  */
#define SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED(function) \
  { \
    mp_size_t rn, scratch_limbs; \
    mp_ptr    wp, tp; \
    double    elapsed; \
    unsigned  remaining; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    rn = mpn_mulmod_bnm1_next_size (s->size); \
    scratch_limbs = mpn_mulmod_bnm1_itch (rn, rn, rn); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, rn, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, scratch_limbs, s->align_wp2); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, s->yp, s->size); \
    speed_operand_dst (s, wp, rn); \
    speed_operand_dst (s, tp, scratch_limbs); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    remaining = s->reps; \
    do { \
      function (wp, rn, s->xp, s->size, s->yp, s->size, tp); \
    } while (--remaining != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
Packit 5c3484
/* Function body measuring CALL, executed s->reps times, for multiply
   routines that take a scratch area.  CALL is pasted verbatim and may use
   the locals wp (2*s->size limbs) and tspace (TSIZE limbs).  TSIZE is
   expanded twice, so it should be free of side effects.  Sizes below
   MINSIZE are rejected.  */
#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
  { \
    double    t; \
    unsigned  i; \
    mp_ptr    wp, tspace; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= minsize); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, s->yp, s->size); \
    speed_operand_dst (s, wp, 2 * s->size); \
    speed_operand_dst (s, tspace, tsize); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      call; \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance: both operands are s->size
   limbs, scratch is mpn_toom22_mul_itch (s->size, s->size) limbs, and the
   minimum size is MPN_TOOM22_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
    mpn_toom22_mul_itch (s->size, s->size), \
    MPN_TOOM22_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance: both operands are s->size
   limbs, scratch is mpn_toom33_mul_itch (s->size, s->size) limbs, and the
   minimum size is MPN_TOOM33_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
    mpn_toom33_mul_itch (s->size, s->size), \
    MPN_TOOM33_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance: both operands are s->size
   limbs, scratch is mpn_toom44_mul_itch (s->size, s->size) limbs, and the
   minimum size is MPN_TOOM44_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
    mpn_toom44_mul_itch (s->size, s->size), \
    MPN_TOOM44_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance: both operands are s->size
   limbs, scratch is mpn_toom6h_mul_itch (s->size, s->size) limbs, and the
   minimum size is MPN_TOOM6H_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
    mpn_toom6h_mul_itch (s->size, s->size), \
    MPN_TOOM6H_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance: both operands are s->size
   limbs, scratch is mpn_toom8h_mul_itch (s->size, s->size) limbs, and the
   minimum size is MPN_TOOM8H_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
    mpn_toom8h_mul_itch (s->size, s->size), \
    MPN_TOOM8H_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance with unbalanced operands: the
   first is s->size limbs, the second 2*s->size/3 limbs.  Scratch is sized
   by mpn_toom32_mul_itch for that pair and the minimum size is
   MPN_TOOM32_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \
    mpn_toom32_mul_itch (s->size, 2*s->size/3), \
    MPN_TOOM32_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance with unbalanced operands: the
   first is s->size limbs, the second s->size/2 limbs.  Scratch is sized
   by mpn_toom42_mul_itch for that pair and the minimum size is
   MPN_TOOM42_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
    mpn_toom42_mul_itch (s->size, s->size/2), \
    MPN_TOOM42_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance with unbalanced operands: the
   first is s->size limbs, the second s->size*3/4 limbs.  Scratch is sized
   by mpn_toom43_mul_itch for that pair and the minimum size is
   MPN_TOOM43_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace), \
    mpn_toom43_mul_itch (s->size, s->size*3/4), \
    MPN_TOOM43_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance with unbalanced operands: the
   first is s->size limbs, the second s->size/2 limbs.  Scratch is sized
   by mpn_toom63_mul_itch for that pair and the minimum size is
   MPN_TOOM63_MUL_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
    mpn_toom63_mul_itch (s->size, s->size/2), \
    MPN_TOOM63_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance using operand sizes s->size and
   17*s->size/24, with scratch from mpn_toom32_mul_itch and minimum size
   MPN_TOOM32_MUL_MINSIZE.  Uses the same operand shape as
   SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL.  */
#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
    mpn_toom32_mul_itch (s->size, 17*s->size/24), \
    MPN_TOOM32_MUL_MINSIZE)
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance using operand sizes s->size and
   17*s->size/24, with scratch from mpn_toom43_mul_itch and minimum size
   MPN_TOOM43_MUL_MINSIZE.  Uses the same operand shape as
   SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL.  */
#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
    mpn_toom43_mul_itch (s->size, 17*s->size/24), \
    MPN_TOOM43_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance using operand sizes s->size and
   19*s->size/30, with scratch from mpn_toom32_mul_itch and minimum size
   MPN_TOOM32_MUL_MINSIZE.  Uses the same operand shape as
   SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL.  */
#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
    mpn_toom32_mul_itch (s->size, 19*s->size/30), \
    MPN_TOOM32_MUL_MINSIZE)
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance using operand sizes s->size and
   19*s->size/30, with scratch from mpn_toom53_mul_itch and minimum size
   MPN_TOOM53_MUL_MINSIZE.  Uses the same operand shape as
   SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL.  */
#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
    mpn_toom53_mul_itch (s->size, 19*s->size/30), \
    MPN_TOOM53_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance using operand sizes s->size and
   11*s->size/20, with scratch from mpn_toom42_mul_itch and minimum size
   MPN_TOOM42_MUL_MINSIZE.  Uses the same operand shape as
   SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL.  */
#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
    mpn_toom42_mul_itch (s->size, 11*s->size/20), \
    MPN_TOOM42_MUL_MINSIZE)
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance using operand sizes s->size and
   11*s->size/20, with scratch from mpn_toom53_mul_itch and minimum size
   MPN_TOOM53_MUL_MINSIZE.  Uses the same operand shape as
   SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL.  */
#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
    mpn_toom53_mul_itch (s->size, 11*s->size/20), \
    MPN_TOOM53_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance using operand sizes s->size and
   5*s->size/6, the same operand shape as
   SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL.  Scratch is sized with
   mpn_toom43_mul_itch, i.e. the itch function belonging to the routine
   being timed, as every other *_FOR_* macro does.  The minimum size stays
   at MPN_TOOM54_MUL_MINSIZE so this macro and its companion accept the
   same range of sizes.  */
#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \
    mpn_toom43_mul_itch (s->size, 5*s->size/6), \
    MPN_TOOM54_MUL_MINSIZE)
Packit 5c3484
/* SPEED_ROUTINE_MPN_MUL_N_TSPACE instance using operand sizes s->size and
   5*s->size/6, with scratch from mpn_toom54_mul_itch and minimum size
   MPN_TOOM54_MUL_MINSIZE.  Uses the same operand shape as
   SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL.  */
#define SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
    function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \
    mpn_toom54_mul_itch (s->size, 5*s->size/6), \
    MPN_TOOM54_MUL_MINSIZE)
Packit 5c3484
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Function body measuring CALL, executed s->reps times, for squaring
   routines.  CALL is pasted verbatim and may use the local wp, a
   temporary of 2*s->size limbs.  s->xp is passed to speed_operand_src as
   s->size limbs.  Sizes below 1 are rejected.  */
#define SPEED_ROUTINE_MPN_SQR_CALL(call) \
  { \
    double    t; \
    unsigned  i; \
    mp_ptr    wp; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_dst (s, wp, 2 * s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      call; \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_SQR_CALL instance timing
   FUNCTION (wp, s->xp, s->size).  */
#define SPEED_ROUTINE_MPN_SQR(function) \
  SPEED_ROUTINE_MPN_SQR_CALL ( \
    function (wp, s->xp, s->size))
Packit 5c3484
Packit 5c3484
/* Function body measuring CALL, executed s->reps times.  CALL is pasted
   verbatim and may use the locals tp and wp, each a temporary of
   2*s->size limbs allocated with s->align_wp.  tp is passed to
   speed_operand_src and wp to speed_operand_dst.  The value returned is
   HALF of what speed_endtime reports.  Sizes below 2 are rejected.  */
#define SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL(call) \
  { \
    double    t; \
    unsigned  i; \
    mp_ptr    wp, tp; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 2); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, tp, 2 * s->size); \
    speed_operand_dst (s, wp, 2 * s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      call; \
    } while (--i != 0); \
    /* report half the measured time */ \
    t = speed_endtime () / 2; \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* Function body measuring CALL, executed s->reps times, for squaring
   routines that take a scratch area.  CALL is pasted verbatim and may use
   the locals wp (2*s->size limbs) and tspace (TSIZE limbs).  TSIZE is
   expanded twice, so it should be free of side effects.  Sizes below
   MINSIZE are rejected.  */
#define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize) \
  { \
    double    t; \
    unsigned  i; \
    mp_ptr    wp, tspace; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= minsize); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_dst (s, wp, 2 * s->size); \
    speed_operand_dst (s, tspace, tsize); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      call; \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_SQR_TSPACE instance: scratch is
   mpn_toom2_sqr_itch (s->size) limbs and the minimum size is
   MPN_TOOM2_SQR_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM2_SQR(function) \
  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
    function (wp, s->xp, s->size, tspace), \
    mpn_toom2_sqr_itch (s->size), \
    MPN_TOOM2_SQR_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_SQR_TSPACE instance: scratch is
   mpn_toom3_sqr_itch (s->size) limbs and the minimum size is
   MPN_TOOM3_SQR_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM3_SQR(function) \
  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
    function (wp, s->xp, s->size, tspace), \
    mpn_toom3_sqr_itch (s->size), \
    MPN_TOOM3_SQR_MINSIZE)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_SQR_TSPACE instance: scratch is
   mpn_toom4_sqr_itch (s->size) limbs and the minimum size is
   MPN_TOOM4_SQR_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM4_SQR(function) \
  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
    function (wp, s->xp, s->size, tspace), \
    mpn_toom4_sqr_itch (s->size), \
    MPN_TOOM4_SQR_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_SQR_TSPACE instance: scratch is
   mpn_toom6_sqr_itch (s->size) limbs and the minimum size is
   MPN_TOOM6_SQR_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM6_SQR(function) \
  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
    function (wp, s->xp, s->size, tspace), \
    mpn_toom6_sqr_itch (s->size), \
    MPN_TOOM6_SQR_MINSIZE)
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_SQR_TSPACE instance: scratch is
   mpn_toom8_sqr_itch (s->size) limbs and the minimum size is
   MPN_TOOM8_SQR_MINSIZE.  */
#define SPEED_ROUTINE_MPN_TOOM8_SQR(function) \
  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
    function (wp, s->xp, s->size, tspace), \
    mpn_toom8_sqr_itch (s->size), \
    MPN_TOOM8_SQR_MINSIZE)
Packit 5c3484
Packit 5c3484
/* Function body measuring CALL, executed s->reps times, for routines that
   only read s->xp (s->size limbs).  No temporaries are allocated, so the
   result of speed_endtime is returned directly.  Negative sizes are
   rejected.  */
#define SPEED_ROUTINE_MPN_MOD_CALL(call) \
  { \
    unsigned  i; \
 \
    SPEED_RESTRICT_COND (s->size >= 0); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      call; \
    } while (--i != 0); \
 \
    return speed_endtime (); \
  }
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MOD_CALL instance timing
   (*FUNCTION) (s->xp, s->size, s->r).  */
#define SPEED_ROUTINE_MPN_MOD_1(function) \
  SPEED_ROUTINE_MPN_MOD_CALL ( \
    (*function) (s->xp, s->size, s->r))
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MOD_CALL instance timing
   (*FUNCTION) (s->xp, s->size, s->r, CNST_LIMB(0)), i.e. with a zero
   value for the fourth argument.  */
#define SPEED_ROUTINE_MPN_MOD_1C(function) \
  SPEED_ROUTINE_MPN_MOD_CALL ( \
    (*function)(s->xp, s->size, s->r, CNST_LIMB(0)))
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MOD_CALL instance timing
   FUNCTION (s->xp, s->size, s->r).  The expansion is a bare compound
   statement with no trailing semicolon, like the other SPEED_ROUTINE
   macros; any terminator is left to the place of use.  */
#define SPEED_ROUTINE_MPN_MODEXACT_1_ODD(function) \
  SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r))
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MOD_CALL instance timing
   FUNCTION (s->xp, s->size, s->r, CNST_LIMB(0)).  The expansion is a bare
   compound statement with no trailing semicolon, like the other
   SPEED_ROUTINE macros; any terminator is left to the place of use.  */
#define SPEED_ROUTINE_MPN_MODEXACT_1C_ODD(function) \
  SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r, CNST_LIMB(0)))
Packit 5c3484
Packit 5c3484
/* SPEED_ROUTINE_MPN_MOD_CALL instance timing
   (*FUNCTION) (s->xp, s->size); no divisor argument is passed.  */
#define SPEED_ROUTINE_MPN_MOD_34LSUB1(function) \
  SPEED_ROUTINE_MPN_MOD_CALL ( \
    (*function) (s->xp, s->size))
Packit 5c3484
Packit 5c3484
/* Function body measuring (*FUNCTION) (s->xp, s->size, s->r, inverse),
   where the inverse is produced once, outside the timed region, by
   invert_limb (..., s->r).  Rejected unless s->size >= 0 and s->r has
   GMP_LIMB_HIGHBIT set.  */
#define SPEED_ROUTINE_MPN_PREINV_MOD_1(function) \
  { \
    mp_limb_t  divisor_inverse; \
    unsigned   remaining; \
 \
    SPEED_RESTRICT_COND (s->size >= 0); \
    SPEED_RESTRICT_COND (s->r & GMP_LIMB_HIGHBIT); \
 \
    invert_limb (divisor_inverse, s->r); \
    speed_operand_src (s, s->xp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    remaining = s->reps; \
    do { \
      (*function) (s->xp, s->size, s->r, divisor_inverse); \
    } while (--remaining != 0); \
 \
    return speed_endtime (); \
  }
Packit 5c3484
Packit 5c3484
/* Function body measuring the pair PFUNC (pre, s->r) followed by
   FUNCTION (s->xp, s->size, s->r << pre[1], pre); both calls are inside
   the timed loop.  The 4-limb pre array is also filled once with
   mpn_mod_1_1p_cps before timing starts.  Sizes below 2 are rejected.  */
#define SPEED_ROUTINE_MPN_MOD_1_1(function,pfunc) \
  { \
    mp_limb_t  pre[4]; \
    unsigned   remaining; \
 \
    SPEED_RESTRICT_COND (s->size >= 2); \
 \
    mpn_mod_1_1p_cps (pre, s->r); \
    speed_operand_src (s, s->xp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    remaining = s->reps; \
    do { \
      pfunc (pre, s->r); \
      function (s->xp, s->size, s->r << pre[1], pre); \
    } while (--remaining != 0); \
 \
    return speed_endtime (); \
  }
Packit 5c3484
/* Function body measuring the pair PFUNC (pre, s->r) followed by
   FUNCTION (s->xp, s->size, s->r, pre); both calls are inside the timed
   loop.  pre is an array of N+3 limbs.  Rejected unless s->size >= 1 and
   s->r is no larger than the maximum limb value divided by N.  */
#define SPEED_ROUTINE_MPN_MOD_1_N(function,pfunc,N) \
  { \
    mp_limb_t  pre[N+3]; \
    unsigned   remaining; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
    SPEED_RESTRICT_COND (s->r <= ~(mp_limb_t)0 / N); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    remaining = s->reps; \
    do { \
      pfunc (pre, s->r); \
      function (s->xp, s->size, s->r, pre); \
    } while (--remaining != 0); \
 \
    return speed_endtime (); \
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* A division of 2*s->size by s->size limbs */
Packit 5c3484
Packit 5c3484
/* Function body measuring CALL, executed s->reps times, for a division of
   2*s->size limbs by s->size limbs.  CALL is pasted verbatim and may use
   these locals:
     a     dividend, 2*s->size limbs (two copies of s->xp)
     d     divisor, s->size limbs (copy of s->yp)
     q     s->size+1 limbs, passed to speed_operand_dst
     r     s->size limbs, passed to speed_operand_dst
     dinv  gmp_pi1_t filled by invert_pi1 from the two high limbs of d
   The data is adjusted so d has GMP_NUMB_HIGHBIT set in its top limb and
   the top limb of a is one less than the top limb of d.
   s->size must be at least 2: invert_pi1 is handed d[s->size-2], which
   would lie outside d for a single-limb divisor.  */
#define SPEED_ROUTINE_MPN_DC_DIVREM_CALL(call) \
  { \
    unsigned  i; \
    mp_ptr    a, d, q, r; \
    double    t; \
    gmp_pi1_t dinv; \
    TMP_DECL; \
 \
    /* two divisor limbs are read below */ \
    SPEED_RESTRICT_COND (s->size >= 2); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (a, 2*s->size, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (d, s->size,   s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (q, s->size+1, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (r, s->size,   s->align_wp2); \
 \
    MPN_COPY (a, s->xp, s->size); \
    MPN_COPY (a+s->size, s->xp, s->size); \
 \
    MPN_COPY (d, s->yp, s->size); \
 \
    /* normalize the data */ \
    d[s->size-1] |= GMP_NUMB_HIGHBIT; \
    a[2*s->size-1] = d[s->size-1] - 1; \
 \
    invert_pi1 (dinv, d[s->size-1], d[s->size-2]); \
 \
    speed_operand_src (s, a, 2*s->size); \
    speed_operand_src (s, d, s->size); \
    speed_operand_dst (s, q, s->size+1); \
    speed_operand_dst (s, r, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      call; \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* A remainder of 2*s->size limbs by s->size limbs */
Packit 5c3484
Packit 5c3484
/* Function body measuring FUNCTION (r, a, d) on mpz_t operands, executed
   s->reps times.
     d  set from s->yp (s->size limbs) with mpz_init_set_n
     a  (s->xp mod d) shifted left by GMP_LIMB_BITS*s->size bits, with the
        low s->size limbs then overwritten by a copy of s->xp
     r  an empty mpz_t receiving the result
   Sizes below 1 are rejected before anything is initialised.  The three
   mpz_t variables are cleared before returning so repeated measurements
   do not leak their storage.  */
#define SPEED_ROUTINE_MPZ_MOD(function) \
  { \
    unsigned   i; \
    double     t; \
    mpz_t      a, d, r; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    mpz_init_set_n (d, s->yp, s->size); \
 \
    /* high part less than d, low part a duplicate copied in */ \
    mpz_init_set_n (a, s->xp, s->size); \
    mpz_mod (a, a, d); \
    mpz_mul_2exp (a, a, GMP_LIMB_BITS * s->size); \
    MPN_COPY (PTR(a), s->xp, s->size); \
 \
    mpz_init (r); \
 \
    speed_operand_src (s, PTR(a), SIZ(a)); \
    speed_operand_src (s, PTR(d), SIZ(d)); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      function (r, a, d); \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    /* release operands only after the clock has been stopped */ \
    mpz_clear (r); \
    mpz_clear (a); \
    mpz_clear (d); \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* Function body measuring FUNCTION (qp, tp, size1, dp, s->size, INV).
   size1, the dividend length, is s->r when that is nonzero and 2*s->size
   otherwise.  Each timed iteration first refreshes tp from the pristine
   dividend ap with MPN_COPY, so that copy is included in the measurement.
   The local inv is filled by invert_pi1 from the two high divisor limbs;
   INV is pasted verbatim and so can refer to inv.
   Rejected unless s->size >= DMIN and size1 - s->size >= QMIN.  Because
   dp[s->size-2] is read, DMIN needs to be at least 2.  */
#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV, DMIN, QMIN) \
  { \
    mp_size_t  size1; \
    mp_ptr     dp, tp, ap, qp; \
    gmp_pi1_t  inv; \
    double     t; \
    unsigned   i; \
    TMP_DECL; \
 \
    size1 = (s->r == 0 ? 2 * s->size : s->r); \
 \
    SPEED_RESTRICT_COND (s->size >= DMIN); \
    SPEED_RESTRICT_COND (size1 - s->size >= QMIN); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2); \
 \
    /* only the low and high s->size limbs of the dividend are filled, */ \
    /* so it is not completely initialised when size1 > 2*s->size */ \
    MPN_COPY (ap, s->xp, s->size); \
    MPN_COPY (ap + size1 - s->size, s->xp, s->size); \
 \
    MPN_COPY (dp, s->yp, s->size); \
 \
    /* force the divisor's high bit, and make the dividend's top limb */ \
    /* one less than the divisor's */ \
    dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
    ap[size1 - 1] = dp[s->size - 1] - 1; \
 \
    invert_pi1 (inv, dp[s->size-1], dp[s->size-2]); \
 \
    speed_operand_src (s, ap, size1); \
    speed_operand_dst (s, tp, size1); \
    speed_operand_src (s, dp, s->size); \
    speed_operand_dst (s, qp, size1 - s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do \
      { \
        MPN_COPY (tp, ap, size1); \
        function (qp, tp, size1, dp, s->size, INV); \
      } \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
/* Function body measuring
   FUNCTION (qp, tp, 2*s->size, dp, s->size, scratch)
   executed s->reps times.  The dividend tp is two copies of s->xp and the
   divisor dp is a copy of s->yp; the data is then adjusted so dp has
   GMP_NUMB_HIGHBIT set in its top limb and the top limb of tp is one less
   than the top limb of dp.  scratch holds
   ITCHFN (2*s->size, s->size, 0) limbs.  Sizes below 2 are rejected.  */
#define SPEED_ROUTINE_MPN_MU_DIV_Q(function,itchfn) \
  { \
    unsigned   i; \
    mp_ptr     dp, tp, qp, scratch; \
    double     t; \
    mp_size_t  itch; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 2); \
 \
    itch = itchfn (2 * s->size, s->size, 0); \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
 \
    MPN_COPY (tp,         s->xp, s->size); \
    MPN_COPY (tp+s->size, s->xp, s->size); \
 \
    /* give the divisor defined contents before it is normalized */ \
    MPN_COPY (dp,         s->yp, s->size); \
 \
    /* normalize the data */ \
    dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
    tp[2*s->size-1] = dp[s->size-1] - 1; \
 \
    speed_operand_dst (s, qp, s->size); \
    speed_operand_src (s, tp, 2 * s->size); \
    speed_operand_src (s, dp, s->size); \
    speed_operand_dst (s, scratch, itch); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      function (qp, tp, 2 * s->size, dp, s->size, scratch); \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
/* Function body measuring
   FUNCTION (qp, rp, tp, nsize, dp, s->size, scratch)
   executed s->reps times.  nsize, the dividend length, is s->r when that
   is nonzero and 2*s->size otherwise.  scratch holds
   ITCHFN (nsize, s->size, 0) limbs.  Rejected unless s->size >= 2 and
   nsize >= s->size.  */
#define SPEED_ROUTINE_MPN_MU_DIV_QR(function,itchfn) \
  { \
    double     elapsed; \
    unsigned   remaining; \
    mp_size_t  nsize, scratch_limbs; \
    mp_ptr     dp, tp, qp, rp, scratch; \
    TMP_DECL; \
 \
    nsize = (s->r == 0 ? 2 * s->size : s->r); \
 \
    SPEED_RESTRICT_COND (s->size >= 2); \
    SPEED_RESTRICT_COND (nsize >= s->size); \
 \
    scratch_limbs = itchfn (nsize, s->size, 0); \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (qp, nsize - s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, nsize, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (scratch, scratch_limbs, s->align_wp2); \
    /* rp reuses align_wp2; original author queried this ("alignment?") */ \
    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); \
 \
    /* only the low and high s->size limbs of the dividend are filled */ \
    MPN_COPY (tp, s->xp, s->size); \
    MPN_COPY (tp + nsize - s->size, s->xp, s->size); \
 \
    MPN_COPY (dp, s->yp, s->size); \
 \
    /* force the divisor's high bit, and make the dividend's top limb */ \
    /* one less than the divisor's */ \
    dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
    tp[nsize - 1] = dp[s->size - 1] - 1; \
 \
    speed_operand_dst (s, qp, nsize - s->size); \
    speed_operand_dst (s, rp, s->size); \
    speed_operand_src (s, tp, nsize); \
    speed_operand_src (s, dp, s->size); \
    speed_operand_dst (s, scratch, scratch_limbs); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    remaining = s->reps; \
    do \
      function (qp, rp, tp, nsize, dp, s->size, scratch); \
    while (--remaining != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
/* Function body measuring
   FUNCTION (qp, rp, tp, nsize, dp, s->size, ip, s->size, scratch)
   executed s->reps times.  nsize, the dividend length, is s->r when that
   is nonzero and 2*s->size otherwise.  ip (s->size limbs) is computed
   once, before timing, by mpn_invert (ip, dp, s->size, ...) using a
   workspace of mpn_invert_itch (s->size) limbs.  scratch holds
   ITCHFN (nsize, s->size, s->size) limbs.  Rejected unless s->size >= 2
   and nsize >= s->size.  */
#define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn) \
  { \
    double     elapsed; \
    unsigned   remaining; \
    mp_size_t  nsize, scratch_limbs; \
    mp_ptr     dp, tp, qp, rp, ip, scratch, invert_ws; \
    TMP_DECL; \
 \
    nsize = (s->r == 0 ? 2 * s->size : s->r); \
 \
    SPEED_RESTRICT_COND (s->size >= 2); \
    SPEED_RESTRICT_COND (nsize >= s->size); \
 \
    scratch_limbs = itchfn (nsize, s->size, s->size); \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (qp, nsize - s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, nsize, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (scratch, scratch_limbs, s->align_wp2); \
    /* rp and ip reuse align_wp2; original author queried this */ \
    /* ("alignment?") */ \
    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); \
    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); \
 \
    /* only the low and high s->size limbs of the dividend are filled */ \
    MPN_COPY (tp, s->xp, s->size); \
    MPN_COPY (tp + nsize - s->size, s->xp, s->size); \
 \
    MPN_COPY (dp, s->yp, s->size); \
 \
    /* force the divisor's high bit, and make the dividend's top limb */ \
    /* one less than the divisor's */ \
    dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
    tp[nsize - 1] = dp[s->size-1] - 1; \
 \
    /* precompute the inverse outside the timed region */ \
    invert_ws = TMP_ALLOC_LIMBS (mpn_invert_itch (s->size)); \
    mpn_invert (ip, dp, s->size, invert_ws); \
 \
    speed_operand_dst (s, qp, nsize - s->size); \
    speed_operand_dst (s, rp, s->size); \
    speed_operand_src (s, tp, nsize); \
    speed_operand_src (s, dp, s->size); \
    speed_operand_src (s, ip, s->size); \
    speed_operand_dst (s, scratch, scratch_limbs); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    remaining = s->reps; \
    do \
      function (qp, rp, tp, nsize, dp, s->size, ip, s->size, scratch); \
    while (--remaining != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
Packit 5c3484
/* Measure a bdiv_qr routine taking a single-limb precomputed inverse.
   The dividend is 2*s->size limbs (s->xp repeated twice) and the divisor is
   s->size limbs copied from s->yp with its low bit set.  function is called
   as function (qp, np, 2*s->size, dp, s->size, inv) on a fresh copy of the
   dividend each repetition; that copy is inside the timed region.  */
#define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function) \
  { \
    mp_ptr     np, wp, dp, qp; \
    mp_size_t  nn; \
    mp_limb_t  dinv; \
    unsigned   j; \
    double     elapsed; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
    nn = 2 * s->size; \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (np, nn, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (wp, nn, s->align_wp2); \
 \
    /* odd divisor, plus the negated binvert_limb of its low limb */ \
    MPN_COPY (dp, s->yp, s->size); \
    dp[0] |= 1; \
    binvert_limb (dinv, dp[0]); \
    dinv = -dinv; \
 \
    /* dividend: low and high halves are both s->xp */ \
    MPN_COPY (np, s->xp, s->size); \
    MPN_COPY (np + s->size, s->xp, s->size); \
 \
    speed_operand_src (s, np, nn); \
    speed_operand_dst (s, wp, nn); \
    speed_operand_src (s, dp, s->size); \
    speed_operand_dst (s, qp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      /* the call works on wp in place, so refresh it every time */ \
      MPN_COPY (wp, np, nn); \
      function (qp, wp, nn, dp, s->size, dinv); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
/* Measure a quotient-only bdiv routine taking a single-limb precomputed
   inverse.  Dividend (s->xp) and divisor (s->yp with its low bit forced on)
   are both s->size limbs.  function is called as
   function (qp, np, s->size, dp, s->size, inv) on a working copy of s->xp
   that is refreshed inside the timed loop.  */
#define SPEED_ROUTINE_MPN_PI1_BDIV_Q(function) \
  { \
    mp_ptr     wp, dp, qp; \
    mp_limb_t  dinv; \
    unsigned   j; \
    double     elapsed; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp2); \
 \
    /* odd divisor, plus the negated binvert_limb of its low limb */ \
    MPN_COPY (dp, s->yp, s->size); \
    dp[0] = dp[0] | 1; \
    binvert_limb (dinv, dp[0]); \
    dinv = -dinv; \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_dst (s, wp, s->size); \
    speed_operand_src (s, dp, s->size); \
    speed_operand_dst (s, qp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      /* refresh the working dividend before each call */ \
      MPN_COPY (wp, s->xp, s->size); \
      function (qp, wp, s->size, dp, s->size, dinv); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
/* Measure a quotient-only bdiv routine that needs caller-supplied scratch.
   itchfn (s->size, s->size) gives the scratch size in limbs.  function is
   called as function (qp, s->xp, s->size, dp, s->size, scratch), where dp
   is s->yp with its low bit forced on.  Sizes below 2 are rejected.  */
#define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn) \
  { \
    mp_ptr     dp, qp, work; \
    mp_size_t  work_limbs; \
    unsigned   j; \
    double     elapsed; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 2); \
 \
    work_limbs = itchfn (s->size, s->size); \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (work, work_limbs, s->align_wp2); \
 \
    /* the divisor has to be odd */ \
    MPN_COPY (dp, s->yp, s->size); \
    dp[0] = dp[0] | 1; \
 \
    speed_operand_dst (s, qp, s->size); \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, dp, s->size); \
    speed_operand_dst (s, work, work_limbs); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      function (qp, s->xp, s->size, dp, s->size, work); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
/* Measure a bdiv routine producing quotient and remainder with
   caller-supplied scratch of itchfn (2*s->size, s->size) limbs.  The
   dividend is 2*s->size limbs (s->xp repeated twice), the divisor is s->yp
   with its low bit forced on.  function is called as
   function (qp, rp, np, 2*s->size, dp, s->size, scratch).  Sizes below 2
   are rejected.  */
#define SPEED_ROUTINE_MPN_MU_BDIV_QR(function,itchfn) \
  { \
    mp_ptr     dp, np, qp, rp, work; \
    mp_size_t  nn, work_limbs; \
    unsigned   j; \
    double     elapsed; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 2); \
 \
    nn = 2 * s->size; \
    work_limbs = itchfn (nn, s->size); \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (np, nn, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (work, work_limbs, s->align_wp2); \
    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
 \
    /* the divisor has to be odd */ \
    MPN_COPY (dp, s->yp, s->size); \
    dp[0] = dp[0] | 1; \
 \
    /* dividend: low and high halves are both s->xp */ \
    MPN_COPY (np, s->xp, s->size); \
    MPN_COPY (np + s->size, s->xp, s->size); \
 \
    speed_operand_dst (s, qp, s->size); \
    speed_operand_dst (s, rp, s->size); \
    speed_operand_src (s, np, nn); \
    speed_operand_src (s, dp, s->size); \
    speed_operand_dst (s, work, work_limbs); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      function (qp, rp, np, nn, dp, s->size, work); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
Packit 5c3484
/* Measure (*function) (wp, s->xp, s->size, s->r) through
   SPEED_ROUTINE_MPN_UNARY_1_CALL.  Only odd s->r is accepted, and the low
   bit of s->xp[0] is set in place first, so the shared operand itself is
   modified.  */
#define SPEED_ROUTINE_MPN_BROOT(function) \
  { \
    SPEED_RESTRICT_COND ((s->r & 1) != 0); \
    s->xp[0] = s->xp[0] | 1; \
    SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r)); \
  }
Packit 5c3484
Packit 5c3484
/* Measure (*function) (wp, s->xp, s->size, s->r, tp), where tp is a scratch
   area of "itch" limbs.  Only odd s->r and s->size >= 1 are accepted, and
   the low bit of s->xp[0] is set in place before timing.  */
#define SPEED_ROUTINE_MPN_BROOTINV(function, itch) \
  { \
    mp_ptr    wp, tp; \
    unsigned  i; \
    double    t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
    SPEED_RESTRICT_COND (s->r & 1); \
 \
    /* mark only after the restriction checks, as the sibling macros do */ \
    TMP_MARK; \
    wp = TMP_ALLOC_LIMBS (s->size); \
    tp = TMP_ALLOC_LIMBS ( (itch)); \
    s->xp[0] |= 1; \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_dst (s, wp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do \
      (*function) (wp, s->xp, s->size, s->r, tp); \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* Measure function (ip, up, s->size, tp).  up is a copy of s->xp with
   GMP_NUMB_HIGHBIT set in its top limb, ip receives the result and tp is
   scratch of itchfn (s->size) limbs.  */
#define SPEED_ROUTINE_MPN_INVERT(function,itchfn) \
  { \
    unsigned  i;  /* same counter type as the other routines here */ \
    mp_ptr    up, tp, ip; \
    double    t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
 \
    MPN_COPY (up, s->xp, s->size); \
 \
    /* normalize the data */ \
    up[s->size-1] |= GMP_NUMB_HIGHBIT; \
 \
    speed_operand_src (s, up, s->size); \
    speed_operand_dst (s, tp, s->size); \
    speed_operand_dst (s, ip, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do \
      function (ip, up, s->size, tp); \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* Measure function (ip, up, s->size, tp) for an approximate-inverse
   routine.  up is a copy of s->xp with GMP_NUMB_HIGHBIT set in its top
   limb, ip receives the result and tp is scratch of itchfn (s->size)
   limbs.  */
#define SPEED_ROUTINE_MPN_INVERTAPPR(function,itchfn) \
  { \
    unsigned  i;  /* same counter type as the other routines here */ \
    mp_ptr    up, tp, ip; \
    double    t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
 \
    MPN_COPY (up, s->xp, s->size); \
 \
    /* normalize the data */ \
    up[s->size-1] |= GMP_NUMB_HIGHBIT; \
 \
    speed_operand_src (s, up, s->size); \
    speed_operand_dst (s, tp, s->size); \
    speed_operand_dst (s, ip, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do \
      function (ip, up, s->size, tp); \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* As SPEED_ROUTINE_MPN_INVERTAPPR, but sizes below 3 are rejected.
   function is called as function (ip, up, s->size, tp), with up a copy of
   s->xp having GMP_NUMB_HIGHBIT set in its top limb and tp scratch of
   itchfn (s->size) limbs.  */
#define SPEED_ROUTINE_MPN_NI_INVERTAPPR(function,itchfn) \
  { \
    unsigned  i;  /* same counter type as the other routines here */ \
    mp_ptr    up, tp, ip; \
    double    t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 3); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
 \
    MPN_COPY (up, s->xp, s->size); \
 \
    /* normalize the data */ \
    up[s->size-1] |= GMP_NUMB_HIGHBIT; \
 \
    speed_operand_src (s, up, s->size); \
    speed_operand_dst (s, tp, s->size); \
    speed_operand_dst (s, ip, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do \
      function (ip, up, s->size, tp); \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* Measure function (ip, up, s->size, tp).  up is a copy of s->xp with its
   low bit forced on, ip receives the result and tp is scratch of
   itchfn (s->size) limbs.  */
#define SPEED_ROUTINE_MPN_BINVERT(function,itchfn) \
  { \
    unsigned  i;  /* same counter type as the other routines here */ \
    mp_ptr    up, tp, ip; \
    double    t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
 \
    MPN_COPY (up, s->xp, s->size); \
 \
    /* operand must be odd */ \
    up[0] |= 1; \
 \
    speed_operand_src (s, up, s->size); \
    speed_operand_dst (s, tp, s->size); \
    speed_operand_dst (s, ip, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do \
      function (ip, up, s->size, tp); \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* Measure function (ip, up, mp, s->size, 2*s->size*GMP_NUMB_BITS, tp).
   mp is s->yp with its low bit forced on; up is a working copy of s->xp
   that is refreshed inside the timed loop; tp is scratch of
   itchfn (s->size) limbs.

   The modulus is prepared before the operands are registered, and the
   operands registered are the ones the timed loop really touches: s->xp
   and mp as sources, up, tp and ip as destinations.  */
#define SPEED_ROUTINE_MPN_SEC_INVERT(function,itchfn) \
  { \
    unsigned  i; \
    mp_ptr    up, mp, tp, ip; \
    double    t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
 \
    /* Must be odd */ \
    MPN_COPY (mp, s->yp, s->size); \
    mp[0] |= 1; \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, mp, s->size); \
    speed_operand_dst (s, up, s->size); \
    speed_operand_dst (s, tp, s->size); \
    speed_operand_dst (s, ip, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do \
      { \
        /* up is refreshed before every call */ \
        MPN_COPY (up, s->xp, s->size); \
        function (ip, up, mp, s->size, 2*s->size*GMP_NUMB_BITS, tp); \
      } \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* Measure function (cp, tp, mp, s->size, inv).  mp is s->yp with its low
   bit forced on and inv is the negated binvert_limb of mp[0].  The value to
   reduce is 2*s->size limbs (s->xp repeated twice); it is copied into tp
   before every call, and that copy is inside the timed region.  The ap and
   tp buffers are allocated and registered with one limb more than is
   copied.  */
#define SPEED_ROUTINE_REDC_1(function) \
  { \
    mp_ptr     cp, mp, tp, ap; \
    mp_size_t  nn; \
    mp_limb_t  minv; \
    unsigned   j; \
    double     elapsed; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
    nn = 2 * s->size; \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ap, nn + 1, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, nn + 1, s->align_wp2); \
 \
    /* odd modulus, plus the negated inverse of its low limb */ \
    MPN_COPY (mp, s->yp, s->size); \
    mp[0] = mp[0] | 1; \
    binvert_limb (minv, mp[0]); \
    minv = -minv; \
 \
    /* value to reduce: low and high halves are both s->xp */ \
    MPN_COPY (ap, s->xp, s->size); \
    MPN_COPY (ap + s->size, s->xp, s->size); \
 \
    speed_operand_src (s, ap, nn + 1); \
    speed_operand_dst (s, tp, nn + 1); \
    speed_operand_src (s, mp, s->size); \
    speed_operand_dst (s, cp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      MPN_COPY (tp, ap, nn); \
      function (cp, tp, mp, s->size, minv); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
/* Measure function (cp, tp, mp, s->size, invp).  mp is s->yp with its low
   bit forced on.  invp is a two-limb inverse obtained from mpn_binvert on
   the low two limbs of mp, then adjusted with invp[0] negated and invp[1]
   complemented.  The value to reduce is 2*s->size limbs (s->xp repeated
   twice), copied into tp before every call inside the timed region.

   The two low limbs handed to mpn_binvert are taken from a zero-extended
   local copy, so nothing beyond mp's s->size limbs is read when
   s->size == 1, and mpn_binvert gets scratch of mpn_binvert_itch (2)
   limbs instead of borrowing tp.  */
#define SPEED_ROUTINE_REDC_2(function) \
  { \
    unsigned   i; \
    mp_ptr     cp, mp, tp, ap, binv_scratch; \
    mp_limb_t  invp[2], mlow[2]; \
    double     t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \
 \
    MPN_COPY (ap,         s->xp, s->size); \
    MPN_COPY (ap+s->size, s->xp, s->size); \
 \
    /* modulus must be odd */ \
    MPN_COPY (mp, s->yp, s->size); \
    mp[0] |= 1; \
 \
    /* low two limbs of the modulus, zero-extended when s->size == 1 */ \
    mlow[0] = mp[0]; \
    mlow[1] = (s->size >= 2) ? mp[1] : 0; \
    binv_scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (2)); \
    mpn_binvert (invp, mlow, 2, binv_scratch); \
    invp[0] = -invp[0]; invp[1] = ~invp[1]; \
 \
    speed_operand_src (s, ap, 2*s->size+1); \
    speed_operand_dst (s, tp, 2*s->size+1); \
    speed_operand_src (s, mp, s->size); \
    speed_operand_dst (s, cp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      MPN_COPY (tp, ap, 2*s->size); \
      function (cp, tp, mp, s->size, invp); \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
/* Measure function (cp, tp, mp, s->size, invp).  mp is s->yp with its low
   bit forced on and invp is the full s->size limb result of mpn_binvert on
   mp.  The value to reduce is 2*s->size limbs (s->xp repeated twice),
   copied into tp before every call inside the timed region.  Sizes of 8 or
   less are rejected.

   mpn_binvert is given its own scratch of mpn_binvert_itch (s->size) limbs
   rather than tp, whose 2*s->size+1 limbs are sized for the reduction and
   not for the inversion.  */
#define SPEED_ROUTINE_REDC_N(function) \
  { \
    unsigned   i; \
    mp_ptr     cp, mp, tp, ap, invp, binv_scratch; \
    double     t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size > 8); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp); \
    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \
    SPEED_TMP_ALLOC_LIMBS (invp, s->size,   s->align_wp2); /* align? */ \
 \
    MPN_COPY (ap,         s->xp, s->size); \
    MPN_COPY (ap+s->size, s->xp, s->size); \
 \
    /* modulus must be odd */ \
    MPN_COPY (mp, s->yp, s->size); \
    mp[0] |= 1; \
 \
    /* untimed setup: inverse of the modulus, with properly sized scratch */ \
    binv_scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (s->size)); \
    mpn_binvert (invp, mp, s->size, binv_scratch); \
 \
    speed_operand_src (s, ap, 2*s->size+1); \
    speed_operand_dst (s, tp, 2*s->size+1); \
    speed_operand_src (s, mp, s->size); \
    speed_operand_dst (s, cp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do { \
      MPN_COPY (tp, ap, 2*s->size); \
      function (cp, tp, mp, s->size, invp); \
    } while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Measure function (s->xp, s->size).  The value returned by function is
   discarded.  Sizes below 1 are rejected.  */
#define SPEED_ROUTINE_MPN_POPCOUNT(function) \
  { \
    unsigned  j; \
    double    elapsed; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      function (s->xp, s->size); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    return elapsed; \
  }
Packit 5c3484
Packit 5c3484
/* Measure function (s->xp, s->yp, s->size).  The value returned by
   function is discarded.  Sizes below 1 are rejected.  */
#define SPEED_ROUTINE_MPN_HAMDIST(function) \
  { \
    unsigned  j; \
    double    elapsed; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, s->yp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      function (s->xp, s->yp, s->size); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    return elapsed; \
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Measure function (z, s->size) writing into a freshly initialised mpz_t.
   Here s->size is handed to function as its integer argument rather than
   being used as a limb count.  No operands are registered and no cache
   fill is done.  */
#define SPEED_ROUTINE_MPZ_UI(function) \
  { \
    mpz_t     result; \
    unsigned  j; \
    double    elapsed; \
 \
    SPEED_RESTRICT_COND (s->size >= 0); \
 \
    mpz_init (result); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      function (result, s->size); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    mpz_clear (result); \
    return elapsed; \
  }

/* These three are plain aliases of SPEED_ROUTINE_MPZ_UI.  */
#define SPEED_ROUTINE_MPZ_FAC_UI(function)    SPEED_ROUTINE_MPZ_UI(function)
#define SPEED_ROUTINE_MPZ_FIB_UI(function)    SPEED_ROUTINE_MPZ_UI(function)
#define SPEED_ROUTINE_MPZ_LUCNUM_UI(function) SPEED_ROUTINE_MPZ_UI(function)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Measure function (z, z2, s->size) writing into two freshly initialised
   mpz_t variables.  Here s->size is handed to function as its integer
   argument rather than being used as a limb count.  No operands are
   registered and no cache fill is done.  */
#define SPEED_ROUTINE_MPZ_2_UI(function) \
  { \
    mpz_t     first, second; \
    unsigned  j; \
    double    elapsed; \
 \
    SPEED_RESTRICT_COND (s->size >= 0); \
 \
    mpz_init (first); \
    mpz_init (second); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      function (first, second, s->size); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    mpz_clear (first); \
    mpz_clear (second); \
    return elapsed; \
  }

/* These two are plain aliases of SPEED_ROUTINE_MPZ_2_UI.  */
#define SPEED_ROUTINE_MPZ_FIB2_UI(function)    SPEED_ROUTINE_MPZ_2_UI(function)
#define SPEED_ROUTINE_MPZ_LUCNUM2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Measure function (fp, f1p, s->size).  Both output areas are
   MPN_FIB2_SIZE (s->size) limbs; s->size is handed to function as its
   integer argument.  No operands are registered and no cache fill is
   done.  */
#define SPEED_ROUTINE_MPN_FIB2_UI(function) \
  { \
    mp_ptr     fp, f1p; \
    mp_size_t  out_limbs; \
    unsigned   j; \
    double     elapsed; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 0); \
 \
    TMP_MARK; \
    out_limbs = MPN_FIB2_SIZE (s->size); \
    SPEED_TMP_ALLOC_LIMBS (fp,  out_limbs, s->align_xp); \
    SPEED_TMP_ALLOC_LIMBS (f1p, out_limbs, s->align_yp); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      function (fp, f1p, s->size); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE; \
    return elapsed; \
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Measure function (r, b, e, m), i.e. b^e mod m.  b is taken from s->xp and
   m from s->yp, both s->size limbs; e is 6 limbs taken from s->xp_block.
   Bit 0 of m is set so that m is odd and redc can be used.  e is kept
   small so the calculation doesn't take too long.  */
#define SPEED_ROUTINE_MPZ_POWM(function) \
  { \
    mpz_t     res, base, expo, mod; \
    unsigned  j; \
    double    elapsed; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    mpz_init (res); \
    mpz_init_set_n (base, s->xp, s->size); \
    mpz_init_set_n (mod, s->yp, s->size); \
    mpz_setbit (mod, 0);  /* odd modulus */ \
    mpz_init_set_n (expo, s->xp_block, 6); \
 \
    speed_starttime (); \
    j = s->reps; \
    do { \
      function (res, base, expo, mod); \
    } while (--j != 0); \
    elapsed = speed_endtime (); \
 \
    mpz_clear (res); \
    mpz_clear (base); \
    mpz_clear (expo); \
    mpz_clear (mod); \
    return elapsed; \
  }
Packit 5c3484
Packit 5c3484
/* Measure function (r, b, e, m), i.e. (m-2)^e mod m.  m is s->xp (s->size
   limbs) with its low bit forced on, and b is m-2.  The exponent e is
   s->r when that is non-zero, otherwise (~0UL)/3, i.e. the bit pattern
   0x55...55.  */
#define SPEED_ROUTINE_MPZ_POWM_UI(function) \
  { \
    mpz_t     r, b, m; \
    unsigned  long  e; \
    unsigned  i; \
    double    t; \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    mpz_init (r); \
 \
    /* force m to odd */ \
    mpz_init (m); \
    mpz_set_n (m, s->xp, s->size); \
    PTR(m)[0] |= 1; \
 \
    /* default exponent, unless the caller supplied one in s->r */ \
    e = (~ (unsigned long) 0) / 3; \
    if (s->r != 0) \
      e = s->r; \
 \
    mpz_init_set (b, m); \
    mpz_sub_ui (b, b, 2); \
 \
    i = s->reps; \
    speed_starttime (); \
    do \
      function (r, b, e, m); \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    mpz_clear (r); \
    mpz_clear (b); \
    mpz_clear (m); \
    return t; \
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Common body for the addsub routines.  "call" is an expression that may
   use wp and wp2 (two s->size limb destinations) and xp and yp (the two
   sources).  s->r selects how sources overlap the destinations:
     0  xp = s->xp, yp = s->yp (no overlap)
     1  xp = wp
     2  yp = wp2
     3  xp = wp,  yp = wp2
     4  xp = wp2, yp = wp
   Any other s->r makes the routine return -1.0.  When a source is
   redirected, the s->xp or s->yp data is copied into it once, before
   timing starts.  */
#define SPEED_ROUTINE_MPN_ADDSUB_CALL(call) \
  { \
    mp_ptr    wp, wp2, xp, yp; \
    unsigned  i; \
    double    t; \
    TMP_DECL; \
 \
    SPEED_RESTRICT_COND (s->size >= 0); \
 \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp,  s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2); \
    xp = s->xp; \
    yp = s->yp; \
 \
    switch (s->r) \
      { \
      case 0: \
        break; \
      case 1: \
        xp = wp; \
        break; \
      case 2: \
        yp = wp2; \
        break; \
      case 3: \
        xp = wp; \
        yp = wp2; \
        break; \
      case 4: \
        xp = wp2; \
        yp = wp; \
        break; \
      default: \
        TMP_FREE; \
        return -1.0; \
      } \
 \
    /* seed any redirected source with the original operand data */ \
    if (xp != s->xp) \
      MPN_COPY (xp, s->xp, s->size); \
    if (yp != s->yp) \
      MPN_COPY (yp, s->yp, s->size); \
 \
    speed_operand_src (s, xp, s->size); \
    speed_operand_src (s, yp, s->size); \
    speed_operand_dst (s, wp, s->size); \
    speed_operand_dst (s, wp2, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    i = s->reps; \
    do \
      call; \
    while (--i != 0); \
    t = speed_endtime (); \
 \
    TMP_FREE; \
    return t; \
  }
Packit 5c3484
Packit 5c3484
/* Time function (wp, wp2, xp, yp, s->size) through
   SPEED_ROUTINE_MPN_ADDSUB_CALL.  Note the expansion itself ends with a
   semicolon.  */
#define SPEED_ROUTINE_MPN_ADDSUB_N(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_ADDSUB_CALL						\
Packit 5c3484
    (function (wp, wp2, xp, yp, s->size));
Packit 5c3484
Packit 5c3484
/* Like SPEED_ROUTINE_MPN_ADDSUB_N, but function takes one more trailing
   argument, always passed as 0 (presumably a carry-in -- TODO confirm
   against the timed function's prototype).  The expansion ends with a
   semicolon.  */
#define SPEED_ROUTINE_MPN_ADDSUB_NC(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_ADDSUB_CALL						\
Packit 5c3484
    (function (wp, wp2, xp, yp, s->size, 0));
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Doing an Nx1 gcd with the given r: times function (xp, s->size, s->r),
   run s->reps times.  Requires s->size >= 1 and s->r != 0.

   xp is a private copy of s->xp.  The result of refmpn_zero_p is ORed into
   its low limb (presumably so that an all-zero operand becomes 1 -- TODO
   confirm refmpn_zero_p returns 0 or 1).

   NOTE(review): speed_operand_src registers s->xp, not the copy xp that is
   actually passed to function -- confirm this is intended.  */
Packit 5c3484
#define SPEED_ROUTINE_MPN_GCD_1N(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    xp;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
    SPEED_RESTRICT_COND (s->r != 0);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp);			\
Packit 5c3484
    MPN_COPY (xp, s->xp, s->size);					\
Packit 5c3484
    xp[0] |= refmpn_zero_p (xp, s->size);				\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (xp, s->size, s->r);					\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* SPEED_BLOCK_SIZE single-limb GCDs, with x operands of s->size bits each.

   Here s->size is a bit count, so it must be between 1 and
   mp_bits_per_limb.  The x values are copied from s->xp_block and masked
   to s->size bits; the y values are copied from s->yp_block and masked to
   s->r bits, or to s->size bits when s->r is 0.  Any value that becomes
   zero after masking is bumped to 1.

   "setup" runs once per element during preparation, with i as the element
   index, and may adjust px[i] and py[i].  "call" is the timed statement,
   run for j = SPEED_BLOCK_SIZE down to 1 on each of s->reps passes, and is
   expected to index with j-1.  s->time_divisor is set to SPEED_BLOCK_SIZE.  */
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_GCD_1_CALL(setup, call)			\
Packit 5c3484
  {									\
Packit 5c3484
    unsigned  i, j;							\
Packit 5c3484
    mp_ptr    px, py;							\
Packit 5c3484
    mp_limb_t x_mask, y_mask;						\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size <= mp_bits_per_limb);			\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (px, SPEED_BLOCK_SIZE, s->align_xp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (py, SPEED_BLOCK_SIZE, s->align_yp);		\
Packit 5c3484
    MPN_COPY (px, s->xp_block, SPEED_BLOCK_SIZE);			\
Packit 5c3484
    MPN_COPY (py, s->yp_block, SPEED_BLOCK_SIZE);			\
Packit 5c3484
									\
Packit 5c3484
    x_mask = MP_LIMB_T_LOWBITMASK (s->size);				\
Packit 5c3484
    y_mask = MP_LIMB_T_LOWBITMASK (s->r != 0 ? s->r : s->size);		\
Packit 5c3484
    for (i = 0; i < SPEED_BLOCK_SIZE; i++)				\
Packit 5c3484
      {									\
Packit 5c3484
	px[i] &= x_mask; px[i] += (px[i] == 0);				\
Packit 5c3484
	py[i] &= y_mask; py[i] += (py[i] == 0);				\
Packit 5c3484
	setup;								\
Packit 5c3484
      }									\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, px, SPEED_BLOCK_SIZE);			\
Packit 5c3484
    speed_operand_src (s, py, SPEED_BLOCK_SIZE);			\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      {									\
Packit 5c3484
	j = SPEED_BLOCK_SIZE;						\
Packit 5c3484
	do								\
Packit 5c3484
	  {								\
Packit 5c3484
	    call;							\
Packit 5c3484
	  }								\
Packit 5c3484
	while (--j != 0);						\
Packit 5c3484
      }									\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
									\
Packit 5c3484
    s->time_divisor = SPEED_BLOCK_SIZE;					\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time function (&px[j-1], 1, py[j-1]) through
   SPEED_ROUTINE_MPN_GCD_1_CALL, with an empty "setup".  */
#define SPEED_ROUTINE_MPN_GCD_1(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_GCD_1_CALL( , function (&px[j-1], 1, py[j-1]))
Packit 5c3484
Packit 5c3484
#define SPEED_ROUTINE_MPN_JACBASE(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_GCD_1_CALL						\
Packit 5c3484
    ({									\
Packit 5c3484
       /* require x
Packit 5c3484
       px[i] %= py[i];							\
Packit 5c3484
       px[i] |= 1;							\
Packit 5c3484
       py[i] |= 1;							\
Packit 5c3484
       if (py[i]==1) py[i]=3;						\
Packit 5c3484
     },									\
Packit 5c3484
     function (px[j-1], py[j-1], 0))
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Time func (ap, bp, s->size, &hgcd, wp).  Returns -1 without measuring
   when s->size < 2.

   itchfunc (s->size) gives the number of scratch limbs allocated for wp,
   and MPN_HGCD_MATRIX_INIT_ITCH (s->size) the number allocated for the
   matrix storage tmp1.  The high limbs of s->xp and s->yp are ORed with 1
   in place, so the caller's operands are modified.

   Each of the s->reps iterations copies s->xp and s->yp into ap and bp and
   calls mpn_hgcd_matrix_init before func; those steps are inside the timed
   region.  res receives func's return value but is not otherwise used.  */
#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc)			\
Packit 5c3484
  {									\
Packit 5c3484
    mp_size_t hgcd_init_itch, hgcd_itch;				\
Packit 5c3484
    mp_ptr ap, bp, wp, tmp1;						\
Packit 5c3484
    struct hgcd_matrix hgcd;						\
Packit 5c3484
    int res;								\
Packit 5c3484
    unsigned i;								\
Packit 5c3484
    double t;								\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    if (s->size < 2)							\
Packit 5c3484
      return -1;							\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);		\
Packit 5c3484
									\
Packit 5c3484
    s->xp[s->size - 1] |= 1;						\
Packit 5c3484
    s->yp[s->size - 1] |= 1;						\
Packit 5c3484
									\
Packit 5c3484
    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);		\
Packit 5c3484
    hgcd_itch = itchfunc (s->size);					\
Packit 5c3484
									\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_src (s, s->yp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, ap, s->size + 1);				\
Packit 5c3484
    speed_operand_dst (s, bp, s->size + 1);				\
Packit 5c3484
    speed_operand_dst (s, wp, hgcd_itch);				\
Packit 5c3484
    speed_operand_dst (s, tmp1, hgcd_init_itch);			\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      {									\
Packit 5c3484
	MPN_COPY (ap, s->xp, s->size);					\
Packit 5c3484
	MPN_COPY (bp, s->yp, s->size);					\
Packit 5c3484
	mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);			\
Packit 5c3484
	res = func (ap, bp, s->size, &hgcd, wp);			\
Packit 5c3484
      }									\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time func (&hgcd, ap, bp, s->size, p, wp) with p = s->size/2.  Returns -1
   without measuring when s->size < 2.

   itchfunc (s->size, p) gives the number of scratch limbs allocated for
   wp, and MPN_HGCD_MATRIX_INIT_ITCH (s->size) the number allocated for the
   matrix storage tmp1.  The high limbs of s->xp and s->yp are ORed with 1
   in place, so the caller's operands are modified.

   Each of the s->reps iterations copies s->xp and s->yp into ap and bp and
   calls mpn_hgcd_matrix_init before func; those steps are inside the timed
   region.  res receives func's return value but is not otherwise used.  */
#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc)		\
Packit 5c3484
  {									\
Packit 5c3484
    mp_size_t hgcd_init_itch, hgcd_step_itch;				\
Packit 5c3484
    mp_ptr ap, bp, wp, tmp1;						\
Packit 5c3484
    struct hgcd_matrix hgcd;						\
Packit 5c3484
    mp_size_t p = s->size/2;						\
Packit 5c3484
    int res;								\
Packit 5c3484
    unsigned i;								\
Packit 5c3484
    double t;								\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    if (s->size < 2)							\
Packit 5c3484
      return -1;							\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);		\
Packit 5c3484
									\
Packit 5c3484
    s->xp[s->size - 1] |= 1;						\
Packit 5c3484
    s->yp[s->size - 1] |= 1;						\
Packit 5c3484
									\
Packit 5c3484
    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);		\
Packit 5c3484
    hgcd_step_itch = itchfunc (s->size, p);				\
Packit 5c3484
									\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_src (s, s->yp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, ap, s->size + 1);				\
Packit 5c3484
    speed_operand_dst (s, bp, s->size + 1);				\
Packit 5c3484
    speed_operand_dst (s, wp, hgcd_step_itch);				\
Packit 5c3484
    speed_operand_dst (s, tmp1, hgcd_init_itch);			\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      {									\
Packit 5c3484
	MPN_COPY (ap, s->xp, s->size);					\
Packit 5c3484
	MPN_COPY (bp, s->yp, s->size);					\
Packit 5c3484
	mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);			\
Packit 5c3484
	res = func (&hgcd, ap, bp, s->size, p, wp);			\
Packit 5c3484
      }									\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Run some GCDs of s->size limbs each.  The number of different data values
Packit 5c3484
   is decreased as s->size**2, since GCD is a quadratic algorithm.
Packit 5c3484
   SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
Packit 5c3484
   though, because the plain gcd is about twice as fast as gcdext.  */
Packit 5c3484
Packit 5c3484
/* Shared body for the GCD timing routines.  Requires s->size >= 1.

   The number of operand pairs ("pieces") is
   SPEED_BLOCK_SIZE * datafactor / s->size / s->size, capped at
   SPEED_BLOCK_SIZE / s->size and never less than 1.  With a single piece
   the data comes from s->xp and s->yp, otherwise from s->xp_block and
   s->yp_block.

   "call" is the timed statement and may refer to wp, wp2, xtmp, ytmp and
   s.  Before every call the current pair is copied into xtmp and ytmp, and
   those copies are inside the timed region.  s->time_divisor is set to
   pieces.  */
#define SPEED_ROUTINE_MPN_GCD_CALL(datafactor, call)			\
Packit 5c3484
  {									\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    mp_size_t j, pieces, psize;						\
Packit 5c3484
    mp_ptr    wp, wp2, xtmp, ytmp, px, py;				\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp,   s->size+1, s->align_wp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp2,  s->size+1, s->align_wp2);		\
Packit 5c3484
									\
Packit 5c3484
    pieces = SPEED_BLOCK_SIZE * datafactor / s->size / s->size;		\
Packit 5c3484
    pieces = MIN (pieces, SPEED_BLOCK_SIZE / s->size);			\
Packit 5c3484
    pieces = MAX (pieces, 1);						\
Packit 5c3484
									\
Packit 5c3484
    psize = pieces * s->size;						\
Packit 5c3484
    px = TMP_ALLOC_LIMBS (psize);					\
Packit 5c3484
    py = TMP_ALLOC_LIMBS (psize);					\
Packit 5c3484
    MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize);		\
Packit 5c3484
    MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize);		\
Packit 5c3484
									\
Packit 5c3484
    /* Requirements: x >= y, y must be odd, high limbs != 0.		\
Packit 5c3484
       No need to ensure random numbers are really great.  */		\
Packit 5c3484
    for (j = 0; j < pieces; j++)					\
Packit 5c3484
      {									\
Packit 5c3484
	mp_ptr	x = px + j * s->size;					\
Packit 5c3484
	mp_ptr	y = py + j * s->size;					\
Packit 5c3484
	if (x[s->size - 1] == 0) x[s->size - 1] = 1;			\
Packit 5c3484
	if (y[s->size - 1] == 0) y[s->size - 1] = 1;			\
Packit 5c3484
									\
Packit 5c3484
	if (x[s->size - 1] < y[s->size - 1])				\
Packit 5c3484
	  MP_LIMB_T_SWAP (x[s->size - 1], y[s->size - 1]);		\
Packit 5c3484
	else if (x[s->size - 1] == y[s->size - 1])			\
Packit 5c3484
	  {								\
Packit 5c3484
	    x[s->size - 1] = 2;						\
Packit 5c3484
	    y[s->size - 1] = 1;						\
Packit 5c3484
	  }								\
Packit 5c3484
	y[0] |= 1;							\
Packit 5c3484
      }									\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, px, psize);					\
Packit 5c3484
    speed_operand_src (s, py, psize);					\
Packit 5c3484
    speed_operand_dst (s, xtmp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, ytmp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      {									\
Packit 5c3484
	j = pieces;							\
Packit 5c3484
	do								\
Packit 5c3484
	  {								\
Packit 5c3484
	    MPN_COPY (xtmp, px+(j - 1)*s->size, s->size);		\
Packit 5c3484
	    MPN_COPY (ytmp, py+(j - 1)*s->size, s->size);		\
Packit 5c3484
	    call;							\
Packit 5c3484
	  }								\
Packit 5c3484
	while (--j != 0);						\
Packit 5c3484
      }									\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
									\
Packit 5c3484
    s->time_divisor = pieces;						\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time function (wp, xtmp, s->size, ytmp, s->size) through
   SPEED_ROUTINE_MPN_GCD_CALL with a datafactor of 8.  */
#define SPEED_ROUTINE_MPN_GCD(function)	\
Packit 5c3484
  SPEED_ROUTINE_MPN_GCD_CALL (8, function (wp, xtmp, s->size, ytmp, s->size))
Packit 5c3484
Packit 5c3484
/* Time function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size) through
   SPEED_ROUTINE_MPN_GCD_CALL with a datafactor of 4.  wp2size is a local
   declared inside the timed statement to receive the size output.  */
#define SPEED_ROUTINE_MPN_GCDEXT(function)				\
Packit 5c3484
  SPEED_ROUTINE_MPN_GCD_CALL						\
Packit 5c3484
    (4, { mp_size_t  wp2size;						\
Packit 5c3484
	  function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); })
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Time function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size) on
   operands whose low limbs are fixed (copied once from s->xp and s->yp)
   and whose top three limbs change on every call.

   pieces = SPEED_BLOCK_SIZE / 3 three-limb chunks are taken from
   s->xp_block and s->yp_block.  Before each call one chunk is stored at
   xtmp[s->size-3 .. s->size-1] and ytmp[s->size-3 .. s->size-1], and the
   low bit of ytmp[0] is set; these stores are inside the timed region.
   s->time_divisor is set to pieces.

   s->size must be at least 3: with fewer limbs the chunk stores would land
   before the start of xtmp and ytmp.  */
#define SPEED_ROUTINE_MPN_GCDEXT_ONE(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    mp_size_t j, pieces, psize, wp2size;				\
Packit 5c3484
    mp_ptr    wp, wp2, xtmp, ytmp, px, py;				\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 3);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp);		\
Packit 5c3484
    MPN_COPY (xtmp, s->xp, s->size);					\
Packit 5c3484
    MPN_COPY (ytmp, s->yp, s->size);					\
Packit 5c3484
									\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp,	s->size+1, s->align_wp);		\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1, s->align_wp2);		\
Packit 5c3484
									\
Packit 5c3484
    pieces = SPEED_BLOCK_SIZE / 3;					\
Packit 5c3484
    psize = 3 * pieces;							\
Packit 5c3484
    px = TMP_ALLOC_LIMBS (psize);					\
Packit 5c3484
    py = TMP_ALLOC_LIMBS (psize);					\
Packit 5c3484
    MPN_COPY (px, s->xp_block, psize);					\
Packit 5c3484
    MPN_COPY (py, s->yp_block, psize);					\
Packit 5c3484
									\
Packit 5c3484
    /* x must have at least as many bits as y,				\
Packit 5c3484
       high limbs must be non-zero */					\
Packit 5c3484
    for (j = 0; j < pieces; j++)					\
Packit 5c3484
      {									\
Packit 5c3484
	mp_ptr	x = px+3*j;						\
Packit 5c3484
	mp_ptr	y = py+3*j;						\
Packit 5c3484
	x[2] += (x[2] == 0);						\
Packit 5c3484
	y[2] += (y[2] == 0);						\
Packit 5c3484
	if (x[2] < y[2])						\
Packit 5c3484
	  MP_LIMB_T_SWAP (x[2], y[2]);					\
Packit 5c3484
      }									\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, px, psize);					\
Packit 5c3484
    speed_operand_src (s, py, psize);					\
Packit 5c3484
    speed_operand_dst (s, xtmp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, ytmp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      {									\
Packit 5c3484
	mp_ptr	x = px;							\
Packit 5c3484
	mp_ptr	y = py;							\
Packit 5c3484
	mp_ptr	xth = &xtmp[s->size-3];					\
Packit 5c3484
	mp_ptr	yth = &ytmp[s->size-3];					\
Packit 5c3484
	j = pieces;							\
Packit 5c3484
	do								\
Packit 5c3484
	  {								\
Packit 5c3484
	    xth[0] = x[0], xth[1] = x[1], xth[2] = x[2];		\
Packit 5c3484
	    yth[0] = y[0], yth[1] = y[1], yth[2] = y[2];		\
Packit 5c3484
									\
Packit 5c3484
	    ytmp[0] |= 1; /* y must be odd, */				\
Packit 5c3484
									\
Packit 5c3484
	    function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size);	\
Packit 5c3484
									\
Packit 5c3484
	    x += 3;							\
Packit 5c3484
	    y += 3;							\
Packit 5c3484
	  }								\
Packit 5c3484
	while (--j != 0);						\
Packit 5c3484
      }									\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
									\
Packit 5c3484
    s->time_divisor = pieces;						\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time function (a, b) on mpz_t operands of s->size limbs each.

   pieces = SPEED_BLOCK_SIZE / MAX (s->size, 1), at least 1, operand pairs
   are prepared; with a single piece the data comes from s->xp and s->yp,
   otherwise from s->xp_block and s->yp_block.  Every y has its low bit
   set, and a zero high limb in x or y is replaced by 1.

   Only the SIZ and PTR fields of a and b are set here: SIZ once to
   s->size, and PTR before each call to the current pair.  s->time_divisor
   is set to pieces.

   NOTE(review): there is no SPEED_RESTRICT_COND on s->size.  With
   s->size == 0 the preparation loop would index x[s->size-1], i.e. one
   limb before the buffer -- confirm callers never pass size 0.  */
#define SPEED_ROUTINE_MPZ_JACOBI(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mpz_t     a, b;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    mp_size_t j, pieces, psize;						\
Packit 5c3484
    mp_ptr    px, py;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    pieces = SPEED_BLOCK_SIZE / MAX (s->size, 1);			\
Packit 5c3484
    pieces = MAX (pieces, 1);						\
Packit 5c3484
    s->time_divisor = pieces;						\
Packit 5c3484
									\
Packit 5c3484
    psize = pieces * s->size;						\
Packit 5c3484
    px = TMP_ALLOC_LIMBS (psize);					\
Packit 5c3484
    py = TMP_ALLOC_LIMBS (psize);					\
Packit 5c3484
    MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize);		\
Packit 5c3484
    MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize);		\
Packit 5c3484
									\
Packit 5c3484
    for (j = 0; j < pieces; j++)					\
Packit 5c3484
      {									\
Packit 5c3484
	mp_ptr	x = px+j*s->size;					\
Packit 5c3484
	mp_ptr	y = py+j*s->size;					\
Packit 5c3484
									\
Packit 5c3484
	/* y odd */							\
Packit 5c3484
	y[0] |= 1;							\
Packit 5c3484
									\
Packit 5c3484
	/* high limbs non-zero */					\
Packit 5c3484
	if (x[s->size-1] == 0) x[s->size-1] = 1;			\
Packit 5c3484
	if (y[s->size-1] == 0) y[s->size-1] = 1;			\
Packit 5c3484
      }									\
Packit 5c3484
									\
Packit 5c3484
    SIZ(a) = s->size;							\
Packit 5c3484
    SIZ(b) = s->size;							\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, px, psize);					\
Packit 5c3484
    speed_operand_src (s, py, psize);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      {									\
Packit 5c3484
	j = pieces;							\
Packit 5c3484
	do								\
Packit 5c3484
	  {								\
Packit 5c3484
	    PTR(a) = px+(j-1)*s->size;					\
Packit 5c3484
	    PTR(b) = py+(j-1)*s->size;					\
Packit 5c3484
	    function (a, b);						\
Packit 5c3484
	  }								\
Packit 5c3484
	while (--j != 0);						\
Packit 5c3484
      }									\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time function (wp, 0, xp, s->size, yp), run s->reps times.  Requires
   s->size >= 2.

   xp is a private copy of s->xp, made once before timing starts and not
   refreshed between repetitions.  yp is a two-limb divisor taken from
   s->yp_block with GMP_NUMB_HIGHBIT set in its high limb.  */
#define SPEED_ROUTINE_MPN_DIVREM_2(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp, xp;							\
Packit 5c3484
    mp_limb_t yp[2];							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 2);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp);			\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    /* source is destroyed */						\
Packit 5c3484
    MPN_COPY (xp, s->xp, s->size);					\
Packit 5c3484
									\
Packit 5c3484
    /* divisor must be normalized */					\
Packit 5c3484
    MPN_COPY (yp, s->yp_block, 2);					\
Packit 5c3484
    yp[1] |= GMP_NUMB_HIGHBIT;						\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, xp, s->size);					\
Packit 5c3484
    speed_operand_src (s, yp, 2);					\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, 0, xp, s->size, yp);				\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time r = function (wp, wp+s->size-1, s->xp, s->size, d), run s->reps
   times.  Requires s->size >= 1.  The divisor d is s->r, or 1 when s->r is
   0.

   The local xp is declared but never used, and r is assigned but never
   read.  */
#define SPEED_ROUTINE_MPN_DIV_QR_1(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp, xp;							\
Packit 5c3484
    mp_limb_t d;							\
Packit 5c3484
    mp_limb_t r;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    d = s->r;								\
Packit 5c3484
    if (d == 0)								\
Packit 5c3484
      d = 1;								\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      r = function (wp, wp+s->size-1, s->xp, s->size, d);		\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time r = function (wp, s->xp, s->size, 0, d, dinv), run s->reps times.
   Requires s->size >= 1 and a divisor d = s->r that has GMP_NUMB_HIGHBIT
   set.  dinv is computed from d with invert_limb before timing starts.

   Both restrictions are checked before TMP_MARK, so that a rejected
   measurement leaves with no temporary allocation outstanding (this
   assumes SPEED_RESTRICT_COND leaves the enclosing function when its
   condition fails).

   The local xp is declared but never used, and r is assigned but never
   read.  */
#define SPEED_ROUTINE_MPN_DIV_QR_1N_PI1(function)			\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp, xp;							\
Packit 5c3484
    mp_limb_t d, dinv;							\
Packit 5c3484
    mp_limb_t r;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    d = s->r;								\
Packit 5c3484
    /* divisor must be normalized */					\
Packit 5c3484
    SPEED_RESTRICT_COND (d & GMP_NUMB_HIGHBIT);				\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    invert_limb (dinv, d);						\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      r = function (wp, s->xp, s->size, 0, d, dinv);			\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time function (wp, rp, s->xp, s->size, yp), run s->reps times.  Requires
   s->size >= 2.

   yp is a two-limb divisor taken from s->yp_block.  When norm is non-zero
   GMP_NUMB_HIGHBIT is set in its high limb; otherwise that bit is cleared,
   and a high limb that becomes zero is replaced by 1.  rp is a two-limb
   local passed to function as its second argument.

   The local xp is declared but never used.  */
#define SPEED_ROUTINE_MPN_DIV_QR_2(function, norm)			\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp, xp;							\
Packit 5c3484
    mp_limb_t yp[2];							\
Packit 5c3484
    mp_limb_t rp[2];							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 2);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    /* divisor must be normalized */					\
Packit 5c3484
    MPN_COPY (yp, s->yp_block, 2);					\
Packit 5c3484
    if (norm)								\
Packit 5c3484
      yp[1] |= GMP_NUMB_HIGHBIT;					\
Packit 5c3484
    else								\
Packit 5c3484
      {									\
Packit 5c3484
	yp[1] &= ~GMP_NUMB_HIGHBIT;					\
Packit 5c3484
	if (yp[1] == 0)							\
Packit 5c3484
	  yp[1] = 1;							\
Packit 5c3484
      }									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_src (s, yp, 2);					\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_operand_dst (s, rp, 2);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      function (wp, rp, s->xp, s->size, yp);				\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* Time function (n, n), called SPEED_BLOCK_SIZE times on each of s->reps
   passes.  s->size is not used.

   n starts at 1 and before every call has s->xp_block data, shifted left
   by one bit, added to it, so each call's input depends on the previous
   call.  xp is set to s->xp_block-1 so that xp[j], for j running from
   SPEED_BLOCK_SIZE down to 1, reads s->xp_block[j-1].

   n is handed to noop_1 after timing, and s->time_divisor is set to
   SPEED_BLOCK_SIZE.  */
#define SPEED_ROUTINE_MODLIMB_INVERT(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    unsigned   i, j;							\
Packit 5c3484
    mp_ptr     xp;							\
Packit 5c3484
    mp_limb_t  n = 1;							\
Packit 5c3484
    double     t;							\
Packit 5c3484
									\
Packit 5c3484
    xp = s->xp_block-1;							\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE);		\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      {									\
Packit 5c3484
	j = SPEED_BLOCK_SIZE;						\
Packit 5c3484
	do								\
Packit 5c3484
	  {								\
Packit 5c3484
	    /* randomized but successively dependent */			\
Packit 5c3484
	    n += (xp[j] << 1);						\
Packit 5c3484
									\
Packit 5c3484
	    function (n, n);						\
Packit 5c3484
	  }								\
Packit 5c3484
	while (--j != 0);						\
Packit 5c3484
      }									\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    /* make sure the compiler won't optimize away n */			\
Packit 5c3484
    noop_1 (n);								\
Packit 5c3484
									\
Packit 5c3484
    s->time_divisor = SPEED_BLOCK_SIZE;					\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Shared body for timing routines that read s->xp and have two s->size
   limb work areas, wp and wp2, available.  Requires s->size >= 1.

   "call" is the timed statement, run s->reps times; it may refer to wp,
   wp2 and s.  */
#define SPEED_ROUTINE_MPN_SQRTROOT_CALL(call)				\
Packit 5c3484
  {									\
Packit 5c3484
    mp_ptr    wp, wp2;							\
Packit 5c3484
    unsigned  i;							\
Packit 5c3484
    double    t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp,	s->size, s->align_wp);			\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2);			\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, wp, s->size);					\
Packit 5c3484
    speed_operand_dst (s, wp2, s->size);				\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* s->size controls the number of limbs in the input, s->r is the base, or
Packit 5c3484
   decimal by default. */
Packit 5c3484
/* Time function (wp, base, xp, s->size), run s->reps times.  Requires
   s->size >= 1 and a base between 2 and 256; the base is s->r, or 10 when
   s->r is 0.

   The output buffer wp holds wn bytes, with wn computed by MPN_SIZEINBASE
   from s->xp.  Each repetition first copies s->xp into xp, and that copy
   is inside the timed region.  */
#define SPEED_ROUTINE_MPN_GET_STR(function)				\
Packit 5c3484
  {									\
Packit 5c3484
    unsigned char *wp;							\
Packit 5c3484
    mp_size_t wn;							\
Packit 5c3484
    mp_ptr xp;								\
Packit 5c3484
    int base;								\
Packit 5c3484
    unsigned i;								\
Packit 5c3484
    double t;								\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    base = s->r == 0 ? 10 : s->r;					\
Packit 5c3484
    SPEED_RESTRICT_COND (base >= 2 && base <= 256);			\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (xp, s->size + 1, s->align_xp);		\
Packit 5c3484
									\
Packit 5c3484
    MPN_SIZEINBASE (wn, s->xp, s->size, base);				\
Packit 5c3484
    wp = (unsigned char *) TMP_ALLOC (wn);				\
Packit 5c3484
									\
Packit 5c3484
    /* use this during development to guard against overflowing wp */	\
Packit 5c3484
    /*									\
Packit 5c3484
    MPN_COPY (xp, s->xp, s->size);					\
Packit 5c3484
    ASSERT_ALWAYS (mpn_get_str (wp, base, xp, s->size) <= wn);		\
Packit 5c3484
    */									\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, s->xp, s->size);				\
Packit 5c3484
    speed_operand_dst (s, xp, s->size);					\
Packit 5c3484
    speed_operand_dst (s, (mp_ptr) wp, wn/GMP_LIMB_BYTES);		\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      {									\
Packit 5c3484
	MPN_COPY (xp, s->xp, s->size);					\
Packit 5c3484
	function (wp, base, xp, s->size);				\
Packit 5c3484
      }									\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
/* s->size controls the number of digits in the input, s->r is the base, or
Packit 5c3484
   decimal by default. */
Packit 5c3484
/* Shared body for timing string-to-limbs conversions.  Requires
   s->size >= 1 and a base between 2 and 256; the base is s->r, or 10 when
   s->r is 0.

   The input xp is an array of s->size digit bytes, digit i being
   s->xp[i] % base.  The destination wp has wn limbs, with wn computed by
   LIMBS_PER_DIGIT_IN_BASE from s->size and base.

   "call" is the timed statement, run s->reps times; it may refer to wp,
   xp, base and s.  */
#define SPEED_ROUTINE_MPN_SET_STR_CALL(call)				\
Packit 5c3484
  {									\
Packit 5c3484
    unsigned char *xp;							\
Packit 5c3484
    mp_ptr     wp;							\
Packit 5c3484
    mp_size_t  wn;							\
Packit 5c3484
    unsigned   i;							\
Packit 5c3484
    int        base;							\
Packit 5c3484
    double     t;							\
Packit 5c3484
    TMP_DECL;								\
Packit 5c3484
									\
Packit 5c3484
    SPEED_RESTRICT_COND (s->size >= 1);					\
Packit 5c3484
									\
Packit 5c3484
    base = s->r == 0 ? 10 : s->r;					\
Packit 5c3484
    SPEED_RESTRICT_COND (base >= 2 && base <= 256);			\
Packit 5c3484
									\
Packit 5c3484
    TMP_MARK;								\
Packit 5c3484
									\
Packit 5c3484
    xp = (unsigned char *) TMP_ALLOC (s->size);				\
Packit 5c3484
    for (i = 0; i < s->size; i++)					\
Packit 5c3484
      xp[i] = s->xp[i] % base;						\
Packit 5c3484
									\
Packit 5c3484
    LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);			\
Packit 5c3484
    SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);			\
Packit 5c3484
									\
Packit 5c3484
    /* use this during development to check wn is big enough */		\
Packit 5c3484
    /*									\
Packit 5c3484
    ASSERT_ALWAYS (mpn_set_str (wp, xp, s->size, base) <= wn);		\
Packit 5c3484
    */									\
Packit 5c3484
									\
Packit 5c3484
    speed_operand_src (s, (mp_ptr) xp, s->size/GMP_LIMB_BYTES);	\
Packit 5c3484
    speed_operand_dst (s, wp, wn);					\
Packit 5c3484
    speed_cache_fill (s);						\
Packit 5c3484
									\
Packit 5c3484
    speed_starttime ();							\
Packit 5c3484
    i = s->reps;							\
Packit 5c3484
    do									\
Packit 5c3484
      call;								\
Packit 5c3484
    while (--i != 0);							\
Packit 5c3484
    t = speed_endtime ();						\
Packit 5c3484
									\
Packit 5c3484
    TMP_FREE;								\
Packit 5c3484
    return t;								\
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Time an accel gcd find_a() style function.  The size parameter is not
   consulted: every repetition calls "function" once on each of
   SPEED_BLOCK_SIZE two-limb operands, so that values which happen to run
   particularly fast or slow are averaged over the whole block.  */

#define SPEED_ROUTINE_MPN_GCD_FINDA(function)				\
  {									\
    mp_limb_t operand[SPEED_BLOCK_SIZE][2];				\
    unsigned  rep, k;							\
    double    elapsed;							\
    TMP_DECL;								\
									\
    TMP_MARK;								\
									\
    /* every pair gets an odd low limb and a non-zero high limb */	\
    for (k = 0; k < SPEED_BLOCK_SIZE; k++)				\
      {									\
	operand[k][0] = s->xp_block[k] | 1;				\
	operand[k][1] = s->yp_block[k] + (s->yp_block[k] == 0);	\
      }									\
									\
    speed_operand_src (s, operand[0], 2*SPEED_BLOCK_SIZE);		\
    speed_cache_fill (s);						\
									\
    speed_starttime ();							\
    rep = s->reps;							\
    do {								\
      /* walk the block from the last pair down to the first */	\
      k = SPEED_BLOCK_SIZE;						\
      do {								\
	function (operand[k-1]);					\
      } while (--k != 0);						\
    } while (--rep != 0);						\
    elapsed = speed_endtime ();						\
									\
    /* each repetition made SPEED_BLOCK_SIZE calls */			\
    s->time_divisor = SPEED_BLOCK_SIZE;					\
									\
    TMP_FREE;								\
    return elapsed;							\
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Timing skeleton for count_foo_zeros style operations.  It is split into
   two halves so the operation under test can be written inline between
   them:

       SPEED_ROUTINE_COUNT_ZEROS_A (leading, zero);
       count_foo_zeros (c, n);
       SPEED_ROUTINE_COUNT_ZEROS_B ();

   SPEED_ROUTINE_COUNT_ZEROS_C (call, leading, zero) is the same thing with
   the operation passed in as "call", which should do "count_foo_zeros(c,n)".

   Give leading=1 if foo is leading zeros, leading=0 for trailing.
   Give zero=1 if n=0 is allowed in the call, zero=0 if not.

   The locals c (int result) and n (mp_limb_t input) are what the operation
   is expected to use.  A opens the function body and both timing loops, B
   closes them and returns the measured time; neither is usable alone.  */

#define SPEED_ROUTINE_COUNT_ZEROS_A(leading, zero)			\
  {									\
    mp_ptr     xp;							\
    int        i, c;							\
    unsigned   j;							\
    mp_limb_t  n;							\
    double     t;							\
    TMP_DECL;								\
									\
    TMP_MARK;								\
    SPEED_TMP_ALLOC_LIMBS (xp, SPEED_BLOCK_SIZE, s->align_xp);		\
									\
    if (! speed_routine_count_zeros_setup (s, xp, leading, zero))	\
      {									\
	/* release the temporary block before the early return */	\
	TMP_FREE;							\
	return -1.0;							\
      }									\
    speed_operand_src (s, xp, SPEED_BLOCK_SIZE);			\
    speed_cache_fill (s);						\
									\
    c = 0;								\
    speed_starttime ();							\
    j = s->reps;							\
    do {								\
      for (i = 0; i < SPEED_BLOCK_SIZE; i++)				\
	{								\
	  /* fold the previous result into the next input */		\
	  n = xp[i];							\
	  n ^= c;

#define SPEED_ROUTINE_COUNT_ZEROS_B()					\
	}								\
    } while (--j != 0);							\
    t = speed_endtime ();						\
									\
    /* don't let c go dead */						\
    noop_1 (c);								\
									\
    /* each repetition ran the operation SPEED_BLOCK_SIZE times */	\
    s->time_divisor = SPEED_BLOCK_SIZE;					\
									\
    TMP_FREE;								\
    return t;								\
  }

#define SPEED_ROUTINE_COUNT_ZEROS_C(call, leading, zero)		\
  do {									\
    SPEED_ROUTINE_COUNT_ZEROS_A (leading, zero);			\
    call;								\
    SPEED_ROUTINE_COUNT_ZEROS_B ();					\
  } while (0)
Packit 5c3484
Packit 5c3484
/* Convenience forms of SPEED_ROUTINE_COUNT_ZEROS_C with "leading" fixed at
   1 (LEADING) or 0 (TRAILING).  The _C variants take a complete "call"
   statement and the "zero" flag.  The plain variants take the name of a
   function or macro, invoke it as fun (c, n), and pass zero=0.  */

#define SPEED_ROUTINE_COUNT_LEADING_ZEROS_C(call,zero)			\
  SPEED_ROUTINE_COUNT_ZEROS_C (call, 1, zero)
#define SPEED_ROUTINE_COUNT_LEADING_ZEROS(fun)				\
  SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 1, 0)

#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS_C(call,zero)			\
  SPEED_ROUTINE_COUNT_ZEROS_C (call, 0, zero)
/* The parameter has to be called fun, since the expansion uses fun (c, n).  */
#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS(fun)				\
  SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 0, 0)
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Time "call", a statement that uses the locals d and dinv declared here.
   For each of s->reps repetitions the limbs of s->xp_block are visited from
   index SPEED_BLOCK_SIZE-1 down to 0; d is that limb XORed with the current
   dinv and then has GMP_LIMB_HIGHBIT set.  dinv starts at 0 and is
   presumably updated by "call"; it is handed to noop_1 afterwards so the
   work cannot be discarded.

   xp is the real start of the block and is indexed with j-1, rather than
   being biased to one element before the block, so no pointer outside the
   array is ever formed.  */

#define SPEED_ROUTINE_INVERT_LIMB_CALL(call)				\
  {									\
    unsigned   i, j;							\
    mp_limb_t  d, dinv=0;						\
    mp_ptr     xp = s->xp_block;					\
									\
    s->time_divisor = SPEED_BLOCK_SIZE;					\
									\
    speed_starttime ();							\
    i = s->reps;							\
    do									\
      {									\
	j = SPEED_BLOCK_SIZE;						\
	do								\
	  {								\
	    /* mix in the previous dinv, then force the high bit */	\
	    d = dinv ^ xp[j-1];						\
	    d |= GMP_LIMB_HIGHBIT;					\
	    do { call; } while (0);					\
	  }								\
	while (--j != 0);						\
      }									\
    while (--i != 0);							\
									\
    /* don't let the compiler optimize everything away */		\
    noop_1 (dinv);							\
									\
    return speed_endtime();						\
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Time repeated calls of the argument-less "function", one call per
   repetition counted by s->reps.  The body runs before the counter is
   tested, so s->reps must be non-zero.  */

#define SPEED_ROUTINE_MPN_BACK_TO_BACK(function)			\
  {									\
    unsigned  remaining;						\
									\
    speed_starttime ();							\
    remaining = s->reps;						\
    do {								\
      function ();							\
    } while (--remaining != 0);						\
    return speed_endtime ();						\
  }
Packit 5c3484
Packit 5c3484
Packit 5c3484
/* Time "call", a statement operating on the destination wp of s->size
   limbs (for example a zeroing routine).  wp is a temporary block aligned
   per s->align_wp and registered as the destination operand before the
   cache is primed.  "call" is run once per repetition counted by s->reps,
   and the elapsed time is returned.  */

#define SPEED_ROUTINE_MPN_ZERO_CALL(call)				\
  {									\
    mp_ptr    wp;							\
    unsigned  i;							\
    double    t;							\
    TMP_DECL;								\
									\
    SPEED_RESTRICT_COND (s->size >= 0);					\
									\
    /* destination buffer */						\
    TMP_MARK;								\
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
    speed_operand_dst (s, wp, s->size);					\
    speed_cache_fill (s);						\
									\
    /* timed section */							\
    speed_starttime ();							\
    i = s->reps;							\
    do {								\
      call;								\
    } while (--i != 0);							\
    t = speed_endtime ();						\
									\
    TMP_FREE;								\
    return t;								\
  }

/* As SPEED_ROUTINE_MPN_ZERO_CALL, invoking function (wp, s->size).  */
#define SPEED_ROUTINE_MPN_ZERO(function)				\
  SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))
Packit 5c3484
Packit 5c3484
Packit 5c3484
#endif