From a262a8407c1b1fa0e2c32a670c463e46e1d93a90 Mon Sep 17 00:00:00 2001 From: Packit Service Date: Feb 03 2021 06:34:12 +0000 Subject: Apply patch glibc-rh1817513-11.patch patch_name: glibc-rh1817513-11.patch present_in_specfile: true location_in_specfile: 412 --- diff --git a/benchtests/Makefile b/benchtests/Makefile index 28d6b0c..bde0caf 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -131,6 +131,12 @@ CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC # HP_TIMING if it is available. ifdef USE_CLOCK_GETTIME CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME +else +# On x86 processors, use RDTSCP, instead of RDTSC, to measure performance +# of functions. All x86 processors since 2010 support RDTSCP instruction. +ifdef USE_RDTSCP +CPPFLAGS-nonlib += -DUSE_RDTSCP +endif endif DETAILED_OPT := diff --git a/benchtests/README b/benchtests/README index 4ddff79..aaf0b65 100644 --- a/benchtests/README +++ b/benchtests/README @@ -34,6 +34,15 @@ the benchmark to use clock_gettime by invoking make as follows: Again, one must run `make bench-clean' before changing the measurement method. +On x86 processors, RDTSCP instruction provides more precise timing data +than RDTSC instruction. All x86 processors since 2010 support RDTSCP +instruction. One can force the benchmark to use RDTSCP by invoking make +as follows: + + $ make USE_RDTSCP=1 bench + +One must run `make bench-clean' before changing the measurement method. + Running benchmarks on another target: ==================================== diff --git a/sysdeps/x86/hp-timing.h b/sysdeps/x86/hp-timing.h index 77a1360..0aa6f5e 100644 --- a/sysdeps/x86/hp-timing.h +++ b/sysdeps/x86/hp-timing.h @@ -40,7 +40,19 @@ typedef unsigned long long int hp_timing_t; NB: Use __builtin_ia32_rdtsc directly since including <x86intrin.h> makes building glibc very slow. 
*/ -# define HP_TIMING_NOW(Var) ((Var) = __builtin_ia32_rdtsc ()) +# ifdef USE_RDTSCP +/* RDTSCP waits until all previous instructions have executed and all + previous loads are globally visible before reading the counter. + RDTSC doesn't wait until all previous instructions have been executed + before reading the counter. */ +# define HP_TIMING_NOW(Var) \ + (__extension__ ({ \ + unsigned int __aux; \ + (Var) = __builtin_ia32_rdtscp (&__aux); \ + })) +# else +# define HP_TIMING_NOW(Var) ((Var) = __builtin_ia32_rdtsc ()) +# endif # include <hp-timing-common.h> #else