/* This test exercises the PAPI_TOT_CYC and PAPI_REF_CYC counters. PAPI_TOT_CYC should measure the number of cycles required to do a fixed amount of work. It should be roughly constant for constant work, regardless of the speed state a core is in. PAPI_REF_CYC should measure the number of cycles at a constant reference clock rate, independent of the actual clock rate of the core. */ /* PAPI_REF_CYC has various issues on Intel chips: On older machines PAPI uses UNHALTED_REFERENCE_CYCLES but this means different things on different architectures + On Core2/Atom this maps to the special Fixed Counter 2 CPU_CLK_UNHALTED.REF This counts at the same rate as the TSC (PAPI_get_real_cyc()) And also seems to match PAPI_TOT_CYC It is documented as having a fixed ratio to the CPU_CLK_UNHALTED.BUS (3c/1) event. + On Nehalem/Westemere this also maps to Fixed Counter 2. Again, counts same rate as the TSC and returns CPU_CLK_UNHALTED.REF_P (3c/1) times the "Maximum Non-Turbo Ratio" + Same for Sandybridge/Ivybridge On newer HSW,BDW,SKL machines PAPI uses a different type of event CPU_CLK_THREAD_UNHALTED:REF_XCLK + On Haswell machines this is just the reference clock (100MHz?) + On Sandybridge this is off by a factor of 8x? */ /* NOTE: PAPI_get_virt_cyc() returns a lie! It's just virt_time() * max_theoretical_MHz so no point in checking that */ #include #include #include #include "papi.h" #include "papi_test.h" #include "testcode.h" #define NUM_FLOPS 20000000 static void work (int EventSet, int sleep_test, int quiet) { int retval; long long values[2]; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; double cycles_error; int numflops = NUM_FLOPS; /* Gather before stats */ elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); elapsed_virt_us = PAPI_get_virt_usec( ); elapsed_virt_cyc = PAPI_get_virt_cyc( ); /* Start PAPI */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } /* our test code */ if (sleep_test) { sleep(2); } else { do_flops( numflops, 1 ); } /* Stop PAPI */ retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /* Calculate total values */ elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; if (!quiet) { printf( "-------------------------------------------------------------------------\n" ); if (sleep_test) printf("Sleeping for 2s\n"); else printf( "Using %d iterations of c += a*b\n", numflops ); printf( "-------------------------------------------------------------------------\n" ); printf( "PAPI_TOT_CYC : \t%10lld\n", values[0] ); printf( "PAPI_REF_CYC : \t%10lld\n", values[1] ); printf( "Real usec : \t%10lld\n", elapsed_us ); printf( "Real cycles : \t%10lld\n", elapsed_cyc ); printf( "Virt usec : \t%10lld\n", elapsed_virt_us ); printf( "Virt cycles (estimate) : \t%10lld\n", elapsed_virt_cyc ); printf( "Estimated GHz : \t%10.3lf\n", (double) elapsed_cyc/(double)elapsed_us/1000.0); printf( "-------------------------------------------------------------------------\n" ); } if (sleep_test) { if (!quiet) { printf( "Verification: PAPI_REF_CYC should be much lower than real_usec\n"); } if (values[1]>elapsed_us) { if (!quiet) printf("PAPI_REF_CYC too high!\n"); test_fail( __FILE__, __LINE__, "PAPI_REF_CYC too high", 0 ); } } else { /* PAPI_REF_CYC should be roughly the same as TSC when busy */ /* on Intel chips */ if (!quiet) { printf( "Verification: real_cyc should be roughly PAPI_REF_CYC\n"); printf( " real_usec should be roughly virt_usec (on otherwise idle system)\n"); } cycles_error=100.0* ((double)values[1]-((double)elapsed_cyc)) /values[1]; if ((cycles_error>10.0) || (cycles_error<-10.0)) { if (!quiet) printf("Error of %.2f%%\n",cycles_error); test_fail( __FILE__, __LINE__, "PAPI_REF_CYC validation", 0 ); } cycles_error=100.0* ((double)elapsed_us-(double)elapsed_virt_us) /(double)elapsed_us; if ((cycles_error>10.0) || (cycles_error<-10.0)) { if (!quiet) printf("Error of %.2f%%\n",cycles_error); test_warn( __FILE__, __LINE__, "real_us validation", 0 ); } } } int main( int argc, char **argv ) { int retval; int EventSet = PAPI_NULL; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Check the ref cycles event */ retval = PAPI_query_named_event("PAPI_REF_CYC"); if (PAPI_OK!=retval) { if (!quiet) printf("No PAPI_REF_CYC available\n"); test_skip( __FILE__, __LINE__, "PAPI_REF_CYC is not defined on this platform.", 0); } /* create an eventset */ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* add core cycle event */ retval = PAPI_add_named_event( EventSet, "PAPI_TOT_CYC"); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_named_event: PAPI_TOT_CYC", retval ); } /* add ref cycle event */ retval = PAPI_add_named_event( EventSet, "PAPI_REF_CYC"); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events: PAPI_REF_CYC", retval ); } if (!quiet) { printf("Test case sleeping: " "Look at TOT and REF cycles.\n"); } work(EventSet, 1, quiet); // do_flops(10*numflops); if (!quiet) { printf( "\nTest case busy:\n" ); } work(EventSet, 0, quiet); test_pass( __FILE__ ); return 0; }