Blob Blame History Raw
/* This test exercises the PAPI_TOT_CYC and PAPI_REF_CYC counters.

	PAPI_TOT_CYC should measure the number of cycles required to do
	a fixed amount of work.
	It should be roughly constant for constant work, regardless of the
	speed state a core is in.

	PAPI_REF_CYC should measure the number of cycles at a constant
	reference clock rate, independent of the actual clock rate of the core.
*/

/*
	PAPI_REF_CYC has various issues on Intel chips:

	On older machines PAPI uses UNHALTED_REFERENCE_CYCLES but this
	means different things on different architectures

	+ On Core2/Atom this maps to the special Fixed Counter 2
		CPU_CLK_UNHALTED.REF
		This counts at the same rate as the TSC (PAPI_get_real_cyc())
		And also seems to match PAPI_TOT_CYC
		It is documented as having a fixed ratio to the
		CPU_CLK_UNHALTED.BUS (3c/1) event.

	+ On Nehalem/Westemere this also maps to Fixed Counter 2.
		Again, counts same rate as the TSC  and returns
		CPU_CLK_UNHALTED.REF_P (3c/1)
		times the "Maximum Non-Turbo Ratio"

	+ Same for Sandybridge/Ivybridge

	On newer HSW,BDW,SKL machines PAPI uses a different type of event
	CPU_CLK_THREAD_UNHALTED:REF_XCLK

	+ On Haswell machines this is just the reference clock
		(100MHz?)
	+ On Sandybridge this is off by a factor of 8x?
*/

/* NOTE:
	PAPI_get_virt_cyc() returns a lie!
	It's just virt_time() * max_theoretical_MHz
	so no point in checking that */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "papi.h"
#include "papi_test.h"

#include "testcode.h"

#define NUM_FLOPS  20000000

static void work (int EventSet, int sleep_test, int quiet)
{
	int retval;
	long long values[2];
	long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc;
	double cycles_error;
	int numflops = NUM_FLOPS;

	/* Gather before stats */
	elapsed_us = PAPI_get_real_usec(  );
	elapsed_cyc = PAPI_get_real_cyc(  );
	elapsed_virt_us = PAPI_get_virt_usec(  );
	elapsed_virt_cyc = PAPI_get_virt_cyc(  );

	/* Start PAPI */
	retval = PAPI_start( EventSet );
	if ( retval != PAPI_OK ) {
		test_fail( __FILE__, __LINE__, "PAPI_start", retval );
	}

	/* our test code */
	if (sleep_test) {
		sleep(2);
	}
	else {
		do_flops( numflops, 1 );
	}

	/* Stop PAPI */
	retval = PAPI_stop( EventSet, values );
	if ( retval != PAPI_OK ) {
	   test_fail( __FILE__, __LINE__, "PAPI_stop", retval );
	}

	/* Calculate total values */
	elapsed_virt_us = PAPI_get_virt_usec(  ) - elapsed_virt_us;
	elapsed_virt_cyc = PAPI_get_virt_cyc(  ) - elapsed_virt_cyc;
	elapsed_us = PAPI_get_real_usec(  ) - elapsed_us;
	elapsed_cyc = PAPI_get_real_cyc(  ) - elapsed_cyc;

	if (!quiet) {
		printf( "-------------------------------------------------------------------------\n" );
		if (sleep_test) printf("Sleeping for 2s\n");
		else printf( "Using %d iterations of c += a*b\n", numflops );
		printf( "-------------------------------------------------------------------------\n" );

		printf( "PAPI_TOT_CYC             : \t%10lld\n", values[0] );
		printf( "PAPI_REF_CYC             : \t%10lld\n", values[1] );
		printf( "Real usec                : \t%10lld\n", elapsed_us );
		printf( "Real cycles              : \t%10lld\n", elapsed_cyc );
		printf( "Virt usec                : \t%10lld\n", elapsed_virt_us );
		printf( "Virt cycles (estimate)   : \t%10lld\n", elapsed_virt_cyc );
		printf( "Estimated GHz            : \t%10.3lf\n", (double) elapsed_cyc/(double)elapsed_us/1000.0);

		printf( "-------------------------------------------------------------------------\n" );
	}


	if (sleep_test) {
		if (!quiet) {
		printf( "Verification: PAPI_REF_CYC should be much lower than real_usec\n");
		}
		if (values[1]>elapsed_us) {
			if (!quiet) printf("PAPI_REF_CYC too high!\n");
			test_fail( __FILE__, __LINE__, "PAPI_REF_CYC too high", 0 );
		}

	}
	else {
		/* PAPI_REF_CYC should be roughly the same as TSC when busy */
		/* on Intel chips */
		if (!quiet) {
		printf( "Verification: real_cyc should be roughly PAPI_REF_CYC\n");
		printf( "              real_usec should be roughly virt_usec (on otherwise idle system)\n");
		}

		cycles_error=100.0*
			((double)values[1]-((double)elapsed_cyc))
				/values[1];

		if ((cycles_error>10.0) || (cycles_error<-10.0)) {
			if (!quiet) printf("Error of %.2f%%\n",cycles_error);
			test_fail( __FILE__, __LINE__, "PAPI_REF_CYC validation", 0 );
		}

		cycles_error=100.0*
			((double)elapsed_us-(double)elapsed_virt_us)
				/(double)elapsed_us;

		if ((cycles_error>10.0) || (cycles_error<-10.0)) {
			if (!quiet) printf("Error of %.2f%%\n",cycles_error);
			test_warn( __FILE__, __LINE__, "real_us validation", 0 );
		}
	}
}


int
main( int argc, char **argv )
{
	int retval;
	int EventSet = PAPI_NULL;
	int quiet;

	/* Set TESTS_QUIET variable */
	quiet = tests_quiet( argc, argv );

	/* Init the PAPI library */
	retval = PAPI_library_init( PAPI_VER_CURRENT );
	if ( retval != PAPI_VER_CURRENT ) {
	   test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
	}

	/* Check the ref cycles event */
	retval = PAPI_query_named_event("PAPI_REF_CYC");
	if (PAPI_OK!=retval) {
		if (!quiet) printf("No PAPI_REF_CYC available\n");
		test_skip( __FILE__, __LINE__,
			"PAPI_REF_CYC is not defined on this platform.", 0);
	}

	/* create an eventset */
	retval = PAPI_create_eventset( &EventSet );
	if ( retval != PAPI_OK ) {
		test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );
	}

	/* add core cycle event */
	retval = PAPI_add_named_event( EventSet, "PAPI_TOT_CYC");
	if ( retval != PAPI_OK ) {
		test_fail( __FILE__, __LINE__,
				"PAPI_add_named_event: PAPI_TOT_CYC", retval );
	}

	/* add ref cycle event */
	retval = PAPI_add_named_event( EventSet, "PAPI_REF_CYC");
	if ( retval != PAPI_OK ) {
		test_fail( __FILE__, __LINE__,
				"PAPI_add_events: PAPI_REF_CYC", retval );
	}

	if (!quiet) {
		printf("Test case sleeping: "
			"Look at TOT and REF cycles.\n");
	}

	work(EventSet, 1, quiet);
//	do_flops(10*numflops);

	if (!quiet) {
		printf( "\nTest case busy:\n" );
	}

	work(EventSet, 0, quiet);

	test_pass( __FILE__ );

	return 0;
}