/****************************************************************************
*C
*C matrix-hl.f
*C An example of matrix-matrix multiplication and using PAPI high level
*C to look at the performance. written by Kevin London
*C March 2000
*C Added to c tests to check stop
*C****************************************************************************
*/
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include <stdio.h>
|
|
Packit |
577717 |
#include <stdlib.h>
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include "papi.h"
|
|
Packit |
577717 |
#include "papi_test.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include "do_loops.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int
|
|
Packit |
577717 |
main( int argc, char **argv )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#define NROWS1 175
|
|
Packit |
577717 |
#define NCOLS1 225
|
|
Packit |
577717 |
#define NROWS2 NCOLS1
|
|
Packit |
577717 |
#define NCOLS2 150
|
|
Packit |
577717 |
double p[NROWS1][NCOLS1], q[NROWS2][NCOLS2], r[NROWS1][NCOLS2];
|
|
Packit |
577717 |
int i, j, k, num_events, retval;
|
|
Packit |
577717 |
/* PAPI standardized event to be monitored */
|
|
Packit |
577717 |
int event[2];
|
|
Packit |
577717 |
/* PAPI values of the counters */
|
|
Packit |
577717 |
long long values[2], tmp;
|
|
Packit |
577717 |
int quiet;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
quiet = tests_quiet( argc, argv );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Setup default values */
|
|
Packit |
577717 |
num_events = 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* See how many hardware events at one time are supported
|
|
Packit |
577717 |
* This also initializes the PAPI library */
|
|
Packit |
577717 |
num_events = PAPI_num_counters( );
|
|
Packit |
577717 |
if ( num_events < 2 ) {
|
|
Packit |
577717 |
if (!quiet) printf( "This example program requries the architecture to "
|
|
Packit |
577717 |
"support 2 simultaneous hardware events...shutting down.\n" );
|
|
Packit |
577717 |
test_skip( __FILE__, __LINE__, "PAPI_num_counters", 1 );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( !quiet )
|
|
Packit |
577717 |
printf( "Number of hardware counters supported: %d\n", num_events );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK )
|
|
Packit |
577717 |
event[0] = PAPI_FP_OPS;
|
|
Packit |
577717 |
else if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK )
|
|
Packit |
577717 |
event[0] = PAPI_FP_INS;
|
|
Packit |
577717 |
else
|
|
Packit |
577717 |
event[0] = PAPI_TOT_INS;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Time used */
|
|
Packit |
577717 |
event[1] = PAPI_TOT_CYC;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* matrix 1: read in the matrix values */
|
|
Packit |
577717 |
for ( i = 0; i < NROWS1; i++ )
|
|
Packit |
577717 |
for ( j = 0; j < NCOLS1; j++ )
|
|
Packit |
577717 |
p[i][j] = i * j * 1.0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < NROWS2; i++ )
|
|
Packit |
577717 |
for ( j = 0; j < NCOLS2; j++ )
|
|
Packit |
577717 |
q[i][j] = i * j * 1.0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < NROWS1; i++ )
|
|
Packit |
577717 |
for ( j = 0; j < NCOLS2; j++ )
|
|
Packit |
577717 |
r[i][j] = i * j * 1.0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Set up the counters */
|
|
Packit |
577717 |
num_events = 2;
|
|
Packit |
577717 |
retval = PAPI_start_counters( event, num_events );
|
|
Packit |
577717 |
if ( retval != PAPI_OK )
|
|
Packit |
577717 |
test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Clear the counter values */
|
|
Packit |
577717 |
retval = PAPI_read_counters( values, num_events );
|
|
Packit |
577717 |
if ( retval != PAPI_OK )
|
|
Packit |
577717 |
test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Compute the matrix-matrix multiplication */
|
|
Packit |
577717 |
for ( i = 0; i < NROWS1; i++ )
|
|
Packit |
577717 |
for ( j = 0; j < NCOLS2; j++ )
|
|
Packit |
577717 |
for ( k = 0; k < NCOLS1; k++ )
|
|
Packit |
577717 |
r[i][j] = r[i][j] + p[i][k] * q[k][j];
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Stop the counters and put the results in the array values */
|
|
Packit |
577717 |
retval = PAPI_stop_counters( values, num_events );
|
|
Packit |
577717 |
if ( retval != PAPI_OK )
|
|
Packit |
577717 |
test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Make sure the compiler does not optimize away the multiplication
|
|
Packit |
577717 |
* with dummy(r);
|
|
Packit |
577717 |
*/
|
|
Packit |
577717 |
dummy( r );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( !quiet ) {
|
|
Packit |
577717 |
if ( event[0] == PAPI_TOT_INS ) {
|
|
Packit |
577717 |
printf( TAB1, "TOT Instructions:", values[0] );
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
printf( TAB1, "FP Instructions:", values[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( TAB1, "Cycles:", values[1] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/*
|
|
Packit |
577717 |
* Intel Core overreports flops by 50% when using -O
|
|
Packit |
577717 |
* Use -O2 or -O3 to produce the expected # of flops
|
|
Packit |
577717 |
*/
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( event[0] == PAPI_FP_INS ) {
|
|
Packit |
577717 |
/* Compare measured FLOPS to expected value */
|
|
Packit |
577717 |
tmp =
|
|
Packit |
577717 |
2 * ( long long ) ( NROWS1 ) * ( long long ) ( NCOLS2 ) *
|
|
Packit |
577717 |
( long long ) ( NCOLS1 );
|
|
Packit |
577717 |
if ( abs( ( int ) values[0] - ( int ) tmp ) > ( double ) tmp * 0.05 ) {
|
|
Packit |
577717 |
/* Maybe we are counting FMAs? */
|
|
Packit |
577717 |
tmp = tmp / 2;
|
|
Packit |
577717 |
if ( abs( ( int ) values[0] - ( int ) tmp ) >
|
|
Packit |
577717 |
( double ) tmp * 0.05 ) {
|
|
Packit |
577717 |
printf( "\n" TAB1, "Expected operation count: ", 2 * tmp );
|
|
Packit |
577717 |
printf( TAB1, "Or possibly (using FMA): ", tmp );
|
|
Packit |
577717 |
printf( TAB1, "Instead I got: ", values[0] );
|
|
Packit |
577717 |
test_fail( __FILE__, __LINE__,
|
|
Packit |
577717 |
"Unexpected FLOP count (check vector operations)",
|
|
Packit |
577717 |
1 );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
test_pass( __FILE__ );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
return 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
}
|