|
Packit |
577717 |
/*
  Example of using LD_PRELOAD with the CUDA component.
  Asim YarKhan

  This is designed to work with the simpleMultiGPU_no_counters binary
  in the PAPI CUDA component tests directory.  First, trace the library
  calls in the simpleMultiGPU_no_counters binary using ltrace.  Note in
  the ltrace output that the CUDA C APIs are different from the CUDA
  calls visible to nvcc.  Then figure out an appropriate place to attach
  the PAPI calls.  The initialization is attached to the first entry
  to cudaSetDevice.  Each cudaSetDevice is also used to set up the PAPI
  events for that device.  It was harder to figure out where to attach
  the PAPI_start.  After running some tests, I attached it to the 17th
  invocation of gettimeofday (kind of arbitrary! Sorry!).  The
  PAPI_stop was attached to the first invocation of cudaFreeHost.

*/
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#define _GNU_SOURCE
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include <stdio.h>
|
|
Packit |
577717 |
#include <dlfcn.h>
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include "papi.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#define MAXDEVICES 5
|
|
Packit |
577717 |
int EventSet = PAPI_NULL;
|
|
Packit |
577717 |
int devseen[MAXDEVICES] = {0};
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static void *dl1;
|
|
Packit |
577717 |
int (*PAPI_library_init_ptr)(int version); /**< initialize the PAPI library */
|
|
Packit |
577717 |
int (*PAPI_create_eventset_ptr)(int *EventSet); /**< create a new empty PAPI event set */
|
|
Packit |
577717 |
int (*PAPI_add_named_event_ptr)(int EventSet, char *EventName); /**< add an event by name to a PAPI event set */
|
|
Packit |
577717 |
int (*PAPI_start_ptr)(int EventSet); /**< start counting hardware events in an event set */
|
|
Packit |
577717 |
int (*PAPI_stop_ptr)(int EventSet, long long * values); /**< stop counting hardware events in an event set and return current events */
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int cudaSetDevice(int devnum, int n1, int n2, int n3, void *ptr1)
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
static int onetime = 0;
|
|
Packit |
577717 |
int retval, retval_cudaSetDevice;
|
|
Packit |
577717 |
//printf("cudaSetDevice wrapper %d\n", devnum);
|
|
Packit |
577717 |
if ( onetime==0 ) {
|
|
Packit |
577717 |
onetime=1;
|
|
Packit |
577717 |
// Load the papi library dynamically and read the relevant functions
|
|
Packit |
577717 |
dl1 = dlopen( "libpapi.so", RTLD_NOW | RTLD_GLOBAL );
|
|
Packit |
577717 |
if ( dl1==NULL ) printf("Intercept cudaSetDevice: Cannot load libpapi.so\n");
|
|
Packit |
577717 |
PAPI_library_init_ptr = dlsym( dl1, "PAPI_library_init" );
|
|
Packit |
577717 |
PAPI_create_eventset_ptr = dlsym( dl1, "PAPI_create_eventset" );
|
|
Packit |
577717 |
PAPI_add_named_event_ptr = dlsym( dl1, "PAPI_add_named_event" );
|
|
Packit |
577717 |
PAPI_start_ptr = dlsym( dl1, "PAPI_start" );
|
|
Packit |
577717 |
PAPI_stop_ptr = dlsym( dl1, "PAPI_stop" );
|
|
Packit |
577717 |
// Start using PAPI
|
|
Packit |
577717 |
printf("Intercept cudaSetDevice: Initializing PAPI on device %d\n", devnum);
|
|
Packit |
577717 |
retval = (PAPI_library_init_ptr)( PAPI_VER_CURRENT );
|
|
Packit |
577717 |
if( retval != PAPI_VER_CURRENT ) fprintf( stdout, "PAPI_library_init failed\n" );
|
|
Packit |
577717 |
printf( "PAPI version: %d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ) );
|
|
Packit |
577717 |
retval = (PAPI_create_eventset_ptr)( &EventSet );
|
|
Packit |
577717 |
if( retval != PAPI_OK ) fprintf( stdout, "PAPI_create_eventset failed\n" );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
int (*original_function)(int devnum, int n1, int n2, int n3, void *ptr1);
|
|
Packit |
577717 |
original_function = dlsym(RTLD_NEXT, "cudaSetDevice");
|
|
Packit |
577717 |
retval_cudaSetDevice = (*original_function)( devnum, n1, n2, n3, ptr1 );
|
|
Packit |
577717 |
if ( devseen[devnum]==0 ) {
|
|
Packit |
577717 |
devseen[devnum]=1;
|
|
Packit |
577717 |
char tmpEventName[120];
|
|
Packit |
577717 |
printf("Intercept cudaSetDevice: Attaching events for device on device %d\n", devnum);
|
|
Packit |
577717 |
snprintf( tmpEventName, 110, "cuda:::device:%d:%s", devnum, "inst_executed" );
|
|
Packit |
577717 |
retval = (PAPI_add_named_event_ptr)( EventSet, tmpEventName );
|
|
Packit |
577717 |
if (retval!=PAPI_OK) printf( "Could not add event %s\n", tmpEventName );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return retval_cudaSetDevice;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int gettimeofday(void *ptr1, void *ptr2)
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
static int onetime = 0;
|
|
Packit |
577717 |
onetime++;
|
|
Packit |
577717 |
// printf("gettimeofday onetime %d\n", onetime);
|
|
Packit |
577717 |
// Use above print statement to determine that the N-th gettime of day works
|
|
Packit |
577717 |
if ( onetime==17 ) {
|
|
Packit |
577717 |
printf("Intercept gettimeofday: Attaching PAPI_start to the %d th call to gettimeofday (this may need to be adjusted)\n", onetime);
|
|
Packit |
577717 |
int retval = (PAPI_start_ptr)( EventSet );
|
|
Packit |
577717 |
printf("Starting PAPI\n");
|
|
Packit |
577717 |
if( retval!=PAPI_OK ) fprintf( stdout, "PAPI_start failed\n" );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
int (*original_function)(void *ptr1, void *ptr2);
|
|
Packit |
577717 |
original_function = dlsym(RTLD_NEXT, "gettimeofday");
|
|
Packit |
577717 |
return (*original_function)(ptr1, ptr2);
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int cudaFreeHost(void *ptr1, void *ptr2, int n1, int n2, void *ptr3)
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
static int onetime = 0;
|
|
Packit |
577717 |
long long values[10];
|
|
Packit |
577717 |
int retval, devnum;
|
|
Packit |
577717 |
onetime++;
|
|
Packit |
577717 |
if ( onetime==1 ) {
|
|
Packit |
577717 |
printf("Intercept cudaFreeHost: Used to get PAPI results\n" );
|
|
Packit |
577717 |
retval = (PAPI_stop_ptr)( EventSet, values );
|
|
Packit |
577717 |
if( retval != PAPI_OK ) fprintf( stderr, "PAPI_stop failed\n" );
|
|
Packit |
577717 |
for( devnum = 0; devnum < MAXDEVICES && devseen[devnum]==1 ; devnum++ )
|
|
Packit |
577717 |
printf( "PAPI counterValue: cuda::device:%d:%s: %12lld \n", devnum, "inst_executed", values[devnum] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
int (*original_function)(void *ptr1, void *ptr2, int n1, int n2, void *ptr3);
|
|
Packit |
577717 |
original_function = dlsym(RTLD_NEXT, "cudaFreeHost");
|
|
Packit |
577717 |
return (*original_function)(ptr1, ptr2, n1, n2, ptr3);
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|