Blame src/components/cuda/tests/cuda_ld_preload_example.c

Packit 577717
/*
Packit 577717
  Example of using LD_PRELOAD with the CUDA component.  
Packit 577717
  Asim YarKhan
Packit 577717
Packit 577717
  This is designed to work with the simpleMultiGPU_no_counters binary
Packit 577717
  in the PAPI CUDA component tests directory.  First trace the library
Packit 577717
  calls in simpleMultiGPU_no_counters binary using ltrace.  Note in
Packit 577717
  the ltrace output that the CUDA C APIs are different from the CUDA
Packit 577717
  calls visible to nvcc. Then figure out appropriate place to attach
Packit 577717
  the PAPI calls.  The initialization is attached to the first entry
Packit 577717
  to cudaSetDevice.  Each cudaSetDevice is also used to setup the PAPI
Packit 577717
  events for that device.  It was harder to figure out where to attach
Packit 577717
  the PAPI_start.  After running some tests, I attached it to the 17th
Packit 577717
  invocation of gettimeofday (kind of arbitrary! Sorry!).  The
Packit 577717
  PAPI_stop was attached to the first invocation of cudaFreeHost.
Packit 577717
Packit 577717
*/
Packit 577717
Packit 577717
#define _GNU_SOURCE
Packit 577717
Packit 577717
#include <stdio.h>
Packit 577717
#include <dlfcn.h>
Packit 577717
Packit 577717
#include "papi.h"
Packit 577717
Packit 577717
#define MAXDEVICES 5
Packit 577717
int EventSet = PAPI_NULL;
Packit 577717
int devseen[MAXDEVICES] = {0};
Packit 577717
Packit 577717
static void *dl1;
Packit 577717
int (*PAPI_library_init_ptr)(int version); /**< initialize the PAPI library */
Packit 577717
int (*PAPI_create_eventset_ptr)(int *EventSet); /**< create a new empty PAPI event set */
Packit 577717
int (*PAPI_add_named_event_ptr)(int EventSet, char *EventName); /**< add an event by name to a PAPI event set */
Packit 577717
int (*PAPI_start_ptr)(int EventSet); /**< start counting hardware events in an event set */
Packit 577717
int (*PAPI_stop_ptr)(int EventSet, long long * values); /**< stop counting hardware events in an event set and return current events */
Packit 577717
Packit 577717
Packit 577717
int cudaSetDevice(int devnum, int n1, int n2, int n3, void *ptr1) 
Packit 577717
{
Packit 577717
    static int onetime = 0;
Packit 577717
    int retval, retval_cudaSetDevice;
Packit 577717
    //printf("cudaSetDevice wrapper %d\n", devnum);
Packit 577717
    if ( onetime==0 ) {
Packit 577717
        onetime=1;
Packit 577717
        // Load the papi library dynamically and read the relevant functions
Packit 577717
        dl1 = dlopen( "libpapi.so", RTLD_NOW | RTLD_GLOBAL );
Packit 577717
        if ( dl1==NULL ) printf("Intercept cudaSetDevice: Cannot load libpapi.so\n");
Packit 577717
        PAPI_library_init_ptr = dlsym( dl1, "PAPI_library_init" );
Packit 577717
        PAPI_create_eventset_ptr = dlsym( dl1, "PAPI_create_eventset" );
Packit 577717
        PAPI_add_named_event_ptr = dlsym( dl1, "PAPI_add_named_event" );
Packit 577717
        PAPI_start_ptr = dlsym( dl1, "PAPI_start" );
Packit 577717
        PAPI_stop_ptr = dlsym( dl1, "PAPI_stop" );
Packit 577717
        // Start using PAPI
Packit 577717
        printf("Intercept cudaSetDevice: Initializing PAPI on device %d\n", devnum);
Packit 577717
        retval = (PAPI_library_init_ptr)( PAPI_VER_CURRENT );
Packit 577717
        if( retval != PAPI_VER_CURRENT ) fprintf( stdout, "PAPI_library_init failed\n" );
Packit 577717
        printf( "PAPI version: %d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ) );
Packit 577717
        retval = (PAPI_create_eventset_ptr)( &EventSet );
Packit 577717
        if( retval != PAPI_OK ) fprintf( stdout, "PAPI_create_eventset failed\n" );
Packit 577717
    }
Packit 577717
    int (*original_function)(int devnum, int n1, int n2, int n3, void *ptr1);
Packit 577717
    original_function = dlsym(RTLD_NEXT, "cudaSetDevice");
Packit 577717
    retval_cudaSetDevice = (*original_function)( devnum, n1, n2, n3, ptr1 );
Packit 577717
    if ( devseen[devnum]==0 ) {
Packit 577717
        devseen[devnum]=1;
Packit 577717
        char tmpEventName[120];
Packit 577717
        printf("Intercept cudaSetDevice: Attaching events for device on device %d\n", devnum);
Packit 577717
        snprintf( tmpEventName, 110, "cuda:::device:%d:%s", devnum, "inst_executed" );
Packit 577717
        retval = (PAPI_add_named_event_ptr)( EventSet, tmpEventName );
Packit 577717
        if (retval!=PAPI_OK) printf( "Could not add event %s\n", tmpEventName );
Packit 577717
    }
Packit 577717
    return retval_cudaSetDevice;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
int gettimeofday(void *ptr1, void *ptr2)
Packit 577717
{
Packit 577717
    static int onetime = 0;
Packit 577717
    onetime++;
Packit 577717
    // printf("gettimeofday onetime %d\n", onetime);
Packit 577717
    // Use above print statement to determine that the N-th gettime of day works
Packit 577717
    if ( onetime==17 ) {
Packit 577717
        printf("Intercept gettimeofday: Attaching PAPI_start to the %d th call to gettimeofday (this may need to be adjusted)\n", onetime);
Packit 577717
        int retval = (PAPI_start_ptr)( EventSet );
Packit 577717
        printf("Starting PAPI\n");
Packit 577717
        if( retval!=PAPI_OK ) fprintf( stdout, "PAPI_start failed\n" );
Packit 577717
    }
Packit 577717
    int (*original_function)(void *ptr1, void *ptr2);
Packit 577717
    original_function = dlsym(RTLD_NEXT, "gettimeofday");
Packit 577717
    return (*original_function)(ptr1, ptr2);
Packit 577717
}
Packit 577717
Packit 577717
int cudaFreeHost(void *ptr1, void *ptr2, int n1, int n2, void *ptr3) 
Packit 577717
{
Packit 577717
    static int onetime = 0;
Packit 577717
    long long values[10];
Packit 577717
    int retval, devnum;
Packit 577717
    onetime++;
Packit 577717
    if ( onetime==1 ) {
Packit 577717
        printf("Intercept cudaFreeHost: Used to get PAPI results\n" );
Packit 577717
        retval = (PAPI_stop_ptr)( EventSet, values );
Packit 577717
        if( retval != PAPI_OK )  fprintf( stderr, "PAPI_stop failed\n" );
Packit 577717
        for( devnum = 0; devnum < MAXDEVICES && devseen[devnum]==1  ; devnum++ )
Packit 577717
            printf( "PAPI counterValue: cuda::device:%d:%s: %12lld \n", devnum, "inst_executed", values[devnum] );
Packit 577717
    }
Packit 577717
    int (*original_function)(void *ptr1, void *ptr2, int n1, int n2, void *ptr3);
Packit 577717
    original_function = dlsym(RTLD_NEXT, "cudaFreeHost");
Packit 577717
    return (*original_function)(ptr1, ptr2, n1, n2, ptr3);
Packit 577717
}
Packit 577717