/****************************/
/* THIS IS OPEN SOURCE CODE */
/****************************/

/**
 * @file    HelloWorld.c
 * CVS:     $Id$
 * @author  Heike Jagode
 *          jagode@eecs.utk.edu
 * Mods:
 *
 * test case for Example component
 *
 *
 * @brief
 *  This file is a very simple HelloWorld C example which serves (together
 *  with its Makefile) as a guideline on how to add tests to components.
 *  The papi configure and papi Makefile will take care of the compilation
 *  of the component tests (if all tests are added to a directory named
 *  'tests' in the specific component dir).
 *  See components/README for more details.
 *
 *  The string "Hello World!" is mangled and then restored.
 */

/* NOTE(review): the two system header names were missing from the copy of
 * this file that was reviewed; stdio.h and stdlib.h provide every libc
 * symbol used below (printf/fprintf, exit). The CUDA runtime declarations
 * are supplied implicitly when this file is compiled as CUDA source. */
#include <stdio.h>
#include <stdlib.h>

#include "papi.h"
#include "papi_test.h"

#define NUM_EVENTS 1
#define PAPI

/* Number of characters in "Hello World!" (terminating NUL excluded).
 * Shared by the host-side mangling loop and the kernel's bounds guard. */
#define MSG_LEN 12

/* Evaluate a CUDA runtime call; on failure report where and why, then
 * terminate the test with a non-zero exit status. */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cuda_err_ = (call);                                    \
        if (cuda_err_ != cudaSuccess) {                                    \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cuda_err_));              \
            exit(1);                                                       \
        }                                                                  \
    } while (0)

// Prototypes
__global__ void helloWorld(char*);


/**
 * Host function.
 *
 * When PAPI is defined: initializes PAPI, resolves the names in EventName to
 * event codes, and adds the resolved events to one event set. Then, for each
 * CUDA device, it starts counting, mangles "Hello World!" on the host, has
 * the helloWorld kernel restore it on the device, prints the restored string,
 * stops counting and prints the counter values.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on success; 1 if PAPI setup fails, no event name resolves,
 *         no CUDA device is available, or PAPI_start/PAPI_stop fails on
 *         any device. A failing CUDA runtime call exits with status 1.
 */
int main(int argc, char** argv)
{
    int status = 0;

    (void)argc;
    (void)argv;

#ifdef PAPI
    int retval, i;
    int EventSet = PAPI_NULL;
    /* Zero-initialized so nothing indeterminate is ever printed. */
    long long values[NUM_EVENTS] = { 0 };
    /* REPLACE THE EVENT NAME 'PAPI_FP_OPS' WITH A CUDA EVENT
       FOR THE CUDA DEVICE YOU ARE RUNNING ON.
       RUN papi_native_avail to get a list of CUDA events that are
       supported on your machine */
    // e.g. on a P100 nvml:::Tesla_P100-SXM2-16GB:power
    char *EventName[] = { "PAPI_FP_OPS" };
    int events[NUM_EVENTS];
    /* Names of the events that resolved, stored at the same index as their
     * code in events[], so the report below pairs each value with the
     * right name. */
    char *validNames[NUM_EVENTS];
    int eventCount = 0;

    /* PAPI Initialization */
    retval = PAPI_library_init( PAPI_VER_CURRENT );
    if( retval != PAPI_VER_CURRENT ) {
        fprintf( stderr, "PAPI_library_init failed\n" );
        return 1;
    }

    printf( "PAPI_VERSION     : %4d %6d %7d\n",
            PAPI_VERSION_MAJOR( PAPI_VERSION ),
            PAPI_VERSION_MINOR( PAPI_VERSION ),
            PAPI_VERSION_REVISION( PAPI_VERSION ) );

    /* convert PAPI native events to PAPI code */
    for( i = 0; i < NUM_EVENTS; i++ ) {
        int code = 0;

        retval = PAPI_event_name_to_code( EventName[i], &code );
        if( retval != PAPI_OK ) {
            fprintf( stderr, "PAPI_event_name_to_code failed\n" );
            continue;
        }
        /* Compact: resolved events occupy events[0 .. eventCount). */
        events[eventCount] = code;
        validNames[eventCount] = EventName[i];
        eventCount++;
        printf( "Name %s --- Code: %#x\n", EventName[i], code );
    }

    /* if we did not find any valid events, just report test failed. */
    if (eventCount == 0) {
        printf( "Test FAILED: no valid events found.\n");
        return 1;
    }

    retval = PAPI_create_eventset( &EventSet );
    if( retval != PAPI_OK ) {
        fprintf( stderr, "PAPI_create_eventset failed\n" );
        return 1;
    }

    retval = PAPI_add_events( EventSet, events, eventCount );
    if( retval != PAPI_OK ) {
        fprintf( stderr, "PAPI_add_events failed\n" );
        return 1;
    }
#endif

    int j;
    int count = 0;
    int cuda_device;

    /* A failing query and a zero device count both mean the test cannot
     * exercise anything, so report failure instead of exiting silently. */
    if ( cudaGetDeviceCount( &count ) != cudaSuccess || count == 0 ) {
        printf( "Test FAILED: no CUDA devices found.\n" );
        return 1;
    }

    for ( cuda_device = 0; cuda_device < count; cuda_device++ ) {
        CUDA_CHECK( cudaSetDevice( cuda_device ) );

#ifdef PAPI
        int counting = 0;

        retval = PAPI_start( EventSet );
        if( retval != PAPI_OK ) {
            fprintf( stderr, "PAPI_start failed\n" );
            status = 1;
        } else {
            counting = 1;
        }
#endif

        // desired output
        char str[] = "Hello World!";

        // mangle contents of output
        // the null character is left intact for simplicity
        for (j = 0; j < MSG_LEN; j++) {
            str[j] -= j;
        }

        // allocate memory on the device
        char *d_str = NULL;
        size_t size = sizeof(str);
        CUDA_CHECK( cudaMalloc((void**)&d_str, size) );

        // copy the string to the device
        CUDA_CHECK( cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice) );

        // set the grid and block sizes
        dim3 dimGrid(2);   // one block per word
        dim3 dimBlock(6);  // one thread per character

        // invoke the kernel
        helloWorld<<< dimGrid, dimBlock >>>(d_str);
        // a kernel launch returns no status; query launch errors explicitly
        CUDA_CHECK( cudaGetLastError() );

        // retrieve the results from the device
        CUDA_CHECK( cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost) );

        // free up the allocated memory on the device
        CUDA_CHECK( cudaFree(d_str) );

        printf("END: %s\n", str);

#ifdef PAPI
        /* Only stop and report when counting actually started. */
        if ( counting ) {
            retval = PAPI_stop( EventSet, values );
            if( retval != PAPI_OK ) {
                fprintf( stderr, "PAPI_stop failed\n" );
                status = 1;
            } else {
                for( i = 0; i < eventCount; i++ )
                    printf( "On device %d: %12lld \t\t --> %s \n",
                            cuda_device, values[i], validNames[i] );
            }
        }
#endif
    }

#ifdef PAPI
    /* Release the event set and shut PAPI down before exiting. */
    retval = PAPI_cleanup_eventset( EventSet );
    if( retval != PAPI_OK )
        fprintf( stderr, "PAPI_cleanup_eventset failed\n" );

    retval = PAPI_destroy_eventset( &EventSet );
    if( retval != PAPI_OK )
        fprintf( stderr, "PAPI_destroy_eventset failed\n" );

    PAPI_shutdown();
#endif

    return status;
}


/**
 * Device kernel: undoes the host-side mangling by adding each character's
 * index back onto it.
 *
 * Launch layout: 1D grid of 1D blocks; main() launches 2 blocks of 6 threads,
 * i.e. one thread per character. Threads whose flat index is >= MSG_LEN do
 * nothing, so a larger launch cannot write past the message.
 *
 * @param str device pointer to a buffer of at least MSG_LEN characters;
 *            modified in place
 */
__global__ void helloWorld(char* str)
{
    // determine where in the thread grid we are
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // unmangle output (guarded against oversize launches)
    if (idx < MSG_LEN) {
        str[idx] += idx;
    }
}