|
Packit |
577717 |
/****************************/
|
|
Packit |
577717 |
/* THIS IS OPEN SOURCE CODE */
|
|
Packit |
577717 |
/****************************/
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/**
|
|
Packit |
577717 |
* @file HelloWorld.c
|
|
Packit |
577717 |
* @author Heike Jagode
|
|
Packit |
577717 |
* jagode@eecs.utk.edu
|
|
Packit |
577717 |
* Mods: <your name here>
|
|
Packit |
577717 |
* <your email address>
|
|
Packit |
577717 |
* test case for Example component
|
|
Packit |
577717 |
*
|
|
Packit |
577717 |
*
|
|
Packit |
577717 |
* @brief
|
|
Packit |
577717 |
* This file is a very simple HelloWorld C example which serves (together
|
|
Packit |
577717 |
* with its Makefile) as a guideline on how to add tests to components.
|
|
Packit |
577717 |
* The papi configure and papi Makefile will take care of the compilation
|
|
Packit |
577717 |
* of the component tests (if all tests are added to a directory named
|
|
Packit |
577717 |
* 'tests' in the specific component dir).
|
|
Packit |
577717 |
* See components/README for more details.
|
|
Packit |
577717 |
*
|
|
Packit |
577717 |
* The string "Hello World!" is mangled and then restored.
|
|
Packit |
577717 |
*/
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include <cuda.h>
|
|
Packit |
577717 |
#include <stdio.h>
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include "papi.h"
|
|
Packit |
577717 |
#include "papi_test.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#define NUM_EVENTS 1
|
|
Packit |
577717 |
#define PAPI 1
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// Prototypes
|
|
Packit |
577717 |
__global__ void helloWorld(char*);
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// Host function
|
|
Packit |
577717 |
int main(int argc, char** argv)
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
#ifdef PAPI
|
|
Packit |
577717 |
int retval, i;
|
|
Packit |
577717 |
int EventSet = PAPI_NULL;
|
|
Packit |
577717 |
long long values[NUM_EVENTS];
|
|
Packit |
577717 |
/* REPLACE THE EVENT NAME 'PAPI_FP_OPS' WITH A CUDA EVENT
|
|
Packit |
577717 |
FOR THE CUDA DEVICE YOU ARE RUNNING ON.
|
|
Packit |
577717 |
RUN papi_native_avail to get a list of CUDA events that are
|
|
Packit |
577717 |
supported on your machine */
|
|
Packit |
577717 |
//char *EventName[] = { "PAPI_FP_OPS" };
|
|
Packit |
577717 |
char const *EventName[] = { "cuda:::event:elapsed_cycles_sm:device=0" };
|
|
Packit |
577717 |
int events[NUM_EVENTS];
|
|
Packit |
577717 |
int eventCount = 0;
|
|
Packit |
577717 |
int quiet;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Set TESTS_QUIET variable */
|
|
Packit |
577717 |
quiet=tests_quiet( argc, argv );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* PAPI Initialization */
|
|
Packit |
577717 |
retval = PAPI_library_init( PAPI_VER_CURRENT );
|
|
Packit |
577717 |
if( retval != PAPI_VER_CURRENT ) {
|
|
Packit |
577717 |
if (!quiet) printf("PAPI init failed\n");
|
|
Packit |
577717 |
test_fail(__FILE__,__LINE__,
|
|
Packit |
577717 |
"PAPI_library_init failed", 0 );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if (!quiet) {
|
|
Packit |
577717 |
printf( "PAPI_VERSION : %4d %6d %7d\n",
|
|
Packit |
577717 |
PAPI_VERSION_MAJOR( PAPI_VERSION ),
|
|
Packit |
577717 |
PAPI_VERSION_MINOR( PAPI_VERSION ),
|
|
Packit |
577717 |
PAPI_VERSION_REVISION( PAPI_VERSION ) );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* convert PAPI native events to PAPI code */
|
|
Packit |
577717 |
for( i = 0; i < NUM_EVENTS; i++ ){
|
|
Packit |
577717 |
retval = PAPI_event_name_to_code( (char *)EventName[i], &events[i] );
|
|
Packit |
577717 |
if( retval != PAPI_OK ) {
|
|
Packit |
577717 |
fprintf( stderr, "PAPI_event_name_to_code failed\n" );
|
|
Packit |
577717 |
continue;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
eventCount++;
|
|
Packit |
577717 |
if (!quiet) printf( "Name %s --- Code: %#x\n", EventName[i], events[i] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* if we did not find any valid events, just report test failed. */
|
|
Packit |
577717 |
if (eventCount == 0) {
|
|
Packit |
577717 |
if (!quiet) printf( "Test FAILED: no valid events found.\n");
|
|
Packit |
577717 |
test_skip(__FILE__,__LINE__,"No events found",0);
|
|
Packit |
577717 |
return 1;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
retval = PAPI_create_eventset( &EventSet );
|
|
Packit |
577717 |
if( retval != PAPI_OK ) {
|
|
Packit |
577717 |
if (!quiet) printf( "PAPI_create_eventset failed\n" );
|
|
Packit |
577717 |
test_fail(__FILE__,__LINE__,"Cannot create eventset",retval);
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// If multiple GPUs/contexts were being used,
|
|
Packit |
577717 |
// you need to switch to each device before adding its events
|
|
Packit |
577717 |
// e.g. cudaSetDevice( 0 );
|
|
Packit |
577717 |
retval = PAPI_add_events( EventSet, events, eventCount );
|
|
Packit |
577717 |
if( retval != PAPI_OK ) {
|
|
Packit |
577717 |
fprintf( stderr, "PAPI_add_events failed\n" );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
retval = PAPI_start( EventSet );
|
|
Packit |
577717 |
if( retval != PAPI_OK ) {
|
|
Packit |
577717 |
fprintf( stderr, "PAPI_start failed\n" );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int j;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// desired output
|
|
Packit |
577717 |
char str[] = "Hello World!";
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// mangle contents of output
|
|
Packit |
577717 |
// the null character is left intact for simplicity
|
|
Packit |
577717 |
for(j = 0; j < 12; j++) {
|
|
Packit |
577717 |
str[j] -= j;
|
|
Packit |
577717 |
//printf("str=%s\n", str);
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// allocate memory on the device
|
|
Packit |
577717 |
char *d_str;
|
|
Packit |
577717 |
size_t size = sizeof(str);
|
|
Packit |
577717 |
cudaMalloc((void**)&d_str, size);
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// copy the string to the device
|
|
Packit |
577717 |
cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// set the grid and block sizes
|
|
Packit |
577717 |
dim3 dimGrid(2); // one block per word
|
|
Packit |
577717 |
dim3 dimBlock(6); // one thread per character
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// invoke the kernel
|
|
Packit |
577717 |
helloWorld<<< dimGrid, dimBlock >>>(d_str);
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// retrieve the results from the device
|
|
Packit |
577717 |
cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// free up the allocated memory on the device
|
|
Packit |
577717 |
cudaFree(d_str);
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if (!quiet) printf("END: %s\n", str);
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#ifdef PAPI
|
|
Packit |
577717 |
retval = PAPI_stop( EventSet, values );
|
|
Packit |
577717 |
if( retval != PAPI_OK )
|
|
Packit |
577717 |
fprintf( stderr, "PAPI_stop failed\n" );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
retval = PAPI_cleanup_eventset(EventSet);
|
|
Packit |
577717 |
if( retval != PAPI_OK )
|
|
Packit |
577717 |
fprintf(stderr, "PAPI_cleanup_eventset failed\n");
|
|
Packit |
577717 |
|
|
Packit |
577717 |
retval = PAPI_destroy_eventset(&EventSet);
|
|
Packit |
577717 |
if (retval != PAPI_OK)
|
|
Packit |
577717 |
fprintf(stderr, "PAPI_destroy_eventset failed\n");
|
|
Packit |
577717 |
|
|
Packit |
577717 |
PAPI_shutdown();
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for( i = 0; i < eventCount; i++ )
|
|
Packit |
577717 |
if (!quiet) printf( "%12lld \t\t --> %s \n", values[i], EventName[i] );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
test_pass(__FILE__);
|
|
Packit |
577717 |
|
|
Packit |
577717 |
return 0;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
// Device kernel
|
|
Packit |
577717 |
__global__ void
|
|
Packit |
577717 |
helloWorld(char* str)
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
// determine where in the thread grid we are
|
|
Packit |
577717 |
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
Packit |
577717 |
// unmangle output
|
|
Packit |
577717 |
str[idx] += idx;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|