Blame src/components/cuda/sampling/activity.c

Packit 577717
/*
Packit 577717
 * Author : Sangamesh Ragate
Packit 577717
 * Date : 18th Nov 2015
Packit 577717
 * ICl-UTK
Packit 577717
 * Description : This is the shared library that sets up the environment
Packit 577717
 * for the cuda application by creating the context and keeping it ready
Packit 577717
 * to perform PC sampling of the cuda application as soon as it launches the kernel
Packit 577717
 */
Packit 577717
Packit 577717
Packit 577717
Packit 577717
#include <cuda.h>
Packit 577717
#include <cupti.h>
Packit 577717
#include <stdio.h>
Packit 577717
#include <stdlib.h>
Packit 577717
#include <string.h>
Packit 577717
Packit 577717
static CUpti_SubscriberHandle g_subscriber;
Packit 577717
Packit 577717
Packit 577717
/*
 * RUNTIME_API_CALL(apiFuncCall)
 *
 * Evaluates a CUDA runtime API expression exactly once. If the result is
 * not cudaSuccess, prints the file, line, the text of the call and the
 * runtime's error string to stderr and terminates the process with
 * exit(-1).
 *
 * The argument is parenthesized so that any expression can be passed
 * without precedence surprises.
 *
 * NOTE(review): cudaError_t / cudaGetErrorString come from the CUDA runtime
 * API headers; confirm they are reachable through the headers this file
 * includes before using the macro.
 */
#define RUNTIME_API_CALL(apiFuncCall)                                          \
do {                                                                           \
    cudaError_t _status = (apiFuncCall);                                       \
    if (_status != cudaSuccess) {                                              \
        fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n",   \
                __FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));\
        exit(-1);                                                              \
    }                                                                          \
} while (0)
Packit 577717
Packit 577717
/*
 * CUPTI_CALL(call)
 *
 * Evaluates a CUPTI expression exactly once. If the result is not
 * CUPTI_SUCCESS, prints the file, line, the text of the call and a
 * description of the error to stderr and terminates the process with
 * exit(-1).
 *
 * The description is looked up with cuptiGetResultString(); if that lookup
 * itself fails (or yields no string) a fixed placeholder is printed instead
 * of passing an indeterminate pointer to fprintf.
 */
#define CUPTI_CALL(call)                                                      \
do {                                                                          \
    CUptiResult _status = (call);                                             \
    if (_status != CUPTI_SUCCESS) {                                           \
        const char *errstr = NULL;                                            \
        if (cuptiGetResultString(_status, &errstr) != CUPTI_SUCCESS ||        \
            errstr == NULL) {                                                 \
            errstr = "<unknown CUPTI error>";                                 \
        }                                                                     \
        fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n",  \
                __FILE__, __LINE__, #call, errstr);                           \
        exit(-1);                                                             \
    }                                                                         \
} while (0)
Packit 577717
Packit 577717
/* Payload size, in bytes, of each activity buffer handed to CUPTI. */
#define BUF_SIZE (32 * 16384)
/* Extra bytes added on top of BUF_SIZE when a buffer is allocated. */
#define ALIGN_SIZE (8)

/*
 * Per-stall-reason bookkeeping, indexed by slot 0..11.
 *   stall_name[i] - counter name for slot i; stays NULL until that stall
 *                   reason has been seen at least once.
 *   val[i]        - running total of samples attributed to slot i.
 * Both tables have static storage duration and therefore start out zeroed.
 */
static char *stall_name[12];
static int val[12];
Packit 577717
Packit 577717
	
Packit 577717
static const char *
Packit 577717
getStallReasonString(CUpti_ActivityPCSamplingStallReason reason,unsigned int samples)
Packit 577717
{
Packit 577717
    switch (reason) {
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_INVALID:
Packit 577717
		stall_name[0]="Stall_invalid";
Packit 577717
		val[0] += samples;
Packit 577717
        return "Invalid";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_NONE:
Packit 577717
		stall_name[1]="Stall_none";
Packit 577717
		val[1] += samples;
Packit 577717
        return "Selected";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_INST_FETCH:
Packit 577717
		stall_name[2]="Stall_inst_fetch";
Packit 577717
		val[2] += samples;
Packit 577717
        return "Instruction fetch";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_EXEC_DEPENDENCY:
Packit 577717
		stall_name[3]="Stall_exec_dependency";
Packit 577717
		val[3] += samples;
Packit 577717
        return "Execution dependency";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_DEPENDENCY:
Packit 577717
		stall_name[4]="Stall_mem_dependency";
Packit 577717
		val[4] += samples;
Packit 577717
        return "Memory dependency";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_TEXTURE:
Packit 577717
		stall_name[5]="Stall_texture";
Packit 577717
		val[5] += samples;
Packit 577717
        return "Texture";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_SYNC:
Packit 577717
		stall_name[6]="Stall_sync";
Packit 577717
		val[6] += samples;
Packit 577717
        return "Sync";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_CONSTANT_MEMORY_DEPENDENCY:
Packit 577717
		stall_name[7]="Stall_const_mem_dependency";
Packit 577717
		val[7] += samples;
Packit 577717
        return "Constant memory dependency";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_PIPE_BUSY:
Packit 577717
		stall_name[8]="Stall_pipe_busy";
Packit 577717
		val[8] += samples;
Packit 577717
        return "Pipe busy";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_THROTTLE:
Packit 577717
		stall_name[9]="Stall_memory_throttle";
Packit 577717
		val[9] += samples;
Packit 577717
        return "Memory throttle";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_NOT_SELECTED:
Packit 577717
		stall_name[10]="Stall_warp_not_selected";
Packit 577717
		val[10] += samples;
Packit 577717
        return "Warp Not selected";
Packit 577717
    case CUPTI_ACTIVITY_PC_SAMPLING_STALL_OTHER:
Packit 577717
		stall_name[11]="Stall_other";
Packit 577717
		val[11] += samples;
Packit 577717
        return "Other";
Packit 577717
    default:
Packit 577717
        break;
Packit 577717
    }
Packit 577717
Packit 577717
    return NULL;
Packit 577717
}
Packit 577717
Packit 577717
static void
Packit 577717
printActivity(CUpti_Activity *record)
Packit 577717
{
Packit 577717
    switch (record->kind) {
Packit 577717
        case CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR:
Packit 577717
        {
Packit 577717
            CUpti_ActivitySourceLocator *sourceLocator = (CUpti_ActivitySourceLocator *)record;
Packit 577717
            printf("Source Locator Id %d, File %s Line %d\n", sourceLocator->id, sourceLocator->fileName, sourceLocator->lineNumber);
Packit 577717
            break;
Packit 577717
        }
Packit 577717
        case CUPTI_ACTIVITY_KIND_PC_SAMPLING:
Packit 577717
        {
Packit 577717
            CUpti_ActivityPCSampling *psRecord = (CUpti_ActivityPCSampling *)record;
Packit 577717
            printf("source %u, functionId %u, pc 0x%x, corr %u, samples %u, stallreason %s\n",
Packit 577717
                  psRecord->sourceLocatorId,
Packit 577717
                  psRecord->functionId,
Packit 577717
                  psRecord->pcOffset,
Packit 577717
                  psRecord->correlationId,
Packit 577717
                  psRecord->samples,
Packit 577717
                  getStallReasonString(psRecord->stallReason,psRecord->samples));
Packit 577717
                  break;
Packit 577717
        }
Packit 577717
        case CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO:
Packit 577717
        {
Packit 577717
            CUpti_ActivityPCSamplingRecordInfo *pcsriResult =
Packit 577717
                                (CUpti_ActivityPCSamplingRecordInfo *)(void *)record;
Packit 577717
			
Packit 577717
			printf("\n\n************** PC_SAMPLING_RECORD_SUMMARY ************************\n");
Packit 577717
            printf("corr %u, totalSamples %llu, droppedSamples %llu, sampling period %llu\n",
Packit 577717
                  pcsriResult->correlationId,
Packit 577717
                  (unsigned long long)pcsriResult->totalSamples,
Packit 577717
                  (unsigned long long)pcsriResult->droppedSamples,
Packit 577717
				  (unsigned long long)pcsriResult->samplingPeriodInCycles);
Packit 577717
            break;
Packit 577717
        }
Packit 577717
        case CUPTI_ACTIVITY_KIND_FUNCTION:
Packit 577717
        {
Packit 577717
            CUpti_ActivityFunction *fResult =
Packit 577717
                (CUpti_ActivityFunction *)record;
Packit 577717
Packit 577717
			printf("\n\n************************************ ACTIVITY_KIND_FUNCTION_SUMMARY **********************************\n");
Packit 577717
            printf("id %u, ctx %u, moduleId %u, functionIndex %u, name %s\n",
Packit 577717
                fResult->id,
Packit 577717
                fResult->contextId,
Packit 577717
                fResult->moduleId,
Packit 577717
                fResult->functionIndex,
Packit 577717
                fResult->name);
Packit 577717
			printf("\n\n\n\n**************************************************************************************************\n");
Packit 577717
            break;
Packit 577717
        }
Packit 577717
		case CUPTI_ACTIVITY_KIND_KERNEL:
Packit 577717
		{
Packit 577717
			CUpti_ActivityKernel3 *kernel = (CUpti_ActivityKernel3 *)record;
Packit 577717
			printf("\n\n************************************** KERNEL_RECORD_SUMMARY **********************************\n");
Packit 577717
			printf("Kernel %s , device %d, context %d, correlation %d, stream %d,[start-end][%ld-%ld]\n\n",kernel->name, 
Packit 577717
					kernel->deviceId,kernel->contextId,kernel->correlationId,kernel->streamId,kernel->start,kernel->end);
Packit 577717
			break;
Packit 577717
		}
Packit 577717
Packit 577717
		default:
Packit 577717
            printf("\n");
Packit 577717
            break;
Packit 577717
    }
Packit 577717
}
Packit 577717
Packit 577717
static void CUPTIAPI
Packit 577717
bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
Packit 577717
{
Packit 577717
    *size = BUF_SIZE + ALIGN_SIZE;
Packit 577717
    *buffer = (uint8_t*) calloc(1, *size);
Packit 577717
    *maxNumRecords = 0;
Packit 577717
    if (*buffer == NULL) {
Packit 577717
        printf("Error: out of memory\n");
Packit 577717
        exit(-1);
Packit 577717
    }
Packit 577717
}
Packit 577717
Packit 577717
static void CUPTIAPI
Packit 577717
bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
Packit 577717
{
Packit 577717
    CUptiResult status;
Packit 577717
    CUpti_Activity *record = NULL;
Packit 577717
    do {
Packit 577717
        status = cuptiActivityGetNextRecord(buffer, validSize, &record);
Packit 577717
        if(status == CUPTI_SUCCESS) {
Packit 577717
            printActivity(record);
Packit 577717
        }
Packit 577717
        else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
Packit 577717
            break;
Packit 577717
        }
Packit 577717
        else {
Packit 577717
            CUPTI_CALL(status);
Packit 577717
        }
Packit 577717
    } while (1);
Packit 577717
Packit 577717
    size_t dropped;
Packit 577717
    CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
Packit 577717
    if (dropped != 0) {
Packit 577717
        printf("Dropped %u activity records\n", (unsigned int)dropped);
Packit 577717
    }
Packit 577717
	printf("\n\n\n\n\n\n");
Packit 577717
	printf("************* STALL SUMMARY ********************\n");
Packit 577717
	int i;
Packit 577717
	for(i=0;i<12;++i)
Packit 577717
		if(stall_name[i] != NULL)
Packit 577717
			printf("%s = %d \n",stall_name[i],val[i]);
Packit 577717
	printf("*************************************************\n\n");
Packit 577717
Packit 577717
Packit 577717
}
Packit 577717
Packit 577717
#define DUMP_CUBIN 1
Packit 577717
Packit 577717
void CUPTIAPI dumpCudaModule(CUpti_CallbackId cbid, void *resourceDescriptor)
Packit 577717
{
Packit 577717
#if DUMP_CUBIN
Packit 577717
	  const char *pCubin;
Packit 577717
	  size_t cubinSize;
Packit 577717
Packit 577717
		  
Packit 577717
	  //dump the cubin at MODULE_LOADED_STARTING
Packit 577717
	  CUpti_ModuleResourceData *moduleResourceData = (CUpti_ModuleResourceData *)resourceDescriptor;
Packit 577717
	  #endif
Packit 577717
		  
Packit 577717
	  if (cbid == CUPTI_CBID_RESOURCE_MODULE_LOADED) {
Packit 577717
		  #if DUMP_CUBIN
Packit 577717
		  // You can use nvdisasm to dump the SASS from the cubin. 
Packit 577717
		  // Try nvdisasm -b -fun <function_id> sass_to_source.cubin
Packit 577717
		  pCubin = moduleResourceData->pCubin;
Packit 577717
		  cubinSize = moduleResourceData->cubinSize;
Packit 577717
			  
Packit 577717
		  FILE *cubin;
Packit 577717
		  cubin = fopen("sass_source_map.cubin", "wb");
Packit 577717
		  fwrite(pCubin, sizeof(uint8_t), cubinSize, cubin);
Packit 577717
		  fclose(cubin);
Packit 577717
		  #endif
Packit 577717
	  }else if (cbid == CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING) {
Packit 577717
	  // You can dump the cubin either at MODULE_LOADED or MODULE_UNLOAD_STARTING
Packit 577717
	  }
Packit 577717
}
Packit 577717
Packit 577717
static void
Packit 577717
handleResource(CUpti_CallbackId cbid, const CUpti_ResourceData *resourceData)
Packit 577717
{
Packit 577717
	  if (cbid == CUPTI_CBID_RESOURCE_MODULE_LOADED) {
Packit 577717
		    dumpCudaModule(cbid, resourceData->resourceDescriptor);
Packit 577717
	  }else if (cbid == CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING) {
Packit 577717
			dumpCudaModule(cbid, resourceData->resourceDescriptor);
Packit 577717
	  }
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
static void CUPTIAPI
Packit 577717
traceCallback(void *userdata, CUpti_CallbackDomain domain,
Packit 577717
		              CUpti_CallbackId cbid, const void *cbdata)
Packit 577717
{
Packit 577717
	  if (domain == CUPTI_CB_DOMAIN_RESOURCE) {
Packit 577717
		 handleResource(cbid, (CUpti_ResourceData *)cbdata);
Packit 577717
	  }
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
__attribute__((constructor)) void
Packit 577717
initTrace()
Packit 577717
{
Packit 577717
	//get the arguments from the environment variables
Packit 577717
	int deviceId, sampRate;
Packit 577717
	
Packit 577717
    CUcontext cuCtx;
Packit 577717
	deviceId = atoi(getenv("GPU_DEVICE_ID"));
Packit 577717
    cuInit(0);
Packit 577717
	cuCtxCreate(&cuCtx,0,deviceId);
Packit 577717
	CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted));
Packit 577717
    CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING));
Packit 577717
	//CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_BRANCH));
Packit 577717
Packit 577717
	CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL));
Packit 577717
	CUPTI_CALL(cuptiSubscribe(&g_subscriber, (CUpti_CallbackFunc)traceCallback, NULL));
Packit 577717
	CUPTI_CALL(cuptiEnableDomain(1, g_subscriber, CUPTI_CB_DOMAIN_RESOURCE));
Packit 577717
	CUpti_ActivityPCSamplingConfig config;
Packit 577717
	sampRate=atoi(getenv("PC_SAMPLING_RATE"));
Packit 577717
	config.samplingPeriod= sampRate;
Packit 577717
	CUPTI_CALL(cuptiActivityConfigurePCSampling(cuCtx, &config));
Packit 577717
}
Packit 577717
Packit 577717
__attribute__((destructor)) void
Packit 577717
finiTrace()
Packit 577717
{
Packit 577717
//	printf("FLushing CUPTI \n");
Packit 577717
	CUPTI_CALL(cuptiActivityFlushAll(0));
Packit 577717
}
Packit 577717