#include "mex.h"
#include "matrix.h"
#include "papi.h"

/* Correction subtracted from the operation count reported by mexFunction;
 * set back to 0 by mexFunction's reset path. */
static long long accum_error = 0;

/* Value of PAPI_get_real_usec() recorded when the counters were started.
 * 0 is used as the "counters not started" marker. */
static long long start_time = 0;

/*
 * MEX gateway that reports a floating-point operation count, and optionally
 * a rate, measured with PAPI.
 *
 * Inputs (prhs):  at most one argument, which must be a real scalar double.
 *                 A value of 0 stops/resets the counters before measuring;
 *                 any other value behaves like a call without arguments.
 * Outputs (plhs): up to two scalars, [ops, mflops].
 *                 plhs[0] = operation count minus accum_error,
 *                 plhs[1] = rate (count divided by elapsed microseconds when
 *                           the explicit counters are used, otherwise the
 *                           rate reported by PAPI_flops).
 *
 * Every failure is reported through mexErrMsgTxt, which does not return.
 */
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, const mxArray *prhs[]) {
    float real_time = 0, proc_time = 0, rate = 0;
    double *x;
    size_t mrows, ncols;
    int result;
    int flop_events[2];   /* PAPI takes event codes as int */
    long long ins = 0, flop_values[2];
    long long elapsed_time, now;

    /* Check for proper number of arguments. */
    if (nrhs > 1) {
        mexErrMsgTxt("This function expects one optional input.");
    } else if (nlhs > 2) {
        mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflops].");
    }

    /* The input must be a noncomplex scalar double. */
    if (nrhs == 1) {
        mrows = mxGetM(prhs[0]);
        ncols = mxGetN(prhs[0]);
        if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) || !(mrows == 1 && ncols == 1)) {
            mexErrMsgTxt("Input must be a noncomplex scalar double.");
        }
        /* Assign a pointer to the input. */
        x = mxGetPr(prhs[0]);

        /* If the input is 0, reset the counters by calling
         * PAPI_stop_counters. Its status is not checked here. */
        if (*x == 0) {
            if (start_time == 0) {
                PAPI_stop_counters(NULL, 0);
                accum_error = 0;
            } else {
                /* Counters were started by this file: stop them and mark
                 * them as not running so they are restarted below. The
                 * values read into flop_values are discarded. */
                start_time = 0;
                PAPI_stop_counters(flop_values, 2);
            }
        }
    }

    /* The assignments are parenthesised so that result holds the PAPI status
     * code rather than the 0/1 outcome of the comparison. */
    if ((result = PAPI_event_name_to_code("EMON_SSE_SSE2_COMP_INST_RETIRED_PACKED_DOUBLE", &(flop_events[0]))) < PAPI_OK) {
        /* The named event is not available: fall back to PAPI_flops. */
        if ((result = PAPI_flops(&real_time, &proc_time, &ins, &rate)) < PAPI_OK) {
            mexPrintf("Error code: %d\n", result);
            mexErrMsgTxt("Error getting flops.");
        }
    } else {
        if (start_time == 0) {
            /* First use (or first use after a reset): start both counters. */
            flop_events[1] = PAPI_FP_OPS;
            now = PAPI_get_real_usec();
            if ((result = PAPI_start_counters(flop_events, 2)) < PAPI_OK) {
                mexPrintf("Error code: %d\n", result);
                mexErrMsgTxt("Error getting flops.");
            } else {
                /* Record the start time only once the counters are really
                 * running, so a failed start is retried on the next call
                 * instead of reading counters that were never started. */
                start_time = now;
                ins = 0;
                rate = 0;
            }
        } else {
            if ((result = PAPI_read_counters(flop_values, 2)) < PAPI_OK) {
                mexPrintf("%d\n", result);
                mexErrMsgTxt("Error reading the running counters.");
            } else {
                elapsed_time = PAPI_get_real_usec() - start_time;
                /* The first event's count is weighted by two and added to
                 * the PAPI_FP_OPS count. */
                ins = (2 * flop_values[0]) + flop_values[1];
                /* Avoid dividing by zero when no time has elapsed. */
                rate = (elapsed_time > 0)
                           ? ((float)ins) / ((float)elapsed_time)
                           : 0;
            }
        }
    }

    /* mexPrintf("real: %f, proc: %f, rate: %f, ins: %lld\n", real_time, proc_time, rate, ins); */

    if (nlhs > 0) {
        plhs[0] = mxCreateScalarDouble((double)(ins - accum_error));
        /* This call was noted to add 7 fp instructions to the total, but
         * apparently not on Pentium M with Matlab 7.0.4, so accum_error is
         * left unchanged (formerly: accum_error += 7). */
        if (nlhs == 2) {
            plhs[1] = mxCreateScalarDouble((double)rate);
            /* The second call was noted to add 4 fp instructions to the
             * total, but apparently not on Pentium M with Matlab 7.0.4, so
             * accum_error is left unchanged (formerly: accum_error += 4). */
        }
    }
}