C **************************************************************************** C C matrixpapi.f C An example of matrix-matrix multiplication and using PAPI low level to C look at the performance. written by Kevin London C March 2000 C **************************************************************************** #include "fpapi_test.h" program fmatrixlowpapi implicit integer (p) INTEGER ncols1,nrows1,ncols2,nrows2 PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) INTEGER i,j,k,retval,nchr,numevents,EventSet CHARACTER*(PAPI_MAX_STR_LEN) vstring,mstring C PAPI values of the counters INTEGER event INTEGER*8 values(2) INTEGER*8 starttime,stoptime REAL*8 finaltime INTEGER ncpu,nnodes,totalcpus,vendor,model REAL revision, mhz REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), & r(nrows1,ncols2) integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() EventSet = PAPI_NULL C Setup default values numevents=0 starttime=0 stoptime=0 retval = PAPI_VER_CURRENT call PAPIf_library_init( retval ) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, *'PAPI_library_init', retval) end if C Create the eventset call PAPIf_create_eventset(EventSet,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_create_eventset', retval) end if C Total cycles call PAPIf_add_event(EventSet,PAPI_TOT_CYC,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_add_event PAPI_TOT_CYC', retval) end if C Total [floating point] instructions call PAPIf_query_event(PAPI_FP_INS, retval) if (retval .NE. PAPI_OK) then event = PAPI_TOT_INS else event = PAPI_FP_INS end if call PAPIf_add_event(EventSet,event,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_add_event PAPI_TOT_INS', retval) end if C Grab the hardware info call PAPIf_get_hardware_info( ncpu, nnodes, totalcpus, vendor, . vstring, model, mstring, revision, mhz ) do i=len(mstring),1,-1 if(mstring(i:i).NE.' ') goto 10 end do 10 if(i.LT.1)then nchr=1 else nchr=i end if if (tests_quiet .EQ. 0) then print * print 100, totalcpus,mstring(1:nchr), mhz print * print 101,'ncpu',ncpu, 'nnodes',nnodes, 'totalcpus',totalcpus print 102,'mhz',mhz,'revision',revision print 103,'vendor',vendor,'vstring',vstring print 104,'model',model,'mstring',mstring print * end if 100 format(i5,' CPU(s) ',a,' at ',f7.2,' MHz') 101 format(a9,' =',i6,7x,a9,' =',i5,5x,a9,'=',i5) 102 format(a9,' =',f7.2,6x,a9,' =',f15.5) 103 format(a9,' =',i6,7x,a9,' =',a40) 104 format(a9,' =',i6,7x,a9,' =',a40) C Open matrix file number 1 for reading C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') C Open matrix file number 2 for reading C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') C matrix 1: read in the matrix values do i=1, nrows1 do j=1,ncols1 p(i,j) = i*j*1.0 end do end do C matrix 2: read in the matrix values do i=1, nrows2 do j=1,ncols2 q(i,j) = i*j*1.0 end do end do C Initialize the result matrix do i=1,nrows1 do j=1, ncols2 r(i,j) = i*j*1.0 end do end do C Grab the beginning time call PAPIf_get_real_usec( starttime ) C Start the event counters call PAPIf_start( EventSet, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if C Compute the matrix-matrix multiplication do i=1,nrows1 do j=1,ncols2 do k=1,ncols1 r(i,j)=r(i,j) + p(i,k)*q(k,j) end do end do end do C Stop the counters and put the results in the array values call PAPIf_stop(EventSet,values,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if call PAPIf_get_real_usec( stoptime ) finaltime=(REAL(stoptime)/1000000.0)-(REAL(starttime)/1000000.0) C Make sure the compiler does not optimize away the multiplication call dummy(r) if (tests_quiet .EQ. 0) then print *, 'Time: ', finaltime, 'seconds' print *, 'Cycles: ', values(1) if (event .EQ. PAPI_TOT_INS) then print *, 'Total Instructions: ', values(2) else print *, 'FP Instructions: ', values(2) write(*,'(a,f9.6)') ' Efficiency (fp/cycle):', & real(values(2))/real(values(1)) end if end if call ftests_pass(__FILE__) end