/*
* File: perfctr.c
* Author: Philip Mucci
* mucci at cs.utk.edu
* Mods: Kevin London
* london at cs.utk.edu
* Mods: Maynard Johnson
* maynardj at us.ibm.com
* Mods: Brian Sheely
* bsheely at eecs.utk.edu
*/
#include <string.h>
#include <linux/unistd.h>
#include <errno.h>
#include <sys/time.h>
#include "papi.h"
#include "papi_internal.h"
#ifdef PPC64
#include "perfctr-ppc64.h"
#else
#include "perfctr-x86.h"
#include "papi_libpfm_events.h"
#endif
#include "papi_vector.h"
#include "papi_memory.h"
#include "extras.h"
#include "linux-common.h"
#include "linux-context.h"
extern papi_vector_t _perfctr_vector;
#ifdef PPC64
extern int setup_ppc64_presets( int cputype, int cidx );
#endif
/* TODO: FOPEN_ERROR belongs in a shared Linux header (e.g. linux-common.h). */
#define FOPEN_ERROR "fopen(%s) returned NULL"
#if defined(PERFCTR26)
#define PERFCTR_CPU_NAME(pi) perfctr_info_cpu_name(pi)
#define PERFCTR_CPU_NRCTRS(pi) perfctr_info_nrctrs(pi)
#else
#define PERFCTR_CPU_NAME perfctr_cpu_name
#define PERFCTR_CPU_NRCTRS perfctr_cpu_nrctrs
#endif
#if !defined(PPC64)
/* Map a perfctr CPU-type code to a PAPI vendor identifier.
 *
 * perfctr_cpu_type: the cpu_type field reported by the perfctr driver
 *                   (a PERFCTR_X86_* enumerator).
 * Returns PAPI_VENDOR_INTEL, PAPI_VENDOR_AMD, or PAPI_VENDOR_UNKNOWN.
 *
 * Many case labels are wrapped in #ifdef because the corresponding
 * PERFCTR_X86_* enumerators only exist in newer perfctr releases;
 * compiling against an older perfctr simply omits those cases.
 * The Intel cases intentionally fall through to one return, as do
 * the AMD cases.
 */
static inline int
xlate_cpu_type_to_vendor( unsigned perfctr_cpu_type )
{
	switch ( perfctr_cpu_type ) {
	case PERFCTR_X86_INTEL_P5:
	case PERFCTR_X86_INTEL_P5MMX:
	case PERFCTR_X86_INTEL_P6:
	case PERFCTR_X86_INTEL_PII:
	case PERFCTR_X86_INTEL_PIII:
	case PERFCTR_X86_INTEL_P4:
	case PERFCTR_X86_INTEL_P4M2:
#ifdef PERFCTR_X86_INTEL_P4M3
	case PERFCTR_X86_INTEL_P4M3:
#endif
#ifdef PERFCTR_X86_INTEL_PENTM
	case PERFCTR_X86_INTEL_PENTM:
#endif
#ifdef PERFCTR_X86_INTEL_CORE
	case PERFCTR_X86_INTEL_CORE:
#endif
#ifdef PERFCTR_X86_INTEL_CORE2
	case PERFCTR_X86_INTEL_CORE2:
#endif
#ifdef PERFCTR_X86_INTEL_ATOM	 /* family 6 model 28 */
	case PERFCTR_X86_INTEL_ATOM:
#endif
#ifdef PERFCTR_X86_INTEL_NHLM	 /* family 6 model 26 */
	case PERFCTR_X86_INTEL_NHLM:
#endif
#ifdef PERFCTR_X86_INTEL_WSTMR
	case PERFCTR_X86_INTEL_WSTMR:
#endif
		return ( PAPI_VENDOR_INTEL );
#ifdef PERFCTR_X86_AMD_K8
	case PERFCTR_X86_AMD_K8:
#endif
#ifdef PERFCTR_X86_AMD_K8C
	case PERFCTR_X86_AMD_K8C:
#endif
#ifdef PERFCTR_X86_AMD_FAM10	 /* this is defined in perfctr 2.6.29 */
	case PERFCTR_X86_AMD_FAM10:
#endif
	case PERFCTR_X86_AMD_K7:
		return ( PAPI_VENDOR_AMD );
	default:
		return ( PAPI_VENDOR_UNKNOWN );
	}
}
#endif
long long tb_scale_factor = ( long long ) 1; /* needed to scale get_cycles on PPC series */
/* Initialize the perfctr component: query the kernel perfctr driver for
 * CPU/counter information, fill in the component info structure and the
 * global system info, and set up the native/preset event tables.
 *
 * cidx: the component index assigned to this component by the framework.
 * Returns PAPI_OK on success, PAPI_ESYS (with disabled_reason set) if the
 * driver cannot be opened or queried, or an error code propagated from the
 * OS/event-table setup helpers.
 */
int
_perfctr_init_component( int cidx )
{
	int retval;
	struct perfctr_info info;
	char abiv[PAPI_MIN_STR_LEN];

#if defined(PERFCTR26)
	int fd;
#else
	struct vperfctr *dev;
#endif

#if defined(PERFCTR26)
	/* Get info from the kernel */
	/* Use lower level calls per Mikael to get the perfctr info
	   without actually creating a new kernel-side state.
	   Also, close the fd immediately after retrieving the info.
	   This is much lighter weight and doesn't reserve the counter
	   resources. Also compatible with perfctr 2.6.14.
	 */
	fd = _vperfctr_open( 0 );
	if ( fd < 0 ) {
		strncpy( _perfctr_vector.cmp_info.disabled_reason,
				 VOPEN_ERROR, PAPI_MAX_STR_LEN );
		/* strncpy does not terminate on truncation; force it */
		_perfctr_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = '\0';
		return PAPI_ESYS;
	}
	retval = perfctr_info( fd, &info );
	close( fd );
	if ( retval < 0 ) {
		strncpy( _perfctr_vector.cmp_info.disabled_reason,
				 VINFO_ERROR, PAPI_MAX_STR_LEN );
		_perfctr_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = '\0';
		return PAPI_ESYS;
	}

	/* copy tsc multiplier to local variable */
	/* this field appears in perfctr 2.6 and higher */
	tb_scale_factor = ( long long ) info.tsc_to_cpu_mult;
#else
	/* Opened once for all threads. */
	if ( ( dev = vperfctr_open( ) ) == NULL ) {
		strncpy( _perfctr_vector.cmp_info.disabled_reason,
				 VOPEN_ERROR, PAPI_MAX_STR_LEN );
		_perfctr_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = '\0';
		return PAPI_ESYS;
	}
	SUBDBG( "_perfctr_init_component vperfctr_open = %p\n", dev );

	/* Get info from the kernel */
	retval = vperfctr_info( dev, &info );
	if ( retval < 0 ) {
		/* BUGFIX: close the descriptor on the error path too;
		   previously it leaked when vperfctr_info() failed */
		vperfctr_close( dev );
		strncpy( _perfctr_vector.cmp_info.disabled_reason,
				 VINFO_ERROR, PAPI_MAX_STR_LEN );
		_perfctr_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = '\0';
		return ( PAPI_ESYS );
	}
	vperfctr_close( dev );
#endif

	/* Fill in what we can of the papi_system_info. */
	retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info );
	if ( retval != PAPI_OK )
		return ( retval );

	/* Setup memory info */
	retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info,
											  ( int ) info.cpu_type );
	if ( retval )
		return ( retval );

	strcpy( _perfctr_vector.cmp_info.name, "perfctr.c" );
	strcpy( _perfctr_vector.cmp_info.version, "$Revision$" );
	sprintf( abiv, "0x%08X", info.abi_version );
	strcpy( _perfctr_vector.cmp_info.support_version, abiv );
	strcpy( _perfctr_vector.cmp_info.kernel_version, info.driver_version );
	_perfctr_vector.cmp_info.CmpIdx = cidx;
	_perfctr_vector.cmp_info.num_cntrs = ( int ) PERFCTR_CPU_NRCTRS( &info );
	_perfctr_vector.cmp_info.num_mpx_cntrs = _perfctr_vector.cmp_info.num_cntrs;

	/* RDPMC support lets user space read counters without a syscall */
	if ( info.cpu_features & PERFCTR_FEATURE_RDPMC )
		_perfctr_vector.cmp_info.fast_counter_read = 1;
	else
		_perfctr_vector.cmp_info.fast_counter_read = 0;
	_perfctr_vector.cmp_info.fast_real_timer = 1;
	_perfctr_vector.cmp_info.fast_virtual_timer = 1;
	_perfctr_vector.cmp_info.attach = 1;
	_perfctr_vector.cmp_info.attach_must_ptrace = 1;
	_perfctr_vector.cmp_info.default_domain = PAPI_DOM_USER;
#if !defined(PPC64)
	/* AMD and Intel ia386 processors all support unit mask bits */
	_perfctr_vector.cmp_info.cntr_umasks = 1;
#endif
#if defined(PPC64)
	_perfctr_vector.cmp_info.available_domains =
		PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
#else
	_perfctr_vector.cmp_info.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL;
#endif
	_perfctr_vector.cmp_info.default_granularity = PAPI_GRN_THR;
	_perfctr_vector.cmp_info.available_granularities = PAPI_GRN_THR;

	/* PCINT = performance-counter interrupt support, needed for overflow */
	if ( info.cpu_features & PERFCTR_FEATURE_PCINT )
		_perfctr_vector.cmp_info.hardware_intr = 1;
	else
		_perfctr_vector.cmp_info.hardware_intr = 0;
	SUBDBG( "Hardware/OS %s support counter generated interrupts\n",
			_perfctr_vector.cmp_info.hardware_intr ? "does" : "does not" );

	strcpy( _papi_hwi_system_info.hw_info.model_string,
			PERFCTR_CPU_NAME( &info ) );
	_papi_hwi_system_info.hw_info.model = ( int ) info.cpu_type;
#if defined(PPC64)
	_papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_IBM;
	if ( strlen( _papi_hwi_system_info.hw_info.vendor_string ) == 0 )
		strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" );
#else
	_papi_hwi_system_info.hw_info.vendor =
		xlate_cpu_type_to_vendor( info.cpu_type );
#endif

	/* Setup presets last. Some platforms depend on earlier info */
#if !defined(PPC64)
	/* retval is PAPI_OK (0) here, so the guard always fires; kept for
	   symmetry with the PPC64 branch below */
	if ( !retval )
		retval = _papi_libpfm_init( &_perfctr_vector, cidx );
#else
	/* Setup native and preset events */
	if ( !retval )
		retval = perfctr_ppc64_setup_native_table( );
	if ( !retval )
		retval = setup_ppc64_presets( info.cpu_type, cidx );
#endif
	if ( retval )
		return ( retval );

	return ( PAPI_OK );
}
/* Attach to another thread/process so its counters can be monitored.
 *
 * ctl: control state that will own the remote vperfctr handle.
 * tid: target thread/process id.
 * Returns PAPI_OK, or PAPI_ESYS if the remote perfctr state cannot be
 * opened or started.
 */
static int
attach( hwd_control_state_t * ctl, unsigned long tid )
{
	struct vperfctr_control tmp;

	ctl->rvperfctr = rvperfctr_open( ( int ) tid );
	if ( ctl->rvperfctr == NULL ) {
		PAPIERROR( VOPEN_ERROR );
		return ( PAPI_ESYS );
	}
	SUBDBG( "_papi_hwd_ctl rvperfctr_open() = %p\n", ctl->rvperfctr );

	/* Initialize the per thread/process virtualized TSC */
	memset( &tmp, 0x0, sizeof ( tmp ) );
	tmp.cpu_control.tsc_on = 1;
#ifdef VPERFCTR_CONTROL_CLOEXEC
	/* BUGFIX: set the flag AFTER the memset; the old code assigned it
	   first and the memset immediately zeroed it, so close-on-exec was
	   silently never requested */
	tmp.flags = VPERFCTR_CONTROL_CLOEXEC;
#endif

	/* Start the per thread/process virtualized TSC */
	if ( rvperfctr_control( ctl->rvperfctr, &tmp ) < 0 ) {
		PAPIERROR( RCNTRL_ERROR );
		/* BUGFIX: release the handle on failure instead of leaking it */
		rvperfctr_close( ctl->rvperfctr );
		ctl->rvperfctr = NULL;
		return ( PAPI_ESYS );
	}
	return ( PAPI_OK );
}								 /* end attach() */
/* Undo attach(): release the remote vperfctr handle acquired for the
 * target thread/process. Always succeeds. */
static int
detach( hwd_control_state_t * ctl )
{
	rvperfctr_close( ctl->rvperfctr );
	return PAPI_OK;
}								 /* end detach() */
/* Round a requested timer interval (in ns) up to the itimer resolution.
 *
 * ns: requested interval in nanoseconds.
 * Returns the resolution itself if ns is below it, otherwise ns rounded
 * up to the next multiple of the resolution.
 */
static inline int
round_requested_ns( int ns )
{
	if ( ns < _papi_os_info.itimer_res_ns ) {
		return _papi_os_info.itimer_res_ns;
	} else {
		int leftover_ns = ns % _papi_os_info.itimer_res_ns;
		if ( leftover_ns == 0 )
			return ns;
		/* BUGFIX: round UP to the next multiple of the resolution.
		   The old code returned ns + leftover_ns, which is generally
		   NOT a multiple of the resolution (e.g. res=10, ns=13 -> 16) */
		return ns + ( _papi_os_info.itimer_res_ns - leftover_ns );
	}
}
/* Dispatch PAPI control operations (PAPI_set_opt and friends) for the
 * perfctr component.
 *
 * ctx:    thread context (unused here).
 * code:   the PAPI option code being set.
 * option: union carrying the option-specific payload.
 * Returns PAPI_OK, a value propagated from a helper, PAPI_EINVAL for a
 * bad itimer/signal pairing, PAPI_ECMP for unsupported granularity
 * changes, or PAPI_ENOSUPP for unrecognized codes.
 */
int
_perfctr_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option )
{
	( void ) ctx;			 /*unused */
	switch ( code ) {
	case PAPI_DOMAIN:
	case PAPI_DEFDOM:
		/* PPC64's set_domain takes the EventSet itself; x86's takes the
		   component control state hanging off it */
#if defined(PPC64)
		return ( _perfctr_vector.
				 set_domain( option->domain.ESI, option->domain.domain ) );
#else
		return ( _perfctr_vector.
				 set_domain( option->domain.ESI->ctl_state,
							 option->domain.domain ) );
#endif
	case PAPI_GRANUL:
	case PAPI_DEFGRN:
		/* Only thread granularity is supported; changing it is refused */
		return PAPI_ECMP;
	case PAPI_ATTACH:
		return ( attach( option->attach.ESI->ctl_state, option->attach.tid ) );
	case PAPI_DETACH:
		return ( detach( option->attach.ESI->ctl_state ) );
	case PAPI_DEF_ITIMER:
	{
		/* flags are currently ignored, eventually the flags will be able
		   to specify whether or not we use POSIX itimers (clock_gettimer) */
		/* Each itimer type only pairs with its canonical signal */
		if ( ( option->itimer.itimer_num == ITIMER_REAL ) &&
			 ( option->itimer.itimer_sig != SIGALRM ) )
			return PAPI_EINVAL;

		if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) &&
			 ( option->itimer.itimer_sig != SIGVTALRM ) )
			return PAPI_EINVAL;

		if ( ( option->itimer.itimer_num == ITIMER_PROF ) &&
			 ( option->itimer.itimer_sig != SIGPROF ) )
			return PAPI_EINVAL;

		/* ns <= 0 is left untouched; positive values are snapped to the
		   itimer resolution */
		if ( option->itimer.ns > 0 )
			option->itimer.ns = round_requested_ns( option->itimer.ns );
		/* At this point, we assume the user knows what he or
		   she is doing, they maybe doing something arch specific */
		return PAPI_OK;
	}
	case PAPI_DEF_MPX_NS:
	{
		/* Multiplexing interval is likewise snapped to timer resolution */
		option->multiplex.ns =
			( unsigned long ) round_requested_ns( ( int ) option->multiplex.
												  ns );
		return ( PAPI_OK );
	}
	case PAPI_DEF_ITIMER_NS:
	{
		option->itimer.ns = round_requested_ns( option->itimer.ns );
		return ( PAPI_OK );
	}
	default:
		return ( PAPI_ENOSUPP );
	}
}
/* Signal handler invoked when a performance counter overflows (or a
 * software timer fires). Forwards the event to the PAPI framework and,
 * if the overflow was a true hardware interrupt, resumes the suspended
 * interrupt-mode counters for this thread.
 *
 * signal:  the delivered signal number (unused).
 * si:      siginfo carrying the perfctr overflow mask.
 * context: ucontext at the point of interruption, used to recover the
 *          faulting program-counter address.
 */
void
_perfctr_dispatch_timer( int signal, siginfo_t * si, void *context )
{
	( void ) signal;		 /*unused */
	_papi_hwi_context_t ctx;
	ThreadInfo_t *master = NULL;
	int isHardware = 0;
	caddr_t address;
	int cidx = _perfctr_vector.cmp_info.CmpIdx;
	hwd_context_t *our_context;

	ctx.si = si;
	ctx.ucontext = ( ucontext_t * ) context;

	/* Overflow mask comes straight from the perfctr-extended siginfo;
	   GEN_OVERFLOW 0 marks this as a hardware (not emulated) overflow */
#define OVERFLOW_MASK si->si_pmc_ovf_mask
#define GEN_OVERFLOW 0

	address = ( caddr_t ) GET_OVERFLOW_ADDRESS( ( ctx ) );
	_papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, &isHardware,
										OVERFLOW_MASK, GEN_OVERFLOW, &master,
										_perfctr_vector.cmp_info.CmpIdx );

	/* We are done, resume interrupting counters */
	if ( isHardware ) {
		our_context = (hwd_context_t *) master->context[cidx];
		/* NOTE(review): the iresume return code is stashed in errno and
		   then tested — unusual, but the message below reports it as
		   errno; confirm against vperfctr_iresume's contract */
		errno = vperfctr_iresume( our_context->perfctr );
		if ( errno < 0 ) {
			PAPIERROR( "vperfctr_iresume errno %d", errno );
		}
	}
}
/* Per-thread initialization: open this thread's virtual perfctr state
 * and start the virtualized TSC.
 *
 * ctx: thread context whose perfctr handle is filled in.
 * Returns PAPI_OK, or PAPI_ESYS if the perfctr state cannot be opened
 * or started.
 */
int
_perfctr_init_thread( hwd_context_t * ctx )
{
	struct vperfctr_control tmp;
	int error;

	/* Initialize our thread/process pointer. */
	if ( ( ctx->perfctr = vperfctr_open( ) ) == NULL ) {
#ifdef VPERFCTR_OPEN_CREAT_EXCL
		/* New versions of perfctr have this, which allows us to
		   get a previously created context, i.e. one created after
		   a fork and now we're inside a new process that has been exec'd */
		if ( errno ) {
			if ( ( ctx->perfctr = vperfctr_open_mode( 0 ) ) == NULL ) {
				return PAPI_ESYS;
			}
		} else {
			return PAPI_ESYS;
		}
#else
		return PAPI_ESYS;
#endif
	}
	SUBDBG( "_papi_hwd_init vperfctr_open() = %p\n", ctx->perfctr );

	/* Initialize the per thread/process virtualized TSC */
	memset( &tmp, 0x0, sizeof ( tmp ) );
	tmp.cpu_control.tsc_on = 1;

#ifdef VPERFCTR_CONTROL_CLOEXEC
	tmp.flags = VPERFCTR_CONTROL_CLOEXEC;
	SUBDBG( "close on exec\t\t\t%u\n", tmp.flags );
#endif

	/* Start the per thread/process virtualized TSC */
	error = vperfctr_control( ctx->perfctr, &tmp );
	if ( error < 0 ) {
		SUBDBG( "starting virtualized TSC; vperfctr_control returns %d\n",
				error );
		/* BUGFIX: don't leak the vperfctr handle when the TSC cannot
		   be started */
		vperfctr_close( ctx->perfctr );
		ctx->perfctr = NULL;
		return PAPI_ESYS;
	}

	return PAPI_OK;
}
/* This routine is for shutting down threads, including the
   master thread. */
/* Unlinks (stops and detaches) and closes the thread's vperfctr handle,
 * then clears the context. Always returns PAPI_OK.
 *
 * ctx: thread context previously set up by _perfctr_init_thread().
 */
int
_perfctr_shutdown_thread( hwd_context_t * ctx )
{
#ifdef DEBUG
	/* Capture the unlink result only when debugging, to log it */
	int retval = vperfctr_unlink( ctx->perfctr );
	SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr,
			retval );
#else
	vperfctr_unlink( ctx->perfctr );
#endif
	vperfctr_close( ctx->perfctr );
	/* Note: SUBDBG below only prints the (now-closed) pointer value */
	SUBDBG( "_perfctr_shutdown vperfctr_close(%p)\n", ctx->perfctr );
	memset( ctx, 0x0, sizeof ( hwd_context_t ) );

	return ( PAPI_OK );
}