|
Packit |
577717 |
/*
|
|
Packit |
577717 |
* File: perfctr-x86.c
|
|
Packit |
577717 |
* Author: Brian Sheely
|
|
Packit |
577717 |
* bsheely@eecs.utk.edu
|
|
Packit |
577717 |
* Mods: <your name here>
|
|
Packit |
577717 |
* <your email address>
|
|
Packit |
577717 |
*/
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include <string.h>
|
|
Packit |
577717 |
#include <linux/unistd.h>
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include "papi.h"
|
|
Packit |
577717 |
#include "papi_memory.h"
|
|
Packit |
577717 |
#include "papi_internal.h"
|
|
Packit |
577717 |
#include "perfctr-x86.h"
|
|
Packit |
577717 |
#include "perfmon/pfmlib.h"
|
|
Packit |
577717 |
#include "extras.h"
|
|
Packit |
577717 |
#include "papi_vector.h"
|
|
Packit |
577717 |
#include "papi_libpfm_events.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include "papi_preset.h"
|
|
Packit |
577717 |
#include "linux-memory.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Contains source for the Modified Bipartite Allocation scheme */
|
|
Packit |
577717 |
#include "papi_bipartite.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Prototypes for entry points found in perfctr.c */
|
|
Packit |
577717 |
extern int _perfctr_init_component( int );
|
|
Packit |
577717 |
extern int _perfctr_ctl( hwd_context_t * ctx, int code,
|
|
Packit |
577717 |
_papi_int_option_t * option );
|
|
Packit |
577717 |
extern void _perfctr_dispatch_timer( int signal, hwd_siginfo_t * si,
|
|
Packit |
577717 |
void *context );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
extern int _perfctr_init_thread( hwd_context_t * ctx );
|
|
Packit |
577717 |
extern int _perfctr_shutdown_thread( hwd_context_t * ctx );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#include "linux-common.h"
|
|
Packit |
577717 |
#include "linux-timer.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
extern papi_mdi_t _papi_hwi_system_info;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
extern papi_vector_t _perfctr_vector;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#if defined(PERFCTR26)
|
|
Packit |
577717 |
#define evntsel_aux p4.escr
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#if defined(PAPI_PENTIUM4_VEC_MMX)
|
|
Packit |
577717 |
#define P4_VEC "MMX"
|
|
Packit |
577717 |
#else
|
|
Packit |
577717 |
#define P4_VEC "SSE"
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#if defined(PAPI_PENTIUM4_FP_X87)
|
|
Packit |
577717 |
#define P4_FPU " X87"
|
|
Packit |
577717 |
#elif defined(PAPI_PENTIUM4_FP_X87_SSE_SP)
|
|
Packit |
577717 |
#define P4_FPU " X87 SSE_SP"
|
|
Packit |
577717 |
#elif defined(PAPI_PENTIUM4_FP_SSE_SP_DP)
|
|
Packit |
577717 |
#define P4_FPU " SSE_SP SSE_DP"
|
|
Packit |
577717 |
#else
|
|
Packit |
577717 |
#define P4_FPU " X87 SSE_DP"
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* CODE TO SUPPORT CUSTOMIZABLE FP COUNTS ON OPTERON */
|
|
Packit |
577717 |
#if defined(PAPI_OPTERON_FP_RETIRED)
|
|
Packit |
577717 |
#define AMD_FPU "RETIRED"
|
|
Packit |
577717 |
#elif defined(PAPI_OPTERON_FP_SSE_SP)
|
|
Packit |
577717 |
#define AMD_FPU "SSE_SP"
|
|
Packit |
577717 |
#elif defined(PAPI_OPTERON_FP_SSE_DP)
|
|
Packit |
577717 |
#define AMD_FPU "SSE_DP"
|
|
Packit |
577717 |
#else
|
|
Packit |
577717 |
#define AMD_FPU "SPECULATIVE"
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static inline int is_pentium4(void) {
|
|
Packit |
577717 |
if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) &&
|
|
Packit |
577717 |
( _papi_hwi_system_info.hw_info.cpuid_family == 15 )) {
|
|
Packit |
577717 |
return 1;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
return 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
static void
|
|
Packit |
577717 |
print_alloc( X86_reg_alloc_t * a )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
SUBDBG( "X86_reg_alloc:\n" );
|
|
Packit |
577717 |
SUBDBG( " selector: %#x\n", a->ra_selector );
|
|
Packit |
577717 |
SUBDBG( " rank: %#x\n", a->ra_rank );
|
|
Packit |
577717 |
SUBDBG( " escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
void
|
|
Packit |
577717 |
print_control( const struct perfctr_cpu_control *control )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
unsigned int i;
|
|
Packit |
577717 |
SUBDBG( "Control used:\n" );
|
|
Packit |
577717 |
SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
|
|
Packit |
577717 |
SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
|
|
Packit |
577717 |
SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < ( control->nractrs + control->nrictrs ); ++i ) {
|
|
Packit |
577717 |
if ( control->pmc_map[i] >= 18 ) {
|
|
Packit |
577717 |
SUBDBG( "pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i] );
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
SUBDBG( "evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i] );
|
|
Packit |
577717 |
if ( control->ireset[i] ) {
|
|
Packit |
577717 |
SUBDBG( "ireset[%u]\t%d\n", i, control->ireset[i] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_init_control_state( hwd_control_state_t *ptr )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
int i, def_mode = 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
|
|
Packit |
577717 |
def_mode |= ESCR_T0_USR;
|
|
Packit |
577717 |
if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
|
|
Packit |
577717 |
def_mode |= ESCR_T0_OS;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
|
|
Packit |
577717 |
ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
ptr->control.cpu_control.tsc_on = 1;
|
|
Packit |
577717 |
ptr->control.cpu_control.nractrs = 0;
|
|
Packit |
577717 |
ptr->control.cpu_control.nrictrs = 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#ifdef VPERFCTR_CONTROL_CLOEXEC
|
|
Packit |
577717 |
ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
|
|
Packit |
577717 |
SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
|
|
Packit |
577717 |
def_mode |= PERF_USR;
|
|
Packit |
577717 |
if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
|
|
Packit |
577717 |
def_mode |= PERF_OS;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
ptr->allocated_registers.selector = 0;
|
|
Packit |
577717 |
switch ( _papi_hwi_system_info.hw_info.model ) {
|
|
Packit |
577717 |
case PERFCTR_X86_GENERIC:
|
|
Packit |
577717 |
case PERFCTR_X86_WINCHIP_C6:
|
|
Packit |
577717 |
case PERFCTR_X86_WINCHIP_2:
|
|
Packit |
577717 |
case PERFCTR_X86_VIA_C3:
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_P5:
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_P5MMX:
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_PII:
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_P6:
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_PIII:
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_CORE
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_CORE:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_PENTM
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_PENTM:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
|
|
Packit |
577717 |
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
|
|
Packit |
577717 |
ptr->control.cpu_control.evntsel[i] |= def_mode;
|
|
Packit |
577717 |
ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
break;
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_CORE2
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_CORE2:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_ATOM
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_ATOM:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_NHLM
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_NHLM:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_WSTMR
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_WSTMR:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_AMD_K8
|
|
Packit |
577717 |
case PERFCTR_X86_AMD_K8:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_AMD_K8C
|
|
Packit |
577717 |
case PERFCTR_X86_AMD_K8C:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_AMD_FAM10H /* this is defined in perfctr 2.6.29 */
|
|
Packit |
577717 |
case PERFCTR_X86_AMD_FAM10H:
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
case PERFCTR_X86_AMD_K7:
|
|
Packit |
577717 |
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
|
|
Packit |
577717 |
ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
|
|
Packit |
577717 |
ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
break;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
#ifdef VPERFCTR_CONTROL_CLOEXEC
|
|
Packit |
577717 |
ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
|
|
Packit |
577717 |
SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Make sure the TSC is always on */
|
|
Packit |
577717 |
ptr->control.cpu_control.tsc_on = 1;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return ( PAPI_OK );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int
|
|
Packit |
577717 |
_x86_set_domain( hwd_control_state_t * cntrl, int domain )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
int i, did = 0;
|
|
Packit |
577717 |
int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Clear the current domain set for this event set */
|
|
Packit |
577717 |
/* We don't touch the Enable bit in this code */
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
|
|
Packit |
577717 |
cntrl->control.cpu_control.evntsel_aux[i] &=
|
|
Packit |
577717 |
~( ESCR_T0_OS | ESCR_T0_USR );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( domain & PAPI_DOM_USER ) {
|
|
Packit |
577717 |
did = 1;
|
|
Packit |
577717 |
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
|
|
Packit |
577717 |
cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( domain & PAPI_DOM_KERNEL ) {
|
|
Packit |
577717 |
did = 1;
|
|
Packit |
577717 |
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
|
|
Packit |
577717 |
cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
for ( i = 0; i < num_cntrs; i++ ) {
|
|
Packit |
577717 |
cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( domain & PAPI_DOM_USER ) {
|
|
Packit |
577717 |
did = 1;
|
|
Packit |
577717 |
for ( i = 0; i < num_cntrs; i++ ) {
|
|
Packit |
577717 |
cntrl->control.cpu_control.evntsel[i] |= PERF_USR;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( domain & PAPI_DOM_KERNEL ) {
|
|
Packit |
577717 |
did = 1;
|
|
Packit |
577717 |
for ( i = 0; i < num_cntrs; i++ ) {
|
|
Packit |
577717 |
cntrl->control.cpu_control.evntsel[i] |= PERF_OS;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( !did )
|
|
Packit |
577717 |
return ( PAPI_EINVAL );
|
|
Packit |
577717 |
else
|
|
Packit |
577717 |
return ( PAPI_OK );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* This function examines the event to determine
|
|
Packit |
577717 |
if it can be mapped to counter ctr.
|
|
Packit |
577717 |
Returns true if it can, false if it can't. */
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_bpt_map_avail( hwd_reg_alloc_t * dst, int ctr )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
return ( int ) ( dst->ra_selector & ( 1 << ctr ) );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* This function forces the event to
|
|
Packit |
577717 |
be mapped to only counter ctr.
|
|
Packit |
577717 |
Returns nothing. */
|
|
Packit |
577717 |
static void
|
|
Packit |
577717 |
_bpt_map_set( hwd_reg_alloc_t * dst, int ctr )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
dst->ra_selector = ( unsigned int ) ( 1 << ctr );
|
|
Packit |
577717 |
dst->ra_rank = 1;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
/* Pentium 4 requires that both an escr and a counter are selected.
|
|
Packit |
577717 |
Find which counter mask contains this counter.
|
|
Packit |
577717 |
Set the opposite escr to empty (-1) */
|
|
Packit |
577717 |
if ( dst->ra_bits.counter[0] & dst->ra_selector )
|
|
Packit |
577717 |
dst->ra_escr[1] = -1;
|
|
Packit |
577717 |
else
|
|
Packit |
577717 |
dst->ra_escr[0] = -1;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* This function examines the event to determine
|
|
Packit |
577717 |
if it has a single exclusive mapping.
|
|
Packit |
577717 |
Returns true if exlusive, false if non-exclusive. */
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_bpt_map_exclusive( hwd_reg_alloc_t * dst )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
return ( dst->ra_rank == 1 );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* This function compares the dst and src events
|
|
Packit |
577717 |
to determine if any resources are shared. Typically the src event
|
|
Packit |
577717 |
is exclusive, so this detects a conflict if true.
|
|
Packit |
577717 |
Returns true if conflict, false if no conflict. */
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_bpt_map_shared( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
int retval1, retval2;
|
|
Packit |
577717 |
/* Pentium 4 needs to check for conflict of both counters and esc registers */
|
|
Packit |
577717 |
/* selectors must share bits */
|
|
Packit |
577717 |
retval1 = ( ( dst->ra_selector & src->ra_selector ) ||
|
|
Packit |
577717 |
/* or escrs must equal each other and not be set to -1 */
|
|
Packit |
577717 |
( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
|
|
Packit |
577717 |
( ( int ) dst->ra_escr[0] != -1 ) ) ||
|
|
Packit |
577717 |
( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
|
|
Packit |
577717 |
( ( int ) dst->ra_escr[1] != -1 ) ) );
|
|
Packit |
577717 |
/* Pentium 4 also needs to check for conflict on pebs registers */
|
|
Packit |
577717 |
/* pebs enables must both be non-zero */
|
|
Packit |
577717 |
retval2 =
|
|
Packit |
577717 |
( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
|
|
Packit |
577717 |
/* and not equal to each other */
|
|
Packit |
577717 |
( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
|
|
Packit |
577717 |
/* same for pebs_matrix_vert */
|
|
Packit |
577717 |
( ( dst->ra_bits.pebs_matrix_vert &&
|
|
Packit |
577717 |
src->ra_bits.pebs_matrix_vert ) &&
|
|
Packit |
577717 |
( dst->ra_bits.pebs_matrix_vert !=
|
|
Packit |
577717 |
src->ra_bits.pebs_matrix_vert ) ) );
|
|
Packit |
577717 |
if ( retval2 ) {
|
|
Packit |
577717 |
SUBDBG( "pebs conflict!\n" );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return ( retval1 | retval2 );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
return ( int ) ( dst->ra_selector & src->ra_selector );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* This function removes shared resources available to the src event
|
|
Packit |
577717 |
from the resources available to the dst event,
|
|
Packit |
577717 |
and reduces the rank of the dst event accordingly. Typically,
|
|
Packit |
577717 |
the src event will be exclusive, but the code shouldn't assume it.
|
|
Packit |
577717 |
Returns nothing. */
|
|
Packit |
577717 |
static void
|
|
Packit |
577717 |
_bpt_map_preempt( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
int i;
|
|
Packit |
577717 |
unsigned shared;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
SUBDBG( "src, dst\n" );
|
|
Packit |
577717 |
print_alloc( src );
|
|
Packit |
577717 |
print_alloc( dst );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* check for a pebs conflict */
|
|
Packit |
577717 |
/* pebs enables must both be non-zero */
|
|
Packit |
577717 |
i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
|
|
Packit |
577717 |
/* and not equal to each other */
|
|
Packit |
577717 |
( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
|
|
Packit |
577717 |
/* same for pebs_matrix_vert */
|
|
Packit |
577717 |
( ( dst->ra_bits.pebs_matrix_vert &&
|
|
Packit |
577717 |
src->ra_bits.pebs_matrix_vert )
|
|
Packit |
577717 |
&& ( dst->ra_bits.pebs_matrix_vert !=
|
|
Packit |
577717 |
src->ra_bits.pebs_matrix_vert ) ) );
|
|
Packit |
577717 |
if ( i ) {
|
|
Packit |
577717 |
SUBDBG( "pebs conflict! clearing selector\n" );
|
|
Packit |
577717 |
dst->ra_selector = 0;
|
|
Packit |
577717 |
return;
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
/* remove counters referenced by any shared escrs */
|
|
Packit |
577717 |
if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
|
|
Packit |
577717 |
( ( int ) dst->ra_escr[0] != -1 ) ) {
|
|
Packit |
577717 |
dst->ra_selector &= ~dst->ra_bits.counter[0];
|
|
Packit |
577717 |
dst->ra_escr[0] = -1;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
|
|
Packit |
577717 |
( ( int ) dst->ra_escr[1] != -1 ) ) {
|
|
Packit |
577717 |
dst->ra_selector &= ~dst->ra_bits.counter[1];
|
|
Packit |
577717 |
dst->ra_escr[1] = -1;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* remove any remaining shared counters */
|
|
Packit |
577717 |
shared = ( dst->ra_selector & src->ra_selector );
|
|
Packit |
577717 |
if ( shared )
|
|
Packit |
577717 |
dst->ra_selector ^= shared;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
/* recompute rank */
|
|
Packit |
577717 |
for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
|
|
Packit |
577717 |
if ( dst->ra_selector & ( 1 << i ) )
|
|
Packit |
577717 |
dst->ra_rank++;
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
SUBDBG( "new dst\n" );
|
|
Packit |
577717 |
print_alloc( dst );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
shared = dst->ra_selector & src->ra_selector;
|
|
Packit |
577717 |
if ( shared )
|
|
Packit |
577717 |
dst->ra_selector ^= shared;
|
|
Packit |
577717 |
for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
|
|
Packit |
577717 |
if ( dst->ra_selector & ( 1 << i ) )
|
|
Packit |
577717 |
dst->ra_rank++;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static void
|
|
Packit |
577717 |
_bpt_map_update( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
dst->ra_selector = src->ra_selector;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
dst->ra_escr[0] = src->ra_escr[0];
|
|
Packit |
577717 |
dst->ra_escr[1] = src->ra_escr[1];
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Register allocation */
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_allocate_registers( EventSetInfo_t * ESI )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
int i, j, natNum;
|
|
Packit |
577717 |
hwd_reg_alloc_t event_list[MAX_COUNTERS];
|
|
Packit |
577717 |
hwd_register_t *ptr;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Initialize the local structure needed
|
|
Packit |
577717 |
for counter allocation and optimization. */
|
|
Packit |
577717 |
natNum = ESI->NativeCount;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
SUBDBG( "native event count: %d\n", natNum );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < natNum; i++ ) {
|
|
Packit |
577717 |
/* retrieve the mapping information about this native event */
|
|
Packit |
577717 |
_papi_libpfm_ntv_code_to_bits_perfctr( ( unsigned int ) ESI->NativeInfoArray[i].
|
|
Packit |
577717 |
ni_event, &event_list[i].ra_bits );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
/* combine counter bit masks for both esc registers into selector */
|
|
Packit |
577717 |
event_list[i].ra_selector =
|
|
Packit |
577717 |
event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
|
|
Packit |
577717 |
counter[1];
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
/* make sure register allocator only looks at legal registers */
|
|
Packit |
577717 |
event_list[i].ra_selector =
|
|
Packit |
577717 |
event_list[i].ra_bits.selector & ALLCNTRS;
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_CORE2
|
|
Packit |
577717 |
if ( _papi_hwi_system_info.hw_info.model ==
|
|
Packit |
577717 |
PERFCTR_X86_INTEL_CORE2 )
|
|
Packit |
577717 |
event_list[i].ra_selector |=
|
|
Packit |
577717 |
( ( event_list[i].ra_bits.
|
|
Packit |
577717 |
selector >> 16 ) << 2 ) & ALLCNTRS;
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
/* calculate native event rank, which is no. of counters it can live on */
|
|
Packit |
577717 |
event_list[i].ra_rank = 0;
|
|
Packit |
577717 |
for ( j = 0; j < MAX_COUNTERS; j++ ) {
|
|
Packit |
577717 |
if ( event_list[i].ra_selector & ( 1 << j ) ) {
|
|
Packit |
577717 |
event_list[i].ra_rank++;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
|
|
Packit |
577717 |
event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
SUBDBG( "i: %d\n", i );
|
|
Packit |
577717 |
print_alloc( &event_list[i] );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) { /* successfully mapped */
|
|
Packit |
577717 |
for ( i = 0; i < natNum; i++ ) {
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_CORE2
|
|
Packit |
577717 |
if ( _papi_hwi_system_info.hw_info.model ==
|
|
Packit |
577717 |
PERFCTR_X86_INTEL_CORE2 )
|
|
Packit |
577717 |
event_list[i].ra_bits.selector = event_list[i].ra_selector;
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
SUBDBG( "i: %d\n", i );
|
|
Packit |
577717 |
print_alloc( &event_list[i] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
/* Copy all info about this native event to the NativeInfo struct */
|
|
Packit |
577717 |
ptr = ESI->NativeInfoArray[i].ni_bits;
|
|
Packit |
577717 |
*ptr = event_list[i].ra_bits;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
/* The selector contains the counter bit position. Turn it into a number
|
|
Packit |
577717 |
and store it in the first counter value, zeroing the second. */
|
|
Packit |
577717 |
ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1;
|
|
Packit |
577717 |
ptr->counter[1] = 0;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Array order on perfctr is event ADD order, not counter #... */
|
|
Packit |
577717 |
ESI->NativeInfoArray[i].ni_position = i;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return PAPI_OK;
|
|
Packit |
577717 |
} else
|
|
Packit |
577717 |
return PAPI_ECNFLCT;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static void
|
|
Packit |
577717 |
clear_cs_events( hwd_control_state_t * this_state )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
unsigned int i, j;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* total counters is sum of accumulating (nractrs) and interrupting (nrictrs) */
|
|
Packit |
577717 |
j = this_state->control.cpu_control.nractrs +
|
|
Packit |
577717 |
this_state->control.cpu_control.nrictrs;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Remove all counter control command values from eventset. */
|
|
Packit |
577717 |
for ( i = 0; i < j; i++ ) {
|
|
Packit |
577717 |
SUBDBG( "Clearing pmc event entry %d\n", i );
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
this_state->control.cpu_control.pmc_map[i] = 0;
|
|
Packit |
577717 |
this_state->control.cpu_control.evntsel[i] = 0;
|
|
Packit |
577717 |
this_state->control.cpu_control.evntsel_aux[i] =
|
|
Packit |
577717 |
this_state->control.cpu_control.
|
|
Packit |
577717 |
evntsel_aux[i] & ( ESCR_T0_OS | ESCR_T0_USR );
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
this_state->control.cpu_control.pmc_map[i] = i;
|
|
Packit |
577717 |
this_state->control.cpu_control.evntsel[i]
|
|
Packit |
577717 |
= this_state->control.cpu_control.
|
|
Packit |
577717 |
evntsel[i] & ( PERF_ENABLE | PERF_OS | PERF_USR );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
this_state->control.cpu_control.ireset[i] = 0;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
/* Clear pebs stuff */
|
|
Packit |
577717 |
this_state->control.cpu_control.p4.pebs_enable = 0;
|
|
Packit |
577717 |
this_state->control.cpu_control.p4.pebs_matrix_vert = 0;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* clear both a and i counter counts */
|
|
Packit |
577717 |
this_state->control.cpu_control.nractrs = 0;
|
|
Packit |
577717 |
this_state->control.cpu_control.nrictrs = 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
if ( is_pentium4() )
|
|
Packit |
577717 |
print_control( &this_state->control.cpu_control );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* This function clears the current contents of the control structure and
|
|
Packit |
577717 |
updates it with whatever resources are allocated for all the native events
|
|
Packit |
577717 |
in the native info structure array. */
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_update_control_state( hwd_control_state_t * this_state,
|
|
Packit |
577717 |
NativeInfo_t * native, int count,
|
|
Packit |
577717 |
hwd_context_t * ctx )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
( void ) ctx; /*unused */
|
|
Packit |
577717 |
unsigned int i, k, retval = PAPI_OK;
|
|
Packit |
577717 |
hwd_register_t *bits,*bits2;
|
|
Packit |
577717 |
struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* clear out the events from the control state */
|
|
Packit |
577717 |
clear_cs_events( this_state );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
/* fill the counters we're using */
|
|
Packit |
577717 |
for ( i = 0; i < ( unsigned int ) count; i++ ) {
|
|
Packit |
577717 |
/* dereference the mapping information about this native event */
|
|
Packit |
577717 |
bits = native[i].ni_bits;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Add counter control command values to eventset */
|
|
Packit |
577717 |
cpu_control->pmc_map[i] = bits->counter[0];
|
|
Packit |
577717 |
cpu_control->evntsel[i] = bits->cccr;
|
|
Packit |
577717 |
cpu_control->ireset[i] = bits->ireset;
|
|
Packit |
577717 |
cpu_control->pmc_map[i] |= FAST_RDPMC;
|
|
Packit |
577717 |
cpu_control->evntsel_aux[i] |= bits->event;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* pebs_enable and pebs_matrix_vert are shared registers used for replay_events.
|
|
Packit |
577717 |
Replay_events count L1 and L2 cache events. There is only one of each for
|
|
Packit |
577717 |
the entire eventset. Therefore, there can be only one unique replay_event
|
|
Packit |
577717 |
per eventset. This means L1 and L2 can't be counted together. Which stinks.
|
|
Packit |
577717 |
This conflict should be trapped in the allocation scheme, but we'll test for it
|
|
Packit |
577717 |
here too, just in case. */
|
|
Packit |
577717 |
if ( bits->pebs_enable ) {
|
|
Packit |
577717 |
/* if pebs_enable isn't set, just copy */
|
|
Packit |
577717 |
if ( cpu_control->p4.pebs_enable == 0 ) {
|
|
Packit |
577717 |
cpu_control->p4.pebs_enable = bits->pebs_enable;
|
|
Packit |
577717 |
/* if pebs_enable conflicts, flag an error */
|
|
Packit |
577717 |
} else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
|
|
Packit |
577717 |
SUBDBG
|
|
Packit |
577717 |
( "WARNING: P4_update_control_state -- pebs_enable conflict!" );
|
|
Packit |
577717 |
retval = PAPI_ECNFLCT;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
/* if pebs_enable == bits->pebs_enable, do nothing */
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( bits->pebs_matrix_vert ) {
|
|
Packit |
577717 |
/* if pebs_matrix_vert isn't set, just copy */
|
|
Packit |
577717 |
if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
|
|
Packit |
577717 |
cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
|
|
Packit |
577717 |
/* if pebs_matrix_vert conflicts, flag an error */
|
|
Packit |
577717 |
} else if ( cpu_control->p4.pebs_matrix_vert !=
|
|
Packit |
577717 |
bits->pebs_matrix_vert ) {
|
|
Packit |
577717 |
SUBDBG
|
|
Packit |
577717 |
( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
|
|
Packit |
577717 |
retval = PAPI_ECNFLCT;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
/* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
this_state->control.cpu_control.nractrs = count;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Make sure the TSC is always on */
|
|
Packit |
577717 |
this_state->control.cpu_control.tsc_on = 1;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
print_control( &this_state->control.cpu_control );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
switch ( _papi_hwi_system_info.hw_info.model ) {
|
|
Packit |
577717 |
#ifdef PERFCTR_X86_INTEL_CORE2
|
|
Packit |
577717 |
case PERFCTR_X86_INTEL_CORE2:
|
|
Packit |
577717 |
/* fill the counters we're using */
|
|
Packit |
577717 |
for ( i = 0; i < ( unsigned int ) count; i++ ) {
|
|
Packit |
577717 |
bits2 = native[i].ni_bits;
|
|
Packit |
577717 |
for ( k = 0; k < MAX_COUNTERS; k++ )
|
|
Packit |
577717 |
if ( bits2->selector & ( 1 << k ) ) {
|
|
Packit |
577717 |
break;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( k > 1 )
|
|
Packit |
577717 |
this_state->control.cpu_control.pmc_map[i] =
|
|
Packit |
577717 |
( k - 2 ) | 0x40000000;
|
|
Packit |
577717 |
else
|
|
Packit |
577717 |
this_state->control.cpu_control.pmc_map[i] = k;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Add counter control command values to eventset */
|
|
Packit |
577717 |
this_state->control.cpu_control.evntsel[i] |=
|
|
Packit |
577717 |
bits2->counter_cmd;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
break;
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
default:
|
|
Packit |
577717 |
/* fill the counters we're using */
|
|
Packit |
577717 |
for ( i = 0; i < ( unsigned int ) count; i++ ) {
|
|
Packit |
577717 |
/* Add counter control command values to eventset */
|
|
Packit |
577717 |
bits2 = native[i].ni_bits;
|
|
Packit |
577717 |
this_state->control.cpu_control.evntsel[i] |=
|
|
Packit |
577717 |
bits2->counter_cmd;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
this_state->control.cpu_control.nractrs = ( unsigned int ) count;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return retval;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_start( hwd_context_t * ctx, hwd_control_state_t * state )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
int error;
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
print_control( &state->control.cpu_control );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( state->rvperfctr != NULL ) {
|
|
Packit |
577717 |
if ( ( error =
|
|
Packit |
577717 |
rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) {
|
|
Packit |
577717 |
SUBDBG( "rvperfctr_control returns: %d\n", error );
|
|
Packit |
577717 |
PAPIERROR( RCNTRL_ERROR );
|
|
Packit |
577717 |
return ( PAPI_ESYS );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return ( PAPI_OK );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) {
|
|
Packit |
577717 |
SUBDBG( "vperfctr_control returns: %d\n", error );
|
|
Packit |
577717 |
PAPIERROR( VCNTRL_ERROR );
|
|
Packit |
577717 |
return ( PAPI_ESYS );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return ( PAPI_OK );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_stop( hwd_context_t * ctx, hwd_control_state_t * state )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
int error;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( state->rvperfctr != NULL ) {
|
|
Packit |
577717 |
if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) {
|
|
Packit |
577717 |
PAPIERROR( RCNTRL_ERROR );
|
|
Packit |
577717 |
return ( PAPI_ESYS );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return ( PAPI_OK );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
error = vperfctr_stop( ctx->perfctr );
|
|
Packit |
577717 |
if ( error < 0 ) {
|
|
Packit |
577717 |
SUBDBG( "vperfctr_stop returns: %d\n", error );
|
|
Packit |
577717 |
PAPIERROR( VCNTRL_ERROR );
|
|
Packit |
577717 |
return ( PAPI_ESYS );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return ( PAPI_OK );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_read( hwd_context_t * ctx, hwd_control_state_t * spc, long long **dp,
|
|
Packit |
577717 |
int flags )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
if ( flags & PAPI_PAUSED ) {
|
|
Packit |
577717 |
vperfctr_read_state( ctx->perfctr, &spc->state, NULL );
|
|
Packit |
577717 |
if ( !is_pentium4() ) {
|
|
Packit |
577717 |
unsigned int i = 0;
|
|
Packit |
577717 |
for ( i = 0;
|
|
Packit |
577717 |
i <
|
|
Packit |
577717 |
spc->control.cpu_control.nractrs +
|
|
Packit |
577717 |
spc->control.cpu_control.nrictrs; i++ ) {
|
|
Packit |
577717 |
SUBDBG( "vperfctr_read_state: counter %d = %lld\n", i,
|
|
Packit |
577717 |
spc->state.pmc[i] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
SUBDBG( "vperfctr_read_ctrs\n" );
|
|
Packit |
577717 |
if ( spc->rvperfctr != NULL ) {
|
|
Packit |
577717 |
rvperfctr_read_ctrs( spc->rvperfctr, &spc->state );
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
vperfctr_read_ctrs( ctx->perfctr, &spc->state );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
*dp = ( long long * ) spc->state.pmc;
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
if ( ISLEVEL( DEBUG_SUBSTRATE ) ) {
|
|
Packit |
577717 |
unsigned int i;
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) {
|
|
Packit |
577717 |
SUBDBG( "raw val hardware index %d is %lld\n", i,
|
|
Packit |
577717 |
( long long ) spc->state.pmc[i] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
for ( i = 0;
|
|
Packit |
577717 |
i <
|
|
Packit |
577717 |
spc->control.cpu_control.nractrs +
|
|
Packit |
577717 |
spc->control.cpu_control.nrictrs; i++ ) {
|
|
Packit |
577717 |
SUBDBG( "raw val hardware index %d is %lld\n", i,
|
|
Packit |
577717 |
( long long ) spc->state.pmc[i] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
return ( PAPI_OK );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_reset( hwd_context_t * ctx, hwd_control_state_t * cntrl )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
return ( _x86_start( ctx, cntrl ) );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Perfctr requires that interrupting counters appear at the end of the pmc list
|
|
Packit |
577717 |
In the case a user wants to interrupt on a counter in an evntset that is not
|
|
Packit |
577717 |
among the last events, we need to move the perfctr virtual events around to
|
|
Packit |
577717 |
make it last. This function swaps two perfctr events, and then adjust the
|
|
Packit |
577717 |
position entries in both the NativeInfoArray and the EventInfoArray to keep
|
|
Packit |
577717 |
everything consistent. */
|
|
Packit |
577717 |
static void
|
|
Packit |
577717 |
swap_events( EventSetInfo_t * ESI, struct hwd_pmc_control *contr, int cntr1,
|
|
Packit |
577717 |
int cntr2 )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
unsigned int ui;
|
|
Packit |
577717 |
int si, i, j;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < ESI->NativeCount; i++ ) {
|
|
Packit |
577717 |
if ( ESI->NativeInfoArray[i].ni_position == cntr1 )
|
|
Packit |
577717 |
ESI->NativeInfoArray[i].ni_position = cntr2;
|
|
Packit |
577717 |
else if ( ESI->NativeInfoArray[i].ni_position == cntr2 )
|
|
Packit |
577717 |
ESI->NativeInfoArray[i].ni_position = cntr1;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < ESI->NumberOfEvents; i++ ) {
|
|
Packit |
577717 |
for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) {
|
|
Packit |
577717 |
if ( ESI->EventInfoArray[i].pos[j] == cntr1 )
|
|
Packit |
577717 |
ESI->EventInfoArray[i].pos[j] = cntr2;
|
|
Packit |
577717 |
else if ( ESI->EventInfoArray[i].pos[j] == cntr2 )
|
|
Packit |
577717 |
ESI->EventInfoArray[i].pos[j] = cntr1;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
ui = contr->cpu_control.pmc_map[cntr1];
|
|
Packit |
577717 |
contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
|
|
Packit |
577717 |
contr->cpu_control.pmc_map[cntr2] = ui;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
ui = contr->cpu_control.evntsel[cntr1];
|
|
Packit |
577717 |
contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
|
|
Packit |
577717 |
contr->cpu_control.evntsel[cntr2] = ui;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
ui = contr->cpu_control.evntsel_aux[cntr1];
|
|
Packit |
577717 |
contr->cpu_control.evntsel_aux[cntr1] =
|
|
Packit |
577717 |
contr->cpu_control.evntsel_aux[cntr2];
|
|
Packit |
577717 |
contr->cpu_control.evntsel_aux[cntr2] = ui;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
si = contr->cpu_control.ireset[cntr1];
|
|
Packit |
577717 |
contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
|
|
Packit |
577717 |
contr->cpu_control.ireset[cntr2] = si;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state );
|
|
Packit |
577717 |
struct hwd_pmc_control *contr = &(ctl->control);
|
|
Packit |
577717 |
int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;
|
|
Packit |
577717 |
OVFDBG( "EventIndex=%d\n", EventIndex );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
if ( is_pentium4() )
|
|
Packit |
577717 |
print_control( &(contr->cpu_control) );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* The correct event to overflow is EventIndex */
|
|
Packit |
577717 |
ncntrs = _perfctr_vector.cmp_info.num_cntrs;
|
|
Packit |
577717 |
i = ESI->EventInfoArray[EventIndex].pos[0];
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( i >= ncntrs ) {
|
|
Packit |
577717 |
PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
|
|
Packit |
577717 |
return PAPI_EINVAL;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( threshold != 0 ) { /* Set an overflow threshold */
|
|
Packit |
577717 |
retval = _papi_hwi_start_signal( _perfctr_vector.cmp_info.hardware_intr_sig,
|
|
Packit |
577717 |
NEED_CONTEXT,
|
|
Packit |
577717 |
_perfctr_vector.cmp_info.CmpIdx );
|
|
Packit |
577717 |
if ( retval != PAPI_OK )
|
|
Packit |
577717 |
return ( retval );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* overflow interrupt occurs on the NEXT event after overflow occurs
|
|
Packit |
577717 |
thus we subtract 1 from the threshold. */
|
|
Packit |
577717 |
contr->cpu_control.ireset[i] = ( -threshold + 1 );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() )
|
|
Packit |
577717 |
contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
|
|
Packit |
577717 |
else
|
|
Packit |
577717 |
contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
contr->cpu_control.nrictrs++;
|
|
Packit |
577717 |
contr->cpu_control.nractrs--;
|
|
Packit |
577717 |
nricntrs = ( int ) contr->cpu_control.nrictrs;
|
|
Packit |
577717 |
nracntrs = ( int ) contr->cpu_control.nractrs;
|
|
Packit |
577717 |
contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* move this event to the bottom part of the list if needed */
|
|
Packit |
577717 |
if ( i < nracntrs )
|
|
Packit |
577717 |
swap_events( ESI, contr, i, nracntrs );
|
|
Packit |
577717 |
OVFDBG( "Modified event set\n" );
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
|
|
Packit |
577717 |
contr->cpu_control.ireset[i] = 0;
|
|
Packit |
577717 |
contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
|
|
Packit |
577717 |
contr->cpu_control.nrictrs--;
|
|
Packit |
577717 |
contr->cpu_control.nractrs++;
|
|
Packit |
577717 |
} else if ( !is_pentium4() &&
|
|
Packit |
577717 |
contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
|
|
Packit |
577717 |
contr->cpu_control.ireset[i] = 0;
|
|
Packit |
577717 |
contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
|
|
Packit |
577717 |
contr->cpu_control.nrictrs--;
|
|
Packit |
577717 |
contr->cpu_control.nractrs++;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
nricntrs = ( int ) contr->cpu_control.nrictrs;
|
|
Packit |
577717 |
nracntrs = ( int ) contr->cpu_control.nractrs;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* move this event to the top part of the list if needed */
|
|
Packit |
577717 |
if ( i >= nracntrs )
|
|
Packit |
577717 |
swap_events( ESI, contr, i, nracntrs - 1 );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( !nricntrs )
|
|
Packit |
577717 |
contr->si_signo = 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
OVFDBG( "Modified event set\n" );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
retval = _papi_hwi_stop_signal( _perfctr_vector.cmp_info.hardware_intr_sig );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#ifdef DEBUG
|
|
Packit |
577717 |
if ( is_pentium4() )
|
|
Packit |
577717 |
print_control( &(contr->cpu_control) );
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
OVFDBG( "End of call. Exit code: %d\n", retval );
|
|
Packit |
577717 |
return ( retval );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_x86_stop_profiling( ThreadInfo_t * master, EventSetInfo_t * ESI )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
( void ) master; /*unused */
|
|
Packit |
577717 |
( void ) ESI; /*unused */
|
|
Packit |
577717 |
return ( PAPI_OK );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* these define cccr and escr register bits, and the p4 event structure */
|
|
Packit |
577717 |
#include "perfmon/pfmlib_pentium4.h"
|
|
Packit |
577717 |
#include "../lib/pfmlib_pentium4_priv.h"
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#define P4_REPLAY_REAL_MASK 0x00000003
|
|
Packit |
577717 |
|
|
Packit |
577717 |
extern pentium4_escr_reg_t pentium4_escrs[];
|
|
Packit |
577717 |
extern pentium4_cccr_reg_t pentium4_cccrs[];
|
|
Packit |
577717 |
extern pentium4_event_t pentium4_events[];
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
static pentium4_replay_regs_t p4_replay_regs[] = {
|
|
Packit |
577717 |
/* 0 */ {.enb = 0,
|
|
Packit |
577717 |
/* dummy */
|
|
Packit |
577717 |
.mat_vert = 0,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 1 */ {.enb = 0,
|
|
Packit |
577717 |
/* dummy */
|
|
Packit |
577717 |
.mat_vert = 0,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 2 */ {.enb = 0x01000001,
|
|
Packit |
577717 |
/* 1stL_cache_load_miss_retired */
|
|
Packit |
577717 |
.mat_vert = 0x00000001,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 3 */ {.enb = 0x01000002,
|
|
Packit |
577717 |
/* 2ndL_cache_load_miss_retired */
|
|
Packit |
577717 |
.mat_vert = 0x00000001,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 4 */ {.enb = 0x01000004,
|
|
Packit |
577717 |
/* DTLB_load_miss_retired */
|
|
Packit |
577717 |
.mat_vert = 0x00000001,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 5 */ {.enb = 0x01000004,
|
|
Packit |
577717 |
/* DTLB_store_miss_retired */
|
|
Packit |
577717 |
.mat_vert = 0x00000002,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 6 */ {.enb = 0x01000004,
|
|
Packit |
577717 |
/* DTLB_all_miss_retired */
|
|
Packit |
577717 |
.mat_vert = 0x00000003,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 7 */ {.enb = 0x01018001,
|
|
Packit |
577717 |
/* Tagged_mispred_branch */
|
|
Packit |
577717 |
.mat_vert = 0x00000010,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 8 */ {.enb = 0x01000200,
|
|
Packit |
577717 |
/* MOB_load_replay_retired */
|
|
Packit |
577717 |
.mat_vert = 0x00000001,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 9 */ {.enb = 0x01000400,
|
|
Packit |
577717 |
/* split_load_retired */
|
|
Packit |
577717 |
.mat_vert = 0x00000001,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
/* 10 */ {.enb = 0x01000400,
|
|
Packit |
577717 |
/* split_store_retired */
|
|
Packit |
577717 |
.mat_vert = 0x00000002,
|
|
Packit |
577717 |
},
|
|
Packit |
577717 |
};
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/*
 * Translation table from the arbitrary pmd index used in
 * libpfm/pentium4_events.h to the counter numbering of the Intel
 * documentation: pfm2intel[pmd] is the Intel counter number.
 */
static int pfm2intel[] = {
    /* pmd  0 ..  8 */  0,  1,  4,  5,  8,  9, 12, 13, 16,
    /* pmd  9 .. 17 */  2,  3,  6,  7, 10, 11, 14, 15, 17
};
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* This call is broken. Selector can be much bigger than 32 bits. It should be a pfmlib_regmask_t - pjm */
|
|
Packit |
577717 |
/* Also, libpfm assumes events can live on different counters with different codes. This call only returns
|
|
Packit |
577717 |
the first occurence found. */
|
|
Packit |
577717 |
/* Right now its only called by ntv_code_to_bits in perfctr-p3, so we're ok. But for it to be
|
|
Packit |
577717 |
generally useful it should be fixed. - dkt */
|
|
Packit |
577717 |
static int
|
|
Packit |
577717 |
_pfm_get_counter_info( unsigned int event, unsigned int *selector, int *code )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
pfmlib_regmask_t cnt, impl;
|
|
Packit |
577717 |
unsigned int num;
|
|
Packit |
577717 |
unsigned int i, first = 1;
|
|
Packit |
577717 |
int ret;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) {
|
|
Packit |
577717 |
PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt,
|
|
Packit |
577717 |
pfm_strerror( ret ) );
|
|
Packit |
577717 |
return PAPI_ESYS;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) {
|
|
Packit |
577717 |
PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) );
|
|
Packit |
577717 |
return PAPI_ESYS;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) {
|
|
Packit |
577717 |
PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl,
|
|
Packit |
577717 |
pfm_strerror( ret ) );
|
|
Packit |
577717 |
return PAPI_ESYS;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
*selector = 0;
|
|
Packit |
577717 |
for ( i = 0; num; i++ ) {
|
|
Packit |
577717 |
if ( pfm_regmask_isset( &impl, i ) )
|
|
Packit |
577717 |
num--;
|
|
Packit |
577717 |
if ( pfm_regmask_isset( &cnt, i ) ) {
|
|
Packit |
577717 |
if ( first ) {
|
|
Packit |
577717 |
if ( ( ret =
|
|
Packit |
577717 |
pfm_get_event_code_counter( event, i,
|
|
Packit |
577717 |
code ) ) !=
|
|
Packit |
577717 |
PFMLIB_SUCCESS ) {
|
|
Packit |
577717 |
PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s",
|
|
Packit |
577717 |
event, i, code, pfm_strerror( ret ) );
|
|
Packit |
577717 |
return PAPI_ESYS;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
first = 0;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
*selector |= 1 << i;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
return PAPI_OK;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int
|
|
Packit |
577717 |
_papi_libpfm_ntv_code_to_bits_perfctr( unsigned int EventCode,
|
|
Packit |
577717 |
hwd_register_t *newbits )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
unsigned int event, umask;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
X86_register_t *bits = (X86_register_t *)newbits;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( is_pentium4() ) {
|
|
Packit |
577717 |
pentium4_escr_value_t escr_value;
|
|
Packit |
577717 |
pentium4_cccr_value_t cccr_value;
|
|
Packit |
577717 |
unsigned int num_masks, replay_mask, unit_masks[12];
|
|
Packit |
577717 |
unsigned int event_mask;
|
|
Packit |
577717 |
unsigned int tag_value, tag_enable;
|
|
Packit |
577717 |
unsigned int i;
|
|
Packit |
577717 |
int j, escr, cccr, pmd;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
|
|
Packit |
577717 |
return PAPI_ENOEVNT;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* for each allowed escr (1 or 2) find the allowed cccrs.
|
|
Packit |
577717 |
for each allowed cccr find the pmd index
|
|
Packit |
577717 |
convert to an intel counter number; or it into bits->counter */
|
|
Packit |
577717 |
for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) {
|
|
Packit |
577717 |
bits->counter[i] = 0;
|
|
Packit |
577717 |
escr = pentium4_events[event].allowed_escrs[i];
|
|
Packit |
577717 |
if ( escr < 0 ) {
|
|
Packit |
577717 |
continue;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
bits->escr[i] = escr;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) {
|
|
Packit |
577717 |
cccr = pentium4_escrs[escr].allowed_cccrs[j];
|
|
Packit |
577717 |
if ( cccr < 0 ) {
|
|
Packit |
577717 |
continue;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
pmd = pentium4_cccrs[cccr].pmd;
|
|
Packit |
577717 |
bits->counter[i] |= ( 1 << pfm2intel[pmd] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* if there's only one valid escr, copy the values */
|
|
Packit |
577717 |
if ( escr < 0 ) {
|
|
Packit |
577717 |
bits->escr[1] = bits->escr[0];
|
|
Packit |
577717 |
bits->counter[1] = bits->counter[0];
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Calculate the event-mask value. Invalid masks
|
|
Packit |
577717 |
* specified by the caller are ignored. */
|
|
Packit |
577717 |
tag_value = 0;
|
|
Packit |
577717 |
tag_enable = 0;
|
|
Packit |
577717 |
event_mask = _pfm_convert_umask( event, umask );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( event_mask & 0xF0000 ) {
|
|
Packit |
577717 |
tag_enable = 1;
|
|
Packit |
577717 |
tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
event_mask &= 0x0FFFF; /* mask off possible tag bits */
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* Set up the ESCR and CCCR register values. */
|
|
Packit |
577717 |
escr_value.val = 0;
|
|
Packit |
577717 |
escr_value.bits.t1_usr = 0; /* controlled by kernel */
|
|
Packit |
577717 |
escr_value.bits.t1_os = 0; /* controlled by kernel */
|
|
Packit |
577717 |
// escr_value.bits.t0_usr = (plm & PFM_PLM3) ? 1 : 0;
|
|
Packit |
577717 |
// escr_value.bits.t0_os = (plm & PFM_PLM0) ? 1 : 0;
|
|
Packit |
577717 |
escr_value.bits.tag_enable = tag_enable;
|
|
Packit |
577717 |
escr_value.bits.tag_value = tag_value;
|
|
Packit |
577717 |
escr_value.bits.event_mask = event_mask;
|
|
Packit |
577717 |
escr_value.bits.event_select = pentium4_events[event].event_select;
|
|
Packit |
577717 |
escr_value.bits.reserved = 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* initialize the proper bits in the cccr register */
|
|
Packit |
577717 |
cccr_value.val = 0;
|
|
Packit |
577717 |
cccr_value.bits.reserved1 = 0;
|
|
Packit |
577717 |
cccr_value.bits.enable = 1;
|
|
Packit |
577717 |
cccr_value.bits.escr_select = pentium4_events[event].escr_select;
|
|
Packit |
577717 |
cccr_value.bits.active_thread = 3;
|
|
Packit |
577717 |
/* FIXME: This is set to count when either logical
|
|
Packit |
577717 |
* CPU is active. Need a way to distinguish
|
|
Packit |
577717 |
* between logical CPUs when HT is enabled.
|
|
Packit |
577717 |
* the docs say these bits should always
|
|
Packit |
577717 |
* be set. */
|
|
Packit |
577717 |
cccr_value.bits.compare = 0;
|
|
Packit |
577717 |
/* FIXME: What do we do with "threshold" settings? */
|
|
Packit |
577717 |
cccr_value.bits.complement = 0;
|
|
Packit |
577717 |
/* FIXME: What do we do with "threshold" settings? */
|
|
Packit |
577717 |
cccr_value.bits.threshold = 0;
|
|
Packit |
577717 |
/* FIXME: What do we do with "threshold" settings? */
|
|
Packit |
577717 |
cccr_value.bits.force_ovf = 0;
|
|
Packit |
577717 |
/* FIXME: Do we want to allow "forcing" overflow
|
|
Packit |
577717 |
* interrupts on all counter increments? */
|
|
Packit |
577717 |
cccr_value.bits.ovf_pmi_t0 = 0;
|
|
Packit |
577717 |
cccr_value.bits.ovf_pmi_t1 = 0;
|
|
Packit |
577717 |
/* PMI taken care of by kernel typically */
|
|
Packit |
577717 |
cccr_value.bits.reserved2 = 0;
|
|
Packit |
577717 |
cccr_value.bits.cascade = 0;
|
|
Packit |
577717 |
/* FIXME: How do we handle "cascading" counters? */
|
|
Packit |
577717 |
cccr_value.bits.overflow = 0;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* these flags are always zero, from what I can tell... */
|
|
Packit |
577717 |
bits->pebs_enable = 0; /* flag for PEBS counting */
|
|
Packit |
577717 |
bits->pebs_matrix_vert = 0;
|
|
Packit |
577717 |
/* flag for PEBS_MATRIX_VERT, whatever that is */
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* ...unless the event is replay_event */
|
|
Packit |
577717 |
if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) {
|
|
Packit |
577717 |
escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK;
|
|
Packit |
577717 |
num_masks = prepare_umask( umask, unit_masks );
|
|
Packit |
577717 |
for ( i = 0; i < num_masks; i++ ) {
|
|
Packit |
577717 |
replay_mask = unit_masks[i];
|
|
Packit |
577717 |
if ( replay_mask > 1 && replay_mask < 11 ) {
|
|
Packit |
577717 |
/* process each valid mask we find */
|
|
Packit |
577717 |
bits->pebs_enable |= p4_replay_regs[replay_mask].enb;
|
|
Packit |
577717 |
bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* store the escr and cccr values */
|
|
Packit |
577717 |
bits->event = escr_value.val;
|
|
Packit |
577717 |
bits->cccr = cccr_value.val;
|
|
Packit |
577717 |
bits->ireset = 0; /* I don't really know what this does */
|
|
Packit |
577717 |
SUBDBG( "escr: 0x%lx; cccr: 0x%lx\n", escr_value.val, cccr_value.val );
|
|
Packit |
577717 |
} else {
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int ret, code;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
|
|
Packit |
577717 |
return PAPI_ENOEVNT;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( ( ret = _pfm_get_counter_info( event, &bits->selector,
|
|
Packit |
577717 |
&code ) ) != PAPI_OK )
|
|
Packit |
577717 |
return ret;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
SUBDBG( "selector: %#x\n", bits->selector );
|
|
Packit |
577717 |
SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event,
|
|
Packit |
577717 |
umask, code, ( ( hwd_register_t * ) bits )->counter_cmd );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
return PAPI_OK;
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
papi_vector_t _perfctr_vector = {
|
|
Packit |
577717 |
.cmp_info = {
|
|
Packit |
577717 |
/* default component information (unspecified values are initialized to 0) */
|
|
Packit |
577717 |
.name = "perfctr",
|
|
Packit |
577717 |
.description = "Linux perfctr CPU counters",
|
|
Packit |
577717 |
.default_domain = PAPI_DOM_USER,
|
|
Packit |
577717 |
.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
|
|
Packit |
577717 |
.default_granularity = PAPI_GRN_THR,
|
|
Packit |
577717 |
.available_granularities = PAPI_GRN_THR,
|
|
Packit |
577717 |
.hardware_intr_sig = PAPI_INT_SIGNAL,
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* component specific cmp_info initializations */
|
|
Packit |
577717 |
.fast_real_timer = 1,
|
|
Packit |
577717 |
.fast_virtual_timer = 1,
|
|
Packit |
577717 |
.attach = 1,
|
|
Packit |
577717 |
.attach_must_ptrace = 1,
|
|
Packit |
577717 |
.cntr_umasks = 1,
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
,
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* sizes of framework-opaque component-private structures */
|
|
Packit |
577717 |
.size = {
|
|
Packit |
577717 |
.context = sizeof ( X86_perfctr_context_t ),
|
|
Packit |
577717 |
.control_state = sizeof ( X86_perfctr_control_t ),
|
|
Packit |
577717 |
.reg_value = sizeof ( X86_register_t ),
|
|
Packit |
577717 |
.reg_alloc = sizeof ( X86_reg_alloc_t ),
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
,
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* function pointers in this component */
|
|
Packit |
577717 |
.init_control_state = _x86_init_control_state,
|
|
Packit |
577717 |
.start = _x86_start,
|
|
Packit |
577717 |
.stop = _x86_stop,
|
|
Packit |
577717 |
.read = _x86_read,
|
|
Packit |
577717 |
.allocate_registers = _x86_allocate_registers,
|
|
Packit |
577717 |
.update_control_state = _x86_update_control_state,
|
|
Packit |
577717 |
.set_domain = _x86_set_domain,
|
|
Packit |
577717 |
.reset = _x86_reset,
|
|
Packit |
577717 |
.set_overflow = _x86_set_overflow,
|
|
Packit |
577717 |
.stop_profiling = _x86_stop_profiling,
|
|
Packit |
577717 |
|
|
Packit |
577717 |
.init_component = _perfctr_init_component,
|
|
Packit |
577717 |
.ctl = _perfctr_ctl,
|
|
Packit |
577717 |
.dispatch_timer = _perfctr_dispatch_timer,
|
|
Packit |
577717 |
.init_thread = _perfctr_init_thread,
|
|
Packit |
577717 |
.shutdown_thread = _perfctr_shutdown_thread,
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/* from libpfm */
|
|
Packit |
577717 |
.ntv_enum_events = _papi_libpfm_ntv_enum_events,
|
|
Packit |
577717 |
.ntv_name_to_code = _papi_libpfm_ntv_name_to_code,
|
|
Packit |
577717 |
.ntv_code_to_name = _papi_libpfm_ntv_code_to_name,
|
|
Packit |
577717 |
.ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr,
|
|
Packit |
577717 |
.ntv_code_to_bits = _papi_libpfm_ntv_code_to_bits_perfctr,
|
|
Packit |
577717 |
|
|
Packit |
577717 |
};
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|