/* * File: perfctr.c * Author: Philip Mucci * mucci at cs.utk.edu * Mods: Kevin London * london at cs.utk.edu * Mods: Maynard Johnson * maynardj at us.ibm.com * Mods: Brian Sheely * bsheely at eecs.utk.edu */ #include #include #include #include #include "papi.h" #include "papi_internal.h" #ifdef PPC64 #include "perfctr-ppc64.h" #else #include "perfctr-x86.h" #include "papi_libpfm_events.h" #endif #include "papi_vector.h" #include "papi_memory.h" #include "extras.h" #include "linux-common.h" #include "linux-context.h" extern papi_vector_t _perfctr_vector; #ifdef PPC64 extern int setup_ppc64_presets( int cputype, int cidx ); #endif /* This should be in a linux.h header file maybe. */ #define FOPEN_ERROR "fopen(%s) returned NULL" #if defined(PERFCTR26) #define PERFCTR_CPU_NAME(pi) perfctr_info_cpu_name(pi) #define PERFCTR_CPU_NRCTRS(pi) perfctr_info_nrctrs(pi) #else #define PERFCTR_CPU_NAME perfctr_cpu_name #define PERFCTR_CPU_NRCTRS perfctr_cpu_nrctrs #endif #if !defined(PPC64) static inline int xlate_cpu_type_to_vendor( unsigned perfctr_cpu_type ) { switch ( perfctr_cpu_type ) { case PERFCTR_X86_INTEL_P5: case PERFCTR_X86_INTEL_P5MMX: case PERFCTR_X86_INTEL_P6: case PERFCTR_X86_INTEL_PII: case PERFCTR_X86_INTEL_PIII: case PERFCTR_X86_INTEL_P4: case PERFCTR_X86_INTEL_P4M2: #ifdef PERFCTR_X86_INTEL_P4M3 case PERFCTR_X86_INTEL_P4M3: #endif #ifdef PERFCTR_X86_INTEL_PENTM case PERFCTR_X86_INTEL_PENTM: #endif #ifdef PERFCTR_X86_INTEL_CORE case PERFCTR_X86_INTEL_CORE: #endif #ifdef PERFCTR_X86_INTEL_CORE2 case PERFCTR_X86_INTEL_CORE2: #endif #ifdef PERFCTR_X86_INTEL_ATOM /* family 6 model 28 */ case PERFCTR_X86_INTEL_ATOM: #endif #ifdef PERFCTR_X86_INTEL_NHLM /* family 6 model 26 */ case PERFCTR_X86_INTEL_NHLM: #endif #ifdef PERFCTR_X86_INTEL_WSTMR case PERFCTR_X86_INTEL_WSTMR: #endif return ( PAPI_VENDOR_INTEL ); #ifdef PERFCTR_X86_AMD_K8 case PERFCTR_X86_AMD_K8: #endif #ifdef PERFCTR_X86_AMD_K8C case PERFCTR_X86_AMD_K8C: #endif #ifdef PERFCTR_X86_AMD_FAM10 /* this is defined in perfctr 2.6.29 */ case PERFCTR_X86_AMD_FAM10: #endif case PERFCTR_X86_AMD_K7: return ( PAPI_VENDOR_AMD ); default: return ( PAPI_VENDOR_UNKNOWN ); } } #endif long long tb_scale_factor = ( long long ) 1; /* needed to scale get_cycles on PPC series */ int _perfctr_init_component( int cidx ) { int retval; struct perfctr_info info; char abiv[PAPI_MIN_STR_LEN]; #if defined(PERFCTR26) int fd; #else struct vperfctr *dev; #endif #if defined(PERFCTR26) /* Get info from the kernel */ /* Use lower level calls per Mikael to get the perfctr info without actually creating a new kernel-side state. Also, close the fd immediately after retrieving the info. This is much lighter weight and doesn't reserve the counter resources. Also compatible with perfctr 2.6.14. */ fd = _vperfctr_open( 0 ); if ( fd < 0 ) { strncpy(_perfctr_vector.cmp_info.disabled_reason, VOPEN_ERROR,PAPI_MAX_STR_LEN); return PAPI_ESYS; } retval = perfctr_info( fd, &info ); close( fd ); if ( retval < 0 ) { strncpy(_perfctr_vector.cmp_info.disabled_reason, VINFO_ERROR,PAPI_MAX_STR_LEN); return PAPI_ESYS; } /* copy tsc multiplier to local variable */ /* this field appears in perfctr 2.6 and higher */ tb_scale_factor = ( long long ) info.tsc_to_cpu_mult; #else /* Opened once for all threads. */ if ( ( dev = vperfctr_open( ) ) == NULL ) { strncpy(_perfctr_vector.cmp_info.disabled_reason, VOPEN_ERROR,PAPI_MAX_STR_LEN); return PAPI_ESYS; } SUBDBG( "_perfctr_init_component vperfctr_open = %p\n", dev ); /* Get info from the kernel */ retval = vperfctr_info( dev, &info ); if ( retval < 0 ) { strncpy(_perfctr_vector.cmp_info.disabled_reason, VINFO_ERROR,PAPI_MAX_STR_LEN); return ( PAPI_ESYS ); } vperfctr_close( dev ); #endif /* Fill in what we can of the papi_system_info. */ retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info ); if ( retval != PAPI_OK ) return ( retval ); /* Setup memory info */ retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info, ( int ) info.cpu_type ); if ( retval ) return ( retval ); strcpy( _perfctr_vector.cmp_info.name,"perfctr.c" ); strcpy( _perfctr_vector.cmp_info.version, "$Revision$" ); sprintf( abiv, "0x%08X", info.abi_version ); strcpy( _perfctr_vector.cmp_info.support_version, abiv ); strcpy( _perfctr_vector.cmp_info.kernel_version, info.driver_version ); _perfctr_vector.cmp_info.CmpIdx = cidx; _perfctr_vector.cmp_info.num_cntrs = ( int ) PERFCTR_CPU_NRCTRS( &info ); _perfctr_vector.cmp_info.num_mpx_cntrs=_perfctr_vector.cmp_info.num_cntrs; if ( info.cpu_features & PERFCTR_FEATURE_RDPMC ) _perfctr_vector.cmp_info.fast_counter_read = 1; else _perfctr_vector.cmp_info.fast_counter_read = 0; _perfctr_vector.cmp_info.fast_real_timer = 1; _perfctr_vector.cmp_info.fast_virtual_timer = 1; _perfctr_vector.cmp_info.attach = 1; _perfctr_vector.cmp_info.attach_must_ptrace = 1; _perfctr_vector.cmp_info.default_domain = PAPI_DOM_USER; #if !defined(PPC64) /* AMD and Intel ia386 processors all support unit mask bits */ _perfctr_vector.cmp_info.cntr_umasks = 1; #endif #if defined(PPC64) _perfctr_vector.cmp_info.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR; #else _perfctr_vector.cmp_info.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL; #endif _perfctr_vector.cmp_info.default_granularity = PAPI_GRN_THR; _perfctr_vector.cmp_info.available_granularities = PAPI_GRN_THR; if ( info.cpu_features & PERFCTR_FEATURE_PCINT ) _perfctr_vector.cmp_info.hardware_intr = 1; else _perfctr_vector.cmp_info.hardware_intr = 0; SUBDBG( "Hardware/OS %s support counter generated interrupts\n", _perfctr_vector.cmp_info.hardware_intr ? "does" : "does not" ); strcpy( _papi_hwi_system_info.hw_info.model_string, PERFCTR_CPU_NAME( &info ) ); _papi_hwi_system_info.hw_info.model = ( int ) info.cpu_type; #if defined(PPC64) _papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_IBM; if ( strlen( _papi_hwi_system_info.hw_info.vendor_string ) == 0 ) strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" ); #else _papi_hwi_system_info.hw_info.vendor = xlate_cpu_type_to_vendor( info.cpu_type ); #endif /* Setup presets last. Some platforms depend on earlier info */ #if !defined(PPC64) // retval = setup_p3_vector_table(vtable); if ( !retval ) retval = _papi_libpfm_init(&_perfctr_vector, cidx ); #else /* Setup native and preset events */ // retval = ppc64_setup_vector_table(vtable); if ( !retval ) retval = perfctr_ppc64_setup_native_table( ); if ( !retval ) retval = setup_ppc64_presets( info.cpu_type, cidx ); #endif if ( retval ) return ( retval ); return ( PAPI_OK ); } static int attach( hwd_control_state_t * ctl, unsigned long tid ) { struct vperfctr_control tmp; #ifdef VPERFCTR_CONTROL_CLOEXEC tmp.flags = VPERFCTR_CONTROL_CLOEXEC; #endif ctl->rvperfctr = rvperfctr_open( ( int ) tid ); if ( ctl->rvperfctr == NULL ) { PAPIERROR( VOPEN_ERROR ); return ( PAPI_ESYS ); } SUBDBG( "_papi_hwd_ctl rvperfctr_open() = %p\n", ctl->rvperfctr ); /* Initialize the per thread/process virtualized TSC */ memset( &tmp, 0x0, sizeof ( tmp ) ); tmp.cpu_control.tsc_on = 1; /* Start the per thread/process virtualized TSC */ if ( rvperfctr_control( ctl->rvperfctr, &tmp ) < 0 ) { PAPIERROR( RCNTRL_ERROR ); return ( PAPI_ESYS ); } return ( PAPI_OK ); } /* end attach() */ static int detach( hwd_control_state_t * ctl ) { rvperfctr_close( ctl->rvperfctr ); return ( PAPI_OK ); } /* end detach() */ static inline int round_requested_ns( int ns ) { if ( ns < _papi_os_info.itimer_res_ns ) { return _papi_os_info.itimer_res_ns; } else { int leftover_ns = ns % _papi_os_info.itimer_res_ns; return ns + leftover_ns; } } int _perfctr_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) { ( void ) ctx; /*unused */ switch ( code ) { case PAPI_DOMAIN: case PAPI_DEFDOM: #if defined(PPC64) return ( _perfctr_vector. set_domain( option->domain.ESI, option->domain.domain ) ); #else return ( _perfctr_vector. set_domain( option->domain.ESI->ctl_state, option->domain.domain ) ); #endif case PAPI_GRANUL: case PAPI_DEFGRN: return PAPI_ECMP; case PAPI_ATTACH: return ( attach( option->attach.ESI->ctl_state, option->attach.tid ) ); case PAPI_DETACH: return ( detach( option->attach.ESI->ctl_state ) ); case PAPI_DEF_ITIMER: { /* flags are currently ignored, eventually the flags will be able to specify whether or not we use POSIX itimers (clock_gettimer) */ if ( ( option->itimer.itimer_num == ITIMER_REAL ) && ( option->itimer.itimer_sig != SIGALRM ) ) return PAPI_EINVAL; if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && ( option->itimer.itimer_sig != SIGVTALRM ) ) return PAPI_EINVAL; if ( ( option->itimer.itimer_num == ITIMER_PROF ) && ( option->itimer.itimer_sig != SIGPROF ) ) return PAPI_EINVAL; if ( option->itimer.ns > 0 ) option->itimer.ns = round_requested_ns( option->itimer.ns ); /* At this point, we assume the user knows what he or she is doing, they maybe doing something arch specific */ return PAPI_OK; } case PAPI_DEF_MPX_NS: { option->multiplex.ns = ( unsigned long ) round_requested_ns( ( int ) option->multiplex. ns ); return ( PAPI_OK ); } case PAPI_DEF_ITIMER_NS: { option->itimer.ns = round_requested_ns( option->itimer.ns ); return ( PAPI_OK ); } default: return ( PAPI_ENOSUPP ); } } void _perfctr_dispatch_timer( int signal, siginfo_t * si, void *context ) { ( void ) signal; /*unused */ _papi_hwi_context_t ctx; ThreadInfo_t *master = NULL; int isHardware = 0; caddr_t address; int cidx = _perfctr_vector.cmp_info.CmpIdx; hwd_context_t *our_context; ctx.si = si; ctx.ucontext = ( ucontext_t * ) context; #define OVERFLOW_MASK si->si_pmc_ovf_mask #define GEN_OVERFLOW 0 address = ( caddr_t ) GET_OVERFLOW_ADDRESS( ( ctx ) ); _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, &isHardware, OVERFLOW_MASK, GEN_OVERFLOW, &master, _perfctr_vector.cmp_info.CmpIdx ); /* We are done, resume interrupting counters */ if ( isHardware ) { our_context=(hwd_context_t *) master->context[cidx]; errno = vperfctr_iresume( our_context->perfctr ); if ( errno < 0 ) { PAPIERROR( "vperfctr_iresume errno %d", errno ); } } } int _perfctr_init_thread( hwd_context_t * ctx ) { struct vperfctr_control tmp; int error; /* Initialize our thread/process pointer. */ if ( ( ctx->perfctr = vperfctr_open( ) ) == NULL ) { #ifdef VPERFCTR_OPEN_CREAT_EXCL /* New versions of perfctr have this, which allows us to get a previously created context, i.e. one created after a fork and now we're inside a new process that has been exec'd */ if ( errno ) { if ( ( ctx->perfctr = vperfctr_open_mode( 0 ) ) == NULL ) { return PAPI_ESYS; } } else { return PAPI_ESYS; } #else return PAPI_ESYS; #endif } SUBDBG( "_papi_hwd_init vperfctr_open() = %p\n", ctx->perfctr ); /* Initialize the per thread/process virtualized TSC */ memset( &tmp, 0x0, sizeof ( tmp ) ); tmp.cpu_control.tsc_on = 1; #ifdef VPERFCTR_CONTROL_CLOEXEC tmp.flags = VPERFCTR_CONTROL_CLOEXEC; SUBDBG( "close on exec\t\t\t%u\n", tmp.flags ); #endif /* Start the per thread/process virtualized TSC */ error = vperfctr_control( ctx->perfctr, &tmp ); if ( error < 0 ) { SUBDBG( "starting virtualized TSC; vperfctr_control returns %d\n", error ); return PAPI_ESYS; } return PAPI_OK; } /* This routine is for shutting down threads, including the master thread. */ int _perfctr_shutdown_thread( hwd_context_t * ctx ) { #ifdef DEBUG int retval = vperfctr_unlink( ctx->perfctr ); SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr, retval ); #else vperfctr_unlink( ctx->perfctr ); #endif vperfctr_close( ctx->perfctr ); SUBDBG( "_perfctr_shutdown vperfctr_close(%p)\n", ctx->perfctr ); memset( ctx, 0x0, sizeof ( hwd_context_t ) ); return ( PAPI_OK ); }