/****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: extras.c * Author: Philip Mucci * mucci@cs.utk.edu * Mods: dan terpstra * terpstra@cs.utk.edu * Mods: Haihang You * you@cs.utk.edu * Mods: Kevin London * london@cs.utk.edu * Mods: Maynard Johnson * maynardj@us.ibm.com */ /* This file contains portable routines to do things that we wish the vendors did in the kernel extensions or performance libraries. */ #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "extras.h" #include "threads.h" #if (!defined(HAVE_FFSLL) || defined(__bgp__)) int ffsll( long long lli ); #else #include #endif /****************/ /* BEGIN LOCALS */ /****************/ static unsigned int _rnum = DEADBEEF; /**************/ /* END LOCALS */ /**************/ inline_static unsigned short random_ushort( void ) { return ( unsigned short ) ( _rnum = 1664525 * _rnum + 1013904223 ); } /* compute the amount by which to increment the bucket. value is the current value of the bucket this routine is used by all three profiling cases it is inlined for speed */ inline_static int profil_increment( long long value, int flags, long long excess, long long threshold ) { int increment = 1; if ( flags == PAPI_PROFIL_POSIX ) { return ( 1 ); } if ( flags & PAPI_PROFIL_RANDOM ) { if ( random_ushort( ) <= ( USHRT_MAX / 4 ) ) return ( 0 ); } if ( flags & PAPI_PROFIL_COMPRESS ) { /* We're likely to ignore the sample if buf[address] gets big. */ if ( random_ushort( ) < value ) { return ( 0 ); } } if ( flags & PAPI_PROFIL_WEIGHTED ) { /* Increment is between 1 and 255 */ if ( excess <= ( long long ) 1 ) increment = 1; else if ( excess > threshold ) increment = 255; else { threshold = threshold / ( long long ) 255; increment = ( int ) ( excess / threshold ); } } return ( increment ); } static void posix_profil( caddr_t address, PAPI_sprofil_t * prof, int flags, long long excess, long long threshold ) { unsigned short *buf16; unsigned int *buf32; unsigned long long *buf64; unsigned long indx; unsigned long long lloffset; /* SPECIAL CASE: if starting address is 0 and scale factor is 2 then all counts go into first bin. */ if ( ( prof->pr_off == 0 ) && ( prof->pr_scale == 0x2 ) ) indx = 0; else { /* compute the profile buffer offset by: - subtracting the profiling base address from the pc address - multiplying by the scaling factor - dividing by max scale (65536, or 2^^16) - dividing by implicit 2 (2^^1 for a total of 2^^17), for even addresses NOTE: 131072 is a valid scale value. It produces byte resolution of addresses */ lloffset = ( unsigned long long ) ( ( address - prof->pr_off ) * prof->pr_scale ); indx = ( unsigned long ) ( lloffset >> 17 ); } /* confirm addresses within specified range */ if ( address >= prof->pr_off ) { /* test first for 16-bit buckets; this should be the fast case */ if ( flags & PAPI_PROFIL_BUCKET_16 ) { if ( ( indx * sizeof ( short ) ) < prof->pr_size ) { buf16 = (unsigned short *) prof->pr_base; buf16[indx] = ( unsigned short ) ( ( unsigned short ) buf16[indx] + profil_increment( buf16[indx], flags, excess, threshold ) ); PRFDBG( "posix_profil_16() bucket %lu = %u\n", indx, buf16[indx] ); } } /* next, look for the 32-bit case */ else if ( flags & PAPI_PROFIL_BUCKET_32 ) { if ( ( indx * sizeof ( int ) ) < prof->pr_size ) { buf32 = (unsigned int *) prof->pr_base; buf32[indx] = ( unsigned int ) buf32[indx] + ( unsigned int ) profil_increment( buf32[indx], flags, excess, threshold ); PRFDBG( "posix_profil_32() bucket %lu = %u\n", indx, buf32[indx] ); } } /* finally, fall through to the 64-bit case */ else { if ( ( indx * sizeof ( long long ) ) < prof->pr_size ) { buf64 = (unsigned long long *) prof->pr_base; buf64[indx] = ( unsigned long long ) buf64[indx] + ( unsigned long long ) profil_increment( ( long long ) buf64[indx], flags, excess, threshold ); PRFDBG( "posix_profil_64() bucket %lu = %lld\n", indx, buf64[indx] ); } } } } void _papi_hwi_dispatch_profile( EventSetInfo_t * ESI, caddr_t pc, long long over, int profile_index ) { EventSetProfileInfo_t *profile = &ESI->profile; PAPI_sprofil_t *sprof; caddr_t offset = 0; caddr_t best_offset = 0; int count; int best_index = -1; int i; PRFDBG( "handled IP %p\n", pc ); sprof = profile->prof[profile_index]; count = profile->count[profile_index]; for ( i = 0; i < count; i++ ) { offset = sprof[i].pr_off; if ( ( offset < pc ) && ( offset > best_offset ) ) { best_index = i; best_offset = offset; } } if ( best_index == -1 ) best_index = 0; posix_profil( pc, &sprof[best_index], profile->flags, over, profile->threshold[profile_index] ); } /* if isHardware is true, then the processor is using hardware overflow, else it is using software overflow. Use this parameter instead of _papi_hwi_system_info.supports_hw_overflow is in CRAY some processors may use hardware overflow, some may use software overflow. overflow_bit: if the component can get the overflow bit when overflow occurs, then this should be passed by the component; If both genOverflowBit and isHardwareSupport are true, that means the component doesn't know how to get the overflow bit from the kernel directly, so we generate the overflow bit in this function since this function can access the ESI->overflow struct; (The component can only set genOverflowBit parameter to true if the hardware doesn't support multiple hardware overflow. If the component supports multiple hardware overflow and you don't know how to get the overflow bit, then I don't know how to deal with this situation). */ int _papi_hwi_dispatch_overflow_signal( void *papiContext, caddr_t address, int *isHardware, long long overflow_bit, int genOverflowBit, ThreadInfo_t ** t, int cidx ) { int retval, event_counter, i, overflow_flag, pos; int papi_index, j; int profile_index = 0; long long overflow_vector; long long temp[_papi_hwd[cidx]->cmp_info.num_cntrs], over; long long latest = 0; ThreadInfo_t *thread; EventSetInfo_t *ESI; _papi_hwi_context_t *ctx = ( _papi_hwi_context_t * ) papiContext; OVFDBG( "enter\n" ); if ( *t ) thread = *t; else *t = thread = _papi_hwi_lookup_thread( 0 ); if ( thread != NULL ) { ESI = thread->running_eventset[cidx]; if ( ( ESI == NULL ) || ( ( ESI->state & PAPI_OVERFLOWING ) == 0 ) ) { OVFDBG( "Either no eventset or eventset not set to overflow.\n" ); #ifdef ANY_THREAD_GETS_SIGNAL _papi_hwi_broadcast_signal( thread->tid ); #endif return ( PAPI_OK ); } if ( ESI->CmpIdx != cidx ) return ( PAPI_ENOCMP ); if ( ESI->master != thread ) { PAPIERROR ( "eventset->thread %#lx vs. current thread %#lx mismatch", ESI->master, thread ); return ( PAPI_EBUG ); } if ( isHardware ) { if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) { ESI->state |= PAPI_PAUSED; *isHardware = 1; } else *isHardware = 0; } /* Get the latest counter value */ event_counter = ESI->overflow.event_counter; overflow_flag = 0; overflow_vector = 0; if ( !( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) { retval = _papi_hwi_read( thread->context[cidx], ESI, ESI->sw_stop ); if ( retval < PAPI_OK ) return ( retval ); for ( i = 0; i < event_counter; i++ ) { papi_index = ESI->overflow.EventIndex[i]; latest = ESI->sw_stop[papi_index]; temp[i] = -1; if ( latest >= ( long long ) ESI->overflow.deadline[i] ) { OVFDBG ( "dispatch_overflow() latest %lld, deadline %lld, threshold %d\n", latest, ESI->overflow.deadline[i], ESI->overflow.threshold[i] ); pos = ESI->EventInfoArray[papi_index].pos[0]; overflow_vector ^= ( long long ) 1 << pos; temp[i] = latest - ESI->overflow.deadline[i]; overflow_flag = 1; /* adjust the deadline */ ESI->overflow.deadline[i] = latest + ESI->overflow.threshold[i]; } } } else if ( genOverflowBit ) { /* we had assumed the overflow event can't be derived event */ papi_index = ESI->overflow.EventIndex[0]; /* suppose the pos is the same as the counter number * (this is not true in Itanium, but itanium doesn't * need us to generate the overflow bit */ pos = ESI->EventInfoArray[papi_index].pos[0]; overflow_vector = ( long long ) 1 << pos; } else overflow_vector = overflow_bit; if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) || overflow_flag ) { if ( ESI->state & PAPI_PROFILING ) { int k = 0; while ( overflow_vector ) { i = ffsll( overflow_vector ) - 1; for ( j = 0; j < event_counter; j++ ) { papi_index = ESI->overflow.EventIndex[j]; /* This loop is here ONLY because Pentium 4 can have tagged * * events that contain more than one counter without being * * derived. You've gotta scan all terms to make sure you * * find the one to profile. */ for ( k = 0, pos = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT && pos >= 0; k++ ) { pos = ESI->EventInfoArray[papi_index].pos[k]; if ( i == pos ) { profile_index = j; goto foundit; } } } if ( j == event_counter ) { PAPIERROR ( "BUG! overflow_vector is 0, dropping interrupt" ); return ( PAPI_EBUG ); } foundit: if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) over = 0; else over = temp[profile_index]; _papi_hwi_dispatch_profile( ESI, address, over, profile_index ); overflow_vector ^= ( long long ) 1 << i; } /* do not use overflow_vector after this place */ } else { ESI->overflow.handler( ESI->EventSetIndex, ( void * ) address, overflow_vector, ctx->ucontext ); } } ESI->state &= ~( PAPI_PAUSED ); } #ifdef ANY_THREAD_GETS_SIGNAL else { OVFDBG( "I haven't been noticed by PAPI before\n" ); _papi_hwi_broadcast_signal( ( *_papi_hwi_thread_id_fn ) ( ) ); } #endif return ( PAPI_OK ); } #include #include #include int _papi_hwi_using_signal[PAPI_NSIG]; int _papi_hwi_start_timer( int timer, int signal, int ns ) { struct itimerval value; int us = ns / 1000; if ( us == 0 ) us = 1; #ifdef ANY_THREAD_GETS_SIGNAL _papi_hwi_lock( INTERNAL_LOCK ); if ( ( _papi_hwi_using_signal[signal] - 1 ) ) { INTDBG( "itimer already installed\n" ); _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_OK ); } _papi_hwi_unlock( INTERNAL_LOCK ); #else ( void ) signal; /*unused */ #endif value.it_interval.tv_sec = 0; value.it_interval.tv_usec = us; value.it_value.tv_sec = 0; value.it_value.tv_usec = us; INTDBG( "Installing itimer %d, with %d us interval\n", timer, us ); if ( setitimer( timer, &value, NULL ) < 0 ) { PAPIERROR( "setitimer errno %d", errno ); return ( PAPI_ESYS ); } return ( PAPI_OK ); } int _papi_hwi_start_signal( int signal, int need_context, int cidx ) { struct sigaction action; _papi_hwi_lock( INTERNAL_LOCK ); _papi_hwi_using_signal[signal]++; if ( _papi_hwi_using_signal[signal] - 1 ) { INTDBG( "_papi_hwi_using_signal is now %d\n", _papi_hwi_using_signal[signal] ); _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_OK ); } memset( &action, 0x00, sizeof ( struct sigaction ) ); action.sa_flags = SA_RESTART; action.sa_sigaction = ( void ( * )( int, siginfo_t *, void * ) ) _papi_hwd[cidx]-> dispatch_timer; if ( need_context ) #if (defined(_BGL) /*|| defined (__bgp__)*/) action.sa_flags |= SIGPWR; #else action.sa_flags |= SA_SIGINFO; #endif INTDBG( "installing signal handler\n" ); if ( sigaction( signal, &action, NULL ) < 0 ) { PAPIERROR( "sigaction errno %d", errno ); _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_ESYS ); } INTDBG( "_papi_hwi_using_signal[%d] is now %d.\n", signal, _papi_hwi_using_signal[signal] ); _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_OK ); } int _papi_hwi_stop_signal( int signal ) { _papi_hwi_lock( INTERNAL_LOCK ); if ( --_papi_hwi_using_signal[signal] == 0 ) { INTDBG( "removing signal handler\n" ); if ( sigaction( signal, NULL, NULL ) == -1 ) { PAPIERROR( "sigaction errno %d", errno ); _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_ESYS ); } } INTDBG( "_papi_hwi_using_signal[%d] is now %d\n", signal, _papi_hwi_using_signal[signal] ); _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_OK ); } int _papi_hwi_stop_timer( int timer, int signal ) { struct itimerval value; #ifdef ANY_THREAD_GETS_SIGNAL _papi_hwi_lock( INTERNAL_LOCK ); if ( _papi_hwi_using_signal[signal] > 1 ) { INTDBG( "itimer in use by another thread\n" ); _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_OK ); } _papi_hwi_unlock( INTERNAL_LOCK ); #else ( void ) signal; /*unused */ #endif value.it_interval.tv_sec = 0; value.it_interval.tv_usec = 0; value.it_value.tv_sec = 0; value.it_value.tv_usec = 0; INTDBG( "turning off timer\n" ); if ( setitimer( timer, &value, NULL ) == -1 ) { PAPIERROR( "setitimer errno %d", errno ); return PAPI_ESYS; } return PAPI_OK; } #if (!defined(HAVE_FFSLL) || defined(__bgp__)) /* find the first set bit in long long */ int ffsll( long long lli ) { int i, num, t, tmpint, len; num = sizeof ( long long ) / sizeof ( int ); if ( num == 1 ) return ( ffs( ( int ) lli ) ); len = sizeof ( int ) * CHAR_BIT; for ( i = 0; i < num; i++ ) { tmpint = ( int ) ( ( ( lli >> len ) << len ) ^ lli ); t = ffs( tmpint ); if ( t ) { return ( t + i * len ); } lli = lli >> len; } return PAPI_OK; } #endif