#ifndef _LINUX_LOCK_H
#define _LINUX_LOCK_H

#include "mb.h"

/* Locking functions */

#if defined(USE_PTHREAD_MUTEXES)

#include <pthread.h>

extern pthread_mutex_t _papi_hwd_lock_data[PAPI_MAX_LOCK];

#define _papi_hwd_lock(lck)                            \
do                                                     \
{                                                      \
   pthread_mutex_lock( &_papi_hwd_lock_data[lck] );    \
} while(0)

#define _papi_hwd_unlock(lck)                          \
do                                                     \
{                                                      \
   pthread_mutex_unlock( &_papi_hwd_lock_data[lck] );  \
} while(0)

#else

extern volatile unsigned int _papi_hwd_lock_data[PAPI_MAX_LOCK];

#define MUTEX_OPEN   0
#define MUTEX_CLOSED 1
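/*
 * All of the hand-rolled backends below implement the same protocol:
 * _papi_hwd_lock() spins until it atomically swaps the slot from
 * MUTEX_OPEN to MUTEX_CLOSED, and _papi_hwd_unlock() stores MUTEX_OPEN
 * back.  As a point of comparison only -- PAPI does not use it here --
 * the same idea in portable C11 atomics would look roughly like this
 * (the names lock_data, papi_c11_lock, and papi_c11_unlock are
 * illustrative, not PAPI's):
 *
 *     #include <stdatomic.h>
 *
 *     static atomic_uint lock_data[PAPI_MAX_LOCK];
 *
 *     static inline void papi_c11_lock( int lck )
 *     {
 *         unsigned int expected = MUTEX_OPEN;
 *         while ( !atomic_compare_exchange_weak_explicit(
 *                     &lock_data[lck], &expected, MUTEX_CLOSED,
 *                     memory_order_acquire, memory_order_relaxed ) ) {
 *             expected = MUTEX_OPEN;   // a failed CAS overwrites 'expected'
 *         }
 *     }
 *
 *     static inline void papi_c11_unlock( int lck )
 *     {
 *         atomic_store_explicit( &lock_data[lck], MUTEX_OPEN,
 *                                memory_order_release );
 *     }
 */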
/********/
/* ia64 */
/********/

#if defined(__ia64__)
#ifdef __INTEL_COMPILER
#define _papi_hwd_lock(lck) { while(_InterlockedCompareExchange_acq(&_papi_hwd_lock_data[lck],MUTEX_CLOSED,MUTEX_OPEN) != MUTEX_OPEN) { ; } }
#define _papi_hwd_unlock(lck) { _InterlockedExchange((volatile int *)&_papi_hwd_lock_data[lck], MUTEX_OPEN); }
#else /* GCC */
#define _papi_hwd_lock(lck)                                          \
{ int res = 0;                                                       \
  do {                                                               \
    __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "r"(MUTEX_OPEN));     \
    __asm__ __volatile__ ("cmpxchg4.acq %0=[%1],%2,ar.ccv" : "=r"(res) : "r"(&_papi_hwd_lock_data[lck]), "r"(MUTEX_CLOSED) : "memory"); \
  } while (res != MUTEX_OPEN); }

#define _papi_hwd_unlock(lck) { __asm__ __volatile__ ("st4.rel [%0]=%1" : : "r"(&_papi_hwd_lock_data[lck]), "r"(MUTEX_OPEN) : "memory"); }
#endif

/***********/
/* x86     */
/***********/

#elif defined(__i386__)||defined(__x86_64__)

/* Spin until the locked cmpxchg swaps MUTEX_OPEN for MUTEX_CLOSED. */
#define _papi_hwd_lock(lck)                     \
do                                              \
{                                               \
   unsigned int res = 0;                        \
   do {                                         \
      __asm__ __volatile__ ("lock ; " "cmpxchg %1,%2" : "=a"(res) : "q"(MUTEX_CLOSED), "m"(_papi_hwd_lock_data[lck]), "0"(MUTEX_OPEN) : "memory"); \
   } while(res != (unsigned int)MUTEX_OPEN);    \
} while(0)

#define _papi_hwd_unlock(lck)                   \
do                                              \
{                                               \
   unsigned int res = 0;                        \
   __asm__ __volatile__ ("xchg %0,%1" : "=r"(res) : "m"(_papi_hwd_lock_data[lck]), "0"(MUTEX_OPEN) : "memory"); \
} while(0)

/***************/
/* power       */
/***************/

#elif defined(__powerpc__)

/*
 * These functions are slight modifications of the functions in
 * /usr/include/asm-ppc/system.h.
 *
 * We can't use the ones in system.h directly because they are defined
 * only when __KERNEL__ is defined.
 */

static __inline__ unsigned long
papi_xchg_u32( volatile void *p, unsigned long val )
{
	unsigned long prev;

	__asm__ __volatile__( "\n\
	sync \n\
1:	lwarx	%0,0,%2 \n\
	stwcx.	%3,0,%2 \n\
	bne-	1b \n\
	isync"
		: "=&r"( prev ), "=m"( *( volatile unsigned long * ) p )
		: "r"( p ), "r"( val ), "m"( *( volatile unsigned long * ) p )
		: "cc", "memory" );

	return prev;
}

#define _papi_hwd_lock(lck)                                          \
do {                                                                 \
  unsigned int retval;                                               \
  do {                                                               \
    retval = papi_xchg_u32(&_papi_hwd_lock_data[lck],MUTEX_CLOSED);  \
  } while(retval != (unsigned int)MUTEX_OPEN);                       \
} while(0)

#define _papi_hwd_unlock(lck)                                        \
do {                                                                 \
  unsigned int retval;                                               \
  do {                                                               \
    retval = papi_xchg_u32(&_papi_hwd_lock_data[lck],MUTEX_OPEN);    \
  } while(retval != (unsigned int)MUTEX_CLOSED);                     \
} while(0)

/*****************/
/* SPARC         */
/*****************/

#elif defined(__sparc__)

static inline void
__raw_spin_lock( volatile unsigned int *lock )
{
	__asm__ __volatile__( "\n1:\n\t"
		"ldstub	[%0], %%g2\n\t"
		"orcc	%%g2, 0x0, %%g0\n\t"
		"bne,a	2f\n\t"
		" ldub	[%0], %%g2\n\t"
		".subsection	2\n"
		"2:\n\t"
		"orcc	%%g2, 0x0, %%g0\n\t"
		"bne,a	2b\n\t"
		" ldub	[%0], %%g2\n\t"
		"b,a	1b\n\t"
		".previous\n"
		: /* no outputs */
		: "r"( lock )
		: "g2", "memory", "cc" );
}

static inline void
__raw_spin_unlock( volatile unsigned int *lock )
{
	__asm__ __volatile__( "stb %%g0, [%0]" : : "r"( lock ) : "memory" );
}

#define _papi_hwd_lock(lck) __raw_spin_lock(&_papi_hwd_lock_data[lck]);
#define _papi_hwd_unlock(lck) __raw_spin_unlock(&_papi_hwd_lock_data[lck])

/*******************/
/* ARM             */
/*******************/

#elif defined(__arm__)

#if 0
/* OLD CODE FROM VINCE BELOW */

/* FIXME */
/* not sure if this even works            */
/* also the various flavors of ARM        */
/* have differing levels of atomic        */
/* instruction support.  A proper         */
/* implementation needs to handle this :( */

#warning "WARNING! Verify mutexes work on ARM!"

/*
 * For arm/gcc, 0 is clear, 1 is set.
 */
#define MUTEX_SET(tsl) ({               \
	int __r;                        \
	asm volatile(                   \
		"swpb %0, %1, [%2]\n\t" \
		"eor  %0, %0, #1\n\t"   \
		: "=&r" (__r)           \
		: "r" (1), "r" (tsl)    \
	);                              \
	__r & 1;                        \
})

#define _papi_hwd_lock(lck) MUTEX_SET(lck)
#define _papi_hwd_unlock(lck) (*(volatile int *)(lck) = 0)

#endif

/* NEW CODE FROM PHIL */

static inline int
__arm_papi_spin_lock( volatile unsigned int *lock )
{
	unsigned int val;

	do
		asm volatile ("swp %0, %1, [%2]"
			: "=r" (val)
			: "0" (1), "r" (lock)
			: "memory");
	while (val != 0);

	return 0;
}

#define _papi_hwd_lock(lck) { rmb(); __arm_papi_spin_lock(&_papi_hwd_lock_data[lck]); rmb(); }
#define _papi_hwd_unlock(lck) { rmb(); _papi_hwd_lock_data[lck] = 0; rmb(); }

/*******************/
/* MIPS            */
/*******************/

#elif defined(__mips__)

static inline void __raw_spin_lock( volatile unsigned int *lock )
{
	unsigned int tmp;

	__asm__ __volatile__(
	"	.set	noreorder	# __raw_spin_lock	\n"
	"1:	ll	%1, %2					\n"
	"	bnez	%1, 1b					\n"
	"	 li	%1, 1					\n"
	"	sc	%1, %0					\n"
	"	beqzl	%1, 1b					\n"
	"	 nop						\n"
	"	sync						\n"
	"	.set	reorder					\n"
	: "=m" (*lock), "=&r" (tmp)
	: "m" (*lock)
	: "memory");
}

static inline void __raw_spin_unlock( volatile unsigned int *lock )
{
	__asm__ __volatile__(
	"	.set	noreorder	# __raw_spin_unlock	\n"
	"	sync						\n"
	"	sw	$0, %0					\n"
	"	.set	reorder					\n"
	: "=m" (*lock)
	: "m" (*lock)
	: "memory");
}

#define _papi_hwd_lock(lck) __raw_spin_lock(&_papi_hwd_lock_data[lck]);
#define _papi_hwd_unlock(lck) __raw_spin_unlock(&_papi_hwd_lock_data[lck])

#else

#error "_papi_hwd_lock/unlock undefined!"

#endif

#endif /* defined(USE_PTHREAD_MUTEXES) */

#endif /* _LINUX_LOCK_H */
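/*
 * Usage sketch (illustrative only; the lock index name INTERNAL_LOCK is
 * hypothetical -- PAPI defines its real lock indices elsewhere).  Whichever
 * backend is selected above, callers follow the same pattern, with the
 * index in the range [0, PAPI_MAX_LOCK):
 *
 *     _papi_hwd_lock( INTERNAL_LOCK );
 *     ... update shared PAPI state ...
 *     _papi_hwd_unlock( INTERNAL_LOCK );
 */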