/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
 *
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 */

/* Memory model documented at http://www-106.ibm.com/developerworks/   */
/* eserver/articles/archguide.html and (clearer)                       */
/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
/* There appears to be no implicit ordering between any kind of        */
/* independent memory references.                                      */

/* TODO: Implement double-wide operations if available. */

#if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 8)) \
    && !defined(AO_DISABLE_GCC_ATOMICS)
  /* Probably, it could be enabled even for earlier gcc/clang versions. */

  /* TODO: As of clang-3.8.1, it emits lwsync in AO_load_acquire       */
  /* (i.e., the code is less efficient than the one given below).      */

# include "generic.h"

#else /* AO_DISABLE_GCC_ATOMICS */

/* Architecture enforces some ordering based on control dependence.    */
/* I don't know if that could help.                                    */

/* Data-dependent loads are always ordered.                            */

/* Based on the above references, eieio is intended for use on         */
/* uncached memory, which we don't support.  It does not order loads   */
/* from cached memory.                                                 */

#include "../all_aligned_atomic_load_store.h"

/* Make even a plain AO_load provide acquire semantics.                */
#define AO_load(addr) AO_load_acquire(addr)

#include "../test_and_set_t_is_ao_t.h"
        /* There seems to be no byte equivalent of lwarx, so this      */
        /* may really be what we want, at least in the 32-bit case.    */

AO_INLINE void
AO_nop_full(void)
{
  __asm__ __volatile__("sync" : : : "memory");
}
#define AO_HAVE_nop_full

/* lwsync apparently works for everything but a StoreLoad barrier.     */
AO_INLINE void
AO_lwsync(void)
{
#ifdef __NO_LWSYNC__
  __asm__ __volatile__("sync" : : : "memory");
#else
  __asm__ __volatile__("lwsync" : : : "memory");
#endif
}

#define AO_nop_write() AO_lwsync()
#define AO_HAVE_nop_write

#define AO_nop_read() AO_lwsync()
#define AO_HAVE_nop_read

#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  /* ppc64 uses ld, not lwz.                                           */
# define AO_PPC_LD      "ld"
# define AO_PPC_LxARX   "ldarx"
# define AO_PPC_CMPx    "cmpd"
# define AO_PPC_STxCXd  "stdcx."
# define AO_PPC_LOAD_CLOBBER "cr0"
#else
# define AO_PPC_LD      "lwz"
# define AO_PPC_LxARX   "lwarx"
# define AO_PPC_CMPx    "cmpw"
# define AO_PPC_STxCXd  "stwcx."
# define AO_PPC_LOAD_CLOBBER "cc"
  /* FIXME: We should get gcc to allocate one of the condition         */
  /* registers.  I always got "impossible constraint" when I           */
  /* tried the "y" constraint.                                         */
# define AO_T_IS_INT
#endif

#ifdef _AIX
  /* Labels are not supported on AIX.                                  */
  /* ppc64 instructions are the same size as 32-bit ones, so the       */
  /* hard-coded relative branch offsets below work for both.           */
# define AO_PPC_L(label) /* empty */
# define AO_PPC_BR_A(labelBF, addr) addr
#else
# define AO_PPC_L(label) label ": "
# define AO_PPC_BR_A(labelBF, addr) labelBF
#endif

/* We explicitly specify load_acquire, since it is important, and can  */
/* be implemented relatively cheaply.  It could be implemented         */
/* with an ordinary load followed by a lwsync.  But the general wisdom */
/* seems to be that a data-dependent branch followed by an isync is    */
/* cheaper.  And the documentation is fairly explicit that this also   */
/* has acquire semantics.                                              */
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;

  __asm__ __volatile__ (
    AO_PPC_LD "%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"
    "bne- " AO_PPC_BR_A("1f", "$+4") "\n"
    AO_PPC_L("1") "isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", AO_PPC_LOAD_CLOBBER);
  return result;
}
#define AO_HAVE_load_acquire

/* We explicitly specify store_release, since it relies                */
/* on the fact that lwsync is also a LoadStore barrier.                */
AO_INLINE void
AO_store_release(volatile AO_t *addr, AO_t value)
{
  AO_lwsync();
  *addr = value;
}
#define AO_HAVE_store_release
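
/* Usage sketch (kept out of the build with "#if 0"): a minimal        */
/* message-passing pair built on the two primitives above.  Only the   */
/* AO_* calls are part of the library; the "payload"/"ready" names     */
/* and the functions themselves are hypothetical.                      */
#if 0
  static AO_t payload;          /* ordinary data, written before the flag */
  static volatile AO_t ready;   /* publication flag                       */

  static void
  producer(void)
  {
    payload = 42;
    AO_store_release(&ready, 1);        /* lwsync, then a plain store  */
  }

  static AO_t
  consumer(void)
  {
    while (AO_load_acquire(&ready) == 0) {
      /* Spin; the load/branch/isync sequence in AO_load_acquire       */
      /* orders the subsequent read of payload after the flag load.    */
    }
    return payload;             /* guaranteed to observe the value 42  */
  }
#endif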

#ifndef AO_PREFER_GENERALIZED
/* This is similar to the code in the garbage collector.  Deleting     */
/* this and having it synthesized from compare_and_swap would probably */
/* only cost us a load immediate instruction.                          */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr)
{
  /* Completely untested.  And we should be using smaller objects anyway. */
  AO_t oldval;
  AO_t temp = 1; /* locked value */

  __asm__ __volatile__(
    AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n"  /* load and reserve       */
    AO_PPC_CMPx "i %0, 0\n"                  /* if load is             */
    "bne " AO_PPC_BR_A("2f", "$+12") "\n"    /* non-zero, return already set */
    AO_PPC_STxCXd " %2,0,%1\n"               /* else store conditional */
    "bne- " AO_PPC_BR_A("1b", "$-16") "\n"   /* retry if lost reservation */
    AO_PPC_L("2") "\n"                       /* oldval is zero if we set */
    : "=&r"(oldval)
    : "r"(addr), "r"(temp)
    : "memory", "cr0");
  return (AO_TS_VAL_t)oldval;
}
#define AO_HAVE_test_and_set

AO_INLINE AO_TS_VAL_t
AO_test_and_set_acquire(volatile AO_TS_t *addr)
{
  AO_TS_VAL_t result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_test_and_set_acquire

AO_INLINE AO_TS_VAL_t
AO_test_and_set_release(volatile AO_TS_t *addr)
{
  AO_lwsync();
  return AO_test_and_set(addr);
}
#define AO_HAVE_test_and_set_release

AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr)
{
  AO_TS_VAL_t result;

  AO_lwsync();
  result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_test_and_set_full
#endif /* !AO_PREFER_GENERALIZED */

#ifndef AO_GENERALIZE_ASM_BOOL_CAS
  AO_INLINE int
  AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val)
  {
    AO_t oldval;
    int result = 0;

    __asm__ __volatile__(
      AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n" /* load and reserve      */
      AO_PPC_CMPx " %0, %4\n"                 /* if load is not equal to */
      "bne " AO_PPC_BR_A("2f", "$+16") "\n"   /* old, fail             */
      AO_PPC_STxCXd " %3,0,%2\n"              /* else store conditional */
      "bne- " AO_PPC_BR_A("1b", "$-16") "\n"  /* retry if lost reservation */
      "li %1,1\n"                             /* result = 1;           */
      AO_PPC_L("2") "\n"
      : "=&r"(oldval), "=&r"(result)
      : "r"(addr), "r"(new_val), "r"(old), "1"(result)
      : "memory", "cr0");
    return result;
  }
# define AO_HAVE_compare_and_swap

  AO_INLINE int
  AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val)
  {
    int result = AO_compare_and_swap(addr, old, new_val);
    AO_lwsync();
    return result;
  }
# define AO_HAVE_compare_and_swap_acquire

  AO_INLINE int
  AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val)
  {
    AO_lwsync();
    return AO_compare_and_swap(addr, old, new_val);
  }
# define AO_HAVE_compare_and_swap_release

  AO_INLINE int
  AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val)
  {
    int result;

    AO_lwsync();
    result = AO_compare_and_swap(addr, old, new_val);
    if (result)
      AO_lwsync();
    return result;
  }
# define AO_HAVE_compare_and_swap_full
#endif /* !AO_GENERALIZE_ASM_BOOL_CAS */
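
/* Usage sketch (disabled with "#if 0"): the usual retry loop around   */
/* the boolean CAS above, here maintaining a running maximum.  The     */
/* "ao_store_max" helper is hypothetical, not part of this file.       */
#if 0
  static void
  ao_store_max(volatile AO_t *loc, AO_t candidate)
  {
    AO_t cur;

    do {
      cur = AO_load(loc);
      if (cur >= candidate)
        return;         /* already at least as large; nothing to do    */
      /* The CAS fails (returns 0) if *loc changed since cur was read; */
      /* in that case reread and retry.                                */
    } while (!AO_compare_and_swap_full(loc, cur, candidate));
  }
#endif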

AO_INLINE AO_t
AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
{
  AO_t fetched_val;

  __asm__ __volatile__(
    AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n"  /* load and reserve       */
    AO_PPC_CMPx " %0, %3\n"                  /* if load is not equal to */
    "bne " AO_PPC_BR_A("2f", "$+12") "\n"    /* old_val, fail          */
    AO_PPC_STxCXd " %2,0,%1\n"               /* else store conditional */
    "bne- " AO_PPC_BR_A("1b", "$-16") "\n"   /* retry if lost reservation */
    AO_PPC_L("2") "\n"
    : "=&r"(fetched_val)
    : "r"(addr), "r"(new_val), "r"(old_val)
    : "memory", "cr0");
  return fetched_val;
}
#define AO_HAVE_fetch_compare_and_swap

AO_INLINE AO_t
AO_fetch_compare_and_swap_acquire(volatile AO_t *addr, AO_t old_val,
                                  AO_t new_val)
{
  AO_t result = AO_fetch_compare_and_swap(addr, old_val, new_val);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_compare_and_swap_acquire

AO_INLINE AO_t
AO_fetch_compare_and_swap_release(volatile AO_t *addr, AO_t old_val,
                                  AO_t new_val)
{
  AO_lwsync();
  return AO_fetch_compare_and_swap(addr, old_val, new_val);
}
#define AO_HAVE_fetch_compare_and_swap_release

AO_INLINE AO_t
AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
                               AO_t new_val)
{
  AO_t result;

  AO_lwsync();
  result = AO_fetch_compare_and_swap(addr, old_val, new_val);
  if (result == old_val)
    AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_compare_and_swap_full

#ifndef AO_PREFER_GENERALIZED
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr)
{
  AO_t oldval;
  AO_t newval;

  __asm__ __volatile__(
    AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n"  /* load and reserve       */
    "add %1,%0,%3\n"                         /* increment              */
    AO_PPC_STxCXd " %1,0,%2\n"               /* store conditional      */
    "bne- " AO_PPC_BR_A("1b", "$-12") "\n"   /* retry if lost reservation */
    : "=&r"(oldval), "=&r"(newval)
    : "r"(addr), "r"(incr)
    : "memory", "cr0");
  return oldval;
}
#define AO_HAVE_fetch_and_add

AO_INLINE AO_t
AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr)
{
  AO_t result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_and_add_acquire

AO_INLINE AO_t
AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr)
{
  AO_lwsync();
  return AO_fetch_and_add(addr, incr);
}
#define AO_HAVE_fetch_and_add_release

AO_INLINE AO_t
AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr)
{
  AO_t result;

  AO_lwsync();
  result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_and_add_full
#endif /* !AO_PREFER_GENERALIZED */

#undef AO_PPC_BR_A
#undef AO_PPC_CMPx
#undef AO_PPC_L
#undef AO_PPC_LD
#undef AO_PPC_LOAD_CLOBBER
#undef AO_PPC_LxARX
#undef AO_PPC_STxCXd

#endif /* AO_DISABLE_GCC_ATOMICS */
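
/* Usage sketch (disabled with "#if 0"): a simple spin lock built from */
/* the test-and-set primitives above.  The "ao_lock"/"ao_unlock"       */
/* wrappers are hypothetical; AO_TS_INITIALIZER, AO_TS_SET and         */
/* AO_CLEAR are the standard libatomic_ops names from atomic_ops.h.    */
#if 0
  static volatile AO_TS_t ao_lock_word = AO_TS_INITIALIZER;

  static void
  ao_lock(void)
  {
    while (AO_test_and_set_acquire(&ao_lock_word) == AO_TS_SET) {
      /* Spin until the previous value is observed clear; the acquire  */
      /* variant orders the critical section after the successful set. */
    }
  }

  static void
  ao_unlock(void)
  {
    AO_CLEAR(&ao_lock_word);    /* release store of AO_TS_CLEAR        */
  }
#endif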