|
Packit |
fe9d6e |
/*
|
|
Packit |
fe9d6e |
* Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
|
|
Packit |
fe9d6e |
*
|
|
Packit |
fe9d6e |
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
Packit |
fe9d6e |
* of this software and associated documentation files (the "Software"), to deal
|
|
Packit |
fe9d6e |
* in the Software without restriction, including without limitation the rights
|
|
Packit |
fe9d6e |
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
Packit |
fe9d6e |
* copies of the Software, and to permit persons to whom the Software is
|
|
Packit |
fe9d6e |
* furnished to do so, subject to the following conditions:
|
|
Packit |
fe9d6e |
*
|
|
Packit |
fe9d6e |
* The above copyright notice and this permission notice shall be included in
|
|
Packit |
fe9d6e |
* all copies or substantial portions of the Software.
|
|
Packit |
fe9d6e |
*
|
|
Packit |
fe9d6e |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
Packit |
fe9d6e |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
Packit |
fe9d6e |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
Packit |
fe9d6e |
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
Packit |
fe9d6e |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
Packit |
fe9d6e |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
Packit |
fe9d6e |
* SOFTWARE.
|
|
Packit |
fe9d6e |
*/
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#include "../all_aligned_atomic_load_store.h"
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Real X86 implementations appear                                      */
/* to enforce ordering between memory operations, EXCEPT that a later   */
/* read can pass earlier writes, presumably due to the visible          */
/* presence of store buffers.                                           */
/* We ignore the fact that the official specs                           */
/* seem to be much weaker (and arguably too weak to be usable).         */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#include "../ordered_except_wr.h"
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#ifdef AO_ASM_X64_AVAILABLE
|
|
Packit |
fe9d6e |
# include "../test_and_set_t_is_char.h"
|
|
Packit |
fe9d6e |
#else
|
|
Packit |
fe9d6e |
# include "../test_and_set_t_is_ao_t.h"
|
|
Packit |
fe9d6e |
#endif
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Assume _MSC_VER >= 1400 */
|
|
Packit |
fe9d6e |
#include <intrin.h>
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#pragma intrinsic (_InterlockedCompareExchange)
|
|
Packit |
fe9d6e |
#pragma intrinsic (_InterlockedCompareExchange64)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#ifndef AO_PREFER_GENERALIZED
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedIncrement)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedIncrement64)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedDecrement)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedDecrement64)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedExchangeAdd)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedExchangeAdd64)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE AO_t
|
|
Packit |
fe9d6e |
AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
return _InterlockedExchangeAdd64((__int64 volatile *)p, incr);
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
#define AO_HAVE_fetch_and_add_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE AO_t
|
|
Packit |
fe9d6e |
AO_fetch_and_add1_full (volatile AO_t *p)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
return _InterlockedIncrement64((__int64 volatile *)p) - 1;
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
#define AO_HAVE_fetch_and_add1_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE AO_t
|
|
Packit |
fe9d6e |
AO_fetch_and_sub1_full (volatile AO_t *p)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
return _InterlockedDecrement64((__int64 volatile *)p) + 1;
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
#define AO_HAVE_fetch_and_sub1_full
|
|
Packit |
fe9d6e |
#endif /* !AO_PREFER_GENERALIZED */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE AO_t
|
|
Packit |
fe9d6e |
AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
|
|
Packit |
fe9d6e |
AO_t new_val)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
return (AO_t)_InterlockedCompareExchange64((__int64 volatile *)addr,
|
|
Packit |
fe9d6e |
new_val, old_val);
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
#define AO_HAVE_fetch_compare_and_swap_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Compare-and-swap on an unsigned int: if *addr equals old_val it is   */
/* replaced by new_val.  The value reported by the                      */
/* _InterlockedCompareExchange intrinsic (the previous content) is      */
/* returned in either case.                                             */
AO_INLINE unsigned int
AO_int_fetch_compare_and_swap_full(volatile unsigned int *addr,
                                   unsigned int old_val, unsigned int new_val)
{
  long volatile *target = (long volatile *)addr;
  long observed;

  /* Note the intrinsic takes the exchange value before the comparand.  */
  observed = _InterlockedCompareExchange(target, new_val, old_val);
  return observed;
}
#define AO_HAVE_int_fetch_compare_and_swap_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#ifndef AO_PREFER_GENERALIZED
|
|
Packit |
fe9d6e |
/* Atomically add incr to the unsigned int at p and hand back the value */
/* it held before the addition, as reported by the                      */
/* _InterlockedExchangeAdd intrinsic.                                   */
AO_INLINE unsigned int
AO_int_fetch_and_add_full(volatile unsigned int *p, unsigned int incr)
{
  long volatile *target = (long volatile *)p;
  long previous = _InterlockedExchangeAdd(target, incr);

  return previous;
}
#define AO_HAVE_int_fetch_and_add_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically increment the unsigned int at p and return the value it   */
/* held before the increment.                                           */
/* _InterlockedIncrement yields the already incremented value as a      */
/* signed long.  It is converted to unsigned int before the increment   */
/* is undone, so that the subtraction wraps instead of overflowing a    */
/* signed type when the new value is the smallest long.                 */
AO_INLINE unsigned int
AO_int_fetch_and_add1_full(volatile unsigned int *p)
{
  return (unsigned int)_InterlockedIncrement((long volatile *)p) - 1U;
}
# define AO_HAVE_int_fetch_and_add1_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically decrement the unsigned int at p and return the value it   */
/* held before the decrement.                                           */
/* _InterlockedDecrement yields the already decremented value as a      */
/* signed long.  It is converted to unsigned int before the decrement   */
/* is undone, so that the addition wraps instead of overflowing a       */
/* signed type when the new value is the largest long.                  */
AO_INLINE unsigned int
AO_int_fetch_and_sub1_full(volatile unsigned int *p)
{
  return (unsigned int)_InterlockedDecrement((long volatile *)p) + 1U;
}
# define AO_HAVE_int_fetch_and_sub1_full
|
|
Packit |
fe9d6e |
#endif /* !AO_PREFER_GENERALIZED */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#if _MSC_VER > 1400
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedAnd8)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedCompareExchange16)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedOr8)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedXor8)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically AND value into the byte at p using the _InterlockedAnd8   */
/* intrinsic.  The intrinsic's result is discarded.                     */
AO_INLINE void
AO_char_and_full(volatile unsigned char *p, unsigned char value)
{
  char volatile *target = (char volatile *)p;

  (void)_InterlockedAnd8(target, value);
}
# define AO_HAVE_char_and_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically OR value into the byte at p using the _InterlockedOr8     */
/* intrinsic.  The intrinsic's result is discarded.                     */
AO_INLINE void
AO_char_or_full(volatile unsigned char *p, unsigned char value)
{
  char volatile *target = (char volatile *)p;

  (void)_InterlockedOr8(target, value);
}
# define AO_HAVE_char_or_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically XOR value into the byte at p using the _InterlockedXor8   */
/* intrinsic.  The intrinsic's result is discarded.                     */
AO_INLINE void
AO_char_xor_full(volatile unsigned char *p, unsigned char value)
{
  char volatile *target = (char volatile *)p;

  (void)_InterlockedXor8(target, value);
}
# define AO_HAVE_char_xor_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Compare-and-swap on an unsigned short: if *addr equals old_val it is */
/* replaced by new_val.  The value reported by the                      */
/* _InterlockedCompareExchange16 intrinsic (the previous content) is    */
/* returned in either case.                                             */
AO_INLINE unsigned short
AO_short_fetch_compare_and_swap_full(volatile unsigned short *addr,
                                     unsigned short old_val,
                                     unsigned short new_val)
{
  short volatile *target = (short volatile *)addr;
  short observed;

  /* Note the intrinsic takes the exchange value before the comparand.  */
  observed = _InterlockedCompareExchange16(target, new_val, old_val);
  return observed;
}
# define AO_HAVE_short_fetch_compare_and_swap_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# ifndef AO_PREFER_GENERALIZED
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedIncrement16)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedDecrement16)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically increment the unsigned short at p and return the value it */
/* held before the increment.  _InterlockedIncrement16 reports the      */
/* incremented value, so one is subtracted again (in int arithmetic,    */
/* after the usual promotion) before narrowing to the return type.      */
AO_INLINE unsigned short
AO_short_fetch_and_add1_full(volatile unsigned short *p)
{
  short volatile *target = (short volatile *)p;
  short updated = _InterlockedIncrement16(target);

  return updated - 1;
}
# define AO_HAVE_short_fetch_and_add1_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically decrement the unsigned short at p and return the value it */
/* held before the decrement.  _InterlockedDecrement16 reports the      */
/* decremented value, so one is added again (in int arithmetic, after   */
/* the usual promotion) before narrowing to the return type.            */
AO_INLINE unsigned short
AO_short_fetch_and_sub1_full(volatile unsigned short *p)
{
  short volatile *target = (short volatile *)p;
  short updated = _InterlockedDecrement16(target);

  return updated + 1;
}
# define AO_HAVE_short_fetch_and_sub1_full
|
|
Packit |
fe9d6e |
# endif /* !AO_PREFER_GENERALIZED */
|
|
Packit |
fe9d6e |
#endif /* _MSC_VER > 1400 */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#if _MSC_VER >= 1800 /* Visual Studio 2013+ */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedCompareExchange8)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Compare-and-swap on a byte: if *addr equals old_val it is replaced   */
/* by new_val.  The value reported by the _InterlockedCompareExchange8  */
/* intrinsic (the previous content) is returned in either case.         */
AO_INLINE unsigned char
AO_char_fetch_compare_and_swap_full(volatile unsigned char *addr,
                                    unsigned char old_val,
                                    unsigned char new_val)
{
  char volatile *target = (char volatile *)addr;
  char observed;

  /* Note the intrinsic takes the exchange value before the comparand.  */
  observed = _InterlockedCompareExchange8(target, new_val, old_val);
  return observed;
}
# define AO_HAVE_char_fetch_compare_and_swap_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# ifndef AO_PREFER_GENERALIZED
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedExchangeAdd16)
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedExchangeAdd8)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically add incr to the byte at p and hand back the value the     */
/* byte held before the addition, as reported by the                    */
/* _InterlockedExchangeAdd8 intrinsic.                                  */
AO_INLINE unsigned char
AO_char_fetch_and_add_full(volatile unsigned char *p, unsigned char incr)
{
  char volatile *target = (char volatile *)p;
  char previous = _InterlockedExchangeAdd8(target, incr);

  return previous;
}
# define AO_HAVE_char_fetch_and_add_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* Atomically add incr to the unsigned short at p and hand back the     */
/* value it held before the addition, as reported by the                */
/* _InterlockedExchangeAdd16 intrinsic.                                 */
AO_INLINE unsigned short
AO_short_fetch_and_add_full(volatile unsigned short *p,
                            unsigned short incr)
{
  short volatile *target = (short volatile *)p;
  short previous = _InterlockedExchangeAdd16(target, incr);

  return previous;
}
# define AO_HAVE_short_fetch_and_add_full
|
|
Packit |
fe9d6e |
# endif /* !AO_PREFER_GENERALIZED */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#elif defined(AO_ASM_X64_AVAILABLE)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE unsigned char
|
|
Packit |
fe9d6e |
AO_char_fetch_and_add_full(volatile unsigned char *p, unsigned char incr)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
__asm
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
mov al, incr
|
|
Packit |
fe9d6e |
mov rbx, p
|
|
Packit |
fe9d6e |
lock xadd byte ptr [rbx], al
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
# define AO_HAVE_char_fetch_and_add_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE unsigned short
|
|
Packit |
fe9d6e |
AO_short_fetch_and_add_full(volatile unsigned short *p, unsigned short incr)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
__asm
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
mov ax, incr
|
|
Packit |
fe9d6e |
mov rbx, p
|
|
Packit |
fe9d6e |
lock xadd word ptr [rbx], ax
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
# define AO_HAVE_short_fetch_and_add_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#endif /* _MSC_VER < 1800 && AO_ASM_X64_AVAILABLE */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#ifdef AO_ASM_X64_AVAILABLE
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* As far as we can tell, the lfence and sfence instructions are not    */
/* currently needed or useful for cached memory accesses.               */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE void
|
|
Packit |
fe9d6e |
AO_nop_full(void)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
/* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips. */
|
|
Packit |
fe9d6e |
__asm { mfence }
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
# define AO_HAVE_nop_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE AO_TS_VAL_t
|
|
Packit |
fe9d6e |
AO_test_and_set_full(volatile AO_TS_t *addr)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
__asm
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
mov rax,AO_TS_SET ;
|
|
Packit |
fe9d6e |
mov rbx,addr ;
|
|
Packit |
fe9d6e |
xchg byte ptr [rbx],al ;
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
# define AO_HAVE_test_and_set_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#endif /* AO_ASM_X64_AVAILABLE */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#ifdef AO_CMPXCHG16B_AVAILABLE
|
|
Packit |
fe9d6e |
/* AO_compare_double_and_swap_double_full needs implementation for Win64.
 * Also see ../gcc/x86.h for partial old Opteron workaround.
 */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# if _MSC_VER >= 1500
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# include "../standard_ao_double_t.h"
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# pragma intrinsic (_InterlockedCompareExchange128)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
AO_INLINE int
|
|
Packit |
fe9d6e |
AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
|
|
Packit |
fe9d6e |
AO_t old_val1, AO_t old_val2,
|
|
Packit |
fe9d6e |
AO_t new_val1, AO_t new_val2)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
__int64 comparandResult[2];
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
assert(((size_t)addr & (sizeof(AO_double_t) - 1)) == 0);
|
|
Packit |
fe9d6e |
comparandResult[0] = old_val1; /* low */
|
|
Packit |
fe9d6e |
comparandResult[1] = old_val2; /* high */
|
|
Packit |
fe9d6e |
return _InterlockedCompareExchange128((volatile __int64 *)addr,
|
|
Packit |
fe9d6e |
new_val2 /* high */, new_val1 /* low */, comparandResult);
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
# define AO_HAVE_compare_double_and_swap_double_full
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# elif defined(AO_ASM_X64_AVAILABLE)
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
# include "../standard_ao_double_t.h"
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
/* If there is no intrinsic _InterlockedCompareExchange128 then we */
|
|
Packit |
fe9d6e |
/* need basically what's given below. */
|
|
Packit |
fe9d6e |
AO_INLINE int
|
|
Packit |
fe9d6e |
AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
|
|
Packit |
fe9d6e |
AO_t old_val1, AO_t old_val2,
|
|
Packit |
fe9d6e |
AO_t new_val1, AO_t new_val2)
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
__asm
|
|
Packit |
fe9d6e |
{
|
|
Packit |
fe9d6e |
mov rdx,QWORD PTR [old_val2] ;
|
|
Packit |
fe9d6e |
mov rax,QWORD PTR [old_val1] ;
|
|
Packit |
fe9d6e |
mov rcx,QWORD PTR [new_val2] ;
|
|
Packit |
fe9d6e |
mov rbx,QWORD PTR [new_val1] ;
|
|
Packit |
fe9d6e |
lock cmpxchg16b [addr] ;
|
|
Packit |
fe9d6e |
setz rax ;
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
}
|
|
Packit |
fe9d6e |
# define AO_HAVE_compare_double_and_swap_double_full
|
|
Packit |
fe9d6e |
# endif /* AO_ASM_X64_AVAILABLE && (_MSC_VER < 1500) */
|
|
Packit |
fe9d6e |
|
|
Packit |
fe9d6e |
#endif /* AO_CMPXCHG16B_AVAILABLE */
|