#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef Py_BUILD_CORE

/* Everything below is only visible when Py_BUILD_CORE is defined. */

#include "dynamic_annotations.h"

#include "pyconfig.h"

#if defined(HAVE_STD_ATOMIC)
#include <stdatomic.h>
#endif

#if defined(_MSC_VER)
#include <intrin.h>
/* The *_HLEAcquire / *_HLERelease intrinsics are only used by the
 * x86/x64 branch below, so only pull in immintrin.h for those targets. */
#if defined(_M_IX86) || defined(_M_X64)
#include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 *
 * Exactly one of the backends below is selected at preprocessing time.
 * Each backend provides:
 *   - enum _Py_memory_order
 *   - struct _Py_atomic_address (wraps a uintptr_t-sized value)
 *   - struct _Py_atomic_int     (wraps an int-sized value)
 *   - _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER)
 *   - _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)
 * where ATOMIC_VAL is a pointer to one of the two structs.  The
 * non-MSVC backends additionally provide _Py_atomic_signal_fence() and
 * _Py_atomic_thread_fence().
 */

#if defined(HAVE_STD_ATOMIC)

/* Backend 1: C11 <stdatomic.h>.  Thin renaming layer. */

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER)

/* Use builtin atomic operations in GCC >= 4.7 */
#elif defined(HAVE_BUILTIN_ATOMIC)

/* Backend 2: __atomic_* builtins.  The asserts reject memory orders
 * that are not listed as acceptable for a store / a load here. */

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))

/* Backend 3: GCC-compatible compiler on x86, hand-written with inline
 * asm and statement expressions. */

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

/* Compiler-only barrier: an empty asm with a "memory" clobber, skipped
 * for relaxed ordering. */
static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

/* Hardware barrier: emits mfence (with a "memory" clobber), skipped for
 * relaxed ordering. */
static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects.
 *
 * Orders with release semantics (release, acq_rel, seq_cst) are
 * reported via _Py_ANNOTATE_HAPPENS_BEFORE; orders with acquire
 * semantics (acquire, acq_rel, seq_cst) via _Py_ANNOTATE_HAPPENS_AFTER.
 * Relaxed reports nothing. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

/* Store NEW_VAL into ATOMIC_VAL->_value.
 *
 * relaxed:  plain volatile store.
 * release:  compiler fence, then plain volatile store.
 * acquire / acq_rel / seq_cst: store performed with an xchg
 *           instruction and a "memory" clobber.
 * ATOMIC_VAL, NEW_VAL and ORDER are each evaluated exactly once. */
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

/* Load and yield ATOMIC_VAL->_value.
 *
 * A thread fence is issued before the read for release / acq_rel /
 * seq_cst, and a compiler fence after the read for acquire / acq_rel /
 * seq_cst.  ATOMIC_VAL and ORDER are each evaluated exactly once. */
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({  \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance but likely only in some very specific and
    hard to measure scenario.
*/
#if defined(_M_IX86) || defined(_M_X64)

/* Backend 4a: MSVC on x86 / x64. */

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;

/* Internal helpers.  ATOMIC_VAL here is a pointer to the raw value (not
 * to the wrapping struct).  Each expands to a complete statement, so the
 * use site must NOT add a trailing semicolon inside a braced block. */
#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)(ATOMIC_VAL), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)(ATOMIC_VAL), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)(ATOMIC_VAL), (__int64)(NEW_VAL)); \
      break; \
  }
#else
/* No 64-bit store on 32-bit x86; the branch that uses this is dead
 * there because no wrapped value is 8 bytes wide. */
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)(ATOMIC_VAL), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)(ATOMIC_VAL), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)(ATOMIC_VAL), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value, if this returns
    a uintptr_t it will do an unsigned compare and crash
*/
/* Relaxed: plain read.  Every other order: read the value, then
 * compare-exchange it with itself, retrying until the exchange observes
 * the same value that was read. */
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *(ATOMIC_VAL)
#endif

/* 32-bit counterpart of the load above; same read + compare-exchange
 * retry scheme. */
inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

/* Store NEW_VAL into ATOMIC_VAL->_value, dispatching on the width of
 * the wrapped value.  This is a statement (not an expression); it is
 * wrapped in do/while(0) so it composes safely with if/else at the use
 * site.  ATOMIC_VAL is any pointer to a _Py_atomic_* struct. */
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  do { \
    if (sizeof((ATOMIC_VAL)->_value) == 8) { \
      _Py_atomic_store_64bit((volatile long long*)&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) } else { \
      _Py_atomic_store_32bit((volatile long*)&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) } \
  } while (0)

/* Load ATOMIC_VAL->_value, dispatching on the width of the wrapped
 * value.  The pointer casts are intentionally signed: they keep the
 * result of the conditional expression signed (see the gil_created()
 * note above). */
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((volatile long long*)&(ATOMIC_VAL)->_value, ORDER) : \
    _Py_atomic_load_32bit((volatile long*)&(ATOMIC_VAL)->_value, ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)

/* Backend 4b: MSVC on ARM / ARM64, using the _acq / _rel intrinsic
 * variants. */

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;

/* Internal helpers.  ATOMIC_VAL here is a pointer to the raw value (not
 * to the wrapping struct).  Each expands to a complete statement, so the
 * use site must NOT add a trailing semicolon inside a braced block. */
#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)(ATOMIC_VAL), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)(ATOMIC_VAL), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)(ATOMIC_VAL), (__int64)(NEW_VAL)); \
      break; \
  }
#else
/* No 64-bit store on 32-bit ARM; the branch that uses this is dead
 * there because no wrapped value is 8 bytes wide. */
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)(ATOMIC_VAL), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)(ATOMIC_VAL), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)(ATOMIC_VAL), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value, if this returns
    a uintptr_t it will do an unsigned compare and crash
*/
/* Relaxed: plain read.  Every other order: read the value, then
 * compare-exchange it with itself, retrying until the exchange observes
 * the same value that was read. */
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *(ATOMIC_VAL)
#endif

/* 32-bit counterpart of the load above; same read + compare-exchange
 * retry scheme. */
inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

/* Store NEW_VAL into ATOMIC_VAL->_value, dispatching on the width of
 * the wrapped value.  This is a statement (not an expression); it is
 * wrapped in do/while(0) so it composes safely with if/else at the use
 * site.  ATOMIC_VAL is any pointer to a _Py_atomic_* struct. */
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  do { \
    if (sizeof((ATOMIC_VAL)->_value) == 8) { \
      _Py_atomic_store_64bit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) } else { \
      _Py_atomic_store_32bit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) } \
  } while (0)

/* Load ATOMIC_VAL->_value, dispatching on the width of the wrapped
 * value. */
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit(&(ATOMIC_VAL)->_value, ORDER) : \
    _Py_atomic_load_32bit(&(ATOMIC_VAL)->_value, ORDER) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */

/* Backend 5: no atomics support detected. */

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
#endif  /* Py_BUILD_CORE */
#endif  /* Py_ATOMIC_H */