/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2005-2011
 *
 * Macros for multi-CPU support
 *
 * Do not #include this file directly: #include "Rts.h" instead.
 *
 * To understand the structure of the RTS headers, see the wiki:
 *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
 *
 * -------------------------------------------------------------------------- */

#pragma once

#if defined(arm_HOST_ARCH) && defined(arm_HOST_ARCH_PRE_ARMv6)
void arm_atomic_spin_lock(void);
void arm_atomic_spin_unlock(void);
#endif

// Unconditionally atomic operations
// These are atomic even in the non-threaded RTS. They are necessary in the
// Proftimer implementation, which may be called from the pthreads-based
// ITimer implementation.
#define RELAXED_LOAD_ALWAYS(ptr) __atomic_load_n(ptr, __ATOMIC_RELAXED)
#define RELAXED_STORE_ALWAYS(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELAXED)
#define RELAXED_ADD_ALWAYS(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_RELAXED)

// Acquire/release atomic operations
#define ACQUIRE_LOAD_ALWAYS(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
#define RELEASE_STORE_ALWAYS(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELEASE)

// Sequentially consistent atomic operations
#define SEQ_CST_LOAD_ALWAYS(ptr) __atomic_load_n(ptr, __ATOMIC_SEQ_CST)
#define SEQ_CST_STORE_ALWAYS(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST)
#define SEQ_CST_ADD_ALWAYS(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST)
#define SEQ_CST_SUB_ALWAYS(ptr,val) __atomic_sub_fetch(ptr, val, __ATOMIC_SEQ_CST)
#define SEQ_CST_XCHG_ALWAYS(ptr,val) __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST)

#if defined(THREADED_RTS)

/* ----------------------------------------------------------------------------
   Atomic operations
   ------------------------------------------------------------------------- */

#if !IN_STG_CODE || IN_STGCRUN
// We only want the barriers, e.g. write_barrier(), declared in .hc
// files.  Defining the other inline functions here causes type
// mismatch errors from gcc, because the generated C code assumes
// that there are no prototypes in scope.

/*
 * The atomic exchange operation: xchg(p,w) exchanges the value
 * pointed to by p with the value w, returning the old value.
 *
 * Used for locking closures during updates (see lockClosure()
 * in rts/include/rts/storage/SMPClosureOps.h) and the MVar primops.
 */
EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);

/*
 * Compare-and-swap.  Atomically does this:
 *
 * cas(p,o,n) {
 *    r = *p;
 *    if (r == o) { *p = n };
 *    return r;
 * }
 */
EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
EXTERN_INLINE StgWord8 cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n);

/*
 * Compare-and-swap.
 * This variant uses a seq_cst memory order on success and a relaxed memory
 * order on failure.
 */
EXTERN_INLINE StgWord cas_seq_cst_relaxed(StgVolatilePtr p, StgWord o, StgWord n);

/*
 * Atomic addition by the provided quantity
 *
 * atomic_inc(p, n) {
 *    return ((*p) += n);
 * }
 */
EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord n);

/*
 * Atomic decrement
 *
 * atomic_dec(p) {
 *    return --(*p);
 * }
 */
EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p);

/*
 * Busy-wait nop: this is a hint to the CPU that we are currently in a
 * busy-wait loop waiting for another CPU to change something.  On a
 * hyperthreaded CPU it should yield to another thread, for example.
 */
EXTERN_INLINE void busy_wait_nop(void);

#endif // !IN_STG_CODE
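
/*
 * Example: a typical retry loop built from cas(), matching the pseudocode
 * above (an illustrative sketch only; `counter' and `bump_counter' are
 * hypothetical names, not part of the RTS):
 *
 *   static volatile StgWord counter = 0;
 *
 *   static StgWord bump_counter(StgWord delta)
 *   {
 *       StgWord old, seen;
 *       do {
 *           old  = counter;                         // read the current value
 *           seen = cas(&counter, old, old + delta); // try to install old+delta
 *       } while (seen != old);                      // lost a race; retry
 *       return old + delta;
 *   }
 *
 * For plain counters, atomic_inc()/atomic_dec() (declared above) do this in
 * a single operation.
 */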

/*
 * Various kinds of memory barrier.
 *  write_barrier: prevents future stores occurring before preceding stores.
 *  store_load_barrier: prevents future loads occurring before preceding stores.
 *  load_load_barrier: prevents future loads occurring before earlier loads.
 *
 * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
 *   http://gee.cs.oswego.edu/dl/jmm/cookbook.html
 *
 * To check whether you got these right, try the test in
 *   testsuite/tests/rts/testwsdeque.c
 * This tests the work-stealing deque implementation, which relies on
 * properly working store_load and load_load memory barriers.
 */
EXTERN_INLINE void write_barrier(void);
EXTERN_INLINE void store_load_barrier(void);
EXTERN_INLINE void load_load_barrier(void);

/*
 * Note [Heap memory barriers]
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * The weak memory ordering of some machines has consequences for how
 * closures are observed and mutated. In particular, when we expose a new
 * (or mutated) object to another core we must ensure that the stores which
 * formed the new object are visible (e.g. stores are flushed from cache and
 * the relevant cachelines invalidated in other cores).
 *
 * To ensure this we must use memory barriers. Which barriers are required
 * to access a field depends upon the type of the field. In general, fields
 * come in three flavours:
 *
 *  * Mutable GC Pointers (C type StgClosure*, Cmm type StgPtr)
 *  * Immutable GC Pointers (C type MUT_FIELD StgClosure*, Cmm type StgPtr)
 *  * Non-pointers (C type StgWord, Cmm type StgWord)
 *
 * Note that Addr# fields are *not* GC pointers and therefore are classified
 * as non-pointers. In this case, responsibility for barriers lies with the
 * party dereferencing the pointer.
 *
 * Also note that we are only concerned with mutation by the mutator. The GC
 * is free to change nearly any field, as this is necessary for a moving GC.
 * Naturally, doing this safely requires care, which we discuss in the
 * "Barriers during GC" section below.
 *
 * Immutable pointer fields are those which the mutator cannot change after
 * an object is made visible on the heap. Most objects' fields are of this
 * flavour (e.g. all data constructor fields). As these fields are written
 * precisely once, no barriers are needed on either writes or reads. This is
 * safe due to an argument hinging on causality: consider an immutable field
 * F of an object O which refers to an object O'. Naturally, O' must have
 * been visible to the creator of O when O was constructed. Consequently, if
 * O is visible to a reader, O' must also be visible.
 *
 * Mutable pointer fields are those which can be modified by the mutator.
 * These require a bit more care as they may break the causality argument
 * given above. For instance, consider an object O (e.g. a MUT_VAR) with a
 * mutable field F. A thread may allocate a new object O' and store a
 * reference to O' into F. Without explicit synchronization O' may not be
 * visible to another thread attempting to dereference F.
 *
 * Mutable fields include:
 *
 *  - StgMutVar: var
 *  - StgWeak: finalizer
 *  - StgMVar: head, tail, value
 *  - StgMVarTSOQueue: link
 *  - StgTVar: current_value, first_watch_queue_entry
 *  - StgTVarWatchQueue: {next,prev}_queue_entry
 *  - StgTRecChunk: TODO
 *  - StgMutArrPtrs: payload
 *  - StgSmallMutArrPtrs: payload
 *  - StgThunk, although this is a somewhat special case; see the Thunks
 *    section below
 *
 * Writing to a mutable pointer field must be done via a release-store.
 * Reading from such a field is done via an acquire-load, as sketched below.
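 *
 * For example (an illustrative sketch only; the real accesses are emitted
 * by the code generator and written in Cmm, not here), publishing a new
 * value into a MUT_VAR and reading it back safely looks roughly like:
 *
 *   StgMutVar *mv = ...;
 *   StgClosure *new_val = ...;              // the freshly allocated object O'
 *   RELEASE_STORE(&mv->var, new_val);       // writer: release-store into F
 *
 *   StgClosure *v = ACQUIRE_LOAD(&mv->var); // reader: pairs with the release
 *   // all stores that initialised *v are now visible to this thread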
 *
 * Finally, non-pointer fields can be safely mutated without barriers, as
 * they do not refer to other memory. Technically, concurrent accesses to
 * non-pointer fields still do need to be atomic in many cases to avoid torn
 * accesses. However, this is something that we generally avoid by locking
 * closures prior to mutating non-pointer fields (see "Locking closures"
 * below).
 *
 * Note that MUT_VARs offer both synchronized and unsynchronized primops.
 * Consequently, in these cases there is a burden on the user to ensure that
 * synchronization is provided where necessary.
 *
 * Locking closures
 * ----------------
 * Several primops temporarily turn closures into WHITEHOLEs to ensure that
 * they have exclusive access (see SMPClosureOps.h:reallyLockClosure).
 * Locking is done via an atomic exchange operation on the closure's info
 * table pointer with sequential consistency (although only acquire ordering
 * is needed). This acquire ensures that we synchronize with any previous
 * thread that had locked the closure. Consequently, it is important that we
 * take great care in examining the mutable fields of a lockable closure
 * prior to having locked it.
 *
 * Naturally, unlocking is done via a release-store to restore the closure's
 * original info table pointer.
 *
 * Thunks
 * ------
 * As noted above, thunks are a rather special (yet quite common) case. In
 * particular, they have the unique property of being updatable, transforming
 * from a thunk to an indirection. This transformation requires its own
 * synchronization protocol. In particular, we must ensure that a reader
 * examining a thunk being updated can see the indirectee. Consequently, a
 * thunk update (see rts/Updates.h) does the following (a sketch of both
 * sides of the protocol follows the entry steps below):
 *
 *  1. use a relaxed-store to place the new indirectee into the thunk's
 *     indirectee field
 *  2. use a release-store to set the info table to stg_BLACKHOLE (which
 *     represents an indirection)
 *
 * Blackholing a thunk (either eagerly, by GHC.StgToCmm.Bind.emitBlackHoleCode,
 * or lazily, by ThreadPaused.c:threadPaused) is done similarly.
 *
 * Conversely, indirection entry (see the entry code of stg_BLACKHOLE, stg_IND,
 * and stg_IND_STATIC in rts/StgMiscClosure.cmm) does the following:
 *
 *  1. we jump into the entry code for, e.g., stg_BLACKHOLE; this of course
 *     implies that we have already read the thunk's info table pointer, which
 *     is done with a relaxed load
 *  2. use an acquire-fence to ensure that our view on the thunk is
 *     up-to-date. This synchronizes with step (2) in the update
 *     procedure.
 *  3. relaxed-load the indirectee. Since thunks are updated at most
 *     once, we know that the fence in the last step has given us
 *     an up-to-date view of the indirectee closure.
 *  4. enter the indirectee (or block if the indirectee is a TSO)
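 *
 * In terms of the operations defined later in this file, the two sides of
 * the protocol look roughly like this (an illustrative C sketch only; the
 * real code is written in Cmm, in rts/Updates.h and rts/StgMiscClosure.cmm):
 *
 *   // updater
 *   RELAXED_STORE(&((StgInd *)thunk)->indirectee, result);    // step 1
 *   RELEASE_STORE(&thunk->header.info, &stg_BLACKHOLE_info);  // step 2
 *
 *   // reader, having already relaxed-loaded the info pointer (step 1)
 *   ACQUIRE_FENCE();                                          // step 2
 *   StgClosure *ind =
 *       RELAXED_LOAD(&((StgInd *)thunk)->indirectee);         // step 3
 *   // step 4: enter ind, or block if it is a TSO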
 *
 * Other closures
 * --------------
 * Below is a summary of the barriers which are necessary to make GHC's
 * primitive types respect the above rules:
 *
 *  - MVar# operations:
 *    The MVar# primops lock the MVar prior to reading or writing any of the
 *    MVar's fields.
 *
 *  - Write to a TVar#:
 *    This is protected by the full barrier implied by the CAS in
 *    STM.c:lock_stm.
 *
 *  - Write to an Array# or SmallArray#:
 *    This case is protected by an explicit write barrier in the code produced
 *    for this primop by the codegen. See GHC.StgToCmm.Prim.doWritePtrArrayOp
 *    and GHC.StgToCmm.Prim.doWriteSmallPtrArrayOp. Relevant issue: #12469.
 *
 *  - Write to MutVar# via writeMutVar#:
 *    This case is protected by an explicit write barrier in the code produced
 *    for this primop by the codegen.
 *
 *  - Write to MutVar# via atomicModifyMutVar# or casMutVar#:
 *    This is protected by the full barrier implied by the cmpxchg operations
 *    in these primops.
 *
 *  - Sending a Message to another capability:
 *    This is protected by the acquisition and release of the target
 *    capability's lock in Messages.c:sendMessage.
 *
 * N.B. recordClosureMutated places a reference to the mutated object on
 * the capability-local mut_list. Consequently this does not require any
 * memory barrier.
 *
 * Barriers in thread migration
 * ----------------------------
 * When a thread is migrated from one capability to another we must take care
 * that the new capability synchronizes with the old (e.g. to ensure that
 * the state of the thread's stack is visible to the new executor).
 * This is ensured by the barriers implied in sending the MessageWakeup.
 *
 * Barriers on Messages
 * --------------------
 * The RTS uses the Message mechanism to convey information between
 * capabilities. To send a message (see Messages.c:sendMessage) the sender
 * must first take the `lock` of the recipient capability. The Message can
 * then be linked onto the recipient's `inbox` list and the lock released
 * (implying a release barrier).
 *
 * To process its inbox (see Schedule.c:scheduleProcessInbox) the recipient
 * must take the capability lock before examining the inbox. This implies an
 * acquire barrier, ensuring that the messages in the inbox are visible.
 *
 * Under the above story there is no need for accesses to `inbox` to be
 * atomic at all, as all accesses are under the capability lock. However,
 * there is a slight wrinkle here: Capability.h:emptyInbox wants to test
 * whether `inbox` is empty without holding the capability lock.
 * Consequently, accesses to `inbox` must be atomic, although they may use
 * relaxed ordering.
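 *
 * For example, an emptyInbox-style check can be written against a relaxed
 * load (an illustrative sketch only; see Capability.h:emptyInbox for the
 * real definition):
 *
 *   bool inbox_empty =
 *       RELAXED_LOAD(&cap->inbox) == (Message *)END_TSO_QUEUE;
 *
 * while sendMessage, which holds the capability lock, links new Messages
 * onto `inbox` with correspondingly atomic stores.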
 *
 * Barriers during GC
 * ------------------
 * Entering the garbage collector implies an acquire barrier across all
 * capabilities, as all capabilities obey the following protocol when entering
 * gcWorkerThread:
 *
 *  1. acquire gc_entry_mutex
 *  2. signal gc_entry_arrived_cv to let the GC leader know
 *     that we have started to synchronize
 *  3. wait on gc_entry_start_now_cv, releasing
 *     gc_entry_mutex; this implies a release barrier
 *  4. after all threads have synchronized, the leader will
 *     broadcast on gc_entry_start_now_cv
 *  5. each of the waiting workers will acquire
 *     gc_entry_mutex, implying an acquire barrier which will
 *     synchronize with all of the other workers' releases in
 *     (3).
 *
 * Similarly, leaving the garbage collector implies a release
 * barrier, as gcWorkerThread blocks on gc_exit_leave_now_cv
 * with a similar protocol.
 *
 * During parallel GC we need to be careful during evacuation: before replacing
 * a closure with a forwarding pointer we must issue a write barrier to ensure
 * that the copy we made in to-space is visible to other cores.
 *
 * However, we can be a bit lax when *reading* during GC. Specifically, the GC
 * can only make a very limited set of changes to existing closures:
 *
 *  - it can replace a closure's info table with stg_WHITEHOLE
 *  - it can replace a previously-whitehole'd closure's info table with a
 *    forwarding pointer
 *  - it can replace a previously-whitehole'd closure's info table with a
 *    valid info table pointer (done in eval_thunk_selector)
 *  - it can update the value of a pointer field after evacuating it
 *
 * This is quite nice since we don't need to worry about an interleaving of
 * writes producing an invalid state: a closure's fields remain valid after
 * an update of its info table pointer and vice-versa.
 *
 * After a round of parallel scavenging we must also ensure that any writes the
 * GC thread workers made are visible to the main GC thread. This is ensured by
 * the full barrier implied by the atomic decrement in
 * GC.c:scavenge_until_all_done.
 *
 * The work-stealing queue (WSDeque) also requires barriers; these are
 * documented in WSDeque.c.
 */

/* ----------------------------------------------------------------------------
   Implementations
   ------------------------------------------------------------------------- */

#if !IN_STG_CODE || IN_STGCRUN

/*
 * Exchange the value pointed to by p with w and return the former. This
 * function is used to acquire a lock. An acquire memory barrier is sufficient
 * for a lock operation because the corresponding unlock operation issues a
 * store-store barrier (write_barrier()) immediately before releasing the lock.
 */
EXTERN_INLINE StgWord
xchg(StgPtr p, StgWord w)
{
#if defined(HAVE_C11_ATOMICS)
    return __atomic_exchange_n(p, w, __ATOMIC_SEQ_CST);
#else
    // When porting GHC to a new platform check that
    // __sync_lock_test_and_set() actually stores w in *p.
    // Use the rts/atomicxchg test to verify that the correct value is stored.
    // From the gcc manual:
    // (https://gcc.gnu.org/onlinedocs/gcc-4.4.3/gcc/Atomic-Builtins.html)
    //     This built-in function, as described by Intel, is not
    //     a traditional test-and-set operation, but rather an atomic
    //     exchange operation.
    //     [...]
    //     Many targets have only minimal support for such locks,
    //     and do not support a full exchange operation. In this case,
    //     a target may support reduced functionality here by which the
    //     only valid value to store is the immediate constant 1. The
    //     exact value actually stored in *ptr is implementation defined.
    return __sync_lock_test_and_set(p, w);
#endif
}

/*
 * CMPXCHG - the single-word atomic compare-and-exchange instruction.  Used
 * in the STM implementation.
 */
EXTERN_INLINE StgWord
cas(StgVolatilePtr p, StgWord o, StgWord n)
{
#if defined(HAVE_C11_ATOMICS)
    __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return o;
#else
    return __sync_val_compare_and_swap(p, o, n);
#endif
}

EXTERN_INLINE StgWord8
cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n)
{
#if defined(HAVE_C11_ATOMICS)
    __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return o;
#else
    return __sync_val_compare_and_swap(p, o, n);
#endif
}

EXTERN_INLINE StgWord
cas_seq_cst_relaxed(StgVolatilePtr p, StgWord o, StgWord n)
{
#if defined(HAVE_C11_ATOMICS)
    __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
    return o;
#else
    return __sync_val_compare_and_swap(p, o, n);
#endif
}
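
/*
 * Example: the lock-acquisition pattern referred to in the comment on xchg()
 * above (an illustrative sketch only; see the RTS's SpinLock.h for the real
 * implementation, which differs in detail):
 *
 *   static volatile StgWord lock = 0;         // 0 = free, 1 = held
 *
 *   static void acquire(void)
 *   {
 *       while (xchg((StgPtr)&lock, 1) != 0) { // xchg returns the old value
 *           busy_wait_nop();                  // spin politely until free
 *       }
 *   }
 *
 *   static void release(void)
 *   {
 *       write_barrier();                      // order prior stores...
 *       lock = 0;                             // ...before publishing the unlock
 *   }
 */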

// RRN: Generalized to arbitrary increments to enable fetch-and-add in
// Haskell code (fetchAddIntArray#).
// PT: add-and-fetch, returns new value
EXTERN_INLINE StgWord
atomic_inc(StgVolatilePtr p, StgWord incr)
{
#if defined(HAVE_C11_ATOMICS)
    return __atomic_add_fetch(p, incr, __ATOMIC_SEQ_CST);
#else
    return __sync_add_and_fetch(p, incr);
#endif
}

EXTERN_INLINE StgWord
atomic_dec(StgVolatilePtr p)
{
#if defined(HAVE_C11_ATOMICS)
    return __atomic_sub_fetch(p, 1, __ATOMIC_SEQ_CST);
#else
    return __sync_sub_and_fetch(p, (StgWord) 1);
#endif
}

/*
 * Some architectures have a way to tell the CPU that we're in a
 * busy-wait loop, and the processor should look for something else to
 * do (such as run another hardware thread).
 */
EXTERN_INLINE void
busy_wait_nop(void)
{
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
    // On Intel, the busy-wait-nop instruction is called "pause",
    // which is actually represented as a nop with the rep prefix.
    // On processors before the P4 this behaves as a nop; on P4 and
    // later it might do something clever like yield to another
    // hyperthread.  In any case, Intel recommends putting one
    // of these in a spin lock loop.
    __asm__ __volatile__ ("rep; nop");
#else
    // nothing
#endif
}

#endif // !IN_STG_CODE

/*
 * We need to tell both the compiler AND the CPU about the barriers.
 * It's no good preventing the CPU from reordering the operations if
 * the compiler has already done so - hence the "memory" restriction
 * on each of the barriers below.
 */
EXTERN_INLINE void
write_barrier(void) {
#if defined(NOSMP)
    return;
#elif defined(TSAN_ENABLED)
    // RELEASE is a bit stronger than the store-store barrier provided by
    // write_barrier, consequently we only use this case as a conservative
    // approximation when using ThreadSanitizer. See Note [ThreadSanitizer].
    __atomic_thread_fence(__ATOMIC_RELEASE);
#elif defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
    __asm__ __volatile__ ("" : : : "memory");
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
    || defined(powerpc64le_HOST_ARCH)
    __asm__ __volatile__ ("lwsync" : : : "memory");
#elif defined(s390x_HOST_ARCH)
    __asm__ __volatile__ ("" : : : "memory");
#elif defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
    __asm__ __volatile__ ("dmb st" : : : "memory");
#elif defined(riscv64_HOST_ARCH)
    __asm__ __volatile__ ("fence w,w" : : : "memory");
#elif defined(loongarch64_HOST_ARCH)
    __asm__ __volatile__ ("dbar 0" : : : "memory");
#else
#error memory barriers unimplemented on this architecture
#endif
}

EXTERN_INLINE void
store_load_barrier(void) {
#if defined(NOSMP)
    return;
#elif defined(i386_HOST_ARCH)
    __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
#elif defined(x86_64_HOST_ARCH)
    __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
    || defined(powerpc64le_HOST_ARCH)
    __asm__ __volatile__ ("sync" : : : "memory");
#elif defined(s390x_HOST_ARCH)
    __asm__ __volatile__ ("bcr 14,0" : : : "memory");
#elif defined(arm_HOST_ARCH)
    __asm__ __volatile__ ("dmb" : : : "memory");
#elif defined(aarch64_HOST_ARCH)
    __asm__ __volatile__ ("dmb sy" : : : "memory");
#elif defined(riscv64_HOST_ARCH)
    __asm__ __volatile__ ("fence w,r" : : : "memory");
#elif defined(loongarch64_HOST_ARCH)
    __asm__ __volatile__ ("dbar 0" : : : "memory");
#else
#error memory barriers unimplemented on this architecture
#endif
}
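
/*
 * Example: the classic pattern that needs a store-load barrier (an
 * illustrative sketch only; in the RTS the main user is the work-stealing
 * deque exercised by testwsdeque.c):
 *
 *   // Thread A                           // Thread B
 *   flagA = 1;                            flagB = 1;
 *   store_load_barrier();                 store_load_barrier();
 *   if (flagB == 0) { ... enter ... }     if (flagA == 0) { ... enter ... }
 *
 * Without the barrier, each thread's load may be reordered before its own
 * store, so both threads can observe 0 and enter the critical section.
 */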

EXTERN_INLINE void
load_load_barrier(void) {
#if defined(NOSMP)
    return;
#elif defined(i386_HOST_ARCH)
    __asm__ __volatile__ ("" : : : "memory");
#elif defined(x86_64_HOST_ARCH)
    __asm__ __volatile__ ("" : : : "memory");
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
    || defined(powerpc64le_HOST_ARCH)
    __asm__ __volatile__ ("lwsync" : : : "memory");
#elif defined(s390x_HOST_ARCH)
    __asm__ __volatile__ ("" : : : "memory");
#elif defined(arm_HOST_ARCH)
    __asm__ __volatile__ ("dmb" : : : "memory");
#elif defined(aarch64_HOST_ARCH)
    __asm__ __volatile__ ("dmb ld" : : : "memory");
#elif defined(riscv64_HOST_ARCH)
    __asm__ __volatile__ ("fence r,r" : : : "memory");
#elif defined(loongarch64_HOST_ARCH)
    __asm__ __volatile__ ("dbar 0" : : : "memory");
#else
#error memory barriers unimplemented on this architecture
#endif
}

// Load a pointer from a memory location that might be being modified
// concurrently.  This prevents the compiler from optimising away
// multiple loads of the memory location, as it might otherwise do in
// a busy wait loop for example.
#define VOLATILE_LOAD(p) (*((StgVolatilePtr)(p)))

// Relaxed atomic operations
#define RELAXED_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_RELAXED)
#define RELAXED_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELAXED)
#define RELAXED_ADD(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_RELAXED)

// Acquire/release atomic operations
#define ACQUIRE_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
#define RELEASE_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELEASE)

// Sequentially consistent atomic operations
#define SEQ_CST_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_SEQ_CST)
#define SEQ_CST_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST)
#define SEQ_CST_ADD(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST)

// Non-atomic addition for "approximate" counters that can be lossy
#define NONATOMIC_ADD(ptr,val) RELAXED_STORE(ptr, RELAXED_LOAD(ptr) + val)

// Compare-and-swap atomic operations
#define SEQ_CST_RELAXED_CAS(p,o,n) cas_seq_cst_relaxed(p,o,n)

// Explicit fences
//
// These are typically necessary only in very specific cases (e.g. WSDeque)
// where the ordered operations aren't expressive enough to capture the
// desired ordering.
#define ACQUIRE_FENCE() __atomic_thread_fence(__ATOMIC_ACQUIRE)
#define RELEASE_FENCE() __atomic_thread_fence(__ATOMIC_RELEASE)
#define SEQ_CST_FENCE() __atomic_thread_fence(__ATOMIC_SEQ_CST)

/* ---------------------------------------------------------------------- */
#else /* !THREADED_RTS */

EXTERN_INLINE void write_barrier(void);
EXTERN_INLINE void store_load_barrier(void);
EXTERN_INLINE void load_load_barrier(void);
EXTERN_INLINE void write_barrier     () {} /* nothing */
EXTERN_INLINE void store_load_barrier() {} /* nothing */
EXTERN_INLINE void load_load_barrier () {} /* nothing */

// Relaxed atomic operations
#define RELAXED_LOAD(ptr) *ptr
#define RELAXED_STORE(ptr,val) *ptr = val
#define RELAXED_ADD(ptr,val) *ptr += val

// Acquire/release atomic operations
#define ACQUIRE_LOAD(ptr) *ptr
#define RELEASE_STORE(ptr,val) *ptr = val

// Sequentially consistent atomic operations
#define SEQ_CST_LOAD(ptr) *ptr
#define SEQ_CST_STORE(ptr,val) *ptr = val
#define SEQ_CST_ADD(ptr,val) *ptr += val

// Non-atomic addition for "approximate" counters that can be lossy
#define NONATOMIC_ADD(ptr,val) *ptr += val

// Compare-and-swap atomic operations
#define SEQ_CST_RELAXED_CAS(p,o,n) cas(p,o,n)

// Fences
#define ACQUIRE_FENCE()
#define RELEASE_FENCE()
#define SEQ_CST_FENCE()

#if !IN_STG_CODE || IN_STGCRUN

INLINE_HEADER StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord old = *p;
    *p = w;
    return old;
}

EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
EXTERN_INLINE StgWord
cas(StgVolatilePtr p, StgWord o, StgWord n)
{
    StgWord result;
    result = *p;
    if (result == o) {
        *p = n;
    }
    return result;
}

EXTERN_INLINE StgWord8 cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n);
EXTERN_INLINE StgWord8
cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n)
{
    StgWord8 result;
    result = *p;
    if (result == o) {
        *p = n;
    }
    return result;
}

EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord incr);
EXTERN_INLINE StgWord
atomic_inc(StgVolatilePtr p, StgWord incr)
{
    return ((*p) += incr);
}

INLINE_HEADER StgWord
atomic_dec(StgVolatilePtr p)
{
    return --(*p);
}
#endif

/* An alias for the C11 declspec */
#define ATOMIC

#define VOLATILE_LOAD(p) ((StgWord)*((StgWord*)(p)))

#endif /* !THREADED_RTS */

/* Helpers implemented in terms of the above */
#if !IN_STG_CODE || IN_STGCRUN
INLINE_HEADER void *
xchg_ptr(void **p, void *w)
{
    return (void *) xchg((StgPtr) p, (StgWord) w);
}

INLINE_HEADER void *
cas_ptr(volatile void **p, void *o, void *n)
{
    return (void *) cas((StgVolatilePtr) p, (StgWord) o, (StgWord) n);
}
#endif
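
/*
 * Example: cas_ptr() makes pointer-typed CAS loops straightforward, e.g. a
 * lock-free "push onto a singly-linked list" (an illustrative sketch only;
 * the node type and list head below are hypothetical, not RTS types):
 *
 *   struct node { struct node *next; };
 *   static struct node *volatile list_head;
 *
 *   static void push(struct node *n)
 *   {
 *       void *old;
 *       do {
 *           old = list_head;                  // snapshot the current head
 *           n->next = (struct node *)old;     // link the new node in front
 *       } while (cas_ptr((volatile void **)&list_head, old, n) != old);
 *   }
 */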