From a05cd859223872ab4fa38e7ced147a3b0d40e458 Mon Sep 17 00:00:00 2001 From: Peter Trommler Date: Sat, 17 Apr 2021 17:59:44 +0200 Subject: PrimOps: Add CAS op for all int sizes PPC NCG: Implement CAS inline for 32 and 64 bit testsuite: Add tests for smaller atomic CAS X86 NCG: Catch calls to CAS C fallback Primops: Add atomicCasWord[8|16|32|64]Addr# Add tests for atomicCasWord[8|16|32|64]Addr# Add changelog entry for new primops X86 NCG: Fix MO-Cmpxchg W64 on 32-bit arch ghc-prim: 64-bit CAS C fallback on all archs --- compiler/GHC/Builtin/primops.txt.pp | 96 ++++++++ compiler/GHC/CmmToAsm/PPC/CodeGen.hs | 33 ++- compiler/GHC/CmmToAsm/X86/CodeGen.hs | 7 +- compiler/GHC/StgToCmm/Prim.hs | 18 +- includes/stg/MiscClosures.h | 4 + libraries/ghc-prim/cbits/atomic.c | 2 - libraries/ghc-prim/changelog.md | 11 + rts/PrimOps.cmm | 52 +++++ rts/RtsSymbols.c | 4 + rts/package.conf.in | 4 - rts/rts.cabal.in | 4 +- .../tests/concurrent/should_run/AtomicPrimops.hs | 242 +++++++++++++++++++++ .../concurrent/should_run/AtomicPrimops.stdout | 8 + 13 files changed, 473 insertions(+), 12 deletions(-) diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp index 5f5cd64cfa..b07c344e18 100644 --- a/compiler/GHC/Builtin/primops.txt.pp +++ b/compiler/GHC/Builtin/primops.txt.pp @@ -1927,6 +1927,46 @@ primop CasByteArrayOp_Int "casIntArray#" GenPrimOp with has_side_effects = True can_fail = True +primop CasByteArrayOp_Int8 "casInt8Array#" GenPrimOp + MutableByteArray# s -> Int# -> Int8# -> Int8# -> State# s -> (# State# s, Int8# #) + {Given an array, an offset in bytes, the expected old value, and + the new value, perform an atomic compare and swap i.e. write the new + value if the current value matches the provided old value. Returns + the value of the element before the operation. 
Implies a full memory + barrier.} + with has_side_effects = True + can_fail = True + +primop CasByteArrayOp_Int16 "casInt16Array#" GenPrimOp + MutableByteArray# s -> Int# -> Int16# -> Int16# -> State# s -> (# State# s, Int16# #) + {Given an array, an offset in 16 bit units, the expected old value, and + the new value, perform an atomic compare and swap i.e. write the new + value if the current value matches the provided old value. Returns + the value of the element before the operation. Implies a full memory + barrier.} + with has_side_effects = True + can_fail = True + +primop CasByteArrayOp_Int32 "casInt32Array#" GenPrimOp + MutableByteArray# s -> Int# -> Int32# -> Int32# -> State# s -> (# State# s, Int32# #) + {Given an array, an offset in 32 bit units, the expected old value, and + the new value, perform an atomic compare and swap i.e. write the new + value if the current value matches the provided old value. Returns + the value of the element before the operation. Implies a full memory + barrier.} + with has_side_effects = True + can_fail = True + +primop CasByteArrayOp_Int64 "casInt64Array#" GenPrimOp + MutableByteArray# s -> Int# -> INT64 -> INT64 -> State# s -> (# State# s, INT64 #) + {Given an array, an offset in 64 bit units, the expected old value, and + the new value, perform an atomic compare and swap i.e. write the new + value if the current value matches the provided old value. Returns + the value of the element before the operation. 
Implies a full memory + barrier.} + with has_side_effects = True + can_fail = True + primop FetchAddByteArrayOp_Int "fetchAddIntArray#" GenPrimOp MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #) {Given an array, and offset in machine words, and a value to add, @@ -2387,6 +2427,62 @@ primop CasAddrOp_Word "atomicCasWordAddr#" GenPrimOp with has_side_effects = True can_fail = True +primop CasAddrOp_Word8 "atomicCasWord8Addr#" GenPrimOp + Addr# -> Word8# -> Word8# -> State# s -> (# State# s, Word8# #) + { Compare and swap on an 8 bit-sized and aligned memory location. + + Use as: \s -> atomicCasWord8Addr# location expected desired s + + This version always returns the old value read. This follows the normal + protocol for CAS operations (and matches the underlying instruction on + most architectures). + + Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop CasAddrOp_Word16 "atomicCasWord16Addr#" GenPrimOp + Addr# -> Word16# -> Word16# -> State# s -> (# State# s, Word16# #) + { Compare and swap on a 16 bit-sized and aligned memory location. + + Use as: \s -> atomicCasWord16Addr# location expected desired s + + This version always returns the old value read. This follows the normal + protocol for CAS operations (and matches the underlying instruction on + most architectures). + + Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop CasAddrOp_Word32 "atomicCasWord32Addr#" GenPrimOp + Addr# -> Word32# -> Word32# -> State# s -> (# State# s, Word32# #) + { Compare and swap on a 32 bit-sized and aligned memory location. + + Use as: \s -> atomicCasWord32Addr# location expected desired s + + This version always returns the old value read. This follows the normal + protocol for CAS operations (and matches the underlying instruction on + most architectures). 
+ + Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop CasAddrOp_Word64 "atomicCasWord64Addr#" GenPrimOp + Addr# -> WORD64 -> WORD64 -> State# s -> (# State# s, WORD64 #) + { Compare and swap on a 64 bit-sized and aligned memory location. + + Use as: \s -> atomicCasWord64Addr# location expected desired s + + This version always returns the old value read. This follows the normal + protocol for CAS operations (and matches the underlying instruction on + most architectures). + + Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + primop FetchAddAddrOp_Word "fetchAddWordAddr#" GenPrimOp Addr# -> Word# -> State# s -> (# State# s, Word# #) {Given an address, and a value to add, diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs index 67bc3d9bdb..1c3b244980 100644 --- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs @@ -1221,7 +1221,38 @@ genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr] genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do code <- assignMem_IntCode (intFormat width) addr val - return $ unitOL(HWSYNC) `appOL` code + return $ unitOL HWSYNC `appOL` code + +genCCall (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] + | width == W32 || width == W64 + = do + platform <- getPlatform + (old_reg, old_code) <- getSomeReg old + (new_reg, new_code) <- getSomeReg new + (addr_reg, addr_code) <- getSomeReg addr + lbl_retry <- getBlockIdNat + lbl_eq <- getBlockIdNat + lbl_end <- getBlockIdNat + let reg_dst = getRegisterReg platform (CmmLocal dst) + code = toOL + [ HWSYNC + , BCC ALWAYS lbl_retry Nothing + , NEWBLOCK lbl_retry + , LDR format reg_dst (AddrRegReg r0 addr_reg) + , CMP format reg_dst (RIReg old_reg) + , BCC NE lbl_end Nothing + , BCC ALWAYS lbl_eq Nothing + , NEWBLOCK lbl_eq + , STC format new_reg (AddrRegReg r0 addr_reg) + , BCC NE lbl_retry Nothing + , BCC ALWAYS lbl_end Nothing + , NEWBLOCK 
lbl_end + , ISYNC + ] + return $ addr_code `appOL` new_code `appOL` old_code `appOL` code + where + format = intFormat width + genCCall (PrimTarget (MO_Clz width)) [dst] [src] = do platform <- getPlatform diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs index 5e7c261cbb..1ab24c4a25 100644 --- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs @@ -2595,10 +2595,11 @@ genCCall' _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do code <- assignMem_IntCode (intFormat width) addr val return $ code `snocOL` MFENCE -genCCall' _ is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ = do +genCCall' _ is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ -- On x86 we don't have enough registers to use cmpxchg with a -- complicated addressing mode, so on that architecture we -- pre-compute the address first. + | not (is32Bit && width == W64) = do Amode amode addr_code <- getSimpleAmode is32Bit addr newval <- getNewRegNat format newval_code <- getAnyReg new @@ -3441,7 +3442,9 @@ outOfLineCmmOp bid mop res args MO_AtomicRMW _ _ -> fsLit "atomicrmw" MO_AtomicRead _ -> fsLit "atomicread" MO_AtomicWrite _ -> fsLit "atomicwrite" - MO_Cmpxchg _ -> fsLit "cmpxchg" + MO_Cmpxchg w -> cmpxchgLabel w -- for W64 on 32-bit + -- TODO: implement + -- cmpxchg8b instr MO_Xchg _ -> should_be_inline MO_UF_Conv _ -> unsupported diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index d61880a0e2..290ace9f01 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -872,6 +872,14 @@ emitPrimOp dflags primop = case primop of emitPrimCall [res] (MO_Cmpxchg (wordWidth platform)) [dst, expected, new] CasAddrOp_Word -> \[dst, expected, new] -> opIntoRegs $ \[res] -> emitPrimCall [res] (MO_Cmpxchg (wordWidth platform)) [dst, expected, new] + CasAddrOp_Word8 -> \[dst, expected, new] -> opIntoRegs $ \[res] -> + emitPrimCall [res] (MO_Cmpxchg W8) [dst, 
expected, new] + CasAddrOp_Word16 -> \[dst, expected, new] -> opIntoRegs $ \[res] -> + emitPrimCall [res] (MO_Cmpxchg W16) [dst, expected, new] + CasAddrOp_Word32 -> \[dst, expected, new] -> opIntoRegs $ \[res] -> + emitPrimCall [res] (MO_Cmpxchg W32) [dst, expected, new] + CasAddrOp_Word64 -> \[dst, expected, new] -> opIntoRegs $ \[res] -> + emitPrimCall [res] (MO_Cmpxchg W64) [dst, expected, new] -- SIMD primops (VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do @@ -1075,6 +1083,14 @@ emitPrimOp dflags primop = case primop of doAtomicWriteByteArray mba ix (bWord platform) val CasByteArrayOp_Int -> \[mba, ix, old, new] -> opIntoRegs $ \[res] -> doCasByteArray res mba ix (bWord platform) old new + CasByteArrayOp_Int8 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] -> + doCasByteArray res mba ix b8 old new + CasByteArrayOp_Int16 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] -> + doCasByteArray res mba ix b16 old new + CasByteArrayOp_Int32 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] -> + doCasByteArray res mba ix b32 old new + CasByteArrayOp_Int64 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] -> + doCasByteArray res mba ix b64 old new -- The rest just translate straightforwardly @@ -3092,7 +3108,7 @@ doCasByteArray doCasByteArray res mba idx idx_ty old new = do profile <- getProfile platform <- getPlatform - let width = (typeWidth idx_ty) + let width = typeWidth idx_ty addr = cmmIndexOffExpr platform (arrWordsHdrSize profile) width mba idx emitPrimCall diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index d8aefd8035..30469c603d 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -444,6 +444,10 @@ RTS_FUN_DECL(stg_shrinkMutableByteArrayzh); RTS_FUN_DECL(stg_resizzeMutableByteArrayzh); RTS_FUN_DECL(stg_shrinkSmallMutableArrayzh); RTS_FUN_DECL(stg_casIntArrayzh); +RTS_FUN_DECL(stg_casInt8Arrayzh); +RTS_FUN_DECL(stg_casInt16Arrayzh); +RTS_FUN_DECL(stg_casInt32Arrayzh); +RTS_FUN_DECL(stg_casInt64Arrayzh); 
RTS_FUN_DECL(stg_newArrayzh); RTS_FUN_DECL(stg_newArrayArrayzh); RTS_FUN_DECL(stg_copyArrayzh); diff --git a/libraries/ghc-prim/cbits/atomic.c b/libraries/ghc-prim/cbits/atomic.c index 18451016ea..af26e16268 100644 --- a/libraries/ghc-prim/cbits/atomic.c +++ b/libraries/ghc-prim/cbits/atomic.c @@ -309,14 +309,12 @@ hs_cmpxchg32(StgWord x, StgWord old, StgWord new) return __sync_val_compare_and_swap((volatile StgWord32 *) x, (StgWord32) old, (StgWord32) new); } -#if WORD_SIZE_IN_BITS == 64 extern StgWord hs_cmpxchg64(StgWord x, StgWord64 old, StgWord64 new); StgWord hs_cmpxchg64(StgWord x, StgWord64 old, StgWord64 new) { return __sync_val_compare_and_swap((volatile StgWord64 *) x, old, new); } -#endif // Atomic exchange operations diff --git a/libraries/ghc-prim/changelog.md b/libraries/ghc-prim/changelog.md index 5d27ec197a..63f2881dcb 100644 --- a/libraries/ghc-prim/changelog.md +++ b/libraries/ghc-prim/changelog.md @@ -87,6 +87,17 @@ - `extend{Int,Word}#` -> `extendTo{Int,Word}#` - `narrow{Int,Word}#` -> `intTo{Int,Word}#` +- Add primops for atomic compare and swap for sizes other than wordsize: + + casInt8Array# :: MutableByteArray# s -> Int# -> Int8# -> Int8# -> State# s -> (# State# s, Int8# #) + casInt16Array# :: MutableByteArray# s -> Int# -> Int16# -> Int16# -> State# s -> (# State# s, Int16# #) + casInt32Array# :: MutableByteArray# s -> Int# -> Int32# -> Int32# -> State# s -> (# State# s, Int32# #) + casInt64Array# :: MutableByteArray# s -> Int# -> Int64# -> Int64# -> State# s -> (# State# s, Int64# #) + atomicCasWord8Addr# :: Addr# -> Word8# -> Word8# -> State# s -> (# State# s, Word8# #) + atomicCasWord16Addr# :: Addr# -> Word16# -> Word16# -> State# s -> (# State# s, Word16# #) + atomicCasWord32Addr# :: Addr# -> Word32# -> Word32# -> State# s -> (# State# s, Word32# #) + atomicCasWord64Addr# :: Addr# -> Word64# -> Word64# -> State# s -> (# State# s, Word64# #) + ## 0.7.0 (edit as necessary) - Shipped with GHC 9.0.1 diff --git a/rts/PrimOps.cmm 
b/rts/PrimOps.cmm index 85c708cf92..8f99105b18 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -264,6 +264,58 @@ stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new ) } +stg_casInt8Arrayzh( gcptr arr, W_ ind, I8 old, I8 new ) +/* MutableByteArray# s -> Int# -> Int8# -> Int8# -> State# s -> (# State# s, Int8# #) */ +{ + W_ p; + I8 h; + + p = arr + SIZEOF_StgArrBytes + ind; + (h) = prim %cmpxchg8(p, old, new); + + return(h); +} + + +stg_casInt16Arrayzh( gcptr arr, W_ ind, I16 old, I16 new ) +/* MutableByteArray# s -> Int# -> Int16# -> Int16# -> State# s -> (# State# s, Int16# #) */ +{ + W_ p; + I16 h; + + p = arr + SIZEOF_StgArrBytes + ind*2; + (h) = prim %cmpxchg16(p, old, new); + + return(h); +} + + +stg_casInt32Arrayzh( gcptr arr, W_ ind, I32 old, I32 new ) +/* MutableByteArray# s -> Int# -> Int32# -> Int32# -> State# s -> (# State# s, Int32# #) */ +{ + W_ p; + I32 h; + + p = arr + SIZEOF_StgArrBytes + ind*4; + (h) = prim %cmpxchg32(p, old, new); + + return(h); +} + + +stg_casInt64Arrayzh( gcptr arr, W_ ind, I64 old, I64 new ) +/* MutableByteArray# s -> Int# -> Int64# -> Int64# -> State# s -> (# State# s, Int64# #) */ +{ + W_ p; + I64 h; + + p = arr + SIZEOF_StgArrBytes + ind*8; + (h) = prim %cmpxchg64(p, old, new); + + return(h); +} + + stg_newArrayzh ( W_ n /* words */, gcptr init ) { W_ words, size, p; diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index 678527e328..38e1b8071c 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -721,6 +721,10 @@ SymI_HasProto(stg_newBCOzh) \ SymI_HasProto(stg_newByteArrayzh) \ SymI_HasProto(stg_casIntArrayzh) \ + SymI_HasProto(stg_casInt8Arrayzh) \ + SymI_HasProto(stg_casInt16Arrayzh) \ + SymI_HasProto(stg_casInt32Arrayzh) \ + SymI_HasProto(stg_casInt64Arrayzh) \ SymI_HasProto(stg_newMVarzh) \ SymI_HasProto(stg_newMutVarzh) \ SymI_HasProto(stg_newTVarzh) \ diff --git a/rts/package.conf.in b/rts/package.conf.in index 9bc48d57ca..b0796595ff 100644 --- a/rts/package.conf.in +++ b/rts/package.conf.in @@ -170,9 +170,7 
@@ ld-options: , "-Wl,-u,_hs_cmpxchg8" , "-Wl,-u,_hs_cmpxchg16" , "-Wl,-u,_hs_cmpxchg32" -#if WORD_SIZE_IN_BITS == 64 , "-Wl,-u,_hs_cmpxchg64" -#endif , "-Wl,-u,_hs_xchg8" , "-Wl,-u,_hs_xchg16" , "-Wl,-u,_hs_xchg32" @@ -284,9 +282,7 @@ ld-options: , "-Wl,-u,hs_cmpxchg8" , "-Wl,-u,hs_cmpxchg16" , "-Wl,-u,hs_cmpxchg32" -#if WORD_SIZE_IN_BITS == 64 , "-Wl,-u,hs_cmpxchg64" -#endif , "-Wl,-u,hs_xchg8" , "-Wl,-u,hs_xchg16" , "-Wl,-u,hs_xchg32" diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 3ceae1cbdc..a08e007c2a 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -220,7 +220,6 @@ library "-Wl,-u,_hs_atomic_nand64" "-Wl,-u,_hs_atomic_or64" "-Wl,-u,_hs_atomic_xor64" - "-Wl,-u,_hs_cmpxchg64" "-Wl,-u,_hs_atomicread64" "-Wl,-u,_hs_atomicwrite64" else @@ -231,7 +230,6 @@ library "-Wl,-u,hs_atomic_nand64" "-Wl,-u,hs_atomic_or64" "-Wl,-u,hs_atomic_xor64" - "-Wl,-u,hs_cmpxchg64" "-Wl,-u,hs_atomicread64" "-Wl,-u,hs_atomicwrite64" if flag(leading-underscore) @@ -299,6 +297,7 @@ library "-Wl,-u,_hs_cmpxchg8" "-Wl,-u,_hs_cmpxchg16" "-Wl,-u,_hs_cmpxchg32" + "-Wl,-u,_hs_cmpxchg64" "-Wl,-u,_hs_xchg8" "-Wl,-u,_hs_xchg16" "-Wl,-u,_hs_xchg32" @@ -380,6 +379,7 @@ library "-Wl,-u,hs_cmpxchg8" "-Wl,-u,hs_cmpxchg16" "-Wl,-u,hs_cmpxchg32" + "-Wl,-u,hs_cmpxchg64" "-Wl,-u,hs_xchg8" "-Wl,-u,hs_xchg16" "-Wl,-u,hs_xchg32" diff --git a/testsuite/tests/concurrent/should_run/AtomicPrimops.hs b/testsuite/tests/concurrent/should_run/AtomicPrimops.hs index 83e5b514f0..b8adb3c621 100644 --- a/testsuite/tests/concurrent/should_run/AtomicPrimops.hs +++ b/testsuite/tests/concurrent/should_run/AtomicPrimops.hs @@ -10,7 +10,9 @@ import Foreign.Marshal.Alloc import Foreign.Ptr import Foreign.Storable import GHC.Exts +import GHC.Int import GHC.IO +import GHC.Word -- | Iterations per worker. 
iters :: Word @@ -25,6 +27,10 @@ main = do fetchOrTest fetchXorTest casTest + cas8Test + cas16Test + cas32Test + cas64Test readWriteTest -- Addr# fetchAddSubAddrTest @@ -33,6 +39,10 @@ main = do fetchOrAddrTest fetchXorAddrTest casAddrTest + casAddr8Test + casAddr16Test + casAddr32Test + casAddr64Test readWriteAddrTest loop :: Word -> IO () -> IO () @@ -202,6 +212,62 @@ casTest = do old' <- casIntArray mba ix old (old + n) when (old /= old') $ add mba ix n +cas8Test :: IO () +cas8Test = do + tot <- race 0 + (\ mba -> loop iters $ add mba 0 1) + (\ mba -> loop iters $ add mba 0 2) + assertEq (fromIntegral ((3 * fromIntegral iters) :: Word8)) tot "cas8Test" + where + -- Fetch-and-add implemented using CAS. + add :: MByteArray -> Int -> Int8 -> IO () + add mba ix n = do + old <- readInt8Array mba ix + old' <- casInt8Array mba ix old (old + n) + when (old /= old') $ add mba ix n + +cas16Test :: IO () +cas16Test = do + tot <- race 0 + (\ mba -> loop iters $ add mba 0 1) + (\ mba -> loop iters $ add mba 0 2) + assertEq (fromIntegral ((3 * fromIntegral iters) :: Word16)) tot "cas16Test" + where + -- Fetch-and-add implemented using CAS. + add :: MByteArray -> Int -> Int16 -> IO () + add mba ix n = do + old <- readInt16Array mba ix + old' <- casInt16Array mba ix old (old + n) + when (old /= old') $ add mba ix n + +cas32Test :: IO () +cas32Test = do + tot <- race 0 + (\ mba -> loop iters $ add mba 0 1) + (\ mba -> loop iters $ add mba 0 2) + assertEq (fromIntegral ((3 * fromIntegral iters) :: Word32)) tot "cas32Test" + where + -- Fetch-and-add implemented using CAS. + add :: MByteArray -> Int -> Int32 -> IO () + add mba ix n = do + old <- readInt32Array mba ix + old' <- casInt32Array mba ix old (old + n) + when (old /= old') $ add mba ix n + +cas64Test :: IO () +cas64Test = do + tot <- race 0 + (\ mba -> loop iters $ add mba 0 1) + (\ mba -> loop iters $ add mba 0 2) + assertEq (3 * fromIntegral iters) tot "cas64Test" + where + -- Fetch-and-add implemented using CAS. 
+ add :: MByteArray -> Int -> Int64 -> IO () + add mba ix n = do + old <- readInt64Array mba ix + old' <- casInt64Array mba ix old (old + n) + when (old /= old') $ add mba ix n + -- | Test atomicCasWordAddr# by having two threads concurrently increment a -- counter, checking the sum at the end. casAddrTest :: IO () @@ -219,6 +285,69 @@ casAddrTest = do old' <- atomicCasWordPtr ptr old (old + n) when (old /= old') $ go old' +casAddr8Test :: IO () +casAddr8Test = do + tot <- race8Addr 0 + (\ addr -> loop iters $ add addr 1) + (\ addr -> loop iters $ add addr 2) + assertEq (fromIntegral (fromIntegral (3 * iters) :: Word8)) + (fromIntegral tot) "casAddr8Test" + where + -- Fetch-and-add implemented using CAS. + add :: Ptr Word8 -> Word8 -> IO () + add ptr n = peek ptr >>= go + where + go old = do + old' <- atomicCasWord8Ptr ptr old (old + n) + when (old /= old') $ go old' + +casAddr16Test :: IO () +casAddr16Test = do + tot <- race16Addr 0 + (\ addr -> loop iters $ add addr 1) + (\ addr -> loop iters $ add addr 2) + assertEq (fromIntegral (fromIntegral (3 * iters) :: Word16)) + (fromIntegral tot) "casAddr16Test" + where + -- Fetch-and-add implemented using CAS. + add :: Ptr Word16 -> Word16 -> IO () + add ptr n = peek ptr >>= go + where + go old = do + old' <- atomicCasWord16Ptr ptr old (old + n) + when (old /= old') $ go old' + +casAddr32Test :: IO () +casAddr32Test = do + tot <- race32Addr 0 + (\ addr -> loop iters $ add addr 1) + (\ addr -> loop iters $ add addr 2) + assertEq (fromIntegral (fromIntegral (3 * iters) :: Word32)) + (fromIntegral tot) "casAddr32Test" + where + -- Fetch-and-add implemented using CAS. 
+ add :: Ptr Word32 -> Word32 -> IO () + add ptr n = peek ptr >>= go + where + go old = do + old' <- atomicCasWord32Ptr ptr old (old + n) + when (old /= old') $ go old' + +casAddr64Test :: IO () +casAddr64Test = do + tot <- race64Addr 0 + (\ addr -> loop iters $ add addr 1) + (\ addr -> loop iters $ add addr 2) + assertEq (3 * iters) (fromIntegral tot) "casAddr64Test" + where + -- Fetch-and-add implemented using CAS. + add :: Ptr Word64 -> Word64 -> IO () + add ptr n = peek ptr >>= go + where + go old = do + old' <- atomicCasWord64Ptr ptr old (old + n) + when (old /= old') $ go old' + -- | Tests atomic reads and writes by making sure that one thread sees -- updates that are done on another. This test isn't very good at the @@ -286,6 +415,62 @@ raceAddr n0 thread1 thread2 = do mapM_ takeMVar [done1, done2] peek ptr +race8Addr :: Word8 -- ^ Initial value of array element + -> (Ptr Word8 -> IO ()) -- ^ Thread 1 action + -> (Ptr Word8 -> IO ()) -- ^ Thread 2 action + -> IO Word8 -- ^ Final value of array element +race8Addr n0 thread1 thread2 = do + done1 <- newEmptyMVar + done2 <- newEmptyMVar + ptr <- castPtr <$> callocBytes (sizeOf (undefined :: Word8)) + poke ptr n0 + forkIO $ thread1 ptr >> putMVar done1 () + forkIO $ thread2 ptr >> putMVar done2 () + mapM_ takeMVar [done1, done2] + peek ptr + +race16Addr :: Word16 -- ^ Initial value of array element + -> (Ptr Word16 -> IO ()) -- ^ Thread 1 action + -> (Ptr Word16 -> IO ()) -- ^ Thread 2 action + -> IO Word16 -- ^ Final value of array element +race16Addr n0 thread1 thread2 = do + done1 <- newEmptyMVar + done2 <- newEmptyMVar + ptr <- castPtr <$> callocBytes (sizeOf (undefined :: Word16)) + poke ptr n0 + forkIO $ thread1 ptr >> putMVar done1 () + forkIO $ thread2 ptr >> putMVar done2 () + mapM_ takeMVar [done1, done2] + peek ptr + +race32Addr :: Word32 -- ^ Initial value of array element + -> (Ptr Word32 -> IO ()) -- ^ Thread 1 action + -> (Ptr Word32 -> IO ()) -- ^ Thread 2 action + -> IO Word32 -- ^ Final value of 
array element +race32Addr n0 thread1 thread2 = do + done1 <- newEmptyMVar + done2 <- newEmptyMVar + ptr <- castPtr <$> callocBytes (sizeOf (undefined :: Word32)) + poke ptr n0 + forkIO $ thread1 ptr >> putMVar done1 () + forkIO $ thread2 ptr >> putMVar done2 () + mapM_ takeMVar [done1, done2] + peek ptr + +race64Addr :: Word64 -- ^ Initial value of array element + -> (Ptr Word64 -> IO ()) -- ^ Thread 1 action + -> (Ptr Word64 -> IO ()) -- ^ Thread 2 action + -> IO Word64 -- ^ Final value of array element +race64Addr n0 thread1 thread2 = do + done1 <- newEmptyMVar + done2 <- newEmptyMVar + ptr <- castPtr <$> callocBytes (sizeOf (undefined :: Word64)) + poke ptr n0 + forkIO $ thread1 ptr >> putMVar done1 () + forkIO $ thread2 ptr >> putMVar done2 () + mapM_ takeMVar [done1, done2] + peek ptr + ------------------------------------------------------------------------ -- Test helper @@ -347,6 +532,26 @@ readIntArray (MBA mba#) (I# ix#) = IO $ \ s# -> case readIntArray# mba# ix# s# of (# s2#, n# #) -> (# s2#, I# n# #) +readInt8Array :: MByteArray -> Int -> IO Int8 +readInt8Array (MBA mba#) (I# ix#) = IO $ \ s# -> + case readInt8Array# mba# ix# s# of + (# s2#, n# #) -> (# s2#, I8# n# #) + +readInt16Array :: MByteArray -> Int -> IO Int16 +readInt16Array (MBA mba#) (I# ix#) = IO $ \ s# -> + case readInt16Array# mba# ix# s# of + (# s2#, n# #) -> (# s2#, I16# n# #) + +readInt32Array :: MByteArray -> Int -> IO Int32 +readInt32Array (MBA mba#) (I# ix#) = IO $ \ s# -> + case readInt32Array# mba# ix# s# of + (# s2#, n# #) -> (# s2#, I32# n# #) + +readInt64Array :: MByteArray -> Int -> IO Int64 +readInt64Array (MBA mba#) (I# ix#) = IO $ \ s# -> + case readInt64Array# mba# ix# s# of + (# s2#, n# #) -> (# s2#, I64# n# #) + atomicWriteIntArray :: MByteArray -> Int -> Int -> IO () atomicWriteIntArray (MBA mba#) (I# ix#) (I# n#) = IO $ \ s# -> case atomicWriteIntArray# mba# ix# n# s# of @@ -362,6 +567,26 @@ casIntArray (MBA mba#) (I# ix#) (I# old#) (I# new#) = IO $ \ s# -> case 
casIntArray# mba# ix# old# new# s# of (# s2#, old2# #) -> (# s2#, I# old2# #) +casInt8Array :: MByteArray -> Int -> Int8 -> Int8 -> IO Int8 +casInt8Array (MBA mba#) (I# ix#) (I8# old#) (I8# new#) = IO $ \ s# -> + case casInt8Array# mba# ix# old# new# s# of + (# s2#, old2# #) -> (# s2#, I8# old2# #) + +casInt16Array :: MByteArray -> Int -> Int16 -> Int16 -> IO Int16 +casInt16Array (MBA mba#) (I# ix#) (I16# old#) (I16# new#) = IO $ \ s# -> + case casInt16Array# mba# ix# old# new# s# of + (# s2#, old2# #) -> (# s2#, I16# old2# #) + +casInt32Array :: MByteArray -> Int -> Int32 -> Int32 -> IO Int32 +casInt32Array (MBA mba#) (I# ix#) (I32# old#) (I32# new#) = IO $ \ s# -> + case casInt32Array# mba# ix# old# new# s# of + (# s2#, old2# #) -> (# s2#, I32# old2# #) + +casInt64Array :: MByteArray -> Int -> Int64 -> Int64 -> IO Int64 +casInt64Array (MBA mba#) (I# ix#) (I64# old#) (I64# new#) = IO $ \ s# -> + case casInt64Array# mba# ix# old# new# s# of + (# s2#, old2# #) -> (# s2#, I64# old2# #) + ------------------------------------------------------------------------ -- Wrappers around Addr# @@ -411,3 +636,20 @@ atomicCasWordPtr :: Ptr Word -> Word -> Word -> IO Word atomicCasWordPtr (Ptr addr#) (W# old#) (W# new#) = IO $ \ s# -> case atomicCasWordAddr# addr# old# new# s# of (# s2#, old2# #) -> (# s2#, W# old2# #) + +atomicCasWord8Ptr :: Ptr Word8 -> Word8 -> Word8 -> IO Word8 +atomicCasWord8Ptr (Ptr addr#) (W8# old#) (W8# new#) = IO $ \ s# -> + case atomicCasWord8Addr# addr# old# new# s# of + (# s2#, old2# #) -> (# s2#, W8# old2# #) +atomicCasWord16Ptr :: Ptr Word16 -> Word16 -> Word16 -> IO Word16 +atomicCasWord16Ptr (Ptr addr#) (W16# old#) (W16# new#) = IO $ \ s# -> + case atomicCasWord16Addr# addr# old# new# s# of + (# s2#, old2# #) -> (# s2#, W16# old2# #) +atomicCasWord32Ptr :: Ptr Word32 -> Word32 -> Word32 -> IO Word32 +atomicCasWord32Ptr (Ptr addr#) (W32# old#) (W32# new#) = IO $ \ s# -> + case atomicCasWord32Addr# addr# old# new# s# of + (# s2#, old2# #) -> (# s2#, 
W32# old2# #) +atomicCasWord64Ptr :: Ptr Word64 -> Word64 -> Word64 -> IO Word64 +atomicCasWord64Ptr (Ptr addr#) (W64# old#) (W64# new#) = IO $ \ s# -> + case atomicCasWord64Addr# addr# old# new# s# of + (# s2#, old2# #) -> (# s2#, W64# old2# #) diff --git a/testsuite/tests/concurrent/should_run/AtomicPrimops.stdout b/testsuite/tests/concurrent/should_run/AtomicPrimops.stdout index b09c2a8eaa..055f6694a1 100644 --- a/testsuite/tests/concurrent/should_run/AtomicPrimops.stdout +++ b/testsuite/tests/concurrent/should_run/AtomicPrimops.stdout @@ -4,6 +4,10 @@ fetchNandTest: OK fetchOrTest: OK fetchXorTest: OK casTest: OK +cas8Test: OK +cas16Test: OK +cas32Test: OK +cas64Test: OK readWriteTest: OK fetchAddSubAddrTest: OK fetchAndAddrTest: OK @@ -11,4 +15,8 @@ fetchNandAddrTest: OK fetchOrAddrTest: OK fetchXorAddrTest: OK casAddrTest: OK +casAddr8Test: OK +casAddr16Test: OK +casAddr32Test: OK +casAddr64Test: OK readWriteAddrTest: OK -- cgit v1.2.1