| field | value |
|---|---|
| author | Johan Tibell <johan.tibell@gmail.com>, 2014-06-09 11:43:21 +0200 |
| committer | Johan Tibell <johan.tibell@gmail.com>, 2014-06-24 19:06:53 +0200 |
| commit | d8abf85f8ca176854e9d5d0b12371c4bc402aac3 |
| tree | 9542f2b8a7ee9b9759396e4172ad5fc4ce1d2f3c /compiler |
| parent | a4a79b5a04658ac542b1e07a6975b488fd589441 |
| download | haskell-d8abf85f8ca176854e9d5d0b12371c4bc402aac3.tar.gz |
Add more primops for atomic ops on byte arrays
Summary:
Add more primops for atomic ops on byte arrays
Adds the following primops:
* atomicReadIntArray#
* atomicWriteIntArray#
* fetchSubIntArray#
* fetchOrIntArray#
* fetchXorIntArray#
* fetchAndIntArray#
Makes these pre-existing out-of-line primops inline:
* fetchAddIntArray#
* casIntArray#
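
For orientation (not part of the patch itself): the sketch below shows how the new primops could be used from ordinary Haskell through the `GHC.Exts` re-exports, assuming the post-patch signatures in which the fetch-style ops return the element's previous value. The `Counter` wrapper and its function names are purely illustrative, and the `8#` allocation assumes a 64-bit `Int`.

```haskell
{-# LANGUAGE MagicHash, UnboxedTuples #-}
-- Hedged usage sketch: a one-word atomic counter built on the primops
-- added by this patch. Counter/newCounter/incCounter/readCounter are
-- illustrative names, not part of the patch.
module AtomicCounter (Counter, newCounter, incCounter, readCounter) where

import GHC.Exts
import GHC.IO (IO (..))

data Counter = Counter (MutableByteArray# RealWorld)

-- Allocate one word of storage and zero it; 8# assumes a 64-bit Int.
newCounter :: IO Counter
newCounter = IO $ \s0 ->
  case newByteArray# 8# s0 of
    (# s1, mba #) ->
      case atomicWriteIntArray# mba 0# 0# s1 of
        s2 -> (# s2, Counter mba #)

-- Atomically add 1; returns the value the counter held before the add.
incCounter :: Counter -> IO Int
incCounter (Counter mba) = IO $ \s ->
  case fetchAddIntArray# mba 0# 1# s of
    (# s', old #) -> (# s', I# old #)

-- Atomic read, implying a full memory barrier.
readCounter :: Counter -> IO Int
readCounter (Counter mba) = IO $ \s ->
  case atomicReadIntArray# mba 0# s of
    (# s', n #) -> (# s', I# n #)
```

With the native code generator changes below, `incCounter` compiles to an inline `lock xadd` on x86 instead of an out-of-line RTS call.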
Diffstat (limited to 'compiler')
| mode | file | changed lines |
|---|---|---|
| -rw-r--r-- | compiler/cmm/CmmMachOp.hs | 19 |
| -rw-r--r-- | compiler/cmm/CmmSink.hs | 4 |
| -rw-r--r-- | compiler/cmm/PprC.hs | 4 |
| -rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 94 |
| -rw-r--r-- | compiler/llvmGen/Llvm/AbsSyn.hs | 7 |
| -rw-r--r-- | compiler/llvmGen/Llvm/PpLlvm.hs | 18 |
| -rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 71 |
| -rw-r--r-- | compiler/nativeGen/CPrim.hs | 50 |
| -rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 4 |
| -rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 4 |
| -rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 92 |
| -rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 38 |
| -rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 8 |
| -rw-r--r-- | compiler/prelude/primops.txt.pp | 76 |
14 files changed, 449 insertions, 40 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index c4ec393ad6..d8ce492de1 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -19,6 +19,9 @@ module CmmMachOp -- CallishMachOp , CallishMachOp(..), callishMachOpHints , pprCallishMachOp + + -- Atomic read-modify-write + , AtomicMachOp(..) ) where @@ -547,8 +550,24 @@ data CallishMachOp | MO_PopCnt Width | MO_BSwap Width + + -- Atomic read-modify-write. + | MO_AtomicRMW Width AtomicMachOp + | MO_AtomicRead Width + | MO_AtomicWrite Width + | MO_Cmpxchg Width deriving (Eq, Show) +-- | The operation to perform atomically. +data AtomicMachOp = + AMO_Add + | AMO_Sub + | AMO_And + | AMO_Nand + | AMO_Or + | AMO_Xor + deriving (Eq, Show) + pprCallishMachOp :: CallishMachOp -> SDoc pprCallishMachOp mo = text (show mo) diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs index 4c025425ab..4dced9afd2 100644 --- a/compiler/cmm/CmmSink.hs +++ b/compiler/cmm/CmmSink.hs @@ -650,6 +650,10 @@ data AbsMem -- perhaps we ought to have a special annotation for calls that can -- modify heap/stack memory. For now we just use the conservative -- definition here. +-- +-- Some CallishMachOp imply a memory barrier e.g. AtomicRMW and +-- therefore we should never float any memory operations across one of +-- these calls. bothMems :: AbsMem -> AbsMem -> AbsMem diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 47b247e278..455c79ba02 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -753,6 +753,10 @@ pprCallishMachOp_for_C mop MO_Memmove -> ptext (sLit "memmove") (MO_BSwap w) -> ptext (sLit $ bSwapLabel w) (MO_PopCnt w) -> ptext (sLit $ popCntLabel w) + (MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop) + (MO_Cmpxchg w) -> ptext (sLit $ cmpxchgLabel w) + (MO_AtomicRead w) -> ptext (sLit $ atomicReadLabel w) + (MO_AtomicWrite w) -> ptext (sLit $ atomicWriteLabel w) (MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w) MO_S_QuotRem {} -> unsupported diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 40a5e3649b..e4c682bf02 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -769,6 +769,25 @@ emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args +-- Atomic read-modify-write +emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] = + doAtomicRMW res AMO_Add mba ix (bWord dflags) n +emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] = + doAtomicRMW res AMO_Sub mba ix (bWord dflags) n +emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] = + doAtomicRMW res AMO_And mba ix (bWord dflags) n +emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] = + doAtomicRMW res AMO_Nand mba ix (bWord dflags) n +emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] = + doAtomicRMW res AMO_Or mba ix (bWord dflags) n +emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] = + doAtomicRMW res AMO_Xor mba ix (bWord dflags) n +emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] = + doAtomicReadByteArray res mba ix (bWord dflags) +emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] = + doAtomicWriteByteArray mba ix (bWord dflags) val +emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] = + doCasByteArray res mba ix (bWord dflags) old new -- The rest just translate straightforwardly emitPrimOp dflags [res] op 
[arg] @@ -1933,6 +1952,81 @@ doWriteSmallPtrArrayOp addr idx val = do emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel))) ------------------------------------------------------------------------------ +-- Atomic read-modify-write + +-- | Emit an atomic modification to a byte array element. The result +-- reg contains that previous value of the element. Implies a full +-- memory barrier. +doAtomicRMW :: LocalReg -- ^ Result reg + -> AtomicMachOp -- ^ Atomic op (e.g. add) + -> CmmExpr -- ^ MutableByteArray# + -> CmmExpr -- ^ Index + -> CmmType -- ^ Type of element by which we are indexing + -> CmmExpr -- ^ Op argument (e.g. amount to add) + -> FCode () +doAtomicRMW res amop mba idx idx_ty n = do + dflags <- getDynFlags + let width = typeWidth idx_ty + addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags) + width mba idx + emitPrimCall + [ res ] + (MO_AtomicRMW width amop) + [ addr, n ] + +-- | Emit an atomic read to a byte array that acts as a memory barrier. +doAtomicReadByteArray + :: LocalReg -- ^ Result reg + -> CmmExpr -- ^ MutableByteArray# + -> CmmExpr -- ^ Index + -> CmmType -- ^ Type of element by which we are indexing + -> FCode () +doAtomicReadByteArray res mba idx idx_ty = do + dflags <- getDynFlags + let width = typeWidth idx_ty + addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags) + width mba idx + emitPrimCall + [ res ] + (MO_AtomicRead width) + [ addr ] + +-- | Emit an atomic write to a byte array that acts as a memory barrier. +doAtomicWriteByteArray + :: CmmExpr -- ^ MutableByteArray# + -> CmmExpr -- ^ Index + -> CmmType -- ^ Type of element by which we are indexing + -> CmmExpr -- ^ Value to write + -> FCode () +doAtomicWriteByteArray mba idx idx_ty val = do + dflags <- getDynFlags + let width = typeWidth idx_ty + addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags) + width mba idx + emitPrimCall + [ {- no results -} ] + (MO_AtomicWrite width) + [ addr, val ] + +doCasByteArray + :: LocalReg -- ^ Result reg + -> CmmExpr -- ^ MutableByteArray# + -> CmmExpr -- ^ Index + -> CmmType -- ^ Type of element by which we are indexing + -> CmmExpr -- ^ Old value + -> CmmExpr -- ^ New value + -> FCode () +doCasByteArray res mba idx idx_ty old new = do + dflags <- getDynFlags + let width = (typeWidth idx_ty) + addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags) + width mba idx + emitPrimCall + [ res ] + (MO_Cmpxchg width) + [ addr, old, new ] + +------------------------------------------------------------------------------ -- Helpers for emitting function calls -- | Emit a call to @memcpy@. diff --git a/compiler/llvmGen/Llvm/AbsSyn.hs b/compiler/llvmGen/Llvm/AbsSyn.hs index f92bd89c5c..24d0856ea3 100644 --- a/compiler/llvmGen/Llvm/AbsSyn.hs +++ b/compiler/llvmGen/Llvm/AbsSyn.hs @@ -65,6 +65,8 @@ data LlvmFunction = LlvmFunction { type LlvmFunctions = [LlvmFunction] +type SingleThreaded = Bool + -- | LLVM ordering types for synchronization purposes. (Introduced in LLVM -- 3.0). Please see the LLVM documentation for a better description. data LlvmSyncOrdering @@ -224,6 +226,11 @@ data LlvmExpression | Load LlvmVar {- | + Atomic load of the value at location ptr + -} + | ALoad LlvmSyncOrdering SingleThreaded LlvmVar + + {- | Navigate in an structure, selecting elements * inbound: Is the pointer inbounds? 
(computed pointer doesn't overflow) * ptr: Location of the structure diff --git a/compiler/llvmGen/Llvm/PpLlvm.hs b/compiler/llvmGen/Llvm/PpLlvm.hs index 025078226d..73077257f8 100644 --- a/compiler/llvmGen/Llvm/PpLlvm.hs +++ b/compiler/llvmGen/Llvm/PpLlvm.hs @@ -239,6 +239,7 @@ ppLlvmExpression expr Insert vec elt idx -> ppInsert vec elt idx GetElemPtr inb ptr indexes -> ppGetElementPtr inb ptr indexes Load ptr -> ppLoad ptr + ALoad ord st ptr -> ppALoad ord st ptr Malloc tp amount -> ppMalloc tp amount Phi tp precessors -> ppPhi tp precessors Asm asm c ty v se sk -> ppAsm asm c ty v se sk @@ -327,13 +328,18 @@ ppSyncOrdering SyncSeqCst = text "seq_cst" -- of specifying alignment. ppLoad :: LlvmVar -> SDoc -ppLoad var - | isVecPtrVar var = text "load" <+> ppr var <> - comma <+> text "align 1" - | otherwise = text "load" <+> ppr var +ppLoad var = text "load" <+> ppr var <> align where - isVecPtrVar :: LlvmVar -> Bool - isVecPtrVar = isVector . pLower . getVarType + align | isVector . pLower . getVarType $ var = text ", align 1" + | otherwise = empty + +ppALoad :: LlvmSyncOrdering -> SingleThreaded -> LlvmVar -> SDoc +ppALoad ord st var = sdocWithDynFlags $ \dflags -> + let alignment = (llvmWidthInBits dflags $ getVarType var) `quot` 8 + align = text ", align" <+> ppr alignment + sThreaded | st = text " singlethread" + | otherwise = empty + in text "load atomic" <+> ppr var <> sThreaded <+> ppSyncOrdering ord <> align ppStore :: LlvmVar -> LlvmVar -> SDoc ppStore val dst diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 517553516b..4a56600937 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -15,6 +15,7 @@ import BlockId import CodeGen.Platform ( activeStgRegs, callerSaves ) import CLabel import Cmm +import CPrim import PprCmm import CmmUtils import Hoopl @@ -32,6 +33,7 @@ import Unique import Data.List ( nub ) import Data.Maybe ( catMaybes ) +type Atomic = Bool type LlvmStatements = OrdList LlvmStatement -- ----------------------------------------------------------------------------- @@ -228,6 +230,17 @@ genCall t@(PrimTarget (MO_PopCnt w)) dsts args = genCall t@(PrimTarget (MO_BSwap w)) dsts args = genCallSimpleCast w t dsts args +genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do + dstV <- getCmmReg (CmmLocal dst) + (v1, stmts, top) <- genLoad True addr (localRegType dst) + let stmt1 = Store v1 dstV + return (stmts `snocOL` stmt1, top) + +-- TODO: implement these properly rather than calling to RTS functions. +-- genCall t@(PrimTarget (MO_AtomicWrite width)) [] [addr, val] = undefined +-- genCall t@(PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = undefined +-- genCall t@(PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = undefined + -- Handle memcpy function specifically since llvm's intrinsic version takes -- some extra parameters. 
genCall t@(PrimTarget op) [] args' @@ -548,7 +561,6 @@ cmmPrimOpFunctions mop = do (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch" - MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported @@ -558,6 +570,12 @@ cmmPrimOpFunctions mop = do MO_Touch -> unsupported MO_UF_Conv _ -> unsupported + MO_AtomicRead _ -> unsupported + + MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop + MO_Cmpxchg w -> fsLit $ cmpxchgLabel w + MO_AtomicWrite w -> fsLit $ atomicWriteLabel w + -- | Tail function calls genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData @@ -849,7 +867,7 @@ exprToVarOpt opt e = case e of -> genLit opt lit CmmLoad e' ty - -> genLoad e' ty + -> genLoad False e' ty -- Cmmreg in expression is the value, so must load. If you want actual -- reg pointer, call getCmmReg directly. @@ -1268,41 +1286,41 @@ genMachOp_slow _ _ _ = panic "genMachOp: More then 2 expressions in MachOp!" -- | Handle CmmLoad expression. -genLoad :: CmmExpr -> CmmType -> LlvmM ExprData +genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData -- First we try to detect a few common cases and produce better code for -- these then the default case. We are mostly trying to detect Cmm code -- like I32[Sp + n] and use 'getelementptr' operations instead of the -- generic case that uses casts and pointer arithmetic -genLoad e@(CmmReg (CmmGlobal r)) ty - = genLoad_fast e r 0 ty +genLoad atomic e@(CmmReg (CmmGlobal r)) ty + = genLoad_fast atomic e r 0 ty -genLoad e@(CmmRegOff (CmmGlobal r) n) ty - = genLoad_fast e r n ty +genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty + = genLoad_fast atomic e r n ty -genLoad e@(CmmMachOp (MO_Add _) [ +genLoad atomic e@(CmmMachOp (MO_Add _) [ (CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]) ty - = genLoad_fast e r (fromInteger n) ty + = genLoad_fast atomic e r (fromInteger n) ty -genLoad e@(CmmMachOp (MO_Sub _) [ +genLoad atomic e@(CmmMachOp (MO_Sub _) [ (CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]) ty - = genLoad_fast e r (negate $ fromInteger n) ty + = genLoad_fast atomic e r (negate $ fromInteger n) ty -- generic case -genLoad e ty +genLoad atomic e ty = do other <- getTBAAMeta otherN - genLoad_slow e ty other + genLoad_slow atomic e ty other -- | Handle CmmLoad expression. -- This is a special case for loading from a global register pointer -- offset such as I32[Sp+8]. -genLoad_fast :: CmmExpr -> GlobalReg -> Int -> CmmType - -> LlvmM ExprData -genLoad_fast e r n ty = do +genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType + -> LlvmM ExprData +genLoad_fast atomic e r n ty = do dflags <- getDynFlags (gv, grt, s1) <- getCmmRegVal (CmmGlobal r) meta <- getTBAARegMeta r @@ -1315,7 +1333,7 @@ genLoad_fast e r n ty = do case grt == ty' of -- were fine True -> do - (var, s3) <- doExpr ty' (MExpr meta $ Load ptr) + (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr) return (var, s1 `snocOL` s2 `snocOL` s3, []) @@ -1323,32 +1341,34 @@ genLoad_fast e r n ty = do False -> do let pty = pLift ty' (ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty - (var, s4) <- doExpr ty' (MExpr meta $ Load ptr') + (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr') return (var, s1 `snocOL` s2 `snocOL` s3 `snocOL` s4, []) -- If its a bit type then we use the slow method since -- we can't avoid casting anyway. - False -> genLoad_slow e ty meta - + False -> genLoad_slow atomic e ty meta + where + loadInstr ptr | atomic = ALoad SyncSeqCst False ptr + | otherwise = Load ptr -- | Handle Cmm load expression. -- Generic case. Uses casts and pointer arithmetic if needed. 
-genLoad_slow :: CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData -genLoad_slow e ty meta = do +genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData +genLoad_slow atomic e ty meta = do (iptr, stmts, tops) <- exprToVar e dflags <- getDynFlags case getVarType iptr of LMPointer _ -> do (dvar, load) <- doExpr (cmmToLlvmType ty) - (MExpr meta $ Load iptr) + (MExpr meta $ loadInstr iptr) return (dvar, stmts `snocOL` load, tops) i@(LMInt _) | i == llvmWord dflags -> do let pty = LMPointer $ cmmToLlvmType ty (ptr, cast) <- doExpr pty $ Cast LM_Inttoptr iptr pty (dvar, load) <- doExpr (cmmToLlvmType ty) - (MExpr meta $ Load ptr) + (MExpr meta $ loadInstr ptr) return (dvar, stmts `snocOL` cast `snocOL` load, tops) other -> do dflags <- getDynFlags @@ -1357,6 +1377,9 @@ genLoad_slow e ty meta = do "Size of Ptr: " ++ show (llvmPtrBits dflags) ++ ", Size of var: " ++ show (llvmWidthInBits dflags other) ++ ", Var: " ++ showSDoc dflags (ppr iptr))) + where + loadInstr ptr | atomic = ALoad SyncSeqCst False ptr + | otherwise = Load ptr -- | Handle CmmReg expression. This will return a pointer to the stack diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs index a6f4cab7bd..34782dfc1c 100644 --- a/compiler/nativeGen/CPrim.hs +++ b/compiler/nativeGen/CPrim.hs @@ -1,11 +1,16 @@ -- | Generating C symbol names emitted by the compiler. module CPrim - ( popCntLabel + ( atomicReadLabel + , atomicWriteLabel + , atomicRMWLabel + , cmpxchgLabel + , popCntLabel , bSwapLabel , word2FloatLabel ) where import CmmType +import CmmMachOp import Outputable popCntLabel :: Width -> String @@ -31,3 +36,46 @@ word2FloatLabel w = "hs_word2float" ++ pprWidth w pprWidth W32 = "32" pprWidth W64 = "64" pprWidth w = pprPanic "word2FloatLabel: Unsupported word width " (ppr w) + +atomicRMWLabel :: Width -> AtomicMachOp -> String +atomicRMWLabel w amop = "hs_atomic_" ++ pprFunName amop ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "atomicRMWLabel: Unsupported word width " (ppr w) + + pprFunName AMO_Add = "add" + pprFunName AMO_Sub = "sub" + pprFunName AMO_And = "and" + pprFunName AMO_Nand = "nand" + pprFunName AMO_Or = "or" + pprFunName AMO_Xor = "xor" + +cmpxchgLabel :: Width -> String +cmpxchgLabel w = "hs_cmpxchg" ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "cmpxchgLabel: Unsupported word width " (ppr w) + +atomicReadLabel :: Width -> String +atomicReadLabel w = "hs_atomicread" ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "atomicReadLabel: Unsupported word width " (ppr w) + +atomicWriteLabel :: Width -> String +atomicWriteLabel w = "hs_atomicwrite" ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "atomicWriteLabel: Unsupported word width " (ppr w) diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 91651e6065..22a2c7cb6a 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1160,6 +1160,10 @@ genCCall' dflags gcp target dest_regs args0 MO_BSwap w -> (fsLit $ bSwapLabel w, False) MO_PopCnt w -> (fsLit $ popCntLabel w, False) + MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False) + MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False) + MO_AtomicRead w -> (fsLit $ 
atomicReadLabel w, False) + MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False) MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index f5e61d0a8f..51f89d629f 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -654,6 +654,10 @@ outOfLineMachOp_table mop MO_BSwap w -> fsLit $ bSwapLabel w MO_PopCnt w -> fsLit $ popCntLabel w + MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop + MO_Cmpxchg w -> fsLit $ cmpxchgLabel w + MO_AtomicRead w -> fsLit $ atomicReadLabel w + MO_AtomicWrite w -> fsLit $ atomicWriteLabel w MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index fa93767fa3..140e8b2992 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1761,6 +1761,93 @@ genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do where lbl = mkCmmCodeLabel primPackageId (fsLit (word2FloatLabel width)) +genCCall dflags _ (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do + Amode amode addr_code <- getAmode addr + arg <- getNewRegNat size + arg_code <- getAnyReg n + use_sse2 <- sse2Enabled + let platform = targetPlatform dflags + dst_r = getRegisterReg platform use_sse2 (CmmLocal dst) + code <- op_code dst_r arg amode + return $ addr_code `appOL` arg_code arg `appOL` code + where + -- Code for the operation + op_code :: Reg -- ^ The destination reg + -> Reg -- ^ Register containing argument + -> AddrMode -- ^ Address of location to mutate + -> NatM (OrdList Instr) + op_code dst_r arg amode = case amop of + -- In the common case where dst_r is a virtual register the + -- final move should go away, because it's the last use of arg + -- and the first use of dst_r. + AMO_Add -> return $ toOL [ LOCK + , XADD size (OpReg arg) (OpAddr amode) + , MOV size (OpReg arg) (OpReg dst_r) + ] + AMO_Sub -> return $ toOL [ NEGI size (OpReg arg) + , LOCK + , XADD size (OpReg arg) (OpAddr amode) + , MOV size (OpReg arg) (OpReg dst_r) + ] + AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND size src dst) + AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND size src dst + , NOT size dst + ]) + AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR size src dst) + AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR size src dst) + where + -- Simulate operation that lacks a dedicated instruction using + -- cmpxchg. 
+ cmpxchg_code :: (Operand -> Operand -> OrdList Instr) + -> NatM (OrdList Instr) + cmpxchg_code instrs = do + lbl <- getBlockIdNat + tmp <- getNewRegNat size + return $ toOL + [ MOV size (OpAddr amode) (OpReg eax) + , JXX ALWAYS lbl + , NEWBLOCK lbl + -- Keep old value so we can return it: + , MOV size (OpReg eax) (OpReg dst_r) + , MOV size (OpReg eax) (OpReg tmp) + ] + `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL + [ LOCK + , CMPXCHG size (OpReg tmp) (OpAddr amode) + , JXX NE lbl + ] + + size = intSize width + +genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do + load_code <- intLoadCode (MOV (intSize width)) addr + let platform = targetPlatform dflags + use_sse2 <- sse2Enabled + return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst))) + +genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do + assignMem_IntCode (intSize width) addr val + +genCCall dflags _ (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do + Amode amode addr_code <- getAmode addr + newval <- getNewRegNat size + newval_code <- getAnyReg new + oldval <- getNewRegNat size + oldval_code <- getAnyReg old + use_sse2 <- sse2Enabled + let platform = targetPlatform dflags + dst_r = getRegisterReg platform use_sse2 (CmmLocal dst) + code = toOL + [ MOV size (OpReg oldval) (OpReg eax) + , LOCK + , CMPXCHG size (OpReg newval) (OpAddr amode) + , MOV size (OpReg eax) (OpReg dst_r) + ] + return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval + `appOL` code + where + size = intSize width + genCCall _ is32Bit target dest_regs args | is32Bit = genCCall32 target dest_regs args | otherwise = genCCall64 target dest_regs args @@ -2385,6 +2472,11 @@ outOfLineCmmOp mop res args MO_PopCnt _ -> fsLit "popcnt" MO_BSwap _ -> fsLit "bswap" + MO_AtomicRMW _ _ -> fsLit "atomicrmw" + MO_AtomicRead _ -> fsLit "atomicread" + MO_AtomicWrite _ -> fsLit "atomicwrite" + MO_Cmpxchg _ -> fsLit "cmpxchg" + MO_UF_Conv _ -> unsupported MO_S_QuotRem {} -> unsupported diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 05fff9be96..ac91747171 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -327,6 +327,10 @@ data Instr | PREFETCH PrefetchVariant Size Operand -- prefetch Variant, addr size, address to prefetch -- variant can be NTA, Lvl0, Lvl1, or Lvl2 + | LOCK -- lock prefix + | XADD Size Operand Operand -- src (r), dst (r/m) + | CMPXCHG Size Operand Operand -- src (r), dst (r/m), eax implicit + data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2 @@ -337,6 +341,8 @@ data Operand +-- | Returns which registers are read and written as a (read, written) +-- pair. x86_regUsageOfInstr :: Platform -> Instr -> RegUsage x86_regUsageOfInstr platform instr = case instr of @@ -428,10 +434,21 @@ x86_regUsageOfInstr platform instr -- note: might be a better way to do this PREFETCH _ _ src -> mkRU (use_R src []) [] + LOCK -> noUsage + XADD _ src dst -> usageMM src dst + CMPXCHG _ src dst -> usageRMM src dst (OpReg eax) _other -> panic "regUsage: unrecognised instr" - where + -- # Definitions + -- + -- Written: If the operand is a register, it's written. If it's an + -- address, registers mentioned in the address are read. + -- + -- Modified: If the operand is a register, it's both read and + -- written. If it's an address, registers mentioned in the address + -- are read. 
+ -- 2 operand form; first operand Read; second Written usageRW :: Operand -> Operand -> RegUsage usageRW op (OpReg reg) = mkRU (use_R op []) [reg] @@ -444,6 +461,18 @@ x86_regUsageOfInstr platform instr usageRM op (OpAddr ea) = mkRUR (use_R op $! use_EA ea []) usageRM _ _ = panic "X86.RegInfo.usageRM: no match" + -- 2 operand form; first operand Modified; second Modified + usageMM :: Operand -> Operand -> RegUsage + usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst] + usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src] + usageMM _ _ = panic "X86.RegInfo.usageMM: no match" + + -- 3 operand form; first operand Read; second Modified; third Modified + usageRMM :: Operand -> Operand -> Operand -> RegUsage + usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg] + usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg] + usageRMM _ _ _ = panic "X86.RegInfo.usageRMM: no match" + -- 1 operand form; operand Modified usageM :: Operand -> RegUsage usageM (OpReg reg) = mkRU [reg] [reg] @@ -476,6 +505,7 @@ x86_regUsageOfInstr platform instr where src' = filter (interesting platform) src dst' = filter (interesting platform) dst +-- | Is this register interesting for the register allocator? interesting :: Platform -> Reg -> Bool interesting _ (RegVirtual _) = True interesting platform (RegReal (RealRegSingle i)) = isFastTrue (freeReg platform i) @@ -483,6 +513,8 @@ interesting _ (RegReal (RealRegPair{})) = panic "X86.interesting: no re +-- | Applies the supplied function to all registers in instructions. +-- Typically used to change virtual registers to real registers. x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr x86_patchRegsOfInstr instr env = case instr of @@ -571,6 +603,10 @@ x86_patchRegsOfInstr instr env PREFETCH lvl size src -> PREFETCH lvl size (patchOp src) + LOCK -> instr + XADD sz src dst -> patch2 (XADD sz) src dst + CMPXCHG sz src dst -> patch2 (CMPXCHG sz) src dst + _other -> panic "patchRegs: unrecognised instr" where diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 459c041ba5..7771c02512 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -886,6 +886,14 @@ pprInstr GFREE ptext (sLit "\tffree %st(4) ;ffree %st(5)") ] +-- Atomics + +pprInstr LOCK = ptext (sLit "\tlock") + +pprInstr (XADD size src dst) = pprSizeOpOp (sLit "xadd") size src dst + +pprInstr (CMPXCHG size src dst) = pprSizeOpOp (sLit "cmpxchg") size src dst + pprInstr _ = panic "X86.Ppr.pprInstr: no match" diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 4851315eb4..4faa585246 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -1363,19 +1363,79 @@ primop SetByteArrayOp "setByteArray#" GenPrimOp code_size = { primOpCodeSizeForeignCall + 4 } can_fail = True +-- Atomic operations + +primop AtomicReadByteArrayOp_Int "atomicReadIntArray#" GenPrimOp + MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) + {Given an array and an offset in Int units, read an element. The + index is assumed to be in bounds. Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop AtomicWriteByteArrayOp_Int "atomicWriteIntArray#" GenPrimOp + MutableByteArray# s -> Int# -> Int# -> State# s -> State# s + {Given an array and an offset in Int units, write an element. The + index is assumed to be in bounds. 
Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + primop CasByteArrayOp_Int "casIntArray#" GenPrimOp MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) - {Machine-level atomic compare and swap on a word within a ByteArray.} - with - out_of_line = True - has_side_effects = True + {Given an array, an offset in Int units, the expected old value, and + the new value, perform an atomic compare and swap i.e. write the new + value if the current value matches the provided old value. Returns + the value of the element before the operation. Implies a full memory + barrier.} + with has_side_effects = True + can_fail = True primop FetchAddByteArrayOp_Int "fetchAddIntArray#" GenPrimOp MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #) - {Machine-level word-sized fetch-and-add within a ByteArray.} - with - out_of_line = True - has_side_effects = True + {Given an array, and offset in Int units, and a value to add, + atomically add the value to the element. Returns the value of the + element before the operation. Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop FetchSubByteArrayOp_Int "fetchSubIntArray#" GenPrimOp + MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #) + {Given an array, and offset in Int units, and a value to subtract, + atomically substract the value to the element. Returns the value of + the element before the operation. Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop FetchAndByteArrayOp_Int "fetchAndIntArray#" GenPrimOp + MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #) + {Given an array, and offset in Int units, and a value to AND, + atomically AND the value to the element. Returns the value of the + element before the operation. Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop FetchNandByteArrayOp_Int "fetchNandIntArray#" GenPrimOp + MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #) + {Given an array, and offset in Int units, and a value to NAND, + atomically NAND the value to the element. Returns the value of the + element before the operation. Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop FetchOrByteArrayOp_Int "fetchOrIntArray#" GenPrimOp + MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #) + {Given an array, and offset in Int units, and a value to OR, + atomically OR the value to the element. Returns the value of the + element before the operation. Implies a full memory barrier.} + with has_side_effects = True + can_fail = True + +primop FetchXorByteArrayOp_Int "fetchXorIntArray#" GenPrimOp + MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #) + {Given an array, and offset in Int units, and a value to XOR, + atomically XOR the value to the element. Returns the value of the + element before the operation. Implies a full memory barrier.} + with has_side_effects = True + can_fail = True ------------------------------------------------------------------------ |
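
To make the compare-and-swap convention documented above concrete, here is a hedged sketch of a retry loop built on `casIntArray#` and `atomicReadIntArray#`: the primop returns the element's previous value, so the caller detects success by comparing that result against the expected old value. `atomicModifyIntArray` is a hypothetical helper, not something this patch introduces.

```haskell
{-# LANGUAGE MagicHash, UnboxedTuples #-}
-- Hedged sketch of a CAS retry loop over a MutableByteArray# element.
module CasLoop (atomicModifyIntArray) where

import GHC.Exts
import GHC.IO (IO (..))

-- Apply a pure function to the Int element at the given index,
-- retrying until the CAS succeeds; returns the value that was replaced.
atomicModifyIntArray :: MutableByteArray# RealWorld -> Int -> (Int -> Int) -> IO Int
atomicModifyIntArray mba (I# ix) f = IO loop
  where
    loop s0 =
      case atomicReadIntArray# mba ix s0 of          -- read the current value
        (# s1, old #) -> case f (I# old) of
          I# new ->
            case casIntArray# mba ix old new s1 of   -- try to install the new value
              (# s2, prev #)
                | isTrue# (prev ==# old) -> (# s2, I# old #)  -- CAS succeeded
                | otherwise              -> loop s2           -- lost a race: retry
```

With the X86 changes above, each `casIntArray#` here becomes an inline `lock cmpxchg` rather than a call to an out-of-line primop.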
