| author | Johan Tibell <johan.tibell@gmail.com> | 2014-06-27 13:48:24 +0200 |
|---|---|---|
| committer | Johan Tibell <johan.tibell@gmail.com> | 2014-06-30 22:12:45 +0200 |
| commit | 4ee4ab01c1d97845aecb7707ad2f9a80933e7a49 (patch) | |
| tree | 08991e97b0109b81447968ba62e4753e8209a738 /compiler/llvmGen/LlvmCodeGen/CodeGen.hs | |
| parent | aed1723f97e0539d5ab35222b180c1552a5f4cfc (diff) | |
| download | haskell-4ee4ab01c1d97845aecb7707ad2f9a80933e7a49.tar.gz | |
Re-add more primops for atomic ops on byte arrays
This is the second attempt to add this functionality. The first
attempt was reverted in 950fcae46a82569e7cd1fba1637a23b419e00ecd, due
to register allocator failure on x86. Given how the register
allocator currently works, we don't have enough registers on x86 to
support cmpxchg using complicated addressing modes. Instead we fall
back to a simpler addressing mode on x86.
Adds the following primops:
* atomicReadIntArray#
* atomicWriteIntArray#
* fetchSubIntArray#
* fetchOrIntArray#
* fetchXorIntArray#
* fetchAndIntArray#
Makes these pre-existing out-of-line primops inline:
* fetchAddIntArray#
* casIntArray#
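For orientation, below is a minimal sketch of how these primops can be wrapped in ordinary IO code, assuming a GHC that ships this patch and re-exports the primops through GHC.Exts. The module and function names are illustrative and not part of the patch, and the exact value returned by fetchAddIntArray# (old vs. new contents) should be checked against the GHC.Exts documentation for your compiler.

```haskell
{-# LANGUAGE MagicHash, UnboxedTuples #-}

-- Hypothetical wrapper module, not part of this patch.
module AtomicCounter (Counter, newCounter, readCounter, incrCounter) where

import GHC.Exts
import GHC.IO (IO(..))

-- A single machine word stored in a one-word mutable byte array.
data Counter = Counter (MutableByteArray# RealWorld)

-- Allocate one word (8 bytes covers both 32- and 64-bit targets)
-- and publish its initial value with an atomic write.
newCounter :: Int -> IO Counter
newCounter (I# n) = IO $ \s0 ->
  case newByteArray# 8# s0 of
    (# s1, mba #) -> case atomicWriteIntArray# mba 0# n s1 of
      s2 -> (# s2, Counter mba #)

-- Sequentially consistent read of the counter.
readCounter :: Counter -> IO Int
readCounter (Counter mba) = IO $ \s0 ->
  case atomicReadIntArray# mba 0# s0 of
    (# s1, n #) -> (# s1, I# n #)

-- Atomic fetch-and-add; see the caveat above about which value is returned.
incrCounter :: Int -> Counter -> IO Int
incrCounter (I# d) (Counter mba) = IO $ \s0 ->
  case fetchAddIntArray# mba 0# d s0 of
    (# s1, n #) -> (# s1, I# n #)
```

Because the patch makes fetchAddIntArray# and casIntArray# inline, a call like incrCounter should compile down to a single atomic read-modify-write (an LLVM atomicrmw under -fllvm) rather than an out-of-line call into the RTS.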
Diffstat (limited to 'compiler/llvmGen/LlvmCodeGen/CodeGen.hs')
| file mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 71 |
1 file changed, 47 insertions, 24 deletions
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 517553516b..4a56600937 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -15,6 +15,7 @@ import BlockId
 import CodeGen.Platform ( activeStgRegs, callerSaves )
 import CLabel
 import Cmm
+import CPrim
 import PprCmm
 import CmmUtils
 import Hoopl
@@ -32,6 +33,7 @@ import Unique
 import Data.List ( nub )
 import Data.Maybe ( catMaybes )
 
+type Atomic = Bool
 type LlvmStatements = OrdList LlvmStatement
 
 -- -----------------------------------------------------------------------------
@@ -228,6 +230,17 @@ genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
 genCall t@(PrimTarget (MO_BSwap w)) dsts args =
     genCallSimpleCast w t dsts args
 
+genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do
+  dstV <- getCmmReg (CmmLocal dst)
+  (v1, stmts, top) <- genLoad True addr (localRegType dst)
+  let stmt1 = Store v1 dstV
+  return (stmts `snocOL` stmt1, top)
+
+-- TODO: implement these properly rather than calling to RTS functions.
+-- genCall t@(PrimTarget (MO_AtomicWrite width)) [] [addr, val] = undefined
+-- genCall t@(PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = undefined
+-- genCall t@(PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = undefined
+
 -- Handle memcpy function specifically since llvm's intrinsic version takes
 -- some extra parameters.
 genCall t@(PrimTarget op) [] args'
@@ -548,7 +561,6 @@ cmmPrimOpFunctions mop = do
     (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
 
-
     MO_S_QuotRem {}  -> unsupported
     MO_U_QuotRem {}  -> unsupported
     MO_U_QuotRem2 {} -> unsupported
 
@@ -558,6 +570,12 @@ cmmPrimOpFunctions mop = do
     MO_Touch         -> unsupported
     MO_UF_Conv _     -> unsupported
 
+    MO_AtomicRead _  -> unsupported
+
+    MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
+    MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
+    MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
+
 -- | Tail function calls
 
 genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
@@ -849,7 +867,7 @@ exprToVarOpt opt e = case e of
         -> genLit opt lit
 
     CmmLoad e' ty
-        -> genLoad e' ty
+        -> genLoad False e' ty
 
     -- Cmmreg in expression is the value, so must load. If you want actual
    -- reg pointer, call getCmmReg directly.
@@ -1268,41 +1286,41 @@ genMachOp_slow _ _ _ = panic "genMachOp: More then 2 expressions in MachOp!"
 
 -- | Handle CmmLoad expression.
-genLoad :: CmmExpr -> CmmType -> LlvmM ExprData
+genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
 
 -- First we try to detect a few common cases and produce better code for
 -- these then the default case. We are mostly trying to detect Cmm code
 -- like I32[Sp + n] and use 'getelementptr' operations instead of the
 -- generic case that uses casts and pointer arithmetic
-genLoad e@(CmmReg (CmmGlobal r)) ty
-    = genLoad_fast e r 0 ty
+genLoad atomic e@(CmmReg (CmmGlobal r)) ty
+    = genLoad_fast atomic e r 0 ty
 
-genLoad e@(CmmRegOff (CmmGlobal r) n) ty
-    = genLoad_fast e r n ty
+genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
+    = genLoad_fast atomic e r n ty
 
-genLoad e@(CmmMachOp (MO_Add _) [
+genLoad atomic e@(CmmMachOp (MO_Add _) [
             (CmmReg (CmmGlobal r)),
             (CmmLit (CmmInt n _))])
         ty
-    = genLoad_fast e r (fromInteger n) ty
+    = genLoad_fast atomic e r (fromInteger n) ty
 
-genLoad e@(CmmMachOp (MO_Sub _) [
+genLoad atomic e@(CmmMachOp (MO_Sub _) [
            (CmmReg (CmmGlobal r)),
           (CmmLit (CmmInt n _))])
        ty
-    = genLoad_fast e r (negate $ fromInteger n) ty
+    = genLoad_fast atomic e r (negate $ fromInteger n) ty
 
 -- generic case
-genLoad e ty
+genLoad atomic e ty
     = do other <- getTBAAMeta otherN
-         genLoad_slow e ty other
+         genLoad_slow atomic e ty other
 
 -- | Handle CmmLoad expression.
 -- This is a special case for loading from a global register pointer
 -- offset such as I32[Sp+8].
-genLoad_fast :: CmmExpr -> GlobalReg -> Int -> CmmType
-                -> LlvmM ExprData
-genLoad_fast e r n ty = do
+genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
+             -> LlvmM ExprData
+genLoad_fast atomic e r n ty = do
     dflags <- getDynFlags
     (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
     meta <- getTBAARegMeta r
@@ -1315,7 +1333,7 @@ genLoad_fast e r n ty = do
             case grt == ty' of
                  -- were fine
                  True -> do
-                     (var, s3) <- doExpr ty' (MExpr meta $ Load ptr)
+                     (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
                      return (var, s1 `snocOL` s2 `snocOL` s3,
                                  [])
 
@@ -1323,32 +1341,34 @@ genLoad_fast e r n ty = do
                  False -> do
                      let pty = pLift ty'
                      (ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
-                     (var, s4) <- doExpr ty' (MExpr meta $ Load ptr')
+                     (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
                      return (var, s1 `snocOL` s2 `snocOL` s3 `snocOL` s4,
                                  [])
 
         -- If its a bit type then we use the slow method since
        -- we can't avoid casting anyway.
-        False -> genLoad_slow e ty meta
-
+        False -> genLoad_slow atomic e ty meta
+  where
+    loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
+                  | otherwise = Load ptr
 
 -- | Handle Cmm load expression.
 -- Generic case. Uses casts and pointer arithmetic if needed.
-genLoad_slow :: CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
-genLoad_slow e ty meta = do
+genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
+genLoad_slow atomic e ty meta = do
     (iptr, stmts, tops) <- exprToVar e
     dflags <- getDynFlags
     case getVarType iptr of
         LMPointer _ -> do
                     (dvar, load) <- doExpr (cmmToLlvmType ty)
-                                    (MExpr meta $ Load iptr)
+                                    (MExpr meta $ loadInstr iptr)
                     return (dvar, stmts `snocOL` load, tops)
 
         i@(LMInt _) | i == llvmWord dflags -> do
                    let pty = LMPointer $ cmmToLlvmType ty
                     (ptr, cast) <- doExpr pty $ Cast LM_Inttoptr iptr pty
                     (dvar, load) <- doExpr (cmmToLlvmType ty)
-                                    (MExpr meta $ Load ptr)
+                                    (MExpr meta $ loadInstr ptr)
                     return (dvar, stmts `snocOL` cast `snocOL` load, tops)
 
         other -> do dflags <- getDynFlags
@@ -1357,6 +1377,9 @@ genLoad_slow e ty meta = do
                         "Size of Ptr: " ++ show (llvmPtrBits dflags) ++
                         ", Size of var: " ++ show (llvmWidthInBits dflags other) ++
                         ", Var: " ++ showSDoc dflags (ppr iptr)))
+  where
+    loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
+                  | otherwise = Load ptr
 
 -- | Handle CmmReg expression. This will return a pointer to the stack
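A note on the shape of the change above: genCall handles MO_AtomicRead by simply calling genLoad True, and the new Atomic flag is only inspected at the leaves (the local loadInstr helpers), which pick ALoad SyncSeqCst over a plain Load; everything in between, including the getelementptr fast path, is shared. The following is a stripped-down sketch of that idiom using stand-in types, not GHC's real LlvmCodeGen types.

```haskell
-- Illustrative stand-ins only; these are not the LlvmVar/LlvmExpression
-- types from GHC's LLVM backend.
module LoadFlagSketch where

type Atomic = Bool

newtype Var = Var String deriving Show

data Instr
  = Load Var         -- plain load
  | ALoadSeqCst Var  -- sequentially consistent atomic load
  deriving Show

-- Only the leaf that builds the instruction looks at the flag; the callers
-- (the fast and slow addressing paths) just thread it through unchanged.
loadInstr :: Atomic -> Var -> Instr
loadInstr atomic ptr
  | atomic    = ALoadSeqCst ptr
  | otherwise = Load ptr

-- Ordinary CmmLoad expressions pass False, while the MO_AtomicRead case in
-- genCall passes True, reusing all of the address-selection logic for free.
genLoad :: Atomic -> Var -> Instr
genLoad = loadInstr
```

Threading a plain Bool (type Atomic = Bool) rather than introducing a separate atomic-load code path keeps the addressing-mode analysis in genLoad_fast and genLoad_slow from being duplicated.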
