diff options
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 4 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 47 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 21 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 3 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 18 |
8 files changed, 96 insertions, 0 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 0f18029d20..fae84e5d53 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -517,6 +517,10 @@ data CallishMachOp | MO_WriteBarrier | MO_Touch -- Keep variables live (when using interior pointers) + -- Prefetch + | MO_Prefetch_Data -- Prefetch hint. May change program performance but not + -- program behavior. + -- Note that these three MachOps all take 1 extra parameter than the -- standard C lib versions. The extra (last) parameter contains -- alignment of the pointers. Used for optimisation in backends. diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index cda68ef39e..45c415f35a 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -759,6 +759,7 @@ pprCallishMachOp_for_C mop MO_Add2 {} -> unsupported MO_U_Mul2 {} -> unsupported MO_Touch -> unsupported + MO_Prefetch_Data -> unsupported where unsupported = panic ("pprCallishMachOp_for_C: " ++ show mop ++ " not supported!") diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 4e0d773097..4005f6d9b4 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -265,6 +265,15 @@ emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg] emitPrimOp _ res@[] TouchOp args@[_arg] = do emitPrimCall res MO_Touch args +emitPrimOp _ res@[] PrefetchByteArrayOp args@[_arg] + = do emitPrimCall res MO_Prefetch_Data args + +emitPrimOp _ res@[] PrefetchMutableByteArrayOp args@[_arg] + = do emitPrimCall res MO_Prefetch_Data args + +emitPrimOp _ res@[] PrefetchAddrOp args@[_arg] + = do emitPrimCall res MO_Prefetch_Data args + -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) emitPrimOp dflags [res] ByteArrayContents_Char [arg] = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags)) @@ -619,6 +628,11 @@ emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] = emitPrimOp _ [res] Int64X2InsertOp [v,e,i] = doVecInsertOp Nothing vec2b64 v e i res +-- Prefetch +emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args +emitPrimOp _ res PrefetchMutableByteArrayOp args = doPrefetchByteArrayOp res args +emitPrimOp _ res PrefetchAddrOp args = doPrefetchAddrOp res args + -- The rest just translate straightforwardly emitPrimOp dflags [res] op [arg] | nopOp op @@ -1265,6 +1279,39 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do wid :: Width wid = typeWidth (vecElemType ty) +------------------------------------------------------------------------------ +-- Helpers for translating prefetching. + +doPrefetchByteArrayOp :: [LocalReg] + -> [CmmExpr] + -> FCode () +doPrefetchByteArrayOp res [addr,idx] + = do dflags <- getDynFlags + mkBasicPrefetch (arrWordsHdrSize dflags) res addr idx +doPrefetchByteArrayOp _ _ + = panic "StgCmmPrim: doPrefetchByteArrayOp" + +doPrefetchAddrOp :: [LocalReg] + -> [CmmExpr] + -> FCode () +doPrefetchAddrOp res [addr,idx] + = mkBasicPrefetch 0 res addr idx +doPrefetchAddrOp _ _ + = panic "StgCmmPrim: doPrefetchAddrOp" + +mkBasicPrefetch :: ByteOff -- Initial offset in bytes + -> [LocalReg] -- Destination + -> CmmExpr -- Base address + -> CmmExpr -- Index + -> FCode () +mkBasicPrefetch off res base idx + = do dflags <- getDynFlags + emitPrimCall [] MO_Prefetch_Data [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx] + case res of + [] -> return () + [reg] -> emitAssign (CmmLocal reg) base + _ -> panic "StgCmmPrim: mkBasicPrefetch" + -- ---------------------------------------------------------------------------- -- Copying byte arrays diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 969bca8ec0..609be3d3b0 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -201,6 +201,25 @@ genCall _ (PrimTarget (MO_UF_Conv _)) [_] args = panic $ "genCall: Too many arguments to MO_UF_Conv. " ++ "Can only handle 1, given" ++ show (length args) ++ "." +-- Handle prefetching data +genCall env t@(PrimTarget MO_Prefetch_Data) [] args = do + let dflags = getDflags env + argTy = [i8Ptr, i32, i32, i32] + funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible + CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing + + let (_, arg_hints) = foreignTargetHints t + let args_hints' = zip args arg_hints + (env1, argVars, stmts1, top1) <- arg_vars env args_hints' ([], nilOL, []) + (env2, fptr, stmts2, top2) <- getFunPtr env1 funTy t + (argVars', stmts3) <- castVars dflags $ zip argVars argTy + + let arguments = argVars' ++ [mkIntLit i32 0, mkIntLit i32 3, mkIntLit i32 1] + call = Expr $ Call StdCall fptr arguments [] + stmts = stmts1 `appOL` stmts2 `appOL` stmts3 + `appOL` trashStmts (getDflags env) `snocOL` call + return (env2, stmts, top1 ++ top2) + -- Handle popcnt function specifically since GHC only really has i32 and i64 -- types and things like Word8 are backed by an i32 and just present a logical -- i8 range. So we must handle conversions from i32 to i8 explicitly as LLVM @@ -521,6 +540,8 @@ cmmPrimOpFunctions env mop (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ show (widthToLlvmInt w) + MO_Prefetch_Data -> fsLit "llvm.prefetch" + MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index e9a5b433f6..92eff362f8 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1164,6 +1164,7 @@ genCCall' dflags gcp target dest_regs args0 MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported + MO_Prefetch_Data -> unsupported unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported") diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 880b5c6bba..c6497e1248 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -656,6 +656,7 @@ outOfLineMachOp_table mop MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported + MO_Prefetch_Data -> unsupported where unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported here") diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 4177cadbf6..c6cdd8a4d2 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1657,6 +1657,8 @@ genCCall _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL +genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL + genCCall is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst] args@[src] = do sse4_2 <- sse4_2Enabled @@ -2326,6 +2328,7 @@ outOfLineCmmOp mop res args MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported + MO_Prefetch_Data -> unsupported unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported here") diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index cbbea5115a..a5b0fec908 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -2678,6 +2678,24 @@ primop WriteOffAddrOp_Int64AsInt64X2 "writeInt64OffAddrAsInt64X2#" GenPrimOp llvm_only = True ------------------------------------------------------------------------ +section "Prefetch" + {Prefetch operations} +------------------------------------------------------------------------ + +primop PrefetchByteArrayOp "prefetchByteArray#" GenPrimOp + ByteArray# -> Int# -> ByteArray# + with llvm_only = True + +primop PrefetchMutableByteArrayOp "prefetchMutableByteArray#" GenPrimOp + MutableByteArray# s -> Int# -> State# s -> State# s + with has_side_effects = True + llvm_only = True + +primop PrefetchAddrOp "prefetchAddr#" GenPrimOp + Addr# -> Int# -> Addr# + with llvm_only = True + +------------------------------------------------------------------------ --- --- ------------------------------------------------------------------------ |