summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/CmmMachOp.hs4
-rw-r--r--compiler/cmm/PprC.hs1
-rw-r--r--compiler/codeGen/StgCmmPrim.hs47
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs21
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs1
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs1
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs3
-rw-r--r--compiler/prelude/primops.txt.pp18
8 files changed, 96 insertions, 0 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 0f18029d20..fae84e5d53 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -517,6 +517,10 @@ data CallishMachOp
| MO_WriteBarrier
| MO_Touch -- Keep variables live (when using interior pointers)
+ -- Prefetch
+ | MO_Prefetch_Data -- Prefetch hint. May change program performance but not
+ -- program behavior.
+
-- Note that these three MachOps all take 1 extra parameter than the
-- standard C lib versions. The extra (last) parameter contains
-- alignment of the pointers. Used for optimisation in backends.
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index cda68ef39e..45c415f35a 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -759,6 +759,7 @@ pprCallishMachOp_for_C mop
MO_Add2 {} -> unsupported
MO_U_Mul2 {} -> unsupported
MO_Touch -> unsupported
+ MO_Prefetch_Data -> unsupported
where unsupported = panic ("pprCallishMachOp_for_C: " ++ show mop
++ " not supported!")
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 4e0d773097..4005f6d9b4 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -265,6 +265,15 @@ emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
emitPrimOp _ res@[] TouchOp args@[_arg]
= do emitPrimCall res MO_Touch args
+-- NB: there are deliberately no single-argument equations for the prefetch
+-- primops (PrefetchByteArrayOp, PrefetchMutableByteArrayOp, PrefetchAddrOp)
+-- next to TouchOp.  Those primops always carry two value arguments (a base
+-- and an index), and two of them also produce a result, so an equation of
+-- the shape `res@[] ... args@[_arg]` could never match.  They are translated
+-- by the equations further down that dispatch to doPrefetchByteArrayOp and
+-- doPrefetchAddrOp, which also apply the index (and the array header offset).
+
-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
emitPrimOp dflags [res] ByteArrayContents_Char [arg]
= emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
@@ -619,6 +628,11 @@ emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
doVecInsertOp Nothing vec2b64 v e i res
+-- Prefetch
+-- The result registers and the argument list are passed on unchanged; the
+-- helpers panic unless exactly two arguments are supplied.  The immutable
+-- and mutable byte array primops share doPrefetchByteArrayOp, while
+-- PrefetchAddrOp goes through doPrefetchAddrOp.
+emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
+emitPrimOp _ res PrefetchMutableByteArrayOp args = doPrefetchByteArrayOp res args
+emitPrimOp _ res PrefetchAddrOp args = doPrefetchAddrOp res args
+
-- The rest just translate straightforwardly
emitPrimOp dflags [res] op [arg]
| nopOp op
@@ -1265,6 +1279,39 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
wid :: Width
wid = typeWidth (vecElemType ty)
+------------------------------------------------------------------------------
+-- Helpers for translating prefetching.
+
+-- | Translate a byte array prefetch.  The initial offset handed to
+-- 'mkBasicPrefetch' is @arrWordsHdrSize dflags@.  Panics unless given
+-- exactly two arguments (the array and the index).
+doPrefetchByteArrayOp :: [LocalReg] -> [CmmExpr] -> FCode ()
+doPrefetchByteArrayOp res args = case args of
+    [addr, idx] -> do
+        dflags <- getDynFlags
+        let hdrSize = arrWordsHdrSize dflags
+        mkBasicPrefetch hdrSize res addr idx
+    _ -> panic "StgCmmPrim: doPrefetchByteArrayOp"
+
+-- | Translate a prefetch relative to a raw address: the initial offset
+-- handed to 'mkBasicPrefetch' is zero.  Panics unless given exactly two
+-- arguments (the address and the index).
+doPrefetchAddrOp :: [LocalReg] -> [CmmExpr] -> FCode ()
+doPrefetchAddrOp res args = case args of
+    [addr, idx] -> mkBasicPrefetch 0 res addr idx
+    _           -> panic "StgCmmPrim: doPrefetchAddrOp"
+
+-- | Emit an 'MO_Prefetch_Data' call whose single operand is built from
+-- @base@ displaced by @off@ bytes and then indexed by @idx@ at width 'W8'.
+-- Afterwards, if exactly one result register was requested, @base@ itself
+-- is copied into it; with no result register nothing more is emitted.
+mkBasicPrefetch :: ByteOff    -- Initial offset in bytes
+                -> [LocalReg] -- Destination
+                -> CmmExpr    -- Base address
+                -> CmmExpr    -- Index
+                -> FCode ()
+mkBasicPrefetch off res base idx = do
+    dflags <- getDynFlags
+    let start  = cmmOffsetB dflags base off
+        target = cmmIndexExpr dflags W8 start idx
+    -- The prefetch call itself never has a result register.
+    emitPrimCall [] MO_Prefetch_Data [target]
+    writeResult res
+  where
+    -- The value returned to the caller is the unmodified base.
+    writeResult []    = return ()
+    writeResult [reg] = emitAssign (CmmLocal reg) base
+    writeResult _     = panic "StgCmmPrim: mkBasicPrefetch"
+
-- ----------------------------------------------------------------------------
-- Copying byte arrays
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 969bca8ec0..609be3d3b0 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -201,6 +201,25 @@ genCall _ (PrimTarget (MO_UF_Conv _)) [_] args =
panic $ "genCall: Too many arguments to MO_UF_Conv. " ++
"Can only handle 1, given" ++ show (length args) ++ "."
+-- Handle prefetching data.
+--
+-- The call goes to the function obtained from getFunPtr for this target
+-- (cmmPrimOpFunctions names it "llvm.prefetch"), declared here as
+-- void (i8*, i32, i32, i32).  Only the address comes from Cmm; the three
+-- trailing i32 operands are fixed literals.  Per the LLVM language
+-- reference they are rw = 0 (read), locality = 3 and cache type = 1 (data).
+genCall env t@(PrimTarget MO_Prefetch_Data) [] args = do
+    let dflags       = getDflags env
+        paramTys     = [i8Ptr, i32, i32, i32]
+        mkFunTy name = LMFunction $
+            LlvmFunctionDecl name ExternallyVisible CC_Ccc LMVoid FixedArgs
+                             (tysToParams paramTys) Nothing
+        (_, hints)   = foreignTargetHints t
+
+    (env1, rawArgs, stmts1, top1) <- arg_vars env (zip args hints) ([], nilOL, [])
+    (env2, fptr, stmts2, top2)    <- getFunPtr env1 mkFunTy t
+    -- zip stops at the shorter list, so only the supplied operands are cast.
+    (castArgs, stmts3)            <- castVars dflags (zip rawArgs paramTys)
+
+    let fixedOperands = map (mkIntLit i32) [0, 3, 1]
+        call          = Expr $ Call StdCall fptr (castArgs ++ fixedOperands) []
+        preamble      = stmts1 `appOL` stmts2 `appOL` stmts3
+                               `appOL` trashStmts dflags
+    return (env2, preamble `snocOL` call, top1 ++ top2)
+
-- Handle popcnt function specifically since GHC only really has i32 and i64
-- types and things like Word8 are backed by an i32 and just present a logical
-- i8 range. So we must handle conversions from i32 to i8 explicitly as LLVM
@@ -521,6 +540,8 @@ cmmPrimOpFunctions env mop
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ show (widthToLlvmInt w)
+ MO_Prefetch_Data -> fsLit "llvm.prefetch"
+
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index e9a5b433f6..92eff362f8 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -1164,6 +1164,7 @@ genCCall' dflags gcp target dest_regs args0
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
+ MO_Prefetch_Data -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported")
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 880b5c6bba..c6497e1248 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -656,6 +656,7 @@ outOfLineMachOp_table mop
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
+ MO_Prefetch_Data -> unsupported
where unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported here")
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 4177cadbf6..c6cdd8a4d2 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1657,6 +1657,8 @@ genCCall _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL
+-- Prefetching is only a hint, so this backend emits nothing for it: the
+-- equation ignores the destination registers and arguments and returns nilOL.
+genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL
+
genCCall is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
args@[src] = do
sse4_2 <- sse4_2Enabled
@@ -2326,6 +2328,7 @@ outOfLineCmmOp mop res args
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
+ MO_Prefetch_Data -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported here")
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index cbbea5115a..a5b0fec908 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -2678,6 +2678,24 @@ primop WriteOffAddrOp_Int64AsInt64X2 "writeInt64OffAddrAsInt64X2#" GenPrimOp
llvm_only = True
------------------------------------------------------------------------
+section "Prefetch"
+ {Prefetch operations}
+------------------------------------------------------------------------
+
+-- Prefetch hint taking a ByteArray# and an Int# index.  The result has the
+-- same type as the array argument.  Marked llvm_only.
+primop PrefetchByteArrayOp "prefetchByteArray#" GenPrimOp
+ ByteArray# -> Int# -> ByteArray#
+ with llvm_only = True
+
+-- Prefetch hint taking a MutableByteArray# and an Int# index.  It is
+-- threaded through the State# token instead of returning the array, and is
+-- therefore flagged has_side_effects.  Marked llvm_only.
+primop PrefetchMutableByteArrayOp "prefetchMutableByteArray#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> State# s
+ with has_side_effects = True
+ llvm_only = True
+
+-- Prefetch hint taking an Addr# and an Int# index.  The result has the same
+-- type as the address argument.  Marked llvm_only.
+primop PrefetchAddrOp "prefetchAddr#" GenPrimOp
+ Addr# -> Int# -> Addr#
+ with llvm_only = True
+
+------------------------------------------------------------------------
--- ---
------------------------------------------------------------------------