diff options
author | Austin Seipp <austin@well-typed.com> | 2013-10-01 21:13:14 -0500 |
---|---|---|
committer | Austin Seipp <austin@well-typed.com> | 2013-10-01 21:26:47 -0500 |
commit | fd74014079f14bd3ab50e328e52c44ef97d40e05 (patch) | |
tree | da31c992a76d3816a4f1012ceb1eb4e68d0fb556 /compiler/nativeGen | |
parent | 627d1e008cbe4d9318b2466394420a968d1659da (diff) | |
download | haskell-fd74014079f14bd3ab50e328e52c44ef97d40e05.tar.gz |
Add support for prefetch with locality levels.
This patch adds support for several new primitive operations which
support using processor-specific instructions to help guide data and
cache locality decisions. The locality levels range from 0 to 3.
For LLVM, we generate llvm.prefetch intrinsics at the proper locality
level (similar to GCC).
For x86 we generate prefetch{nta, t2, t1, t0} instructions for levels 0
through 3, respectively. On SPARC and PowerPC the prefetch operations emit
no code, so the locality levels are ignored.
This closes #8256.
Authored-by: Carter Tazio Schonwald <carter.schonwald@gmail.com>
Signed-off-by: Austin Seipp <austin@well-typed.com>
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 5 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 7 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 23 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 16 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 12 |
5 files changed, 56 insertions, 7 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 65533d8f9a..3f0e7632f8 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -912,6 +912,9 @@ genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _ genCCall' _ _ (PrimTarget MO_Touch) _ _ = return $ nilOL +genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _ + = return $ nilOL + genCCall' dflags gcp target dest_regs args0 = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeSize argReps) -- we rely on argument promotion in the codeGen @@ -1165,7 +1168,7 @@ genCCall' dflags gcp target dest_regs args0 MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported - MO_Prefetch_Data -> unsupported + (MO_Prefetch_Data _ ) -> unsupported unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported") diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 5d2b9a9d6d..5d65b427e1 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -392,7 +392,10 @@ genCCall -- In the SPARC case we don't need a barrier. 
-- genCCall (PrimTarget MO_WriteBarrier) _ _ - = do return nilOL + = return $ nilOL + +genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ + = return $ nilOL genCCall target dest_regs args0 = do @@ -657,7 +660,7 @@ outOfLineMachOp_table mop MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported - MO_Prefetch_Data -> unsupported + (MO_Prefetch_Data _) -> unsupported where unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported here") diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index e18da25347..2456688744 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1658,7 +1658,26 @@ genCCall _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL -genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL +genCCall is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] = + case n of + 0 -> genPrefetch src $ PREFETCH NTA size + 1 -> genPrefetch src $ PREFETCH Lvl2 size + 2 -> genPrefetch src $ PREFETCH Lvl1 size + 3 -> genPrefetch src $ PREFETCH Lvl0 size + l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l) + -- the c / llvm prefetch convention is 0, 1, 2, and 3 + -- the x86 corresponding names are : NTA, 2 , 1, and 0 + where + size = archWordSize is32bit + -- need to know what register width for pointers! 
+ genPrefetch inRegSrc prefetchCTor = + do + code_src <- getAnyReg inRegSrc + src_r <- getNewRegNat size + return $ code_src src_r `appOL` + (unitOL (prefetchCTor (OpAddr + ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) )) + -- prefetch always takes an address genCCall is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do dflags <- getDynFlags @@ -2361,7 +2380,7 @@ outOfLineCmmOp mop res args MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported - MO_Prefetch_Data -> unsupported + (MO_Prefetch_Data _ ) -> unsupported unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported here") diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index e584ffe8b9..d10591e37f 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -9,7 +9,7 @@ #include "HsVersions.h" #include "nativeGen/NCG.h" -module X86.Instr (Instr(..), Operand(..), JumpDest, +module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest, getJumpDestBlockId, canShortcut, shortcutStatics, shortcutJump, i386_insert_ffrees, allocMoreStack, maxSpillSlots, archWordSize) @@ -319,7 +319,14 @@ data Instr -- 1: popl %reg -- SSE4.2 - | POPCNT Size Operand Reg -- src, dst + | POPCNT Size Operand Reg -- src, dst + + -- prefetch + | PREFETCH PrefetchVariant Size Operand -- prefetch Variant, addr size, address to prefetch + -- variant can be NTA, Lvl0, Lvl1, or Lvl2 + +data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2 + data Operand = OpReg Reg -- register @@ -417,6 +424,9 @@ x86_regUsageOfInstr platform instr POPCNT _ src dst -> mkRU (use_R src []) [dst] + -- note: might be a better way to do this + PREFETCH _ _ src -> mkRU (use_R src []) [] + _other -> panic "regUsage: unrecognised instr" where @@ -557,6 +567,8 @@ x86_patchRegsOfInstr instr env POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst) + PREFETCH lvl size src -> PREFETCH lvl size (patchOp src) + _other -> panic "patchRegs: unrecognised 
instr" where diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 7f9c6901da..f38a04d069 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -577,6 +577,11 @@ pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst pprInstr (POPCNT size src dst) = pprOpOp (sLit "popcnt") size src (OpReg dst) +pprInstr (PREFETCH NTA size src ) = pprSizeOp_ (sLit "prefetchnta") size src +pprInstr (PREFETCH Lvl0 size src) = pprSizeOp_ (sLit "prefetcht0") size src +pprInstr (PREFETCH Lvl1 size src) = pprSizeOp_ (sLit "prefetcht1") size src +pprInstr (PREFETCH Lvl2 size src) = pprSizeOp_ (sLit "prefetcht2") size src + pprInstr (NOT size op) = pprSizeOp (sLit "not") size op pprInstr (BSWAP size op) = pprSizeOp (sLit "bswap") size (OpReg op) pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op @@ -1025,6 +1030,13 @@ pprSizeImmOp name size imm op1 ] +pprSizeOp_ :: LitString -> Size -> Operand -> SDoc +pprSizeOp_ name size op1 + = hcat [ + pprMnemonic_ name , + pprOperand size op1 + ] + pprSizeOp :: LitString -> Size -> Operand -> SDoc pprSizeOp name size op1 = hcat [ |