summaryrefslogtreecommitdiff
path: root/compiler/nativeGen
diff options
context:
space:
mode:
authorAustin Seipp <austin@well-typed.com>2013-10-01 21:13:14 -0500
committerAustin Seipp <austin@well-typed.com>2013-10-01 21:26:47 -0500
commitfd74014079f14bd3ab50e328e52c44ef97d40e05 (patch)
treeda31c992a76d3816a4f1012ceb1eb4e68d0fb556 /compiler/nativeGen
parent627d1e008cbe4d9318b2466394420a968d1659da (diff)
downloadhaskell-fd74014079f14bd3ab50e328e52c44ef97d40e05.tar.gz
Add support for prefetch with locality levels.
This patch adds support for several new primitive operations which use processor-specific instructions to help guide data and cache locality decisions. We have locality levels ranging from [0..3]. For LLVM, we generate llvm.prefetch intrinsics at the proper locality level (similar to GCC). For x86, we generate prefetch{NTA, t2, t1, t0} instructions. On SPARC and PowerPC, the locality levels are ignored. This closes #8256. Authored-by: Carter Tazio Schonwald <carter.schonwald@gmail.com> Signed-off-by: Austin Seipp <austin@well-typed.com>
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs5
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs7
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs23
-rw-r--r--compiler/nativeGen/X86/Instr.hs16
-rw-r--r--compiler/nativeGen/X86/Ppr.hs12
5 files changed, 56 insertions, 7 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 65533d8f9a..3f0e7632f8 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -912,6 +912,9 @@ genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
genCCall' _ _ (PrimTarget MO_Touch) _ _
= return $ nilOL
+genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
+ = return $ nilOL
+
genCCall' dflags gcp target dest_regs args0
= ASSERT(not $ any (`elem` [II16]) $ map cmmTypeSize argReps)
-- we rely on argument promotion in the codeGen
@@ -1165,7 +1168,7 @@ genCCall' dflags gcp target dest_regs args0
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- MO_Prefetch_Data -> unsupported
+ (MO_Prefetch_Data _ ) -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported")
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 5d2b9a9d6d..5d65b427e1 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -392,7 +392,10 @@ genCCall
-- In the SPARC case we don't need a barrier.
--
genCCall (PrimTarget MO_WriteBarrier) _ _
- = do return nilOL
+ = return $ nilOL
+
+genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
+ = return $ nilOL
genCCall target dest_regs args0
= do
@@ -657,7 +660,7 @@ outOfLineMachOp_table mop
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- MO_Prefetch_Data -> unsupported
+ (MO_Prefetch_Data _) -> unsupported
where unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported here")
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index e18da25347..2456688744 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1658,7 +1658,26 @@ genCCall _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL
-genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL
+genCCall is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
+ case n of
+ 0 -> genPrefetch src $ PREFETCH NTA size
+ 1 -> genPrefetch src $ PREFETCH Lvl2 size
+ 2 -> genPrefetch src $ PREFETCH Lvl1 size
+ 3 -> genPrefetch src $ PREFETCH Lvl0 size
+ l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
+ -- the c / llvm prefetch convention is 0, 1, 2, and 3
+ -- the x86 corresponding names are : NTA, 2 , 1, and 0
+ where
+ size = archWordSize is32bit
+ -- need to know what register width for pointers!
+ genPrefetch inRegSrc prefetchCTor =
+ do
+ code_src <- getAnyReg inRegSrc
+ src_r <- getNewRegNat size
+ return $ code_src src_r `appOL`
+ (unitOL (prefetchCTor (OpAddr
+ ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
+ -- prefetch always takes an address
genCCall is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
dflags <- getDynFlags
@@ -2361,7 +2380,7 @@ outOfLineCmmOp mop res args
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- MO_Prefetch_Data -> unsupported
+ (MO_Prefetch_Data _ ) -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported here")
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index e584ffe8b9..d10591e37f 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -9,7 +9,7 @@
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-module X86.Instr (Instr(..), Operand(..), JumpDest,
+module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest,
getJumpDestBlockId, canShortcut, shortcutStatics,
shortcutJump, i386_insert_ffrees, allocMoreStack,
maxSpillSlots, archWordSize)
@@ -319,7 +319,14 @@ data Instr
-- 1: popl %reg
-- SSE4.2
- | POPCNT Size Operand Reg -- src, dst
+ | POPCNT Size Operand Reg -- src, dst
+
+ -- prefetch
+ | PREFETCH PrefetchVariant Size Operand -- prefetch Variant, addr size, address to prefetch
+ -- variant can be NTA, Lvl0, Lvl1, or Lvl2
+
+data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
+
data Operand
= OpReg Reg -- register
@@ -417,6 +424,9 @@ x86_regUsageOfInstr platform instr
POPCNT _ src dst -> mkRU (use_R src []) [dst]
+ -- note: might be a better way to do this
+ PREFETCH _ _ src -> mkRU (use_R src []) []
+
_other -> panic "regUsage: unrecognised instr"
where
@@ -557,6 +567,8 @@ x86_patchRegsOfInstr instr env
POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst)
+ PREFETCH lvl size src -> PREFETCH lvl size (patchOp src)
+
_other -> panic "patchRegs: unrecognised instr"
where
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 7f9c6901da..f38a04d069 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -577,6 +577,11 @@ pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
pprInstr (POPCNT size src dst) = pprOpOp (sLit "popcnt") size src (OpReg dst)
+pprInstr (PREFETCH NTA size src ) = pprSizeOp_ (sLit "prefetchnta") size src
+pprInstr (PREFETCH Lvl0 size src) = pprSizeOp_ (sLit "prefetcht0") size src
+pprInstr (PREFETCH Lvl1 size src) = pprSizeOp_ (sLit "prefetcht1") size src
+pprInstr (PREFETCH Lvl2 size src) = pprSizeOp_ (sLit "prefetcht2") size src
+
pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
pprInstr (BSWAP size op) = pprSizeOp (sLit "bswap") size (OpReg op)
pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
@@ -1025,6 +1030,13 @@ pprSizeImmOp name size imm op1
]
+pprSizeOp_ :: LitString -> Size -> Operand -> SDoc
+pprSizeOp_ name size op1
+ = hcat [
+ pprMnemonic_ name ,
+ pprOperand size op1
+ ]
+
pprSizeOp :: LitString -> Size -> Operand -> SDoc
pprSizeOp name size op1
= hcat [