summary | refs | log | tree | commit | diff
path: root/compiler/codeGen
diff options
context:
space:
mode:
author: Carter Tazio Schonwald <carter.schonwald@gmail.com> 2014-12-15 09:42:36 -0600
committer: Austin Seipp <austin@well-typed.com> 2014-12-15 09:42:52 -0600
commit: f44333eae7bc7dc7b6003b75874a02445f6b633b (patch)
tree: 242ae7928d39bed82d4162b6a397d1e6ccdb45e5 /compiler/codeGen
parent: 8afdf274194e77e85e6a08dc4963022c56fc29d8 (diff)
download: haskell-f44333eae7bc7dc7b6003b75874a02445f6b633b.tar.gz
Changing prefetch primops to have a `seq`-like interface
Summary: The current primops for prefetching do not work properly in pure code; namely, the primops are not 'hoisted' into the correct call sites based on when their arguments are evaluated. Instead, they should use a `seq`-like interface, which will cause the prefetch to be evaluated when the needed term is evaluated. See #9353 for the full discussion. Test Plan: updated tests for pure prefetch in T8256 to reflect the design changes in #9353. Reviewers: simonmar, hvr, ekmett, austin Reviewed By: ekmett, austin Subscribers: merijn, thomie, carter, simonmar Differential Revision: https://phabricator.haskell.org/D350 GHC Trac Issues: #9353
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- compiler/codeGen/StgCmmPrim.hs 80
1 files changed, 51 insertions, 29 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index a86caf1a9d..e208318e17 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -735,21 +735,25 @@ emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
ty = vecCmmCat vcat w
-- Prefetch
-emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
-emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
-emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
-
-emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
-emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
-emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
-
-emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
-emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
-emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
-
-emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
-emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
-emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
+emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
+emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
+emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
+
+emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
+emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
+emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
+
+emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
+emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
+emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
+
+emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
+emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
+emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
-- Atomic read-modify-write
emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
@@ -1549,38 +1553,56 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
------------------------------------------------------------------------------
-- Helpers for translating prefetching.
+
+-- | Translate byte array prefetch operations into proper primcalls.
doPrefetchByteArrayOp :: Int
- -> [LocalReg]
-> [CmmExpr]
-> FCode ()
-doPrefetchByteArrayOp locality res [addr,idx]
+doPrefetchByteArrayOp locality [addr,idx]
+ = do dflags <- getDynFlags
+ mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
+doPrefetchByteArrayOp _ _
+ = panic "StgCmmPrim: doPrefetchByteArrayOp"
+
+-- | Translate mutable byte array prefetch operations into proper primcalls.
+doPrefetchMutableByteArrayOp :: Int
+ -> [CmmExpr]
+ -> FCode ()
+doPrefetchMutableByteArrayOp locality [addr,idx]
= do dflags <- getDynFlags
- mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
-doPrefetchByteArrayOp _ _ _
+ mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
+doPrefetchMutableByteArrayOp _ _
= panic "StgCmmPrim: doPrefetchByteArrayOp"
+-- | Translate address prefetch operations into proper primcalls.
doPrefetchAddrOp ::Int
- -> [LocalReg]
-> [CmmExpr]
-> FCode ()
-doPrefetchAddrOp locality res [addr,idx]
- = mkBasicPrefetch locality 0 res addr idx
-doPrefetchAddrOp _ _ _
+doPrefetchAddrOp locality [addr,idx]
+ = mkBasicPrefetch locality 0 addr idx
+doPrefetchAddrOp _ _
= panic "StgCmmPrim: doPrefetchAddrOp"
+-- | Translate value prefetch operations into proper primcalls.
+doPrefetchValueOp :: Int
+ -> [CmmExpr]
+ -> FCode ()
+doPrefetchValueOp locality [addr]
+ = do dflags <- getDynFlags
+ mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
+doPrefetchValueOp _ _
+ = panic "StgCmmPrim: doPrefetchValueOp"
+
+-- | helper to generate prefetch primcalls
mkBasicPrefetch :: Int -- Locality level 0-3
-> ByteOff -- Initial offset in bytes
- -> [LocalReg] -- Destination
-> CmmExpr -- Base address
-> CmmExpr -- Index
-> FCode ()
-mkBasicPrefetch locality off res base idx
+mkBasicPrefetch locality off base idx
= do dflags <- getDynFlags
emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
- case res of
- [] -> return ()
- [reg] -> emitAssign (CmmLocal reg) base
- _ -> panic "StgCmmPrim: mkBasicPrefetch"
+ return ()
-- ----------------------------------------------------------------------------
-- Allocating byte arrays