summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoralexbiehl <alex.biehl@gmail.com>2017-11-02 11:47:32 -0400
committerBen Gamari <ben@smart-cactus.org>2017-11-02 11:47:33 -0400
commit44406d3ffd0a6a8bc706ca5492632ff4122ce8f9 (patch)
treebf015312b2b99b7adfb504ff80960bb4f5a1a70b
parent29ae83374647e227d76acd896b89590fc15590d6 (diff)
downloadhaskell-wip/D4145.tar.gz
CmmSink: Use a IntSet instead of a listwip/D4145
CmmProcs which have *lots* of local variables take a considerable amount of time in CmmSink. This was noticed by @tdammers in #7258 while compiling files with large records (~200-400 fields). Before: ``` Sun Oct 29 19:58 2017 Time and Allocation Profiling Report (Final) ghc-stage2 +RTS -p -RTS -B/Users/alexbiehl/git/ghc/inplace/lib /Users/alexbiehl/Downloads/W2.hs -fforce-recomp -O2 total time = 26.00 secs (25996 ticks @ 1000 us, 1 processor) total alloc = 14,921,627,912 bytes (excludes profiling overheads) COST CENTRE MODULE SRC %time %alloc sink CmmPipeline compiler/cmm/CmmPipeline.hs:(104,13)-(105,59) 55.7 15.9 SimplTopBinds SimplCore compiler/simplCore/SimplCore.hs:761:39-74 19.5 30.6 FloatOutwards SimplCore compiler/simplCore/SimplCore.hs:471:40-66 4.2 9.0 RegAlloc-linear AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(658,27)-(660,55) 4.0 11.1 pprNativeCode AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(529,37)-(530,65) 2.8 6.3 NewStranal SimplCore compiler/simplCore/SimplCore.hs:480:40-63 1.6 3.7 OccAnal SimplCore compiler/simplCore/SimplCore.hs:(739,22)-(740,67) 1.5 3.5 StgCmm HscMain compiler/main/HscMain.hs:(1426,13)-(1427,62) 1.2 2.4 regLiveness AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(591,17)-(593,52) 1.2 1.9 genMachCode AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(580,17)-(582,62) 0.9 1.8 NativeCodeGen CodeOutput compiler/main/CodeOutput.hs:171:18-78 0.9 2.1 CoreTidy HscMain compiler/main/HscMain.hs:1253:27-67 0.8 1.9 ``` After: ``` Sun Oct 29 19:18 2017 Time and Allocation Profiling Report (Final) ghc-stage2 +RTS -p -RTS -B/Users/alexbiehl/git/ghc/inplace/lib /Users/alexbiehl/Downloads/W2.hs -fforce-recomp -O2 total time = 13.31 secs (13307 ticks @ 1000 us, 1 processor) total alloc = 15,772,184,488 bytes (excludes profiling overheads) COST CENTRE MODULE SRC %time %alloc SimplTopBinds SimplCore compiler/simplCore/SimplCore.hs:761:39-74 38.3 29.0 sink CmmPipeline compiler/cmm/CmmPipeline.hs:(104,13)-(105,59) 13.2 20.3 RegAlloc-linear AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(658,27)-(660,55) 8.3 10.5 FloatOutwards SimplCore compiler/simplCore/SimplCore.hs:471:40-66 8.1 8.5 pprNativeCode AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(529,37)-(530,65) 5.4 5.9 NewStranal SimplCore compiler/simplCore/SimplCore.hs:480:40-63 3.1 3.5 OccAnal SimplCore compiler/simplCore/SimplCore.hs:(739,22)-(740,67) 2.9 3.3 StgCmm HscMain compiler/main/HscMain.hs:(1426,13)-(1427,62) 2.3 2.3 regLiveness AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(591,17)-(593,52) 2.1 1.8 NativeCodeGen CodeOutput compiler/main/CodeOutput.hs:171:18-78 1.7 2.0 genMachCode AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(580,17)-(582,62) 1.6 1.7 CoreTidy HscMain compiler/main/HscMain.hs:1253:27-67 1.4 1.8 foldNodesBwdOO Hoopl.Dataflow compiler/cmm/Hoopl/Dataflow.hs:(397,1)-(403,17) 1.1 0.8 ``` Reviewers: austin, bgamari, simonmar Subscribers: duog, rwbarton, thomie, tdammers GHC Trac Issues: #7258 Differential Revision: https://phabricator.haskell.org/D4145
-rw-r--r--compiler/cmm/CmmSink.hs32
1 files changed, 25 insertions, 7 deletions
diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs
index a674e54a54..3633ed3003 100644
--- a/compiler/cmm/CmmSink.hs
+++ b/compiler/cmm/CmmSink.hs
@@ -17,13 +17,31 @@ import CodeGen.Platform
import Platform (isARM, platformArch)
import DynFlags
+import Unique
import UniqFM
import PprCmm ()
+import qualified Data.IntSet as IntSet
import Data.List (partition)
import qualified Data.Set as Set
import Data.Maybe
+-- Compact sets for membership tests of local variables.
+
+type LRegSet = IntSet.IntSet
+
+emptyLRegSet :: LRegSet
+emptyLRegSet = IntSet.empty
+
+nullLRegSet :: LRegSet -> Bool
+nullLRegSet = IntSet.null
+
+insertLRegSet :: LocalReg -> LRegSet -> LRegSet
+insertLRegSet l = IntSet.insert (getKey (getUnique l))
+
+elemLRegSet :: LocalReg -> LRegSet -> Bool
+elemLRegSet l = IntSet.member (getKey (getUnique l))
+
-- -----------------------------------------------------------------------------
-- Sinking and inlining
@@ -399,7 +417,7 @@ tryToInline
, Assignments -- Remaining assignments
)
-tryToInline dflags live node assigs = go usages node [] assigs
+tryToInline dflags live node assigs = go usages node emptyLRegSet assigs
where
usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
usages = foldLocalRegsUsed dflags addUsage emptyUFM node
@@ -422,7 +440,7 @@ tryToInline dflags live node assigs = go usages node [] assigs
inline_and_keep = keep inl_node -- inline the assignment, keep it
keep node' = (final_node, a : rest')
- where (final_node, rest') = go usages' node' (l:skipped) rest
+ where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest
usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
usages rhs
-- we must not inline anything that is mentioned in the RHS
@@ -430,7 +448,7 @@ tryToInline dflags live node assigs = go usages node [] assigs
-- usages of the regs on the RHS to 2.
cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
- || l `elem` skipped
+ || l `elemLRegSet` skipped
|| not (okToInline dflags rhs node)
l_usages = lookupUFM usages l
@@ -521,11 +539,11 @@ And we do that right here in tryToInline, just as we do cmmMachOpFold.
addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage m r = addToUFM_C (+) m r 1
-regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
-regsUsedIn [] _ = False
+regsUsedIn :: LRegSet -> CmmExpr -> Bool
+regsUsedIn ls _ | nullLRegSet ls = False
regsUsedIn ls e = wrapRecExpf f e False
- where f (CmmReg (CmmLocal l)) _ | l `elem` ls = True
- f (CmmRegOff (CmmLocal l) _) _ | l `elem` ls = True
+ where f (CmmReg (CmmLocal l)) _ | l `elemLRegSet` ls = True
+ f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True
f _ z = z
-- we don't inline into CmmUnsafeForeignCall if the expression refers