diff options
author | Ben.Lippmeier@anu.edu.au <unknown> | 2007-09-05 12:52:19 +0000 |
---|---|---|
committer | Ben.Lippmeier@anu.edu.au <unknown> | 2007-09-05 12:52:19 +0000 |
commit | a8312580d6f089d153d8af668484d4c2eb75e8a8 (patch) | |
tree | 42ddcc5c73d50c68b340315da5922f9481044c5a | |
parent | 16dc208aaad7aadaea970e47b8055d7d7f8781e5 (diff) | |
download | haskell-a8312580d6f089d153d8af668484d4c2eb75e8a8.tar.gz |
Refactor MachRegs.trivColorable to do unboxed accumulation
trivColorable was soaking up total 31% time, 41% alloc when
compiling SHA1.lhs with -O2 -fregs-graph on x86.
Refactoring to use unboxed accumulators and walk directly
over the UniqFM holding the set of conflicts reduces this
to 17% time, 6% alloc.
-rw-r--r-- | compiler/nativeGen/AsmCodeGen.lhs | 4 | ||||
-rw-r--r-- | compiler/nativeGen/GraphBase.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/GraphOps.hs | 4 | ||||
-rw-r--r-- | compiler/nativeGen/MachRegs.lhs | 84 | ||||
-rw-r--r-- | compiler/nativeGen/RegAllocColor.hs | 9 | ||||
-rw-r--r-- | compiler/nativeGen/RegAllocLinear.hs | 1 | ||||
-rw-r--r-- | compiler/utils/UniqFM.lhs | 3 |
7 files changed, 91 insertions, 16 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 86363ed0c1..c9f11d51b2 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -278,7 +278,7 @@ cmmNativeGen dflags us cmm -- graph coloring register allocation let ((alloced, regAllocStats), usAlloc) - = {-# SCC "regAlloc(color)" #-} + = {-# SCC "RegAlloc(color)" #-} initUs usLive $ Color.regAlloc generateRegAllocStats @@ -312,7 +312,7 @@ cmmNativeGen dflags us cmm else do -- do linear register allocation let ((alloced, regAllocStats), usAlloc) - = {-# SCC "regAlloc(linear)" #-} + = {-# SCC "RegAlloc(linear)" #-} initUs usLive $ liftM unzip $ mapUs Linear.regAlloc withLiveness diff --git a/compiler/nativeGen/GraphBase.hs b/compiler/nativeGen/GraphBase.hs index c4e9eb3531..b980ba2261 100644 --- a/compiler/nativeGen/GraphBase.hs +++ b/compiler/nativeGen/GraphBase.hs @@ -82,7 +82,7 @@ data Node k cls color , nodeConflicts :: UniqSet k -- | Colors that cannot be used by this node. - , nodeExclusions :: UniqSet color + , nodeExclusions :: UniqSet color -- | Colors that this node would prefer to be, in decending order. , nodePreference :: [color] diff --git a/compiler/nativeGen/GraphOps.hs b/compiler/nativeGen/GraphOps.hs index f620d8a0df..f918fd2557 100644 --- a/compiler/nativeGen/GraphOps.hs +++ b/compiler/nativeGen/GraphOps.hs @@ -28,7 +28,6 @@ import UniqFM import Data.List hiding (union) import Data.Maybe - -- | Lookup a node from the graph. lookupNode :: Uniquable k @@ -447,6 +446,7 @@ setColor u color u +{-# INLINE adjustWithDefaultUFM #-} adjustWithDefaultUFM :: Uniquable k => (a -> a) -> a -> k @@ -458,7 +458,7 @@ adjustWithDefaultUFM f def k map map k def - +{-# INLINE adjustUFM #-} adjustUFM :: Uniquable k => (a -> a) diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs index dd4962ca4d..ee514f90cc 100644 --- a/compiler/nativeGen/MachRegs.lhs +++ b/compiler/nativeGen/MachRegs.lhs @@ -103,6 +103,9 @@ import Unique import UniqSet import Constants import FastTypes +import UniqFM + +import GHC.Exts #if powerpc_TARGET_ARCH import Data.Word ( Word8, Word16, Word32 ) @@ -444,24 +447,30 @@ instance Outputable Reg where -- NOTE: This only works for arcitectures with just RcInteger and RcDouble -- (which are disjoint) ie. x86, x86_64 and ppc -- + +-- BL 2007/09 +-- Doing a nice fold over the UniqSet makes trivColorable use +-- 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs. +{- trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool trivColorable classN conflicts exclusions - = let + = let + + acc :: Reg -> (Int, Int) -> (Int, Int) acc r (cd, cf) = case regClass r of RcInteger -> (cd+1, cf) RcDouble -> (cd, cf+1) _ -> panic "MachRegs.trivColorable: reg class not handled" - tmp = foldUniqSet acc (0, 0) conflicts - (rsD, rsFP) = foldUniqSet acc tmp exclusions + tmp = foldUniqSet acc (0, 0) conflicts + (countInt, countFloat) = foldUniqSet acc tmp exclusions - squeese = worst rsD classN RcInteger - + worst rsFP classN RcDouble + squeese = worst countInt classN RcInteger + + worst countFloat classN RcDouble in squeese < allocatableRegsInClass classN - -- | Worst case displacement -- node N of classN has n neighbors of class C. -- @@ -480,6 +489,69 @@ worst n classN classC -> case classC of RcDouble -> min n (allocatableRegsInClass RcDouble) RcInteger -> 0 +-} + + +-- The number of allocatable regs is hard coded here so we can do a fast comparision +-- in trivColorable. It's ok if these numbers are _less_ than the actual number of +-- free regs, but they can't be more or the register conflict graph won't color. +-- +-- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing +-- is too slow for us here. +-- +-- Compare MachRegs.freeRegs and MachRegs.h to get these numbers. +-- +#if i386_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER 3# +#define ALLOCATABLE_REGS_DOUBLE 6# +#endif + +#if x86_64_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER 5# +#define ALLOCATABLE_REGS_DOUBLE 2# +#endif + +#if powerpc_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER 16# +#define ALLOCATABLE_REGS_DOUBLE 26# +#endif + +{-# INLINE regClass #-} +trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool +trivColorable classN conflicts exclusions + = {-# SCC "trivColorable" #-} + let + {-# INLINE isSqueesed #-} + isSqueesed cI cF ufm + = case ufm of + NodeUFM _ _ left right + -> case isSqueesed cI cF right of + (# s, cI', cF' #) + -> case s of + False -> isSqueesed cI' cF' left + True -> (# True, cI', cF' #) + + LeafUFM _ reg + -> case regClass reg of + RcInteger + -> case cI +# 1# of + cI' -> (# cI' >=# ALLOCATABLE_REGS_INTEGER, cI', cF #) + + RcDouble + -> case cF +# 1# of + cF' -> (# cF' >=# ALLOCATABLE_REGS_DOUBLE, cI, cF' #) + + EmptyUFM + -> (# False, cI, cF #) + + in case isSqueesed 0# 0# conflicts of + (# False, cI', cF' #) + -> case isSqueesed cI' cF' exclusions of + (# s, _, _ #) -> not s + + (# True, _, _ #) + -> False + -- ----------------------------------------------------------------------------- diff --git a/compiler/nativeGen/RegAllocColor.hs b/compiler/nativeGen/RegAllocColor.hs index 2e3d40e427..0cd3923ceb 100644 --- a/compiler/nativeGen/RegAllocColor.hs +++ b/compiler/nativeGen/RegAllocColor.hs @@ -79,11 +79,12 @@ regAlloc_spin dump (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs c $$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree)) -- build a conflict graph from the code. - graph <- buildGraph code + graph <- {-# SCC "BuildGraph" #-} buildGraph code -- build a map of how many instructions each reg lives for. -- this is lazy, it won't be computed unless we need to spill - let fmLife = plusUFMs_C (\(r1, l1) (_, l2) -> (r1, l1 + l2)) + + let fmLife = {-# SCC "LifetimeCount" #-} plusUFMs_C (\(r1, l1) (_, l2) -> (r1, l1 + l2)) $ map lifetimeCount code -- record startup state @@ -101,7 +102,7 @@ regAlloc_spin dump (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs c -- try and color the graph let (graph_colored, rsSpill, rmCoalesce) - = Color.colorGraph regsFree triv spill graph + = {-# SCC "ColorGraph" #-} Color.colorGraph regsFree triv spill graph -- rewrite regs in the code that have been coalesced let patchF reg = case lookupUFM rmCoalesce reg of @@ -147,7 +148,7 @@ regAlloc_spin dump (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs c -- spill the uncolored regs (code_spilled, slotsFree', spillStats) <- regSpill code_coalesced slotsFree rsSpill - + -- recalculate liveness let code_nat = map stripLive code_spilled code_relive <- mapM regLiveness code_nat diff --git a/compiler/nativeGen/RegAllocLinear.hs b/compiler/nativeGen/RegAllocLinear.hs index 571932810b..b99abe3841 100644 --- a/compiler/nativeGen/RegAllocLinear.hs +++ b/compiler/nativeGen/RegAllocLinear.hs @@ -224,6 +224,7 @@ emptyStackMap = StackMap [0..maxSpillSlots] emptyUFM getStackSlotFor :: StackMap -> Unique -> (StackMap,Int) getStackSlotFor (StackMap [] _) _ = panic "RegAllocLinear.getStackSlotFor: out of stack slots" + getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg = case lookupUFM reserved reg of Just slot -> (fs,slot) diff --git a/compiler/utils/UniqFM.lhs b/compiler/utils/UniqFM.lhs index 3abf698fb8..242fe22c3c 100644 --- a/compiler/utils/UniqFM.lhs +++ b/compiler/utils/UniqFM.lhs @@ -21,7 +21,8 @@ Basically, the things need to be in class @Uniquable@, and we use the -- for details module UniqFM ( - UniqFM, -- abstract type + UniqFM(..), -- abstract type + -- (de-abstracted for MachRegs.trivColorable optimisation BL 2007/09) emptyUFM, unitUFM, |