summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen.Lippmeier@anu.edu.au <unknown>2007-09-05 12:52:19 +0000
committerBen.Lippmeier@anu.edu.au <unknown>2007-09-05 12:52:19 +0000
commita8312580d6f089d153d8af668484d4c2eb75e8a8 (patch)
tree42ddcc5c73d50c68b340315da5922f9481044c5a
parent16dc208aaad7aadaea970e47b8055d7d7f8781e5 (diff)
downloadhaskell-a8312580d6f089d153d8af668484d4c2eb75e8a8.tar.gz
Refactor MachRegs.trivColorable to do unboxed accumulation
trivColorable was soaking up total 31% time, 41% alloc when compiling SHA1.lhs with -O2 -fregs-graph on x86. Refactoring to use unboxed accumulators and walk directly over the UniqFM holding the set of conflicts reduces this to 17% time, 6% alloc.
-rw-r--r--compiler/nativeGen/AsmCodeGen.lhs4
-rw-r--r--compiler/nativeGen/GraphBase.hs2
-rw-r--r--compiler/nativeGen/GraphOps.hs4
-rw-r--r--compiler/nativeGen/MachRegs.lhs84
-rw-r--r--compiler/nativeGen/RegAllocColor.hs9
-rw-r--r--compiler/nativeGen/RegAllocLinear.hs1
-rw-r--r--compiler/utils/UniqFM.lhs3
7 files changed, 91 insertions, 16 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index 86363ed0c1..c9f11d51b2 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -278,7 +278,7 @@ cmmNativeGen dflags us cmm
-- graph coloring register allocation
let ((alloced, regAllocStats), usAlloc)
- = {-# SCC "regAlloc(color)" #-}
+ = {-# SCC "RegAlloc(color)" #-}
initUs usLive
$ Color.regAlloc
generateRegAllocStats
@@ -312,7 +312,7 @@ cmmNativeGen dflags us cmm
else do
-- do linear register allocation
let ((alloced, regAllocStats), usAlloc)
- = {-# SCC "regAlloc(linear)" #-}
+ = {-# SCC "RegAlloc(linear)" #-}
initUs usLive
$ liftM unzip
$ mapUs Linear.regAlloc withLiveness
diff --git a/compiler/nativeGen/GraphBase.hs b/compiler/nativeGen/GraphBase.hs
index c4e9eb3531..b980ba2261 100644
--- a/compiler/nativeGen/GraphBase.hs
+++ b/compiler/nativeGen/GraphBase.hs
@@ -82,7 +82,7 @@ data Node k cls color
, nodeConflicts :: UniqSet k
-- | Colors that cannot be used by this node.
- , nodeExclusions :: UniqSet color
+ , nodeExclusions :: UniqSet color
-- | Colors that this node would prefer to be, in decending order.
, nodePreference :: [color]
diff --git a/compiler/nativeGen/GraphOps.hs b/compiler/nativeGen/GraphOps.hs
index f620d8a0df..f918fd2557 100644
--- a/compiler/nativeGen/GraphOps.hs
+++ b/compiler/nativeGen/GraphOps.hs
@@ -28,7 +28,6 @@ import UniqFM
import Data.List hiding (union)
import Data.Maybe
-
-- | Lookup a node from the graph.
lookupNode
:: Uniquable k
@@ -447,6 +446,7 @@ setColor u color
u
+{-# INLINE adjustWithDefaultUFM #-}
adjustWithDefaultUFM
:: Uniquable k
=> (a -> a) -> a -> k
@@ -458,7 +458,7 @@ adjustWithDefaultUFM f def k map
map
k def
-
+{-# INLINE adjustUFM #-}
adjustUFM
:: Uniquable k
=> (a -> a)
diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs
index dd4962ca4d..ee514f90cc 100644
--- a/compiler/nativeGen/MachRegs.lhs
+++ b/compiler/nativeGen/MachRegs.lhs
@@ -103,6 +103,9 @@ import Unique
import UniqSet
import Constants
import FastTypes
+import UniqFM
+
+import GHC.Exts
#if powerpc_TARGET_ARCH
import Data.Word ( Word8, Word16, Word32 )
@@ -444,24 +447,30 @@ instance Outputable Reg where
-- NOTE: This only works for arcitectures with just RcInteger and RcDouble
-- (which are disjoint) ie. x86, x86_64 and ppc
--
+
+-- BL 2007/09
+-- Doing a nice fold over the UniqSet makes trivColorable use
+-- 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
+{-
trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
trivColorable classN conflicts exclusions
- = let
+ = let
+
+ acc :: Reg -> (Int, Int) -> (Int, Int)
acc r (cd, cf)
= case regClass r of
RcInteger -> (cd+1, cf)
RcDouble -> (cd, cf+1)
_ -> panic "MachRegs.trivColorable: reg class not handled"
- tmp = foldUniqSet acc (0, 0) conflicts
- (rsD, rsFP) = foldUniqSet acc tmp exclusions
+ tmp = foldUniqSet acc (0, 0) conflicts
+ (countInt, countFloat) = foldUniqSet acc tmp exclusions
- squeese = worst rsD classN RcInteger
- + worst rsFP classN RcDouble
+ squeese = worst countInt classN RcInteger
+ + worst countFloat classN RcDouble
in squeese < allocatableRegsInClass classN
-
-- | Worst case displacement
-- node N of classN has n neighbors of class C.
--
@@ -480,6 +489,69 @@ worst n classN classC
-> case classC of
RcDouble -> min n (allocatableRegsInClass RcDouble)
RcInteger -> 0
+-}
+
+
+-- The number of allocatable regs is hard coded here so we can do a fast comparision
+-- in trivColorable. It's ok if these numbers are _less_ than the actual number of
+-- free regs, but they can't be more or the register conflict graph won't color.
+--
+-- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing
+-- is too slow for us here.
+--
+-- Compare MachRegs.freeRegs and MachRegs.h to get these numbers.
+--
+#if i386_TARGET_ARCH
+#define ALLOCATABLE_REGS_INTEGER 3#
+#define ALLOCATABLE_REGS_DOUBLE 6#
+#endif
+
+#if x86_64_TARGET_ARCH
+#define ALLOCATABLE_REGS_INTEGER 5#
+#define ALLOCATABLE_REGS_DOUBLE 2#
+#endif
+
+#if powerpc_TARGET_ARCH
+#define ALLOCATABLE_REGS_INTEGER 16#
+#define ALLOCATABLE_REGS_DOUBLE 26#
+#endif
+
+{-# INLINE regClass #-}
+trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
+trivColorable classN conflicts exclusions
+ = {-# SCC "trivColorable" #-}
+ let
+ {-# INLINE isSqueesed #-}
+ isSqueesed cI cF ufm
+ = case ufm of
+ NodeUFM _ _ left right
+ -> case isSqueesed cI cF right of
+ (# s, cI', cF' #)
+ -> case s of
+ False -> isSqueesed cI' cF' left
+ True -> (# True, cI', cF' #)
+
+ LeafUFM _ reg
+ -> case regClass reg of
+ RcInteger
+ -> case cI +# 1# of
+ cI' -> (# cI' >=# ALLOCATABLE_REGS_INTEGER, cI', cF #)
+
+ RcDouble
+ -> case cF +# 1# of
+ cF' -> (# cF' >=# ALLOCATABLE_REGS_DOUBLE, cI, cF' #)
+
+ EmptyUFM
+ -> (# False, cI, cF #)
+
+ in case isSqueesed 0# 0# conflicts of
+ (# False, cI', cF' #)
+ -> case isSqueesed cI' cF' exclusions of
+ (# s, _, _ #) -> not s
+
+ (# True, _, _ #)
+ -> False
+
-- -----------------------------------------------------------------------------
diff --git a/compiler/nativeGen/RegAllocColor.hs b/compiler/nativeGen/RegAllocColor.hs
index 2e3d40e427..0cd3923ceb 100644
--- a/compiler/nativeGen/RegAllocColor.hs
+++ b/compiler/nativeGen/RegAllocColor.hs
@@ -79,11 +79,12 @@ regAlloc_spin dump (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs c
$$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree))
-- build a conflict graph from the code.
- graph <- buildGraph code
+ graph <- {-# SCC "BuildGraph" #-} buildGraph code
-- build a map of how many instructions each reg lives for.
-- this is lazy, it won't be computed unless we need to spill
- let fmLife = plusUFMs_C (\(r1, l1) (_, l2) -> (r1, l1 + l2))
+
+ let fmLife = {-# SCC "LifetimeCount" #-} plusUFMs_C (\(r1, l1) (_, l2) -> (r1, l1 + l2))
$ map lifetimeCount code
-- record startup state
@@ -101,7 +102,7 @@ regAlloc_spin dump (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs c
-- try and color the graph
let (graph_colored, rsSpill, rmCoalesce)
- = Color.colorGraph regsFree triv spill graph
+ = {-# SCC "ColorGraph" #-} Color.colorGraph regsFree triv spill graph
-- rewrite regs in the code that have been coalesced
let patchF reg = case lookupUFM rmCoalesce reg of
@@ -147,7 +148,7 @@ regAlloc_spin dump (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs c
-- spill the uncolored regs
(code_spilled, slotsFree', spillStats)
<- regSpill code_coalesced slotsFree rsSpill
-
+
-- recalculate liveness
let code_nat = map stripLive code_spilled
code_relive <- mapM regLiveness code_nat
diff --git a/compiler/nativeGen/RegAllocLinear.hs b/compiler/nativeGen/RegAllocLinear.hs
index 571932810b..b99abe3841 100644
--- a/compiler/nativeGen/RegAllocLinear.hs
+++ b/compiler/nativeGen/RegAllocLinear.hs
@@ -224,6 +224,7 @@ emptyStackMap = StackMap [0..maxSpillSlots] emptyUFM
getStackSlotFor :: StackMap -> Unique -> (StackMap,Int)
getStackSlotFor (StackMap [] _) _
= panic "RegAllocLinear.getStackSlotFor: out of stack slots"
+
getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg =
case lookupUFM reserved reg of
Just slot -> (fs,slot)
diff --git a/compiler/utils/UniqFM.lhs b/compiler/utils/UniqFM.lhs
index 3abf698fb8..242fe22c3c 100644
--- a/compiler/utils/UniqFM.lhs
+++ b/compiler/utils/UniqFM.lhs
@@ -21,7 +21,8 @@ Basically, the things need to be in class @Uniquable@, and we use the
-- for details
module UniqFM (
- UniqFM, -- abstract type
+ UniqFM(..), -- abstract type
+ -- (de-abstracted for MachRegs.trivColorable optimisation BL 2007/09)
emptyUFM,
unitUFM,