summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/basicTypes/BasicTypes.hs107
-rw-r--r--compiler/basicTypes/MkId.hs18
-rw-r--r--compiler/cmm/CmmImplementSwitchPlans.hs19
-rw-r--r--compiler/cmm/CmmParse.y47
-rw-r--r--compiler/cmm/CmmSwitch.hs201
-rw-r--r--compiler/cmm/PprC.hs10
-rw-r--r--compiler/cmm/PprCmm.hs29
-rw-r--r--compiler/codeGen/StgCmmBind.hs2
-rw-r--r--compiler/codeGen/StgCmmExpr.hs49
-rw-r--r--compiler/codeGen/StgCmmMonad.hs6
-rw-r--r--compiler/codeGen/StgCmmPrim.hs28
-rw-r--r--compiler/codeGen/StgCmmProf.hs1
-rw-r--r--compiler/codeGen/StgCmmUtils.hs118
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs27
-rw-r--r--compiler/main/DynFlags.hs16
-rw-r--r--compiler/main/ErrUtils.hs9
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs6
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs6
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs6
-rw-r--r--compiler/simplStg/StgCse.hs12
-rw-r--r--compiler/simplStg/StgStats.hs2
-rw-r--r--compiler/simplStg/UnariseStg.hs38
-rw-r--r--compiler/stgSyn/CoreToStg.hs111
-rw-r--r--compiler/stgSyn/StgLint.hs12
-rw-r--r--compiler/stgSyn/StgSyn.hs14
25 files changed, 610 insertions, 284 deletions
diff --git a/compiler/basicTypes/BasicTypes.hs b/compiler/basicTypes/BasicTypes.hs
index d8c3eb739d..c8aaa057fd 100644
--- a/compiler/basicTypes/BasicTypes.hs
+++ b/compiler/basicTypes/BasicTypes.hs
@@ -106,7 +106,10 @@ module BasicTypes(
IntWithInf, infinity, treatZeroAsInf, mkIntWithInf, intGtLimit,
- SpliceExplicitFlag(..)
+ SpliceExplicitFlag(..),
+
+ BranchWeight(..), neverFreq, rareFreq, someFreq, defFreq, oftenFreq,
+ usuallyFreq, alwaysFreq, combinedFreqs, moreLikely, getWeight
) where
import GhcPrelude
@@ -1613,3 +1616,105 @@ data SpliceExplicitFlag
= ExplicitSplice | -- ^ <=> $(f x y)
ImplicitSplice -- ^ <=> f x y, i.e. a naked top level expression
deriving Data
+
+
+{-
+ Note [Branch weights]
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+ The basic rundown:
+ * From STG onward we track which brances are most likely taken.
+ * We generate this info by
+ + Checking for bottom during the core-to-stg translation.
+ Expressions which we can detect as being bottom during compile time can
+ safely be assumed to be rarely taken.
+ + Heap/Stack checks when generating Cmm code:
+ Running out of heap/stack space is comperativly rare so we assume these
+ are not taken.
+ + User annotations when compiling hand written Cmm code.
+ This makes it possible to have the compiler optimize for the common case
+ without relying on internal details of the cmm to assembly translation.
+ * When generating code we use this information to generate better assembly:
+ + At the moment this only influences code layout (CmmContFlowOpt)
+ where we try to make the common case a fallthrough since thats generally
+ faster.
+ + TODO: Balance if/else trees for cases by weight instead of node count.
+
+ This is part of #14672.
+ [Make likelyhood of branches/conditions available throughout the compiler.]
+
+ At the Stg level we record in case alternatives a branch weight.
+ Weights are relative to each other with higher numbers being more
+ likely to be taken.
+
+ We currently generate this information in CoreToStg by checking
+ alternatives for bottom expressions and marking them as never
+ called.
+
+ When generating Cmm this is included in switchtargets as is or translated
+ to likely/not likely for conditional statements.
+
+ This information is then used in the backend for optimizing control
+ flow.
+
+ As long as we only perform simple optimizations that just check
+ which of two branches is more likely to be taken using a Int based
+ representation is fine.
+
+ TODO: For more involved optimizations like calculating hot paths
+ stricter semantics might be needed. As currently a branch with weight
+ 2 and weight 4 only are meaniful compareable if they branch off at the
+ same point. (Eg a single case statement)
+ Conditionals would also require more information than just
+ likely/unlikely/unknown for this to work.
+
+-}
+
+-- | Frequency with which a alternative is taken,
+-- values are relative to each other. Higher means
+-- a branch is taken more often.
+-- See alsoe Note [Branch weights]
+newtype BranchWeight = Weight Int deriving (Eq, Ord, Show)
+
+instance Outputable BranchWeight where
+ ppr (Weight i) = ppr i
+
+neverFreq, rareFreq, someFreq, defFreq,
+ oftenFreq, usuallyFreq, alwaysFreq :: BranchWeight
+
+defFreqVal :: Int
+defFreqVal = 1000
+
+neverFreq = Weight $ 0
+rareFreq = Weight $ div defFreqVal 5
+someFreq = Weight $ div defFreqVal 2
+defFreq = Weight $ 1000
+oftenFreq = Weight $ defFreqVal * 2
+usuallyFreq = Weight $ defFreqVal * 10
+--Don't go crazy here, for large switches we otherwise we might run into
+--integer overflow issues on 32bit platforms if we add them up.
+--which can happen if most of them result in the same expression.
+alwaysFreq = Weight $ defFreqVal * 50
+
+-- | Is f1 more likely then f2?
+-- Returns nothing if they are the same
+moreLikely :: BranchWeight -> BranchWeight -> Maybe Bool
+moreLikely f1 f2
+ | f1 > f2 = Just True
+ | f1 < f2 = Just False
+ | otherwise = Nothing
+
+{- | Add up weights respecting never.
+ Combining two weights where one is never or negative results in the other one.
+ This is neccesary because we never want a likely branch and a unlikely one
+ to add up to less than the likely branch was originally.
+
+ This can happen if we end up with negative weights somehow.
+-}
+combinedFreqs :: BranchWeight -> BranchWeight -> BranchWeight
+combinedFreqs (Weight f1) (Weight f2)
+ | f1 < 0 || f2 < 0 = Weight (max f2 f1)
+ | otherwise = Weight (f1 + f2)
+
+getWeight :: BranchWeight -> Int
+getWeight (Weight f) = f
diff --git a/compiler/basicTypes/MkId.hs b/compiler/basicTypes/MkId.hs
index 38c772c935..f3b0dc25bd 100644
--- a/compiler/basicTypes/MkId.hs
+++ b/compiler/basicTypes/MkId.hs
@@ -357,11 +357,13 @@ mkDictSelRhs clas val_index
dict_id = mkTemplateLocal 1 pred
arg_ids = mkTemplateLocalsNum 2 arg_tys
- rhs_body | new_tycon = unwrapNewTypeBody tycon (mkTyVarTys tyvars) (Var dict_id)
- | otherwise = Case (Var dict_id) dict_id (idType the_arg_id)
- [(DataAlt data_con, arg_ids, varToCoreExpr the_arg_id)]
- -- varToCoreExpr needed for equality superclass selectors
- -- sel a b d = case x of { MkC _ (g:a~b) _ -> CO g }
+ rhs_body | new_tycon
+ = unwrapNewTypeBody tycon (mkTyVarTys tyvars) (Var dict_id)
+ | otherwise
+ = Case (Var dict_id) dict_id (idType the_arg_id)
+ [(DataAlt data_con, arg_ids, varToCoreExpr the_arg_id)]
+ -- varToCoreExpr needed for equality superclass selectors
+ -- sel a b d = case x of { MkC _ (g:a~b) _ -> CO g }
dictSelRule :: Int -> Arity -> RuleFun
-- Tries to persuade the argument to look like a constructor
@@ -834,7 +836,8 @@ wrapCo co rep_ty (unbox_rep, box_rep) -- co :: arg_ty ~ rep_ty
------------------------
seqUnboxer :: Unboxer
-seqUnboxer v = return ([v], \e -> Case (Var v) v (exprType e) [(DEFAULT, [], e)])
+seqUnboxer v = return
+ ([v], \e -> Case (Var v) v (exprType e) [(DEFAULT, [], e)])
unitUnboxer :: Unboxer
unitUnboxer v = return ([v], \e -> e)
@@ -1257,7 +1260,8 @@ seqId = pcMiscPrelId seqName ty info
(mkFunTy alphaTy (mkFunTy betaTy betaTy))
[x,y] = mkTemplateLocals [alphaTy, betaTy]
- rhs = mkLams [alphaTyVar,betaTyVar,x,y] (Case (Var x) x betaTy [(DEFAULT, [], Var y)])
+ rhs = mkLams [alphaTyVar,betaTyVar,x,y]
+ (Case (Var x) x betaTy [(DEFAULT, [], Var y)])
------------------------------------------------
lazyId :: Id -- See Note [lazyId magic]
diff --git a/compiler/cmm/CmmImplementSwitchPlans.hs b/compiler/cmm/CmmImplementSwitchPlans.hs
index 2e2da5d305..b24915d08b 100644
--- a/compiler/cmm/CmmImplementSwitchPlans.hs
+++ b/compiler/cmm/CmmImplementSwitchPlans.hs
@@ -41,7 +41,8 @@ visitSwitches :: DynFlags -> CmmBlock -> UniqSM [CmmBlock]
visitSwitches dflags block
| (entry@(CmmEntry _ scope), middle, CmmSwitch expr ids) <- blockSplit block
= do
- let plan = createSwitchPlan ids
+ let balanceByWeight = gopt Opt_WeightBalanceAlts dflags
+ let plan = createSwitchPlan balanceByWeight ids
(newTail, newBlocks) <- implementSwitchPlan dflags scope expr plan
@@ -57,11 +58,11 @@ visitSwitches dflags block
implementSwitchPlan :: DynFlags -> CmmTickScope -> CmmExpr -> SwitchPlan -> UniqSM (Block CmmNode O C, [CmmBlock])
implementSwitchPlan dflags scope expr = go
where
- go (Unconditionally l)
- = return (emptyBlock `blockJoinTail` CmmBranch l, [])
+ go (Unconditionally li)
+ = return (emptyBlock `blockJoinTail` CmmBranch (liLbl li), [])
go (JumpTable ids)
= return (emptyBlock `blockJoinTail` CmmSwitch expr ids, [])
- go (IfLT signed i ids1 ids2)
+ go (IfLT signed i ids1 ids2 freq)
= do
(bid1, newBlocks1) <- go' ids1
(bid2, newBlocks2) <- go' ids2
@@ -69,20 +70,20 @@ implementSwitchPlan dflags scope expr = go
let lt | signed = cmmSLtWord
| otherwise = cmmULtWord
scrut = lt dflags expr $ CmmLit $ mkWordCLit dflags i
- lastNode = CmmCondBranch scrut bid1 bid2 Nothing
+ lastNode = CmmCondBranch scrut bid1 bid2 freq
lastBlock = emptyBlock `blockJoinTail` lastNode
return (lastBlock, newBlocks1++newBlocks2)
- go (IfEqual i l ids2)
+ go (IfEqual i (l,_f) ids2 freq)
= do
(bid2, newBlocks2) <- go' ids2
- let scrut = cmmNeWord dflags expr $ CmmLit $ mkWordCLit dflags i
- lastNode = CmmCondBranch scrut bid2 l Nothing
+ let scrut = cmmEqWord dflags expr $ CmmLit $ mkWordCLit dflags i
+ lastNode = CmmCondBranch scrut l bid2 freq
lastBlock = emptyBlock `blockJoinTail` lastNode
return (lastBlock, newBlocks2)
-- Same but returning a label to branch to
- go' (Unconditionally l)
+ go' (Unconditionally (l,_f))
= return (l, [])
go' p
= do
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index cf660d274f..cb36b71634 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -372,8 +372,8 @@ cmm :: { CmmParse () }
cmmtop :: { CmmParse () }
: cmmproc { $1 }
| cmmdata { $1 }
- | decl { $1 }
- | 'CLOSURE' '(' NAME ',' NAME lits ')' ';'
+ | decl { $1 }
+ | 'CLOSURE' '(' NAME ',' NAME lits ')' ';'
{% liftP . withThisPackage $ \pkg ->
do lits <- sequence $6;
staticClosure pkg $3 $5 (map getLit lits) }
@@ -388,20 +388,20 @@ cmmtop :: { CmmParse () }
-- * we can derive closure and info table labels from a single NAME
cmmdata :: { CmmParse () }
- : 'section' STRING '{' data_label statics '}'
+ : 'section' STRING '{' data_label statics '}'
{ do lbl <- $4;
ss <- sequence $5;
code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) }
data_label :: { CmmParse CLabel }
- : NAME ':'
+ : NAME ':'
{% liftP . withThisPackage $ \pkg ->
return (mkCmmDataLabel pkg $1) }
statics :: { [CmmParse [CmmStatic]] }
: {- empty -} { [] }
| static statics { $1 : $2 }
-
+
-- Strings aren't used much in the RTS HC code, so it doesn't seem
-- worth allowing inline strings. C-- doesn't allow them anyway.
static :: { CmmParse [CmmStatic] }
@@ -410,10 +410,10 @@ static :: { CmmParse [CmmStatic] }
| type ';' { return [CmmUninitialised
(widthInBytes (typeWidth $1))] }
| 'bits8' '[' ']' STRING ';' { return [mkString $4] }
- | 'bits8' '[' INT ']' ';' { return [CmmUninitialised
+ | 'bits8' '[' INT ']' ';' { return [CmmUninitialised
(fromIntegral $3)] }
- | typenot8 '[' INT ']' ';' { return [CmmUninitialised
- (widthInBytes (typeWidth $1) *
+ | typenot8 '[' INT ']' ';' { return [CmmUninitialised
+ (widthInBytes (typeWidth $1) *
fromIntegral $3)] }
| 'CLOSURE' '(' NAME lits ')'
{ do { lits <- sequence $4
@@ -474,7 +474,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
, cit_rep = rep
, cit_prof = prof, cit_srt = NoC_SRT },
[]) }
-
+
| 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')'
-- ptrs, nptrs, closure type, description, type, fun type
{% liftP . withThisPackage $ \pkg ->
@@ -511,7 +511,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
-- If profiling is on, this string gets duplicated,
-- but that's the way the old code did it we can fix it some other time.
-
+
| 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')'
-- selector, closure type, description, type
{% liftP . withThisPackage $ \pkg ->
@@ -574,7 +574,7 @@ importName
-- A label imported without an explicit packageId.
-- These are taken to come frome some foreign, unnamed package.
- : NAME
+ : NAME
{ ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) }
-- as previous 'NAME', but 'IsData'
@@ -584,8 +584,8 @@ importName
-- A label imported with an explicit packageId.
| STRING NAME
{ ($2, mkCmmCodeLabel (fsToUnitId (mkFastString $1)) $2) }
-
-
+
+
names :: { [FastString] }
: NAME { [$1] }
| NAME ',' names { $1 : $3 }
@@ -671,9 +671,9 @@ bool_expr :: { CmmParse BoolExpr }
| expr { do e <- $1; return (BoolTest e) }
bool_op :: { CmmParse BoolExpr }
- : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3;
+ : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3;
return (BoolAnd e1 e2) }
- | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3;
+ | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3;
return (BoolOr e1 e2) }
| '!' bool_expr { do e <- $2; return (BoolNot e) }
| '(' bool_op ')' { $2 }
@@ -759,7 +759,7 @@ expr :: { CmmParse CmmExpr }
expr0 :: { CmmParse CmmExpr }
: INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) }
| FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) }
- | STRING { do s <- code (newStringCLit $1);
+ | STRING { do s <- code (newStringCLit $1);
return (CmmLit s) }
| reg { $1 }
| type '[' expr ']' { do e <- $3; return (CmmLoad e $1) }
@@ -817,14 +817,14 @@ foreign_formal :: { CmmParse (LocalReg, ForeignHint) }
local_lreg :: { CmmParse LocalReg }
: NAME { do e <- lookupName $1;
return $
- case e of
+ case e of
CmmReg (CmmLocal r) -> r
other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") }
lreg :: { CmmParse CmmReg }
: NAME { do e <- lookupName $1;
return $
- case e of
+ case e of
CmmReg r -> r
other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") }
| GLOBALREG { return (CmmGlobal $1) }
@@ -1361,8 +1361,7 @@ withSourceNote a b parse = do
-- -----------------------------------------------------------------------------
-- Table jumps
--- We use a simplified form of C-- switch statements for now. A
--- switch statement always compiles to a table jump. Each arm can
+-- We use a simplified form of C-- switch statements for now. Each arm can
-- specify a list of values (not ranges), and there can be a single
-- default branch. The range of the table is given either by the
-- optional range on the switch (eg. switch [0..7] {...}), or by
@@ -1375,21 +1374,23 @@ doSwitch :: Maybe (Integer,Integer)
doSwitch mb_range scrut arms deflt
= do
-- Compile code for the default branch
- dflt_entry <-
+ dflt_entry <-
case deflt of
Nothing -> return Nothing
- Just e -> do b <- forkLabelledCode e; return (Just b)
+ Just e -> do b <- forkLabelledCode e; return (Just (b,defFreq))
+ --TODO: Parse likely information for branches
-- Compile each case branch
table_entries <- mapM emitArm arms
let table = M.fromList (concat table_entries)
+ let ftable = fmap (\c -> (c,defFreq)) table
dflags <- getDynFlags
let range = fromMaybe (0, tARGET_MAX_WORD dflags) mb_range
expr <- scrut
-- ToDo: check for out of range and jump to default if necessary
- emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry table)
+ emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry ftable)
where
emitArm :: ([Integer],Either BlockId (CmmParse ())) -> CmmParse [(Integer,BlockId)]
emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ]
diff --git a/compiler/cmm/CmmSwitch.hs b/compiler/cmm/CmmSwitch.hs
index ce779465e3..8ded03bdf3 100644
--- a/compiler/cmm/CmmSwitch.hs
+++ b/compiler/cmm/CmmSwitch.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE GADTs #-}
module CmmSwitch (
- SwitchTargets,
- mkSwitchTargets,
+ SwitchTargets, LabelInfo,
+ liLbl, liWeight, mkSwitchTargets,
switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
switchTargetsToList, eqSwitchTargetWith,
@@ -9,6 +9,8 @@ module CmmSwitch (
SwitchPlan(..),
targetSupportsSwitch,
createSwitchPlan,
+
+ SeparatedList,
) where
import GhcPrelude
@@ -18,9 +20,11 @@ import DynFlags
import Hoopl.Label (Label)
import Data.Maybe
+import Data.Bifunctor
import Data.List (groupBy)
import Data.Function (on)
import qualified Data.Map as M
+import BasicTypes (BranchWeight, combinedFreqs, moreLikely, neverFreq)
-- Note [Cmm Switches, the general plan]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -103,17 +107,27 @@ minJumpTableOffset = 2
-- optional default value and a map from values to jump labels.
data SwitchTargets =
SwitchTargets
- Bool -- Signed values
- (Integer, Integer) -- Range
- (Maybe Label) -- Default value
- (M.Map Integer Label) -- The branches
- deriving (Show, Eq)
+ { st_signed :: Bool -- Signed values
+ , st_range :: (Integer, Integer) -- Range
+ , st_defLabel :: (Maybe LabelInfo) -- Default value
+ , st_valMap :: (M.Map Integer LabelInfo) -- The branches
+ } deriving (Show, Eq)
+
+-- | A label annotated with a branch weight.
+type LabelInfo = (Label, BranchWeight)
+
+liLbl :: LabelInfo -> Label
+liLbl = fst
+
+liWeight :: LabelInfo -> BranchWeight
+liWeight = snd
-- | The smart constructor mkSwitchTargets normalises the map a bit:
-- * No entries outside the range
-- * No entries equal to the default
-- * No default if all elements have explicit values
-mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
+mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe LabelInfo
+ -> M.Map Integer LabelInfo -> SwitchTargets
mkSwitchTargets signed range@(lo,hi) mbdef ids
= SwitchTargets signed range mbdef' ids'
where
@@ -135,14 +149,16 @@ mkSwitchTargets signed range@(lo,hi) mbdef ids
-- | Changes all labels mentioned in the SwitchTargets value
mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
mapSwitchTargets f (SwitchTargets signed range mbdef branches)
- = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
+ = SwitchTargets signed range
+ (fmap (first f) mbdef)
+ (fmap (first f) branches)
-- | Returns the list of non-default branches of the SwitchTargets value
-switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
+switchTargetsCases :: SwitchTargets -> [(Integer, LabelInfo)]
switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
-- | Return the default label of the SwitchTargets value
-switchTargetsDefault :: SwitchTargets -> Maybe Label
+switchTargetsDefault :: SwitchTargets -> Maybe LabelInfo
switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
-- | Return the range of the SwitchTargets value
@@ -161,7 +177,7 @@ switchTargetsSigned (SwitchTargets signed _ _ _) = signed
-- The conversion from Integer to Int is a bit of a wart, as the actual
-- scrutinee might be an unsigned word, but it just works, due to wrap-around
-- arithmetic (as verified by the CmmSwitchTest test case).
-switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
+switchTargetsToTable :: SwitchTargets -> (Int, [Maybe LabelInfo])
switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
= (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
where
@@ -198,27 +214,37 @@ switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
-- | The list of all labels occuring in the SwitchTargets value.
switchTargetsToList :: SwitchTargets -> [Label]
switchTargetsToList (SwitchTargets _ _ mbdef branches)
- = maybeToList mbdef ++ M.elems branches
+ = map liLbl (maybeToList mbdef ++ M.elems branches)
-- | Groups cases with equal targets, suitable for pretty-printing to a
-- c-like switch statement with fall-through semantics.
-switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
+switchTargetsFallThrough :: SwitchTargets
+ -> ([([Integer], LabelInfo)], Maybe LabelInfo)
switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
where
groups = map (\xs -> (map fst xs, snd (head xs))) $
- groupBy ((==) `on` snd) $
+ groupBy ((==) `on` (liLbl . snd)) $
M.toList branches
-- | Custom equality helper, needed for "CmmCommonBlockElim"
-eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
-eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
- signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
+eqSwitchTargetWith :: (Label -> Label -> Bool)
+ -> SwitchTargets -> SwitchTargets
+ -> Bool
+eqSwitchTargetWith eq
+ (SwitchTargets signed1 range1 mbdef1 ids1)
+ (SwitchTargets signed2 range2 mbdef2 ids2) =
+ signed1 == signed2 && range1 == range2 &&
+ goMB mbdef1 mbdef2 &&
+ goList (M.toList ids1) (M.toList ids2)
where
goMB Nothing Nothing = True
- goMB (Just l1) (Just l2) = l1 `eq` l2
+ goMB (Just l1) (Just l2) = liLbl l1 `eq` liLbl l2
goMB _ _ = False
goList [] [] = True
- goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
+ goList ((i1,l1):ls1) ((i2,l2):ls2) =
+ i1 == i2 &&
+ liLbl l1 `eq` liLbl l2 &&
+ goList ls1 ls2
goList _ _ = False
-----------------------------------------------------------------------------
@@ -228,10 +254,23 @@ eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets
-- | A SwitchPlan abstractly describes how a Switch statement ought to be
-- implemented. See Note [createSwitchPlan]
data SwitchPlan
- = Unconditionally Label
- | IfEqual Integer Label SwitchPlan
- | IfLT Bool Integer SwitchPlan SwitchPlan
- | JumpTable SwitchTargets
+ = Unconditionally
+ { sp_ucTarget :: LabelInfo }
+ | IfEqual
+ { sp_val :: Integer
+ , sp_eqTarget :: LabelInfo
+ , sp_else :: SwitchPlan
+ , sp_likely ::(Maybe Bool)
+ }
+ | IfLT
+ { sp_signed :: Bool
+ , sp_val :: Integer
+ , sp_ltTarget :: SwitchPlan
+ , sp_else :: SwitchPlan
+ , sp_likely :: (Maybe Bool)
+ }
+ | JumpTable
+ { sp_jmpTable :: SwitchTargets }
deriving Show
--
-- Note [createSwitchPlan]
@@ -251,6 +290,20 @@ data SwitchPlan
-- findSingleValues
-- 5. The thus collected pieces are assembled to a balanced binary tree.
+-- | Accumulated weight of all branches in a switchplan
+planWeight :: SwitchPlan -> BranchWeight
+planWeight Unconditionally { sp_ucTarget = target }
+ = liWeight target
+planWeight IfEqual {sp_eqTarget = target, sp_else = alt }
+ = combinedFreqs (liWeight target) (planWeight alt)
+planWeight IfLT {sp_ltTarget = target, sp_else = alt }
+ = combinedFreqs (planWeight target) (planWeight alt)
+planWeight JumpTable {sp_jmpTable = table }
+ = foldl1 combinedFreqs lblWeights `combinedFreqs` maybe neverFreq liWeight def
+ where
+ lblWeights = map liWeight $ M.elems (st_valMap table)
+ def = st_defLabel table
+
{-
Note [Two alts + default]
~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -323,28 +376,36 @@ targetSupportsSwitch _ = False
-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
-- down into smaller pieces suitable for code generation.
-createSwitchPlan :: SwitchTargets -> SwitchPlan
+createSwitchPlan :: Bool -> SwitchTargets -> SwitchPlan
-- Lets do the common case of a singleton map quicky and efficiently (#10677)
-createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
- | [(x, l)] <- M.toList m
- = IfEqual x l (Unconditionally defLabel)
+createSwitchPlan _ (SwitchTargets _signed _range (Just defInfo) m)
+ | [(x, li)] <- M.toList m
+ = IfEqual x li
+ (Unconditionally defInfo)
+ (moreLikely (liWeight li) (liWeight defInfo))
-- And another common case, matching "booleans"
-createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
- | [(x1, l1), (_x2,l2)] <- M.toAscList m
+createSwitchPlan _ (SwitchTargets _signed (lo,hi) Nothing m)
+ | [(x1, li1@(l1,f1)), (_x2,li2@(l2,f2))] <- M.toAscList m
--Checking If |range| = 2 is enough if we have two unique literals
, hi - lo == 1
- = IfEqual x1 l1 (Unconditionally l2)
+ = IfEqual x1 li1 (Unconditionally li2) (moreLikely f1 f2)
-- See Note [Two alts + default]
-createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
- | [(x1, l1), (x2,l2)] <- M.toAscList m
- = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
-createSwitchPlan (SwitchTargets signed range mbdef m) =
- -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
+createSwitchPlan _ (SwitchTargets _signed _range (Just def@(defLabel, fdef)) m)
+ | [(x1, li1@(l1,f1)), (x2,li2@(l2,f2))] <- M.toAscList m
+ = IfEqual x1 li1
+ (IfEqual x2 li2 (Unconditionally def) (moreLikely f2 fdef))
+ (moreLikely f1 (combinedFreqs f2 fdef))
+createSwitchPlan balance (SwitchTargets signed range mbdef m) =
+ --pprTrace "createSwitchPlan"
+ --(text (show (range,m)) $$ text (show pieces) $$
+ --text (show flatPlan) $$ text (show plan)) $
plan
where
+ pieces :: [M.Map Integer LabelInfo]
pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
- flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
- plan = buildTree signed $ flatPlan
+ flatPlan = findSingleValues $
+ mkFlatSwitchPlan signed mbdef range pieces
+ plan = buildTree balance signed $ flatPlan
---
@@ -381,26 +442,39 @@ breakTooSmall m
type FlatSwitchPlan = SeparatedList Integer SwitchPlan
-mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
+{-TODO:
+ Given the branch weights in LabelInfo we could do better
+ than binary search. Look at buildTree, findSingleValues, mkFlatSwitchPlan
+ if you implement this.
+-}
+-- | mkFlatSwitchPlan byWeight signed defLabel range maps
+mkFlatSwitchPlan :: Bool -- ^ Values are signed
+ -> Maybe LabelInfo -- ^ Default alternative
+ -> (Integer, Integer) -- ^ Range of possible values
+ -> [M.Map Integer LabelInfo] -- ^ Value to branch mapping.
+ -> FlatSwitchPlan
-- If we have no default (i.e. undefined where there is no entry), we can
-- branch at the minimum of each map
-mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
+mkFlatSwitchPlan _ Nothing _ []
+ = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
mkFlatSwitchPlan signed Nothing _ (m:ms)
- = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
+ = (mkLeafPlan signed Nothing m ,
+ [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
-- If we have a default, we have to interleave segments that jump
-- to the default between the maps
-mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
+mkFlatSwitchPlan signed (Just li@(l,f)) r ms
+ = let ((_,p1):ps) = go r ms in (p1, ps)
where
go (lo,hi) []
| lo > hi = []
- | otherwise = [(lo, Unconditionally l)]
+ | otherwise = [(lo, Unconditionally li)]
go (lo,hi) (m:ms)
| lo < min
- = (lo, Unconditionally l) : go (min,hi) (m:ms)
+ = (lo, Unconditionally li) : go (min,hi) (m:ms)
| lo == min
- = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
+ = (lo, mkLeafPlan signed (Just li) m) : go (max+1,hi) ms
| otherwise
= pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
where
@@ -408,10 +482,10 @@ mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
max = fst (M.findMax m)
-mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
+mkLeafPlan :: Bool -> Maybe LabelInfo -> M.Map Integer LabelInfo -> SwitchPlan
mkLeafPlan signed mbdef m
- | [(_,l)] <- M.toList m -- singleton map
- = Unconditionally l
+ | [(_,li@(l,_f))] <- M.toList m -- singleton map
+ = Unconditionally li
| otherwise
= JumpTable $ mkSwitchTargets signed (min,max) mbdef m
where
@@ -427,7 +501,10 @@ mkLeafPlan signed mbdef m
findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
| l == l3 && i + 1 == i'
- = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
+ = findSingleValues
+ (IfEqual i l2
+ (Unconditionally l) (moreLikely (liWeight l2) (liWeight l))
+ , xs)
findSingleValues (p, (i,p'):xs)
= (p,i) `consSL` findSingleValues (p', xs)
findSingleValues (p, [])
@@ -437,12 +514,28 @@ findSingleValues (p, [])
--- Step 5: Actually build the tree
---
--- Build a balanced tree from a separated list
-buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
-buildTree _ (p,[]) = p
-buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
- where
+-- | Build a balanced tree from a separated list
+-- Potentially by weight
+buildTree :: Bool -> Bool -> FlatSwitchPlan -> SwitchPlan
+buildTree _ _ (p,[]) = p
+buildTree byWeight signed sl
+ = --traceShow (m,likely,(planWeight left),(planWeight right), byWeight ) $
+ IfLT
+ { sp_signed = signed
+ , sp_val = m
+ , sp_ltTarget = left
+ , sp_else = right
+ , sp_likely = likely
+ }
+ where
(sl1, m, sl2) = divideSL sl
+ left = (buildTree byWeight signed sl1) :: SwitchPlan
+ right = (buildTree byWeight signed sl2)
+ likely = if byWeight
+ then moreLikely (planWeight left) (planWeight right)
+ else Nothing
+
+
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 76e4d4cb94..ea23ee6884 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -341,7 +341,8 @@ pprSwitch dflags e ids
(pairs, mbdef) = switchTargetsFallThrough ids
-- fall through case
- caseify (ix:ixs, ident) = vcat (map do_fallthrough ixs) $$ final_branch ix
+ caseify :: ([Integer], LabelInfo) -> SDoc
+ caseify (ix:ixs, lblInfo) = vcat (map do_fallthrough ixs) $$ final_branch ix
where
do_fallthrough ix =
hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon ,
@@ -349,11 +350,14 @@ pprSwitch dflags e ids
final_branch ix =
hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon ,
- text "goto" , (pprBlockId ident) <> semi ]
+ text "goto" , (pprBlockId ( liLbl lblInfo)) <> semi <+>
+ parens (text "likely:" <> ppr (liWeight lblInfo))]
caseify (_ , _ ) = panic "pprSwitch: switch with no cases!"
- def | Just l <- mbdef = text "default: goto" <+> pprBlockId l <> semi
+ def | Just li <- mbdef
+ = text "default: goto" <+> pprBlockId (liLbl li) <> semi <+>
+ parens (text "likely:" <> ppr (liWeight li))
| otherwise = empty
-- ---------------------------------------------------------------------
diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs
index 6a93ea818e..635ad04c3c 100644
--- a/compiler/cmm/PprCmm.hs
+++ b/compiler/cmm/PprCmm.hs
@@ -244,17 +244,24 @@ pprNode node = pp_node <+> pp_debug
])
4 (vcat (map ppCase cases) $$ def) $$ rbrace
where
- (cases, mbdef) = switchTargetsFallThrough ids
- ppCase (is,l) = hsep
- [ text "case"
- , commafy $ map integer is
- , text ": goto"
- , ppr l <> semi
- ]
- def | Just l <- mbdef = hsep
- [ text "default:"
- , braces (text "goto" <+> ppr l <> semi)
- ]
+ (cases, mbdef)
+ = switchTargetsFallThrough ids
+ ppCase (is,li)
+ = hsep
+ [ text "case"
+ , commafy $ map integer is
+ , (text "/* likely:" <+> ppr (liWeight li) <+> text "*/")
+ , text ": goto"
+ , ppr (liLbl li) <> semi
+ ]
+ def | Just li <- mbdef
+ = hsep
+ [ text "default" <+>
+ (text "/* likely:" <+>
+ ppr (liWeight li) <+>
+ text "*/ :")
+ , braces (text "goto" <+> ppr (liLbl li) <> semi)
+ ]
| otherwise = empty
range = brackets $ hsep [integer lo, text "..", integer hi]
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index cf602ef0b8..9b3c166d1e 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -272,7 +272,7 @@ mkRhsClosure dflags bndr _cc _bi
, StgCase (StgApp scrutinee [{-no args-}])
_ -- ignore bndr
(AlgAlt _)
- [(DataAlt _, params, sel_expr)] <- strip expr
+ [(DataAlt _, params, sel_expr, _)] <- strip expr
, StgApp selectee [{-no args-}] <- strip sel_expr
, the_fv == scrutinee -- Scrutinee is the only free variable
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 3fcc935121..ab0e6d0c2a 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -31,6 +31,7 @@ import StgCmmClosure
import StgSyn
+import BasicTypes (BranchWeight)
import MkGraph
import BlockId
import Cmm
@@ -379,7 +380,7 @@ calls to nonVoidIds in various places. So we must not look up
cgCase (StgApp v []) _ (PrimAlt _) alts
| isVoidRep (idPrimRep v) -- See Note [Scrutinising VoidRep]
- , [(DEFAULT, _, rhs)] <- alts
+ , [(DEFAULT, _, rhs, _)] <- alts
= cgExpr rhs
{- Note [Dodgy unsafeCoerce 1]
@@ -561,7 +562,7 @@ chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
chooseReturnBndrs bndr (PrimAlt _) _alts
= assertNonVoidIds [bndr]
-chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
+chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _, _)]
= ASSERT2(ids `lengthIs` n, ppr n $$ ppr ids $$ ppr _bndr)
assertNonVoidIds ids -- 'bndr' is not assigned!
@@ -578,10 +579,10 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
-> FCode ReturnKind
-- At this point the result of the case are in the binders
-cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
+cgAlts gc_plan _bndr PolyAlt [(_, _, rhs, _)]
= maybeAltHeapCheck gc_plan (cgExpr rhs)
-cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
+cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs, _)]
= maybeAltHeapCheck gc_plan (cgExpr rhs)
-- Here bndrs are *already* in scope, so don't rebind them
@@ -591,13 +592,13 @@ cgAlts gc_plan bndr (PrimAlt _) alts
; tagged_cmms <- cgAltRhss gc_plan bndr alts
; let bndr_reg = CmmLocal (idToReg dflags bndr)
- (DEFAULT,deflt) = head tagged_cmms
+ (DEFAULT,deflt,f) = head tagged_cmms
-- PrimAlts always have a DEFAULT case
-- and it always comes first
- tagged_cmms' = [(lit,code)
- | (LitAlt lit, code) <- tagged_cmms]
- ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
+ tagged_cmms' = [(lit,code,f)
+ | (LitAlt lit, code,f) <- tagged_cmms]
+ ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' (deflt,f)
; return AssignedDirectly }
cgAlts gc_plan bndr (AlgAlt tycon) alts
@@ -613,7 +614,7 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts
then do
let -- Yes, bndr_reg has constr. tag in ls bits
tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
- branches' = [(tag+1,branch) | (tag,branch) <- branches]
+ branches' = [(tag+1,branch,f) | (tag,branch,f) <- branches]
emitSwitch tag_expr branches' mb_deflt 1 fam_sz
else -- No, get tag from info table
@@ -651,18 +652,18 @@ cgAlts _ _ _ _ = panic "cgAlts"
-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
- -> FCode ( Maybe CmmAGraphScoped
- , [(ConTagZ, CmmAGraphScoped)] )
+ -> FCode ( Maybe (CmmAGraphScoped, BranchWeight)
+ , [(ConTagZ, CmmAGraphScoped, BranchWeight)] )
cgAlgAltRhss gc_plan bndr alts
= do { tagged_cmms <- cgAltRhss gc_plan bndr alts
; let { mb_deflt = case tagged_cmms of
- ((DEFAULT,rhs) : _) -> Just rhs
+ ((DEFAULT,rhs,f) : _) -> Just (rhs,f)
_other -> Nothing
-- DEFAULT is always first, if present
- ; branches = [ (dataConTagZ con, cmm)
- | (DataAlt con, cmm) <- tagged_cmms ]
+ ; branches = [ (dataConTagZ con, cmm, f)
+ | (DataAlt con, cmm, f) <- tagged_cmms ]
}
; return (mb_deflt, branches)
@@ -671,20 +672,20 @@ cgAlgAltRhss gc_plan bndr alts
-------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
- -> FCode [(AltCon, CmmAGraphScoped)]
+ -> FCode [(AltCon, CmmAGraphScoped,BranchWeight)]
cgAltRhss gc_plan bndr alts = do
dflags <- getDynFlags
let
base_reg = idToReg dflags bndr
- cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped)
- cg_alt (con, bndrs, rhs)
- = getCodeScoped $
- maybeAltHeapCheck gc_plan $
- do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
- -- alt binders are always non-void,
- -- see Note [Post-unarisation invariants] in UnariseStg
- ; _ <- cgExpr rhs
- ; return con }
+ cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped, BranchWeight)
+ cg_alt (con, bndrs, rhs, freq) = do
+ (i,c) <- getCodeScoped $ maybeAltHeapCheck gc_plan $
+ do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
+ -- alt binders are always non-void,
+ -- see Note [Post-unarisation invariants] in UnariseStg
+ ; _ <- cgExpr rhs
+ ; return con }
+ return (i,c,freq)
forkAlts (map cg_alt alts)
maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index 7c3864296c..c3baa7bb70 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -799,8 +799,10 @@ getCmm code
; return (fromOL (cgs_tops state2)) }
-mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph
-mkCmmIfThenElse e tbranch fbranch = mkCmmIfThenElse' e tbranch fbranch Nothing
+mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> Maybe Bool
+ -> FCode CmmAGraph
+mkCmmIfThenElse e tbranch fbranch likely
+ = mkCmmIfThenElse' e tbranch fbranch likely
mkCmmIfThenElse' :: CmmExpr -> CmmAGraph -> CmmAGraph
-> Maybe Bool -> FCode CmmAGraph
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index b5cd267c6b..fa2e7d2b6c 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -1127,9 +1127,8 @@ genericFabsOp w [res_r] [aa]
let g3 = catAGraphs [mkAssign res_t aa,
mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
- g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
-
- emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
+ g4 <- mkCmmIfThenElse (gt aa zero) g2 g3 Nothing
+ emit =<< mkCmmIfThenElse (eq aa zero) g1 g4 Nothing
genericFabsOp _ _ _ = panic "genericFabsOp"
@@ -1821,14 +1820,17 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
where
-- The only time the memory might overlap is when the two arrays
-- we were provided are the same array!
- -- TODO: Optimize branch for common case of no aliasing.
+ -- The common case is no aliasing so we set the likly value to `Just False`.
copy src dst dst_p src_p bytes = do
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts [
getCode $ emitMemmoveCall dst_p src_p bytes 1,
getCode $ emitMemcpyCall dst_p src_p bytes 1
]
- emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
+ emit =<< mkCmmIfThenElse
+ (cmmEqWord dflags src dst)
+ moveCall cpyCall
+ (Just False)
emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> FCode ())
@@ -1965,7 +1967,8 @@ doCopyMutableArrayOp = emitCopyArray copy
where
-- The only time the memory might overlap is when the two arrays
-- we were provided are the same array!
- -- TODO: Optimize branch for common case of no aliasing.
+ -- Optimize branch for common case of no aliasing by setting likely
+ -- to `Just False`.
copy src dst dst_p src_p bytes = do
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts [
@@ -1974,7 +1977,10 @@ doCopyMutableArrayOp = emitCopyArray copy
getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
(wORD_SIZE dflags)
]
- emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
+ emit =<< mkCmmIfThenElse
+ (cmmEqWord dflags src dst)
+ moveCall cpyCall
+ (Just False)
emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
-> FCode ()) -- ^ copy function
@@ -2028,7 +2034,8 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy
where
-- The only time the memory might overlap is when the two arrays
-- we were provided are the same array!
- -- TODO: Optimize branch for common case of no aliasing.
+ -- Optimize branch for common case of no aliasing by setting likelyhood
+ -- to `Just False`.
copy src dst dst_p src_p bytes = do
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts
@@ -2037,7 +2044,10 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy
, getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
(wORD_SIZE dflags)
]
- emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
+ emit =<< mkCmmIfThenElse
+ (cmmEqWord dflags src dst)
+ moveCall cpyCall
+ (Just False)
emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
-> FCode ()) -- ^ copy function
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index a0bca5d661..f490be5c31 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -352,6 +352,7 @@ ldvEnter cl_ptr = do
emit =<< mkCmmIfThenElse (CmmMachOp (mo_wordUGt dflags) [loadEra dflags, CmmLit (zeroCLit dflags)])
(mkStore ldv_wd new_ldv_wd)
mkNop
+ Nothing
loadEra :: DynFlags -> CmmExpr
loadEra dflags = CmmMachOp (MO_UU_Conv (cIntWidth dflags) (wordWidth dflags))
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index 94013f5c6d..68949bf190 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -55,6 +55,7 @@ import CLabel
import CmmUtils
import CmmSwitch
+import BasicTypes (BranchWeight)
import ForeignCall
import IdInfo
import Type
@@ -74,8 +75,6 @@ import RepType
import qualified Data.ByteString as BS
import qualified Data.Map as M
import Data.Char
-import Data.List
-import Data.Ord
import Data.Word
@@ -448,16 +447,16 @@ unscramble dflags vertices = mapM_ do_component components
emitSwitch :: CmmExpr -- Tag to switch on
- -> [(ConTagZ, CmmAGraphScoped)] -- Tagged branches
- -> Maybe CmmAGraphScoped -- Default branch (if any)
+ -> [(ConTagZ, CmmAGraphScoped, BranchWeight)] -- Tagged branches
+ -> Maybe (CmmAGraphScoped, BranchWeight) -- Default branch (if any)
-> ConTagZ -> ConTagZ -- Min and Max possible values;
-- behaviour outside this range is
-- undefined
-> FCode ()
-- First, two rather common cases in which there is no work to do
-emitSwitch _ [] (Just code) _ _ = emit (fst code)
-emitSwitch _ [(_,code)] Nothing _ _ = emit (fst code)
+emitSwitch _ [] (Just code) _ _ = emit (fst $ fst code)
+emitSwitch _ [(_,code,_)] Nothing _ _ = emit (fst code)
-- Right, off we go
emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do
@@ -467,7 +466,8 @@ emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do
tag_expr' <- assignTemp' tag_expr
-- Sort the branches before calling mk_discrete_switch
- let branches_lbls' = [ (fromIntegral i, l) | (i,l) <- sortBy (comparing fst) branches_lbls ]
+ let branches_lbls' = [ (fromIntegral i, l, f)
+ | (i,l,f) <- sortWith fstOf3 branches_lbls ]
let range = (fromIntegral lo_tag, fromIntegral hi_tag)
emit $ mk_discrete_switch False tag_expr' branches_lbls' mb_deflt_lbl range
@@ -476,19 +476,19 @@ emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do
mk_discrete_switch :: Bool -- ^ Use signed comparisons
-> CmmExpr
- -> [(Integer, BlockId)]
- -> Maybe BlockId
+ -> [(Integer, BlockId, BranchWeight)]
+ -> Maybe (BlockId, BranchWeight)
-> (Integer, Integer)
-> CmmAGraph
-- SINGLETON TAG RANGE: no case analysis to do
-mk_discrete_switch _ _tag_expr [(tag, lbl)] _ (lo_tag, hi_tag)
+mk_discrete_switch _ _tag_expr [(tag, lbl, _f)] _ (lo_tag, hi_tag)
| lo_tag == hi_tag
= ASSERT( tag == lo_tag )
mkBranch lbl
-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
-mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _
+mk_discrete_switch _ _tag_expr [(_tag,lbl,_)] Nothing _
= mkBranch lbl
-- The simplifier might have eliminated a case
-- so we may have e.g. case xs of
@@ -499,25 +499,17 @@ mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _
-- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans
-- See Note [Cmm Switches, the general plan] in CmmSwitch
mk_discrete_switch signed tag_expr branches mb_deflt range
- = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches)
-
-divideBranches :: Ord a => [(a,b)] -> ([(a,b)], a, [(a,b)])
-divideBranches branches = (lo_branches, mid, hi_branches)
- where
- -- 2 branches => n_branches `div` 2 = 1
- -- => branches !! 1 give the *second* tag
- -- There are always at least 2 branches here
- (mid,_) = branches !! (length branches `div` 2)
- (lo_branches, hi_branches) = span is_lo branches
- is_lo (t,_) = t < mid
+ = mkSwitch tag_expr $
+ mkSwitchTargets signed range mb_deflt
+ (M.fromList $ map (\(i,e,f)-> (i,(e,f))) branches)
--------------
emitCmmLitSwitch :: CmmExpr -- Tag to switch on
- -> [(Literal, CmmAGraphScoped)] -- Tagged branches
- -> CmmAGraphScoped -- Default branch (always)
+ -> [(Literal, CmmAGraphScoped, BranchWeight)] -- Tagged branches
+ -> (CmmAGraphScoped, BranchWeight) -- Default branch (always)
-> FCode () -- Emit the code
-emitCmmLitSwitch _scrut [] deflt = emit $ fst deflt
-emitCmmLitSwitch scrut branches deflt = do
+emitCmmLitSwitch _scrut [] (deflt,_dfreq) = emit $ fst deflt
+emitCmmLitSwitch scrut branches (deflt,dfreq) = do
scrut' <- assignTemp' scrut
join_lbl <- newBlockId
deflt_lbl <- label_code join_lbl deflt
@@ -529,20 +521,22 @@ emitCmmLitSwitch scrut branches deflt = do
-- We find the necessary type information in the literals in the branches
let signed = case head branches of
- (MachInt _, _) -> True
- (MachInt64 _, _) -> True
+ (MachInt _, _, _) -> True
+ (MachInt64 _, _, _) -> True
_ -> False
let range | signed = (tARGET_MIN_INT dflags, tARGET_MAX_INT dflags)
| otherwise = (0, tARGET_MAX_WORD dflags)
if isFloatType cmm_ty
- then emit =<< mk_float_switch rep scrut' deflt_lbl noBound branches_lbls
+ then emit =<< mk_float_switch rep scrut'
+ (deflt_lbl, dfreq) noBound
+ branches_lbls
else emit $ mk_discrete_switch
signed
scrut'
- [(litValue lit,l) | (lit,l) <- branches_lbls]
- (Just deflt_lbl)
+ [(litValue lit,l,f) | (lit,l,f) <- branches_lbls]
+ (Just (deflt_lbl, dfreq))
range
emitLabel join_lbl
@@ -552,11 +546,30 @@ type LitBound = (Maybe Literal, Maybe Literal)
noBound :: LitBound
noBound = (Nothing, Nothing)
-mk_float_switch :: Width -> CmmExpr -> BlockId
+{- TODO:
+ Currently this generates a binary search tree for the given value.
+
+ Given we have branch weights we would ideally balance the tree
+ by weight instead.
+
+ Eg. given (lit,weight) of [(0,1),(1,1),(2,1),(3,99)] we want to split the
+ list into [(0,1),(1,1),(2,1)] and [(3,99)].
+
+ Things to consider:
+ * Does it make a difference often enough to be worth the complexity
+ and increase in compile time.
+ * Negative weights have to be rounded up to zero,
+ otherwise they would distort the results.
+ * How should entries with no information be treated?
+ -> Probably good enough to use the default value.
+ * If implemented should this only apply when optimizations are
+ active?
+-}
+mk_float_switch :: Width -> CmmExpr -> (BlockId, BranchWeight)
-> LitBound
- -> [(Literal,BlockId)]
+ -> [(Literal,BlockId,BranchWeight)]
-> FCode CmmAGraph
-mk_float_switch rep scrut deflt _bounds [(lit,blk)]
+mk_float_switch rep scrut (deflt, _dfrq) _bounds [(lit,blk,_frq)]
= do dflags <- getDynFlags
return $ mkCbranch (cond dflags) deflt blk Nothing
where
@@ -565,17 +578,32 @@ mk_float_switch rep scrut deflt _bounds [(lit,blk)]
cmm_lit = mkSimpleLit dflags lit
ne = MO_F_Ne rep
-mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches
+mk_float_switch rep scrut (deflt_blk_id,dfreq) (lo_bound, hi_bound) branches
= do dflags <- getDynFlags
- lo_blk <- mk_float_switch rep scrut deflt_blk_id bounds_lo lo_branches
- hi_blk <- mk_float_switch rep scrut deflt_blk_id bounds_hi hi_branches
- mkCmmIfThenElse (cond dflags) lo_blk hi_blk
+ lo_blk <- mk_float_switch
+ rep scrut (deflt_blk_id,dfreq)
+ bounds_lo lo_branches
+ hi_blk <- mk_float_switch
+ rep scrut
+ (deflt_blk_id,dfreq) bounds_hi hi_branches
+ mkCmmIfThenElse (cond dflags) lo_blk hi_blk Nothing
where
+
(lo_branches, mid_lit, hi_branches) = divideBranches branches
bounds_lo = (lo_bound, Just mid_lit)
bounds_hi = (Just mid_lit, hi_bound)
+ divideBranches :: Ord a => [(a,b,c)] -> ([(a,b,c)], a, [(a,b,c)])
+ divideBranches branches = (lo_branches, mid, hi_branches)
+ where
+ -- 2 branches => n_branches `div` 2 = 1
+ -- => branches !! 1 give the *second* tag
+ -- There are always at least 2 branches here
+ (mid,_,_) = branches !! (length branches `div` 2)
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_,_) = t < mid
+
cond dflags = CmmMachOp lt [scrut, CmmLit cmm_lit]
where
cmm_lit = mkSimpleLit dflags mid_lit
@@ -583,21 +611,23 @@ mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches
--------------
-label_default :: BlockId -> Maybe CmmAGraphScoped -> FCode (Maybe BlockId)
+label_default :: BlockId -> Maybe (CmmAGraphScoped, BranchWeight)
+ -> FCode (Maybe (BlockId, BranchWeight))
label_default _ Nothing
= return Nothing
-label_default join_lbl (Just code)
+label_default join_lbl (Just (code,f))
= do lbl <- label_code join_lbl code
- return (Just lbl)
+ return (Just (lbl,f))
--------------
-label_branches :: BlockId -> [(a,CmmAGraphScoped)] -> FCode [(a,BlockId)]
+label_branches :: BlockId -> [(a,CmmAGraphScoped, BranchWeight)]
+ -> FCode [(a,BlockId,BranchWeight)]
label_branches _join_lbl []
= return []
-label_branches join_lbl ((tag,code):branches)
+label_branches join_lbl ((tag,code,freq):branches)
= do lbl <- label_code join_lbl code
branches' <- label_branches join_lbl branches
- return ((tag,lbl):branches')
+ return ((tag,lbl,freq):branches')
--------------
label_code :: BlockId -> CmmAGraphScoped -> FCode BlockId
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index e812dd445f..65f7d6652c 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -13,6 +13,7 @@ import Llvm
import LlvmCodeGen.Base
import LlvmCodeGen.Regs
+import BasicTypes (BranchWeight, getWeight, neverFreq)
import BlockId
import CodeGen.Platform ( activeStgRegs, callerSaves )
import CLabel
@@ -38,6 +39,7 @@ import Util
import Control.Monad.Trans.Class
import Control.Monad.Trans.Writer
+import Data.Int (Int32)
import Data.Semigroup ( Semigroup )
import qualified Data.Semigroup as Semigroup
import Data.List ( nub )
@@ -1071,7 +1073,19 @@ For a real example of this, see ./rts/StgStdThunks.cmm
-}
-
+switchMetaData :: BranchWeight -> [BranchWeight] -> MetaAnnot
+switchMetaData defFreq altFreqs =
+ let values = map
+ -- LLVM branch weights are i32 typed so we cap it there.
+ (\w ->
+ min (fromIntegral (maxBound :: Int32))
+ (fromIntegral . getWeight $ w))
+ (defFreq:altFreqs)
+ types = repeat (LMInt $ fromIntegral 32)
+ lits = zipWith LMIntLit values types
+ weights = map (MetaVar . LMLitVar) lits
+ in
+ MetaAnnot (fsLit "branch_weights") $ MetaStruct weights
-- | Switch branch
genSwitch :: CmmExpr -> SwitchTargets -> LlvmM StmtData
@@ -1079,13 +1093,14 @@ genSwitch cond ids = do
(vc, stmts, top) <- exprToVar cond
let ty = getVarType vc
- let labels = [ (mkIntLit ty ix, blockIdToLlvm b)
- | (ix, b) <- switchTargetsCases ids ]
+ let (labels,fs) = unzip [ ((mkIntLit ty ix, blockIdToLlvm b), f)
+ | (ix, (b,f)) <- switchTargetsCases ids ]
-- out of range is undefined, so let's just branch to first label
- let defLbl | Just l <- switchTargetsDefault ids = blockIdToLlvm l
- | otherwise = snd (head labels)
+ let (defLbl,defFreq)
+ | Just (l,f) <- switchTargetsDefault ids = (blockIdToLlvm l,f)
+ | otherwise = (snd (head labels),neverFreq)
- let s1 = Switch vc defLbl labels
+ let s1 = MetaStmt [switchMetaData defFreq fs ] (Switch vc defLbl labels)
return $ (stmts `snocOL` s1, top)
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index e6b9cf6b93..c59f26c1e8 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -485,6 +485,8 @@ data GeneralFlag
| Opt_SolveConstantDicts
| Opt_AlignmentSanitisation
| Opt_CatchBottoms
+ | Opt_UnlikelyBottoms -- ^ Assume bottoming alternatives are not taken.
+ | Opt_WeightBalanceAlts -- ^ Split trees by branch weight where applicable.
-- PreInlining is on by default. The option is there just to see how
-- bad things get if you turn it off!
@@ -583,6 +585,7 @@ data GeneralFlag
| Opt_SuppressUniques
| Opt_SuppressStgFreeVars
| Opt_SuppressTicks -- Replaces Opt_PprShowTicks
+ | Opt_SuppressTimestamps -- ^ Suppress timestamps in dumps
-- temporary flags
| Opt_AutoLinkPackages
@@ -3040,7 +3043,8 @@ dynamic_flags_deps = [
setGeneralFlag Opt_SuppressIdInfo
setGeneralFlag Opt_SuppressTicks
setGeneralFlag Opt_SuppressStgFreeVars
- setGeneralFlag Opt_SuppressTypeSignatures)
+ setGeneralFlag Opt_SuppressTypeSignatures
+ setGeneralFlag Opt_SuppressTimestamps)
------ Debugging ----------------------------------------------------
, make_ord_flag defGhcFlag "dstg-stats"
@@ -3835,10 +3839,12 @@ dFlagsDeps = [
flagSpec "suppress-idinfo" Opt_SuppressIdInfo,
flagSpec "suppress-unfoldings" Opt_SuppressUnfoldings,
flagSpec "suppress-module-prefixes" Opt_SuppressModulePrefixes,
+ flagSpec "suppress-timestamps" Opt_SuppressTimestamps,
flagSpec "suppress-type-applications" Opt_SuppressTypeApplications,
flagSpec "suppress-type-signatures" Opt_SuppressTypeSignatures,
flagSpec "suppress-uniques" Opt_SuppressUniques,
- flagSpec "suppress-var-kinds" Opt_SuppressVarKinds]
+ flagSpec "suppress-var-kinds" Opt_SuppressVarKinds
+ ]
-- | These @-f\<blah\>@ flags can all be reversed with @-fno-\<blah\>@
fFlags :: [FlagSpec GeneralFlag]
@@ -3956,7 +3962,9 @@ fFlagsDeps = [
flagSpec "abstract-refinement-substitutions" Opt_AbstractRefSubstitutions,
flagSpec "unclutter-valid-substitutions" Opt_UnclutterValidSubstitutions,
flagSpec "show-loaded-modules" Opt_ShowLoadedModules,
- flagSpec "whole-archive-hs-libs" Opt_WholeArchiveHsLibs
+ flagSpec "whole-archive-hs-libs" Opt_WholeArchiveHsLibs,
+ flagSpec "unlikely-bottoms" Opt_UnlikelyBottoms,
+ flagSpec "weight-balance-alts" Opt_WeightBalanceAlts
]
-- | These @-f\<blah\>@ flags can all be reversed with @-fno-\<blah\>@
@@ -4344,6 +4352,8 @@ optLevelFlags -- see Note [Documenting optimisation flags]
, ([2], Opt_LiberateCase)
, ([2], Opt_SpecConstr)
+ , ([1,2], Opt_UnlikelyBottoms)
+ , ([1,2], Opt_WeightBalanceAlts)
-- , ([2], Opt_RegsGraph)
-- RegsGraph suffers performance regression. See #7679
-- , ([2], Opt_StaticArgumentTransformation)
diff --git a/compiler/main/ErrUtils.hs b/compiler/main/ErrUtils.hs
index 13ff017e09..c7fb8babe9 100644
--- a/compiler/main/ErrUtils.hs
+++ b/compiler/main/ErrUtils.hs
@@ -500,9 +500,12 @@ dumpSDoc dflags print_unqual flag hdr doc =
doc' <- if null hdr
then return doc
else do t <- getCurrentTime
- let d = text (show t)
- $$ blankLine
- $$ doc
+ let timeStamp = if (gopt Opt_SuppressTimestamps dflags)
+ then empty
+ else text (show t)
+ let d = timeStamp
+ $$ blankLine
+ $$ doc
return $ mkDumpDoc hdr d
defaultLogActionHPrintDoc dflags handle doc' dump_style
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index e2c568c836..18ce58a11e 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -2081,7 +2081,11 @@ genSwitch dflags expr targets
BCTR ids (Just lbl)
]
return code
- where (offset, ids) = switchTargetsToTable targets
+ where
+ (offset, lblInfos) = switchTargetsToTable targets
+ -- lblInfos contains branch weights too,
+ -- but we only use the labels for now.
+ ids = map (fmap liLbl) lblInfos
generateJumpTableForInstr :: DynFlags -> Instr
-> Maybe (NatCmmDecl CmmStatics Instr)
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 6dfd58950e..b1e717b427 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -339,7 +339,11 @@ genSwitch dflags expr targets
, LD II32 (AddrRegReg base_reg offset_reg) dst
, JMP_TBL (AddrRegImm dst (ImmInt 0)) ids label
, NOP ]
- where (offset, ids) = switchTargetsToTable targets
+ where
+ (offset, lblInfos) = switchTargetsToTable targets
+ -- lblInfos contains branch weights too,
+ -- we only use the labels for now.
+ ids = map (fmap liLbl) lblInfos
generateJumpTableForInstr :: DynFlags -> Instr
-> Maybe (NatCmmDecl CmmStatics Instr)
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 09757e769e..fd1640acf9 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -2840,7 +2840,11 @@ genSwitch dflags expr targets
JMP_TBL op ids (Section ReadOnlyData lbl) lbl
]
return code
- where (offset, ids) = switchTargetsToTable targets
+ where
+ (offset, lblInfos) = switchTargetsToTable targets
+ -- lblInfos contains branch weights too,
+ -- but for the jump table we use only the labels.
+ ids = map (fmap liLbl) lblInfos
generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
diff --git a/compiler/simplStg/StgCse.hs b/compiler/simplStg/StgCse.hs
index 6e896176f9..4924b508c7 100644
--- a/compiler/simplStg/StgCse.hs
+++ b/compiler/simplStg/StgCse.hs
@@ -330,16 +330,16 @@ stgCseExpr env (StgLetNoEscape binds body)
-- Case alternatives
-- Extend the CSE environment
stgCseAlt :: CseEnv -> OutId -> InStgAlt -> OutStgAlt
-stgCseAlt env case_bndr (DataAlt dataCon, args, rhs)
+stgCseAlt env case_bndr (DataAlt dataCon, args, rhs, freq)
= let (env1, args') = substBndrs env args
env2 = addDataCon case_bndr dataCon (map StgVarArg args') env1
-- see note [Case 2: CSEing case binders]
rhs' = stgCseExpr env2 rhs
- in (DataAlt dataCon, args', rhs')
-stgCseAlt env _ (altCon, args, rhs)
+ in (DataAlt dataCon, args', rhs', freq)
+stgCseAlt env _ (altCon, args, rhs, freq)
= let (env1, args') = substBndrs env args
rhs' = stgCseExpr env1 rhs
- in (altCon, args', rhs')
+ in (altCon, args', rhs', freq)
-- Bindings
stgCseBind :: CseEnv -> InStgBinding -> (Maybe OutStgBinding, CseEnv)
@@ -390,8 +390,8 @@ mkStgCase scrut bndr ty alts | all isBndr alts = scrut
where
-- see Note [All alternatives are the binder]
- isBndr (_, _, StgApp f []) = f == bndr
- isBndr _ = False
+ isBndr (_, _, StgApp f [], _) = f == bndr
+ isBndr _ = False
-- Utilities
diff --git a/compiler/simplStg/StgStats.hs b/compiler/simplStg/StgStats.hs
index 712ec2d22e..8dd5630932 100644
--- a/compiler/simplStg/StgStats.hs
+++ b/compiler/simplStg/StgStats.hs
@@ -172,6 +172,6 @@ statExpr (StgCase expr _ _ alts)
countOne StgCases
where
stat_alts alts
- = combineSEs (map statExpr [ e | (_,_,e) <- alts ])
+ = combineSEs (map statExpr [ e | (_,_,e,_) <- alts ])
statExpr (StgLam {}) = panic "statExpr StgLam"
diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs
index 57dd699f70..b72e24b3a7 100644
--- a/compiler/simplStg/UnariseStg.hs
+++ b/compiler/simplStg/UnariseStg.hs
@@ -382,7 +382,7 @@ elimCase :: UnariseEnv
-> [OutStgArg] -- non-void args
-> InId -> AltType -> [InStgAlt] -> UniqSM OutStgExpr
-elimCase rho args bndr (MultiValAlt _) [(_, bndrs, rhs)]
+elimCase rho args bndr (MultiValAlt _) [(_, bndrs, rhs, _freq)]
= do let rho1 = extendRho rho bndr (MultiVal args)
rho2
| isUnboxedTupleBndr bndr
@@ -414,47 +414,51 @@ elimCase _ args bndr alt_ty alts
--------------------------------------------------------------------------------
unariseAlts :: UnariseEnv -> AltType -> InId -> [StgAlt] -> UniqSM [StgAlt]
-unariseAlts rho (MultiValAlt n) bndr [(DEFAULT, [], e)]
+unariseAlts rho (MultiValAlt n) bndr [(DEFAULT, [], e, f)]
| isUnboxedTupleBndr bndr
= do (rho', ys) <- unariseConArgBinder rho bndr
e' <- unariseExpr rho' e
- return [(DataAlt (tupleDataCon Unboxed n), ys, e')]
+ return [(DataAlt (tupleDataCon Unboxed n), ys, e', f)]
-unariseAlts rho (MultiValAlt n) bndr [(DataAlt _, ys, e)]
+unariseAlts rho (MultiValAlt n) bndr [(DataAlt _, ys, e, f)]
| isUnboxedTupleBndr bndr
= do (rho', ys1) <- unariseConArgBinders rho ys
MASSERT(ys1 `lengthIs` n)
let rho'' = extendRho rho' bndr (MultiVal (map StgVarArg ys1))
e' <- unariseExpr rho'' e
- return [(DataAlt (tupleDataCon Unboxed n), ys1, e')]
+ return [(DataAlt (tupleDataCon Unboxed n), ys1, e', f)]
unariseAlts _ (MultiValAlt _) bndr alts
| isUnboxedTupleBndr bndr
= pprPanic "unariseExpr: strange multi val alts" (ppr alts)
-- In this case we don't need to scrutinize the tag bit
-unariseAlts rho (MultiValAlt _) bndr [(DEFAULT, _, rhs)]
+unariseAlts rho (MultiValAlt _) bndr [(DEFAULT, _, rhs, f)]
| isUnboxedSumBndr bndr
= do (rho_sum_bndrs, sum_bndrs) <- unariseConArgBinder rho bndr
rhs' <- unariseExpr rho_sum_bndrs rhs
- return [(DataAlt (tupleDataCon Unboxed (length sum_bndrs)), sum_bndrs, rhs')]
+ return
+ [(DataAlt (tupleDataCon Unboxed (length sum_bndrs)),
+ sum_bndrs, rhs', f)]
unariseAlts rho (MultiValAlt _) bndr alts
| isUnboxedSumBndr bndr
- = do (rho_sum_bndrs, scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr
+ = do (rho_sum_bndrs,
+ scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr
alts' <- unariseSumAlts rho_sum_bndrs (map StgVarArg real_bndrs) alts
let inner_case = StgCase (StgApp tag_bndr []) tag_bndr tagAltTy alts'
return [ (DataAlt (tupleDataCon Unboxed (length scrt_bndrs)),
scrt_bndrs,
- inner_case) ]
+ inner_case, defFreq) ]
unariseAlts rho _ _ alts
= mapM (\alt -> unariseAlt rho alt) alts
unariseAlt :: UnariseEnv -> StgAlt -> UniqSM StgAlt
-unariseAlt rho (con, xs, e)
+unariseAlt rho (con, xs, e, f)
= do (rho', xs') <- unariseConArgBinders rho xs
- (con, xs',) <$> unariseExpr rho' e
+ e' <- unariseExpr rho' e
+ return (con, xs', e', f)
--------------------------------------------------------------------------------
@@ -472,13 +476,13 @@ unariseSumAlt :: UnariseEnv
-> [StgArg] -- sum components _excluding_ the tag bit.
-> StgAlt -- original alternative with sum LHS
-> UniqSM StgAlt
-unariseSumAlt rho _ (DEFAULT, _, e)
- = ( DEFAULT, [], ) <$> unariseExpr rho e
+unariseSumAlt rho _ (DEFAULT, _, e, f)
+ = unariseExpr rho e >>= \e -> return ( DEFAULT, [], e, f)
-unariseSumAlt rho args (DataAlt sumCon, bs, e)
+unariseSumAlt rho args (DataAlt sumCon, bs, e, f)
= do let rho' = mapSumIdBinders bs args rho
e' <- unariseExpr rho' e
- return ( LitAlt (MachInt (fromIntegral (dataConTag sumCon))), [], e' )
+ return ( LitAlt (MachInt (fromIntegral (dataConTag sumCon))), [], e', f)
unariseSumAlt _ scrt alt
= pprPanic "unariseSumAlt" (ppr scrt $$ ppr alt)
@@ -780,6 +784,6 @@ mkDefaultLitAlt :: [StgAlt] -> [StgAlt]
-- Since they are exhaustive, we can replace one with DEFAULT, to avoid
-- generating a final test. Remember, the DEFAULT comes first if it exists.
mkDefaultLitAlt [] = pprPanic "elimUbxSumExpr.mkDefaultAlt" (text "Empty alts")
-mkDefaultLitAlt alts@((DEFAULT, _, _) : _) = alts
-mkDefaultLitAlt ((LitAlt{}, [], rhs) : alts) = (DEFAULT, [], rhs) : alts
+mkDefaultLitAlt alts@((DEFAULT, _, _, _) : _) = alts
+mkDefaultLitAlt ((LitAlt{}, [], rhs, f) : alts) = (DEFAULT, [], rhs, f) : alts
mkDefaultLitAlt alts = pprPanic "mkDefaultLitAlt" (text "Not a lit alt:" <+> ppr alts)
diff --git a/compiler/stgSyn/CoreToStg.hs b/compiler/stgSyn/CoreToStg.hs
index 671f3eb5b5..47aefd899e 100644
--- a/compiler/stgSyn/CoreToStg.hs
+++ b/compiler/stgSyn/CoreToStg.hs
@@ -18,7 +18,7 @@ module CoreToStg ( coreToStg ) where
import GhcPrelude
import CoreSyn
-import CoreUtils ( exprType, findDefault, isJoinBind )
+import CoreUtils ( exprType, findDefault, isJoinBind, exprIsBottom )
import CoreArity ( manifestArity )
import StgSyn
@@ -34,7 +34,7 @@ import VarEnv
import Module
import Name ( isExternalName, nameOccName, nameModule_maybe )
import OccName ( occNameFS )
-import BasicTypes ( Arity )
+import BasicTypes ( Arity, neverFreq, defFreq )
import TysWiredIn ( unboxedUnitDataCon )
import Literal
import Outputable
@@ -348,7 +348,7 @@ coreToTopStgRhs
-> CtsM (StgRhs, FreeVarsInfo, CollectedCCs)
coreToTopStgRhs dflags ccs this_mod scope_fv_info (bndr, rhs)
- = do { (new_rhs, rhs_fvs) <- coreToStgExpr rhs
+ = do { (new_rhs, rhs_fvs) <- coreToStgExpr dflags rhs
; let (stg_rhs, ccs') =
mkTopStgRhs dflags this_mod ccs rhs_fvs bndr bndr_info new_rhs
@@ -385,7 +385,7 @@ coreToTopStgRhs dflags ccs this_mod scope_fv_info (bndr, rhs)
-- ---------------------------------------------------------------------------
coreToStgExpr
- :: CoreExpr
+ :: DynFlags -> CoreExpr
-> CtsM (StgExpr, -- Decorated STG expr
FreeVarsInfo) -- Its free vars (NB free, not live)
@@ -397,23 +397,23 @@ coreToStgExpr
-- No LitInteger's should be left by the time this is called. CorePrep
-- should have converted them all to a real core representation.
-coreToStgExpr (Lit (LitInteger {})) = panic "coreToStgExpr: LitInteger"
-coreToStgExpr (Lit l) = return (StgLit l, emptyFVInfo)
-coreToStgExpr (Var v) = coreToStgApp Nothing v [] []
-coreToStgExpr (Coercion _) = coreToStgApp Nothing coercionTokenId [] []
+coreToStgExpr _df (Lit (LitInteger {})) = panic "coreToStgExpr: LitInteger"
+coreToStgExpr _df (Lit l) = return (StgLit l, emptyFVInfo)
+coreToStgExpr df (Var v) = coreToStgApp df Nothing v [] []
+coreToStgExpr df (Coercion _) = coreToStgApp df Nothing coercionTokenId [] []
-coreToStgExpr expr@(App _ _)
- = coreToStgApp Nothing f args ticks
+coreToStgExpr df expr@(App _ _)
+ = coreToStgApp df Nothing f args ticks
where
(f, args, ticks) = myCollectArgs expr
-coreToStgExpr expr@(Lam _ _)
+coreToStgExpr df expr@(Lam _ _)
= let
(args, body) = myCollectBinders expr
args' = filterStgBinders args
in
extendVarEnvCts [ (a, LambdaBound) | a <- args' ] $ do
- (body, body_fvs) <- coreToStgExpr body
+ (body, body_fvs) <- coreToStgExpr df body
let
fvs = args' `minusFVBinders` body_fvs
result_expr | null args' = body
@@ -421,22 +421,22 @@ coreToStgExpr expr@(Lam _ _)
return (result_expr, fvs)
-coreToStgExpr (Tick tick expr)
+coreToStgExpr df (Tick tick expr)
= do case tick of
HpcTick{} -> return ()
ProfNote{} -> return ()
SourceNote{} -> return ()
Breakpoint{} -> panic "coreToStgExpr: breakpoint should not happen"
- (expr2, fvs) <- coreToStgExpr expr
+ (expr2, fvs) <- coreToStgExpr df expr
return (StgTick tick expr2, fvs)
-coreToStgExpr (Cast expr _)
- = coreToStgExpr expr
+coreToStgExpr df (Cast expr _)
+ = coreToStgExpr df expr
-- Cases require a little more real work.
-coreToStgExpr (Case scrut _ _ [])
- = coreToStgExpr scrut
+coreToStgExpr df (Case scrut _ _ [])
+ = coreToStgExpr df scrut
-- See Note [Empty case alternatives] in CoreSyn If the case
-- alternatives are empty, the scrutinee must diverge or raise an
-- exception, so we can just dive into it.
@@ -447,7 +447,7 @@ coreToStgExpr (Case scrut _ _ [])
-- runtime system error function.
-coreToStgExpr (Case scrut bndr _ alts) = do
+coreToStgExpr df (Case scrut bndr _ alts) = do
(alts2, alts_fvs)
<- extendVarEnvCts [(bndr, LambdaBound)] $ do
(alts2, fvs_s) <- mapAndUnzipM vars_alt alts
@@ -467,34 +467,43 @@ coreToStgExpr (Case scrut bndr _ alts) = do
-- We tell the scrutinee that everything
-- live in the alts is live in it, too.
- (scrut2, scrut_fvs) <- coreToStgExpr scrut
+ (scrut2, scrut_fvs) <- coreToStgExpr df scrut
return (
StgCase scrut2 bndr' (mkStgAltType bndr alts) alts2,
scrut_fvs `unionFVInfo` alts_fvs_wo_bndr
)
where
+ alt_freq rhs
+ | gopt Opt_UnlikelyBottoms df
+ , exprIsBottom rhs
+ = -- If a expression is bottom we can safely assume it's
+ -- alternative is rarely taken. Hence we set the
+ -- branch weight to zero/never.
+ -- For details see Note [Branch weights] in BasicTypes
+ neverFreq
+ | otherwise = defFreq
vars_alt (con, binders, rhs)
| DataAlt c <- con, c == unboxedUnitDataCon
= -- This case is a bit smelly.
-- See Note [Nullary unboxed tuple] in Type.hs
-- where a nullary tuple is mapped to (State# World#)
ASSERT( null binders )
- do { (rhs2, rhs_fvs) <- coreToStgExpr rhs
- ; return ((DEFAULT, [], rhs2), rhs_fvs) }
+ do { (rhs2, rhs_fvs) <- coreToStgExpr df rhs
+ ; return ((DEFAULT, [], rhs2, alt_freq rhs), rhs_fvs) }
| otherwise
= let -- Remove type variables
binders' = filterStgBinders binders
in
extendVarEnvCts [(b, LambdaBound) | b <- binders'] $ do
- (rhs2, rhs_fvs) <- coreToStgExpr rhs
- return ( (con, binders', rhs2),
+ (rhs2, rhs_fvs) <- coreToStgExpr df rhs
+ return ( (con, binders', rhs2, alt_freq rhs),
binders' `minusFVBinders` rhs_fvs )
-coreToStgExpr (Let bind body) = do
- coreToStgLet bind body
+coreToStgExpr df (Let bind body) = do
+ coreToStgLet df bind body
-coreToStgExpr e = pprPanic "coreToStgExpr" (ppr e)
+coreToStgExpr _ e = pprPanic "coreToStgExpr" (ppr e)
mkStgAltType :: Id -> [CoreAlt] -> AltType
mkStgAltType bndr alts
@@ -541,7 +550,8 @@ mkStgAltType bndr alts
-- ---------------------------------------------------------------------------
coreToStgApp
- :: Maybe UpdateFlag -- Just upd <=> this application is
+ :: DynFlags
+ -> Maybe UpdateFlag -- Just upd <=> this application is
-- the rhs of a thunk binding
-- x = [...] \upd [] -> the_app
-- with specified update flag
@@ -551,8 +561,8 @@ coreToStgApp
-> CtsM (StgExpr, FreeVarsInfo)
-coreToStgApp _ f args ticks = do
- (args', args_fvs, ticks') <- coreToStgArgs args
+coreToStgApp df _ f args ticks = do
+ (args', args_fvs, ticks') <- coreToStgArgs df args
how_bound <- lookupVarCts f
let
@@ -618,26 +628,27 @@ coreToStgApp _ f args ticks = do
-- This is the guy that turns applications into A-normal form
-- ---------------------------------------------------------------------------
-coreToStgArgs :: [CoreArg] -> CtsM ([StgArg], FreeVarsInfo, [Tickish Id])
-coreToStgArgs []
+coreToStgArgs :: DynFlags -> [CoreArg]
+ -> CtsM ([StgArg], FreeVarsInfo, [Tickish Id])
+coreToStgArgs _ []
= return ([], emptyFVInfo, [])
-coreToStgArgs (Type _ : args) = do -- Type argument
- (args', fvs, ts) <- coreToStgArgs args
+coreToStgArgs df (Type _ : args) = do -- Type argument
+ (args', fvs, ts) <- coreToStgArgs df args
return (args', fvs, ts)
-coreToStgArgs (Coercion _ : args) -- Coercion argument; replace with place holder
- = do { (args', fvs, ts) <- coreToStgArgs args
+coreToStgArgs df (Coercion _ : args) -- Coercion argument; replace with place holder
+ = do { (args', fvs, ts) <- coreToStgArgs df args
; return (StgVarArg coercionTokenId : args', fvs, ts) }
-coreToStgArgs (Tick t e : args)
+coreToStgArgs df (Tick t e : args)
= ASSERT( not (tickishIsCode t) )
- do { (args', fvs, ts) <- coreToStgArgs (e : args)
+ do { (args', fvs, ts) <- coreToStgArgs df (e : args)
; return (args', fvs, t:ts) }
-coreToStgArgs (arg : args) = do -- Non-type argument
- (stg_args, args_fvs, ticks) <- coreToStgArgs args
- (arg', arg_fvs) <- coreToStgExpr arg
+coreToStgArgs df (arg : args) = do -- Non-type argument
+ (stg_args, args_fvs, ticks) <- coreToStgArgs df args
+ (arg', arg_fvs) <- coreToStgExpr df arg
let
fvs = args_fvs `unionFVInfo` arg_fvs
@@ -677,12 +688,13 @@ coreToStgArgs (arg : args) = do -- Non-type argument
-- ---------------------------------------------------------------------------
coreToStgLet
- :: CoreBind -- bindings
+ :: DynFlags
+ -> CoreBind -- bindings
-> CoreExpr -- body
-> CtsM (StgExpr, -- new let
FreeVarsInfo) -- variables free in the whole let
-coreToStgLet bind body = do
+coreToStgLet df bind body = do
(bind2, bind_fvs,
body2, body_fvs)
<- mfix $ \ ~(_, _, _, rec_body_fvs) -> do
@@ -692,7 +704,7 @@ coreToStgLet bind body = do
-- Do the body
extendVarEnvCts env_ext $ do
- (body2, body_fvs) <- coreToStgExpr body
+ (body2, body_fvs) <- coreToStgExpr df body
return (bind2, bind_fvs,
body2, body_fvs)
@@ -724,7 +736,7 @@ coreToStgLet bind body = do
vars_bind body_fvs (NonRec binder rhs) = do
- (rhs2, bind_fvs) <- coreToStgRhs body_fvs (binder,rhs)
+ (rhs2, bind_fvs) <- coreToStgRhs df body_fvs (binder,rhs)
let
env_ext_item = mk_binding binder rhs
@@ -742,19 +754,20 @@ coreToStgLet bind body = do
in
extendVarEnvCts env_ext $ do
(rhss2, fvss)
- <- mapAndUnzipM (coreToStgRhs rec_scope_fvs) pairs
+ <- mapAndUnzipM (coreToStgRhs df rec_scope_fvs) pairs
let
bind_fvs = unionFVInfos fvss
return (StgRec (binders `zip` rhss2),
bind_fvs, env_ext)
-coreToStgRhs :: FreeVarsInfo -- Free var info for the scope of the binding
+coreToStgRhs :: DynFlags
+ -> FreeVarsInfo -- Free var info for the scope of the binding
-> (Id,CoreExpr)
-> CtsM (StgRhs, FreeVarsInfo)
-coreToStgRhs scope_fv_info (bndr, rhs) = do
- (new_rhs, rhs_fvs) <- coreToStgExpr rhs
+coreToStgRhs df scope_fv_info (bndr, rhs) = do
+ (new_rhs, rhs_fvs) <- coreToStgExpr df rhs
return (mkStgRhs rhs_fvs bndr bndr_info new_rhs, rhs_fvs)
where
bndr_info = lookupFVInfo scope_fv_info bndr
diff --git a/compiler/stgSyn/StgLint.hs b/compiler/stgSyn/StgLint.hs
index bb2064ab48..30b2b991a3 100644
--- a/compiler/stgSyn/StgLint.hs
+++ b/compiler/stgSyn/StgLint.hs
@@ -36,6 +36,8 @@ module StgLint ( lintStgTopBindings ) where
import GhcPrelude
+import BasicTypes (BranchWeight)
+
import StgSyn
import DynFlags
@@ -184,18 +186,20 @@ lintStgExpr (StgCase scrut bndr alts_type alts) = do
addInScopeVars [bndr | in_scope] (mapM_ lintAlt alts)
-lintAlt :: (AltCon, [Id], StgExpr) -> LintM ()
+lintAlt :: (AltCon, [Id], StgExpr, BranchWeight) -> LintM ()
-lintAlt (DEFAULT, _, rhs) =
+lintAlt (DEFAULT, _, rhs, _) =
lintStgExpr rhs
-lintAlt (LitAlt _, _, rhs) =
+lintAlt (LitAlt _, _, rhs, _) =
lintStgExpr rhs
-lintAlt (DataAlt _, bndrs, rhs) = do
+lintAlt (DataAlt _, bndrs, rhs, _) = do
mapM_ checkPostUnariseBndr bndrs
addInScopeVars bndrs (lintStgExpr rhs)
+
+
{-
************************************************************************
* *
diff --git a/compiler/stgSyn/StgSyn.hs b/compiler/stgSyn/StgSyn.hs
index 29d544103f..3f7cbc0f46 100644
--- a/compiler/stgSyn/StgSyn.hs
+++ b/compiler/stgSyn/StgSyn.hs
@@ -47,6 +47,7 @@ module StgSyn (
import GhcPrelude
+import BasicTypes (BranchWeight)
import CoreSyn ( AltCon, Tickish )
import CostCentre ( CostCentreStack )
import Data.ByteString ( ByteString )
@@ -479,7 +480,7 @@ rhsHasCafRefs (StgRhsCon _ _ args)
= any stgArgHasCafRefs args
altHasCafRefs :: GenStgAlt bndr Id -> Bool
-altHasCafRefs (_, _, rhs) = exprHasCafRefs rhs
+altHasCafRefs (_, _, rhs, _) = exprHasCafRefs rhs
stgArgHasCafRefs :: GenStgArg Id -> Bool
stgArgHasCafRefs (StgVarArg id)
@@ -543,7 +544,9 @@ rather than from the scrutinee type.
type GenStgAlt bndr occ
= (AltCon, -- alts: data constructor,
[bndr], -- constructor's parameters,
- GenStgExpr bndr occ) -- ...right-hand side.
+ GenStgExpr bndr occ, -- ..right-hand side,
+ BranchWeight) -- relative chance to take this alt, see
+ -- Note [Branch weights] in BasicTypes
data AltType
= PolyAlt -- Polymorphic (a lifted type variable)
@@ -784,8 +787,11 @@ pprStgExpr (StgCase expr bndr alt_type alts)
pprStgAlt :: (OutputableBndr bndr, Outputable occ, Ord occ)
=> GenStgAlt bndr occ -> SDoc
-pprStgAlt (con, params, expr)
- = hang (hsep [ppr con, sep (map (pprBndr CasePatBind) params), text "->"])
+pprStgAlt (con, params, expr, f)
+ = hang (hsep [ppr con,
+ sep (map (pprBndr CasePatBind) params),
+ parens (text "likely:" <> ppr f) ,
+ text "->"])
4 (ppr expr <> semi)
pprStgOp :: StgOp -> SDoc