diff options
Diffstat (limited to 'compiler')
25 files changed, 610 insertions, 284 deletions
diff --git a/compiler/basicTypes/BasicTypes.hs b/compiler/basicTypes/BasicTypes.hs index d8c3eb739d..c8aaa057fd 100644 --- a/compiler/basicTypes/BasicTypes.hs +++ b/compiler/basicTypes/BasicTypes.hs @@ -106,7 +106,10 @@ module BasicTypes( IntWithInf, infinity, treatZeroAsInf, mkIntWithInf, intGtLimit, - SpliceExplicitFlag(..) + SpliceExplicitFlag(..), + + BranchWeight(..), neverFreq, rareFreq, someFreq, defFreq, oftenFreq, + usuallyFreq, alwaysFreq, combinedFreqs, moreLikely, getWeight ) where import GhcPrelude @@ -1613,3 +1616,105 @@ data SpliceExplicitFlag = ExplicitSplice | -- ^ <=> $(f x y) ImplicitSplice -- ^ <=> f x y, i.e. a naked top level expression deriving Data + + +{- + Note [Branch weights] + ~~~~~~~~~~~~~~~~~~~~~~~ + + The basic rundown: + * From STG onward we track which brances are most likely taken. + * We generate this info by + + Checking for bottom during the core-to-stg translation. + Expressions which we can detect as being bottom during compile time can + safely be assumed to be rarely taken. + + Heap/Stack checks when generating Cmm code: + Running out of heap/stack space is comperativly rare so we assume these + are not taken. + + User annotations when compiling hand written Cmm code. + This makes it possible to have the compiler optimize for the common case + without relying on internal details of the cmm to assembly translation. + * When generating code we use this information to generate better assembly: + + At the moment this only influences code layout (CmmContFlowOpt) + where we try to make the common case a fallthrough since thats generally + faster. + + TODO: Balance if/else trees for cases by weight instead of node count. + + This is part of #14672. + [Make likelyhood of branches/conditions available throughout the compiler.] + + At the Stg level we record in case alternatives a branch weight. + Weights are relative to each other with higher numbers being more + likely to be taken. + + We currently generate this information in CoreToStg by checking + alternatives for bottom expressions and marking them as never + called. + + When generating Cmm this is included in switchtargets as is or translated + to likely/not likely for conditional statements. + + This information is then used in the backend for optimizing control + flow. + + As long as we only perform simple optimizations that just check + which of two branches is more likely to be taken using a Int based + representation is fine. + + TODO: For more involved optimizations like calculating hot paths + stricter semantics might be needed. As currently a branch with weight + 2 and weight 4 only are meaniful compareable if they branch off at the + same point. (Eg a single case statement) + Conditionals would also require more information than just + likely/unlikely/unknown for this to work. + +-} + +-- | Frequency with which a alternative is taken, +-- values are relative to each other. Higher means +-- a branch is taken more often. +-- See alsoe Note [Branch weights] +newtype BranchWeight = Weight Int deriving (Eq, Ord, Show) + +instance Outputable BranchWeight where + ppr (Weight i) = ppr i + +neverFreq, rareFreq, someFreq, defFreq, + oftenFreq, usuallyFreq, alwaysFreq :: BranchWeight + +defFreqVal :: Int +defFreqVal = 1000 + +neverFreq = Weight $ 0 +rareFreq = Weight $ div defFreqVal 5 +someFreq = Weight $ div defFreqVal 2 +defFreq = Weight $ 1000 +oftenFreq = Weight $ defFreqVal * 2 +usuallyFreq = Weight $ defFreqVal * 10 +--Don't go crazy here, for large switches we otherwise we might run into +--integer overflow issues on 32bit platforms if we add them up. +--which can happen if most of them result in the same expression. +alwaysFreq = Weight $ defFreqVal * 50 + +-- | Is f1 more likely then f2? +-- Returns nothing if they are the same +moreLikely :: BranchWeight -> BranchWeight -> Maybe Bool +moreLikely f1 f2 + | f1 > f2 = Just True + | f1 < f2 = Just False + | otherwise = Nothing + +{- | Add up weights respecting never. + Combining two weights where one is never or negative results in the other one. + This is neccesary because we never want a likely branch and a unlikely one + to add up to less than the likely branch was originally. + + This can happen if we end up with negative weights somehow. +-} +combinedFreqs :: BranchWeight -> BranchWeight -> BranchWeight +combinedFreqs (Weight f1) (Weight f2) + | f1 < 0 || f2 < 0 = Weight (max f2 f1) + | otherwise = Weight (f1 + f2) + +getWeight :: BranchWeight -> Int +getWeight (Weight f) = f diff --git a/compiler/basicTypes/MkId.hs b/compiler/basicTypes/MkId.hs index 38c772c935..f3b0dc25bd 100644 --- a/compiler/basicTypes/MkId.hs +++ b/compiler/basicTypes/MkId.hs @@ -357,11 +357,13 @@ mkDictSelRhs clas val_index dict_id = mkTemplateLocal 1 pred arg_ids = mkTemplateLocalsNum 2 arg_tys - rhs_body | new_tycon = unwrapNewTypeBody tycon (mkTyVarTys tyvars) (Var dict_id) - | otherwise = Case (Var dict_id) dict_id (idType the_arg_id) - [(DataAlt data_con, arg_ids, varToCoreExpr the_arg_id)] - -- varToCoreExpr needed for equality superclass selectors - -- sel a b d = case x of { MkC _ (g:a~b) _ -> CO g } + rhs_body | new_tycon + = unwrapNewTypeBody tycon (mkTyVarTys tyvars) (Var dict_id) + | otherwise + = Case (Var dict_id) dict_id (idType the_arg_id) + [(DataAlt data_con, arg_ids, varToCoreExpr the_arg_id)] + -- varToCoreExpr needed for equality superclass selectors + -- sel a b d = case x of { MkC _ (g:a~b) _ -> CO g } dictSelRule :: Int -> Arity -> RuleFun -- Tries to persuade the argument to look like a constructor @@ -834,7 +836,8 @@ wrapCo co rep_ty (unbox_rep, box_rep) -- co :: arg_ty ~ rep_ty ------------------------ seqUnboxer :: Unboxer -seqUnboxer v = return ([v], \e -> Case (Var v) v (exprType e) [(DEFAULT, [], e)]) +seqUnboxer v = return + ([v], \e -> Case (Var v) v (exprType e) [(DEFAULT, [], e)]) unitUnboxer :: Unboxer unitUnboxer v = return ([v], \e -> e) @@ -1257,7 +1260,8 @@ seqId = pcMiscPrelId seqName ty info (mkFunTy alphaTy (mkFunTy betaTy betaTy)) [x,y] = mkTemplateLocals [alphaTy, betaTy] - rhs = mkLams [alphaTyVar,betaTyVar,x,y] (Case (Var x) x betaTy [(DEFAULT, [], Var y)]) + rhs = mkLams [alphaTyVar,betaTyVar,x,y] + (Case (Var x) x betaTy [(DEFAULT, [], Var y)]) ------------------------------------------------ lazyId :: Id -- See Note [lazyId magic] diff --git a/compiler/cmm/CmmImplementSwitchPlans.hs b/compiler/cmm/CmmImplementSwitchPlans.hs index 2e2da5d305..b24915d08b 100644 --- a/compiler/cmm/CmmImplementSwitchPlans.hs +++ b/compiler/cmm/CmmImplementSwitchPlans.hs @@ -41,7 +41,8 @@ visitSwitches :: DynFlags -> CmmBlock -> UniqSM [CmmBlock] visitSwitches dflags block | (entry@(CmmEntry _ scope), middle, CmmSwitch expr ids) <- blockSplit block = do - let plan = createSwitchPlan ids + let balanceByWeight = gopt Opt_WeightBalanceAlts dflags + let plan = createSwitchPlan balanceByWeight ids (newTail, newBlocks) <- implementSwitchPlan dflags scope expr plan @@ -57,11 +58,11 @@ visitSwitches dflags block implementSwitchPlan :: DynFlags -> CmmTickScope -> CmmExpr -> SwitchPlan -> UniqSM (Block CmmNode O C, [CmmBlock]) implementSwitchPlan dflags scope expr = go where - go (Unconditionally l) - = return (emptyBlock `blockJoinTail` CmmBranch l, []) + go (Unconditionally li) + = return (emptyBlock `blockJoinTail` CmmBranch (liLbl li), []) go (JumpTable ids) = return (emptyBlock `blockJoinTail` CmmSwitch expr ids, []) - go (IfLT signed i ids1 ids2) + go (IfLT signed i ids1 ids2 freq) = do (bid1, newBlocks1) <- go' ids1 (bid2, newBlocks2) <- go' ids2 @@ -69,20 +70,20 @@ implementSwitchPlan dflags scope expr = go let lt | signed = cmmSLtWord | otherwise = cmmULtWord scrut = lt dflags expr $ CmmLit $ mkWordCLit dflags i - lastNode = CmmCondBranch scrut bid1 bid2 Nothing + lastNode = CmmCondBranch scrut bid1 bid2 freq lastBlock = emptyBlock `blockJoinTail` lastNode return (lastBlock, newBlocks1++newBlocks2) - go (IfEqual i l ids2) + go (IfEqual i (l,_f) ids2 freq) = do (bid2, newBlocks2) <- go' ids2 - let scrut = cmmNeWord dflags expr $ CmmLit $ mkWordCLit dflags i - lastNode = CmmCondBranch scrut bid2 l Nothing + let scrut = cmmEqWord dflags expr $ CmmLit $ mkWordCLit dflags i + lastNode = CmmCondBranch scrut l bid2 freq lastBlock = emptyBlock `blockJoinTail` lastNode return (lastBlock, newBlocks2) -- Same but returning a label to branch to - go' (Unconditionally l) + go' (Unconditionally (l,_f)) = return (l, []) go' p = do diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index cf660d274f..cb36b71634 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -372,8 +372,8 @@ cmm :: { CmmParse () } cmmtop :: { CmmParse () } : cmmproc { $1 } | cmmdata { $1 } - | decl { $1 } - | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' + | decl { $1 } + | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' {% liftP . withThisPackage $ \pkg -> do lits <- sequence $6; staticClosure pkg $3 $5 (map getLit lits) } @@ -388,20 +388,20 @@ cmmtop :: { CmmParse () } -- * we can derive closure and info table labels from a single NAME cmmdata :: { CmmParse () } - : 'section' STRING '{' data_label statics '}' + : 'section' STRING '{' data_label statics '}' { do lbl <- $4; ss <- sequence $5; code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) } data_label :: { CmmParse CLabel } - : NAME ':' + : NAME ':' {% liftP . withThisPackage $ \pkg -> return (mkCmmDataLabel pkg $1) } statics :: { [CmmParse [CmmStatic]] } : {- empty -} { [] } | static statics { $1 : $2 } - + -- Strings aren't used much in the RTS HC code, so it doesn't seem -- worth allowing inline strings. C-- doesn't allow them anyway. static :: { CmmParse [CmmStatic] } @@ -410,10 +410,10 @@ static :: { CmmParse [CmmStatic] } | type ';' { return [CmmUninitialised (widthInBytes (typeWidth $1))] } | 'bits8' '[' ']' STRING ';' { return [mkString $4] } - | 'bits8' '[' INT ']' ';' { return [CmmUninitialised + | 'bits8' '[' INT ']' ';' { return [CmmUninitialised (fromIntegral $3)] } - | typenot8 '[' INT ']' ';' { return [CmmUninitialised - (widthInBytes (typeWidth $1) * + | typenot8 '[' INT ']' ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1) * fromIntegral $3)] } | 'CLOSURE' '(' NAME lits ')' { do { lits <- sequence $4 @@ -474,7 +474,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } , cit_rep = rep , cit_prof = prof, cit_srt = NoC_SRT }, []) } - + | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')' -- ptrs, nptrs, closure type, description, type, fun type {% liftP . withThisPackage $ \pkg -> @@ -511,7 +511,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } -- If profiling is on, this string gets duplicated, -- but that's the way the old code did it we can fix it some other time. - + | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')' -- selector, closure type, description, type {% liftP . withThisPackage $ \pkg -> @@ -574,7 +574,7 @@ importName -- A label imported without an explicit packageId. -- These are taken to come frome some foreign, unnamed package. - : NAME + : NAME { ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) } -- as previous 'NAME', but 'IsData' @@ -584,8 +584,8 @@ importName -- A label imported with an explicit packageId. | STRING NAME { ($2, mkCmmCodeLabel (fsToUnitId (mkFastString $1)) $2) } - - + + names :: { [FastString] } : NAME { [$1] } | NAME ',' names { $1 : $3 } @@ -671,9 +671,9 @@ bool_expr :: { CmmParse BoolExpr } | expr { do e <- $1; return (BoolTest e) } bool_op :: { CmmParse BoolExpr } - : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3; + : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3; return (BoolAnd e1 e2) } - | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3; + | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3; return (BoolOr e1 e2) } | '!' bool_expr { do e <- $2; return (BoolNot e) } | '(' bool_op ')' { $2 } @@ -759,7 +759,7 @@ expr :: { CmmParse CmmExpr } expr0 :: { CmmParse CmmExpr } : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } - | STRING { do s <- code (newStringCLit $1); + | STRING { do s <- code (newStringCLit $1); return (CmmLit s) } | reg { $1 } | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } @@ -817,14 +817,14 @@ foreign_formal :: { CmmParse (LocalReg, ForeignHint) } local_lreg :: { CmmParse LocalReg } : NAME { do e <- lookupName $1; return $ - case e of + case e of CmmReg (CmmLocal r) -> r other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") } lreg :: { CmmParse CmmReg } : NAME { do e <- lookupName $1; return $ - case e of + case e of CmmReg r -> r other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } | GLOBALREG { return (CmmGlobal $1) } @@ -1361,8 +1361,7 @@ withSourceNote a b parse = do -- ----------------------------------------------------------------------------- -- Table jumps --- We use a simplified form of C-- switch statements for now. A --- switch statement always compiles to a table jump. Each arm can +-- We use a simplified form of C-- switch statements for now. Each arm can -- specify a list of values (not ranges), and there can be a single -- default branch. The range of the table is given either by the -- optional range on the switch (eg. switch [0..7] {...}), or by @@ -1375,21 +1374,23 @@ doSwitch :: Maybe (Integer,Integer) doSwitch mb_range scrut arms deflt = do -- Compile code for the default branch - dflt_entry <- + dflt_entry <- case deflt of Nothing -> return Nothing - Just e -> do b <- forkLabelledCode e; return (Just b) + Just e -> do b <- forkLabelledCode e; return (Just (b,defFreq)) + --TODO: Parse likely information for branches -- Compile each case branch table_entries <- mapM emitArm arms let table = M.fromList (concat table_entries) + let ftable = fmap (\c -> (c,defFreq)) table dflags <- getDynFlags let range = fromMaybe (0, tARGET_MAX_WORD dflags) mb_range expr <- scrut -- ToDo: check for out of range and jump to default if necessary - emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry table) + emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry ftable) where emitArm :: ([Integer],Either BlockId (CmmParse ())) -> CmmParse [(Integer,BlockId)] emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ] diff --git a/compiler/cmm/CmmSwitch.hs b/compiler/cmm/CmmSwitch.hs index ce779465e3..8ded03bdf3 100644 --- a/compiler/cmm/CmmSwitch.hs +++ b/compiler/cmm/CmmSwitch.hs @@ -1,7 +1,7 @@ {-# LANGUAGE GADTs #-} module CmmSwitch ( - SwitchTargets, - mkSwitchTargets, + SwitchTargets, LabelInfo, + liLbl, liWeight, mkSwitchTargets, switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned, mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough, switchTargetsToList, eqSwitchTargetWith, @@ -9,6 +9,8 @@ module CmmSwitch ( SwitchPlan(..), targetSupportsSwitch, createSwitchPlan, + + SeparatedList, ) where import GhcPrelude @@ -18,9 +20,11 @@ import DynFlags import Hoopl.Label (Label) import Data.Maybe +import Data.Bifunctor import Data.List (groupBy) import Data.Function (on) import qualified Data.Map as M +import BasicTypes (BranchWeight, combinedFreqs, moreLikely, neverFreq) -- Note [Cmm Switches, the general plan] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -103,17 +107,27 @@ minJumpTableOffset = 2 -- optional default value and a map from values to jump labels. data SwitchTargets = SwitchTargets - Bool -- Signed values - (Integer, Integer) -- Range - (Maybe Label) -- Default value - (M.Map Integer Label) -- The branches - deriving (Show, Eq) + { st_signed :: Bool -- Signed values + , st_range :: (Integer, Integer) -- Range + , st_defLabel :: (Maybe LabelInfo) -- Default value + , st_valMap :: (M.Map Integer LabelInfo) -- The branches + } deriving (Show, Eq) + +-- | A label annotated with a branch weight. +type LabelInfo = (Label, BranchWeight) + +liLbl :: LabelInfo -> Label +liLbl = fst + +liWeight :: LabelInfo -> BranchWeight +liWeight = snd -- | The smart constructor mkSwitchTargets normalises the map a bit: -- * No entries outside the range -- * No entries equal to the default -- * No default if all elements have explicit values -mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets +mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe LabelInfo + -> M.Map Integer LabelInfo -> SwitchTargets mkSwitchTargets signed range@(lo,hi) mbdef ids = SwitchTargets signed range mbdef' ids' where @@ -135,14 +149,16 @@ mkSwitchTargets signed range@(lo,hi) mbdef ids -- | Changes all labels mentioned in the SwitchTargets value mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets mapSwitchTargets f (SwitchTargets signed range mbdef branches) - = SwitchTargets signed range (fmap f mbdef) (fmap f branches) + = SwitchTargets signed range + (fmap (first f) mbdef) + (fmap (first f) branches) -- | Returns the list of non-default branches of the SwitchTargets value -switchTargetsCases :: SwitchTargets -> [(Integer, Label)] +switchTargetsCases :: SwitchTargets -> [(Integer, LabelInfo)] switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches -- | Return the default label of the SwitchTargets value -switchTargetsDefault :: SwitchTargets -> Maybe Label +switchTargetsDefault :: SwitchTargets -> Maybe LabelInfo switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef -- | Return the range of the SwitchTargets value @@ -161,7 +177,7 @@ switchTargetsSigned (SwitchTargets signed _ _ _) = signed -- The conversion from Integer to Int is a bit of a wart, as the actual -- scrutinee might be an unsigned word, but it just works, due to wrap-around -- arithmetic (as verified by the CmmSwitchTest test case). -switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label]) +switchTargetsToTable :: SwitchTargets -> (Int, [Maybe LabelInfo]) switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches) = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ]) where @@ -198,27 +214,37 @@ switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches) -- | The list of all labels occuring in the SwitchTargets value. switchTargetsToList :: SwitchTargets -> [Label] switchTargetsToList (SwitchTargets _ _ mbdef branches) - = maybeToList mbdef ++ M.elems branches + = map liLbl (maybeToList mbdef ++ M.elems branches) -- | Groups cases with equal targets, suitable for pretty-printing to a -- c-like switch statement with fall-through semantics. -switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label) +switchTargetsFallThrough :: SwitchTargets + -> ([([Integer], LabelInfo)], Maybe LabelInfo) switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef) where groups = map (\xs -> (map fst xs, snd (head xs))) $ - groupBy ((==) `on` snd) $ + groupBy ((==) `on` (liLbl . snd)) $ M.toList branches -- | Custom equality helper, needed for "CmmCommonBlockElim" -eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool -eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) = - signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2) +eqSwitchTargetWith :: (Label -> Label -> Bool) + -> SwitchTargets -> SwitchTargets + -> Bool +eqSwitchTargetWith eq + (SwitchTargets signed1 range1 mbdef1 ids1) + (SwitchTargets signed2 range2 mbdef2 ids2) = + signed1 == signed2 && range1 == range2 && + goMB mbdef1 mbdef2 && + goList (M.toList ids1) (M.toList ids2) where goMB Nothing Nothing = True - goMB (Just l1) (Just l2) = l1 `eq` l2 + goMB (Just l1) (Just l2) = liLbl l1 `eq` liLbl l2 goMB _ _ = False goList [] [] = True - goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2 + goList ((i1,l1):ls1) ((i2,l2):ls2) = + i1 == i2 && + liLbl l1 `eq` liLbl l2 && + goList ls1 ls2 goList _ _ = False ----------------------------------------------------------------------------- @@ -228,10 +254,23 @@ eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets -- | A SwitchPlan abstractly describes how a Switch statement ought to be -- implemented. See Note [createSwitchPlan] data SwitchPlan - = Unconditionally Label - | IfEqual Integer Label SwitchPlan - | IfLT Bool Integer SwitchPlan SwitchPlan - | JumpTable SwitchTargets + = Unconditionally + { sp_ucTarget :: LabelInfo } + | IfEqual + { sp_val :: Integer + , sp_eqTarget :: LabelInfo + , sp_else :: SwitchPlan + , sp_likely ::(Maybe Bool) + } + | IfLT + { sp_signed :: Bool + , sp_val :: Integer + , sp_ltTarget :: SwitchPlan + , sp_else :: SwitchPlan + , sp_likely :: (Maybe Bool) + } + | JumpTable + { sp_jmpTable :: SwitchTargets } deriving Show -- -- Note [createSwitchPlan] @@ -251,6 +290,20 @@ data SwitchPlan -- findSingleValues -- 5. The thus collected pieces are assembled to a balanced binary tree. +-- | Accumulated weight of all branches in a switchplan +planWeight :: SwitchPlan -> BranchWeight +planWeight Unconditionally { sp_ucTarget = target } + = liWeight target +planWeight IfEqual {sp_eqTarget = target, sp_else = alt } + = combinedFreqs (liWeight target) (planWeight alt) +planWeight IfLT {sp_ltTarget = target, sp_else = alt } + = combinedFreqs (planWeight target) (planWeight alt) +planWeight JumpTable {sp_jmpTable = table } + = foldl1 combinedFreqs lblWeights `combinedFreqs` maybe neverFreq liWeight def + where + lblWeights = map liWeight $ M.elems (st_valMap table) + def = st_defLabel table + {- Note [Two alts + default] ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -323,28 +376,36 @@ targetSupportsSwitch _ = False -- | This function creates a SwitchPlan from a SwitchTargets value, breaking it -- down into smaller pieces suitable for code generation. -createSwitchPlan :: SwitchTargets -> SwitchPlan +createSwitchPlan :: Bool -> SwitchTargets -> SwitchPlan -- Lets do the common case of a singleton map quicky and efficiently (#10677) -createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) - | [(x, l)] <- M.toList m - = IfEqual x l (Unconditionally defLabel) +createSwitchPlan _ (SwitchTargets _signed _range (Just defInfo) m) + | [(x, li)] <- M.toList m + = IfEqual x li + (Unconditionally defInfo) + (moreLikely (liWeight li) (liWeight defInfo)) -- And another common case, matching "booleans" -createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m) - | [(x1, l1), (_x2,l2)] <- M.toAscList m +createSwitchPlan _ (SwitchTargets _signed (lo,hi) Nothing m) + | [(x1, li1@(l1,f1)), (_x2,li2@(l2,f2))] <- M.toAscList m --Checking If |range| = 2 is enough if we have two unique literals , hi - lo == 1 - = IfEqual x1 l1 (Unconditionally l2) + = IfEqual x1 li1 (Unconditionally li2) (moreLikely f1 f2) -- See Note [Two alts + default] -createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) - | [(x1, l1), (x2,l2)] <- M.toAscList m - = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel)) -createSwitchPlan (SwitchTargets signed range mbdef m) = - -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $ +createSwitchPlan _ (SwitchTargets _signed _range (Just def@(defLabel, fdef)) m) + | [(x1, li1@(l1,f1)), (x2,li2@(l2,f2))] <- M.toAscList m + = IfEqual x1 li1 + (IfEqual x2 li2 (Unconditionally def) (moreLikely f2 fdef)) + (moreLikely f1 (combinedFreqs f2 fdef)) +createSwitchPlan balance (SwitchTargets signed range mbdef m) = + --pprTrace "createSwitchPlan" + --(text (show (range,m)) $$ text (show pieces) $$ + --text (show flatPlan) $$ text (show plan)) $ plan where + pieces :: [M.Map Integer LabelInfo] pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m - flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces - plan = buildTree signed $ flatPlan + flatPlan = findSingleValues $ + mkFlatSwitchPlan signed mbdef range pieces + plan = buildTree balance signed $ flatPlan --- @@ -381,26 +442,39 @@ breakTooSmall m type FlatSwitchPlan = SeparatedList Integer SwitchPlan -mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan +{-TODO: + Given the branch weights in LabelInfo we could do better + than binary search. Look at buildTree, findSingleValues, mkFlatSwitchPlan + if you implement this. +-} +-- | mkFlatSwitchPlan byWeight signed defLabel range maps +mkFlatSwitchPlan :: Bool -- ^ Values are signed + -> Maybe LabelInfo -- ^ Default alternative + -> (Integer, Integer) -- ^ Range of possible values + -> [M.Map Integer LabelInfo] -- ^ Value to branch mapping. + -> FlatSwitchPlan -- If we have no default (i.e. undefined where there is no entry), we can -- branch at the minimum of each map -mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty +mkFlatSwitchPlan _ Nothing _ [] + = pprPanic "mkFlatSwitchPlan with nothing left to do" empty mkFlatSwitchPlan signed Nothing _ (m:ms) - = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ]) + = (mkLeafPlan signed Nothing m , + [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ]) -- If we have a default, we have to interleave segments that jump -- to the default between the maps -mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps) +mkFlatSwitchPlan signed (Just li@(l,f)) r ms + = let ((_,p1):ps) = go r ms in (p1, ps) where go (lo,hi) [] | lo > hi = [] - | otherwise = [(lo, Unconditionally l)] + | otherwise = [(lo, Unconditionally li)] go (lo,hi) (m:ms) | lo < min - = (lo, Unconditionally l) : go (min,hi) (m:ms) + = (lo, Unconditionally li) : go (min,hi) (m:ms) | lo == min - = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms + = (lo, mkLeafPlan signed (Just li) m) : go (max+1,hi) ms | otherwise = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min) where @@ -408,10 +482,10 @@ mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps) max = fst (M.findMax m) -mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan +mkLeafPlan :: Bool -> Maybe LabelInfo -> M.Map Integer LabelInfo -> SwitchPlan mkLeafPlan signed mbdef m - | [(_,l)] <- M.toList m -- singleton map - = Unconditionally l + | [(_,li@(l,_f))] <- M.toList m -- singleton map + = Unconditionally li | otherwise = JumpTable $ mkSwitchTargets signed (min,max) mbdef m where @@ -427,7 +501,10 @@ mkLeafPlan signed mbdef m findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs) | l == l3 && i + 1 == i' - = findSingleValues (IfEqual i l2 (Unconditionally l), xs) + = findSingleValues + (IfEqual i l2 + (Unconditionally l) (moreLikely (liWeight l2) (liWeight l)) + , xs) findSingleValues (p, (i,p'):xs) = (p,i) `consSL` findSingleValues (p', xs) findSingleValues (p, []) @@ -437,12 +514,28 @@ findSingleValues (p, []) --- Step 5: Actually build the tree --- --- Build a balanced tree from a separated list -buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan -buildTree _ (p,[]) = p -buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2) - where +-- | Build a balanced tree from a separated list +-- Potentially by weight +buildTree :: Bool -> Bool -> FlatSwitchPlan -> SwitchPlan +buildTree _ _ (p,[]) = p +buildTree byWeight signed sl + = --traceShow (m,likely,(planWeight left),(planWeight right), byWeight ) $ + IfLT + { sp_signed = signed + , sp_val = m + , sp_ltTarget = left + , sp_else = right + , sp_likely = likely + } + where (sl1, m, sl2) = divideSL sl + left = (buildTree byWeight signed sl1) :: SwitchPlan + right = (buildTree byWeight signed sl2) + likely = if byWeight + then moreLikely (planWeight left) (planWeight right) + else Nothing + + diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 76e4d4cb94..ea23ee6884 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -341,7 +341,8 @@ pprSwitch dflags e ids (pairs, mbdef) = switchTargetsFallThrough ids -- fall through case - caseify (ix:ixs, ident) = vcat (map do_fallthrough ixs) $$ final_branch ix + caseify :: ([Integer], LabelInfo) -> SDoc + caseify (ix:ixs, lblInfo) = vcat (map do_fallthrough ixs) $$ final_branch ix where do_fallthrough ix = hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon , @@ -349,11 +350,14 @@ pprSwitch dflags e ids final_branch ix = hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon , - text "goto" , (pprBlockId ident) <> semi ] + text "goto" , (pprBlockId ( liLbl lblInfo)) <> semi <+> + parens (text "likely:" <> ppr (liWeight lblInfo))] caseify (_ , _ ) = panic "pprSwitch: switch with no cases!" - def | Just l <- mbdef = text "default: goto" <+> pprBlockId l <> semi + def | Just li <- mbdef + = text "default: goto" <+> pprBlockId (liLbl li) <> semi <+> + parens (text "likely:" <> ppr (liWeight li)) | otherwise = empty -- --------------------------------------------------------------------- diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 6a93ea818e..635ad04c3c 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -244,17 +244,24 @@ pprNode node = pp_node <+> pp_debug ]) 4 (vcat (map ppCase cases) $$ def) $$ rbrace where - (cases, mbdef) = switchTargetsFallThrough ids - ppCase (is,l) = hsep - [ text "case" - , commafy $ map integer is - , text ": goto" - , ppr l <> semi - ] - def | Just l <- mbdef = hsep - [ text "default:" - , braces (text "goto" <+> ppr l <> semi) - ] + (cases, mbdef) + = switchTargetsFallThrough ids + ppCase (is,li) + = hsep + [ text "case" + , commafy $ map integer is + , (text "/* likely:" <+> ppr (liWeight li) <+> text "*/") + , text ": goto" + , ppr (liLbl li) <> semi + ] + def | Just li <- mbdef + = hsep + [ text "default" <+> + (text "/* likely:" <+> + ppr (liWeight li) <+> + text "*/ :") + , braces (text "goto" <+> ppr (liLbl li) <> semi) + ] | otherwise = empty range = brackets $ hsep [integer lo, text "..", integer hi] diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index cf602ef0b8..9b3c166d1e 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -272,7 +272,7 @@ mkRhsClosure dflags bndr _cc _bi , StgCase (StgApp scrutinee [{-no args-}]) _ -- ignore bndr (AlgAlt _) - [(DataAlt _, params, sel_expr)] <- strip expr + [(DataAlt _, params, sel_expr, _)] <- strip expr , StgApp selectee [{-no args-}] <- strip sel_expr , the_fv == scrutinee -- Scrutinee is the only free variable diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 3fcc935121..ab0e6d0c2a 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -31,6 +31,7 @@ import StgCmmClosure import StgSyn +import BasicTypes (BranchWeight) import MkGraph import BlockId import Cmm @@ -379,7 +380,7 @@ calls to nonVoidIds in various places. So we must not look up cgCase (StgApp v []) _ (PrimAlt _) alts | isVoidRep (idPrimRep v) -- See Note [Scrutinising VoidRep] - , [(DEFAULT, _, rhs)] <- alts + , [(DEFAULT, _, rhs, _)] <- alts = cgExpr rhs {- Note [Dodgy unsafeCoerce 1] @@ -561,7 +562,7 @@ chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id] chooseReturnBndrs bndr (PrimAlt _) _alts = assertNonVoidIds [bndr] -chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)] +chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _, _)] = ASSERT2(ids `lengthIs` n, ppr n $$ ppr ids $$ ppr _bndr) assertNonVoidIds ids -- 'bndr' is not assigned! @@ -578,10 +579,10 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt] -> FCode ReturnKind -- At this point the result of the case are in the binders -cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)] +cgAlts gc_plan _bndr PolyAlt [(_, _, rhs, _)] = maybeAltHeapCheck gc_plan (cgExpr rhs) -cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)] +cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs, _)] = maybeAltHeapCheck gc_plan (cgExpr rhs) -- Here bndrs are *already* in scope, so don't rebind them @@ -591,13 +592,13 @@ cgAlts gc_plan bndr (PrimAlt _) alts ; tagged_cmms <- cgAltRhss gc_plan bndr alts ; let bndr_reg = CmmLocal (idToReg dflags bndr) - (DEFAULT,deflt) = head tagged_cmms + (DEFAULT,deflt,f) = head tagged_cmms -- PrimAlts always have a DEFAULT case -- and it always comes first - tagged_cmms' = [(lit,code) - | (LitAlt lit, code) <- tagged_cmms] - ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt + tagged_cmms' = [(lit,code,f) + | (LitAlt lit, code,f) <- tagged_cmms] + ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' (deflt,f) ; return AssignedDirectly } cgAlts gc_plan bndr (AlgAlt tycon) alts @@ -613,7 +614,7 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts then do let -- Yes, bndr_reg has constr. tag in ls bits tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg) - branches' = [(tag+1,branch) | (tag,branch) <- branches] + branches' = [(tag+1,branch,f) | (tag,branch,f) <- branches] emitSwitch tag_expr branches' mb_deflt 1 fam_sz else -- No, get tag from info table @@ -651,18 +652,18 @@ cgAlts _ _ _ _ = panic "cgAlts" ------------------- cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt] - -> FCode ( Maybe CmmAGraphScoped - , [(ConTagZ, CmmAGraphScoped)] ) + -> FCode ( Maybe (CmmAGraphScoped, BranchWeight) + , [(ConTagZ, CmmAGraphScoped, BranchWeight)] ) cgAlgAltRhss gc_plan bndr alts = do { tagged_cmms <- cgAltRhss gc_plan bndr alts ; let { mb_deflt = case tagged_cmms of - ((DEFAULT,rhs) : _) -> Just rhs + ((DEFAULT,rhs,f) : _) -> Just (rhs,f) _other -> Nothing -- DEFAULT is always first, if present - ; branches = [ (dataConTagZ con, cmm) - | (DataAlt con, cmm) <- tagged_cmms ] + ; branches = [ (dataConTagZ con, cmm, f) + | (DataAlt con, cmm, f) <- tagged_cmms ] } ; return (mb_deflt, branches) @@ -671,20 +672,20 @@ cgAlgAltRhss gc_plan bndr alts ------------------- cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt] - -> FCode [(AltCon, CmmAGraphScoped)] + -> FCode [(AltCon, CmmAGraphScoped,BranchWeight)] cgAltRhss gc_plan bndr alts = do dflags <- getDynFlags let base_reg = idToReg dflags bndr - cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped) - cg_alt (con, bndrs, rhs) - = getCodeScoped $ - maybeAltHeapCheck gc_plan $ - do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs) - -- alt binders are always non-void, - -- see Note [Post-unarisation invariants] in UnariseStg - ; _ <- cgExpr rhs - ; return con } + cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped, BranchWeight) + cg_alt (con, bndrs, rhs, freq) = do + (i,c) <- getCodeScoped $ maybeAltHeapCheck gc_plan $ + do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs) + -- alt binders are always non-void, + -- see Note [Post-unarisation invariants] in UnariseStg + ; _ <- cgExpr rhs + ; return con } + return (i,c,freq) forkAlts (map cg_alt alts) maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 7c3864296c..c3baa7bb70 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -799,8 +799,10 @@ getCmm code ; return (fromOL (cgs_tops state2)) } -mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph -mkCmmIfThenElse e tbranch fbranch = mkCmmIfThenElse' e tbranch fbranch Nothing +mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> Maybe Bool + -> FCode CmmAGraph +mkCmmIfThenElse e tbranch fbranch likely + = mkCmmIfThenElse' e tbranch fbranch likely mkCmmIfThenElse' :: CmmExpr -> CmmAGraph -> CmmAGraph -> Maybe Bool -> FCode CmmAGraph diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index b5cd267c6b..fa2e7d2b6c 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -1127,9 +1127,8 @@ genericFabsOp w [res_r] [aa] let g3 = catAGraphs [mkAssign res_t aa, mkAssign (CmmLocal res_r) (neg (CmmReg res_t))] - g4 <- mkCmmIfThenElse (gt aa zero) g2 g3 - - emit =<< mkCmmIfThenElse (eq aa zero) g1 g4 + g4 <- mkCmmIfThenElse (gt aa zero) g2 g3 Nothing + emit =<< mkCmmIfThenElse (eq aa zero) g1 g4 Nothing genericFabsOp _ _ _ = panic "genericFabsOp" @@ -1821,14 +1820,17 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy where -- The only time the memory might overlap is when the two arrays -- we were provided are the same array! - -- TODO: Optimize branch for common case of no aliasing. + -- The common case is no aliasing so we set the likly value to `Just False`. copy src dst dst_p src_p bytes = do dflags <- getDynFlags [moveCall, cpyCall] <- forkAlts [ getCode $ emitMemmoveCall dst_p src_p bytes 1, getCode $ emitMemcpyCall dst_p src_p bytes 1 ] - emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall + emit =<< mkCmmIfThenElse + (cmmEqWord dflags src dst) + moveCall cpyCall + (Just False) emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()) @@ -1965,7 +1967,8 @@ doCopyMutableArrayOp = emitCopyArray copy where -- The only time the memory might overlap is when the two arrays -- we were provided are the same array! - -- TODO: Optimize branch for common case of no aliasing. + -- Optimize branch for common case of no aliasing by setting likely + -- to `Just False`. copy src dst dst_p src_p bytes = do dflags <- getDynFlags [moveCall, cpyCall] <- forkAlts [ @@ -1974,7 +1977,10 @@ doCopyMutableArrayOp = emitCopyArray copy getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) (wORD_SIZE dflags) ] - emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall + emit =<< mkCmmIfThenElse + (cmmEqWord dflags src dst) + moveCall cpyCall + (Just False) emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff -> FCode ()) -- ^ copy function @@ -2028,7 +2034,8 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy where -- The only time the memory might overlap is when the two arrays -- we were provided are the same array! - -- TODO: Optimize branch for common case of no aliasing. + -- Optimize branch for common case of no aliasing by setting likelyhood + -- to `Just False`. copy src dst dst_p src_p bytes = do dflags <- getDynFlags [moveCall, cpyCall] <- forkAlts @@ -2037,7 +2044,10 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) (wORD_SIZE dflags) ] - emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall + emit =<< mkCmmIfThenElse + (cmmEqWord dflags src dst) + moveCall cpyCall + (Just False) emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff -> FCode ()) -- ^ copy function diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index a0bca5d661..f490be5c31 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -352,6 +352,7 @@ ldvEnter cl_ptr = do emit =<< mkCmmIfThenElse (CmmMachOp (mo_wordUGt dflags) [loadEra dflags, CmmLit (zeroCLit dflags)]) (mkStore ldv_wd new_ldv_wd) mkNop + Nothing loadEra :: DynFlags -> CmmExpr loadEra dflags = CmmMachOp (MO_UU_Conv (cIntWidth dflags) (wordWidth dflags)) diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 94013f5c6d..68949bf190 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -55,6 +55,7 @@ import CLabel import CmmUtils import CmmSwitch +import BasicTypes (BranchWeight) import ForeignCall import IdInfo import Type @@ -74,8 +75,6 @@ import RepType import qualified Data.ByteString as BS import qualified Data.Map as M import Data.Char -import Data.List -import Data.Ord import Data.Word @@ -448,16 +447,16 @@ unscramble dflags vertices = mapM_ do_component components emitSwitch :: CmmExpr -- Tag to switch on - -> [(ConTagZ, CmmAGraphScoped)] -- Tagged branches - -> Maybe CmmAGraphScoped -- Default branch (if any) + -> [(ConTagZ, CmmAGraphScoped, BranchWeight)] -- Tagged branches + -> Maybe (CmmAGraphScoped, BranchWeight) -- Default branch (if any) -> ConTagZ -> ConTagZ -- Min and Max possible values; -- behaviour outside this range is -- undefined -> FCode () -- First, two rather common cases in which there is no work to do -emitSwitch _ [] (Just code) _ _ = emit (fst code) -emitSwitch _ [(_,code)] Nothing _ _ = emit (fst code) +emitSwitch _ [] (Just code) _ _ = emit (fst $ fst code) +emitSwitch _ [(_,code,_)] Nothing _ _ = emit (fst code) -- Right, off we go emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do @@ -467,7 +466,8 @@ emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do tag_expr' <- assignTemp' tag_expr -- Sort the branches before calling mk_discrete_switch - let branches_lbls' = [ (fromIntegral i, l) | (i,l) <- sortBy (comparing fst) branches_lbls ] + let branches_lbls' = [ (fromIntegral i, l, f) + | (i,l,f) <- sortWith fstOf3 branches_lbls ] let range = (fromIntegral lo_tag, fromIntegral hi_tag) emit $ mk_discrete_switch False tag_expr' branches_lbls' mb_deflt_lbl range @@ -476,19 +476,19 @@ emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do mk_discrete_switch :: Bool -- ^ Use signed comparisons -> CmmExpr - -> [(Integer, BlockId)] - -> Maybe BlockId + -> [(Integer, BlockId, BranchWeight)] + -> Maybe (BlockId, BranchWeight) -> (Integer, Integer) -> CmmAGraph -- SINGLETON TAG RANGE: no case analysis to do -mk_discrete_switch _ _tag_expr [(tag, lbl)] _ (lo_tag, hi_tag) +mk_discrete_switch _ _tag_expr [(tag, lbl, _f)] _ (lo_tag, hi_tag) | lo_tag == hi_tag = ASSERT( tag == lo_tag ) mkBranch lbl -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do -mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _ +mk_discrete_switch _ _tag_expr [(_tag,lbl,_)] Nothing _ = mkBranch lbl -- The simplifier might have eliminated a case -- so we may have e.g. case xs of @@ -499,25 +499,17 @@ mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _ -- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans -- See Note [Cmm Switches, the general plan] in CmmSwitch mk_discrete_switch signed tag_expr branches mb_deflt range - = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches) - -divideBranches :: Ord a => [(a,b)] -> ([(a,b)], a, [(a,b)]) -divideBranches branches = (lo_branches, mid, hi_branches) - where - -- 2 branches => n_branches `div` 2 = 1 - -- => branches !! 1 give the *second* tag - -- There are always at least 2 branches here - (mid,_) = branches !! (length branches `div` 2) - (lo_branches, hi_branches) = span is_lo branches - is_lo (t,_) = t < mid + = mkSwitch tag_expr $ + mkSwitchTargets signed range mb_deflt + (M.fromList $ map (\(i,e,f)-> (i,(e,f))) branches) -------------- emitCmmLitSwitch :: CmmExpr -- Tag to switch on - -> [(Literal, CmmAGraphScoped)] -- Tagged branches - -> CmmAGraphScoped -- Default branch (always) + -> [(Literal, CmmAGraphScoped, BranchWeight)] -- Tagged branches + -> (CmmAGraphScoped, BranchWeight) -- Default branch (always) -> FCode () -- Emit the code -emitCmmLitSwitch _scrut [] deflt = emit $ fst deflt -emitCmmLitSwitch scrut branches deflt = do +emitCmmLitSwitch _scrut [] (deflt,_dfreq) = emit $ fst deflt +emitCmmLitSwitch scrut branches (deflt,dfreq) = do scrut' <- assignTemp' scrut join_lbl <- newBlockId deflt_lbl <- label_code join_lbl deflt @@ -529,20 +521,22 @@ emitCmmLitSwitch scrut branches deflt = do -- We find the necessary type information in the literals in the branches let signed = case head branches of - (MachInt _, _) -> True - (MachInt64 _, _) -> True + (MachInt _, _, _) -> True + (MachInt64 _, _, _) -> True _ -> False let range | signed = (tARGET_MIN_INT dflags, tARGET_MAX_INT dflags) | otherwise = (0, tARGET_MAX_WORD dflags) if isFloatType cmm_ty - then emit =<< mk_float_switch rep scrut' deflt_lbl noBound branches_lbls + then emit =<< mk_float_switch rep scrut' + (deflt_lbl, dfreq) noBound + branches_lbls else emit $ mk_discrete_switch signed scrut' - [(litValue lit,l) | (lit,l) <- branches_lbls] - (Just deflt_lbl) + [(litValue lit,l,f) | (lit,l,f) <- branches_lbls] + (Just (deflt_lbl, dfreq)) range emitLabel join_lbl @@ -552,11 +546,30 @@ type LitBound = (Maybe Literal, Maybe Literal) noBound :: LitBound noBound = (Nothing, Nothing) -mk_float_switch :: Width -> CmmExpr -> BlockId +{- TODO: + Currently this generates a binary search tree for the given value. + + Given we have branch weights we would ideally balance the tree + by weight instead. + + Eg. given (lit,weight) of [(0,1),(1,1),(2,1),(3,99)] we want to split the + list into [(0,1),(1,1),(2,1)] and [(3,99)]. + + Things to consider: + * Does it make a difference often enough to be worth the complexity + and increase in compile time. + * Negative weights have to be rounded up to zero, + otherwise they would distort the results. + * How should entries with no information be treated? + -> Probably good enough to use the default value. + * If implemented should this only apply when optimizations are + active? +-} +mk_float_switch :: Width -> CmmExpr -> (BlockId, BranchWeight) -> LitBound - -> [(Literal,BlockId)] + -> [(Literal,BlockId,BranchWeight)] -> FCode CmmAGraph -mk_float_switch rep scrut deflt _bounds [(lit,blk)] +mk_float_switch rep scrut (deflt, _dfrq) _bounds [(lit,blk,_frq)] = do dflags <- getDynFlags return $ mkCbranch (cond dflags) deflt blk Nothing where @@ -565,17 +578,32 @@ mk_float_switch rep scrut deflt _bounds [(lit,blk)] cmm_lit = mkSimpleLit dflags lit ne = MO_F_Ne rep -mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches +mk_float_switch rep scrut (deflt_blk_id,dfreq) (lo_bound, hi_bound) branches = do dflags <- getDynFlags - lo_blk <- mk_float_switch rep scrut deflt_blk_id bounds_lo lo_branches - hi_blk <- mk_float_switch rep scrut deflt_blk_id bounds_hi hi_branches - mkCmmIfThenElse (cond dflags) lo_blk hi_blk + lo_blk <- mk_float_switch + rep scrut (deflt_blk_id,dfreq) + bounds_lo lo_branches + hi_blk <- mk_float_switch + rep scrut + (deflt_blk_id,dfreq) bounds_hi hi_branches + mkCmmIfThenElse (cond dflags) lo_blk hi_blk Nothing where + (lo_branches, mid_lit, hi_branches) = divideBranches branches bounds_lo = (lo_bound, Just mid_lit) bounds_hi = (Just mid_lit, hi_bound) + divideBranches :: Ord a => [(a,b,c)] -> ([(a,b,c)], a, [(a,b,c)]) + divideBranches branches = (lo_branches, mid, hi_branches) + where + -- 2 branches => n_branches `div` 2 = 1 + -- => branches !! 1 give the *second* tag + -- There are always at least 2 branches here + (mid,_,_) = branches !! (length branches `div` 2) + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_,_) = t < mid + cond dflags = CmmMachOp lt [scrut, CmmLit cmm_lit] where cmm_lit = mkSimpleLit dflags mid_lit @@ -583,21 +611,23 @@ mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches -------------- -label_default :: BlockId -> Maybe CmmAGraphScoped -> FCode (Maybe BlockId) +label_default :: BlockId -> Maybe (CmmAGraphScoped, BranchWeight) + -> FCode (Maybe (BlockId, BranchWeight)) label_default _ Nothing = return Nothing -label_default join_lbl (Just code) +label_default join_lbl (Just (code,f)) = do lbl <- label_code join_lbl code - return (Just lbl) + return (Just (lbl,f)) -------------- -label_branches :: BlockId -> [(a,CmmAGraphScoped)] -> FCode [(a,BlockId)] +label_branches :: BlockId -> [(a,CmmAGraphScoped, BranchWeight)] + -> FCode [(a,BlockId,BranchWeight)] label_branches _join_lbl [] = return [] -label_branches join_lbl ((tag,code):branches) +label_branches join_lbl ((tag,code,freq):branches) = do lbl <- label_code join_lbl code branches' <- label_branches join_lbl branches - return ((tag,lbl):branches') + return ((tag,lbl,freq):branches') -------------- label_code :: BlockId -> CmmAGraphScoped -> FCode BlockId diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index e812dd445f..65f7d6652c 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -13,6 +13,7 @@ import Llvm import LlvmCodeGen.Base import LlvmCodeGen.Regs +import BasicTypes (BranchWeight, getWeight, neverFreq) import BlockId import CodeGen.Platform ( activeStgRegs, callerSaves ) import CLabel @@ -38,6 +39,7 @@ import Util import Control.Monad.Trans.Class import Control.Monad.Trans.Writer +import Data.Int (Int32) import Data.Semigroup ( Semigroup ) import qualified Data.Semigroup as Semigroup import Data.List ( nub ) @@ -1071,7 +1073,19 @@ For a real example of this, see ./rts/StgStdThunks.cmm -} - +switchMetaData :: BranchWeight -> [BranchWeight] -> MetaAnnot +switchMetaData defFreq altFreqs = + let values = map + -- LLVM branch weights are i32 typed so we cap it there. + (\w -> + min (fromIntegral (maxBound :: Int32)) + (fromIntegral . getWeight $ w)) + (defFreq:altFreqs) + types = repeat (LMInt $ fromIntegral 32) + lits = zipWith LMIntLit values types + weights = map (MetaVar . LMLitVar) lits + in + MetaAnnot (fsLit "branch_weights") $ MetaStruct weights -- | Switch branch genSwitch :: CmmExpr -> SwitchTargets -> LlvmM StmtData @@ -1079,13 +1093,14 @@ genSwitch cond ids = do (vc, stmts, top) <- exprToVar cond let ty = getVarType vc - let labels = [ (mkIntLit ty ix, blockIdToLlvm b) - | (ix, b) <- switchTargetsCases ids ] + let (labels,fs) = unzip [ ((mkIntLit ty ix, blockIdToLlvm b), f) + | (ix, (b,f)) <- switchTargetsCases ids ] -- out of range is undefined, so let's just branch to first label - let defLbl | Just l <- switchTargetsDefault ids = blockIdToLlvm l - | otherwise = snd (head labels) + let (defLbl,defFreq) + | Just (l,f) <- switchTargetsDefault ids = (blockIdToLlvm l,f) + | otherwise = (snd (head labels),neverFreq) - let s1 = Switch vc defLbl labels + let s1 = MetaStmt [switchMetaData defFreq fs ] (Switch vc defLbl labels) return $ (stmts `snocOL` s1, top) diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index e6b9cf6b93..c59f26c1e8 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -485,6 +485,8 @@ data GeneralFlag | Opt_SolveConstantDicts | Opt_AlignmentSanitisation | Opt_CatchBottoms + | Opt_UnlikelyBottoms -- ^ Assume bottoming alternatives are not taken. + | Opt_WeightBalanceAlts -- ^ Split trees by branch weight where applicable. -- PreInlining is on by default. The option is there just to see how -- bad things get if you turn it off! @@ -583,6 +585,7 @@ data GeneralFlag | Opt_SuppressUniques | Opt_SuppressStgFreeVars | Opt_SuppressTicks -- Replaces Opt_PprShowTicks + | Opt_SuppressTimestamps -- ^ Suppress timestamps in dumps -- temporary flags | Opt_AutoLinkPackages @@ -3040,7 +3043,8 @@ dynamic_flags_deps = [ setGeneralFlag Opt_SuppressIdInfo setGeneralFlag Opt_SuppressTicks setGeneralFlag Opt_SuppressStgFreeVars - setGeneralFlag Opt_SuppressTypeSignatures) + setGeneralFlag Opt_SuppressTypeSignatures + setGeneralFlag Opt_SuppressTimestamps) ------ Debugging ---------------------------------------------------- , make_ord_flag defGhcFlag "dstg-stats" @@ -3835,10 +3839,12 @@ dFlagsDeps = [ flagSpec "suppress-idinfo" Opt_SuppressIdInfo, flagSpec "suppress-unfoldings" Opt_SuppressUnfoldings, flagSpec "suppress-module-prefixes" Opt_SuppressModulePrefixes, + flagSpec "suppress-timestamps" Opt_SuppressTimestamps, flagSpec "suppress-type-applications" Opt_SuppressTypeApplications, flagSpec "suppress-type-signatures" Opt_SuppressTypeSignatures, flagSpec "suppress-uniques" Opt_SuppressUniques, - flagSpec "suppress-var-kinds" Opt_SuppressVarKinds] + flagSpec "suppress-var-kinds" Opt_SuppressVarKinds + ] -- | These @-f\<blah\>@ flags can all be reversed with @-fno-\<blah\>@ fFlags :: [FlagSpec GeneralFlag] @@ -3956,7 +3962,9 @@ fFlagsDeps = [ flagSpec "abstract-refinement-substitutions" Opt_AbstractRefSubstitutions, flagSpec "unclutter-valid-substitutions" Opt_UnclutterValidSubstitutions, flagSpec "show-loaded-modules" Opt_ShowLoadedModules, - flagSpec "whole-archive-hs-libs" Opt_WholeArchiveHsLibs + flagSpec "whole-archive-hs-libs" Opt_WholeArchiveHsLibs, + flagSpec "unlikely-bottoms" Opt_UnlikelyBottoms, + flagSpec "weight-balance-alts" Opt_WeightBalanceAlts ] -- | These @-f\<blah\>@ flags can all be reversed with @-fno-\<blah\>@ @@ -4344,6 +4352,8 @@ optLevelFlags -- see Note [Documenting optimisation flags] , ([2], Opt_LiberateCase) , ([2], Opt_SpecConstr) + , ([1,2], Opt_UnlikelyBottoms) + , ([1,2], Opt_WeightBalanceAlts) -- , ([2], Opt_RegsGraph) -- RegsGraph suffers performance regression. See #7679 -- , ([2], Opt_StaticArgumentTransformation) diff --git a/compiler/main/ErrUtils.hs b/compiler/main/ErrUtils.hs index 13ff017e09..c7fb8babe9 100644 --- a/compiler/main/ErrUtils.hs +++ b/compiler/main/ErrUtils.hs @@ -500,9 +500,12 @@ dumpSDoc dflags print_unqual flag hdr doc = doc' <- if null hdr then return doc else do t <- getCurrentTime - let d = text (show t) - $$ blankLine - $$ doc + let timeStamp = if (gopt Opt_SuppressTimestamps dflags) + then empty + else text (show t) + let d = timeStamp + $$ blankLine + $$ doc return $ mkDumpDoc hdr d defaultLogActionHPrintDoc dflags handle doc' dump_style diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index e2c568c836..18ce58a11e 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -2081,7 +2081,11 @@ genSwitch dflags expr targets BCTR ids (Just lbl) ] return code - where (offset, ids) = switchTargetsToTable targets + where + (offset, lblInfos) = switchTargetsToTable targets + -- lblInfos contains branch weights too, + -- but we only use the labels for now. + ids = map (fmap liLbl) lblInfos generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl CmmStatics Instr) diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 6dfd58950e..b1e717b427 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -339,7 +339,11 @@ genSwitch dflags expr targets , LD II32 (AddrRegReg base_reg offset_reg) dst , JMP_TBL (AddrRegImm dst (ImmInt 0)) ids label , NOP ] - where (offset, ids) = switchTargetsToTable targets + where + (offset, lblInfos) = switchTargetsToTable targets + -- lblInfos contains branch weights too, + -- we only use the labels for now. + ids = map (fmap liLbl) lblInfos generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl CmmStatics Instr) diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 09757e769e..fd1640acf9 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -2840,7 +2840,11 @@ genSwitch dflags expr targets JMP_TBL op ids (Section ReadOnlyData lbl) lbl ] return code - where (offset, ids) = switchTargetsToTable targets + where + (offset, lblInfos) = switchTargetsToTable targets + -- lblInfos contains branch weights too, + -- but for the jump table we use only the labels. + ids = map (fmap liLbl) lblInfos generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr) generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl) diff --git a/compiler/simplStg/StgCse.hs b/compiler/simplStg/StgCse.hs index 6e896176f9..4924b508c7 100644 --- a/compiler/simplStg/StgCse.hs +++ b/compiler/simplStg/StgCse.hs @@ -330,16 +330,16 @@ stgCseExpr env (StgLetNoEscape binds body) -- Case alternatives -- Extend the CSE environment stgCseAlt :: CseEnv -> OutId -> InStgAlt -> OutStgAlt -stgCseAlt env case_bndr (DataAlt dataCon, args, rhs) +stgCseAlt env case_bndr (DataAlt dataCon, args, rhs, freq) = let (env1, args') = substBndrs env args env2 = addDataCon case_bndr dataCon (map StgVarArg args') env1 -- see note [Case 2: CSEing case binders] rhs' = stgCseExpr env2 rhs - in (DataAlt dataCon, args', rhs') -stgCseAlt env _ (altCon, args, rhs) + in (DataAlt dataCon, args', rhs', freq) +stgCseAlt env _ (altCon, args, rhs, freq) = let (env1, args') = substBndrs env args rhs' = stgCseExpr env1 rhs - in (altCon, args', rhs') + in (altCon, args', rhs', freq) -- Bindings stgCseBind :: CseEnv -> InStgBinding -> (Maybe OutStgBinding, CseEnv) @@ -390,8 +390,8 @@ mkStgCase scrut bndr ty alts | all isBndr alts = scrut where -- see Note [All alternatives are the binder] - isBndr (_, _, StgApp f []) = f == bndr - isBndr _ = False + isBndr (_, _, StgApp f [], _) = f == bndr + isBndr _ = False -- Utilities diff --git a/compiler/simplStg/StgStats.hs b/compiler/simplStg/StgStats.hs index 712ec2d22e..8dd5630932 100644 --- a/compiler/simplStg/StgStats.hs +++ b/compiler/simplStg/StgStats.hs @@ -172,6 +172,6 @@ statExpr (StgCase expr _ _ alts) countOne StgCases where stat_alts alts - = combineSEs (map statExpr [ e | (_,_,e) <- alts ]) + = combineSEs (map statExpr [ e | (_,_,e,_) <- alts ]) statExpr (StgLam {}) = panic "statExpr StgLam" diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs index 57dd699f70..b72e24b3a7 100644 --- a/compiler/simplStg/UnariseStg.hs +++ b/compiler/simplStg/UnariseStg.hs @@ -382,7 +382,7 @@ elimCase :: UnariseEnv -> [OutStgArg] -- non-void args -> InId -> AltType -> [InStgAlt] -> UniqSM OutStgExpr -elimCase rho args bndr (MultiValAlt _) [(_, bndrs, rhs)] +elimCase rho args bndr (MultiValAlt _) [(_, bndrs, rhs, _freq)] = do let rho1 = extendRho rho bndr (MultiVal args) rho2 | isUnboxedTupleBndr bndr @@ -414,47 +414,51 @@ elimCase _ args bndr alt_ty alts -------------------------------------------------------------------------------- unariseAlts :: UnariseEnv -> AltType -> InId -> [StgAlt] -> UniqSM [StgAlt] -unariseAlts rho (MultiValAlt n) bndr [(DEFAULT, [], e)] +unariseAlts rho (MultiValAlt n) bndr [(DEFAULT, [], e, f)] | isUnboxedTupleBndr bndr = do (rho', ys) <- unariseConArgBinder rho bndr e' <- unariseExpr rho' e - return [(DataAlt (tupleDataCon Unboxed n), ys, e')] + return [(DataAlt (tupleDataCon Unboxed n), ys, e', f)] -unariseAlts rho (MultiValAlt n) bndr [(DataAlt _, ys, e)] +unariseAlts rho (MultiValAlt n) bndr [(DataAlt _, ys, e, f)] | isUnboxedTupleBndr bndr = do (rho', ys1) <- unariseConArgBinders rho ys MASSERT(ys1 `lengthIs` n) let rho'' = extendRho rho' bndr (MultiVal (map StgVarArg ys1)) e' <- unariseExpr rho'' e - return [(DataAlt (tupleDataCon Unboxed n), ys1, e')] + return [(DataAlt (tupleDataCon Unboxed n), ys1, e', f)] unariseAlts _ (MultiValAlt _) bndr alts | isUnboxedTupleBndr bndr = pprPanic "unariseExpr: strange multi val alts" (ppr alts) -- In this case we don't need to scrutinize the tag bit -unariseAlts rho (MultiValAlt _) bndr [(DEFAULT, _, rhs)] +unariseAlts rho (MultiValAlt _) bndr [(DEFAULT, _, rhs, f)] | isUnboxedSumBndr bndr = do (rho_sum_bndrs, sum_bndrs) <- unariseConArgBinder rho bndr rhs' <- unariseExpr rho_sum_bndrs rhs - return [(DataAlt (tupleDataCon Unboxed (length sum_bndrs)), sum_bndrs, rhs')] + return + [(DataAlt (tupleDataCon Unboxed (length sum_bndrs)), + sum_bndrs, rhs', f)] unariseAlts rho (MultiValAlt _) bndr alts | isUnboxedSumBndr bndr - = do (rho_sum_bndrs, scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr + = do (rho_sum_bndrs, + scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr alts' <- unariseSumAlts rho_sum_bndrs (map StgVarArg real_bndrs) alts let inner_case = StgCase (StgApp tag_bndr []) tag_bndr tagAltTy alts' return [ (DataAlt (tupleDataCon Unboxed (length scrt_bndrs)), scrt_bndrs, - inner_case) ] + inner_case, defFreq) ] unariseAlts rho _ _ alts = mapM (\alt -> unariseAlt rho alt) alts unariseAlt :: UnariseEnv -> StgAlt -> UniqSM StgAlt -unariseAlt rho (con, xs, e) +unariseAlt rho (con, xs, e, f) = do (rho', xs') <- unariseConArgBinders rho xs - (con, xs',) <$> unariseExpr rho' e + e' <- unariseExpr rho' e + return (con, xs', e', f) -------------------------------------------------------------------------------- @@ -472,13 +476,13 @@ unariseSumAlt :: UnariseEnv -> [StgArg] -- sum components _excluding_ the tag bit. -> StgAlt -- original alternative with sum LHS -> UniqSM StgAlt -unariseSumAlt rho _ (DEFAULT, _, e) - = ( DEFAULT, [], ) <$> unariseExpr rho e +unariseSumAlt rho _ (DEFAULT, _, e, f) + = unariseExpr rho e >>= \e -> return ( DEFAULT, [], e, f) -unariseSumAlt rho args (DataAlt sumCon, bs, e) +unariseSumAlt rho args (DataAlt sumCon, bs, e, f) = do let rho' = mapSumIdBinders bs args rho e' <- unariseExpr rho' e - return ( LitAlt (MachInt (fromIntegral (dataConTag sumCon))), [], e' ) + return ( LitAlt (MachInt (fromIntegral (dataConTag sumCon))), [], e', f) unariseSumAlt _ scrt alt = pprPanic "unariseSumAlt" (ppr scrt $$ ppr alt) @@ -780,6 +784,6 @@ mkDefaultLitAlt :: [StgAlt] -> [StgAlt] -- Since they are exhaustive, we can replace one with DEFAULT, to avoid -- generating a final test. Remember, the DEFAULT comes first if it exists. mkDefaultLitAlt [] = pprPanic "elimUbxSumExpr.mkDefaultAlt" (text "Empty alts") -mkDefaultLitAlt alts@((DEFAULT, _, _) : _) = alts -mkDefaultLitAlt ((LitAlt{}, [], rhs) : alts) = (DEFAULT, [], rhs) : alts +mkDefaultLitAlt alts@((DEFAULT, _, _, _) : _) = alts +mkDefaultLitAlt ((LitAlt{}, [], rhs, f) : alts) = (DEFAULT, [], rhs, f) : alts mkDefaultLitAlt alts = pprPanic "mkDefaultLitAlt" (text "Not a lit alt:" <+> ppr alts) diff --git a/compiler/stgSyn/CoreToStg.hs b/compiler/stgSyn/CoreToStg.hs index 671f3eb5b5..47aefd899e 100644 --- a/compiler/stgSyn/CoreToStg.hs +++ b/compiler/stgSyn/CoreToStg.hs @@ -18,7 +18,7 @@ module CoreToStg ( coreToStg ) where import GhcPrelude import CoreSyn -import CoreUtils ( exprType, findDefault, isJoinBind ) +import CoreUtils ( exprType, findDefault, isJoinBind, exprIsBottom ) import CoreArity ( manifestArity ) import StgSyn @@ -34,7 +34,7 @@ import VarEnv import Module import Name ( isExternalName, nameOccName, nameModule_maybe ) import OccName ( occNameFS ) -import BasicTypes ( Arity ) +import BasicTypes ( Arity, neverFreq, defFreq ) import TysWiredIn ( unboxedUnitDataCon ) import Literal import Outputable @@ -348,7 +348,7 @@ coreToTopStgRhs -> CtsM (StgRhs, FreeVarsInfo, CollectedCCs) coreToTopStgRhs dflags ccs this_mod scope_fv_info (bndr, rhs) - = do { (new_rhs, rhs_fvs) <- coreToStgExpr rhs + = do { (new_rhs, rhs_fvs) <- coreToStgExpr dflags rhs ; let (stg_rhs, ccs') = mkTopStgRhs dflags this_mod ccs rhs_fvs bndr bndr_info new_rhs @@ -385,7 +385,7 @@ coreToTopStgRhs dflags ccs this_mod scope_fv_info (bndr, rhs) -- --------------------------------------------------------------------------- coreToStgExpr - :: CoreExpr + :: DynFlags -> CoreExpr -> CtsM (StgExpr, -- Decorated STG expr FreeVarsInfo) -- Its free vars (NB free, not live) @@ -397,23 +397,23 @@ coreToStgExpr -- No LitInteger's should be left by the time this is called. CorePrep -- should have converted them all to a real core representation. -coreToStgExpr (Lit (LitInteger {})) = panic "coreToStgExpr: LitInteger" -coreToStgExpr (Lit l) = return (StgLit l, emptyFVInfo) -coreToStgExpr (Var v) = coreToStgApp Nothing v [] [] -coreToStgExpr (Coercion _) = coreToStgApp Nothing coercionTokenId [] [] +coreToStgExpr _df (Lit (LitInteger {})) = panic "coreToStgExpr: LitInteger" +coreToStgExpr _df (Lit l) = return (StgLit l, emptyFVInfo) +coreToStgExpr df (Var v) = coreToStgApp df Nothing v [] [] +coreToStgExpr df (Coercion _) = coreToStgApp df Nothing coercionTokenId [] [] -coreToStgExpr expr@(App _ _) - = coreToStgApp Nothing f args ticks +coreToStgExpr df expr@(App _ _) + = coreToStgApp df Nothing f args ticks where (f, args, ticks) = myCollectArgs expr -coreToStgExpr expr@(Lam _ _) +coreToStgExpr df expr@(Lam _ _) = let (args, body) = myCollectBinders expr args' = filterStgBinders args in extendVarEnvCts [ (a, LambdaBound) | a <- args' ] $ do - (body, body_fvs) <- coreToStgExpr body + (body, body_fvs) <- coreToStgExpr df body let fvs = args' `minusFVBinders` body_fvs result_expr | null args' = body @@ -421,22 +421,22 @@ coreToStgExpr expr@(Lam _ _) return (result_expr, fvs) -coreToStgExpr (Tick tick expr) +coreToStgExpr df (Tick tick expr) = do case tick of HpcTick{} -> return () ProfNote{} -> return () SourceNote{} -> return () Breakpoint{} -> panic "coreToStgExpr: breakpoint should not happen" - (expr2, fvs) <- coreToStgExpr expr + (expr2, fvs) <- coreToStgExpr df expr return (StgTick tick expr2, fvs) -coreToStgExpr (Cast expr _) - = coreToStgExpr expr +coreToStgExpr df (Cast expr _) + = coreToStgExpr df expr -- Cases require a little more real work. -coreToStgExpr (Case scrut _ _ []) - = coreToStgExpr scrut +coreToStgExpr df (Case scrut _ _ []) + = coreToStgExpr df scrut -- See Note [Empty case alternatives] in CoreSyn If the case -- alternatives are empty, the scrutinee must diverge or raise an -- exception, so we can just dive into it. @@ -447,7 +447,7 @@ coreToStgExpr (Case scrut _ _ []) -- runtime system error function. -coreToStgExpr (Case scrut bndr _ alts) = do +coreToStgExpr df (Case scrut bndr _ alts) = do (alts2, alts_fvs) <- extendVarEnvCts [(bndr, LambdaBound)] $ do (alts2, fvs_s) <- mapAndUnzipM vars_alt alts @@ -467,34 +467,43 @@ coreToStgExpr (Case scrut bndr _ alts) = do -- We tell the scrutinee that everything -- live in the alts is live in it, too. - (scrut2, scrut_fvs) <- coreToStgExpr scrut + (scrut2, scrut_fvs) <- coreToStgExpr df scrut return ( StgCase scrut2 bndr' (mkStgAltType bndr alts) alts2, scrut_fvs `unionFVInfo` alts_fvs_wo_bndr ) where + alt_freq rhs + | gopt Opt_UnlikelyBottoms df + , exprIsBottom rhs + = -- If a expression is bottom we can safely assume it's + -- alternative is rarely taken. Hence we set the + -- branch weight to zero/never. + -- For details see Note [Branch weights] in BasicTypes + neverFreq + | otherwise = defFreq vars_alt (con, binders, rhs) | DataAlt c <- con, c == unboxedUnitDataCon = -- This case is a bit smelly. -- See Note [Nullary unboxed tuple] in Type.hs -- where a nullary tuple is mapped to (State# World#) ASSERT( null binders ) - do { (rhs2, rhs_fvs) <- coreToStgExpr rhs - ; return ((DEFAULT, [], rhs2), rhs_fvs) } + do { (rhs2, rhs_fvs) <- coreToStgExpr df rhs + ; return ((DEFAULT, [], rhs2, alt_freq rhs), rhs_fvs) } | otherwise = let -- Remove type variables binders' = filterStgBinders binders in extendVarEnvCts [(b, LambdaBound) | b <- binders'] $ do - (rhs2, rhs_fvs) <- coreToStgExpr rhs - return ( (con, binders', rhs2), + (rhs2, rhs_fvs) <- coreToStgExpr df rhs + return ( (con, binders', rhs2, alt_freq rhs), binders' `minusFVBinders` rhs_fvs ) -coreToStgExpr (Let bind body) = do - coreToStgLet bind body +coreToStgExpr df (Let bind body) = do + coreToStgLet df bind body -coreToStgExpr e = pprPanic "coreToStgExpr" (ppr e) +coreToStgExpr _ e = pprPanic "coreToStgExpr" (ppr e) mkStgAltType :: Id -> [CoreAlt] -> AltType mkStgAltType bndr alts @@ -541,7 +550,8 @@ mkStgAltType bndr alts -- --------------------------------------------------------------------------- coreToStgApp - :: Maybe UpdateFlag -- Just upd <=> this application is + :: DynFlags + -> Maybe UpdateFlag -- Just upd <=> this application is -- the rhs of a thunk binding -- x = [...] \upd [] -> the_app -- with specified update flag @@ -551,8 +561,8 @@ coreToStgApp -> CtsM (StgExpr, FreeVarsInfo) -coreToStgApp _ f args ticks = do - (args', args_fvs, ticks') <- coreToStgArgs args +coreToStgApp df _ f args ticks = do + (args', args_fvs, ticks') <- coreToStgArgs df args how_bound <- lookupVarCts f let @@ -618,26 +628,27 @@ coreToStgApp _ f args ticks = do -- This is the guy that turns applications into A-normal form -- --------------------------------------------------------------------------- -coreToStgArgs :: [CoreArg] -> CtsM ([StgArg], FreeVarsInfo, [Tickish Id]) -coreToStgArgs [] +coreToStgArgs :: DynFlags -> [CoreArg] + -> CtsM ([StgArg], FreeVarsInfo, [Tickish Id]) +coreToStgArgs _ [] = return ([], emptyFVInfo, []) -coreToStgArgs (Type _ : args) = do -- Type argument - (args', fvs, ts) <- coreToStgArgs args +coreToStgArgs df (Type _ : args) = do -- Type argument + (args', fvs, ts) <- coreToStgArgs df args return (args', fvs, ts) -coreToStgArgs (Coercion _ : args) -- Coercion argument; replace with place holder - = do { (args', fvs, ts) <- coreToStgArgs args +coreToStgArgs df (Coercion _ : args) -- Coercion argument; replace with place holder + = do { (args', fvs, ts) <- coreToStgArgs df args ; return (StgVarArg coercionTokenId : args', fvs, ts) } -coreToStgArgs (Tick t e : args) +coreToStgArgs df (Tick t e : args) = ASSERT( not (tickishIsCode t) ) - do { (args', fvs, ts) <- coreToStgArgs (e : args) + do { (args', fvs, ts) <- coreToStgArgs df (e : args) ; return (args', fvs, t:ts) } -coreToStgArgs (arg : args) = do -- Non-type argument - (stg_args, args_fvs, ticks) <- coreToStgArgs args - (arg', arg_fvs) <- coreToStgExpr arg +coreToStgArgs df (arg : args) = do -- Non-type argument + (stg_args, args_fvs, ticks) <- coreToStgArgs df args + (arg', arg_fvs) <- coreToStgExpr df arg let fvs = args_fvs `unionFVInfo` arg_fvs @@ -677,12 +688,13 @@ coreToStgArgs (arg : args) = do -- Non-type argument -- --------------------------------------------------------------------------- coreToStgLet - :: CoreBind -- bindings + :: DynFlags + -> CoreBind -- bindings -> CoreExpr -- body -> CtsM (StgExpr, -- new let FreeVarsInfo) -- variables free in the whole let -coreToStgLet bind body = do +coreToStgLet df bind body = do (bind2, bind_fvs, body2, body_fvs) <- mfix $ \ ~(_, _, _, rec_body_fvs) -> do @@ -692,7 +704,7 @@ coreToStgLet bind body = do -- Do the body extendVarEnvCts env_ext $ do - (body2, body_fvs) <- coreToStgExpr body + (body2, body_fvs) <- coreToStgExpr df body return (bind2, bind_fvs, body2, body_fvs) @@ -724,7 +736,7 @@ coreToStgLet bind body = do vars_bind body_fvs (NonRec binder rhs) = do - (rhs2, bind_fvs) <- coreToStgRhs body_fvs (binder,rhs) + (rhs2, bind_fvs) <- coreToStgRhs df body_fvs (binder,rhs) let env_ext_item = mk_binding binder rhs @@ -742,19 +754,20 @@ coreToStgLet bind body = do in extendVarEnvCts env_ext $ do (rhss2, fvss) - <- mapAndUnzipM (coreToStgRhs rec_scope_fvs) pairs + <- mapAndUnzipM (coreToStgRhs df rec_scope_fvs) pairs let bind_fvs = unionFVInfos fvss return (StgRec (binders `zip` rhss2), bind_fvs, env_ext) -coreToStgRhs :: FreeVarsInfo -- Free var info for the scope of the binding +coreToStgRhs :: DynFlags + -> FreeVarsInfo -- Free var info for the scope of the binding -> (Id,CoreExpr) -> CtsM (StgRhs, FreeVarsInfo) -coreToStgRhs scope_fv_info (bndr, rhs) = do - (new_rhs, rhs_fvs) <- coreToStgExpr rhs +coreToStgRhs df scope_fv_info (bndr, rhs) = do + (new_rhs, rhs_fvs) <- coreToStgExpr df rhs return (mkStgRhs rhs_fvs bndr bndr_info new_rhs, rhs_fvs) where bndr_info = lookupFVInfo scope_fv_info bndr diff --git a/compiler/stgSyn/StgLint.hs b/compiler/stgSyn/StgLint.hs index bb2064ab48..30b2b991a3 100644 --- a/compiler/stgSyn/StgLint.hs +++ b/compiler/stgSyn/StgLint.hs @@ -36,6 +36,8 @@ module StgLint ( lintStgTopBindings ) where import GhcPrelude +import BasicTypes (BranchWeight) + import StgSyn import DynFlags @@ -184,18 +186,20 @@ lintStgExpr (StgCase scrut bndr alts_type alts) = do addInScopeVars [bndr | in_scope] (mapM_ lintAlt alts) -lintAlt :: (AltCon, [Id], StgExpr) -> LintM () +lintAlt :: (AltCon, [Id], StgExpr, BranchWeight) -> LintM () -lintAlt (DEFAULT, _, rhs) = +lintAlt (DEFAULT, _, rhs, _) = lintStgExpr rhs -lintAlt (LitAlt _, _, rhs) = +lintAlt (LitAlt _, _, rhs, _) = lintStgExpr rhs -lintAlt (DataAlt _, bndrs, rhs) = do +lintAlt (DataAlt _, bndrs, rhs, _) = do mapM_ checkPostUnariseBndr bndrs addInScopeVars bndrs (lintStgExpr rhs) + + {- ************************************************************************ * * diff --git a/compiler/stgSyn/StgSyn.hs b/compiler/stgSyn/StgSyn.hs index 29d544103f..3f7cbc0f46 100644 --- a/compiler/stgSyn/StgSyn.hs +++ b/compiler/stgSyn/StgSyn.hs @@ -47,6 +47,7 @@ module StgSyn ( import GhcPrelude +import BasicTypes (BranchWeight) import CoreSyn ( AltCon, Tickish ) import CostCentre ( CostCentreStack ) import Data.ByteString ( ByteString ) @@ -479,7 +480,7 @@ rhsHasCafRefs (StgRhsCon _ _ args) = any stgArgHasCafRefs args altHasCafRefs :: GenStgAlt bndr Id -> Bool -altHasCafRefs (_, _, rhs) = exprHasCafRefs rhs +altHasCafRefs (_, _, rhs, _) = exprHasCafRefs rhs stgArgHasCafRefs :: GenStgArg Id -> Bool stgArgHasCafRefs (StgVarArg id) @@ -543,7 +544,9 @@ rather than from the scrutinee type. type GenStgAlt bndr occ = (AltCon, -- alts: data constructor, [bndr], -- constructor's parameters, - GenStgExpr bndr occ) -- ...right-hand side. + GenStgExpr bndr occ, -- ..right-hand side, + BranchWeight) -- relative chance to take this alt, see + -- Note [Branch weights] in BasicTypes data AltType = PolyAlt -- Polymorphic (a lifted type variable) @@ -784,8 +787,11 @@ pprStgExpr (StgCase expr bndr alt_type alts) pprStgAlt :: (OutputableBndr bndr, Outputable occ, Ord occ) => GenStgAlt bndr occ -> SDoc -pprStgAlt (con, params, expr) - = hang (hsep [ppr con, sep (map (pprBndr CasePatBind) params), text "->"]) +pprStgAlt (con, params, expr, f) + = hang (hsep [ppr con, + sep (map (pprBndr CasePatBind) params), + parens (text "likely:" <> ppr f) , + text "->"]) 4 (ppr expr <> semi) pprStgOp :: StgOp -> SDoc |