summaryrefslogtreecommitdiff
path: root/compiler/GHC/Core
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2022-04-07 17:21:47 +0200
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-08-06 06:13:17 -0400
commitfab0ee93abda33bf5c7eb5ca0372e12bd140a252 (patch)
treedfb79e20a525328a52bd5ea9168583b836f9ab54 /compiler/GHC/Core
parent1f6c56ae9aa4ab4977ba376ac901d5256bf0aba0 (diff)
downloadhaskell-fab0ee93abda33bf5c7eb5ca0372e12bd140a252.tar.gz
Change `-fprof-late` to insert cost centres after unfolding creation.
The former behaviour of adding cost centres after optimization but before unfoldings are created is not available via the flag `prof-late-inline` instead. I also reduced the overhead of -fprof-late* by pushing the cost centres into lambdas. This means the cost centres will only account for execution of functions and not their partial application. Further I made LATE_CC cost centres it's own CC flavour so they now won't clash with user defined ones if a user uses the same string for a custom scc. LateCC: Don't put cost centres inside constructor workers. With -fprof-late they are rarely useful as the worker is usually inlined. Even if the worker is not inlined or we use -fprof-late-linline they are generally not helpful but bloat compile and run time significantly. So we just don't add sccs inside constructor workers. ------------------------- Metric Decrease: T13701 -------------------------
Diffstat (limited to 'compiler/GHC/Core')
-rw-r--r--compiler/GHC/Core/LateCC.hs142
-rw-r--r--compiler/GHC/Core/Opt/Pipeline.hs7
2 files changed, 122 insertions, 27 deletions
diff --git a/compiler/GHC/Core/LateCC.hs b/compiler/GHC/Core/LateCC.hs
index 2b4f810441..7a677e9964 100644
--- a/compiler/GHC/Core/LateCC.hs
+++ b/compiler/GHC/Core/LateCC.hs
@@ -3,12 +3,15 @@
-- | Adds cost-centers after the core piple has run.
module GHC.Core.LateCC
- ( addLateCostCentres
+ ( addLateCostCentresMG
+ , addLateCostCentresPgm
+ , addLateCostCentres -- Might be useful for API users
+ , Env(..)
) where
import Control.Applicative
-import GHC.Utils.Monad.State.Strict
import Control.Monad
+import qualified Data.Set as S
import GHC.Prelude
import GHC.Types.CostCentre
@@ -20,21 +23,83 @@ import GHC.Types.Var
import GHC.Unit.Types
import GHC.Data.FastString
import GHC.Core
-import GHC.Types.Id
+import GHC.Core.Opt.Monad
import GHC.Core.Utils (mkTick)
+import GHC.Types.Id
+import GHC.Driver.Session
-addLateCostCentres :: Bool -> ModGuts -> ModGuts
-addLateCostCentres prof_count_entries guts = let
- env = Env
- { thisModule = mg_module guts
- , ccState = newCostCentreState
- , countEntries = prof_count_entries
- }
- in guts { mg_binds = doCoreProgram env (mg_binds guts) }
+import GHC.Utils.Logger
+import GHC.Utils.Outputable
+import GHC.Utils.Misc
+import GHC.Utils.Error (withTiming)
+import GHC.Utils.Monad.State.Strict
+
+
+{- Note [Collecting late cost centres]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Usually cost centres defined by a module are collected
+during tidy by collectCostCentres. However with `-fprof-late`
+we insert cost centres after inlining. So we keep a list of
+all the cost centres we inserted and combine that with the list
+of cost centres found during tidy.
+
+To avoid overhead when using -fprof-inline there is a flag to stop
+us from collecting them here when we run this pass before tidy.
+
+Note [Adding late cost centres]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The basic idea is very simple. For every top level binder
+`f = rhs` we compile it as if the user had written
+`f = {-# SCC f #-} rhs`.
+
+If we do this after unfoldings for `f` have been created this
+doesn't impact core-level optimizations at all. If we do it
+before the cost centre will be included in the unfolding and
+might inhibit optimizations at the call site. For this reason
+we provide flags for both approaches as they have different
+tradeoffs.
+
+We also don't add a cost centre for any binder that is a constructor
+worker or wrapper. These will never meaningfully enrich the resulting
+profile so we improve efficiency by omitting those.
+
+-}
+
+addLateCostCentresMG :: ModGuts -> CoreM ModGuts
+addLateCostCentresMG guts = do
+ dflags <- getDynFlags
+ let env :: Env
+ env = Env
+ { thisModule = mg_module guts
+ , ccState = newCostCentreState
+ , countEntries = gopt Opt_ProfCountEntries dflags
+ , collectCCs = False -- See Note [Collecting late cost centres]
+ }
+ let guts' = guts { mg_binds = fst (addLateCostCentres env (mg_binds guts))
+ }
+ return guts'
+
+addLateCostCentresPgm :: DynFlags -> Logger -> Module -> CoreProgram -> IO (CoreProgram, S.Set CostCentre)
+addLateCostCentresPgm dflags logger mod binds =
+ withTiming logger
+ (text "LateCC"<+>brackets (ppr mod))
+ (\(a,b) -> a `seqList` (b `seq` ())) $ do
+ let env = Env
+ { thisModule = mod
+ , ccState = newCostCentreState
+ , countEntries = gopt Opt_ProfCountEntries dflags
+ , collectCCs = True -- See Note [Collecting late cost centres]
+ }
+ (binds', ccs) = addLateCostCentres env binds
+ when (dopt Opt_D_dump_late_cc dflags || dopt Opt_D_verbose_core2core dflags) $
+ putDumpFileMaybe logger Opt_D_dump_late_cc "LateCC" FormatCore (vcat (map ppr binds'))
+ return (binds', ccs)
+
+addLateCostCentres :: Env -> CoreProgram -> (CoreProgram,S.Set CostCentre)
+addLateCostCentres env binds =
+ let (binds', state) = runState (mapM (doBind env) binds) initLateCCState
+ in (binds',lcs_ccs state)
-doCoreProgram :: Env -> CoreProgram -> CoreProgram
-doCoreProgram env binds = flip evalState newCostCentreState $ do
- mapM (doBind env) binds
doBind :: Env -> CoreBind -> M CoreBind
doBind env (NonRec b rhs) = NonRec b <$> doBndr env b rhs
@@ -44,28 +109,59 @@ doBind env (Rec bs) = Rec <$> mapM doPair bs
doPair (b,rhs) = (b,) <$> doBndr env b rhs
doBndr :: Env -> Id -> CoreExpr -> M CoreExpr
-doBndr env bndr rhs = do
+doBndr env bndr rhs
+ -- Cost centres on constructor workers are pretty much useless
+ -- so we don't emit them if we are looking at the rhs of a constructor
+ -- binding.
+ | Just _ <- isDataConId_maybe bndr = pure rhs
+ | otherwise = doBndr' env bndr rhs
+
+
+-- We want to put the cost centra below the lambda as we only care about executions of the RHS.
+doBndr' :: Env -> Id -> CoreExpr -> State LateCCState CoreExpr
+doBndr' env bndr (Lam b rhs) = Lam b <$> doBndr' env bndr rhs
+doBndr' env bndr rhs = do
let name = idName bndr
name_loc = nameSrcSpan name
cc_name = getOccFS name
count = countEntries env
- cc_flavour <- getCCExprFlavour cc_name
+ cc_flavour <- getCCFlavour cc_name
let cc_mod = thisModule env
bndrCC = NormalCC cc_flavour cc_name cc_mod name_loc
note = ProfNote bndrCC count True
+ addCC env bndrCC
return $ mkTick note rhs
-type M = State CostCentreState
+data LateCCState = LateCCState
+ { lcs_state :: !CostCentreState
+ , lcs_ccs :: S.Set CostCentre
+ }
+type M = State LateCCState
-getCCExprFlavour :: FastString -> M CCFlavour
-getCCExprFlavour name = ExprCC <$> getCCIndex' name
+initLateCCState :: LateCCState
+initLateCCState = LateCCState newCostCentreState mempty
+
+getCCFlavour :: FastString -> M CCFlavour
+getCCFlavour name = LateCC <$> getCCIndex' name
getCCIndex' :: FastString -> M CostCentreIndex
-getCCIndex' name = state (getCCIndex name)
+getCCIndex' name = do
+ state <- get
+ let (index,cc_state') = getCCIndex name (lcs_state state)
+ put (state { lcs_state = cc_state'})
+ return index
+
+addCC :: Env -> CostCentre -> M ()
+addCC !env cc = do
+ state <- get
+ when (collectCCs env) $ do
+ let ccs' = S.insert cc (lcs_ccs state)
+ put (state { lcs_ccs = ccs'})
data Env = Env
- { thisModule :: Module
- , countEntries :: Bool
- , ccState :: CostCentreState
+ { thisModule :: !Module
+ , countEntries:: !Bool
+ , ccState :: !CostCentreState
+ , collectCCs :: !Bool
}
diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs
index d1ca6a2165..bbf0dc2164 100644
--- a/compiler/GHC/Core/Opt/Pipeline.hs
+++ b/compiler/GHC/Core/Opt/Pipeline.hs
@@ -43,7 +43,7 @@ import GHC.Core.Opt.CallArity ( callArityAnalProgram )
import GHC.Core.Opt.Exitify ( exitifyProgram )
import GHC.Core.Opt.WorkWrap ( wwTopBinds )
import GHC.Core.Opt.CallerCC ( addCallerCostCentres )
-import GHC.Core.LateCC (addLateCostCentres)
+import GHC.Core.LateCC (addLateCostCentresMG)
import GHC.Core.Seq (seqBinds)
import GHC.Core.FamInstEnv
@@ -198,7 +198,7 @@ getCoreToDo dflags rule_base extra_vars
runWhen (profiling && not (null $ callerCcFilters dflags)) CoreAddCallerCcs
add_late_ccs =
- runWhen (profiling && gopt Opt_ProfLateCcs dflags) $ CoreAddLateCcs
+ runWhen (profiling && gopt Opt_ProfLateInlineCcs dflags) $ CoreAddLateCcs
core_todo =
[
@@ -463,7 +463,6 @@ doCorePass pass guts = do
p_fam_env <- getPackageFamInstEnv
let platform = targetPlatform dflags
let fam_envs = (p_fam_env, mg_fam_inst_env guts)
- let prof_count_entries = gopt Opt_ProfCountEntries dflags
let updateBinds f = return $ guts { mg_binds = f (mg_binds guts) }
let updateBindsM f = f (mg_binds guts) >>= \b' -> return $ guts { mg_binds = b' }
@@ -513,7 +512,7 @@ doCorePass pass guts = do
addCallerCostCentres guts
CoreAddLateCcs -> {-# SCC "AddLateCcs" #-}
- return (addLateCostCentres prof_count_entries guts)
+ addLateCostCentresMG guts
CoreDoPrintCore -> {-# SCC "PrintCore" #-}
liftIO $ printCore logger (mg_binds guts) >> return guts