diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2022-04-07 17:21:47 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-08-06 06:13:17 -0400 |
commit | fab0ee93abda33bf5c7eb5ca0372e12bd140a252 (patch) | |
tree | dfb79e20a525328a52bd5ea9168583b836f9ab54 /compiler/GHC/Core | |
parent | 1f6c56ae9aa4ab4977ba376ac901d5256bf0aba0 (diff) | |
download | haskell-fab0ee93abda33bf5c7eb5ca0372e12bd140a252.tar.gz |
Change `-fprof-late` to insert cost centres after unfolding creation.
The former behaviour of adding cost centres after optimization but
before unfoldings are created is not available via the flag
`prof-late-inline` instead.
I also reduced the overhead of -fprof-late* by pushing the cost centres
into lambdas. This means the cost centres will only account for
execution of functions and not their partial application.
Further I made LATE_CC cost centres it's own CC flavour so they now
won't clash with user defined ones if a user uses the same string for
a custom scc.
LateCC: Don't put cost centres inside constructor workers.
With -fprof-late they are rarely useful as the worker is usually
inlined. Even if the worker is not inlined or we use -fprof-late-linline
they are generally not helpful but bloat compile and run time
significantly. So we just don't add sccs inside constructor workers.
-------------------------
Metric Decrease:
T13701
-------------------------
Diffstat (limited to 'compiler/GHC/Core')
-rw-r--r-- | compiler/GHC/Core/LateCC.hs | 142 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Pipeline.hs | 7 |
2 files changed, 122 insertions, 27 deletions
diff --git a/compiler/GHC/Core/LateCC.hs b/compiler/GHC/Core/LateCC.hs index 2b4f810441..7a677e9964 100644 --- a/compiler/GHC/Core/LateCC.hs +++ b/compiler/GHC/Core/LateCC.hs @@ -3,12 +3,15 @@ -- | Adds cost-centers after the core piple has run. module GHC.Core.LateCC - ( addLateCostCentres + ( addLateCostCentresMG + , addLateCostCentresPgm + , addLateCostCentres -- Might be useful for API users + , Env(..) ) where import Control.Applicative -import GHC.Utils.Monad.State.Strict import Control.Monad +import qualified Data.Set as S import GHC.Prelude import GHC.Types.CostCentre @@ -20,21 +23,83 @@ import GHC.Types.Var import GHC.Unit.Types import GHC.Data.FastString import GHC.Core -import GHC.Types.Id +import GHC.Core.Opt.Monad import GHC.Core.Utils (mkTick) +import GHC.Types.Id +import GHC.Driver.Session -addLateCostCentres :: Bool -> ModGuts -> ModGuts -addLateCostCentres prof_count_entries guts = let - env = Env - { thisModule = mg_module guts - , ccState = newCostCentreState - , countEntries = prof_count_entries - } - in guts { mg_binds = doCoreProgram env (mg_binds guts) } +import GHC.Utils.Logger +import GHC.Utils.Outputable +import GHC.Utils.Misc +import GHC.Utils.Error (withTiming) +import GHC.Utils.Monad.State.Strict + + +{- Note [Collecting late cost centres] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Usually cost centres defined by a module are collected +during tidy by collectCostCentres. However with `-fprof-late` +we insert cost centres after inlining. So we keep a list of +all the cost centres we inserted and combine that with the list +of cost centres found during tidy. + +To avoid overhead when using -fprof-inline there is a flag to stop +us from collecting them here when we run this pass before tidy. + +Note [Adding late cost centres] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The basic idea is very simple. For every top level binder +`f = rhs` we compile it as if the user had written +`f = {-# SCC f #-} rhs`. + +If we do this after unfoldings for `f` have been created this +doesn't impact core-level optimizations at all. If we do it +before the cost centre will be included in the unfolding and +might inhibit optimizations at the call site. For this reason +we provide flags for both approaches as they have different +tradeoffs. + +We also don't add a cost centre for any binder that is a constructor +worker or wrapper. These will never meaningfully enrich the resulting +profile so we improve efficiency by omitting those. + +-} + +addLateCostCentresMG :: ModGuts -> CoreM ModGuts +addLateCostCentresMG guts = do + dflags <- getDynFlags + let env :: Env + env = Env + { thisModule = mg_module guts + , ccState = newCostCentreState + , countEntries = gopt Opt_ProfCountEntries dflags + , collectCCs = False -- See Note [Collecting late cost centres] + } + let guts' = guts { mg_binds = fst (addLateCostCentres env (mg_binds guts)) + } + return guts' + +addLateCostCentresPgm :: DynFlags -> Logger -> Module -> CoreProgram -> IO (CoreProgram, S.Set CostCentre) +addLateCostCentresPgm dflags logger mod binds = + withTiming logger + (text "LateCC"<+>brackets (ppr mod)) + (\(a,b) -> a `seqList` (b `seq` ())) $ do + let env = Env + { thisModule = mod + , ccState = newCostCentreState + , countEntries = gopt Opt_ProfCountEntries dflags + , collectCCs = True -- See Note [Collecting late cost centres] + } + (binds', ccs) = addLateCostCentres env binds + when (dopt Opt_D_dump_late_cc dflags || dopt Opt_D_verbose_core2core dflags) $ + putDumpFileMaybe logger Opt_D_dump_late_cc "LateCC" FormatCore (vcat (map ppr binds')) + return (binds', ccs) + +addLateCostCentres :: Env -> CoreProgram -> (CoreProgram,S.Set CostCentre) +addLateCostCentres env binds = + let (binds', state) = runState (mapM (doBind env) binds) initLateCCState + in (binds',lcs_ccs state) -doCoreProgram :: Env -> CoreProgram -> CoreProgram -doCoreProgram env binds = flip evalState newCostCentreState $ do - mapM (doBind env) binds doBind :: Env -> CoreBind -> M CoreBind doBind env (NonRec b rhs) = NonRec b <$> doBndr env b rhs @@ -44,28 +109,59 @@ doBind env (Rec bs) = Rec <$> mapM doPair bs doPair (b,rhs) = (b,) <$> doBndr env b rhs doBndr :: Env -> Id -> CoreExpr -> M CoreExpr -doBndr env bndr rhs = do +doBndr env bndr rhs + -- Cost centres on constructor workers are pretty much useless + -- so we don't emit them if we are looking at the rhs of a constructor + -- binding. + | Just _ <- isDataConId_maybe bndr = pure rhs + | otherwise = doBndr' env bndr rhs + + +-- We want to put the cost centra below the lambda as we only care about executions of the RHS. +doBndr' :: Env -> Id -> CoreExpr -> State LateCCState CoreExpr +doBndr' env bndr (Lam b rhs) = Lam b <$> doBndr' env bndr rhs +doBndr' env bndr rhs = do let name = idName bndr name_loc = nameSrcSpan name cc_name = getOccFS name count = countEntries env - cc_flavour <- getCCExprFlavour cc_name + cc_flavour <- getCCFlavour cc_name let cc_mod = thisModule env bndrCC = NormalCC cc_flavour cc_name cc_mod name_loc note = ProfNote bndrCC count True + addCC env bndrCC return $ mkTick note rhs -type M = State CostCentreState +data LateCCState = LateCCState + { lcs_state :: !CostCentreState + , lcs_ccs :: S.Set CostCentre + } +type M = State LateCCState -getCCExprFlavour :: FastString -> M CCFlavour -getCCExprFlavour name = ExprCC <$> getCCIndex' name +initLateCCState :: LateCCState +initLateCCState = LateCCState newCostCentreState mempty + +getCCFlavour :: FastString -> M CCFlavour +getCCFlavour name = LateCC <$> getCCIndex' name getCCIndex' :: FastString -> M CostCentreIndex -getCCIndex' name = state (getCCIndex name) +getCCIndex' name = do + state <- get + let (index,cc_state') = getCCIndex name (lcs_state state) + put (state { lcs_state = cc_state'}) + return index + +addCC :: Env -> CostCentre -> M () +addCC !env cc = do + state <- get + when (collectCCs env) $ do + let ccs' = S.insert cc (lcs_ccs state) + put (state { lcs_ccs = ccs'}) data Env = Env - { thisModule :: Module - , countEntries :: Bool - , ccState :: CostCentreState + { thisModule :: !Module + , countEntries:: !Bool + , ccState :: !CostCentreState + , collectCCs :: !Bool } diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index d1ca6a2165..bbf0dc2164 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -43,7 +43,7 @@ import GHC.Core.Opt.CallArity ( callArityAnalProgram ) import GHC.Core.Opt.Exitify ( exitifyProgram ) import GHC.Core.Opt.WorkWrap ( wwTopBinds ) import GHC.Core.Opt.CallerCC ( addCallerCostCentres ) -import GHC.Core.LateCC (addLateCostCentres) +import GHC.Core.LateCC (addLateCostCentresMG) import GHC.Core.Seq (seqBinds) import GHC.Core.FamInstEnv @@ -198,7 +198,7 @@ getCoreToDo dflags rule_base extra_vars runWhen (profiling && not (null $ callerCcFilters dflags)) CoreAddCallerCcs add_late_ccs = - runWhen (profiling && gopt Opt_ProfLateCcs dflags) $ CoreAddLateCcs + runWhen (profiling && gopt Opt_ProfLateInlineCcs dflags) $ CoreAddLateCcs core_todo = [ @@ -463,7 +463,6 @@ doCorePass pass guts = do p_fam_env <- getPackageFamInstEnv let platform = targetPlatform dflags let fam_envs = (p_fam_env, mg_fam_inst_env guts) - let prof_count_entries = gopt Opt_ProfCountEntries dflags let updateBinds f = return $ guts { mg_binds = f (mg_binds guts) } let updateBindsM f = f (mg_binds guts) >>= \b' -> return $ guts { mg_binds = b' } @@ -513,7 +512,7 @@ doCorePass pass guts = do addCallerCostCentres guts CoreAddLateCcs -> {-# SCC "AddLateCcs" #-} - return (addLateCostCentres prof_count_entries guts) + addLateCostCentresMG guts CoreDoPrintCore -> {-# SCC "PrintCore" #-} liftIO $ printCore logger (mg_binds guts) >> return guts |