diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/Core/Unfold.hs | 91 | ||||
-rw-r--r-- | compiler/GHC/Core/Unfold/Make.hs | 92 |
3 files changed, 98 insertions, 92 deletions
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs index 9becea0c18..511d3bf6e3 100644 --- a/compiler/GHC/Core/Opt/WorkWrap.hs +++ b/compiler/GHC/Core/Opt/WorkWrap.hs @@ -13,13 +13,12 @@ import GHC.Driver.Session import GHC.Core.Opt.Arity ( manifestArity ) import GHC.Core -import GHC.Core.Unfold import GHC.Core.Unfold.Make import GHC.Core.Utils ( exprType, exprIsHNF ) import GHC.Core.Type import GHC.Core.Opt.WorkWrap.Utils import GHC.Core.FamInstEnv -import GHC.Core.SimpleOpt( SimpleOpts(..) ) +import GHC.Core.SimpleOpt import GHC.Types.Var import GHC.Types.Id @@ -719,7 +718,9 @@ splitFun ww_opts fn_id rhs return [(fn_id, rhs)] Just stuff - | Just stable_unf <- certainlyWillInline uf_opts fn_info + | let opt_wwd_rhs = simpleOptExpr (wo_simple_opts ww_opts) rhs + -- We need to stabilise the WW'd (and optimised) RHS below + , Just stable_unf <- certainlyWillInline uf_opts fn_info opt_wwd_rhs -- We could make a w/w split, but in fact the RHS is small -- See Note [Don't w/w inline small non-loop-breaker things] , let id_w_unf = fn_id `setIdUnfolding` stable_unf diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs index dbc6b1e7fd..08c5a10b30 100644 --- a/compiler/GHC/Core/Unfold.hs +++ b/compiler/GHC/Core/Unfold.hs @@ -18,8 +18,6 @@ find, unsurprisingly, a Core expression. {-# LANGUAGE BangPatterns #-} -{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} - module GHC.Core.Unfold ( Unfolding, UnfoldingGuidance, -- Abstract types @@ -32,7 +30,7 @@ module GHC.Core.Unfold ( ArgSummary(..), couldBeSmallEnoughToInline, inlineBoringOk, - certainlyWillInline, smallEnoughToInline, + smallEnoughToInline, callSiteInline, CallCtxt(..), calcUnfoldingGuidance @@ -45,12 +43,11 @@ import GHC.Driver.Flags import GHC.Core import GHC.Core.Utils import GHC.Types.Id -import GHC.Types.Demand ( isDeadEndSig ) import GHC.Core.DataCon import GHC.Types.Literal import GHC.Builtin.PrimOps import GHC.Types.Id.Info -import GHC.Types.Basic ( Arity, isNoInlinePragma ) +import GHC.Types.Basic ( Arity ) import GHC.Core.Type import GHC.Builtin.Names import GHC.Builtin.Types.Prim ( realWorldStatePrimTy ) @@ -961,89 +958,7 @@ smallEnoughToInline opts (CoreUnfolding {uf_guidance = guidance}) smallEnoughToInline _ _ = False ----------------- - -certainlyWillInline :: UnfoldingOpts -> IdInfo -> Maybe Unfolding --- ^ Sees if the unfolding is pretty certain to inline. --- If so, return a *stable* unfolding for it, that will always inline. -certainlyWillInline opts fn_info - = case fn_unf of - CoreUnfolding { uf_tmpl = expr, uf_guidance = guidance, uf_src = src } - | noinline -> Nothing -- See Note [Worker/wrapper for NOINLINE functions] - | otherwise - -> case guidance of - UnfNever -> Nothing - UnfWhen {} -> Just (fn_unf { uf_src = src' }) - -- INLINE functions have UnfWhen - UnfIfGoodArgs { ug_size = size, ug_args = args } - -> do_cunf expr size args src' - where - src' = -- Do not change InlineCompulsory! - case src of - InlineCompulsory -> InlineCompulsory - _ -> InlineStable - - DFunUnfolding {} -> Just fn_unf -- Don't w/w DFuns; it never makes sense - -- to do so, and even if it is currently a - -- loop breaker, it may not be later - - _other_unf -> Nothing - - where - noinline = isNoInlinePragma (inlinePragInfo fn_info) - fn_unf = unfoldingInfo fn_info -- NB: loop-breakers never inline - - -- The UnfIfGoodArgs case seems important. If we w/w small functions - -- binary sizes go up by 10%! (This is with SplitObjs.) - -- I'm not totally sure why. - -- INLINABLE functions come via this path - -- See Note [certainlyWillInline: INLINABLE] - do_cunf expr size args src' - | arityInfo fn_info > 0 -- See Note [certainlyWillInline: be careful of thunks] - , not (isDeadEndSig (dmdSigInfo fn_info)) - -- Do not unconditionally inline a bottoming functions even if - -- it seems smallish. We've carefully lifted it out to top level, - -- so we don't want to re-inline it. - , let unf_arity = length args - , size - (10 * (unf_arity + 1)) <= unfoldingUseThreshold opts - = Just (fn_unf { uf_src = src' - , uf_guidance = UnfWhen { ug_arity = unf_arity - , ug_unsat_ok = unSaturatedOk - , ug_boring_ok = inlineBoringOk expr } }) - -- Note the "unsaturatedOk". A function like f = \ab. a - -- will certainly inline, even if partially applied (f e), so we'd - -- better make sure that the transformed inlining has the same property - | otherwise - = Nothing - -{- Note [certainlyWillInline: be careful of thunks] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Don't claim that thunks will certainly inline, because that risks work -duplication. Even if the work duplication is not great (eg is_cheap -holds), it can make a big difference in an inner loop In #5623 we -found that the WorkWrap phase thought that - y = case x of F# v -> F# (v +# v) -was certainlyWillInline, so the addition got duplicated. - -Note that we check arityInfo instead of the arity of the unfolding to detect -this case. This is so that we don't accidentally fail to inline small partial -applications, like `f = g 42` (where `g` recurses into `f`) where g has arity 2 -(say). Here there is no risk of work duplication, and the RHS is tiny, so -certainlyWillInline should return True. But `unf_arity` is zero! However f's -arity, gotten from `arityInfo fn_info`, is 1. - -Failing to say that `f` will inline forces W/W to generate a potentially huge -worker for f that will immediately cancel with `g`'s wrapper anyway, causing -unnecessary churn in the Simplifier while arriving at the same result. - -Note [certainlyWillInline: INLINABLE] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -certainlyWillInline /must/ return Nothing for a large INLINABLE thing, -even though we have a stable inlining, so that strictness w/w takes -place. It makes a big difference to efficiency, and the w/w pass knows -how to transfer the INLINABLE info to the worker; see WorkWrap -Note [Worker/wrapper for INLINABLE functions] - +{- ************************************************************************ * * \subsection{callSiteInline} diff --git a/compiler/GHC/Core/Unfold/Make.hs b/compiler/GHC/Core/Unfold/Make.hs index 71981061ef..dd0a0b968a 100644 --- a/compiler/GHC/Core/Unfold/Make.hs +++ b/compiler/GHC/Core/Unfold/Make.hs @@ -1,4 +1,4 @@ - +{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} -- | Unfolding creation module GHC.Core.Unfold.Make @@ -16,6 +16,7 @@ module GHC.Core.Unfold.Make , mkCompulsoryUnfolding' , mkDFunUnfolding , specUnfolding + , certainlyWillInline ) where @@ -28,6 +29,7 @@ import GHC.Core.DataCon import GHC.Core.Utils import GHC.Types.Basic import GHC.Types.Id +import GHC.Types.Id.Info import GHC.Types.Demand ( DmdSig, isDeadEndSig ) import GHC.Utils.Outputable @@ -309,4 +311,92 @@ mkCoreUnfolding src top_lvl expr guidance uf_expandable = exprIsExpandable expr, uf_guidance = guidance } +---------------- +certainlyWillInline :: UnfoldingOpts -> IdInfo -> CoreExpr -> Maybe Unfolding +-- ^ Sees if the unfolding is pretty certain to inline. +-- If so, return a *stable* unfolding for it, that will always inline. +-- The CoreExpr is the WW'd and simplified RHS. In contrast, the unfolding +-- template might not have been WW'd yet. +certainlyWillInline opts fn_info rhs' + = case fn_unf of + CoreUnfolding { uf_guidance = guidance, uf_src = src } + | noinline -> Nothing -- See Note [Worker/wrapper for NOINLINE functions] + | otherwise + -> case guidance of + UnfNever -> Nothing + UnfWhen {} -> Just (fn_unf { uf_src = src', uf_tmpl = tmpl' }) + -- INLINE functions have UnfWhen + UnfIfGoodArgs { ug_size = size, ug_args = args } + -> do_cunf size args src' tmpl' + where + src' = -- Do not change InlineCompulsory! + case src of + InlineCompulsory -> InlineCompulsory + _ -> InlineStable + tmpl' = -- Do not overwrite stable unfoldings! + case src of + InlineRhs -> occurAnalyseExpr rhs' + _ -> uf_tmpl fn_unf + + DFunUnfolding {} -> Just fn_unf -- Don't w/w DFuns; it never makes sense + -- to do so, and even if it is currently a + -- loop breaker, it may not be later + + _other_unf -> Nothing + where + noinline = isNoInlinePragma (inlinePragInfo fn_info) + fn_unf = unfoldingInfo fn_info -- NB: loop-breakers never inline + + -- The UnfIfGoodArgs case seems important. If we w/w small functions + -- binary sizes go up by 10%! (This is with SplitObjs.) + -- I'm not totally sure why. + -- INLINABLE functions come via this path + -- See Note [certainlyWillInline: INLINABLE] + do_cunf size args src' tmpl' + | arityInfo fn_info > 0 -- See Note [certainlyWillInline: be careful of thunks] + , not (isDeadEndSig (dmdSigInfo fn_info)) + -- Do not unconditionally inline a bottoming functions even if + -- it seems smallish. We've carefully lifted it out to top level, + -- so we don't want to re-inline it. + , let unf_arity = length args + , size - (10 * (unf_arity + 1)) <= unfoldingUseThreshold opts + = Just (fn_unf { uf_src = src' + , uf_tmpl = tmpl' + , uf_guidance = UnfWhen { ug_arity = unf_arity + , ug_unsat_ok = unSaturatedOk + , ug_boring_ok = inlineBoringOk tmpl' } }) + -- Note the "unsaturatedOk". A function like f = \ab. a + -- will certainly inline, even if partially applied (f e), so we'd + -- better make sure that the transformed inlining has the same property + | otherwise + = Nothing + +{- Note [certainlyWillInline: be careful of thunks] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Don't claim that thunks will certainly inline, because that risks work +duplication. Even if the work duplication is not great (eg is_cheap +holds), it can make a big difference in an inner loop In #5623 we +found that the WorkWrap phase thought that + y = case x of F# v -> F# (v +# v) +was certainlyWillInline, so the addition got duplicated. + +Note that we check arityInfo instead of the arity of the unfolding to detect +this case. This is so that we don't accidentally fail to inline small partial +applications, like `f = g 42` (where `g` recurses into `f`) where g has arity 2 +(say). Here there is no risk of work duplication, and the RHS is tiny, so +certainlyWillInline should return True. But `unf_arity` is zero! However f's +arity, gotten from `arityInfo fn_info`, is 1. + +Failing to say that `f` will inline forces W/W to generate a potentially huge +worker for f that will immediately cancel with `g`'s wrapper anyway, causing +unnecessary churn in the Simplifier while arriving at the same result. + +Note [certainlyWillInline: INLINABLE] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +certainlyWillInline /must/ return Nothing for a large INLINABLE thing, +even though we have a stable inlining, so that strictness w/w takes +place. It makes a big difference to efficiency, and the w/w pass knows +how to transfer the INLINABLE info to the worker; see WorkWrap +Note [Worker/wrapper for INLINABLE functions] +-} |