diff options
author | Sebastian Graf <sebastian.graf@kit.edu> | 2021-10-13 17:25:30 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-10-29 05:02:25 -0400 |
commit | 925c47b46529d202190f18bd653a6945caa51823 (patch) | |
tree | abcd83a8a9e7e6cac99025850a6e762f609dfbb6 | |
parent | 7170052651ff02bfcf1e9611f0813dd20a7c8558 (diff) | |
download | haskell-925c47b46529d202190f18bd653a6945caa51823.tar.gz |
WorkWrap: Update Unfolding with WW'd body prior to `tryWW` (#20510)
We have a function in #20510 that is small enough to get a stable unfolding in WW:
```hs
small :: Int -> Int
small x = go 0 x
where
go z 0 = z * x
go z y = go (z+y) (y-1)
```
But it appears we failed to use the WW'd RHS as the stable unfolding. As a result,
inlining `small` would expose the non-WW'd version of `go`. That appears to regress
badly in #19727 which is a bit too large to extract a reproducer from that is
guaranteed to reproduce across GHC versions.
The solution is to simply update the unfolding in `certainlyWillInline` with the
WW'd RHS.
Fixes #20510.
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/Core/Unfold.hs | 91 | ||||
-rw-r--r-- | compiler/GHC/Core/Unfold/Make.hs | 92 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T7360.stderr | 4 | ||||
-rw-r--r-- | testsuite/tests/stranal/should_compile/T20510.hs | 7 | ||||
-rw-r--r-- | testsuite/tests/stranal/should_compile/T20510.stderr | 125 | ||||
-rw-r--r-- | testsuite/tests/stranal/should_compile/all.T | 3 |
7 files changed, 234 insertions, 95 deletions
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs index 9becea0c18..511d3bf6e3 100644 --- a/compiler/GHC/Core/Opt/WorkWrap.hs +++ b/compiler/GHC/Core/Opt/WorkWrap.hs @@ -13,13 +13,12 @@ import GHC.Driver.Session import GHC.Core.Opt.Arity ( manifestArity ) import GHC.Core -import GHC.Core.Unfold import GHC.Core.Unfold.Make import GHC.Core.Utils ( exprType, exprIsHNF ) import GHC.Core.Type import GHC.Core.Opt.WorkWrap.Utils import GHC.Core.FamInstEnv -import GHC.Core.SimpleOpt( SimpleOpts(..) ) +import GHC.Core.SimpleOpt import GHC.Types.Var import GHC.Types.Id @@ -719,7 +718,9 @@ splitFun ww_opts fn_id rhs return [(fn_id, rhs)] Just stuff - | Just stable_unf <- certainlyWillInline uf_opts fn_info + | let opt_wwd_rhs = simpleOptExpr (wo_simple_opts ww_opts) rhs + -- We need to stabilise the WW'd (and optimised) RHS below + , Just stable_unf <- certainlyWillInline uf_opts fn_info opt_wwd_rhs -- We could make a w/w split, but in fact the RHS is small -- See Note [Don't w/w inline small non-loop-breaker things] , let id_w_unf = fn_id `setIdUnfolding` stable_unf diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs index dbc6b1e7fd..08c5a10b30 100644 --- a/compiler/GHC/Core/Unfold.hs +++ b/compiler/GHC/Core/Unfold.hs @@ -18,8 +18,6 @@ find, unsurprisingly, a Core expression. {-# LANGUAGE BangPatterns #-} -{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} - module GHC.Core.Unfold ( Unfolding, UnfoldingGuidance, -- Abstract types @@ -32,7 +30,7 @@ module GHC.Core.Unfold ( ArgSummary(..), couldBeSmallEnoughToInline, inlineBoringOk, - certainlyWillInline, smallEnoughToInline, + smallEnoughToInline, callSiteInline, CallCtxt(..), calcUnfoldingGuidance @@ -45,12 +43,11 @@ import GHC.Driver.Flags import GHC.Core import GHC.Core.Utils import GHC.Types.Id -import GHC.Types.Demand ( isDeadEndSig ) import GHC.Core.DataCon import GHC.Types.Literal import GHC.Builtin.PrimOps import GHC.Types.Id.Info -import GHC.Types.Basic ( Arity, isNoInlinePragma ) +import GHC.Types.Basic ( Arity ) import GHC.Core.Type import GHC.Builtin.Names import GHC.Builtin.Types.Prim ( realWorldStatePrimTy ) @@ -961,89 +958,7 @@ smallEnoughToInline opts (CoreUnfolding {uf_guidance = guidance}) smallEnoughToInline _ _ = False ----------------- - -certainlyWillInline :: UnfoldingOpts -> IdInfo -> Maybe Unfolding --- ^ Sees if the unfolding is pretty certain to inline. --- If so, return a *stable* unfolding for it, that will always inline. -certainlyWillInline opts fn_info - = case fn_unf of - CoreUnfolding { uf_tmpl = expr, uf_guidance = guidance, uf_src = src } - | noinline -> Nothing -- See Note [Worker/wrapper for NOINLINE functions] - | otherwise - -> case guidance of - UnfNever -> Nothing - UnfWhen {} -> Just (fn_unf { uf_src = src' }) - -- INLINE functions have UnfWhen - UnfIfGoodArgs { ug_size = size, ug_args = args } - -> do_cunf expr size args src' - where - src' = -- Do not change InlineCompulsory! - case src of - InlineCompulsory -> InlineCompulsory - _ -> InlineStable - - DFunUnfolding {} -> Just fn_unf -- Don't w/w DFuns; it never makes sense - -- to do so, and even if it is currently a - -- loop breaker, it may not be later - - _other_unf -> Nothing - - where - noinline = isNoInlinePragma (inlinePragInfo fn_info) - fn_unf = unfoldingInfo fn_info -- NB: loop-breakers never inline - - -- The UnfIfGoodArgs case seems important. If we w/w small functions - -- binary sizes go up by 10%! (This is with SplitObjs.) - -- I'm not totally sure why. - -- INLINABLE functions come via this path - -- See Note [certainlyWillInline: INLINABLE] - do_cunf expr size args src' - | arityInfo fn_info > 0 -- See Note [certainlyWillInline: be careful of thunks] - , not (isDeadEndSig (dmdSigInfo fn_info)) - -- Do not unconditionally inline a bottoming functions even if - -- it seems smallish. We've carefully lifted it out to top level, - -- so we don't want to re-inline it. - , let unf_arity = length args - , size - (10 * (unf_arity + 1)) <= unfoldingUseThreshold opts - = Just (fn_unf { uf_src = src' - , uf_guidance = UnfWhen { ug_arity = unf_arity - , ug_unsat_ok = unSaturatedOk - , ug_boring_ok = inlineBoringOk expr } }) - -- Note the "unsaturatedOk". A function like f = \ab. a - -- will certainly inline, even if partially applied (f e), so we'd - -- better make sure that the transformed inlining has the same property - | otherwise - = Nothing - -{- Note [certainlyWillInline: be careful of thunks] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Don't claim that thunks will certainly inline, because that risks work -duplication. Even if the work duplication is not great (eg is_cheap -holds), it can make a big difference in an inner loop In #5623 we -found that the WorkWrap phase thought that - y = case x of F# v -> F# (v +# v) -was certainlyWillInline, so the addition got duplicated. - -Note that we check arityInfo instead of the arity of the unfolding to detect -this case. This is so that we don't accidentally fail to inline small partial -applications, like `f = g 42` (where `g` recurses into `f`) where g has arity 2 -(say). Here there is no risk of work duplication, and the RHS is tiny, so -certainlyWillInline should return True. But `unf_arity` is zero! However f's -arity, gotten from `arityInfo fn_info`, is 1. - -Failing to say that `f` will inline forces W/W to generate a potentially huge -worker for f that will immediately cancel with `g`'s wrapper anyway, causing -unnecessary churn in the Simplifier while arriving at the same result. - -Note [certainlyWillInline: INLINABLE] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -certainlyWillInline /must/ return Nothing for a large INLINABLE thing, -even though we have a stable inlining, so that strictness w/w takes -place. It makes a big difference to efficiency, and the w/w pass knows -how to transfer the INLINABLE info to the worker; see WorkWrap -Note [Worker/wrapper for INLINABLE functions] - +{- ************************************************************************ * * \subsection{callSiteInline} diff --git a/compiler/GHC/Core/Unfold/Make.hs b/compiler/GHC/Core/Unfold/Make.hs index 71981061ef..dd0a0b968a 100644 --- a/compiler/GHC/Core/Unfold/Make.hs +++ b/compiler/GHC/Core/Unfold/Make.hs @@ -1,4 +1,4 @@ - +{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} -- | Unfolding creation module GHC.Core.Unfold.Make @@ -16,6 +16,7 @@ module GHC.Core.Unfold.Make , mkCompulsoryUnfolding' , mkDFunUnfolding , specUnfolding + , certainlyWillInline ) where @@ -28,6 +29,7 @@ import GHC.Core.DataCon import GHC.Core.Utils import GHC.Types.Basic import GHC.Types.Id +import GHC.Types.Id.Info import GHC.Types.Demand ( DmdSig, isDeadEndSig ) import GHC.Utils.Outputable @@ -309,4 +311,92 @@ mkCoreUnfolding src top_lvl expr guidance uf_expandable = exprIsExpandable expr, uf_guidance = guidance } +---------------- +certainlyWillInline :: UnfoldingOpts -> IdInfo -> CoreExpr -> Maybe Unfolding +-- ^ Sees if the unfolding is pretty certain to inline. +-- If so, return a *stable* unfolding for it, that will always inline. +-- The CoreExpr is the WW'd and simplified RHS. In contrast, the unfolding +-- template might not have been WW'd yet. +certainlyWillInline opts fn_info rhs' + = case fn_unf of + CoreUnfolding { uf_guidance = guidance, uf_src = src } + | noinline -> Nothing -- See Note [Worker/wrapper for NOINLINE functions] + | otherwise + -> case guidance of + UnfNever -> Nothing + UnfWhen {} -> Just (fn_unf { uf_src = src', uf_tmpl = tmpl' }) + -- INLINE functions have UnfWhen + UnfIfGoodArgs { ug_size = size, ug_args = args } + -> do_cunf size args src' tmpl' + where + src' = -- Do not change InlineCompulsory! + case src of + InlineCompulsory -> InlineCompulsory + _ -> InlineStable + tmpl' = -- Do not overwrite stable unfoldings! + case src of + InlineRhs -> occurAnalyseExpr rhs' + _ -> uf_tmpl fn_unf + + DFunUnfolding {} -> Just fn_unf -- Don't w/w DFuns; it never makes sense + -- to do so, and even if it is currently a + -- loop breaker, it may not be later + + _other_unf -> Nothing + where + noinline = isNoInlinePragma (inlinePragInfo fn_info) + fn_unf = unfoldingInfo fn_info -- NB: loop-breakers never inline + + -- The UnfIfGoodArgs case seems important. If we w/w small functions + -- binary sizes go up by 10%! (This is with SplitObjs.) + -- I'm not totally sure why. + -- INLINABLE functions come via this path + -- See Note [certainlyWillInline: INLINABLE] + do_cunf size args src' tmpl' + | arityInfo fn_info > 0 -- See Note [certainlyWillInline: be careful of thunks] + , not (isDeadEndSig (dmdSigInfo fn_info)) + -- Do not unconditionally inline a bottoming functions even if + -- it seems smallish. We've carefully lifted it out to top level, + -- so we don't want to re-inline it. + , let unf_arity = length args + , size - (10 * (unf_arity + 1)) <= unfoldingUseThreshold opts + = Just (fn_unf { uf_src = src' + , uf_tmpl = tmpl' + , uf_guidance = UnfWhen { ug_arity = unf_arity + , ug_unsat_ok = unSaturatedOk + , ug_boring_ok = inlineBoringOk tmpl' } }) + -- Note the "unsaturatedOk". A function like f = \ab. a + -- will certainly inline, even if partially applied (f e), so we'd + -- better make sure that the transformed inlining has the same property + | otherwise + = Nothing + +{- Note [certainlyWillInline: be careful of thunks] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Don't claim that thunks will certainly inline, because that risks work +duplication. Even if the work duplication is not great (eg is_cheap +holds), it can make a big difference in an inner loop In #5623 we +found that the WorkWrap phase thought that + y = case x of F# v -> F# (v +# v) +was certainlyWillInline, so the addition got duplicated. + +Note that we check arityInfo instead of the arity of the unfolding to detect +this case. This is so that we don't accidentally fail to inline small partial +applications, like `f = g 42` (where `g` recurses into `f`) where g has arity 2 +(say). Here there is no risk of work duplication, and the RHS is tiny, so +certainlyWillInline should return True. But `unf_arity` is zero! However f's +arity, gotten from `arityInfo fn_info`, is 1. + +Failing to say that `f` will inline forces W/W to generate a potentially huge +worker for f that will immediately cancel with `g`'s wrapper anyway, causing +unnecessary churn in the Simplifier while arriving at the same result. + +Note [certainlyWillInline: INLINABLE] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +certainlyWillInline /must/ return Nothing for a large INLINABLE thing, +even though we have a stable inlining, so that strictness w/w takes +place. It makes a big difference to efficiency, and the w/w pass knows +how to transfer the INLINABLE info to the worker; see WorkWrap +Note [Worker/wrapper for INLINABLE functions] +-} diff --git a/testsuite/tests/simplCore/should_compile/T7360.stderr b/testsuite/tests/simplCore/should_compile/T7360.stderr index 9b42a8c41d..d4b68ee22e 100644 --- a/testsuite/tests/simplCore/should_compile/T7360.stderr +++ b/testsuite/tests/simplCore/should_compile/T7360.stderr @@ -59,10 +59,8 @@ fun2 :: forall {a}. [a] -> ((), Int) Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False) Tmpl= \ (@a) (x [Occ=Once1] :: [a]) -> (T7360.fun4, - case x of wild [Occ=Once1] { __DEFAULT -> - case GHC.List.$wlenAcc @a wild 0# of ww1 [Occ=Once1] { __DEFAULT -> + case GHC.List.$wlenAcc @a x 0# of ww1 [Occ=Once1] { __DEFAULT -> GHC.Types.I# ww1 - } })}] fun2 = \ (@a) (x :: [a]) -> diff --git a/testsuite/tests/stranal/should_compile/T20510.hs b/testsuite/tests/stranal/should_compile/T20510.hs new file mode 100644 index 0000000000..b9f05118ef --- /dev/null +++ b/testsuite/tests/stranal/should_compile/T20510.hs @@ -0,0 +1,7 @@ +module T20510 where + +small :: Int -> Int +small x = go 0 x + where + go z 0 = z * x + go z y = go (z+y) (y-1) diff --git a/testsuite/tests/stranal/should_compile/T20510.stderr b/testsuite/tests/stranal/should_compile/T20510.stderr new file mode 100644 index 0000000000..b2cbed4594 --- /dev/null +++ b/testsuite/tests/stranal/should_compile/T20510.stderr @@ -0,0 +1,125 @@ + +==================== Exitification transformation ==================== +Result size of Exitification transformation + = {terms: 50, types: 22, coercions: 0, joins: 2/2} + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +lvl :: Int +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +lvl = GHC.Types.I# 0# + +-- RHS size: {terms: 32, types: 14, coercions: 0, joins: 2/2} +small :: Int -> Int +[LclIdX, + Arity=1, + Str=<SP(SL)>, + Cpr=1, + Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False) + Tmpl= \ (x [Dmd=SP(SL)] :: Int) -> + joinrec { + go [InlPrag=[2], Occ=T[2]] :: Int -> Int -> Int + [LclId[JoinId(2)], + Arity=2, + Str=<SP(L)><SP(SL)>, + Unf=Unf{Src=InlineStable, TopLvl=False, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=ALWAYS_IF(arity=2,unsat_ok=True,boring_ok=False) + Tmpl= \ (z [Occ=Once1!, Dmd=SP(L)] :: Int) + (ds [Occ=Once1!, Dmd=SP(SL)] :: Int) -> + case z of { GHC.Types.I# ww [Occ=Once1] -> + case ds of { GHC.Types.I# ww [Occ=Once1, Dmd=SL] -> + jump $wgo ww ww + } + }}] + go (z [Occ=Once1!, Dmd=SP(L)] :: Int) + (ds [Occ=Once1!, Dmd=SP(SL)] :: Int) + = case z of { GHC.Types.I# ww [Occ=Once1] -> + case ds of { GHC.Types.I# ww [Occ=Once1, Dmd=SL] -> + jump $wgo ww ww + } + }; + $wgo [InlPrag=[2], Occ=LoopBreakerT[2]] + :: GHC.Prim.Int# -> GHC.Prim.Int# -> Int + [LclId[JoinId(2)], + Arity=2, + Str=<L><SL>, + Unf=Unf{Src=<vanilla>, TopLvl=False, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [0 51] 69 10}] + $wgo (ww [Occ=Once2] :: GHC.Prim.Int#) + (ww [Occ=Once1!, Dmd=SL] :: GHC.Prim.Int#) + = case ww of ds { + __DEFAULT -> + jump go + (GHC.Types.I# (GHC.Prim.+# ww ds)) + (GHC.Types.I# (GHC.Prim.-# ds 1#)); + 0# -> + case x of { GHC.Types.I# y [Occ=Once1] -> + GHC.Types.I# (GHC.Prim.*# ww y) + } + }; } in + jump go lvl x}] +small + = \ (x [Dmd=SP(SL)] :: Int) -> + join { + exit :: GHC.Prim.Int# -> Int + [LclId[JoinId(1)]] + exit (ww :: GHC.Prim.Int#) + = case x of { GHC.Types.I# y -> + GHC.Types.I# (GHC.Prim.*# ww y) + } } in + joinrec { + $wgo [InlPrag=[2], Occ=LoopBreaker] + :: GHC.Prim.Int# -> GHC.Prim.Int# -> Int + [LclId[JoinId(2)], + Arity=2, + Str=<L><SL>, + Unf=Unf{Src=<vanilla>, TopLvl=False, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [0 38] 49 10}] + $wgo (ww :: GHC.Prim.Int#) (ww [Dmd=SL] :: GHC.Prim.Int#) + = case ww of ds { + __DEFAULT -> jump $wgo (GHC.Prim.+# ww ds) (GHC.Prim.-# ds 1#); + 0# -> jump exit ww + }; } in + case x of { GHC.Types.I# ww [Dmd=SL] -> jump $wgo 0# ww } + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Prim.Addr# +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}] +$trModule = "main"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Types.TrName +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +$trModule = GHC.Types.TrNameS $trModule + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Prim.Addr# +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 30 0}] +$trModule = "T20510"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Types.TrName +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +$trModule = GHC.Types.TrNameS $trModule + +-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0} +T20510.$trModule :: GHC.Types.Module +[LclIdX, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +T20510.$trModule = GHC.Types.Module $trModule $trModule + + + diff --git a/testsuite/tests/stranal/should_compile/all.T b/testsuite/tests/stranal/should_compile/all.T index 1798c08638..d953da1da9 100644 --- a/testsuite/tests/stranal/should_compile/all.T +++ b/testsuite/tests/stranal/should_compile/all.T @@ -72,3 +72,6 @@ test('T19766', [ grep_errmsg(r'absentError') ], compile, ['-ddump-worker-wrapper test('T19849', normal, compile, ['']) test('T19882a', normal, compile, ['']) test('T19882b', normal, compile, ['']) +# We want that the 'go' joinrec in the unfolding has been worker/wrappered. +# So we simply grep for 'jump $wgo' and hope we find more than 2 call sites: +test('T20510', [ grep_errmsg(r'jump \$wgo') ], compile, ['-dsuppress-uniques -ddump-exitify']) |