diff options
-rw-r--r-- | compiler/GHC/Core/Opt/Arity.hs | 242 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify.hs | 44 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Utils.hs | 142 | ||||
-rw-r--r-- | testsuite/tests/profiling/should_run/T5654-O1.prof.sample | 37 | ||||
-rw-r--r-- | testsuite/tests/profiling/should_run/T5654b-O1.prof.sample | 34 | ||||
-rw-r--r-- | testsuite/tests/profiling/should_run/ioprof.stderr | 3 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T18355.hs | 9 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T18355.stderr | 70 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/all.T | 1 |
9 files changed, 361 insertions, 221 deletions
diff --git a/compiler/GHC/Core/Opt/Arity.hs b/compiler/GHC/Core/Opt/Arity.hs index 2b2a7c20ea..7891012792 100644 --- a/compiler/GHC/Core/Opt/Arity.hs +++ b/compiler/GHC/Core/Opt/Arity.hs @@ -13,9 +13,12 @@ -- | Arity and eta expansion module GHC.Core.Opt.Arity ( manifestArity, joinRhsArity, exprArity, typeArity - , exprEtaExpandArity, findRhsArity, etaExpand + , exprEtaExpandArity, findRhsArity + , etaExpand, etaExpandAT , etaExpandToJoinPoint, etaExpandToJoinPointRule , exprBotStrictness_maybe + , ArityType(..), expandableArityType, arityTypeArity + , maxWithArity, isBotArityType, idArityType ) where @@ -42,7 +45,7 @@ import GHC.Types.Unique import GHC.Driver.Session ( DynFlags, GeneralFlag(..), gopt ) import GHC.Utils.Outputable import GHC.Data.FastString -import GHC.Utils.Misc ( debugIsOn ) +import GHC.Utils.Misc ( lengthAtLeast ) {- ************************************************************************ @@ -486,8 +489,11 @@ Then f :: AT [False,False] ATop -------------------- Main arity code ---------------------------- -} --- See Note [ArityType] -data ArityType = ATop [OneShotInfo] | ABot Arity + +data ArityType -- See Note [ArityType] + = ATop [OneShotInfo] + | ABot Arity + deriving( Eq ) -- There is always an explicit lambda -- to justify the [OneShot], or the Arity @@ -495,18 +501,45 @@ instance Outputable ArityType where ppr (ATop os) = text "ATop" <> parens (ppr (length os)) ppr (ABot n) = text "ABot" <> parens (ppr n) +arityTypeArity :: ArityType -> Arity +-- The number of value args for the arity type +arityTypeArity (ATop oss) = length oss +arityTypeArity (ABot ar) = ar + +expandableArityType :: ArityType -> Bool +-- True <=> eta-expansion will add at least one lambda +expandableArityType (ATop oss) = not (null oss) +expandableArityType (ABot ar) = ar /= 0 + +isBotArityType :: ArityType -> Bool +isBotArityType (ABot {}) = True +isBotArityType (ATop {}) = False + +arityTypeOneShots :: ArityType -> [OneShotInfo] +arityTypeOneShots (ATop oss) = oss +arityTypeOneShots (ABot ar) = replicate ar OneShotLam + -- If we are diveging or throwing an exception anyway + -- it's fine to push redexes inside the lambdas + +botArityType :: ArityType +botArityType = ABot 0 -- Unit for andArityType + +maxWithArity :: ArityType -> Arity -> ArityType +maxWithArity at@(ABot {}) _ = at +maxWithArity at@(ATop oss) ar + | oss `lengthAtLeast` ar = at + | otherwise = ATop (take ar (oss ++ repeat NoOneShotInfo)) + vanillaArityType :: ArityType vanillaArityType = ATop [] -- Totally uninformative -- ^ The Arity returned is the number of value args the -- expression can be applied to without doing much work -exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity +exprEtaExpandArity :: DynFlags -> CoreExpr -> ArityType -- exprEtaExpandArity is used when eta expanding -- e ==> \xy -> e x y exprEtaExpandArity dflags e - = case (arityType env e) of - ATop oss -> length oss - ABot n -> n + = arityType env e where env = AE { ae_cheap_fn = mk_cheap_fn dflags isCheapApp , ae_ped_bot = gopt Opt_PedanticBottoms dflags @@ -529,7 +562,7 @@ mk_cheap_fn dflags cheap_app ---------------------- -findRhsArity :: DynFlags -> Id -> CoreExpr -> Arity -> (Arity, Bool) +findRhsArity :: DynFlags -> Id -> CoreExpr -> Arity -> ArityType -- This implements the fixpoint loop for arity analysis -- See Note [Arity analysis] -- If findRhsArity e = (n, is_bot) then @@ -543,44 +576,34 @@ findRhsArity dflags bndr rhs old_arity -- we stop right away (since arities should not decrease) -- Result: the common case is that there is just one iteration where - is_lam = has_lam rhs - - has_lam (Tick _ e) = has_lam e - has_lam (Lam b e) = isId b || has_lam e - has_lam _ = False - init_cheap_app :: CheapAppFun init_cheap_app fn n_val_args | fn == bndr = True -- On the first pass, this binder gets infinite arity | otherwise = isCheapApp fn n_val_args - go :: (Arity, Bool) -> (Arity, Bool) - go cur_info@(cur_arity, _) - | cur_arity <= old_arity = cur_info - | new_arity == cur_arity = cur_info - | otherwise = ASSERT( new_arity < cur_arity ) + go :: ArityType -> ArityType + go cur_atype + | cur_arity <= old_arity = cur_atype + | new_atype == cur_atype = cur_atype + | otherwise = #if defined(DEBUG) pprTrace "Exciting arity" - (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity + (vcat [ ppr bndr <+> ppr cur_atype <+> ppr new_atype , ppr rhs]) #endif - go new_info + go new_atype where - new_info@(new_arity, _) = get_arity cheap_app + new_atype = get_arity cheap_app + cur_arity = arityTypeArity cur_atype cheap_app :: CheapAppFun cheap_app fn n_val_args | fn == bndr = n_val_args < cur_arity | otherwise = isCheapApp fn n_val_args - get_arity :: CheapAppFun -> (Arity, Bool) - get_arity cheap_app - = case (arityType env rhs) of - ABot n -> (n, True) - ATop (os:oss) | isOneShotInfo os || is_lam - -> (1 + length oss, False) -- Don't expand PAPs/thunks - ATop _ -> (0, False) -- Note [Eta expanding thunks] - where + get_arity :: CheapAppFun -> ArityType + get_arity cheap_app = arityType env rhs + where env = AE { ae_cheap_fn = mk_cheap_fn dflags cheap_app , ae_ped_bot = gopt Opt_PedanticBottoms dflags , ae_joins = emptyVarSet } @@ -613,7 +636,6 @@ write the analysis loop. The analysis is cheap-and-cheerful because it doesn't deal with mutual recursion. But the self-recursive case is the important one. - Note [Eta expanding through dictionaries] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the experimental -fdicts-cheap flag is on, we eta-expand through @@ -632,24 +654,6 @@ The (foo DInt) is floated out, and makes ineffective a RULE One could go further and make exprIsCheap reply True to any dictionary-typed expression, but that's more work. - -Note [Eta expanding thunks] -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We don't eta-expand - * Trivial RHSs x = y - * PAPs x = map g - * Thunks f = case y of p -> \x -> blah - -When we see - f = case y of p -> \x -> blah -should we eta-expand it? Well, if 'x' is a one-shot state token -then 'yes' because 'f' will only be applied once. But otherwise -we (conservatively) say no. My main reason is to avoid expanding -PAPSs - f = g d ==> f = \x. g d x -because that might in turn make g inline (if it has an inline pragma), -which we might not want. After all, INLINE pragmas say "inline only -when saturated" so we don't want to be too gung-ho about saturating! -} arityLam :: Id -> ArityType -> ArityType @@ -673,6 +677,7 @@ arityApp (ATop []) _ = ATop [] arityApp (ATop (_:as)) cheap = floatIn cheap (ATop as) andArityType :: ArityType -> ArityType -> ArityType -- Used for branches of a 'case' +-- This is least upper bound in the ArityType lattice andArityType (ABot n1) (ABot n2) = ABot (n1 `max` n2) -- Note [ABot branches: use max] andArityType (ATop as) (ABot _) = ATop as andArityType (ABot _) (ATop bs) = ATop bs @@ -754,8 +759,7 @@ arityType :: ArityEnv -> CoreExpr -> ArityType arityType env (Cast e co) = case arityType env e of - ATop os -> ATop (take co_arity os) - -- See Note [Arity trimming] + ATop os -> ATop (take co_arity os) -- See Note [Arity trimming] ABot n | co_arity < n -> ATop (replicate co_arity noOneShotInfo) | otherwise -> ABot n where @@ -769,19 +773,9 @@ arityType env (Cast e co) arityType env (Var v) | v `elemVarSet` ae_joins env - = ABot 0 -- See Note [Eta-expansion and join points] - - | strict_sig <- idStrictness v - , not $ isTopSig strict_sig - , (ds, res) <- splitStrictSig strict_sig - , let arity = length ds - = if isDeadEndDiv res then ABot arity - else ATop (take arity one_shots) + = botArityType -- See Note [Eta-expansion and join points] | otherwise - = ATop (take (idArity v) one_shots) - where - one_shots :: [OneShotInfo] -- One-shot-ness derived from the type - one_shots = typeArity (idType v) + = idArityType v -- Lambdas; increase arity arityType env (Lam x e) @@ -804,13 +798,13 @@ arityType env (App fun arg ) -- arityType env (Case scrut _ _ alts) | exprIsDeadEnd scrut || null alts - = ABot 0 -- Do not eta expand - -- See Note [Dealing with bottom (1)] + = botArityType -- Do not eta expand + -- See Note [Dealing with bottom (1)] | otherwise = case alts_type of - ABot n | n>0 -> ATop [] -- Don't eta expand - | otherwise -> ABot 0 -- if RHS is bottomming - -- See Note [Dealing with bottom (2)] + ABot n | n>0 -> ATop [] -- Don't eta expand + | otherwise -> botArityType -- if RHS is bottomming + -- See Note [Dealing with bottom (2)] ATop as | not (ae_ped_bot env) -- See Note [Dealing with bottom (3)] , ae_cheap_fn env scrut Nothing -> ATop as @@ -886,7 +880,8 @@ So we do this: body of the let. * Dually, when we come to a /call/ of a join point, just no-op - by returning (ABot 0), the neutral element of ArityType. + by returning botArityType, the bottom element of ArityType, + which so that: bot `andArityType` x = x * This works if the join point is bound in the expression we are taking the arityType of. But if it's bound further out, it makes @@ -905,6 +900,20 @@ An alternative (roughly equivalent) idea would be to carry an environment mapping let-bound Ids to their ArityType. -} +idArityType :: Id -> ArityType +idArityType v + | strict_sig <- idStrictness v + , not $ isTopSig strict_sig + , (ds, res) <- splitStrictSig strict_sig + , let arity = length ds + = if isDeadEndDiv res then ABot arity + else ATop (take arity one_shots) + | otherwise + = ATop (take (idArity v) one_shots) + where + one_shots :: [OneShotInfo] -- One-shot-ness derived from the type + one_shots = typeArity (idType v) + {- %************************************************************************ %* * @@ -1001,6 +1010,25 @@ which we want to lead to code like This means that we need to look through type applications and be ready to re-add floats on the top. +Note [Eta expansion with ArityType] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The etaExpandAT function takes an ArityType (not just an Arity) to +guide eta-expansion. Why? Because we want to preserve one-shot info. +Consider + foo = \x. case x of + True -> (\s{os}. blah) |> co + False -> wubble +We'll get an ArityType for foo of (ATop [NoOneShot,OneShot]). + +Then we want to eta-expand to + foo = \x. (\eta{os}. (case x of ...as before...) eta) |> some_co + +That 'eta' binder is fresh, and we really want it to have the +one-shot flag from the inner \s{osf}. By expanding with the +ArityType gotten from analysing the RHS, we achieve this neatly. + +This makes a big difference to the one-shot monad trick; +see Note [The one-shot state monad trick] in GHC.Core.Unify. -} -- | @etaExpand n e@ returns an expression with @@ -1013,11 +1041,16 @@ to re-add floats on the top. -- We should have that: -- -- > ty = exprType e = exprType e' -etaExpand :: Arity -- ^ Result should have this number of value args - -> CoreExpr -- ^ Expression to expand - -> CoreExpr +etaExpand :: Arity -> CoreExpr -> CoreExpr +etaExpandAT :: ArityType -> CoreExpr -> CoreExpr + +etaExpand n orig_expr = eta_expand (replicate n NoOneShotInfo) orig_expr +etaExpandAT at orig_expr = eta_expand (arityTypeOneShots at) orig_expr + -- See Note [Eta expansion with ArityType] + -- etaExpand arity e = res -- Then 'res' has at least 'arity' lambdas at the top +-- See Note [Eta expansion with ArityType] -- -- etaExpand deals with for-alls. For example: -- etaExpand 1 E @@ -1028,21 +1061,23 @@ etaExpand :: Arity -- ^ Result should have this number of value arg -- It deals with coerces too, though they are now rare -- so perhaps the extra code isn't worth it -etaExpand n orig_expr - = go n orig_expr +eta_expand :: [OneShotInfo] -> CoreExpr -> CoreExpr +eta_expand one_shots orig_expr + = go one_shots orig_expr where -- Strip off existing lambdas and casts before handing off to mkEtaWW -- Note [Eta expansion and SCCs] - go 0 expr = expr - go n (Lam v body) | isTyVar v = Lam v (go n body) - | otherwise = Lam v (go (n-1) body) - go n (Cast expr co) = Cast (go n expr) co - go n expr + go [] expr = expr + go oss@(_:oss1) (Lam v body) | isTyVar v = Lam v (go oss body) + | otherwise = Lam v (go oss1 body) + go oss (Cast expr co) = Cast (go oss expr) co + + go oss expr = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, ppr etas]) $ retick $ etaInfoAbs etas (etaInfoApp subst' sexpr etas) where in_scope = mkInScopeSet (exprFreeVars expr) - (in_scope', etas) = mkEtaWW n (ppr orig_expr) in_scope (exprType expr) + (in_scope', etas) = mkEtaWW oss (ppr orig_expr) in_scope (exprType expr) subst' = mkEmptySubst in_scope' -- Find ticks behind type apps. @@ -1141,7 +1176,7 @@ etaInfoAppTy _ (EtaCo co : eis) = etaInfoAppTy (coercionRKind co) eis -- semantically-irrelevant source annotations, so call sites must take care to -- preserve that info. See Note [Eta expansion and SCCs]. mkEtaWW - :: Arity + :: [OneShotInfo] -- ^ How many value arguments to eta-expand -> SDoc -- ^ The pretty-printed original expression, for warnings. @@ -1153,36 +1188,29 @@ mkEtaWW -- The outgoing 'InScopeSet' extends the incoming 'InScopeSet' with the -- fresh variables in 'EtaInfo'. -mkEtaWW orig_n ppr_orig_expr in_scope orig_ty - = go orig_n empty_subst orig_ty [] +mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty + = go 0 orig_oss empty_subst orig_ty [] where empty_subst = mkEmptyTCvSubst in_scope - go :: Arity -- Number of value args to expand to + go :: Int -- For fresh names + -> [OneShotInfo] -- Number of value args to expand to -> TCvSubst -> Type -- We are really looking at subst(ty) -> [EtaInfo] -- Accumulating parameter -> (InScopeSet, [EtaInfo]) - go n subst ty eis -- See Note [exprArity invariant] - + go _ [] subst _ eis -- See Note [exprArity invariant] ----------- Done! No more expansion needed - | n == 0 = (getTCvInScope subst, reverse eis) + go n oss@(one_shot:oss1) subst ty eis -- See Note [exprArity invariant] ----------- Forall types (forall a. ty) | Just (tcv,ty') <- splitForAllTy_maybe ty - , let (subst', tcv') = Type.substVarBndr subst tcv - = let ((n_subst, n_tcv), n_n) - -- We want to have at least 'n' lambdas at the top. - -- If tcv is a tyvar, it corresponds to one Lambda (/\). - -- And we won't reduce n. - -- If tcv is a covar, we could eta-expand the expr with one - -- lambda \co:ty. e co. In this case we generate a new variable - -- of the coercion type, update the scope, and reduce n by 1. - | isTyVar tcv = ((subst', tcv'), n) - -- covar case: - | otherwise = (freshEtaId n subst' (unrestricted (varType tcv')), n-1) - -- Avoid free vars of the original expression - in go n_n n_subst ty' (EtaVar n_tcv : eis) + , (subst', tcv') <- Type.substVarBndr subst tcv + , let oss' | isTyVar tcv = oss + | otherwise = oss1 + -- A forall can bind a CoVar, in which case + -- we consume one of the [OneShotInfo] + = go n oss' subst' ty' (EtaVar tcv' : eis) ----------- Function types (t1 -> t2) | Just (mult, arg_ty, res_ty) <- splitFunTy_maybe ty @@ -1190,9 +1218,11 @@ mkEtaWW orig_n ppr_orig_expr in_scope orig_ty -- See Note [Levity polymorphism invariants] in GHC.Core -- See also test case typecheck/should_run/EtaExpandLevPoly - , let (subst', eta_id') = freshEtaId n subst (Scaled mult arg_ty) - -- Avoid free vars of the original expression - = go (n-1) subst' res_ty (EtaVar eta_id' : eis) + , (subst', eta_id) <- freshEtaId n subst (Scaled mult arg_ty) + -- Avoid free vars of the original expression + + , let eta_id' = eta_id `setIdOneShotInfo` one_shot + = go (n+1) oss1 subst' res_ty (EtaVar eta_id' : eis) ----------- Newtypes -- Given this: @@ -1206,12 +1236,12 @@ mkEtaWW orig_n ppr_orig_expr in_scope orig_ty -- Remember to apply the substitution to co (#16979) -- (or we could have applied to ty, but then -- we'd have had to zap it for the recursive call) - = go n subst ty' (pushCoercion co' eis) + = go n oss subst ty' (pushCoercion co' eis) | otherwise -- We have an expression of arity > 0, -- but its type isn't a function, or a binder -- is levity-polymorphic - = WARN( True, (ppr orig_n <+> ppr orig_ty) $$ ppr_orig_expr ) + = WARN( True, (ppr orig_oss <+> ppr orig_ty) $$ ppr_orig_expr ) (getTCvInScope subst, reverse eis) -- This *can* legitimately happen: -- e.g. coerce Int (\x. x) Essentially the programmer is diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs index ffddd62c8c..1577f3a151 100644 --- a/compiler/GHC/Core/Opt/Simplify.hs +++ b/compiler/GHC/Core/Opt/Simplify.hs @@ -46,7 +46,8 @@ import GHC.Core.Ppr ( pprCoreExpr ) import GHC.Types.Unique ( hasKey ) import GHC.Core.Unfold import GHC.Core.Utils -import GHC.Core.Opt.Arity ( etaExpand ) +import GHC.Core.Opt.Arity ( ArityType(..), arityTypeArity, isBotArityType + , idArityType, etaExpandAT ) import GHC.Core.SimpleOpt ( pushCoTyArg, pushCoValArg , joinPointBinding_maybe, joinPointBindings_maybe ) import GHC.Core.FVs ( mkRuleInfo ) @@ -706,10 +707,10 @@ makeTrivialBinding mode top_lvl occ_fs info expr expr_ty -- Now something very like completeBind, -- but without the postInlineUnconditionally part - ; (arity, is_bot, expr2) <- tryEtaExpandRhs mode var expr1 + ; (arity_type, expr2) <- tryEtaExpandRhs mode var expr1 ; unf <- mkLetUnfolding (sm_dflags mode) top_lvl InlineRhs var expr2 - ; let final_id = addLetBndrInfo var arity is_bot unf + ; let final_id = addLetBndrInfo var arity_type unf bind = NonRec final_id expr2 ; return ( floats `addLetFlts` unitLetFloat bind, final_id ) } @@ -799,14 +800,13 @@ completeBind env top_lvl mb_cont old_bndr new_bndr new_rhs -- Do eta-expansion on the RHS of the binding -- See Note [Eta-expanding at let bindings] in GHC.Core.Opt.Simplify.Utils - ; (new_arity, is_bot, final_rhs) <- tryEtaExpandRhs (getMode env) - new_bndr new_rhs + ; (new_arity, final_rhs) <- tryEtaExpandRhs (getMode env) new_bndr new_rhs -- Simplify the unfolding ; new_unfolding <- simplLetUnfolding env top_lvl mb_cont old_bndr final_rhs (idType new_bndr) new_arity old_unf - ; let final_bndr = addLetBndrInfo new_bndr new_arity is_bot new_unfolding + ; let final_bndr = addLetBndrInfo new_bndr new_arity new_unfolding -- See Note [In-scope set as a substitution] ; if postInlineUnconditionally env top_lvl final_bndr occ_info final_rhs @@ -823,10 +823,13 @@ completeBind env top_lvl mb_cont old_bndr new_bndr new_rhs -- pprTrace "Binding" (ppr final_bndr <+> ppr new_unfolding) $ return (mkFloatBind env (NonRec final_bndr final_rhs)) } -addLetBndrInfo :: OutId -> Arity -> Bool -> Unfolding -> OutId -addLetBndrInfo new_bndr new_arity is_bot new_unf +addLetBndrInfo :: OutId -> ArityType -> Unfolding -> OutId +addLetBndrInfo new_bndr new_arity_type new_unf = new_bndr `setIdInfo` info5 where + new_arity = arityTypeArity new_arity_type + is_bot = isBotArityType new_arity_type + info1 = idInfo new_bndr `setArityInfo` new_arity -- Unfolding info: Note [Setting the new unfolding] @@ -844,12 +847,13 @@ addLetBndrInfo new_bndr new_arity is_bot new_unf = info2 -- Bottoming bindings: see Note [Bottoming bindings] - info4 | is_bot = info3 - `setStrictnessInfo` - mkClosedStrictSig (replicate new_arity topDmd) botDiv - `setCprInfo` mkCprSig new_arity botCpr + info4 | is_bot = info3 `setStrictnessInfo` bot_sig + `setCprInfo` bot_cpr | otherwise = info3 + bot_sig = mkClosedStrictSig (replicate new_arity topDmd) botDiv + bot_cpr = mkCprSig new_arity botCpr + -- Zap call arity info. We have used it by now (via -- `tryEtaExpandRhs`), and the simplifier can invalidate this -- information, leading to broken code later (e.g. #13479) @@ -860,9 +864,9 @@ addLetBndrInfo new_bndr new_arity is_bot new_unf ~~~~~~~~~~~~~~~~~~~~~~~~ Generally speaking the arity of a binding should not decrease. But it *can* legitimately happen because of RULES. Eg - f = g Int + f = g @Int where g has arity 2, will have arity 2. But if there's a rewrite rule - g Int --> h + g @Int --> h where h has arity 1, then f's arity will decrease. Here's a real-life example, which is in the output of Specialise: @@ -892,7 +896,7 @@ Then we'd like to drop the dead <alts> immediately. So it's good to propagate the info that x's RHS is bottom to x's IdInfo as rapidly as possible. -We use tryEtaExpandRhs on every binding, and it turns ou that the +We use tryEtaExpandRhs on every binding, and it turns out that the arity computation it performs (via GHC.Core.Opt.Arity.findRhsArity) already does a simple bottoming-expression analysis. So all we need to do is propagate that info to the binder's IdInfo. @@ -1551,7 +1555,7 @@ simplLamBndr env bndr | isId bndr && hasCoreUnfolding old_unf -- Special case = do { (env1, bndr1) <- simplBinder env bndr ; unf' <- simplStableUnfolding env1 NotTopLevel Nothing bndr - (idType bndr1) (idArity bndr1) old_unf + (idType bndr1) (idArityType bndr1) old_unf ; let bndr2 = bndr1 `setIdUnfolding` unf' ; return (modifyInScope env1 bndr2, bndr2) } @@ -3736,7 +3740,7 @@ because we don't know its usage in each RHS separately simplLetUnfolding :: SimplEnv-> TopLevelFlag -> MaybeJoinCont -> InId - -> OutExpr -> OutType -> Arity + -> OutExpr -> OutType -> ArityType -> Unfolding -> SimplM Unfolding simplLetUnfolding env top_lvl cont_mb id new_rhs rhs_ty arity unf | isStableUnfolding unf @@ -3766,7 +3770,9 @@ mkLetUnfolding dflags top_lvl src id new_rhs simplStableUnfolding :: SimplEnv -> TopLevelFlag -> MaybeJoinCont -- Just k => a join point with continuation k -> InId - -> OutType -> Arity -> Unfolding + -> OutType + -> ArityType -- Used to eta expand, but only for non-join-points + -> Unfolding ->SimplM Unfolding -- Note [Setting the new unfolding] simplStableUnfolding env top_lvl mb_cont id rhs_ty id_arity unf @@ -3829,7 +3835,7 @@ simplStableUnfolding env top_lvl mb_cont id rhs_ty id_arity unf eta_expand expr | not eta_on = expr | exprIsTrivial expr = expr - | otherwise = etaExpand id_arity expr + | otherwise = etaExpandAT id_arity expr eta_on = sm_eta_expand (getMode env) {- Note [Eta-expand stable unfoldings] diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs index b4b0ad7062..e9ee16157f 100644 --- a/compiler/GHC/Core/Opt/Simplify/Utils.hs +++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs @@ -1479,9 +1479,9 @@ mkLam env bndrs body cont , sm_eta_expand (getMode env) , any isRuntimeVar bndrs , let body_arity = exprEtaExpandArity dflags body - , body_arity > 0 + , expandableArityType body_arity = do { tick (EtaExpansion (head bndrs)) - ; let res = mkLams bndrs (etaExpand body_arity body) + ; let res = mkLams bndrs (etaExpandAT body_arity body) ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body) , text "after" <+> ppr res]) ; return res } @@ -1551,7 +1551,7 @@ because the latter is not well-kinded. -} tryEtaExpandRhs :: SimplMode -> OutId -> OutExpr - -> SimplM (Arity, Bool, OutExpr) + -> SimplM (ArityType, OutExpr) -- See Note [Eta-expanding at let bindings] -- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then -- (a) rhs' has manifest arity n @@ -1559,40 +1559,46 @@ tryEtaExpandRhs :: SimplMode -> OutId -> OutExpr tryEtaExpandRhs mode bndr rhs | Just join_arity <- isJoinId_maybe bndr = do { let (join_bndrs, join_body) = collectNBinders join_arity rhs - ; return (count isId join_bndrs, exprIsDeadEnd join_body, rhs) } + oss = [idOneShotInfo id | id <- join_bndrs, isId id] + arity_type | exprIsDeadEnd join_body = ABot (length oss) + | otherwise = ATop oss + ; return (arity_type, rhs) } -- Note [Do not eta-expand join points] -- But do return the correct arity and bottom-ness, because -- these are used to set the bndr's IdInfo (#15517) -- Note [Invariants on join points] invariant 2b, in GHC.Core + | sm_eta_expand mode -- Provided eta-expansion is on + , new_arity > old_arity -- And the current manifest arity isn't enough + , want_eta rhs + = do { tick (EtaExpansion bndr) + ; return (arity_type, etaExpandAT arity_type rhs) } + | otherwise - = do { (new_arity, is_bot, new_rhs) <- try_expand + = return (arity_type, rhs) - ; WARN( new_arity < old_id_arity, - (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity - <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) ) - -- Note [Arity decrease] in GHC.Core.Opt.Simplify - return (new_arity, is_bot, new_rhs) } where - try_expand - | exprIsTrivial rhs -- See Note [Do not eta-expand trivial expressions] - = return (exprArity rhs, False, rhs) - - | sm_eta_expand mode -- Provided eta-expansion is on - , new_arity > old_arity -- And the current manifest arity isn't enough - = do { tick (EtaExpansion bndr) - ; return (new_arity, is_bot, etaExpand new_arity rhs) } - - | otherwise - = return (old_arity, is_bot && new_arity == old_arity, rhs) - - dflags = sm_dflags mode - old_arity = exprArity rhs -- See Note [Do not expand eta-expand PAPs] - old_id_arity = idArity bndr - - (new_arity1, is_bot) = findRhsArity dflags bndr rhs old_arity - new_arity2 = idCallArity bndr - new_arity = max new_arity1 new_arity2 + dflags = sm_dflags mode + old_arity = exprArity rhs + + arity_type = findRhsArity dflags bndr rhs old_arity + `maxWithArity` idCallArity bndr + new_arity = arityTypeArity arity_type + + -- See Note [Which RHSs do we eta-expand?] + want_eta (Cast e _) = want_eta e + want_eta (Tick _ e) = want_eta e + want_eta (Lam b e) | isTyVar b = want_eta e + want_eta (App e a) | exprIsTrivial a = want_eta e + want_eta (Var {}) = False + want_eta (Lit {}) = False + want_eta _ = True +{- + want_eta _ = case arity_type of + ATop (os:_) -> isOneShotInfo os + ATop [] -> False + ABot {} -> True +-} {- Note [Eta-expanding at let bindings] @@ -1619,14 +1625,53 @@ because then 'genMap' will inline, and it really shouldn't: at least as far as the programmer is concerned, it's not applied to two arguments! -Note [Do not eta-expand trivial expressions] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Do not eta-expand a trivial RHS like - f = g -If we eta expand do - f = \x. g x -we'll just eta-reduce again, and so on; so the -simplifier never terminates. +Note [Which RHSs do we eta-expand?] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We don't eta-expand: + +* Trivial RHSs, e.g. f = g + If we eta expand do + f = \x. g x + we'll just eta-reduce again, and so on; so the + simplifier never terminates. + +* PAPs: see Note [Do not eta-expand PAPs] + +What about things like this? + f = case y of p -> \x -> blah + +Here we do eta-expand. This is a change (Jun 20), but if we have +really decided that f has arity 1, then putting that lambda at the top +seems like a Good idea. + +Note [Do not eta-expand PAPs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We used to have old_arity = manifestArity rhs, which meant that we +would eta-expand even PAPs. But this gives no particular advantage, +and can lead to a massive blow-up in code size, exhibited by #9020. +Suppose we have a PAP + foo :: IO () + foo = returnIO () +Then we can eta-expand do + foo = (\eta. (returnIO () |> sym g) eta) |> g +where + g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #) + +But there is really no point in doing this, and it generates masses of +coercions and whatnot that eventually disappear again. For T9020, GHC +allocated 6.6G before, and 0.8G afterwards; and residency dropped from +1.8G to 45M. + +Moreover, if we eta expand + f = g d ==> f = \x. g d x +that might in turn make g inline (if it has an inline pragma), which +we might not want. After all, INLINE pragmas say "inline only when +saturated" so we don't want to be too gung-ho about saturating! + +But note that this won't eta-expand, say + f = \g -> map g +Does it matter not eta-expanding such functions? I'm not sure. Perhaps +strictness analysis will have less to bite on? Note [Do not eta-expand join points] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1667,29 +1712,6 @@ CorePrep comes around, the code is very likely to look more like this: $j2 = if n > 0 then $j1 else (...) eta -Note [Do not eta-expand PAPs] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We used to have old_arity = manifestArity rhs, which meant that we -would eta-expand even PAPs. But this gives no particular advantage, -and can lead to a massive blow-up in code size, exhibited by #9020. -Suppose we have a PAP - foo :: IO () - foo = returnIO () -Then we can eta-expand do - foo = (\eta. (returnIO () |> sym g) eta) |> g -where - g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #) - -But there is really no point in doing this, and it generates masses of -coercions and whatnot that eventually disappear again. For T9020, GHC -allocated 6.6G before, and 0.8G afterwards; and residency dropped from -1.8G to 45M. - -But note that this won't eta-expand, say - f = \g -> map g -Does it matter not eta-expanding such functions? I'm not sure. Perhaps -strictness analysis will have less to bite on? - ************************************************************************ * * diff --git a/testsuite/tests/profiling/should_run/T5654-O1.prof.sample b/testsuite/tests/profiling/should_run/T5654-O1.prof.sample index 0e65631521..5da6ed89e7 100644 --- a/testsuite/tests/profiling/should_run/T5654-O1.prof.sample +++ b/testsuite/tests/profiling/should_run/T5654-O1.prof.sample @@ -1,27 +1,28 @@ - Thu Dec 8 11:34 2016 Time and Allocation Profiling Report (Final) + Thu Jul 9 17:12 2020 Time and Allocation Profiling Report (Final) - T5654-O1 +RTS -p -RTS + T5654-O1 +RTS -hc -p -RTS total time = 0.00 secs (0 ticks @ 1000 us, 1 processor) - total alloc = 39,064 bytes (excludes profiling overheads) + total alloc = 38,664 bytes (excludes profiling overheads) COST CENTRE MODULE SRC %time %alloc -MAIN MAIN <built-in> 0.0 1.9 -CAF GHC.IO.Handle.FD <entire-module> 0.0 88.6 -CAF GHC.IO.Encoding <entire-module> 0.0 7.1 -CAF GHC.Conc.Signal <entire-module> 0.0 1.6 +MAIN MAIN <built-in> 0.0 1.7 +CAF GHC.IO.Handle.FD <entire-module> 0.0 89.7 +CAF GHC.IO.Encoding <entire-module> 0.0 6.3 +CAF GHC.Conc.Signal <entire-module> 0.0 1.7 - individual inherited -COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc + individual inherited +COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc -MAIN MAIN <built-in> 104 0 0.0 1.9 0.0 100.0 - CAF Main <entire-module> 207 0 0.0 0.0 0.0 0.2 - main Main T5654-O1.hs:13:1-21 208 1 0.0 0.1 0.0 0.2 - f Main T5654-O1.hs:7:1-5 209 1 0.0 0.0 0.0 0.0 - g Main T5654-O1.hs:11:1-11 210 1 0.0 0.0 0.0 0.0 - CAF GHC.Conc.Signal <entire-module> 201 0 0.0 1.6 0.0 1.6 - CAF GHC.IO.Encoding <entire-module> 191 0 0.0 7.1 0.0 7.1 - CAF GHC.IO.Encoding.Iconv <entire-module> 189 0 0.0 0.6 0.0 0.6 - CAF GHC.IO.Handle.FD <entire-module> 181 0 0.0 88.6 0.0 88.6 +MAIN MAIN <built-in> 121 0 0.0 1.7 0.0 100.0 + CAF Main <entire-module> 241 0 0.0 0.0 0.0 0.1 + f Main T5654-O1.hs:7:1-5 243 1 0.0 0.0 0.0 0.0 + main Main T5654-O1.hs:13:1-21 242 1 0.0 0.1 0.0 0.1 + f Main T5654-O1.hs:7:1-5 244 0 0.0 0.0 0.0 0.0 + g Main T5654-O1.hs:11:1-11 245 1 0.0 0.0 0.0 0.0 + CAF GHC.Conc.Signal <entire-module> 236 0 0.0 1.7 0.0 1.7 + CAF GHC.IO.Encoding <entire-module> 227 0 0.0 6.3 0.0 6.3 + CAF GHC.IO.Encoding.Iconv <entire-module> 225 0 0.0 0.5 0.0 0.5 + CAF GHC.IO.Handle.FD <entire-module> 217 0 0.0 89.7 0.0 89.7 diff --git a/testsuite/tests/profiling/should_run/T5654b-O1.prof.sample b/testsuite/tests/profiling/should_run/T5654b-O1.prof.sample index 45ae0ba55c..2ff1e70bc7 100644 --- a/testsuite/tests/profiling/should_run/T5654b-O1.prof.sample +++ b/testsuite/tests/profiling/should_run/T5654b-O1.prof.sample @@ -1,28 +1,30 @@ - Fri Jun 3 11:00 2016 Time and Allocation Profiling Report (Final) + Thu Jul 9 17:12 2020 Time and Allocation Profiling Report (Final) T5654b-O1 +RTS -hc -p -RTS total time = 0.00 secs (0 ticks @ 1000 us, 1 processor) - total alloc = 38,880 bytes (excludes profiling overheads) + total alloc = 38,728 bytes (excludes profiling overheads) COST CENTRE MODULE SRC %time %alloc MAIN MAIN <built-in> 0.0 1.7 -CAF GHC.IO.Handle.FD <entire-module> 0.0 88.8 -CAF GHC.IO.Encoding <entire-module> 0.0 7.1 +CAF GHC.IO.Handle.FD <entire-module> 0.0 89.5 +CAF GHC.IO.Encoding <entire-module> 0.0 6.3 CAF GHC.Conc.Signal <entire-module> 0.0 1.7 - individual inherited -COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc + individual inherited +COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc -MAIN MAIN <built-in> 43 0 0.0 1.7 0.0 100.0 - CAF Main <entire-module> 85 0 0.0 0.0 0.0 0.1 - main Main T5654b-O1.hs:22:1-21 86 1 0.0 0.1 0.0 0.1 - f Main T5654b-O1.hs:12:1-7 87 1 0.0 0.0 0.0 0.0 - g Main T5654b-O1.hs:16:1-7 88 1 0.0 0.0 0.0 0.0 - h Main T5654b-O1.hs:20:1-19 89 1 0.0 0.0 0.0 0.0 - CAF GHC.Conc.Signal <entire-module> 79 0 0.0 1.7 0.0 1.7 - CAF GHC.IO.Encoding <entire-module> 74 0 0.0 7.1 0.0 7.1 - CAF GHC.IO.Handle.FD <entire-module> 72 0 0.0 88.8 0.0 88.8 - CAF GHC.IO.Encoding.Iconv <entire-module> 53 0 0.0 0.6 0.0 0.6 +MAIN MAIN <built-in> 121 0 0.0 1.7 0.0 100.0 + CAF Main <entire-module> 241 0 0.0 0.0 0.0 0.3 + f Main T5654b-O1.hs:12:1-7 243 1 0.0 0.1 0.0 0.1 + g Main T5654b-O1.hs:16:1-7 244 1 0.0 0.0 0.0 0.0 + main Main T5654b-O1.hs:22:1-21 242 1 0.0 0.1 0.0 0.1 + f Main T5654b-O1.hs:12:1-7 245 0 0.0 0.0 0.0 0.0 + g Main T5654b-O1.hs:16:1-7 246 0 0.0 0.0 0.0 0.0 + h Main T5654b-O1.hs:20:1-19 247 1 0.0 0.0 0.0 0.0 + CAF GHC.Conc.Signal <entire-module> 236 0 0.0 1.7 0.0 1.7 + CAF GHC.IO.Encoding <entire-module> 227 0 0.0 6.3 0.0 6.3 + CAF GHC.IO.Encoding.Iconv <entire-module> 225 0 0.0 0.5 0.0 0.5 + CAF GHC.IO.Handle.FD <entire-module> 217 0 0.0 89.5 0.0 89.5 diff --git a/testsuite/tests/profiling/should_run/ioprof.stderr b/testsuite/tests/profiling/should_run/ioprof.stderr index db9c36bbe3..0cb2b4b174 100644 --- a/testsuite/tests/profiling/should_run/ioprof.stderr +++ b/testsuite/tests/profiling/should_run/ioprof.stderr @@ -1,5 +1,5 @@ ioprof: a -CallStack (from ImplicitParams): +CallStack (from HasCallStack): error, called at ioprof.hs:23:22 in main:Main CallStack (from -prof): Main.errorM.\ (ioprof.hs:23:22-28) @@ -11,4 +11,3 @@ CallStack (from -prof): Main.bar (ioprof.hs:31:1-20) Main.runM (ioprof.hs:26:1-37) Main.main (ioprof.hs:28:1-43) - Main.CAF (<entire-module>) diff --git a/testsuite/tests/simplCore/should_compile/T18355.hs b/testsuite/tests/simplCore/should_compile/T18355.hs new file mode 100644 index 0000000000..207c2087d6 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T18355.hs @@ -0,0 +1,9 @@ +module T18355 where + +import GHC.Exts + +-- I expect the simplified Core to have an eta-expaned +-- defn of f, with a OneShot on the final lambda-binder +f x b = case b of + True -> oneShot (\y -> x+y) + False -> \y -> x-y diff --git a/testsuite/tests/simplCore/should_compile/T18355.stderr b/testsuite/tests/simplCore/should_compile/T18355.stderr new file mode 100644 index 0000000000..50efeca4b1 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T18355.stderr @@ -0,0 +1,70 @@ + +==================== Tidy Core ==================== +Result size of Tidy Core + = {terms: 32, types: 23, coercions: 0, joins: 0/0} + +-- RHS size: {terms: 17, types: 10, coercions: 0, joins: 0/0} +f :: forall {a}. Num a => a -> Bool -> a -> a +[GblId, + Arity=4, + Str=<S,1*U(1*C1(C1(U)),1*C1(C1(U)),A,A,A,A,A)><L,U><S,1*U><L,U>, + Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=ALWAYS_IF(arity=4,unsat_ok=True,boring_ok=False) + Tmpl= \ (@a) + ($dNum [Occ=Once*] :: Num a) + (x [Occ=Once*] :: a) + (b [Occ=Once!] :: Bool) + (eta [Occ=Once*, OS=OneShot] :: a) -> + case b of { + False -> - @a $dNum x eta; + True -> + @a $dNum x eta + }}] +f = \ (@a) + ($dNum :: Num a) + (x :: a) + (b :: Bool) + (eta [OS=OneShot] :: a) -> + case b of { + False -> - @a $dNum x eta; + True -> + @a $dNum x eta + } + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +T18355.$trModule4 :: Addr# +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}] +T18355.$trModule4 = "main"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +T18355.$trModule3 :: GHC.Types.TrName +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 20}] +T18355.$trModule3 = GHC.Types.TrNameS T18355.$trModule4 + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +T18355.$trModule2 :: Addr# +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 30 0}] +T18355.$trModule2 = "T18355"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +T18355.$trModule1 :: GHC.Types.TrName +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 20}] +T18355.$trModule1 = GHC.Types.TrNameS T18355.$trModule2 + +-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0} +T18355.$trModule :: GHC.Types.Module +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 30}] +T18355.$trModule + = GHC.Types.Module T18355.$trModule3 T18355.$trModule1 + + + diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 93f7fc155a..0abd79858b 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -330,4 +330,5 @@ test('T17673', [ only_ways(['optasm']), grep_errmsg(r'^\w+\.\$wf') ], compile, [ test('T18078', [ only_ways(['optasm']), grep_errmsg(r'^\w+\.\$wf') ], compile, ['-ddump-simpl -dsuppress-uniques -dppr-cols=9999']) test('T18328', [ only_ways(['optasm']), grep_errmsg(r'Arity=') ], compile, ['-ddump-simpl -dsuppress-uniques']) test('T18347', normal, compile, ['-dcore-lint -O']) +test('T18355', [ grep_errmsg(r'OneShot') ], compile, ['-O -ddump-simpl -dsuppress-uniques']) test('T18399', normal, compile, ['-dcore-lint -O']) |