diff options
Diffstat (limited to 'compiler/GHC/Core/Opt/CSE.hs')
-rw-r--r-- | compiler/GHC/Core/Opt/CSE.hs | 85 |
1 files changed, 69 insertions, 16 deletions
diff --git a/compiler/GHC/Core/Opt/CSE.hs b/compiler/GHC/Core/Opt/CSE.hs index 45e26acc4b..9e3010ca47 100644 --- a/compiler/GHC/Core/Opt/CSE.hs +++ b/compiler/GHC/Core/Opt/CSE.hs @@ -18,7 +18,7 @@ import GHC.Prelude import GHC.Core.Subst import GHC.Types.Var ( Var ) import GHC.Types.Var.Env ( mkInScopeSet ) -import GHC.Types.Id ( Id, idType, idHasRules +import GHC.Types.Id ( Id, idType, idHasRules, zapStableUnfolding , idInlineActivation, setInlineActivation , zapIdOccInfo, zapIdUsageInfo, idInlinePragma , isJoinId, isJoinId_maybe ) @@ -236,8 +236,8 @@ because we promised to inline foo as what the user wrote. See similar Note Nor do we want to change the reverse mapping. Suppose we have - {-# Unf = Stable (\pq. build blah) #-} - foo = <expr> + foo {-# Unf = Stable (\pq. build blah) #-} + = <expr> bar = <expr> There could conceivably be merit in rewriting the RHS of bar: @@ -250,6 +250,23 @@ that a function's definition is so small that it should always inline. In this case we still want to do CSE (#13340). Hence the use of isAnyInlinePragma rather than isStableUnfolding. +Now consider + foo = <expr> + bar {-# Unf = Stable ... #-} + = <expr> + +where the unfolding was added by strictness analysis, say. Then +CSE goes ahead, so we get + bar = foo +and probably use SUBSTITUTE that will make 'bar' dead. But just +possibly not -- see Note [Dealing with ticks]. In that case we might +be left with + bar = tick t1 (tick t2 foo) +in which case we would really like to get rid of the stable unfolding +(generated by the strictness analyser, say). Hence the zapStableUnfolding +in cse_bind. Not a big deal, and only makes a difference when ticks +get into the picture. + Note [Corner case for case expressions] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Here is another reason that we do not use SUBSTITUTE for @@ -389,9 +406,10 @@ cse_bind toplevel env (in_id, in_rhs) out_id | otherwise = (env', (out_id'', out_rhs)) where - (env', out_id') = addBinding env in_id out_id out_rhs - (cse_done, out_rhs) = try_for_cse env in_rhs - out_id'' | cse_done = delayInlining toplevel out_id' + (env', out_id') = addBinding env in_id out_id out_rhs cse_done + (cse_done, out_rhs) = try_for_cse env in_rhs + out_id'' | cse_done = zapStableUnfolding $ + delayInlining toplevel out_id' | otherwise = out_id' delayInlining :: TopLevelFlag -> Id -> Id @@ -409,19 +427,23 @@ delayInlining top_lvl bndr | otherwise = bndr -addBinding :: CSEnv -- Includes InId->OutId cloning - -> InVar -- Could be a let-bound type - -> OutId -> OutExpr -- Processed binding - -> (CSEnv, OutId) -- Final env, final bndr +addBinding :: CSEnv -- Includes InId->OutId cloning + -> InVar -- Could be a let-bound type + -> OutId -> OutExpr -- Processed binding + -> Bool -- True <=> RHS was CSE'd and is a variable + -- or maybe (Tick t variable) + -> (CSEnv, OutId) -- Final env, final bndr -- Extend the CSE env with a mapping [rhs -> out-id] -- unless we can instead just substitute [in-id -> rhs] -- -- It's possible for the binder to be a type variable (see -- Note [Type-let] in GHC.Core), in which case we can just substitute. -addBinding env in_id out_id rhs' +addBinding env in_id out_id rhs' cse_done | not (isId in_id) = (extendCSSubst env in_id rhs', out_id) | noCSE in_id = (env, out_id) | use_subst = (extendCSSubst env in_id rhs', out_id) + | cse_done = (env, out_id) + -- See Note [Dealing with ticks] | otherwise = (extendCSEnv env rhs' id_expr', zapped_id) where id_expr' = varToCoreExpr out_id @@ -438,9 +460,8 @@ addBinding env in_id out_id rhs' -- Should we use SUBSTITUTE or EXTEND? -- See Note [CSE for bindings] - use_subst = case rhs' of - Var {} -> True - _ -> False + use_subst | Var {} <- rhs' = True + | otherwise = False -- | Given a binder `let x = e`, this function -- determines whether we should add `e -> x` to the cs_map @@ -487,6 +508,38 @@ The net effect is that for the y-binding we want to This is done by cse_bind. I got it wrong the first time (#13367). +Note [Dealing with ticks] +~~~~~~~~~~~~~~~~~~~~~~~~~ +Ticks complicate CSE a bit, as I discovered in the fallout from +fixing #19360. + +* To get more CSE-ing, we strip all the tickishFloatable ticks from + an expression + - when inserting into the cs_map (see extendCSEnv) + - when looking up in the cs_map (see call to lookupCSEnv in try_for_cse) + Quite why only the tickishFloatble ticks, I'm not quite sure. + +* If we get a hit in cs_map, we wrap the result in the ticks from the + thing we are looking up (see try_for_cse) + +Net result: if we get a hit, we might replace + let x = tick t1 (tick t2 e) +with + let x = tick t1 (tick t2 y) +where 'y' is the variable that 'e' maps to. Now consider addBinding for +the binding for 'x': + +* We can't use SUBSTITUTE because those ticks might not be trivial (we + use tickishIsCode in exprIsTrivial) + +* We should not use EXTEND, because we definitely don't want to + add (tick t1 (tick t2 y)) :-> x + to the cs_map. Remember we strip off the ticks, so that would amount + to adding y :-> x, very silly. + +TL;DR: we do neither; hence the cse_done case in addBinding. + + Note [Delay inlining after CSE] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Suppose (#15445) we have @@ -603,14 +656,14 @@ cseCase env scrut bndr ty alts combineAlts alt_env (map cse_alt alts) where ty' = substTy (csEnvSubst env) ty - scrut1 = tryForCSE env scrut + (cse_done, scrut1) = try_for_cse env scrut bndr1 = zapIdOccInfo bndr -- Zapping the OccInfo is needed because the extendCSEnv -- in cse_alt may mean that a dead case binder -- becomes alive, and Lint rejects that (env1, bndr2) = addBinder env bndr1 - (alt_env, bndr3) = addBinding env1 bndr bndr2 scrut1 + (alt_env, bndr3) = addBinding env1 bndr bndr2 scrut1 cse_done -- addBinding: see Note [CSE for case expressions] con_target :: OutExpr |