summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2021-02-19 11:03:15 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-03-01 17:31:36 -0500
commit6429943b0a377e076bcfd26c79ceb27cf2f4a9ab (patch)
tree0ea9b1e0ef0bb3cc0fa344f064034959f5fca4d8
parent3b79e8b833646e995f035e4402f2284cc15cbd72 (diff)
downloadhaskell-6429943b0a377e076bcfd26c79ceb27cf2f4a9ab.tar.gz
Fix terrible occurrence-analysis bug
Ticket #19360 showed up a terrible bug in the occurrence analyser, in a situation like this Rec { f = g ; g = ..f... {-# RULE g .. = ...f... #-} } Then f was postInlineUnconditionally, but not in the RULE (which is simplified first), so we had a RULE mentioning a variable that was not in scope. This led me to review (again) the subtle loop-breaker stuff in the occurrence analyser. The actual changes are few, and are largely simplifications. I did a /lot/ of comment re-organising though. There was an unexpected amount of fallout. * Validation failed when compiling the stage2 compiler with profiling on. That turned to tickle a second latent bug in the same OccAnal code (at least I think it was always there), which led me to simplify still further; see Note [inl_fvs] in GHC.Core.Opt.OccurAnal. * But that in turn let me to some strange behaviour in CSE when ticks are in the picture, which I duly fixed. See Note [Dealing with ticks] in GHC.Core.Opt.CSE. * Then I got an ASSERT failure in CoreToStg, which again seems to be a latent bug. See Note [Ticks in applications] in GHC.CoreToStg * I also made one unforced change: I now simplify the RHS of a RULE in the same way as the RHS of a stable unfolding. This can allow a trivial binding to disappear sooner than otherwise, and I don't think it has any downsides. The change is in GHC.Core.Opt.Simplify.simplRules.
-rw-r--r--compiler/GHC/Core/Opt/CSE.hs85
-rw-r--r--compiler/GHC/Core/Opt/OccurAnal.hs407
-rw-r--r--compiler/GHC/Core/Opt/Simplify.hs20
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Utils.hs8
-rw-r--r--compiler/GHC/CoreToStg.hs27
-rw-r--r--testsuite/tests/numeric/should_compile/T7116.stdout14
-rw-r--r--testsuite/tests/simplCore/should_compile/T19360.hs29
-rw-r--r--testsuite/tests/simplCore/should_compile/T8331.stderr7
-rw-r--r--testsuite/tests/simplCore/should_compile/all.T1
9 files changed, 373 insertions, 225 deletions
diff --git a/compiler/GHC/Core/Opt/CSE.hs b/compiler/GHC/Core/Opt/CSE.hs
index 45e26acc4b..9e3010ca47 100644
--- a/compiler/GHC/Core/Opt/CSE.hs
+++ b/compiler/GHC/Core/Opt/CSE.hs
@@ -18,7 +18,7 @@ import GHC.Prelude
import GHC.Core.Subst
import GHC.Types.Var ( Var )
import GHC.Types.Var.Env ( mkInScopeSet )
-import GHC.Types.Id ( Id, idType, idHasRules
+import GHC.Types.Id ( Id, idType, idHasRules, zapStableUnfolding
, idInlineActivation, setInlineActivation
, zapIdOccInfo, zapIdUsageInfo, idInlinePragma
, isJoinId, isJoinId_maybe )
@@ -236,8 +236,8 @@ because we promised to inline foo as what the user wrote. See similar Note
Nor do we want to change the reverse mapping. Suppose we have
- {-# Unf = Stable (\pq. build blah) #-}
- foo = <expr>
+ foo {-# Unf = Stable (\pq. build blah) #-}
+ = <expr>
bar = <expr>
There could conceivably be merit in rewriting the RHS of bar:
@@ -250,6 +250,23 @@ that a function's definition is so small that it should always inline.
In this case we still want to do CSE (#13340). Hence the use of
isAnyInlinePragma rather than isStableUnfolding.
+Now consider
+ foo = <expr>
+ bar {-# Unf = Stable ... #-}
+ = <expr>
+
+where the unfolding was added by strictness analysis, say. Then
+CSE goes ahead, so we get
+ bar = foo
+and probably use SUBSTITUTE that will make 'bar' dead. But just
+possibly not -- see Note [Dealing with ticks]. In that case we might
+be left with
+ bar = tick t1 (tick t2 foo)
+in which case we would really like to get rid of the stable unfolding
+(generated by the strictness analyser, say). Hence the zapStableUnfolding
+in cse_bind. Not a big deal, and only makes a difference when ticks
+get into the picture.
+
Note [Corner case for case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Here is another reason that we do not use SUBSTITUTE for
@@ -389,9 +406,10 @@ cse_bind toplevel env (in_id, in_rhs) out_id
| otherwise
= (env', (out_id'', out_rhs))
where
- (env', out_id') = addBinding env in_id out_id out_rhs
- (cse_done, out_rhs) = try_for_cse env in_rhs
- out_id'' | cse_done = delayInlining toplevel out_id'
+ (env', out_id') = addBinding env in_id out_id out_rhs cse_done
+ (cse_done, out_rhs) = try_for_cse env in_rhs
+ out_id'' | cse_done = zapStableUnfolding $
+ delayInlining toplevel out_id'
| otherwise = out_id'
delayInlining :: TopLevelFlag -> Id -> Id
@@ -409,19 +427,23 @@ delayInlining top_lvl bndr
| otherwise
= bndr
-addBinding :: CSEnv -- Includes InId->OutId cloning
- -> InVar -- Could be a let-bound type
- -> OutId -> OutExpr -- Processed binding
- -> (CSEnv, OutId) -- Final env, final bndr
+addBinding :: CSEnv -- Includes InId->OutId cloning
+ -> InVar -- Could be a let-bound type
+ -> OutId -> OutExpr -- Processed binding
+ -> Bool -- True <=> RHS was CSE'd and is a variable
+ -- or maybe (Tick t variable)
+ -> (CSEnv, OutId) -- Final env, final bndr
-- Extend the CSE env with a mapping [rhs -> out-id]
-- unless we can instead just substitute [in-id -> rhs]
--
-- It's possible for the binder to be a type variable (see
-- Note [Type-let] in GHC.Core), in which case we can just substitute.
-addBinding env in_id out_id rhs'
+addBinding env in_id out_id rhs' cse_done
| not (isId in_id) = (extendCSSubst env in_id rhs', out_id)
| noCSE in_id = (env, out_id)
| use_subst = (extendCSSubst env in_id rhs', out_id)
+ | cse_done = (env, out_id)
+ -- See Note [Dealing with ticks]
| otherwise = (extendCSEnv env rhs' id_expr', zapped_id)
where
id_expr' = varToCoreExpr out_id
@@ -438,9 +460,8 @@ addBinding env in_id out_id rhs'
-- Should we use SUBSTITUTE or EXTEND?
-- See Note [CSE for bindings]
- use_subst = case rhs' of
- Var {} -> True
- _ -> False
+ use_subst | Var {} <- rhs' = True
+ | otherwise = False
-- | Given a binder `let x = e`, this function
-- determines whether we should add `e -> x` to the cs_map
@@ -487,6 +508,38 @@ The net effect is that for the y-binding we want to
This is done by cse_bind. I got it wrong the first time (#13367).
+Note [Dealing with ticks]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+Ticks complicate CSE a bit, as I discovered in the fallout from
+fixing #19360.
+
+* To get more CSE-ing, we strip all the tickishFloatable ticks from
+ an expression
+ - when inserting into the cs_map (see extendCSEnv)
+ - when looking up in the cs_map (see call to lookupCSEnv in try_for_cse)
+ Quite why only the tickishFloatble ticks, I'm not quite sure.
+
+* If we get a hit in cs_map, we wrap the result in the ticks from the
+ thing we are looking up (see try_for_cse)
+
+Net result: if we get a hit, we might replace
+ let x = tick t1 (tick t2 e)
+with
+ let x = tick t1 (tick t2 y)
+where 'y' is the variable that 'e' maps to. Now consider addBinding for
+the binding for 'x':
+
+* We can't use SUBSTITUTE because those ticks might not be trivial (we
+ use tickishIsCode in exprIsTrivial)
+
+* We should not use EXTEND, because we definitely don't want to
+ add (tick t1 (tick t2 y)) :-> x
+ to the cs_map. Remember we strip off the ticks, so that would amount
+ to adding y :-> x, very silly.
+
+TL;DR: we do neither; hence the cse_done case in addBinding.
+
+
Note [Delay inlining after CSE]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Suppose (#15445) we have
@@ -603,14 +656,14 @@ cseCase env scrut bndr ty alts
combineAlts alt_env (map cse_alt alts)
where
ty' = substTy (csEnvSubst env) ty
- scrut1 = tryForCSE env scrut
+ (cse_done, scrut1) = try_for_cse env scrut
bndr1 = zapIdOccInfo bndr
-- Zapping the OccInfo is needed because the extendCSEnv
-- in cse_alt may mean that a dead case binder
-- becomes alive, and Lint rejects that
(env1, bndr2) = addBinder env bndr1
- (alt_env, bndr3) = addBinding env1 bndr bndr2 scrut1
+ (alt_env, bndr3) = addBinding env1 bndr bndr2 scrut1 cse_done
-- addBinding: see Note [CSE for case expressions]
con_target :: OutExpr
diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs
index d4d617bf6f..3f31ae258b 100644
--- a/compiler/GHC/Core/Opt/OccurAnal.hs
+++ b/compiler/GHC/Core/Opt/OccurAnal.hs
@@ -369,6 +369,14 @@ Solution:
Note [Recursive bindings: the grand plan]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Loop breaking is surprisingly subtle. First read the section 4 of
+"Secrets of the GHC inliner". This describes our basic plan. We
+avoid infinite inlinings by choosing loop breakers, and ensuring that
+a loop breaker cuts each loop.
+
+See also Note [Inlining and hs-boot files] in GHC.Core.ToIface, which
+deals with a closely related source of infinite loops.
+
When we come across a binding group
Rec { x1 = r1; ...; xn = rn }
we treat it like this (occAnalRecBind):
@@ -379,9 +387,12 @@ we treat it like this (occAnalRecBind):
Wrap the details in a LetrecNode, ready for SCC analysis.
All this is done by makeNode.
+ The edges of this graph are the "scope edges".
+
2. Do SCC-analysis on these Nodes:
- Each CyclicSCC will become a new Rec
- Each AcyclicSCC will become a new NonRec
+
The key property is that every free variable of a binding is
accounted for by the scope edges, so that when we are done
everything is still in scope.
@@ -392,22 +403,11 @@ we treat it like this (occAnalRecBind):
identify suitable loop-breakers to ensure that inlining terminates.
This is done by occAnalRec.
- 4a To do so we form a new set of Nodes, with the same details, but
- different edges, the "loop-breaker nodes". The loop-breaker nodes
- have both more and fewer dependencies than the scope edges
- (see Note [Choosing loop breakers])
-
- More edges: if f calls g, and g has an active rule that mentions h
- then we add an edge from f -> h
+ To do so, form the loop-breaker graph, do SCC analysis. For each
+ CyclicSCC we choose a loop breaker, delete all edges to that node,
+ re-analyse the SCC, and iterate. See Note [Choosing loop breakers]
+ for the details
- Fewer edges: we only include dependencies on active rules, on rule
- RHSs (not LHSs) and if there is an INLINE pragma only
- on the stable unfolding (and vice versa). The scope
- edges must be much more inclusive.
-
- 4b. The "weak fvs" of a node are, by definition:
- the scope fvs - the loop-breaker fvs
- See Note [Weak loop breakers], and the nd_weak field of Details
Note [Dead code]
~~~~~~~~~~~~~~~~
@@ -432,7 +432,6 @@ when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
'AcyclicSCC f', where 'body_usage' won't contain 'f'.
-------------------------------------------------------------
Note [Forming Rec groups]
~~~~~~~~~~~~~~~~~~~~~~~~~
The key point about the "Forming Rec groups" step is that it /preserves
@@ -498,10 +497,9 @@ of the file.
at any of the definitions. This is done by Simplify.simplRecBind,
when it calls addLetIdInfo.
-------------------------------------------------------------
-Note [Inline rules]
-~~~~~~~~~~~~~~~~~~~
-None of the above stuff about RULES applies to Inline Rules,
+Note [Stable unfoldings]
+~~~~~~~~~~~~~~~~~~~~~~~~
+None of the above stuff about RULES applies to a stable unfolding
stored in a CoreUnfolding. The unfolding, if any, is simplified
at the same time as the regular RHS of the function (ie *not* like
Note [Rules are visible in their own rec group]), so it should be
@@ -575,6 +573,8 @@ But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite
- now there's another opportunity to apply the RULE
This showed up when compiling Control.Concurrent.Chan.getChanContents.
+Hence the transitive rule_fv_env stuff described in
+Note [Rules and loop breakers].
------------------------------------------------------------
Note [Finding join points]
@@ -840,13 +840,12 @@ occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
= (body_uds, binds) -- See Note [Dead code]
| otherwise -- At this point we always build a single Rec
- = -- pprTrace "occAnalRec" (vcat
- -- [ text "weak_fvs" <+> ppr weak_fvs
- -- , text "lb nodes" <+> ppr loop_breaker_nodes])
+ = -- pprTrace "occAnalRec" (ppr loop_breaker_nodes)
(final_uds, Rec pairs : binds)
where
- bndrs = map nd_bndr details_s
+ bndrs = map nd_bndr details_s
+ all_simple = all nd_simple details_s
------------------------------
-- Make the nodes for the loop-breaker analysis
@@ -856,20 +855,19 @@ occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
(final_uds, loop_breaker_nodes) = mkLoopBreakerNodes env lvl body_uds details_s
------------------------------
- weak_fvs :: VarSet
- weak_fvs = mapUnionVarSet nd_weak details_s
+ active_rule_fvs :: VarSet
+ active_rule_fvs = mapUnionVarSet nd_active_rule_fvs details_s
---------------------------
-- Now reconstruct the cycle
pairs :: [(Id,CoreExpr)]
- pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 weak_fvs loop_breaker_nodes []
- | otherwise = loopBreakNodes 0 weak_fvs loop_breaker_nodes []
- -- If weak_fvs is empty, the loop_breaker_nodes will include
- -- all the edges in the original scope edges [remember,
- -- weak_fvs is the difference between scope edges and
- -- lb-edges], so a fresh SCC computation would yield a
- -- single CyclicSCC result; and reOrderNodes deals with
- -- exactly that case
+ pairs | all_simple = reOrderNodes 0 active_rule_fvs loop_breaker_nodes []
+ | otherwise = loopBreakNodes 0 active_rule_fvs loop_breaker_nodes []
+ -- In the common case when all are "simple" (no rules at all)
+ -- the loop_breaker_nodes will include all the scope edges
+ -- so a SCC computation would yield a single CyclicSCC result;
+ -- and reOrderNodes deals with exactly that case.
+ -- Saves a SCC analysis in a common case
{- *********************************************************************
@@ -880,149 +878,183 @@ occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
{- Note [Choosing loop breakers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Loop breaking is surprisingly subtle. First read the section 4 of
-"Secrets of the GHC inliner". This describes our basic plan.
-We avoid infinite inlinings by choosing loop breakers, and
-ensuring that a loop breaker cuts each loop.
+In Step 4 in Note [Recursive bindings: the grand plan]), occAnalRec does
+loop-breaking on each CyclicSCC of the original program:
-See also Note [Inlining and hs-boot files] in GHC.Core.ToIface, which
-deals with a closely related source of infinite loops.
+* mkLoopBreakerNodes: Form the loop-breaker graph for that CyclicSCC
+
+* loopBreakNodes: Do SCC analysis on it
+
+* reOrderNodes: For each CyclicSCC, pick a loop breaker
+ * Delete edges to that loop breaker
+ * Do another SCC analysis on that reduced SCC
+ * Repeat
+
+To form the loop-breaker graph, we construct a new set of Nodes, the
+"loop-breaker nodes", with the same details but different edges, the
+"loop-breaker edges". The loop-breaker nodes have both more and fewer
+dependencies than the scope edges:
+
+ More edges:
+ If f calls g, and g has an active rule that mentions h then
+ we add an edge from f -> h. See Note [Rules and loop breakers].
+
+ Fewer edges: we only include dependencies
+ * only on /active/ rules,
+ * on rule /RHSs/ (not LHSs)
+
+The scope edges, by contrast, must be much more inclusive.
+
+The nd_simple flag tracks the common case when a binding has no RULES
+at all, in which case the loop-breaker edges will be identical to the
+scope edges.
+
+Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
+chosen as a loop breaker, because their RHSs don't mention each other.
+And indeed both can be inlined safely.
+
+Note [inl_fvs]
+~~~~~~~~~~~~~~
+Note that the loop-breaker graph includes edges for occurrences in
+/both/ the RHS /and/ the stable unfolding. Consider this, which actually
+occurred when compiling BooleanFormula.hs in GHC:
+
+ Rec { lvl1 = go
+ ; lvl2[StableUnf = go] = lvl1
+ ; go = ...go...lvl2... }
+
+From the point of view of infinite inlining, we need only these edges:
+ lvl1 :-> go
+ lvl2 :-> go -- The RHS lvl1 will never be used for inlining
+ go :-> go, lvl2
+
+But the danger is that, lacking any edge to lvl1, we'll put it at the
+end thus
+ Rec { lvl2[ StableUnf = go] = lvl1
+ ; go[LoopBreaker] = ...go...lvl2... }
+ ; lvl1[Occ=Once] = go }
+
+And now the Simplifer will try to use PreInlineUnconditionally on lvl1
+(which occurs just once), but because it is last we won't actually
+substitute in lvl2. Sigh.
+
+To avoid this possiblity, we include edges from lvl2 to /both/ its
+stable unfolding /and/ its RHS. Hence the defn of inl_fvs in
+makeNode. Maybe we could be more clever, but it's very much a corner
+case.
+
+Note [Weak loop breakers]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+There is a last nasty wrinkle. Suppose we have
+
+ Rec { f = f_rhs
+ RULE f [] = g
+
+ h = h_rhs
+ g = h
+ ...more...
+ }
+
+Remember that we simplify the RULES before any RHS (see Note
+[Rules are visible in their own rec group] above).
+
+So we must *not* postInlineUnconditionally 'g', even though
+its RHS turns out to be trivial. (I'm assuming that 'g' is
+not chosen as a loop breaker.) Why not? Because then we
+drop the binding for 'g', which leaves it out of scope in the
+RULE!
+
+Here's a somewhat different example of the same thing
+ Rec { q = r
+ ; r = ...p...
+ ; p = p_rhs
+ RULE p [] = q }
+Here the RULE is "below" q, but we *still* can't postInlineUnconditionally
+q, because the RULE for p is active throughout. So the RHS of r
+might rewrite to r = ...q...
+So q must remain in scope in the output program!
+
+We "solve" this by:
+
+ Make q a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
+ iff q is a mentioned in the RHS of an active RULE in the Rec group
-Fundamentally, we do SCC analysis on a graph. For each recursive
-group we choose a loop breaker, delete all edges to that node,
-re-analyse the SCC, and iterate.
+A normal "strong" loop breaker has IAmLoopBreaker False. So:
-But what is the graph? NOT the same graph as was used for Note
-[Forming Rec groups]! In particular, a RULE is like an equation for
+ Inline postInlineUnconditionally
+strong IAmLoopBreaker False no no
+weak IAmLoopBreaker True yes no
+ other yes yes
+
+The **sole** reason for this kind of loop breaker is so that
+postInlineUnconditionally does not fire. Ugh.
+
+Annoyingly, since we simplify the rules *first* we'll never inline
+q into p's RULE. That trivial binding for q will hang around until
+we discard the rule. Yuk. But it's rare.
+
+ Note [Rules and loop breakers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we form the loop-breaker graph (Step 4 in Note [Recursive
+bindings: the grand plan]), we must be careful about RULEs.
+
+For a start, we want a loop breaker to cut every cycle, so inactive
+rules play no part; we need only consider /active/ rules.
+See Note [Finding rule RHS free vars]
+
+The second point is more subtle. A RULE is like an equation for
'f' that is *always* inlined if it is applicable. We do *not* disable
rules for loop-breakers. It's up to whoever makes the rules to make
sure that the rules themselves always terminate. See Note [Rules for
recursive functions] in GHC.Core.Opt.Simplify
Hence, if
- f's RHS (or its INLINE template if it has one) mentions g, and
+ f's RHS (or its stable unfolding if it has one) mentions g, and
g has a RULE that mentions h, and
h has a RULE that mentions f
then we *must* choose f to be a loop breaker. Example: see Note
-[Specialisation rules].
-
-In general, take the free variables of f's RHS, and augment it with
-all the variables reachable by RULES from those starting points. That
-is the whole reason for computing rule_fv_env in occAnalBind. (Of
-course we only consider free vars that are also binders in this Rec
-group.) See also Note [Finding rule RHS free vars]
+[Specialisation rules]. So out plan is this:
-Note that when we compute this rule_fv_env, we only consider variables
-free in the *RHS* of the rule, in contrast to the way we build the
-Rec group in the first place (Note [Rule dependency info])
+ Take the free variables of f's RHS, and augment it with all the
+ variables reachable by a transitive sequence RULES from those
+ starting points.
-Note that if 'g' has RHS that mentions 'w', we should add w to
-g's loop-breaker edges. More concretely there is an edge from f -> g
-iff
- (a) g is mentioned in f's RHS `xor` f's INLINE rhs
- (see Note [Inline rules])
- (b) or h is mentioned in f's RHS, and
- g appears in the RHS of an active RULE of h
- or a /transitive sequence/ of /active rules/ starting with h
+That is the whole reason for computing rule_fv_env in mkLoopBreakerNodes.
+Wrinkles:
-Why "active rules"? See Note [Finding rule RHS free vars]
+* We only consider /active/ rules. See Note [Finding rule RHS free vars]
-Why "transitive sequence"? Because active rules apply
-unconditionallly, without checking loop-breaker-ness.
-See Note [Loop breaker dependencies].
-
-Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
-chosen as a loop breaker, because their RHSs don't mention each other.
-And indeed both can be inlined safely.
+* We need only consider free vars that are also binders in this Rec
+ group. See also Note [Finding rule RHS free vars]
-Note again that the edges of the graph we use for computing loop breakers
-are not the same as the edges we use for computing the Rec blocks.
-That's why we use
-
-- makeNode for the Rec block analysis
-- makeLoopBreakerNodes for the loop breaker analysis
-
- * Note [Finding rule RHS free vars]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Consider this real example from Data Parallel Haskell
- tagZero :: Array Int -> Array Tag
- {-# INLINE [1] tagZeroes #-}
- tagZero xs = pmap (\x -> fromBool (x==0)) xs
-
- {-# RULES "tagZero" [~1] forall xs n.
- pmap fromBool <blah blah> = tagZero xs #-}
- So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
- However, tagZero can only be inlined in phase 1 and later, while
- the RULE is only active *before* phase 1. So there's no problem.
-
- To make this work, we look for the RHS free vars only for
- *active* rules. That's the reason for the occ_rule_act field
- of the OccEnv.
-
- * Note [Weak loop breakers]
- ~~~~~~~~~~~~~~~~~~~~~~~~~
- There is a last nasty wrinkle. Suppose we have
-
- Rec { f = f_rhs
- RULE f [] = g
-
- h = h_rhs
- g = h
- ...more...
- }
-
- Remember that we simplify the RULES before any RHS (see Note
- [Rules are visible in their own rec group] above).
-
- So we must *not* postInlineUnconditionally 'g', even though
- its RHS turns out to be trivial. (I'm assuming that 'g' is
- not chosen as a loop breaker.) Why not? Because then we
- drop the binding for 'g', which leaves it out of scope in the
- RULE!
-
- Here's a somewhat different example of the same thing
- Rec { g = h
- ; h = ...f...
- ; f = f_rhs
- RULE f [] = g }
- Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
- g, because the RULE for f is active throughout. So the RHS of h
- might rewrite to h = ...g...
- So g must remain in scope in the output program!
-
- We "solve" this by:
-
- Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
- iff g is a "missing free variable" of the Rec group
-
- A "missing free variable" x is one that is mentioned in an RHS or
- INLINE or RULE of a binding in the Rec group, but where the
- dependency on x may not show up in the loop_breaker_nodes (see
- note [Choosing loop breakers} above).
-
- A normal "strong" loop breaker has IAmLoopBreaker False. So
-
- Inline postInlineUnconditionally
- strong IAmLoopBreaker False no no
- weak IAmLoopBreaker True yes no
- other yes yes
-
- The **sole** reason for this kind of loop breaker is so that
- postInlineUnconditionally does not fire. Ugh. (Typically it'll
- inline via the usual callSiteInline stuff, so it'll be dead in the
- next pass, so the main Ugh is the tiresome complication.)
--}
+* We only consider variables free in the *RHS* of the rule, in
+ contrast to the way we build the Rec group in the first place (Note
+ [Rule dependency info])
-type Binding = (Id,CoreExpr)
+* Why "transitive sequence of rules"? Because active rules apply
+ unconditionally, without checking loop-breaker-ness.
+ See Note [Loop breaker dependencies].
-loopBreakNodes :: Int
- -> VarSet -- Binders whose dependencies may be "missing"
- -- See Note [Weak loop breakers]
- -> [LetrecNode]
- -> [Binding] -- Append these to the end
- -> [Binding]
-{-
+Note [Finding rule RHS free vars]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this real example from Data Parallel Haskell
+ tagZero :: Array Int -> Array Tag
+ {-# INLINE [1] tagZeroes #-}
+ tagZero xs = pmap (\x -> fromBool (x==0)) xs
+
+ {-# RULES "tagZero" [~1] forall xs n.
+ pmap fromBool <blah blah> = tagZero xs #-}
+So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
+However, tagZero can only be inlined in phase 1 and later, while
+the RULE is only active *before* phase 1. So there's no problem.
+
+To make this work, we look for the RHS free vars only for
+*active* rules. That's the reason for the occ_rule_act field
+of the OccEnv.
+
+Note [loopBreakNodes]
+~~~~~~~~~~~~~~~~~~~~~
loopBreakNodes is applied to the list of nodes for a cyclic strongly
connected component (there's guaranteed to be a cycle). It returns
the same nodes, but
@@ -1039,7 +1071,18 @@ that the simplifier will generally do a good job if it works from top bottom,
recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
-}
+type Binding = (Id,CoreExpr)
+
+-- See Note [loopBreakNodes]
+loopBreakNodes :: Int
+ -> VarSet -- Binders whose dependencies may be "missing"
+ -- See Note [Weak loop breakers]
+ -> [LetrecNode]
+ -> [Binding] -- Append these to the end
+ -> [Binding]
+
-- Return the bindings sorted into a plausible order, and marked with loop breakers.
+-- See Note [loopBreakNodes]
loopBreakNodes depth weak_fvs nodes binds
= -- pprTrace "loopBreakNodes" (ppr nodes) $
go (stronglyConnCompFromEdgedVerticesUniqR nodes)
@@ -1269,16 +1312,13 @@ data Details
-- ignoring phase (ie assuming all are active)
-- See Note [Forming Rec groups]
- , nd_inl :: IdSet -- Free variables of
- -- the stable unfolding (if present and active)
- -- or the RHS (if not)
+ , nd_inl :: IdSet -- Free variables of the stable unfolding and the RHS
-- but excluding any RULES
-- This is the IdSet that may be used if the Id is inlined
- , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
- -- but are *not* in nd_inl. These are the ones whose
- -- dependencies might not be respected by loop_breaker_nodes
- -- See Note [Weak loop breakers]
+ , nd_simple :: Bool -- True iff this binding has no local RULES
+ -- If all nodes are simple we don't need a loop-breaker
+ -- dep-anal before reconstructing.
, nd_active_rule_fvs :: IdSet -- Variables bound in this Rec group that are free
-- in the RHS of an active rule for this bndr
@@ -1291,8 +1331,8 @@ instance Outputable Details where
(sep [ text "bndr =" <+> ppr (nd_bndr nd)
, text "uds =" <+> ppr (nd_uds nd)
, text "inl =" <+> ppr (nd_inl nd)
- , text "weak =" <+> ppr (nd_weak nd)
- , text "rule_rvs =" <+> ppr (nd_active_rule_fvs nd)
+ , text "simple =" <+> ppr (nd_simple nd)
+ , text "active_rule_fvs =" <+> ppr (nd_active_rule_fvs nd)
, text "score =" <+> ppr (nd_score nd)
])
@@ -1315,7 +1355,7 @@ makeNode :: OccEnv -> ImpRuleEdges -> VarSet
makeNode env imp_rule_edges bndr_set (bndr, rhs)
= DigraphNode { node_payload = details
, node_key = varUnique bndr
- , node_dependencies = nonDetKeysUniqSet node_fvs }
+ , node_dependencies = nonDetKeysUniqSet scope_fvs }
-- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
-- is still deterministic with edges in nondeterministic order as
-- explained in Note [Deterministic SCC] in GHC.Data.Graph.Directed.
@@ -1323,26 +1363,35 @@ makeNode env imp_rule_edges bndr_set (bndr, rhs)
details = ND { nd_bndr = bndr'
, nd_rhs = rhs'
, nd_rhs_bndrs = bndrs'
- , nd_uds = rhs_usage
+ , nd_uds = scope_uds
, nd_inl = inl_fvs
- , nd_weak = node_fvs `minusVarSet` inl_fvs
+ , nd_simple = null rules_w_uds && null imp_rule_info
, nd_active_rule_fvs = active_rule_fvs
, nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
bndr' = bndr `setIdUnfolding` unf'
`setIdSpecialisation` mkRuleInfo rules'
- rhs_usage = rhs_uds `andUDs` unf_uds `andUDs` rule_uds
+ inl_uds = rhs_uds `andUDs` unf_uds
+ scope_uds = inl_uds `andUDs` rule_uds
-- Note [Rules are extra RHSs]
-- Note [Rule dependency info]
- node_fvs = udFreeVars bndr_set rhs_usage
+ scope_fvs = udFreeVars bndr_set scope_uds
+ -- scope_fvs: all occurrences from this binder: RHS, unfolding,
+ -- and RULES, both LHS and RHS thereof, active or inactive
+ inl_fvs = udFreeVars bndr_set inl_uds
+ -- inl_fvs: vars that would become free if the function was inlined.
+ -- We conservatively approximate that by thefree vars from the RHS
+ -- and the unfolding together.
+ -- See Note [inl_fvs]
+
+ mb_join_arity = isJoinId_maybe bndr
-- Get join point info from the *current* decision
-- We don't know what the new decision will be!
-- Using the old decision at least allows us to
-- preserve existing join point, even RULEs are added
-- See Note [Join points and unfoldings/rules]
- mb_join_arity = isJoinId_maybe bndr
--------- Right hand side ---------
-- Constructing the edges for the main Rec computation
@@ -1359,16 +1408,6 @@ makeNode env imp_rule_edges bndr_set (bndr, rhs)
unf = realIdUnfolding bndr -- realIdUnfolding: Ignore loop-breaker-ness
-- here because that is what we are setting!
(unf_uds, unf') = occAnalUnfolding rhs_env Recursive mb_join_arity unf
- inl_uds | isStableUnfolding unf = unf_uds
- | otherwise = rhs_uds
- inl_fvs = udFreeVars bndr_set inl_uds
- -- inl_fvs: the vars that would become free if the function was inlined;
- -- usually that means the RHS, unless the unfolding is a stable one.
- -- Note: We could do this only for functions with an *active* unfolding
- -- (returning emptyVarSet for an inactive one), but is_active
- -- isn't the right thing (it tells about RULE activation),
- -- so we'd need more plumbing
-
--------- IMP-RULES --------
is_active = occ_rule_act env :: Activation -> Bool
@@ -1397,6 +1436,7 @@ mkLoopBreakerNodes :: OccEnv -> TopLevelFlag
-> [Details]
-> (UsageDetails, -- adjusted
[LetrecNode])
+-- See Note [Choosing loop breakers]
-- This function primarily creates the Nodes for the
-- loop-breaker SCC analysis. More specifically:
-- a) tag each binder with its occurrence info
@@ -1445,8 +1485,7 @@ The loop breaker dependencies of x in a recursive
group { f1 = e1; ...; fn = en } are:
- The "inline free variables" of f: the fi free in
- either f's unfolding (if f has a stable unfolding)
- of f's RHS (if not)
+ f's stable unfolding and RHS; see Note [inl_fvs]
- Any fi reachable from those inline free variables by a sequence
of RULE rewrites. Remember, rule rewriting is not affected
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index da039a8e83..13f0fdc46c 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -3996,14 +3996,17 @@ simplRules env mb_new_id rules mb_cont
= return rule
simpl_rule rule@(Rule { ru_bndrs = bndrs, ru_args = args
- , ru_fn = fn_name, ru_rhs = rhs })
+ , ru_fn = fn_name, ru_rhs = rhs
+ , ru_act = act })
= do { (env', bndrs') <- simplBinders env bndrs
; let rhs_ty = substTy env' (exprType rhs)
rhs_cont = case mb_cont of -- See Note [Rules and unfolding for join points]
Nothing -> mkBoringStop rhs_ty
Just cont -> ASSERT2( join_ok, bad_join_msg )
cont
- rule_env = updMode updModeForRules env'
+ lhs_env = updMode updModeForRules env'
+ rhs_env = updMode (updModeForStableUnfoldings act) env'
+ -- See Note [Simplifying the RHS of a RULE]
fn_name' = case mb_new_id of
Just id -> idName id
Nothing -> fn_name
@@ -4018,9 +4021,18 @@ simplRules env mb_new_id rules mb_cont
bad_join_msg = vcat [ ppr mb_new_id, ppr rule
, ppr (fmap isJoinId_maybe mb_new_id) ]
- ; args' <- mapM (simplExpr rule_env) args
- ; rhs' <- simplExprC rule_env rhs rhs_cont
+ ; args' <- mapM (simplExpr lhs_env) args
+ ; rhs' <- simplExprC rhs_env rhs rhs_cont
; return (rule { ru_bndrs = bndrs'
, ru_fn = fn_name'
, ru_args = args'
, ru_rhs = rhs' }) }
+
+{- Note [Simplifying the RHS of a RULE]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We can simplify the RHS of a RULE much as we do the RHS of a stable
+unfolding. We used to use the much more conservative updModeForRules
+for the RHS as well as the LHS, but that seems more conservative
+than necesary. Allowing some inlining might, for example, eliminate
+a binding.
+-} \ No newline at end of file
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index 51dbc408d0..2e27466c55 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -883,12 +883,12 @@ simplEnvForGHCi logger dflags
updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode
-- See Note [Simplifying inside stable unfoldings]
-updModeForStableUnfoldings inline_rule_act current_mode
- = current_mode { sm_phase = phaseFromActivation inline_rule_act
+updModeForStableUnfoldings unf_act current_mode
+ = current_mode { sm_phase = phaseFromActivation unf_act
, sm_inline = True
, sm_eta_expand = False }
- -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
- -- For sm_rules, just inherit; sm_rules might be "off"
+ -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
+ -- sm_rules: just inherit; sm_rules might be "off"
-- because of -fno-enable-rewrite-rules
where
phaseFromActivation (ActiveAfter _ n) = Phase n
diff --git a/compiler/GHC/CoreToStg.hs b/compiler/GHC/CoreToStg.hs
index ee885eaacf..5a53cc933f 100644
--- a/compiler/GHC/CoreToStg.hs
+++ b/compiler/GHC/CoreToStg.hs
@@ -949,18 +949,33 @@ myCollectArgs expr
where
go (Var v) as ts = (v, as, ts)
go (App f a) as ts = go f (a:as) ts
- go (Tick t e) as ts = ASSERT( all isTypeArg as )
+ go (Tick t e) as ts = ASSERT2( not (tickishIsCode t) || all isTypeArg as
+ , ppr e $$ ppr as $$ ppr ts )
+ -- See Note [Ticks in applications]
go e as (t:ts) -- ticks can appear in type apps
go (Cast e _) as ts = go e as ts
go (Lam b e) as ts
| isTyVar b = go e as ts -- Note [Collect args]
go _ _ _ = pprPanic "CoreToStg.myCollectArgs" (ppr expr)
--- Note [Collect args]
--- ~~~~~~~~~~~~~~~~~~~
---
--- This big-lambda case occurred following a rather obscure eta expansion.
--- It all seems a bit yukky to me.
+{- Note [Collect args]
+~~~~~~~~~~~~~~~~~~~~~~
+This big-lambda case occurred following a rather obscure eta expansion.
+It all seems a bit yukky to me.
+
+Note [Ticks in applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We can get an application like
+ (tick t f) True False
+via inlining in the CorePrep pass; see Note [Inlining in CorePrep]
+in GHC.CoreToStg.Prep. The tick does not satisfy tickishIsCode;
+the inlining-in-CorePrep happens for cpExprIsTrivial which tests
+tickishIsCode.
+
+So we test the same thing here, pushing any non-code ticks to
+the top (they don't generate any code, after all). This showed
+up in the fallout from fixing #19360.
+-}
stgArity :: Id -> HowBound -> Arity
stgArity _ (LetBound _ arity) = arity
diff --git a/testsuite/tests/numeric/should_compile/T7116.stdout b/testsuite/tests/numeric/should_compile/T7116.stdout
index 996d391b44..8560ad4a10 100644
--- a/testsuite/tests/numeric/should_compile/T7116.stdout
+++ b/testsuite/tests/numeric/should_compile/T7116.stdout
@@ -62,11 +62,9 @@ dl :: Double -> Double
Arity=1,
Str=<SP(U)>,
Cpr=m1,
- Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
WorkFree=True, Expandable=True,
- Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
- Tmpl= \ (x [Occ=Once1!] :: Double) ->
- case x of { GHC.Types.D# y -> GHC.Types.D# (GHC.Prim.+## y y) }}]
+ Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=True)}]
dl = dr
-- RHS size: {terms: 8, types: 3, coercions: 0, joins: 0/0}
@@ -94,13 +92,9 @@ fl :: Float -> Float
Arity=1,
Str=<SP(U)>,
Cpr=m1,
- Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
WorkFree=True, Expandable=True,
- Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
- Tmpl= \ (x [Occ=Once1!] :: Float) ->
- case x of { GHC.Types.F# y ->
- GHC.Types.F# (GHC.Prim.plusFloat# y y)
- }}]
+ Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=True)}]
fl = fr
diff --git a/testsuite/tests/simplCore/should_compile/T19360.hs b/testsuite/tests/simplCore/should_compile/T19360.hs
new file mode 100644
index 0000000000..3998fb6d51
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T19360.hs
@@ -0,0 +1,29 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE RankNTypes #-}
+
+module T19360 where
+
+import GHC.Exts (oneShot)
+import Data.Typeable (Typeable, cast)
+
+newtype SDoc = SDoc' (Bool -> Bool)
+
+data ApiAnn = ApiAnn
+
+showAstData :: D a => Bool -> a -> SDoc
+showAstData z = showAstData'
+ where
+ showAstData' :: D a => a -> SDoc
+ showAstData' x =
+ case cast x of
+ Just (a :: ApiAnn) ->
+ case z of
+ True -> showAstData' a
+ False -> SDoc' (oneShot (\_ -> False))
+ Nothing -> gmapQr showAstData' x
+
+class Typeable a => D a where
+ gmapQr :: forall r. (forall d. D d => d -> r) -> a -> r
+
+instance D ApiAnn where
+ gmapQr g ApiAnn = g ApiAnn
diff --git a/testsuite/tests/simplCore/should_compile/T8331.stderr b/testsuite/tests/simplCore/should_compile/T8331.stderr
index 030b175298..1c51037481 100644
--- a/testsuite/tests/simplCore/should_compile/T8331.stderr
+++ b/testsuite/tests/simplCore/should_compile/T8331.stderr
@@ -4,6 +4,11 @@
forall (@s)
($dMonadAbstractIOST :: MonadAbstractIOST (ReaderT Int (ST s))).
useAbstractMonad @(ReaderT Int (ST s)) $dMonadAbstractIOST
- = useAbstractMonad_$suseAbstractMonad @s
+ = (useAbstractMonad1 @s)
+ `cast` (<Int>_R
+ %<'Many>_N ->_R <Int>_R %<'Many>_N ->_R Sym (N:ST[0] <s>_N <Int>_R)
+ ; Sym (N:ReaderT[0] <Int>_R <ST s>_R <Int>_N)
+ :: Coercible
+ (Int -> Int -> STRep s Int) (Int -> ReaderT Int (ST s) Int))
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index b3936d11e6..a2783f33e1 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -346,3 +346,4 @@ test('T18668', normal, compile, ['-dsuppress-uniques'])
test('T18995', [ grep_errmsg(r'print') ], compile, ['-O -ddump-simpl -dsuppress-uniques'])
test('T19168', normal, compile, [''])
test('T19246', only_ways(['optasm']), multimod_compile, ['T19246', '-v0 -ddump-rules'])
+test('T19360', only_ways(['optasm']), compile, [''])