summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/coreSyn/CoreSyn.hs14
-rw-r--r--compiler/simplCore/OccurAnal.hs740
2 files changed, 475 insertions, 279 deletions
diff --git a/compiler/coreSyn/CoreSyn.hs b/compiler/coreSyn/CoreSyn.hs
index cb84e27b5b..cf570211f5 100644
--- a/compiler/coreSyn/CoreSyn.hs
+++ b/compiler/coreSyn/CoreSyn.hs
@@ -57,7 +57,7 @@ module CoreSyn (
maybeUnfoldingTemplate, otherCons,
isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
- isStableUnfolding, hasStableCoreUnfolding_maybe,
+ isStableUnfolding,
isClosedUnfolding, hasSomeUnfolding,
isBootUnfolding,
canUnfold, neverUnfoldGuidance, isStableSource,
@@ -1256,18 +1256,6 @@ expandUnfolding_maybe :: Unfolding -> Maybe CoreExpr
expandUnfolding_maybe (CoreUnfolding { uf_expandable = True, uf_tmpl = rhs }) = Just rhs
expandUnfolding_maybe _ = Nothing
-hasStableCoreUnfolding_maybe :: Unfolding -> Maybe Bool
--- Just True <=> has stable inlining, very keen to inline (eg. INLINE pragma)
--- Just False <=> has stable inlining, open to inlining it (eg. INLINABLE pragma)
--- Nothing <=> not stable, or cannot inline it anyway
-hasStableCoreUnfolding_maybe (CoreUnfolding { uf_src = src, uf_guidance = guide })
- | isStableSource src
- = case guide of
- UnfWhen {} -> Just True
- UnfIfGoodArgs {} -> Just False
- UnfNever -> Nothing
-hasStableCoreUnfolding_maybe _ = Nothing
-
isCompulsoryUnfolding :: Unfolding -> Bool
isCompulsoryUnfolding (CoreUnfolding { uf_src = InlineCompulsory }) = True
isCompulsoryUnfolding _ = False
diff --git a/compiler/simplCore/OccurAnal.hs b/compiler/simplCore/OccurAnal.hs
index 6950e56cdb..4ed96f5cab 100644
--- a/compiler/simplCore/OccurAnal.hs
+++ b/compiler/simplCore/OccurAnal.hs
@@ -34,7 +34,9 @@ import VarEnv
import Var
import Demand ( argOneShots, argsOneShots )
import Maybes ( orElse )
-import Digraph ( SCC(..), stronglyConnCompFromEdgedVerticesUniqR )
+import Digraph ( SCC(..), Node
+ , stronglyConnCompFromEdgedVerticesUniq
+ , stronglyConnCompFromEdgedVerticesUniqR )
import Unique
import UniqFM
import Util
@@ -45,7 +47,7 @@ import Control.Arrow ( second )
{-
************************************************************************
* *
-\subsection[OccurAnal-main]{Counting occurrences: main function}
+ occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
* *
************************************************************************
@@ -512,7 +514,7 @@ things right. For example, it might be that the rule LHS mentions an imported Id
and another module has a RULE that can rewrite that imported Id to one of our local
Ids.
-Note [Specialising imported functions]
+Note [Specialising imported functions] (referred to from Specialise)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
BUT for *automatically-generated* rules, the programmer can't be
responsible for the "programmer error" in Note [Rules for imported
@@ -640,10 +642,9 @@ But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite
This showed up when compiling Control.Concurrent.Chan.getChanContents.
-}
-type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
-
-noImpRuleEdges :: ImpRuleEdges
-noImpRuleEdges = emptyVarEnv
+------------------------------------------------------------------
+-- occAnalBind
+------------------------------------------------------------------
occAnalBind :: OccEnv -- The incoming OccEnv
-> ImpRuleEdges
@@ -692,111 +693,23 @@ occAnalRecBind env imp_rule_edges pairs body_usage
-- * feed those components to occAnalRec
-- See Note [Recursive bindings: the grand plan]
where
- bndr_set = mkVarSet (map fst pairs)
-
- sccs :: [SCC (Node Details)]
+ sccs :: [SCC Details]
sccs = {-# SCC "occAnalBind.scc" #-}
- stronglyConnCompFromEdgedVerticesUniqR nodes
+ stronglyConnCompFromEdgedVerticesUniq nodes
- nodes :: [Node Details]
+ nodes :: [LetrecNode]
nodes = {-# SCC "occAnalBind.assoc" #-}
- map (makeNode env imp_rule_edges bndr_set) pairs
-
-type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
- -- which is gotten from the Id.
-data Details
- = ND { nd_bndr :: Id -- Binder
- , nd_rhs :: CoreExpr -- RHS, already occ-analysed
-
- , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
- -- ignoring phase (ie assuming all are active)
- -- See Note [Forming Rec groups]
-
- , nd_inl :: IdSet -- Free variables of
- -- the stable unfolding (if present and active)
- -- or the RHS (if not)
- -- but excluding any RULES
- -- This is the IdSet that may be used if the Id is inlined
-
- , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
- -- but are *not* in nd_inl. These are the ones whose
- -- dependencies might not be respected by loop_breaker_nodes
- -- See Note [Weak loop breakers]
-
- , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
- }
-
-instance Outputable Details where
- ppr nd = text "ND" <> braces
- (sep [ text "bndr =" <+> ppr (nd_bndr nd)
- , text "uds =" <+> ppr (nd_uds nd)
- , text "inl =" <+> ppr (nd_inl nd)
- , text "weak =" <+> ppr (nd_weak nd)
- , text "rule =" <+> ppr (nd_active_rule_fvs nd)
- ])
+ map (makeNode env imp_rule_edges bndr_set) pairs
-makeNode :: OccEnv -> ImpRuleEdges -> VarSet -> (Var, CoreExpr) -> Node Details
--- See Note [Recursive bindings: the grand plan]
-makeNode env imp_rule_edges bndr_set (bndr, rhs)
- = (details, varUnique bndr, nonDetKeysUFM node_fvs)
- -- It's OK to use nonDetKeysUFM here as stronglyConnCompFromEdgedVerticesR
- -- is still deterministic with edges in nondeterministic order as
- -- explained in Note [Deterministic SCC] in Digraph.
- where
- details = ND { nd_bndr = bndr
- , nd_rhs = rhs'
- , nd_uds = rhs_usage3
- , nd_weak = node_fvs `minusVarSet` inl_fvs
- , nd_inl = inl_fvs
- , nd_active_rule_fvs = active_rule_fvs }
-
- -- Constructing the edges for the main Rec computation
- -- See Note [Forming Rec groups]
- (rhs_usage1, rhs') = occAnalRecRhs env rhs
- rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs -- Note [Rules are extra RHSs]
- -- Note [Rule dependency info]
- rhs_usage3 = case mb_unf_fvs of
- Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
- Nothing -> rhs_usage2
- node_fvs = udFreeVars bndr_set rhs_usage3
-
- -- Finding the free variables of the rules
- is_active = occ_rule_act env :: Activation -> Bool
- rules = filterOut isBuiltinRule (idCoreRules bndr)
- rules_w_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
- rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rule_edges bndr)
- -- See Note [Preventing loops due to imported functions rules]
- [ (ru_act rule, fvs)
- | rule <- rules
- , let fvs = exprFreeVars (ru_rhs rule)
- `delVarSetList` ru_bndrs rule
- , not (isEmptyVarSet fvs) ]
- all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs
- rule_rhs_fvs = mapUnionVarSet snd rules_w_fvs
- rule_lhs_fvs = mapUnionVarSet (\ru -> exprsFreeVars (ru_args ru)
- `delVarSetList` ru_bndrs ru) rules
- active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]
-
- -- Finding the free variables of the INLINE pragma (if any)
- unf = realIdUnfolding bndr -- Ignore any current loop-breaker flag
- mb_unf_fvs = stableUnfoldingVars unf
-
- -- Find the "nd_inl" free vars; for the loop-breaker phase
- inl_fvs = case mb_unf_fvs of
- Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
- Just unf_fvs -> unf_fvs
- -- We could check for an *active* INLINE (returning
- -- emptyVarSet for an inactive one), but is_active
- -- isn't the right thing (it tells about
- -- RULE activation), so we'd need more plumbing
+ bndr_set = mkVarSet (map fst pairs)
-----------------------------
-occAnalRec :: SCC (Node Details)
+occAnalRec :: SCC Details
-> (UsageDetails, [CoreBind])
-> (UsageDetails, [CoreBind])
-- The NonRec case is just like a Let (NonRec ...) above
-occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _, _))
+occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}))
(body_uds, binds)
| not (bndr `usedIn` body_uds)
= (body_uds, binds) -- See Note [Dead code]
@@ -810,7 +723,7 @@ occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _,
-- The Rec case is the interesting one
-- See Note [Recursive bindings: the grand plan]
-- See Note [Loop breaking]
-occAnalRec (CyclicSCC nodes) (body_uds, binds)
+occAnalRec (CyclicSCC details_s) (body_uds, binds)
| not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
= (body_uds, binds) -- See Note [Dead code]
@@ -822,23 +735,23 @@ occAnalRec (CyclicSCC nodes) (body_uds, binds)
(final_uds, Rec pairs : binds)
where
- details_s :: [Details]
- details_s = map fstOf3 nodes
- bndrs = [b | (ND { nd_bndr = b }) <- details_s]
- bndr_set = mkVarSet bndrs
+ bndrs = map nd_bndr details_s
+ bndr_set = mkVarSet bndrs
----------------------------
- -- Tag the binders with their occurrence info
- tagged_details_s :: [Details]
- tagged_details_s = map tag_details details_s
+ -- Compute usage details
total_uds = foldl add_uds body_uds details_s
final_uds = total_uds `minusVarEnv` bndr_set
add_uds usage_so_far nd = usage_so_far +++ nd_uds nd
- tag_details :: Details -> Details
- tag_details details@(ND { nd_bndr = bndr })
- | let bndr1 = setBinderOcc total_uds bndr
- = details { nd_bndr = bndr1 }
+ ------------------------------
+ -- See Note [Choosing loop breakers] for loop_breaker_nodes
+ loop_breaker_nodes :: [LetrecNode]
+ loop_breaker_nodes = mkLoopBreakerNodes bndr_set total_uds details_s
+
+ ------------------------------
+ weak_fvs :: VarSet
+ weak_fvs = mapUnionVarSet nd_weak details_s
---------------------------
-- Now reconstruct the cycle
@@ -852,35 +765,24 @@ occAnalRec (CyclicSCC nodes) (body_uds, binds)
-- single CyclicSCC result; and reOrderNodes deals with
-- exactly that case
- weak_fvs :: VarSet
- weak_fvs = mapUnionVarSet nd_weak details_s
- -- See Note [Choosing loop breakers] for loop_breaker_nodes
- loop_breaker_nodes :: [Node Details]
- loop_breaker_nodes = map mk_lb_node tagged_details_s
- mk_lb_node details@(ND { nd_bndr = b, nd_inl = inl_fvs })
- = (details, varUnique b, nonDetKeysUFM (extendFvs_ rule_fv_env inl_fvs))
- -- It's OK to use nonDetKeysUFM here as
- -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
- -- in nondeterministic order as explained in
- -- Note [Deterministic SCC] in Digraph.
-
- ------------------------------------
- rule_fv_env :: IdEnv IdSet
- -- Maps a variable f to the variables from this group
- -- mentioned in RHS of active rules for f
- -- Domain is *subset* of bound vars (others have no rule fvs)
- rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
- init_rule_fvs -- See Note [Finding rule RHS free vars]
- = [ (b, trimmed_rule_fvs)
- | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
- , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
- , not (isEmptyVarSet trimmed_rule_fvs) ]
+------------------------------------------------------------------
+-- Loop breaking
+------------------------------------------------------------------
+
+type Binding = (Id,CoreExpr)
+loopBreakNodes :: Int
+ -> VarSet -- All binders
+ -> VarSet -- Binders whose dependencies may be "missing"
+ -- See Note [Weak loop breakers]
+ -> [LetrecNode]
+ -> [Binding] -- Append these to the end
+ -> [Binding]
{-
-@loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic
-strongly connected component (there's guaranteed to be a cycle). It returns the
-same pairs, but
+loopBreakNodes is applied to the list of nodes for a cyclic strongly
+connected component (there's guaranteed to be a cycle). It returns
+the same nodes, but
a) in a better order,
b) with some of the Ids having a IAmALoopBreaker pragma
@@ -894,29 +796,6 @@ that the simplifier will generally do a good job if it works from top bottom,
recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
-}
-type Binding = (Id,CoreExpr)
-
-mk_loop_breaker :: Node Details -> Binding
-mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
- = (setIdOccInfo bndr strongLoopBreaker, rhs)
-
-mk_non_loop_breaker :: VarSet -> Node Details -> Binding
--- See Note [Weak loop breakers]
-mk_non_loop_breaker weak_fvs (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
- | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr weakLoopBreaker, rhs)
- | otherwise = (bndr, rhs)
-
-udFreeVars :: VarSet -> UsageDetails -> VarSet
--- Find the subset of bndrs that are mentioned in uds
-udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds
-
-loopBreakNodes :: Int
- -> VarSet -- All binders
- -> VarSet -- Binders whose dependencies may be "missing"
- -- See Note [Weak loop breakers]
- -> [Node Details]
- -> [Binding] -- Append these to the end
- -> [Binding]
-- Return the bindings sorted into a plausible order, and marked with loop breakers.
loopBreakNodes depth bndr_set weak_fvs nodes binds
= go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
@@ -929,9 +808,10 @@ loopBreakNodes depth bndr_set weak_fvs nodes binds
AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
-reOrderNodes :: Int -> VarSet -> VarSet -> [Node Details] -> [Binding] -> [Binding]
+----------------------------------
+reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
-- Choose a loop breaker, mark it no-inline,
- -- do SCC analysis on the rest, and recursively sort them out
+ -- and call loopBreakNodes on the rest
reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
@@ -940,93 +820,54 @@ reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
loopBreakNodes new_depth bndr_set weak_fvs unchosen $
(map mk_loop_breaker chosen_nodes ++ binds)
where
- (chosen_nodes, unchosen) = choose_loop_breaker (score node) [node] [] nodes
+ (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
+ (nd_score (fstOf3 node))
+ [node] [] nodes
- approximate_loop_breaker = depth >= 2
- new_depth | approximate_loop_breaker = 0
- | otherwise = depth+1
+ approximate_lb = depth >= 2
+ new_depth | approximate_lb = 0
+ | otherwise = depth+1
-- After two iterations (d=0, d=1) give up
-- and approximate, returning to d=0
- choose_loop_breaker :: Int -- Best score so far
- -> [Node Details] -- Nodes with this score
- -> [Node Details] -- Nodes with higher scores
- -> [Node Details] -- Unprocessed nodes
- -> ([Node Details], [Node Details])
- -- This loop looks for the bind with the lowest score
- -- to pick as the loop breaker. The rest accumulate in
- choose_loop_breaker _ loop_nodes acc []
- = (loop_nodes, acc) -- Done
-
- -- If approximate_loop_breaker is True, we pick *all*
- -- nodes with lowest score, else just one
- -- See Note [Complexity of loop breaking]
- choose_loop_breaker loop_sc loop_nodes acc (node : nodes)
- | sc < loop_sc -- Lower score so pick this new one
- = choose_loop_breaker sc [node] (loop_nodes ++ acc) nodes
-
- | approximate_loop_breaker && sc == loop_sc
- = choose_loop_breaker loop_sc (node : loop_nodes) acc nodes
-
- | otherwise -- Higher score so don't pick it
- = choose_loop_breaker loop_sc loop_nodes (node : acc) nodes
- where
- sc = score node
-
- score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker
- score (ND { nd_bndr = bndr, nd_rhs = rhs }, _, _)
- | not (isId bndr) = 100 -- A type or cercion variable is never a loop breaker
-
- | isDFunId bndr = 9 -- Never choose a DFun as a loop breaker
- -- Note [DFuns should not be loop breakers]
-
- | Just be_very_keen <- hasStableCoreUnfolding_maybe (idUnfolding bndr)
- = if be_very_keen then 6 -- Note [Loop breakers and INLINE/INLINABLE pragmas]
- else 3
- -- Data structures are more important than INLINE pragmas
- -- so that dictionary/method recursion unravels
- -- Note that this case hits all stable unfoldings, so we
- -- never look at 'rhs' for stable unfoldings. That's right, because
- -- 'rhs' is irrelevant for inlining things with a stable unfolding
-
- | is_con_app rhs = 5 -- Data types help with cases: Note [Constructor applications]
-
- | exprIsTrivial rhs = 10 -- Practically certain to be inlined
- -- Used to have also: && not (isExportedId bndr)
- -- But I found this sometimes cost an extra iteration when we have
- -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
- -- where df is the exported dictionary. Then df makes a really
- -- bad choice for loop breaker
-
-
--- If an Id is marked "never inline" then it makes a great loop breaker
--- The only reason for not checking that here is that it is rare
--- and I've never seen a situation where it makes a difference,
--- so it probably isn't worth the time to test on every binder
--- | isNeverActive (idInlinePragma bndr) = -10
-
- | isOneOcc (idOccInfo bndr) = 2 -- Likely to be inlined
-
- | canUnfold (realIdUnfolding bndr) = 1
- -- The Id has some kind of unfolding
- -- Ignore loop-breaker-ness here because that is what we are setting!
+mk_loop_breaker :: LetrecNode -> Binding
+mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
+ = (setIdOccInfo bndr strongLoopBreaker, rhs)
- | otherwise = 0
+mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
+-- See Note [Weak loop breakers]
+mk_non_loop_breaker weak_fvs (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
+ | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr weakLoopBreaker, rhs)
+ | otherwise = (bndr, rhs)
- -- Checking for a constructor application
- -- Cheap and cheerful; the simplifier moves casts out of the way
- -- The lambda case is important to spot x = /\a. C (f a)
- -- which comes up when C is a dictionary constructor and
- -- f is a default method.
- -- Example: the instance for Show (ST s a) in GHC.ST
- --
- -- However we *also* treat (\x. C p q) as a con-app-like thing,
- -- Note [Closure conversion]
- is_con_app (Var v) = isConLikeId v
- is_con_app (App f _) = is_con_app f
- is_con_app (Lam _ e) = is_con_app e
- is_con_app (Tick _ e) = is_con_app e
- is_con_app _ = False
+----------------------------------
+chooseLoopBreaker :: Bool -- True <=> Too many iterations,
+ -- so approximate
+ -> NodeScore -- Best score so far
+ -> [LetrecNode] -- Nodes with this score
+ -> [LetrecNode] -- Nodes with higher scores
+ -> [LetrecNode] -- Unprocessed nodes
+ -> ([LetrecNode], [LetrecNode])
+ -- This loop looks for the bind with the lowest score
+ -- to pick as the loop breaker. The rest accumulate in
+chooseLoopBreaker _ _ loop_nodes acc []
+ = (loop_nodes, acc) -- Done
+
+ -- If approximate_loop_breaker is True, we pick *all*
+ -- nodes with lowest score, else just one
+ -- See Note [Complexity of loop breaking]
+chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
+ | approx_lb
+ , rank sc == rank loop_sc
+ = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
+
+ | sc `betterLB` loop_sc -- Better score so pick this new one
+ = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
+
+ | otherwise -- Worse score so don't pick it
+ = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
+ where
+ sc = nd_score (fstOf3 node)
{-
Note [Complexity of loop breaking]
@@ -1150,6 +991,362 @@ ToDo: try using the occurrence info for the inline'd binder.
[March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
[June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
+
+
+************************************************************************
+* *
+ Making nodes
+* *
+************************************************************************
+-}
+
+type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
+
+noImpRuleEdges :: ImpRuleEdges
+noImpRuleEdges = emptyVarEnv
+
+type LetrecNode = Node Unique Details -- Node comes from Digraph
+ -- The Unique key is gotten from the Id
+data Details
+ = ND { nd_bndr :: Id -- Binder
+ , nd_rhs :: CoreExpr -- RHS, already occ-analysed
+
+ , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
+ -- ignoring phase (ie assuming all are active)
+ -- See Note [Forming Rec groups]
+
+ , nd_inl :: IdSet -- Free variables of
+ -- the stable unfolding (if present and active)
+ -- or the RHS (if not)
+ -- but excluding any RULES
+ -- This is the IdSet that may be used if the Id is inlined
+
+ , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
+ -- but are *not* in nd_inl. These are the ones whose
+ -- dependencies might not be respected by loop_breaker_nodes
+ -- See Note [Weak loop breakers]
+
+ , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
+
+ , nd_score :: NodeScore
+ }
+
+instance Outputable Details where
+ ppr nd = text "ND" <> braces
+ (sep [ text "bndr =" <+> ppr (nd_bndr nd)
+ , text "uds =" <+> ppr (nd_uds nd)
+ , text "inl =" <+> ppr (nd_inl nd)
+ , text "weak =" <+> ppr (nd_weak nd)
+ , text "rule =" <+> ppr (nd_active_rule_fvs nd)
+ ])
+
+-- The NodeScore is compared lexicographically;
+-- e.g. lower rank wins regardless of size
+type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
+ , Int -- Size of rhs: higher => more likely to be picked as LB
+ -- Maxes out at maxExprSize; we just use it to prioritise
+ -- small functions
+ , Bool ) -- Was it a loop breaker before?
+ -- True => more likely to be picked
+ -- Note [Loop breakers, node scoring, and stability]
+
+rank :: NodeScore -> Int
+rank (r, _, _) = r
+
+makeNode :: OccEnv -> ImpRuleEdges -> VarSet
+ -> (Var, CoreExpr) -> LetrecNode
+-- See Note [Recursive bindings: the grand plan]
+makeNode env imp_rule_edges bndr_set (bndr, rhs)
+ = (details, varUnique bndr, nonDetKeysUFM node_fvs)
+ -- It's OK to use nonDetKeysUFM here as stronglyConnCompFromEdgedVerticesR
+ -- is still deterministic with edges in nondeterministic order as
+ -- explained in Note [Deterministic SCC] in Digraph.
+ where
+ details = ND { nd_bndr = bndr
+ , nd_rhs = rhs'
+ , nd_uds = rhs_usage3
+ , nd_inl = inl_fvs
+ , nd_weak = node_fvs `minusVarSet` inl_fvs
+ , nd_active_rule_fvs = active_rule_fvs
+ , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
+
+ -- Constructing the edges for the main Rec computation
+ -- See Note [Forming Rec groups]
+ (rhs_usage1, rhs') = occAnalRecRhs env rhs
+ rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs -- Note [Rules are extra RHSs]
+ -- Note [Rule dependency info]
+ rhs_usage3 = case mb_unf_fvs of
+ Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
+ Nothing -> rhs_usage2
+ node_fvs = udFreeVars bndr_set rhs_usage3
+
+ -- Finding the free variables of the rules
+ is_active = occ_rule_act env :: Activation -> Bool
+ rules = filterOut isBuiltinRule (idCoreRules bndr)
+ rules_w_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
+ rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rule_edges bndr)
+ -- See Note [Preventing loops due to imported functions rules]
+ [ (ru_act rule, fvs)
+ | rule <- rules
+ , let fvs = exprFreeVars (ru_rhs rule)
+ `delVarSetList` ru_bndrs rule
+ , not (isEmptyVarSet fvs) ]
+ all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs
+ rule_rhs_fvs = mapUnionVarSet snd rules_w_fvs
+ rule_lhs_fvs = mapUnionVarSet (\ru -> exprsFreeVars (ru_args ru)
+ `delVarSetList` ru_bndrs ru) rules
+ active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]
+
+ -- Finding the free variables of the INLINE pragma (if any)
+ unf = realIdUnfolding bndr -- Ignore any current loop-breaker flag
+ mb_unf_fvs = stableUnfoldingVars unf
+
+ -- Find the "nd_inl" free vars; for the loop-breaker phase
+ inl_fvs = case mb_unf_fvs of
+ Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
+ Just unf_fvs -> unf_fvs
+ -- We could check for an *active* INLINE (returning
+ -- emptyVarSet for an inactive one), but is_active
+ -- isn't the right thing (it tells about
+ -- RULE activation), so we'd need more plumbing
+
+mkLoopBreakerNodes :: VarSet -> UsageDetails -> [Details] -> [LetrecNode]
+-- Does three things
+-- a) tag each binder with its occurrence info
+-- b) add a NodeScore to each node
+-- c) make a Node with the right dependency edges for
+-- the loop-breaker SCC analysis
+mkLoopBreakerNodes bndr_set total_uds details_s
+ = map mk_lb_node details_s
+ where
+ mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs })
+ = (nd', varUnique bndr, nonDetKeysUFM lb_deps)
+ -- It's OK to use nonDetKeysUFM here as
+ -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
+ -- in nondeterministic order as explained in
+ -- Note [Deterministic SCC] in Digraph.
+ where
+ nd' = nd { nd_bndr = bndr', nd_score = score }
+ bndr' = setBinderOcc total_uds bndr
+ score = nodeScore bndr bndr' rhs lb_deps
+ lb_deps = extendFvs_ rule_fv_env inl_fvs
+
+ rule_fv_env :: IdEnv IdSet
+ -- Maps a variable f to the variables from this group
+ -- mentioned in RHS of active rules for f
+ -- Domain is *subset* of bound vars (others have no rule fvs)
+ rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
+ init_rule_fvs -- See Note [Finding rule RHS free vars]
+ = [ (b, trimmed_rule_fvs)
+ | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
+ , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
+ , not (isEmptyVarSet trimmed_rule_fvs) ]
+
+
+------------------------------------------
+nodeScore :: Id -- Binder has old occ-info (just for loop-breaker-ness)
+ -> Id -- Binder with new occ-info
+ -> CoreExpr -- RHS
+ -> VarSet -- Loop-breaker dependencies
+ -> NodeScore
+nodeScore old_bndr new_bndr bind_rhs lb_deps
+ | not (isId old_bndr) -- A type or cercion variable is never a loop breaker
+ = (100, 0, False)
+
+ | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
+ = (0, 0, True) -- See Note [Self-recursion and loop breakers]
+
+ | otherwise -- An Id has an unfolding
+ = case id_unfolding of
+ DFunUnfolding { df_args = args }
+ -- Never choose a DFun as a loop breaker
+ -- Note [DFuns should not be loop breakers]
+ -> (9, length args, is_lb)
+
+ CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs, uf_guidance = guide }
+ | isStableSource src
+ -> case guide of
+ UnfWhen {} -> (6, cheapExprSize unf_rhs, is_lb)
+ UnfIfGoodArgs { ug_size = size} -> (3, size, is_lb)
+ UnfNever -> (0, 0, is_lb)
+ -- See Note [Loop breakers and INLINE/INLINABLE pragmas] for
+ -- the 6 vs 3 choice
+
+ -- Note that this case hits /all/ stable unfoldings, so we
+ -- never look at 'bind_rhs' for stable unfoldings. That's right, because
+ -- 'rhs' is irrelevant for inlining things with a stable unfolding
+
+ -- Data structures are more important than INLINE pragmas
+ -- so that dictionary/method recursion unravels
+
+ _ | exprIsTrivial bind_rhs
+ -> mk_score 10 -- Practically certain to be inlined
+ -- Used to have also: && not (isExportedId bndr)
+ -- But I found this sometimes cost an extra iteration when we have
+ -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
+ -- where df is the exported dictionary. Then df makes a really
+ -- bad choice for loop breaker
+
+ | is_con_app bind_rhs -- Data types help with cases: Note [Constructor applications]
+ -> mk_score 5
+
+ | isOneOcc (idOccInfo new_bndr)
+ -> mk_score 2 -- Likely to be inlined
+
+ | canUnfold id_unfolding -- The Id has some kind of unfolding
+ -> mk_score 1
+
+ | otherwise
+ -> (0, 0, is_lb)
+
+ where
+ mk_score :: Int -> NodeScore
+ mk_score rank = (rank, rhs_size, is_lb)
+
+ is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
+ rhs_size = case id_unfolding of
+ CoreUnfolding { uf_guidance = guidance }
+ | UnfIfGoodArgs { ug_size = size } <- guidance
+ -> size
+ _ -> cheapExprSize bind_rhs
+
+ id_unfolding = realIdUnfolding old_bndr
+ -- realIdUnfolding: Ignore loop-breaker-ness here because
+ -- that is what we are setting!
+
+ -- Checking for a constructor application
+ -- Cheap and cheerful; the simplifier moves casts out of the way
+ -- The lambda case is important to spot x = /\a. C (f a)
+ -- which comes up when C is a dictionary constructor and
+ -- f is a default method.
+ -- Example: the instance for Show (ST s a) in GHC.ST
+ --
+ -- However we *also* treat (\x. C p q) as a con-app-like thing,
+ -- Note [Closure conversion]
+ is_con_app (Var v) = isConLikeId v
+ is_con_app (App f _) = is_con_app f
+ is_con_app (Lam _ e) = is_con_app e
+ is_con_app (Tick _ e) = is_con_app e
+ is_con_app _ = False
+
+maxExprSize :: Int
+maxExprSize = 20 -- Rather arbitrary
+
+cheapExprSize :: CoreExpr -> Int
+-- Maxes out at maxExprSize
+cheapExprSize e
+ = go 0 e
+ where
+ go n e | n >= maxExprSize = n
+ | otherwise = go1 n e
+
+ go1 n (Var {}) = n+1
+ go1 n (Lit {}) = n+1
+ go1 n (Type {}) = n
+ go1 n (Coercion {}) = n
+ go1 n (Tick _ e) = go1 n e
+ go1 n (Cast e _) = go1 n e
+ go1 n (App f a) = go (go1 n f) a
+ go1 n (Lam b e)
+ | isTyVar b = go1 n e
+ | otherwise = go (n+1) e
+ go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
+ go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
+
+ gos n [] = n
+ gos n (e:es) | n >= maxExprSize = n
+ | otherwise = gos (go1 n e) es
+
+betterLB :: NodeScore -> NodeScore -> Bool
+-- If n1 `betterLB` n2 then choose n1 as the loop breaker
+betterLB (rank1, size1, lb1) (rank2, size2, _)
+ | rank1 < rank2 = True
+ | rank1 > rank2 = False
+ | size1 < size2 = False -- Make the bigger n2 into the loop breaker
+ | size1 > size2 = True
+ | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
+ | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
+
+{- Note [Self-recursion and loop breakers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+ rec { f = ...f...g...
+ ; g = .....f... }
+then 'f' has to be a loop breaker anyway, so we may as well choose it
+right away, so that g can inline freely.
+
+This is really just a cheap hack. Consider
+ rec { f = ...g...
+ ; g = ..f..h...
+ ; h = ...f....}
+Here f or g are better loop breakers than h; but we might accidentally
+choose h. Finding the minimal set of loop breakers is hard.
+
+Note [Loop breakers, node scoring, and stability]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+To choose a loop breaker, we give a NodeScore to each node in the SCC,
+and pick the one with the best score (according to 'betterLB').
+
+We need to be jolly careful (Trac #12425, #12234) about the stability
+of this choice. Suppose we have
+
+ let rec { f = ...g...g...
+ ; g = ...f...f... }
+ in
+ case x of
+ True -> ...f..
+ False -> ..f...
+
+In each iteration of the simplifier the occurrence analyser OccAnal
+chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
+breaker. That means it is free to inline f.
+
+Suppose that GHC decides to inline f in the branches of the case, but
+(for some reason; eg it is not satureated) in the rhs of g. So we get
+
+ let rec { f = ...g...g...
+ ; g = ...f...f... }
+ in
+ case x of
+ True -> ...g...g.....
+ False -> ..g..g....
+
+Now suppose that, for some reason, in the next iteraion the occurrence
+analyser chooses f as the loop breaker, so it can freely inling g. And
+again for some reason the simplifer inlines g at its calls in the case
+branches, but not in the RHS of f. Then we get
+
+ let rec { f = ...g...g...
+ ; g = ...f...f... }
+ in
+ case x of
+ True -> ...(...f...f...)...(...f..f..).....
+ False -> ..(...f...f...)...(..f..f...)....
+
+You can see where this is going! Each iteration of the simplifier
+doubles the number of calls to f or g. No wonder GHC is slow!
+
+(In the particular example in comment:3 of #12425, f and g are the two
+mutually recursive fmap instances for CondT and Result. They are both
+marked INLINE which, oddly, is why they don't inline in each other's
+RHS, because the call there is not saturated.)
+
+The root cause is that we flip-flop on our choice of loop breaker. I
+always thought it didn't matter, and indeed for any single iteration
+to terminate, it doesn't matter. But when we iterate, it matters a
+lot!!
+
+So The Plan is this:
+ If there is a tie, choose the node that
+ was a loop breaker last time round
+
+Hence the is_lb field of NodeScore
+
+************************************************************************
+* *
+ Right hand sides
+* *
+************************************************************************
-}
occAnalRecRhs :: OccEnv -> CoreExpr -- Rhs
@@ -1184,19 +1381,6 @@ occAnalNonRecRhs env bndr rhs
active = isAlwaysActive (idInlineActivation bndr)
not_stable = not (isStableUnfolding (idUnfolding bndr))
-addIdOccs :: UsageDetails -> VarSet -> UsageDetails
-addIdOccs usage id_set = nonDetFoldUFM addIdOcc usage id_set
- -- It's OK to use nonDetFoldUFM here because addIdOcc commutes
-
-addIdOcc :: Id -> UsageDetails -> UsageDetails
-addIdOcc v u | isId v = addOneOcc u v NoOccInfo
- | otherwise = u
- -- Give a non-committal binder info (i.e NoOccInfo) because
- -- a) Many copies of the specialised thing can appear
- -- b) We don't want to substitute a BIG expression inside a RULE
- -- even if that's the only occurrence of the thing
- -- (Same goes for INLINE.)
-
{-
Note [Cascading inlines]
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1238,8 +1422,12 @@ This is worse than the slow cascade, so we only want to say "certainly_inline"
if it really is certain. Look at the note with preInlineUnconditionally
for the various clauses.
-Expressions
-~~~~~~~~~~~
+
+************************************************************************
+* *
+ Expressions
+* *
+************************************************************************
-}
occAnal :: OccEnv
@@ -1419,12 +1607,15 @@ occAnalApp env (Var fun, args, ticks)
uds = fun_uds +++ final_args_uds
!(args_uds, args') = occAnalArgs env args one_shots
- !final_args_uds = markManyIf (isRhsEnv env && is_exp) args_uds
+ !final_args_uds
+ | isRhsEnv env && is_exp = mapVarEnv markInsideLam args_uds
+ | otherwise = args_uds
-- We mark the free vars of the argument of a constructor or PAP
- -- as "many", if it is the RHS of a let(rec).
- -- This means that nothing gets inlined into a constructor argument
- -- position, which is what we want. Typically those constructor
- -- arguments are just variables, or trivial expressions.
+ -- as "inside-lambda", if it is the RHS of a let(rec).
+ -- This means that nothing gets inlined into a constructor or PAP
+ -- argument position, which is what we want. Typically those
+ -- constructor arguments are just variables, or trivial expressions.
+ -- We use inside-lam because it's like eta-expanding the PAP.
--
-- This is the *whole point* of the isRhsEnv predicate
-- See Note [Arguments of let-bound constructors]
@@ -1889,6 +2080,23 @@ emptyDetails = (emptyVarEnv :: UsageDetails)
usedIn :: Id -> UsageDetails -> Bool
v `usedIn` details = isExportedId v || v `elemVarEnv` details
+addIdOccs :: UsageDetails -> VarSet -> UsageDetails
+addIdOccs usage id_set = nonDetFoldUFM addIdOcc usage id_set
+ -- It's OK to use nonDetFoldUFM here because addIdOcc commutes
+
+addIdOcc :: Id -> UsageDetails -> UsageDetails
+addIdOcc v u | isId v = addOneOcc u v NoOccInfo
+ | otherwise = u
+ -- Give a non-committal binder info (i.e NoOccInfo) because
+ -- a) Many copies of the specialised thing can appear
+ -- b) We don't want to substitute a BIG expression inside a RULE
+ -- even if that's the only occurrence of the thing
+ -- (Same goes for INLINE.)
+
+udFreeVars :: VarSet -> UsageDetails -> VarSet
+-- Find the subset of bndrs that are mentioned in uds
+udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds
+
type IdWithOccInfo = Id
tagLamBinders :: UsageDetails -- Of scope