diff options
Diffstat (limited to 'compiler/simplCore/OccurAnal.hs')
-rw-r--r-- | compiler/simplCore/OccurAnal.hs | 386 |
1 files changed, 222 insertions, 164 deletions
diff --git a/compiler/simplCore/OccurAnal.hs b/compiler/simplCore/OccurAnal.hs index 5dd30aa668..236bb81066 100644 --- a/compiler/simplCore/OccurAnal.hs +++ b/compiler/simplCore/OccurAnal.hs @@ -19,10 +19,13 @@ module OccurAnal ( #include "HsVersions.h" +import GhcPrelude + import CoreSyn import CoreFVs import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp, stripTicksTopE, mkTicks ) +import CoreArity ( joinRhsArity ) import Id import IdInfo import Name( localiseName ) @@ -56,11 +59,12 @@ import Control.Arrow ( second ) Here's the externally-callable interface: -} -occurAnalysePgm :: Module -- Used only in debug output - -> (Activation -> Bool) - -> [CoreRule] -> [CoreVect] -> VarSet +occurAnalysePgm :: Module -- Used only in debug output + -> (Id -> Bool) -- Active unfoldings + -> (Activation -> Bool) -- Active rules + -> [CoreRule] -> CoreProgram -> CoreProgram -occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds +occurAnalysePgm this_mod active_unf active_rule imp_rules binds | isEmptyDetails final_usage = occ_anald_binds @@ -69,7 +73,9 @@ occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds 2 (ppr final_usage ) ) occ_anald_glommed_binds where - init_env = initOccEnv active_rule + init_env = initOccEnv { occ_rule_act = active_rule + , occ_unf_act = active_unf } + (final_usage, occ_anald_binds) = go init_env binds (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel imp_rule_edges @@ -80,12 +86,8 @@ occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds -- we can easily create an infinite loop (Trac #9583 is an example) initial_uds = addManyOccsSet emptyDetails - (rulesFreeVars imp_rules `unionVarSet` - vectsFreeVars vects `unionVarSet` - vectVars) - -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations, - -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are - -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].) + (rulesFreeVars imp_rules) + -- The RULES declarations keep things alive! -- Note [Preventing loops due to imported functions rules] imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv @@ -118,9 +120,7 @@ occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr occurAnalyseExpr' enable_binder_swap expr = snd (occAnal env expr) where - env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap} - -- To be conservative, we say that all inlines and rules are active - all_active_rules = \_ -> True + env = initOccEnv { occ_binder_swap = enable_binder_swap } {- Note [Plugin rules] ~~~~~~~~~~~~~~~~~~~~~~ @@ -170,7 +170,7 @@ we treat it like this (occAnalRecBind): 4. To do so we form a new set of Nodes, with the same details, but different edges, the "loop-breaker nodes". The loop-breaker nodes - have both more and fewer depedencies than the scope edges + have both more and fewer dependencies than the scope edges (see Note [Choosing loop breakers]) More edges: if f calls g, and g has an active rule that mentions h @@ -698,39 +698,6 @@ costs us anything when, for some `j`: This appears to be very rare in practice. TODO Perhaps we should gather statistics to be sure. -Note [Excess polymorphism and join points] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In principle, if a function would be a join point except that it fails -the polymorphism rule (see Note [The polymorphism rule of join points] in -CoreSyn), it can still be made a join point with some effort. This is because -all tail calls must return the same type (they return to the same context!), and -thus if the return type depends on an argument, that argument must always be the -same. - -For instance, consider: - - let f :: forall a. a -> Char -> [a] - f @a x c = ... f @a x 'a' ... - in ... f @Int 1 'b' ... f @Int 2 'c' ... - -(where the calls are tail calls). `f` fails the polymorphism rule because its -return type is [a], where [a] is bound. But since the type argument is always -'Int', we can rewrite it as: - - let f' :: Int -> Char -> [Int] - f' x c = ... f' x 'a' ... - in ... f' 1 'b' ... f 2 'c' ... - -and now we can make f' a join point: - - join f' :: Int -> Char -> [Int] - f' x c = ... jump f' x 'a' ... - in ... jump f' 1 'b' ... jump f' 2 'c' ... - -It's not clear that this comes up often, however. TODO: Measure how often and -add this analysis if necessary. - ------------------------------------------------------------ Note [Adjusting right-hand sides] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -801,7 +768,7 @@ occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage = (body_usage, []) | otherwise -- It's mentioned in the body - = (body_usage' +++ rhs_usage', [NonRec tagged_binder rhs']) + = (body_usage' `andUDs` rhs_usage', [NonRec tagged_binder rhs']) where (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder mb_join_arity = willBeJoinId_maybe tagged_binder @@ -816,16 +783,17 @@ occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage -- Unfoldings -- See Note [Unfoldings and join points] rhs_usage2 = case occAnalUnfolding env NonRecursive binder of - Just unf_usage -> rhs_usage1 +++ unf_usage + Just unf_usage -> rhs_usage1 `andUDs` unf_usage Nothing -> rhs_usage1 -- Rules -- See Note [Rules are extra RHSs] and Note [Rule dependency info] rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder - rhs_usage3 = rhs_usage2 +++ combineUsageDetailsList - (map (\(_, l, r) -> l +++ r) rules_w_uds) - rhs_usage4 = maybe rhs_usage3 (addManyOccsSet rhs_usage3) $ - lookupVarEnv imp_rule_edges binder + rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds + rhs_usage3 = foldr andUDs rhs_usage2 rule_uds + rhs_usage4 = case lookupVarEnv imp_rule_edges binder of + Nothing -> rhs_usage3 + Just vs -> addManyOccsSet rhs_usage3 vs -- See Note [Preventing loops due to imported functions rules] -- Final adjustment @@ -835,7 +803,7 @@ occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)] -> UsageDetails -> (UsageDetails, [CoreBind]) occAnalRecBind env lvl imp_rule_edges pairs body_usage - = foldr (occAnalRec lvl) (body_usage, []) sccs + = foldr (occAnalRec env lvl) (body_usage, []) sccs -- For a recursive group, we -- * occ-analyse all the RHSs -- * compute strongly-connected components @@ -862,20 +830,20 @@ calls for the purpose of finding join points. -} ----------------------------- -occAnalRec :: TopLevelFlag +occAnalRec :: OccEnv -> TopLevelFlag -> SCC Details -> (UsageDetails, [CoreBind]) -> (UsageDetails, [CoreBind]) -- The NonRec case is just like a Let (NonRec ...) above -occAnalRec lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs - , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs })) +occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs + , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs })) (body_uds, binds) | not (bndr `usedIn` body_uds) = (body_uds, binds) -- See Note [Dead code] | otherwise -- It's mentioned in the body - = (body_uds' +++ rhs_uds', + = (body_uds' `andUDs` rhs_uds', NonRec tagged_bndr rhs : binds) where (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr @@ -885,7 +853,7 @@ occAnalRec lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs -- The Rec case is the interesting one -- See Note [Recursive bindings: the grand plan] -- See Note [Loop breaking] -occAnalRec lvl (CyclicSCC details_s) (body_uds, binds) +occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds) | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds = (body_uds, binds) -- See Note [Dead code] @@ -904,7 +872,7 @@ occAnalRec lvl (CyclicSCC details_s) (body_uds, binds) final_uds :: UsageDetails loop_breaker_nodes :: [LetrecNode] (final_uds, loop_breaker_nodes) - = mkLoopBreakerNodes lvl bndr_set body_uds details_s + = mkLoopBreakerNodes env lvl bndr_set body_uds details_s ------------------------------ weak_fvs :: VarSet @@ -955,7 +923,8 @@ recording inlinings for any Ids which aren't marked as "no-inline" as it goes. -- Return the bindings sorted into a plausible order, and marked with loop breakers. loopBreakNodes depth bndr_set weak_fvs nodes binds - = go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds + = -- pprTrace "loopBreakNodes" (ppr nodes) $ + go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds where go [] binds = binds go (scc:sccs) binds = loop_break_scc scc (go sccs binds) @@ -972,8 +941,8 @@ reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding reOrderNodes _ _ _ [] _ = panic "reOrderNodes" reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds reOrderNodes depth bndr_set weak_fvs (node : nodes) binds - = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$ - -- text "chosen" <+> ppr chosen_nodes) $ + = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen + -- , text "chosen" <+> ppr chosen_nodes ]) $ loopBreakNodes new_depth bndr_set weak_fvs unchosen $ (map mk_loop_breaker chosen_nodes ++ binds) where @@ -1243,11 +1212,11 @@ makeNode env imp_rule_edges bndr_set (bndr, rhs) (bndrs, body) = collectBinders rhs (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body rhs' = mkLams bndrs' body' - rhs_usage2 = rhs_usage1 +++ all_rule_uds + rhs_usage2 = foldr andUDs rhs_usage1 rule_uds -- Note [Rules are extra RHSs] -- Note [Rule dependency info] rhs_usage3 = case mb_unf_uds of - Just unf_uds -> rhs_usage2 +++ unf_uds + Just unf_uds -> rhs_usage2 `andUDs` unf_uds Nothing -> rhs_usage2 node_fvs = udFreeVars bndr_set rhs_usage3 @@ -1263,8 +1232,7 @@ makeNode env imp_rule_edges bndr_set (bndr, rhs) -- See Note [Preventing loops due to imported functions rules] [ (ru_act rule, udFreeVars bndr_set rhs_uds) | (rule, _, rhs_uds) <- rules_w_uds ] - all_rule_uds = combineUsageDetailsList $ - concatMap (\(_, l, r) -> [l, r]) rules_w_uds + rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs , is_active a] @@ -1280,7 +1248,7 @@ makeNode env imp_rule_edges bndr_set (bndr, rhs) -- isn't the right thing (it tells about -- RULE activation), so we'd need more plumbing -mkLoopBreakerNodes :: TopLevelFlag +mkLoopBreakerNodes :: OccEnv -> TopLevelFlag -> VarSet -> UsageDetails -- for BODY of let -> [Details] @@ -1293,7 +1261,7 @@ mkLoopBreakerNodes :: TopLevelFlag -- the loop-breaker SCC analysis -- d) adjust each RHS's usage details according to -- the binder's (new) shotness and join-point-hood -mkLoopBreakerNodes lvl bndr_set body_uds details_s +mkLoopBreakerNodes env lvl bndr_set body_uds details_s = (final_uds, zipWith mk_lb_node details_s bndrs') where (final_uds, bndrs') = tagRecBinders lvl body_uds @@ -1309,7 +1277,7 @@ mkLoopBreakerNodes lvl bndr_set body_uds details_s -- Note [Deterministic SCC] in Digraph. where nd' = nd { nd_bndr = bndr', nd_score = score } - score = nodeScore bndr bndr' rhs lb_deps + score = nodeScore env bndr bndr' rhs lb_deps lb_deps = extendFvs_ rule_fv_env inl_fvs rule_fv_env :: IdEnv IdSet @@ -1325,18 +1293,22 @@ mkLoopBreakerNodes lvl bndr_set body_uds details_s ------------------------------------------ -nodeScore :: Id -- Binder has old occ-info (just for loop-breaker-ness) +nodeScore :: OccEnv + -> Id -- Binder has old occ-info (just for loop-breaker-ness) -> Id -- Binder with new occ-info -> CoreExpr -- RHS -> VarSet -- Loop-breaker dependencies -> NodeScore -nodeScore old_bndr new_bndr bind_rhs lb_deps +nodeScore env old_bndr new_bndr bind_rhs lb_deps | not (isId old_bndr) -- A type or cercion variable is never a loop breaker = (100, 0, False) | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers = (0, 0, True) -- See Note [Self-recursion and loop breakers] + | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has + = (0, 0, True) -- a NOINLINE pragam) makes a great loop breaker + | exprIsTrivial rhs = mk_score 10 -- Practically certain to be inlined -- Used to have also: && not (isExportedId bndr) @@ -1553,19 +1525,24 @@ occAnalNonRecRhs :: OccEnv occAnalNonRecRhs env bndr bndrs body = occAnalLamOrRhs rhs_env bndrs body where - -- See Note [Cascading inlines] - env1 | certainly_inline = env + env1 | is_join_point = env -- See Note [Join point RHSs] + | certainly_inline = env -- See Note [Cascading inlines] | otherwise = rhsCtxt env -- See Note [Sources of one-shot information] rhs_env = env1 { occ_one_shots = argOneShots dmd } certainly_inline -- See Note [Cascading inlines] - = case idOccInfo bndr of + = case occ of OneOcc { occ_in_lam = in_lam, occ_one_br = one_br } - -> not in_lam && one_br && active && not_stable - _ -> False + -> not in_lam && one_br && active && not_stable + _ -> False + + is_join_point = isAlwaysTailCalled occ + -- Like (isJoinId bndr) but happens one step earlier + -- c.f. willBeJoinId_maybe + occ = idOccInfo bndr dmd = idDemandInfo bndr active = isAlwaysActive (idInlineActivation bndr) not_stable = not (isStableUnfolding (idUnfolding bndr)) @@ -1591,7 +1568,7 @@ occAnalUnfolding env rec_flag id DFunUnfolding { df_bndrs = bndrs, df_args = args } -> Just $ zapDetails (delDetailsList usage bndrs) where - usage = foldr (+++) emptyDetails (map (fst . occAnal env) args) + usage = andUDsList (map (fst . occAnal env) args) _ -> Nothing @@ -1626,7 +1603,18 @@ occAnalRules env mb_expected_join_arity rec_flag id = case mb_expected_join_arity of Just ar | args `lengthIs` ar -> uds _ -> markAllNonTailCalled uds -{- +{- Note [Join point RHSs] +~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + x = e + join j = Just x + +We want to inline x into j right away, so we don't want to give +the join point a RhsCtxt (Trac #14137). It's not a huge deal, because +the FloatIn pass knows to float into join point RHSs; and the simplifier +does not float things out of join point RHSs. But it's a simple, cheap +thing to do. See Trac #14137. + Note [Cascading inlines] ~~~~~~~~~~~~~~~~~~~~~~~~ By default we use an rhsCtxt for the RHS of a binding. This tells the @@ -1653,15 +1641,19 @@ definitely inline the next time round, and so we analyse x3's rhs in an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff. Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally. -If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates -indefinitely: +If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and + (b) certainly_inline says "yes" when preInlineUnconditionally says "no" +then the simplifier iterates indefinitely: x = f y - k = Just x + k = Just x -- We decide that k is 'certainly_inline' + v = ...k... -- but preInlineUnconditionally doesn't inline it inline ==> k = Just (f y) + v = ...k... float ==> x1 = f y k = Just x1 + v = ...k... This is worse than the slow cascade, so we only want to say "certainly_inline" if it really is certain. Look at the note with preInlineUnconditionally @@ -1702,11 +1694,17 @@ we can sort them into the right place when doing dependency analysis. -} occAnal env (Tick tickish body) + | SourceNote{} <- tickish + = (usage, Tick tickish body') + -- SourceNotes are best-effort; so we just proceed as usual. + -- If we drop a tick due to the issues described below it's + -- not the end of the world. + | tickish `tickishScopesLike` SoftScope = (markAllNonTailCalled usage, Tick tickish body') | Breakpoint _ ids <- tickish - = (usage_lam +++ foldr addManyOccs emptyDetails ids, Tick tickish body') + = (usage_lam `andUDs` foldr addManyOccs emptyDetails ids, Tick tickish body') -- never substitute for any of the Ids in a Breakpoint | otherwise @@ -1721,16 +1719,17 @@ occAnal env (Tick tickish body) -- Making j a join point may cause the simplifier to drop t -- (if the tick is put into the continuation). So we don't -- count j 1 as a tail call. + -- See #14242. occAnal env (Cast expr co) = case occAnal env expr of { (usage, expr') -> let usage1 = zapDetailsIf (isRhsEnv env) usage + -- usage1: if we see let x = y `cast` co + -- then mark y as 'Many' so that we don't + -- immediately inline y again. usage2 = addManyOccsSet usage1 (coVarsOfCo co) - -- See Note [Gather occurrences of coercion variables] + -- usage2: see Note [Gather occurrences of coercion variables] in (markAllNonTailCalled usage2, Cast expr' co) - -- If we see let x = y `cast` co - -- then mark y as 'Many' so that we don't - -- immediately inline y again. } occAnal env app@(App _ _) @@ -1772,30 +1771,13 @@ occAnal env (Case scrut bndr ty alts) = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') -> case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') -> let - alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s - (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr - total_usage = markAllNonTailCalled scrut_usage +++ alts_usage1 + alts_usage = foldr orUDs emptyDetails alts_usage_s + (alts_usage1, tagged_bndr) = tagLamBinder alts_usage bndr + total_usage = markAllNonTailCalled scrut_usage `andUDs` alts_usage1 -- Alts can have tail calls, but the scrutinee can't in total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }} where - -- Note [Case binder usage] - -- ~~~~~~~~~~~~~~~~~~~~~~~~ - -- The case binder gets a usage of either "many" or "dead", never "one". - -- Reason: we like to inline single occurrences, to eliminate a binding, - -- but inlining a case binder *doesn't* eliminate a binding. - -- We *don't* want to transform - -- case x of w { (p,q) -> f w } - -- into - -- case x of w { (p,q) -> f (p,q) } - tag_case_bndr usage bndr - = (usage', setIdOccInfo bndr final_occ_info) - where - occ_info = lookupDetails usage bndr - usage' = usage `delDetails` bndr - final_occ_info = case occ_info of IAmDead -> IAmDead - _ -> noOccInfo - alt_env = mkAltEnv env scrut bndr occ_anal_alt = occAnalAlt alt_env @@ -1834,7 +1816,7 @@ occAnalArgs env (arg:args) one_shots = case argCtxt env one_shots of { (arg_env, one_shots') -> case occAnal arg_env arg of { (uds1, arg') -> case occAnalArgs env args one_shots' of { (uds2, args') -> - (uds1 +++ uds2, arg':args') }}} + (uds1 `andUDs` uds2, arg':args') }}} {- Applications are dealt with specially because we want @@ -1860,7 +1842,7 @@ occAnalApp env (Var fun, args, ticks) | null ticks = (uds, mkApps (Var fun) args') | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args') where - uds = fun_uds +++ final_args_uds + uds = fun_uds `andUDs` final_args_uds !(args_uds, args') = occAnalArgs env args one_shots !final_args_uds @@ -1890,7 +1872,7 @@ occAnalApp env (Var fun, args, ticks) -- See Note [Sources of one-shot information], bullet point A'] occAnalApp env (fun, args, ticks) - = (markAllNonTailCalled (fun_uds +++ args_uds), + = (markAllNonTailCalled (fun_uds `andUDs` args_uds), mkTicks ticks $ mkApps fun' args') where !(fun_uds, fun') = occAnal (addAppCtxt env args) fun @@ -2024,10 +2006,9 @@ occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr)) occAnalAlt (env, scrut_bind) (con, bndrs, rhs) = case occAnal env rhs of { (rhs_usage1, rhs1) -> let - (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs - -- See Note [Binders in case alternatives] - (alt_usg', rhs2) = - wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1 + (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs + -- See Note [Binders in case alternatives] + (alt_usg', rhs2) = wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1 in (alt_usg', (con, tagged_bndrs, rhs2)) } @@ -2042,15 +2023,19 @@ wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this -- handles condition (a) in Note [Binder swap] , not captured -- See condition (b) in Note [Binder swap] - = ( alt_usg' +++ let_rhs_usg + = ( alt_usg' `andUDs` let_rhs_usg , Let (NonRec tagged_scrut_var let_rhs') alt_rhs ) where - captured = any (`usedIn` let_rhs_usg) bndrs + captured = any (`usedIn` let_rhs_usg) bndrs -- Check condition (b) + -- The rhs of the let may include coercion variables -- if the scrutinee was a cast, so we must gather their -- usage. See Note [Gather occurrences of coercion variables] + -- Moreover, the rhs of the let may mention the case-binder, and + -- we want to gather its occ-info as well (let_rhs_usg, let_rhs') = occAnal env let_rhs - (alt_usg', [tagged_scrut_var]) = tagLamBinders alt_usg [scrut_var] + + (alt_usg', tagged_scrut_var) = tagLamBinder alt_usg scrut_var wrapAltRHS _ _ alt_usg _ alt_rhs = (alt_usg, alt_rhs) @@ -2067,8 +2052,12 @@ data OccEnv = OccEnv { occ_encl :: !OccEncl -- Enclosing context information , occ_one_shots :: !OneShots -- See Note [OneShots] , occ_gbl_scrut :: GlobalScruts + + , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active + , occ_rule_act :: Activation -> Bool -- Which rules are active -- See Note [Finding rule RHS free vars] + , occ_binder_swap :: !Bool -- enable the binder_swap -- See CorePrep Note [Dead code in CorePrep] } @@ -2081,7 +2070,7 @@ type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees] -- x = (p,q) -- Don't inline p or q -- y = /\a -> (p a, q a) -- Still don't inline p or q -- z = f (p,q) -- Do inline p,q; it may make a rule fire --- So OccEncl tells enought about the context to know what to do when +-- So OccEncl tells enough about the context to know what to do when -- we encounter a constructor application or PAP. data OccEncl @@ -2097,12 +2086,15 @@ instance Outputable OccEncl where -- See note [OneShots] type OneShots = [OneShotInfo] -initOccEnv :: (Activation -> Bool) -> OccEnv -initOccEnv active_rule +initOccEnv :: OccEnv +initOccEnv = OccEnv { occ_encl = OccVanilla , occ_one_shots = [] , occ_gbl_scrut = emptyVarSet - , occ_rule_act = active_rule + -- To be conservative, we say that all + -- inlines and rules are active + , occ_unf_act = \_ -> True + , occ_rule_act = \_ -> True , occ_binder_swap = True } vanillaCtxt :: OccEnv -> OccEnv @@ -2160,7 +2152,12 @@ markJoinOneShots mb_join_arity bndrs Just n -> go n bndrs where go 0 bndrs = bndrs - go _ [] = WARN( True, ppr mb_join_arity <+> ppr bndrs ) [] + go _ [] = [] -- This can legitimately happen. + -- e.g. let j = case ... in j True + -- This will become an arity-1 join point after the + -- simplifier has eta-expanded it; but it may not have + -- enough lambdas /yet/. (Lint checks that JoinIds do + -- have enough lambdas.) go n (b:bs) = b' : go (n-1) bs where b' | isId b = setOneShotLambda b @@ -2298,6 +2295,9 @@ Core Lint never expects to find an *occurrence* of an Id marked as Dead, so we must zap the OccInfo on cb before making the binding x = cb. See Trac #5028. +NB: the OccInfo on /occurrences/ really doesn't matter much; the simplifier +doesn't use it. So this is only to satisfy the perhpas-over-picky Lint. + Historical note [no-case-of-case] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We *used* to suppress the binder-swap in case expressions when @@ -2361,10 +2361,10 @@ information right. -} mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr)) --- Does two things: a) makes the occ_one_shots = OccVanilla --- b) extends the GlobalScruts if possible --- c) returns a proxy mapping, binding the scrutinee --- to the case binder, if possible +-- Does three things: a) makes the occ_one_shots = OccVanilla +-- b) extends the GlobalScruts if possible +-- c) returns a proxy mapping, binding the scrutinee +-- to the case binder, if possible mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr = case stripTicksTopE (const True) scrut of Var v -> add_scrut v case_bndr' @@ -2373,15 +2373,19 @@ mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr _ -> (env { occ_encl = OccVanilla }, Nothing) where - add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v } + add_scrut v rhs = ( env { occ_encl = OccVanilla + , occ_gbl_scrut = pe `extendVarSet` v } , Just (localise v, rhs) ) - case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings] - localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var) - -- Localise the scrut_var before shadowing it; we're making a - -- new binding for it, and it might have an External Name, or - -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees] - -- Also we don't want any INLINE or NOINLINE pragmas! + case_bndr' = Var (zapIdOccInfo case_bndr) + -- See Note [Zap case binders in proxy bindings] + + -- Localise the scrut_var before shadowing it; we're making a + -- new binding for it, and it might have an External Name, or + -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees] + -- Also we don't want any INLINE or NOINLINE pragmas! + localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) + (idType scrut_var) {- ************************************************************************ @@ -2426,13 +2430,13 @@ instance Outputable UsageDetails where ------------------- -- UsageDetails API -(+++), combineAltsUsageDetails +andUDs, orUDs :: UsageDetails -> UsageDetails -> UsageDetails -(+++) = combineUsageDetailsWith addOccInfo -combineAltsUsageDetails = combineUsageDetailsWith orOccInfo +andUDs = combineUsageDetailsWith addOccInfo +orUDs = combineUsageDetailsWith orOccInfo -combineUsageDetailsList :: [UsageDetails] -> UsageDetails -combineUsageDetailsList = foldl (+++) emptyDetails +andUDsList :: [UsageDetails] -> UsageDetails +andUDsList = foldl' andUDs emptyDetails mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails mkOneOcc env id int_cxt arity @@ -2581,14 +2585,21 @@ tagLamBinders :: UsageDetails -- Of scope -> [Id] -- Binders -> (UsageDetails, -- Details with binders removed [IdWithOccInfo]) -- Tagged binders +tagLamBinders usage binders + = usage' `seq` (usage', bndrs') + where + (usage', bndrs') = mapAccumR tagLamBinder usage binders + +tagLamBinder :: UsageDetails -- Of scope + -> Id -- Binder + -> (UsageDetails, -- Details with binder removed + IdWithOccInfo) -- Tagged binders -- Used for lambda and case binders -- It copes with the fact that lambda bindings can have a -- stable unfolding, used for join points -tagLamBinders usage binders = usage' `seq` (usage', bndrs') +tagLamBinder usage bndr + = (usage2, bndr') where - (usage', bndrs') = mapAccumR tag_lam usage binders - tag_lam usage bndr = (usage2, bndr') - where occ = lookupDetails usage bndr bndr' = setBinderOcc (markNonTailCalled occ) bndr -- Don't try to make an argument into a join point @@ -2633,7 +2644,7 @@ tagRecBinders lvl body_uds triples -- 1. Determine join-point-hood of whole group, as determined by -- the *unadjusted* usage details - unadj_uds = body_uds +++ combineUsageDetailsList rhs_udss + unadj_uds = foldr andUDs body_uds rhs_udss will_be_joins = decideJoinPointHood lvl unadj_uds bndrs -- 2. Adjust usage details of each RHS, taking into account the @@ -2650,19 +2661,15 @@ tagRecBinders lvl body_uds triples , AlwaysTailCalled arity <- tailCallInfo occ = Just arity | otherwise - = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if we're - -- making join points! - Nothing + = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if + Nothing -- we are making join points! -- 3. Compute final usage details from adjusted RHS details - adj_uds = body_uds +++ combineUsageDetailsList rhs_udss' + adj_uds = foldr andUDs body_uds rhs_udss' - -- 4. Tag each binder with its adjusted details modulo the - -- join-point-hood decision - occs = map (lookupDetails adj_uds) bndrs - occs' | will_be_joins = occs - | otherwise = map markNonTailCalled occs - bndrs' = zipWith setBinderOcc occs' bndrs + -- 4. Tag each binder with its adjusted details + bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr + | bndr <- bndrs ] -- 5. Drop the binders from the adjusted details and return usage' = adj_uds `delDetailsList` bndrs @@ -2683,10 +2690,15 @@ setBinderOcc occ_info bndr -- | Decide whether some bindings should be made into join points or not. -- Returns `False` if they can't be join points. Note that it's an --- all-or-nothing decision, as if multiple binders are given, they're assumed to --- be mutually recursive. +-- all-or-nothing decision, as if multiple binders are given, they're +-- assumed to be mutually recursive. +-- +-- It must, however, be a final decision. If we say "True" for 'f', +-- and then subsequently decide /not/ make 'f' into a join point, then +-- the decision about another binding 'g' might be invalidated if (say) +-- 'f' tail-calls 'g'. -- --- See Note [Invariants for join points] in CoreSyn. +-- See Note [Invariants on join points] in CoreSyn. decideJoinPointHood :: TopLevelFlag -> UsageDetails -> [CoreBndr] -> Bool @@ -2708,11 +2720,18 @@ decideJoinPointHood NotTopLevel usage bndrs ok bndr | -- Invariant 1: Only tail calls, all same join arity AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr) + , -- Invariant 1 as applied to LHSes of rules all (ok_rule arity) (idCoreRules bndr) + + -- Invariant 2a: stable unfoldings + -- See Note [Join points and INLINE pragmas] + , ok_unfolding arity (realIdUnfolding bndr) + -- Invariant 4: Satisfies polymorphism rule , isValidJoinPointType arity (idType bndr) = True + | otherwise = False @@ -2721,14 +2740,52 @@ decideJoinPointHood NotTopLevel usage bndrs = args `lengthIs` join_arity -- Invariant 1 as applied to LHSes of rules + -- ok_unfolding returns False if we should /not/ convert a non-join-id + -- into a join-id, even though it is AlwaysTailCalled + ok_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs }) + = not (isStableSource src && join_arity > joinRhsArity rhs) + ok_unfolding _ (DFunUnfolding {}) + = False + ok_unfolding _ _ + = True + willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity willBeJoinId_maybe bndr - | AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr) - = Just arity - | otherwise - = isJoinId_maybe bndr + = case tailCallInfo (idOccInfo bndr) of + AlwaysTailCalled arity -> Just arity + _ -> isJoinId_maybe bndr + + +{- Note [Join points and INLINE pragmas] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f x = let g = \x. not -- Arity 1 + {-# INLINE g #-} + in case x of + A -> g True True + B -> g True False + C -> blah2 + +Here 'g' is always tail-called applied to 2 args, but the stable +unfolding captured by the INLINE pragma has arity 1. If we try to +convert g to be a join point, its unfolding will still have arity 1 +(since it is stable, and we don't meddle with stable unfoldings), and +Lint will complain (see Note [Invariants on join points], (2a), in +CoreSyn. Trac #13413. + +Moreover, since g is going to be inlined anyway, there is no benefit +from making it a join point. + +If it is recursive, and uselessly marked INLINE, this will stop us +making it a join point, which is annoying. But occasionally +(notably in class methods; see Note [Instances and loop breakers] in +TcInstDcls) we mark recursive things as INLINE but the recursion +unravels; so ignoring INLINE pragmas on recursive things isn't good +either. + +See Invariant 2a of Note [Invariants on join points] in CoreSyn + -{- ************************************************************************ * * \subsection{Operations over OccInfo} @@ -2762,10 +2819,11 @@ orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1 , occ_tail = tail1 }) (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2 , occ_tail = tail2 }) - = OneOcc { occ_in_lam = in_lam1 || in_lam2 - , occ_one_br = False -- False, because it occurs in both branches + = OneOcc { occ_one_br = False -- False, because it occurs in both branches + , occ_in_lam = in_lam1 || in_lam2 , occ_int_cxt = int_cxt1 && int_cxt2 , occ_tail = tail1 `andTailCallInfo` tail2 } + orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo` tailCallInfo a2 } |