summaryrefslogtreecommitdiff
path: root/compiler/simplCore/SimplUtils.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/simplCore/SimplUtils.hs')
-rw-r--r--compiler/simplCore/SimplUtils.hs1779
1 files changed, 1779 insertions, 0 deletions
diff --git a/compiler/simplCore/SimplUtils.hs b/compiler/simplCore/SimplUtils.hs
new file mode 100644
index 0000000000..eec0f4b230
--- /dev/null
+++ b/compiler/simplCore/SimplUtils.hs
@@ -0,0 +1,1779 @@
+{-
+(c) The AQUA Project, Glasgow University, 1993-1998
+
+\section[SimplUtils]{The simplifier utilities}
+-}
+
+{-# LANGUAGE CPP #-}
+
+module SimplUtils (
+ -- Rebuilding
+ mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
+
+ -- Inlining,
+ preInlineUnconditionally, postInlineUnconditionally,
+ activeUnfolding, activeRule,
+ getUnfoldingInRuleMatch,
+ simplEnvForGHCi, updModeForStableUnfoldings,
+
+ -- The continuation type
+ SimplCont(..), DupFlag(..),
+ isSimplified,
+ contIsDupable, contResultType, contInputType,
+ contIsTrivial, contArgs, dropArgs,
+ pushSimplifiedArgs, countValArgs, countArgs,
+ mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
+ interestingCallContext, interestingArg,
+
+ -- ArgInfo
+ ArgInfo(..), ArgSpec(..), mkArgInfo, addArgTo, addCastTo,
+ argInfoExpr, argInfoValArgs,
+
+ abstractFloats
+ ) where
+
+#include "HsVersions.h"
+
+import SimplEnv
+import CoreMonad ( SimplifierMode(..), Tick(..) )
+import MkCore ( sortQuantVars )
+import DynFlags
+import CoreSyn
+import qualified CoreSubst
+import PprCore
+import CoreFVs
+import CoreUtils
+import CoreArity
+import CoreUnfold
+import Name
+import Id
+import Var
+import Demand
+import SimplMonad
+import Type hiding( substTy )
+import Coercion hiding( substCo, substTy )
+import DataCon ( dataConWorkId )
+import VarSet
+import BasicTypes
+import Util
+import MonadUtils
+import Outputable
+import FastString
+import Pair
+
+import Control.Monad ( when )
+
+{-
+************************************************************************
+* *
+ The SimplCont type
+* *
+************************************************************************
+
+A SimplCont allows the simplifier to traverse the expression in a
+zipper-like fashion. The SimplCont represents the rest of the expression,
+"above" the point of interest.
+
+You can also think of a SimplCont as an "evaluation context", using
+that term in the way it is used for operational semantics. This is the
+way I usually think of it, For example you'll often see a syntax for
+evaluation context looking like
+ C ::= [] | C e | case C of alts | C `cast` co
+That's the kind of thing we are doing here, and I use that syntax in
+the comments.
+
+
+Key points:
+ * A SimplCont describes a *strict* context (just like
+ evaluation contexts do). E.g. Just [] is not a SimplCont
+
+ * A SimplCont describes a context that *does not* bind
+ any variables. E.g. \x. [] is not a SimplCont
+-}
+
+data SimplCont
+ = Stop -- An empty context, or <hole>
+ OutType -- Type of the <hole>
+ CallCtxt -- Tells if there is something interesting about
+ -- the context, and hence the inliner
+ -- should be a bit keener (see interestingCallContext)
+ -- Specifically:
+ -- This is an argument of a function that has RULES
+ -- Inlining the call might allow the rule to fire
+ -- Never ValAppCxt (use ApplyTo instead)
+ -- or CaseCtxt (use Select instead)
+
+ | CoerceIt -- <hole> `cast` co
+ OutCoercion -- The coercion simplified
+ -- Invariant: never an identity coercion
+ SimplCont
+
+ | ApplyTo -- <hole> arg
+ DupFlag -- See Note [DupFlag invariants]
+ InExpr StaticEnv -- The argument and its static env
+ SimplCont
+
+ | Select -- case <hole> of alts
+ DupFlag -- See Note [DupFlag invariants]
+ InId [InAlt] StaticEnv -- The case binder, alts type, alts, and subst-env
+ SimplCont
+
+ -- The two strict forms have no DupFlag, because we never duplicate them
+ | StrictBind -- (\x* \xs. e) <hole>
+ InId [InBndr] -- let x* = <hole> in e
+ InExpr StaticEnv -- is a special case
+ SimplCont
+
+ | StrictArg -- f e1 ..en <hole>
+ ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
+ -- plus strictness flags for *further* args
+ CallCtxt -- Whether *this* argument position is interesting
+ SimplCont
+
+ | TickIt
+ (Tickish Id) -- Tick tickish <hole>
+ SimplCont
+
+data ArgInfo
+ = ArgInfo {
+ ai_fun :: OutId, -- The function
+ ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
+ ai_type :: OutType, -- Type of (f a1 ... an)
+
+ ai_rules :: [CoreRule], -- Rules for this function
+
+ ai_encl :: Bool, -- Flag saying whether this function
+ -- or an enclosing one has rules (recursively)
+ -- True => be keener to inline in all args
+
+ ai_strs :: [Bool], -- Strictness of remaining arguments
+ -- Usually infinite, but if it is finite it guarantees
+ -- that the function diverges after being given
+ -- that number of args
+ ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
+ -- Always infinite
+ }
+
+data ArgSpec = ValArg OutExpr -- Apply to this
+ | CastBy OutCoercion -- Cast by this
+
+instance Outputable ArgSpec where
+ ppr (ValArg e) = ptext (sLit "ValArg") <+> ppr e
+ ppr (CastBy c) = ptext (sLit "CastBy") <+> ppr c
+
+addArgTo :: ArgInfo -> OutExpr -> ArgInfo
+addArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
+ , ai_type = applyTypeToArg (ai_type ai) arg }
+
+addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
+addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
+ , ai_type = pSnd (coercionKind co) }
+
+argInfoValArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> ([OutExpr], SimplCont)
+argInfoValArgs env args cont
+ = go args [] cont
+ where
+ go :: [ArgSpec] -> [OutExpr] -> SimplCont -> ([OutExpr], SimplCont)
+ go (ValArg e : as) acc cont = go as (e:acc) cont
+ go (CastBy co : as) acc cont = go as [] (CoerceIt co (pushSimplifiedArgs env acc cont))
+ go [] acc cont = (acc, cont)
+
+argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
+argInfoExpr fun args
+ = go args
+ where
+ go [] = Var fun
+ go (ValArg a : as) = go as `App` a
+ go (CastBy co : as) = mkCast (go as) co
+
+instance Outputable SimplCont where
+ ppr (Stop ty interesting) = ptext (sLit "Stop") <> brackets (ppr interesting) <+> ppr ty
+ ppr (ApplyTo dup arg _ cont) = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
+ {- $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
+ ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
+ ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
+ ppr (Select dup bndr alts se cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
+ ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
+ ppr (CoerceIt co cont) = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
+ ppr (TickIt t cont) = (ptext (sLit "TickIt") <+> ppr t) $$ ppr cont
+
+data DupFlag = NoDup -- Unsimplified, might be big
+ | Simplified -- Simplified
+ | OkToDup -- Simplified and small
+
+isSimplified :: DupFlag -> Bool
+isSimplified NoDup = False
+isSimplified _ = True -- Invariant: the subst-env is empty
+
+instance Outputable DupFlag where
+ ppr OkToDup = ptext (sLit "ok")
+ ppr NoDup = ptext (sLit "nodup")
+ ppr Simplified = ptext (sLit "simpl")
+
+{-
+Note [DupFlag invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+In both (ApplyTo dup _ env k)
+ and (Select dup _ _ env k)
+the following invariants hold
+
+ (a) if dup = OkToDup, then continuation k is also ok-to-dup
+ (b) if dup = OkToDup or Simplified, the subst-env is empty
+ (and and hence no need to re-simplify)
+-}
+
+-------------------
+mkBoringStop :: OutType -> SimplCont
+mkBoringStop ty = Stop ty BoringCtxt
+
+mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
+mkRhsStop ty = Stop ty RhsCtxt
+
+mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
+mkLazyArgStop ty cci = Stop ty cci
+
+-------------------
+contIsRhsOrArg :: SimplCont -> Bool
+contIsRhsOrArg (Stop {}) = True
+contIsRhsOrArg (StrictBind {}) = True
+contIsRhsOrArg (StrictArg {}) = True
+contIsRhsOrArg _ = False
+
+contIsRhs :: SimplCont -> Bool
+contIsRhs (Stop _ RhsCtxt) = True
+contIsRhs _ = False
+
+-------------------
+contIsDupable :: SimplCont -> Bool
+contIsDupable (Stop {}) = True
+contIsDupable (ApplyTo OkToDup _ _ _) = True -- See Note [DupFlag invariants]
+contIsDupable (Select OkToDup _ _ _ _) = True -- ...ditto...
+contIsDupable (CoerceIt _ cont) = contIsDupable cont
+contIsDupable _ = False
+
+-------------------
+contIsTrivial :: SimplCont -> Bool
+contIsTrivial (Stop {}) = True
+contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
+contIsTrivial (ApplyTo _ (Coercion _) _ cont) = contIsTrivial cont
+contIsTrivial (CoerceIt _ cont) = contIsTrivial cont
+contIsTrivial _ = False
+
+-------------------
+contResultType :: SimplCont -> OutType
+contResultType (Stop ty _) = ty
+contResultType (CoerceIt _ k) = contResultType k
+contResultType (StrictBind _ _ _ _ k) = contResultType k
+contResultType (StrictArg _ _ k) = contResultType k
+contResultType (Select _ _ _ _ k) = contResultType k
+contResultType (ApplyTo _ _ _ k) = contResultType k
+contResultType (TickIt _ k) = contResultType k
+
+contInputType :: SimplCont -> OutType
+contInputType (Stop ty _) = ty
+contInputType (CoerceIt co _) = pFst (coercionKind co)
+contInputType (Select d b _ se _) = perhapsSubstTy d se (idType b)
+contInputType (StrictBind b _ _ se _) = substTy se (idType b)
+contInputType (StrictArg ai _ _) = funArgTy (ai_type ai)
+contInputType (ApplyTo d e se k) = mkFunTy (perhapsSubstTy d se (exprType e)) (contInputType k)
+contInputType (TickIt _ k) = contInputType k
+
+perhapsSubstTy :: DupFlag -> SimplEnv -> InType -> OutType
+perhapsSubstTy dup_flag se ty
+ | isSimplified dup_flag = ty
+ | otherwise = substTy se ty
+
+-------------------
+countValArgs :: SimplCont -> Int
+countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ (Coercion _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ _ _ cont) = 1 + countValArgs cont
+countValArgs _ = 0
+
+countArgs :: SimplCont -> Int
+countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont
+countArgs _ = 0
+
+contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
+-- Summarises value args, discards type args and coercions
+-- The returned continuation of the call is only used to
+-- answer questions like "are you interesting?"
+contArgs cont
+ | lone cont = (True, [], cont)
+ | otherwise = go [] cont
+ where
+ lone (ApplyTo {}) = False -- See Note [Lone variables] in CoreUnfold
+ lone (CoerceIt {}) = False
+ lone _ = True
+
+ go args (ApplyTo _ arg se cont)
+ | isTypeArg arg = go args cont
+ | otherwise = go (is_interesting arg se : args) cont
+ go args (CoerceIt _ cont) = go args cont
+ go args cont = (False, reverse args, cont)
+
+ is_interesting arg se = interestingArg (substExpr (text "contArgs") se arg)
+ -- Do *not* use short-cutting substitution here
+ -- because we want to get as much IdInfo as possible
+
+pushSimplifiedArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
+pushSimplifiedArgs _env [] cont = cont
+pushSimplifiedArgs env (arg:args) cont = ApplyTo Simplified arg env (pushSimplifiedArgs env args cont)
+ -- The env has an empty SubstEnv
+
+dropArgs :: Int -> SimplCont -> SimplCont
+dropArgs 0 cont = cont
+dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
+dropArgs n other = pprPanic "dropArgs" (ppr n <+> ppr other)
+
+{-
+Note [Interesting call context]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want to avoid inlining an expression where there can't possibly be
+any gain, such as in an argument position. Hence, if the continuation
+is interesting (eg. a case scrutinee, application etc.) then we
+inline, otherwise we don't.
+
+Previously some_benefit used to return True only if the variable was
+applied to some value arguments. This didn't work:
+
+ let x = _coerce_ (T Int) Int (I# 3) in
+ case _coerce_ Int (T Int) x of
+ I# y -> ....
+
+we want to inline x, but can't see that it's a constructor in a case
+scrutinee position, and some_benefit is False.
+
+Another example:
+
+dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
+
+.... case dMonadST _@_ x0 of (a,b,c) -> ....
+
+we'd really like to inline dMonadST here, but we *don't* want to
+inline if the case expression is just
+
+ case x of y { DEFAULT -> ... }
+
+since we can just eliminate this case instead (x is in WHNF). Similar
+applies when x is bound to a lambda expression. Hence
+contIsInteresting looks for case expressions with just a single
+default case.
+-}
+
+interestingCallContext :: SimplCont -> CallCtxt
+-- See Note [Interesting call context]
+interestingCallContext cont
+ = interesting cont
+ where
+ interesting (Select _ _bndr _ _ _) = CaseCtxt
+
+ interesting (ApplyTo _ arg _ cont)
+ | isTypeArg arg = interesting cont
+ | otherwise = ValAppCtxt -- Can happen if we have (f Int |> co) y
+ -- If f has an INLINE prag we need to give it some
+ -- motivation to inline. See Note [Cast then apply]
+ -- in CoreUnfold
+
+ interesting (StrictArg _ cci _) = cci
+ interesting (StrictBind {}) = BoringCtxt
+ interesting (Stop _ cci) = cci
+ interesting (TickIt _ cci) = interesting cci
+ interesting (CoerceIt _ cont) = interesting cont
+ -- If this call is the arg of a strict function, the context
+ -- is a bit interesting. If we inline here, we may get useful
+ -- evaluation information to avoid repeated evals: e.g.
+ -- x + (y * z)
+ -- Here the contIsInteresting makes the '*' keener to inline,
+ -- which in turn exposes a constructor which makes the '+' inline.
+ -- Assuming that +,* aren't small enough to inline regardless.
+ --
+ -- It's also very important to inline in a strict context for things
+ -- like
+ -- foldr k z (f x)
+ -- Here, the context of (f x) is strict, and if f's unfolding is
+ -- a build it's *great* to inline it here. So we must ensure that
+ -- the context for (f x) is not totally uninteresting.
+
+
+-------------------
+mkArgInfo :: Id
+ -> [CoreRule] -- Rules for function
+ -> Int -- Number of value args
+ -> SimplCont -- Context of the call
+ -> ArgInfo
+
+mkArgInfo fun rules n_val_args call_cont
+ | n_val_args < idArity fun -- Note [Unsaturated functions]
+ = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
+ , ai_rules = rules, ai_encl = False
+ , ai_strs = vanilla_stricts
+ , ai_discs = vanilla_discounts }
+ | otherwise
+ = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
+ , ai_rules = rules
+ , ai_encl = interestingArgContext rules call_cont
+ , ai_strs = add_type_str fun_ty arg_stricts
+ , ai_discs = arg_discounts }
+ where
+ fun_ty = idType fun
+
+ vanilla_discounts, arg_discounts :: [Int]
+ vanilla_discounts = repeat 0
+ arg_discounts = case idUnfolding fun of
+ CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
+ -> discounts ++ vanilla_discounts
+ _ -> vanilla_discounts
+
+ vanilla_stricts, arg_stricts :: [Bool]
+ vanilla_stricts = repeat False
+
+ arg_stricts
+ = case splitStrictSig (idStrictness fun) of
+ (demands, result_info)
+ | not (demands `lengthExceeds` n_val_args)
+ -> -- Enough args, use the strictness given.
+ -- For bottoming functions we used to pretend that the arg
+ -- is lazy, so that we don't treat the arg as an
+ -- interesting context. This avoids substituting
+ -- top-level bindings for (say) strings into
+ -- calls to error. But now we are more careful about
+ -- inlining lone variables, so its ok (see SimplUtils.analyseCont)
+ if isBotRes result_info then
+ map isStrictDmd demands -- Finite => result is bottom
+ else
+ map isStrictDmd demands ++ vanilla_stricts
+ | otherwise
+ -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
+ <+> ppr n_val_args <+> ppr demands )
+ vanilla_stricts -- Not enough args, or no strictness
+
+ add_type_str :: Type -> [Bool] -> [Bool]
+ -- If the function arg types are strict, record that in the 'strictness bits'
+ -- No need to instantiate because unboxed types (which dominate the strict
+ -- types) can't instantiate type variables.
+ -- add_type_str is done repeatedly (for each call); might be better
+ -- once-for-all in the function
+ -- But beware primops/datacons with no strictness
+ add_type_str _ [] = []
+ add_type_str fun_ty strs -- Look through foralls
+ | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
+ = add_type_str fun_ty' strs
+ add_type_str fun_ty (str:strs) -- Add strict-type info
+ | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
+ = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
+ add_type_str _ strs
+ = strs
+
+{- Note [Unsaturated functions]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (test eyeball/inline4)
+ x = a:as
+ y = f x
+where f has arity 2. Then we do not want to inline 'x', because
+it'll just be floated out again. Even if f has lots of discounts
+on its first argument -- it must be saturated for these to kick in
+-}
+
+interestingArgContext :: [CoreRule] -> SimplCont -> Bool
+-- If the argument has form (f x y), where x,y are boring,
+-- and f is marked INLINE, then we don't want to inline f.
+-- But if the context of the argument is
+-- g (f x y)
+-- where g has rules, then we *do* want to inline f, in case it
+-- exposes a rule that might fire. Similarly, if the context is
+-- h (g (f x x))
+-- where h has rules, then we do want to inline f; hence the
+-- call_cont argument to interestingArgContext
+--
+-- The ai-rules flag makes this happen; if it's
+-- set, the inliner gets just enough keener to inline f
+-- regardless of how boring f's arguments are, if it's marked INLINE
+--
+-- The alternative would be to *always* inline an INLINE function,
+-- regardless of how boring its context is; but that seems overkill
+-- For example, it'd mean that wrapper functions were always inlined
+interestingArgContext rules call_cont
+ = notNull rules || enclosing_fn_has_rules
+ where
+ enclosing_fn_has_rules = go call_cont
+
+ go (Select {}) = False
+ go (ApplyTo {}) = False
+ go (StrictArg _ cci _) = interesting cci
+ go (StrictBind {}) = False -- ??
+ go (CoerceIt _ c) = go c
+ go (Stop _ cci) = interesting cci
+ go (TickIt _ c) = go c
+
+ interesting RuleArgCtxt = True
+ interesting _ = False
+
+{-
+************************************************************************
+* *
+ SimplifierMode
+* *
+************************************************************************
+
+The SimplifierMode controls several switches; see its definition in
+CoreMonad
+ sm_rules :: Bool -- Whether RULES are enabled
+ sm_inline :: Bool -- Whether inlining is enabled
+ sm_case_case :: Bool -- Whether case-of-case is enabled
+ sm_eta_expand :: Bool -- Whether eta-expansion is enabled
+-}
+
+simplEnvForGHCi :: DynFlags -> SimplEnv
+simplEnvForGHCi dflags
+ = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
+ , sm_phase = InitialPhase
+ , sm_rules = rules_on
+ , sm_inline = False
+ , sm_eta_expand = eta_expand_on
+ , sm_case_case = True }
+ where
+ rules_on = gopt Opt_EnableRewriteRules dflags
+ eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
+ -- Do not do any inlining, in case we expose some unboxed
+ -- tuple stuff that confuses the bytecode interpreter
+
+updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
+-- See Note [Simplifying inside stable unfoldings]
+updModeForStableUnfoldings inline_rule_act current_mode
+ = current_mode { sm_phase = phaseFromActivation inline_rule_act
+ , sm_inline = True
+ , sm_eta_expand = False }
+ -- For sm_rules, just inherit; sm_rules might be "off"
+ -- because of -fno-enable-rewrite-rules
+ where
+ phaseFromActivation (ActiveAfter n) = Phase n
+ phaseFromActivation _ = InitialPhase
+
+{-
+Note [Inlining in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Something is inlined if
+ (i) the sm_inline flag is on, AND
+ (ii) the thing has an INLINE pragma, AND
+ (iii) the thing is inlinable in the earliest phase.
+
+Example of why (iii) is important:
+ {-# INLINE [~1] g #-}
+ g = ...
+
+ {-# INLINE f #-}
+ f x = g (g x)
+
+If we were to inline g into f's inlining, then an importing module would
+never be able to do
+ f e --> g (g e) ---> RULE fires
+because the stable unfolding for f has had g inlined into it.
+
+On the other hand, it is bad not to do ANY inlining into an
+stable unfolding, because then recursive knots in instance declarations
+don't get unravelled.
+
+However, *sometimes* SimplGently must do no call-site inlining at all
+(hence sm_inline = False). Before full laziness we must be careful
+not to inline wrappers, because doing so inhibits floating
+ e.g. ...(case f x of ...)...
+ ==> ...(case (case x of I# x# -> fw x#) of ...)...
+ ==> ...(case x of I# x# -> case fw x# of ...)...
+and now the redex (f x) isn't floatable any more.
+
+The no-inlining thing is also important for Template Haskell. You might be
+compiling in one-shot mode with -O2; but when TH compiles a splice before
+running it, we don't want to use -O2. Indeed, we don't want to inline
+anything, because the byte-code interpreter might get confused about
+unboxed tuples and suchlike.
+
+Note [Simplifying inside stable unfoldings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We must take care with simplification inside stable unfoldings (which come from
+INLINE pragmas).
+
+First, consider the following example
+ let f = \pq -> BIG
+ in
+ let g = \y -> f y y
+ {-# INLINE g #-}
+ in ...g...g...g...g...g...
+Now, if that's the ONLY occurrence of f, it might be inlined inside g,
+and thence copied multiple times when g is inlined. HENCE we treat
+any occurrence in a stable unfolding as a multiple occurrence, not a single
+one; see OccurAnal.addRuleUsage.
+
+Second, we do want *do* to some modest rules/inlining stuff in stable
+unfoldings, partly to eliminate senseless crap, and partly to break
+the recursive knots generated by instance declarations.
+
+However, suppose we have
+ {-# INLINE <act> f #-}
+ f = <rhs>
+meaning "inline f in phases p where activation <act>(p) holds".
+Then what inlinings/rules can we apply to the copy of <rhs> captured in
+f's stable unfolding? Our model is that literally <rhs> is substituted for
+f when it is inlined. So our conservative plan (implemented by
+updModeForStableUnfoldings) is this:
+
+ -------------------------------------------------------------
+ When simplifying the RHS of an stable unfolding, set the phase
+ to the phase in which the stable unfolding first becomes active
+ -------------------------------------------------------------
+
+That ensures that
+
+ a) Rules/inlinings that *cease* being active before p will
+ not apply to the stable unfolding, consistent with it being
+ inlined in its *original* form in phase p.
+
+ b) Rules/inlinings that only become active *after* p will
+ not apply to the stable unfolding, again to be consistent with
+ inlining the *original* rhs in phase p.
+
+For example,
+ {-# INLINE f #-}
+ f x = ...g...
+
+ {-# NOINLINE [1] g #-}
+ g y = ...
+
+ {-# RULE h g = ... #-}
+Here we must not inline g into f's RHS, even when we get to phase 0,
+because when f is later inlined into some other module we want the
+rule for h to fire.
+
+Similarly, consider
+ {-# INLINE f #-}
+ f x = ...g...
+
+ g y = ...
+and suppose that there are auto-generated specialisations and a strictness
+wrapper for g. The specialisations get activation AlwaysActive, and the
+strictness wrapper get activation (ActiveAfter 0). So the strictness
+wrepper fails the test and won't be inlined into f's stable unfolding. That
+means f can inline, expose the specialised call to g, so the specialisation
+rules can fire.
+
+A note about wrappers
+~~~~~~~~~~~~~~~~~~~~~
+It's also important not to inline a worker back into a wrapper.
+A wrapper looks like
+ wraper = inline_me (\x -> ...worker... )
+Normally, the inline_me prevents the worker getting inlined into
+the wrapper (initially, the worker's only call site!). But,
+if the wrapper is sure to be called, the strictness analyser will
+mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
+continuation.
+-}
+
+activeUnfolding :: SimplEnv -> Id -> Bool
+activeUnfolding env
+ | not (sm_inline mode) = active_unfolding_minimal
+ | otherwise = case sm_phase mode of
+ InitialPhase -> active_unfolding_gentle
+ Phase n -> active_unfolding n
+ where
+ mode = getMode env
+
+getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
+-- When matching in RULE, we want to "look through" an unfolding
+-- (to see a constructor) if *rules* are on, even if *inlinings*
+-- are not. A notable example is DFuns, which really we want to
+-- match in rules like (op dfun) in gentle mode. Another example
+-- is 'otherwise' which we want exprIsConApp_maybe to be able to
+-- see very early on
+getUnfoldingInRuleMatch env
+ = (in_scope, id_unf)
+ where
+ in_scope = seInScope env
+ mode = getMode env
+ id_unf id | unf_is_active id = idUnfolding id
+ | otherwise = NoUnfolding
+ unf_is_active id
+ | not (sm_rules mode) = active_unfolding_minimal id
+ | otherwise = isActive (sm_phase mode) (idInlineActivation id)
+
+active_unfolding_minimal :: Id -> Bool
+-- Compuslory unfoldings only
+-- Ignore SimplGently, because we want to inline regardless;
+-- the Id has no top-level binding at all
+--
+-- NB: we used to have a second exception, for data con wrappers.
+-- On the grounds that we use gentle mode for rule LHSs, and
+-- they match better when data con wrappers are inlined.
+-- But that only really applies to the trivial wrappers (like (:)),
+-- and they are now constructed as Compulsory unfoldings (in MkId)
+-- so they'll happen anyway.
+active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
+
+active_unfolding :: PhaseNum -> Id -> Bool
+active_unfolding n id = isActiveIn n (idInlineActivation id)
+
+active_unfolding_gentle :: Id -> Bool
+-- Anything that is early-active
+-- See Note [Gentle mode]
+active_unfolding_gentle id
+ = isInlinePragma prag
+ && isEarlyActive (inlinePragmaActivation prag)
+ -- NB: wrappers are not early-active
+ where
+ prag = idInlinePragma id
+
+----------------------
+activeRule :: SimplEnv -> Activation -> Bool
+-- Nothing => No rules at all
+activeRule env
+ | not (sm_rules mode) = \_ -> False -- Rewriting is off
+ | otherwise = isActive (sm_phase mode)
+ where
+ mode = getMode env
+
+{-
+************************************************************************
+* *
+ preInlineUnconditionally
+* *
+************************************************************************
+
+preInlineUnconditionally
+~~~~~~~~~~~~~~~~~~~~~~~~
+@preInlineUnconditionally@ examines a bndr to see if it is used just
+once in a completely safe way, so that it is safe to discard the
+binding inline its RHS at the (unique) usage site, REGARDLESS of how
+big the RHS might be. If this is the case we don't simplify the RHS
+first, but just inline it un-simplified.
+
+This is much better than first simplifying a perhaps-huge RHS and then
+inlining and re-simplifying it. Indeed, it can be at least quadratically
+better. Consider
+
+ x1 = e1
+ x2 = e2[x1]
+ x3 = e3[x2]
+ ...etc...
+ xN = eN[xN-1]
+
+We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
+This can happen with cascades of functions too:
+
+ f1 = \x1.e1
+ f2 = \xs.e2[f1]
+ f3 = \xs.e3[f3]
+ ...etc...
+
+THE MAIN INVARIANT is this:
+
+ ---- preInlineUnconditionally invariant -----
+ IF preInlineUnconditionally chooses to inline x = <rhs>
+ THEN doing the inlining should not change the occurrence
+ info for the free vars of <rhs>
+ ----------------------------------------------
+
+For example, it's tempting to look at trivial binding like
+ x = y
+and inline it unconditionally. But suppose x is used many times,
+but this is the unique occurrence of y. Then inlining x would change
+y's occurrence info, which breaks the invariant. It matters: y
+might have a BIG rhs, which will now be dup'd at every occurrenc of x.
+
+
+Even RHSs labelled InlineMe aren't caught here, because there might be
+no benefit from inlining at the call site.
+
+[Sept 01] Don't unconditionally inline a top-level thing, because that
+can simply make a static thing into something built dynamically. E.g.
+ x = (a,b)
+ main = \s -> h x
+
+[Remember that we treat \s as a one-shot lambda.] No point in
+inlining x unless there is something interesting about the call site.
+
+But watch out: if you aren't careful, some useful foldr/build fusion
+can be lost (most notably in spectral/hartel/parstof) because the
+foldr didn't see the build. Doing the dynamic allocation isn't a big
+deal, in fact, but losing the fusion can be. But the right thing here
+seems to be to do a callSiteInline based on the fact that there is
+something interesting about the call site (it's strict). Hmm. That
+seems a bit fragile.
+
+Conclusion: inline top level things gaily until Phase 0 (the last
+phase), at which point don't.
+
+Note [pre/postInlineUnconditionally in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in gentle mode we want to do preInlineUnconditionally. The
+reason is that too little clean-up happens if you don't inline
+use-once things. Also a bit of inlining is *good* for full laziness;
+it can expose constant sub-expressions. Example in
+spectral/mandel/Mandel.hs, where the mandelset function gets a useful
+let-float if you inline windowToViewport
+
+However, as usual for Gentle mode, do not inline things that are
+inactive in the intial stages. See Note [Gentle mode].
+
+Note [Stable unfoldings and preInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
+Example
+
+ {-# INLINE f #-}
+ f :: Eq a => a -> a
+ f x = ...
+
+ fInt :: Int -> Int
+ fInt = f Int dEqInt
+
+ ...fInt...fInt...fInt...
+
+Here f occurs just once, in the RHS of f1. But if we inline it there
+we'll lose the opportunity to inline at each of fInt's call sites.
+The INLINE pragma will only inline when the application is saturated
+for exactly this reason; and we don't want PreInlineUnconditionally
+to second-guess it. A live example is Trac #3736.
+ c.f. Note [Stable unfoldings and postInlineUnconditionally]
+
+Note [Top-level botomming Ids]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't inline top-level Ids that are bottoming, even if they are used just
+once, because FloatOut has gone to some trouble to extract them out.
+Inlining them won't make the program run faster!
+
+Note [Do not inline CoVars unconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Coercion variables appear inside coercions, and the RHS of a let-binding
+is a term (not a coercion) so we can't necessarily inline the latter in
+the former.
+-}
+
+preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
+-- Precondition: rhs satisfies the let/app invariant
+-- See Note [CoreSyn let/app invariant] in CoreSyn
+-- Reason: we don't want to inline single uses, or discard dead bindings,
+-- for unlifted, side-effect-full bindings
+preInlineUnconditionally dflags env top_lvl bndr rhs
+ | not active = False
+ | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
+ | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
+ | not (gopt Opt_SimplPreInlining dflags) = False
+ | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
+ | otherwise = case idOccInfo bndr of
+ IAmDead -> True -- Happens in ((\x.1) v)
+ OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
+ _ -> False
+ where
+ mode = getMode env
+ active = isActive (sm_phase mode) act
+ -- See Note [pre/postInlineUnconditionally in gentle mode]
+ act = idInlineActivation bndr
+ try_once in_lam int_cxt -- There's one textual occurrence
+ | not in_lam = isNotTopLevel top_lvl || early_phase
+ | otherwise = int_cxt && canInlineInLam rhs
+
+-- Be very careful before inlining inside a lambda, because (a) we must not
+-- invalidate occurrence information, and (b) we want to avoid pushing a
+-- single allocation (here) into multiple allocations (inside lambda).
+-- Inlining a *function* with a single *saturated* call would be ok, mind you.
+-- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
+-- where
+-- is_cheap = exprIsCheap rhs
+-- ok = is_cheap && int_cxt
+
+ -- int_cxt The context isn't totally boring
+ -- E.g. let f = \ab.BIG in \y. map f xs
+ -- Don't want to substitute for f, because then we allocate
+ -- its closure every time the \y is called
+ -- But: let f = \ab.BIG in \y. map (f y) xs
+ -- Now we do want to substitute for f, even though it's not
+ -- saturated, because we're going to allocate a closure for
+ -- (f y) every time round the loop anyhow.
+
+ -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
+ -- so substituting rhs inside a lambda doesn't change the occ info.
+ -- Sadly, not quite the same as exprIsHNF.
+ canInlineInLam (Lit _) = True
+ canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
+ canInlineInLam _ = False
+ -- not ticks. Counting ticks cannot be duplicated, and non-counting
+ -- ticks around a Lam will disappear anyway.
+
+ early_phase = case sm_phase mode of
+ Phase 0 -> False
+ _ -> True
+-- If we don't have this early_phase test, consider
+-- x = length [1,2,3]
+-- The full laziness pass carefully floats all the cons cells to
+-- top level, and preInlineUnconditionally floats them all back in.
+-- Result is (a) static allocation replaced by dynamic allocation
+-- (b) many simplifier iterations because this tickles
+-- a related problem; only one inlining per pass
+--
+-- On the other hand, I have seen cases where top-level fusion is
+-- lost if we don't inline top level thing (e.g. string constants)
+-- Hence the test for phase zero (which is the phase for all the final
+-- simplifications). Until phase zero we take no special notice of
+-- top level things, but then we become more leery about inlining
+-- them.
+
+{-
+************************************************************************
+* *
+ postInlineUnconditionally
+* *
+************************************************************************
+
+postInlineUnconditionally
+~~~~~~~~~~~~~~~~~~~~~~~~~
+@postInlineUnconditionally@ decides whether to unconditionally inline
+a thing based on the form of its RHS; in particular if it has a
+trivial RHS. If so, we can inline and discard the binding altogether.
+
+NB: a loop breaker has must_keep_binding = True and non-loop-breakers
+only have *forward* references. Hence, it's safe to discard the binding
+
+NOTE: This isn't our last opportunity to inline. We're at the binding
+site right now, and we'll get another opportunity when we get to the
+ocurrence(s)
+
+Note that we do this unconditional inlining only for trival RHSs.
+Don't inline even WHNFs inside lambdas; doing so may simply increase
+allocation when the function is called. This isn't the last chance; see
+NOTE above.
+
+NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
+Because we don't even want to inline them into the RHS of constructor
+arguments. See NOTE above
+
+NB: At one time even NOINLINE was ignored here: if the rhs is trivial
+it's best to inline it anyway. We often get a=E; b=a from desugaring,
+with both a and b marked NOINLINE. But that seems incompatible with
+our new view that inlining is like a RULE, so I'm sticking to the 'active'
+story for now.
+-}
+
+postInlineUnconditionally
+ :: DynFlags -> SimplEnv -> TopLevelFlag
+ -> OutId -- The binder (an InId would be fine too)
+ -- (*not* a CoVar)
+ -> OccInfo -- From the InId
+ -> OutExpr
+ -> Unfolding
+ -> Bool
+-- Precondition: rhs satisfies the let/app invariant
+-- See Note [CoreSyn let/app invariant] in CoreSyn
+-- Reason: we don't want to inline single uses, or discard dead bindings,
+-- for unlifted, side-effect-full bindings
+postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
+ | not active = False
+ | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
+ -- because it might be referred to "earlier"
+ | isExportedId bndr = False
+ | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
+ | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
+ | exprIsTrivial rhs = True
+ | otherwise
+ = case occ_info of
+ -- The point of examining occ_info here is that for *non-values*
+ -- that occur outside a lambda, the call-site inliner won't have
+ -- a chance (because it doesn't know that the thing
+ -- only occurs once). The pre-inliner won't have gotten
+ -- it either, if the thing occurs in more than one branch
+ -- So the main target is things like
+ -- let x = f y in
+ -- case v of
+ -- True -> case x of ...
+ -- False -> case x of ...
+ -- This is very important in practice; e.g. wheel-seive1 doubles
+ -- in allocation if you miss this out
+ OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue
+ -> smallEnoughToInline dflags unfolding -- Small enough to dup
+ -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
+ --
+ -- NB: Do NOT inline arbitrarily big things, even if one_br is True
+ -- Reason: doing so risks exponential behaviour. We simplify a big
+ -- expression, inline it, and simplify it again. But if the
+ -- very same thing happens in the big expression, we get
+ -- exponential cost!
+ -- PRINCIPLE: when we've already simplified an expression once,
+ -- make sure that we only inline it if it's reasonably small.
+
+ && (not in_lam ||
+ -- Outside a lambda, we want to be reasonably aggressive
+ -- about inlining into multiple branches of case
+ -- e.g. let x = <non-value>
+ -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
+ -- Inlining can be a big win if C3 is the hot-spot, even if
+ -- the uses in C1, C2 are not 'interesting'
+ -- An example that gets worse if you add int_cxt here is 'clausify'
+
+ (isCheapUnfolding unfolding && int_cxt))
+ -- isCheap => acceptable work duplication; in_lam may be true
+ -- int_cxt to prevent us inlining inside a lambda without some
+ -- good reason. See the notes on int_cxt in preInlineUnconditionally
+
+ IAmDead -> True -- This happens; for example, the case_bndr during case of
+ -- known constructor: case (a,b) of x { (p,q) -> ... }
+ -- Here x isn't mentioned in the RHS, so we don't want to
+ -- create the (dead) let-binding let x = (a,b) in ...
+
+ _ -> False
+
+-- Here's an example that we don't handle well:
+-- let f = if b then Left (\x.BIG) else Right (\y.BIG)
+-- in \y. ....case f of {...} ....
+-- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
+-- But
+-- - We can't preInlineUnconditionally because that woud invalidate
+-- the occ info for b.
+-- - We can't postInlineUnconditionally because the RHS is big, and
+-- that risks exponential behaviour
+-- - We can't call-site inline, because the rhs is big
+-- Alas!
+
+ where
+ active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
+ -- See Note [pre/postInlineUnconditionally in gentle mode]
+
+{-
+Note [Top level and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do postInlineUnconditionally for top-level things (even for
+ones that are trivial):
+
+ * Doing so will inline top-level error expressions that have been
+ carefully floated out by FloatOut. More generally, it might
+ replace static allocation with dynamic.
+
+ * Even for trivial expressions there's a problem. Consider
+ {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
+ blah xs = reverse xs
+ ruggle = sort
+ In one simplifier pass we might fire the rule, getting
+ blah xs = ruggle xs
+ but in *that* simplifier pass we must not do postInlineUnconditionally
+ on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
+
+ If the rhs is trivial it'll be inlined by callSiteInline, and then
+ the binding will be dead and discarded by the next use of OccurAnal
+
+ * There is less point, because the main goal is to get rid of local
+ bindings used in multiple case branches.
+
+ * The inliner should inline trivial things at call sites anyway.
+
+Note [Stable unfoldings and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Do not do postInlineUnconditionally if the Id has an stable unfolding,
+otherwise we lose the unfolding. Example
+
+ -- f has stable unfolding with rhs (e |> co)
+ -- where 'e' is big
+ f = e |> co
+
+Then there's a danger we'll optimise to
+
+ f' = e
+ f = f' |> co
+
+and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
+won't inline because 'e' is too big.
+
+ c.f. Note [Stable unfoldings and preInlineUnconditionally]
+
+
+************************************************************************
+* *
+ Rebuilding a lambda
+* *
+************************************************************************
+-}
+
+mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
+-- mkLam tries three things
+-- a) eta reduction, if that gives a trivial expression
+-- b) eta expansion [only if there are some value lambdas]
+
+mkLam [] body _cont
+ = return body
+mkLam bndrs body cont
+ = do { dflags <- getDynFlags
+ ; mkLam' dflags bndrs body }
+ where
+ mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
+ mkLam' dflags bndrs (Cast body co)
+ | not (any bad bndrs)
+ -- Note [Casts and lambdas]
+ = do { lam <- mkLam' dflags bndrs body
+ ; return (mkCast lam (mkPiCos Representational bndrs co)) }
+ where
+ co_vars = tyCoVarsOfCo co
+ bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
+
+ mkLam' dflags bndrs body@(Lam {})
+ = mkLam' dflags (bndrs ++ bndrs1) body1
+ where
+ (bndrs1, body1) = collectBinders body
+
+ mkLam' dflags bndrs body
+ | gopt Opt_DoEtaReduction dflags
+ , Just etad_lam <- tryEtaReduce bndrs body
+ = do { tick (EtaReduction (head bndrs))
+ ; return etad_lam }
+
+ | not (contIsRhs cont) -- See Note [Eta-expanding lambdas]
+ , gopt Opt_DoLambdaEtaExpansion dflags
+ , any isRuntimeVar bndrs
+ , let body_arity = exprEtaExpandArity dflags body
+ , body_arity > 0
+ = do { tick (EtaExpansion (head bndrs))
+ ; return (mkLams bndrs (etaExpand body_arity body)) }
+
+ | otherwise
+ = return (mkLams bndrs body)
+
+{-
+Note [Eta expanding lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In general we *do* want to eta-expand lambdas. Consider
+ f (\x -> case x of (a,b) -> \s -> blah)
+where 's' is a state token, and hence can be eta expanded. This
+showed up in the code for GHc.IO.Handle.Text.hPutChar, a rather
+important function!
+
+The eta-expansion will never happen unless we do it now. (Well, it's
+possible that CorePrep will do it, but CorePrep only has a half-baked
+eta-expander that can't deal with casts. So it's much better to do it
+here.)
+
+However, when the lambda is let-bound, as the RHS of a let, we have a
+better eta-expander (in the form of tryEtaExpandRhs), so we don't
+bother to try expansion in mkLam in that case; hence the contIsRhs
+guard.
+
+Note [Casts and lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ (\x. (\y. e) `cast` g1) `cast` g2
+There is a danger here that the two lambdas look separated, and the
+full laziness pass might float an expression to between the two.
+
+So this equation in mkLam' floats the g1 out, thus:
+ (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
+where x:tx.
+
+In general, this floats casts outside lambdas, where (I hope) they
+might meet and cancel with some other cast:
+ \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
+ /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
+ /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
+ (if not (g `in` co))
+
+Notice that it works regardless of 'e'. Originally it worked only
+if 'e' was itself a lambda, but in some cases that resulted in
+fruitless iteration in the simplifier. A good example was when
+compiling Text.ParserCombinators.ReadPrec, where we had a definition
+like (\x. Get `cast` g)
+where Get is a constructor with nonzero arity. Then mkLam eta-expanded
+the Get, and the next iteration eta-reduced it, and then eta-expanded
+it again.
+
+Note also the side condition for the case of coercion binders.
+It does not make sense to transform
+ /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
+because the latter is not well-kinded.
+
+************************************************************************
+* *
+ Eta expansion
+* *
+************************************************************************
+-}
+
+tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
+-- See Note [Eta-expanding at let bindings]
+tryEtaExpandRhs env bndr rhs
+ = do { dflags <- getDynFlags
+ ; (new_arity, new_rhs) <- try_expand dflags
+
+ ; WARN( new_arity < old_id_arity,
+ (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_id_arity
+ <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
+ -- Note [Arity decrease] in Simplify
+ return (new_arity, new_rhs) }
+ where
+ try_expand dflags
+ | exprIsTrivial rhs
+ = return (exprArity rhs, rhs)
+
+ | sm_eta_expand (getMode env) -- Provided eta-expansion is on
+ , let new_arity1 = findRhsArity dflags bndr rhs old_arity
+ new_arity2 = idCallArity bndr
+ new_arity = max new_arity1 new_arity2
+ , new_arity > old_arity -- And the current manifest arity isn't enough
+ = do { tick (EtaExpansion bndr)
+ ; return (new_arity, etaExpand new_arity rhs) }
+ | otherwise
+ = return (old_arity, rhs)
+
+ old_arity = exprArity rhs -- See Note [Do not expand eta-expand PAPs]
+ old_id_arity = idArity bndr
+
+{-
+Note [Eta-expanding at let bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We now eta expand at let-bindings, which is where the payoff comes.
+The most significant thing is that we can do a simple arity analysis
+(in CoreArity.findRhsArity), which we can't do for free-floating lambdas
+
+One useful consequence of not eta-expanding lambdas is this example:
+ genMap :: C a => ...
+ {-# INLINE genMap #-}
+ genMap f xs = ...
+
+ myMap :: D a => ...
+ {-# INLINE myMap #-}
+ myMap = genMap
+
+Notice that 'genMap' should only inline if applied to two arguments.
+In the stable unfolding for myMap we'll have the unfolding
+ (\d -> genMap Int (..d..))
+We do not want to eta-expand to
+ (\d f xs -> genMap Int (..d..) f xs)
+because then 'genMap' will inline, and it really shouldn't: at least
+as far as the programmer is concerned, it's not applied to two
+arguments!
+
+Note [Do not eta-expand PAPs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We used to have old_arity = manifestArity rhs, which meant that we
+would eta-expand even PAPs. But this gives no particular advantage,
+and can lead to a massive blow-up in code size, exhibited by Trac #9020.
+Suppose we have a PAP
+ foo :: IO ()
+ foo = returnIO ()
+Then we can eta-expand do
+ foo = (\eta. (returnIO () |> sym g) eta) |> g
+where
+ g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
+
+But there is really no point in doing this, and it generates masses of
+coercions and whatnot that eventually disappear again. For T9020, GHC
+allocated 6.6G beore, and 0.8G afterwards; and residency dropped from
+1.8G to 45M.
+
+But note that this won't eta-expand, say
+ f = \g -> map g
+Does it matter not eta-expanding such functions? I'm not sure. Perhaps
+strictness analysis will have less to bite on?
+
+
+************************************************************************
+* *
+\subsection{Floating lets out of big lambdas}
+* *
+************************************************************************
+
+Note [Floating and type abstraction]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this:
+ x = /\a. C e1 e2
+We'd like to float this to
+ y1 = /\a. e1
+ y2 = /\a. e2
+ x = /\a. C (y1 a) (y2 a)
+for the usual reasons: we want to inline x rather vigorously.
+
+You may think that this kind of thing is rare. But in some programs it is
+common. For example, if you do closure conversion you might get:
+
+ data a :-> b = forall e. (e -> a -> b) :$ e
+
+ f_cc :: forall a. a :-> a
+ f_cc = /\a. (\e. id a) :$ ()
+
+Now we really want to inline that f_cc thing so that the
+construction of the closure goes away.
+
+So I have elaborated simplLazyBind to understand right-hand sides that look
+like
+ /\ a1..an. body
+
+and treat them specially. The real work is done in SimplUtils.abstractFloats,
+but there is quite a bit of plumbing in simplLazyBind as well.
+
+The same transformation is good when there are lets in the body:
+
+ /\abc -> let(rec) x = e in b
+ ==>
+ let(rec) x' = /\abc -> let x = x' a b c in e
+ in
+ /\abc -> let x = x' a b c in b
+
+This is good because it can turn things like:
+
+ let f = /\a -> letrec g = ... g ... in g
+into
+ letrec g' = /\a -> ... g' a ...
+ in
+ let f = /\ a -> g' a
+
+which is better. In effect, it means that big lambdas don't impede
+let-floating.
+
+This optimisation is CRUCIAL in eliminating the junk introduced by
+desugaring mutually recursive definitions. Don't eliminate it lightly!
+
+[May 1999] If we do this transformation *regardless* then we can
+end up with some pretty silly stuff. For example,
+
+ let
+ st = /\ s -> let { x1=r1 ; x2=r2 } in ...
+ in ..
+becomes
+ let y1 = /\s -> r1
+ y2 = /\s -> r2
+ st = /\s -> ...[y1 s/x1, y2 s/x2]
+ in ..
+
+Unless the "..." is a WHNF there is really no point in doing this.
+Indeed it can make things worse. Suppose x1 is used strictly,
+and is of the form
+
+ x1* = case f y of { (a,b) -> e }
+
+If we abstract this wrt the tyvar we then can't do the case inline
+as we would normally do.
+
+That's why the whole transformation is part of the same process that
+floats let-bindings and constructor arguments out of RHSs. In particular,
+it is guarded by the doFloatFromRhs call in simplLazyBind.
+-}
+
+abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
+abstractFloats main_tvs body_env body
+ = ASSERT( notNull body_floats )
+ do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
+ ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
+ where
+ main_tv_set = mkVarSet main_tvs
+ body_floats = getFloatBinds body_env
+ empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
+
+ abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
+ abstract subst (NonRec id rhs)
+ = do { (poly_id, poly_app) <- mk_poly tvs_here id
+ ; let poly_rhs = mkLams tvs_here rhs'
+ subst' = CoreSubst.extendIdSubst subst id poly_app
+ ; return (subst', (NonRec poly_id poly_rhs)) }
+ where
+ rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
+ tvs_here = varSetElemsKvsFirst (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
+
+ -- Abstract only over the type variables free in the rhs
+ -- wrt which the new binding is abstracted. But the naive
+ -- approach of abstract wrt the tyvars free in the Id's type
+ -- fails. Consider:
+ -- /\ a b -> let t :: (a,b) = (e1, e2)
+ -- x :: a = fst t
+ -- in ...
+ -- Here, b isn't free in x's type, but we must nevertheless
+ -- abstract wrt b as well, because t's type mentions b.
+ -- Since t is floated too, we'd end up with the bogus:
+ -- poly_t = /\ a b -> (e1, e2)
+ -- poly_x = /\ a -> fst (poly_t a *b*)
+ -- So for now we adopt the even more naive approach of
+ -- abstracting wrt *all* the tyvars. We'll see if that
+ -- gives rise to problems. SLPJ June 98
+
+ abstract subst (Rec prs)
+ = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
+ ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
+ poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
+ | rhs <- rhss]
+ ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
+ where
+ (ids,rhss) = unzip prs
+ -- For a recursive group, it's a bit of a pain to work out the minimal
+ -- set of tyvars over which to abstract:
+ -- /\ a b c. let x = ...a... in
+ -- letrec { p = ...x...q...
+ -- q = .....p...b... } in
+ -- ...
+ -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
+ -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
+ -- Since it's a pain, we just use the whole set, which is always safe
+ --
+ -- If you ever want to be more selective, remember this bizarre case too:
+ -- x::a = x
+ -- Here, we must abstract 'x' over 'a'.
+ tvs_here = sortQuantVars main_tvs
+
+ mk_poly tvs_here var
+ = do { uniq <- getUniqueM
+ ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
+ poly_ty = mkForAllTys tvs_here (idType var) -- But new type of course
+ poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs
+ mkLocalId poly_name poly_ty
+ ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
+ -- In the olden days, it was crucial to copy the occInfo of the original var,
+ -- because we were looking at occurrence-analysed but as yet unsimplified code!
+ -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
+ -- at already simplified code, so it doesn't matter
+ --
+ -- It's even right to retain single-occurrence or dead-var info:
+ -- Suppose we started with /\a -> let x = E in B
+ -- where x occurs once in B. Then we transform to:
+ -- let x' = /\a -> E in /\a -> let x* = x' a in B
+ -- where x* has an INLINE prag on it. Now, once x* is inlined,
+ -- the occurrences of x' will be just the occurrences originally
+ -- pinned on x.
+
+{-
+Note [Abstract over coercions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
+type variable a. Rather than sort this mess out, we simply bale out and abstract
+wrt all the type variables if any of them are coercion variables.
+
+
+Historical note: if you use let-bindings instead of a substitution, beware of this:
+
+ -- Suppose we start with:
+ --
+ -- x = /\ a -> let g = G in E
+ --
+ -- Then we'll float to get
+ --
+ -- x = let poly_g = /\ a -> G
+ -- in /\ a -> let g = poly_g a in E
+ --
+ -- But now the occurrence analyser will see just one occurrence
+ -- of poly_g, not inside a lambda, so the simplifier will
+ -- PreInlineUnconditionally poly_g back into g! Badk to square 1!
+ -- (I used to think that the "don't inline lone occurrences" stuff
+ -- would stop this happening, but since it's the *only* occurrence,
+ -- PreInlineUnconditionally kicks in first!)
+ --
+ -- Solution: put an INLINE note on g's RHS, so that poly_g seems
+ -- to appear many times. (NB: mkInlineMe eliminates
+ -- such notes on trivial RHSs, so do it manually.)
+
+************************************************************************
+* *
+ prepareAlts
+* *
+************************************************************************
+
+prepareAlts tries these things:
+
+1. Eliminate alternatives that cannot match, including the
+ DEFAULT alternative.
+
+2. If the DEFAULT alternative can match only one possible constructor,
+ then make that constructor explicit.
+ e.g.
+ case e of x { DEFAULT -> rhs }
+ ===>
+ case e of x { (a,b) -> rhs }
+ where the type is a single constructor type. This gives better code
+ when rhs also scrutinises x or e.
+
+3. Returns a list of the constructors that cannot holds in the
+ DEFAULT alternative (if there is one)
+
+Here "cannot match" includes knowledge from GADTs
+
+It's a good idea to do this stuff before simplifying the alternatives, to
+avoid simplifying alternatives we know can't happen, and to come up with
+the list of constructors that are handled, to put into the IdInfo of the
+case binder, for use when simplifying the alternatives.
+
+Eliminating the default alternative in (1) isn't so obvious, but it can
+happen:
+
+data Colour = Red | Green | Blue
+
+f x = case x of
+ Red -> ..
+ Green -> ..
+ DEFAULT -> h x
+
+h y = case y of
+ Blue -> ..
+ DEFAULT -> [ case y of ... ]
+
+If we inline h into f, the default case of the inlined h can't happen.
+If we don't notice this, we may end up filtering out *all* the cases
+of the inner case y, which give us nowhere to go!
+-}
+
+prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+-- The returned alternatives can be empty, none are possible
+prepareAlts scrut case_bndr' alts
+ -- Case binder is needed just for its type. Note that as an
+ -- OutId, it has maximum information; this is important.
+ -- Test simpl013 is an example
+ = do { us <- getUniquesM
+ ; let (imposs_deflt_cons, refined_deflt, alts')
+ = filterAlts us (varType case_bndr') imposs_cons alts
+ ; when refined_deflt $ tick (FillInCaseDefault case_bndr')
+
+ ; alts'' <- combineIdenticalAlts case_bndr' alts'
+ ; return (imposs_deflt_cons, alts'') }
+ where
+ imposs_cons = case scrut of
+ Var v -> otherCons (idUnfolding v)
+ _ -> []
+
+{-
+Note [Combine identical alternatives]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ If several alternatives are identical, merge them into
+ a single DEFAULT alternative. I've occasionally seen this
+ making a big difference:
+
+ case e of =====> case e of
+ C _ -> f x D v -> ....v....
+ D v -> ....v.... DEFAULT -> f x
+ DEFAULT -> f x
+
+The point is that we merge common RHSs, at least for the DEFAULT case.
+[One could do something more elaborate but I've never seen it needed.]
+To avoid an expensive test, we just merge branches equal to the *first*
+alternative; this picks up the common cases
+ a) all branches equal
+ b) some branches equal to the DEFAULT (which occurs first)
+
+The case where Combine Identical Alternatives transformation showed up
+was like this (base/Foreign/C/Err/Error.lhs):
+
+ x | p `is` 1 -> e1
+ | p `is` 2 -> e2
+ ...etc...
+
+where @is@ was something like
+
+ p `is` n = p /= (-1) && p == n
+
+This gave rise to a horrible sequence of cases
+
+ case p of
+ (-1) -> $j p
+ 1 -> e1
+ DEFAULT -> $j p
+
+and similarly in cascade for all the join points!
+
+NB: it's important that all this is done in [InAlt], *before* we work
+on the alternatives themselves, because Simpify.simplAlt may zap the
+occurrence info on the binders in the alternatives, which in turn
+defeats combineIdenticalAlts (see Trac #7360).
+-}
+
+combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
+-- See Note [Combine identical alternatives]
+combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
+ | all isDeadBinder bndrs1 -- Remember the default
+ , length filtered_alts < length con_alts -- alternative comes first
+ = do { tick (AltMerge case_bndr)
+ ; return ((DEFAULT, [], rhs1) : filtered_alts) }
+ where
+ filtered_alts = filterOut identical_to_alt1 con_alts
+ identical_to_alt1 (_con,bndrs,rhs) = all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1
+
+combineIdenticalAlts _ alts = return alts
+
+{-
+************************************************************************
+* *
+ mkCase
+* *
+************************************************************************
+
+mkCase tries these things
+
+1. Merge Nested Cases
+
+ case e of b { ==> case e of b {
+ p1 -> rhs1 p1 -> rhs1
+ ... ...
+ pm -> rhsm pm -> rhsm
+ _ -> case b of b' { pn -> let b'=b in rhsn
+ pn -> rhsn ...
+ ... po -> let b'=b in rhso
+ po -> rhso _ -> let b'=b in rhsd
+ _ -> rhsd
+ }
+
+ which merges two cases in one case when -- the default alternative of
+ the outer case scrutises the same variable as the outer case. This
+ transformation is called Case Merging. It avoids that the same
+ variable is scrutinised multiple times.
+
+2. Eliminate Identity Case
+
+ case e of ===> e
+ True -> True;
+ False -> False
+
+ and similar friends.
+-}
+
+mkCase, mkCase1, mkCase2
+ :: DynFlags
+ -> OutExpr -> OutId
+ -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
+ -> SimplM OutExpr
+
+--------------------------------------------------
+-- 1. Merge Nested Cases
+--------------------------------------------------
+
+mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
+ | gopt Opt_CaseMerge dflags
+ , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+ , inner_scrut_var == outer_bndr
+ = do { tick (CaseMerge outer_bndr)
+
+ ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
+ (con, args, wrap_rhs rhs)
+ -- Simplifier's no-shadowing invariant should ensure
+ -- that outer_bndr is not shadowed by the inner patterns
+ wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
+ -- The let is OK even for unboxed binders,
+
+ wrapped_alts | isDeadBinder inner_bndr = inner_alts
+ | otherwise = map wrap_alt inner_alts
+
+ merged_alts = mergeAlts outer_alts wrapped_alts
+ -- NB: mergeAlts gives priority to the left
+ -- case x of
+ -- A -> e1
+ -- DEFAULT -> case x of
+ -- A -> e2
+ -- B -> e3
+ -- When we merge, we must ensure that e1 takes
+ -- precedence over e2 as the value for A!
+
+ ; mkCase1 dflags scrut outer_bndr alts_ty merged_alts
+ }
+ -- Warning: don't call mkCase recursively!
+ -- Firstly, there's no point, because inner alts have already had
+ -- mkCase applied to them, so they won't have a case in their default
+ -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
+ -- in munge_rhs may put a case into the DEFAULT branch!
+
+mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
+
+--------------------------------------------------
+-- 2. Eliminate Identity Case
+--------------------------------------------------
+
+mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
+ | all identity_alt alts
+ = do { tick (CaseIdentity case_bndr)
+ ; return (re_cast scrut rhs1) }
+ where
+ identity_alt (con, args, rhs) = check_eq rhs con args
+
+ check_eq (Cast rhs co) con args = not (any (`elemVarSet` tyCoVarsOfCo co) args)
+ {- See Note [RHS casts] -} && check_eq rhs con args
+ check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
+ check_eq (Var v) _ _ | v == case_bndr = True
+ check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con -- Optimisation only
+ check_eq rhs (DataAlt con) args = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
+ check_eq _ _ _ = False
+
+ arg_tys = map Type (tyConAppArgs (idType case_bndr))
+
+ -- Note [RHS casts]
+ -- ~~~~~~~~~~~~~~~~
+ -- We've seen this:
+ -- case e of x { _ -> x `cast` c }
+ -- And we definitely want to eliminate this case, to give
+ -- e `cast` c
+ -- So we throw away the cast from the RHS, and reconstruct
+ -- it at the other end. All the RHS casts must be the same
+ -- if (all identity_alt alts) holds.
+ --
+ -- Don't worry about nested casts, because the simplifier combines them
+
+ re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
+ re_cast scrut _ = scrut
+
+mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
+
+--------------------------------------------------
+-- Catch-all
+--------------------------------------------------
+mkCase2 _dflags scrut bndr alts_ty alts
+ = return (Case scrut bndr alts_ty alts)
+
+{-
+Note [Dead binders]
+~~~~~~~~~~~~~~~~~~~~
+Note that dead-ness is maintained by the simplifier, so that it is
+accurate after simplification as well as before.
+
+
+Note [Cascading case merge]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Case merging should cascade in one sweep, because it
+happens bottom-up
+
+ case e of a {
+ DEFAULT -> case a of b
+ DEFAULT -> case b of c {
+ DEFAULT -> e
+ A -> ea
+ B -> eb
+ C -> ec
+==>
+ case e of a {
+ DEFAULT -> case a of b
+ DEFAULT -> let c = b in e
+ A -> let c = b in ea
+ B -> eb
+ C -> ec
+==>
+ case e of a {
+ DEFAULT -> let b = a in let c = b in e
+ A -> let b = a in let c = b in ea
+ B -> let b = a in eb
+ C -> ec
+
+
+However here's a tricky case that we still don't catch, and I don't
+see how to catch it in one pass:
+
+ case x of c1 { I# a1 ->
+ case a1 of c2 ->
+ 0 -> ...
+ DEFAULT -> case x of c3 { I# a2 ->
+ case a2 of ...
+
+After occurrence analysis (and its binder-swap) we get this
+
+ case x of c1 { I# a1 ->
+ let x = c1 in -- Binder-swap addition
+ case a1 of c2 ->
+ 0 -> ...
+ DEFAULT -> case x of c3 { I# a2 ->
+ case a2 of ...
+
+When we simplify the inner case x, we'll see that
+x=c1=I# a1. So we'll bind a2 to a1, and get
+
+ case x of c1 { I# a1 ->
+ case a1 of c2 ->
+ 0 -> ...
+ DEFAULT -> case a1 of ...
+
+This is corect, but we can't do a case merge in this sweep
+because c2 /= a1. Reason: the binding c1=I# a1 went inwards
+without getting changed to c1=I# c2.
+
+I don't think this is worth fixing, even if I knew how. It'll
+all come out in the next pass anyway.
+-}