1 files changed, 1779 insertions, 0 deletions
diff --git a/compiler/simplCore/SimplUtils.hs b/compiler/simplCore/SimplUtils.hs
new file mode 100644
index 0000000000..eec0f4b230
--- /dev/null
+++ b/compiler/simplCore/SimplUtils.hs
@@ -0,0 +1,1779 @@
+{-
+(c) The AQUA Project, Glasgow University, 1993-1998
+
+\section[SimplUtils]{The simplifier utilities}
+-}
+
+{-# LANGUAGE CPP #-}
+
+module SimplUtils (
+        -- Rebuilding
+        mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
+
+        -- Inlining,
+        preInlineUnconditionally, postInlineUnconditionally,
+        activeUnfolding, activeRule,
+        getUnfoldingInRuleMatch,
+        simplEnvForGHCi, updModeForStableUnfoldings,
+
+        -- The continuation type
+        SimplCont(..), DupFlag(..),
+        isSimplified,
+        contIsDupable, contResultType, contInputType,
+        contIsTrivial, contArgs, dropArgs,
+        pushSimplifiedArgs, countValArgs, countArgs,
+        mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
+        interestingCallContext, interestingArg,
+
+        -- ArgInfo
+        ArgInfo(..), ArgSpec(..), mkArgInfo, addArgTo, addCastTo,
+        argInfoExpr, argInfoValArgs,
+
+        abstractFloats
+    ) where
+
+#include "HsVersions.h"
+
+import SimplEnv
+import CoreMonad        ( SimplifierMode(..), Tick(..) )
+import MkCore           ( sortQuantVars )
+import DynFlags
+import CoreSyn
+import qualified CoreSubst
+import PprCore
+import CoreFVs
+import CoreUtils
+import CoreArity
+import CoreUnfold
+import Name
+import Id
+import Var
+import Demand
+import SimplMonad
+import Type     hiding( substTy )
+import Coercion hiding( substCo, substTy )
+import DataCon          ( dataConWorkId )
+import VarSet
+import BasicTypes
+import Util
+import MonadUtils
+import Outputable
+import FastString
+import Pair
+
+import Control.Monad    ( when )
+
+{-
+************************************************************************
+*                                                                      *
+                The SimplCont type
+*                                                                      *
+************************************************************************
+
+A SimplCont allows the simplifier to traverse the expression in a
+zipper-like fashion.  The SimplCont represents the rest of the expression,
+"above" the point of interest.
+
+You can also think of a SimplCont as an "evaluation context", using
+that term in the way it is used for operational semantics. This is the
+way I usually think of it, For example you'll often see a syntax for
+evaluation context looking like
+        C ::= []  |  C e   |  case C of alts  |  C `cast` co
+That's the kind of thing we are doing here, and I use that syntax in
+the comments.
+
+
+Key points:
+  * A SimplCont describes a *strict* context (just like
+    evaluation contexts do).  E.g. Just [] is not a SimplCont
+
+  * A SimplCont describes a context that *does not* bind
+    any variables.  E.g. \x. [] is not a SimplCont
+-}
+
+data SimplCont
+  = Stop                -- An empty context, or <hole>
+        OutType         -- Type of the <hole>
+        CallCtxt        -- Tells if there is something interesting about
+                        --          the context, and hence the inliner
+                        --          should be a bit keener (see interestingCallContext)
+                        -- Specifically:
+                        --     This is an argument of a function that has RULES
+                        --     Inlining the call might allow the rule to fire
+                        -- Never ValAppCxt (use ApplyTo instead)
+                        -- or CaseCtxt (use Select instead)
+
+  | CoerceIt            -- <hole> `cast` co
+        OutCoercion             -- The coercion simplified
+                                -- Invariant: never an identity coercion
+        SimplCont
+
+  | ApplyTo             -- <hole> arg
+        DupFlag                 -- See Note [DupFlag invariants]
+        InExpr StaticEnv        -- The argument and its static env
+        SimplCont
+
+  | Select              -- case <hole> of alts
+        DupFlag                 -- See Note [DupFlag invariants]
+        InId [InAlt] StaticEnv  -- The case binder, alts type, alts, and subst-env
+        SimplCont
+
+  -- The two strict forms have no DupFlag, because we never duplicate them
+  | StrictBind                  -- (\x* \xs. e) <hole>
+        InId [InBndr]           -- let x* = <hole> in e
+        InExpr StaticEnv        --      is a special case
+        SimplCont
+
+  | StrictArg           -- f e1 ..en <hole>
+        ArgInfo         -- Specifies f, e1..en, Whether f has rules, etc
+                        --     plus strictness flags for *further* args
+        CallCtxt        -- Whether *this* argument position is interesting
+        SimplCont
+
+  | TickIt
+        (Tickish Id)    -- Tick tickish <hole>
+        SimplCont
+
+data ArgInfo
+  = ArgInfo {
+        ai_fun   :: OutId,      -- The function
+        ai_args  :: [ArgSpec],  -- ...applied to these args (which are in *reverse* order)
+        ai_type  :: OutType,    -- Type of (f a1 ... an)
+
+        ai_rules :: [CoreRule], -- Rules for this function
+
+        ai_encl :: Bool,        -- Flag saying whether this function
+                                -- or an enclosing one has rules (recursively)
+                                --      True => be keener to inline in all args
+
+        ai_strs :: [Bool],      -- Strictness of remaining arguments
+                                --   Usually infinite, but if it is finite it guarantees
+                                --   that the function diverges after being given
+                                --   that number of args
+        ai_discs :: [Int]       -- Discounts for remaining arguments; non-zero => be keener to inline
+                                --   Always infinite
+    }
+
+data ArgSpec = ValArg OutExpr       -- Apply to this
+             | CastBy OutCoercion   -- Cast by this
+
+instance Outputable ArgSpec where
+  ppr (ValArg e) = ptext (sLit "ValArg") <+> ppr e
+  ppr (CastBy c) = ptext (sLit "CastBy") <+> ppr c
+
+addArgTo :: ArgInfo -> OutExpr -> ArgInfo
+addArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
+                     , ai_type = applyTypeToArg (ai_type ai) arg  }
+
+addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
+addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
+                     , ai_type = pSnd (coercionKind co) }
+
+argInfoValArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> ([OutExpr], SimplCont)
+argInfoValArgs env args cont
+  = go args [] cont
+  where
+    go :: [ArgSpec] -> [OutExpr] -> SimplCont -> ([OutExpr], SimplCont)
+    go (ValArg e  : as) acc cont = go as (e:acc) cont
+    go (CastBy co : as) acc cont = go as [] (CoerceIt co (pushSimplifiedArgs env acc cont))
+    go []               acc cont = (acc, cont)
+
+argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
+argInfoExpr fun args
+  = go args
+  where
+    go []               = Var fun
+    go (ValArg a : as)  = go as `App` a
+    go (CastBy co : as) = mkCast (go as) co
+
+instance Outputable SimplCont where
+  ppr (Stop ty interesting)          = ptext (sLit "Stop") <> brackets (ppr interesting) <+> ppr ty
+  ppr (ApplyTo dup arg _ cont)       = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
+                                          {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
+  ppr (StrictBind b _ _ _ cont)      = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
+  ppr (StrictArg ai _ cont)          = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
+  ppr (Select dup bndr alts se cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
+                                       ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
+  ppr (CoerceIt co cont)             = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
+  ppr (TickIt t cont)                = (ptext (sLit "TickIt") <+> ppr t) $$ ppr cont
+
+data DupFlag = NoDup       -- Unsimplified, might be big
+             | Simplified  -- Simplified
+             | OkToDup     -- Simplified and small
+
+isSimplified :: DupFlag -> Bool
+isSimplified NoDup = False
+isSimplified _     = True       -- Invariant: the subst-env is empty
+
+instance Outputable DupFlag where
+  ppr OkToDup    = ptext (sLit "ok")
+  ppr NoDup      = ptext (sLit "nodup")
+  ppr Simplified = ptext (sLit "simpl")
+
+{-
+Note [DupFlag invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+In both (ApplyTo dup _ env k)
+   and  (Select dup _ _ env k)
+the following invariants hold
+
+  (a) if dup = OkToDup, then continuation k is also ok-to-dup
+  (b) if dup = OkToDup or Simplified, the subst-env is empty
+      (and and hence no need to re-simplify)
+-}
+
+-------------------
+mkBoringStop :: OutType -> SimplCont
+mkBoringStop ty = Stop ty BoringCtxt
+
+mkRhsStop :: OutType -> SimplCont       -- See Note [RHS of lets] in CoreUnfold
+mkRhsStop ty = Stop ty RhsCtxt
+
+mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
+mkLazyArgStop ty cci = Stop ty cci
+
+-------------------
+contIsRhsOrArg :: SimplCont -> Bool
+contIsRhsOrArg (Stop {})       = True
+contIsRhsOrArg (StrictBind {}) = True
+contIsRhsOrArg (StrictArg {})  = True
+contIsRhsOrArg _               = False
+
+contIsRhs :: SimplCont -> Bool
+contIsRhs (Stop _ RhsCtxt) = True
+contIsRhs _                = False
+
+-------------------
+contIsDupable :: SimplCont -> Bool
+contIsDupable (Stop {})                  = True
+contIsDupable (ApplyTo  OkToDup _ _ _)   = True -- See Note [DupFlag invariants]
+contIsDupable (Select   OkToDup _ _ _ _) = True -- ...ditto...
+contIsDupable (CoerceIt _ cont)          = contIsDupable cont
+contIsDupable _                          = False
+
+-------------------
+contIsTrivial :: SimplCont -> Bool
+contIsTrivial (Stop {})                   = True
+contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
+contIsTrivial (ApplyTo _ (Coercion _) _ cont) = contIsTrivial cont
+contIsTrivial (CoerceIt _ cont)           = contIsTrivial cont
+contIsTrivial _                           = False
+
+-------------------
+contResultType :: SimplCont -> OutType
+contResultType (Stop ty _)            = ty
+contResultType (CoerceIt _ k)         = contResultType k
+contResultType (StrictBind _ _ _ _ k) = contResultType k
+contResultType (StrictArg _ _ k)      = contResultType k
+contResultType (Select _ _ _ _ k)     = contResultType k
+contResultType (ApplyTo _ _ _ k)      = contResultType k
+contResultType (TickIt _ k)           = contResultType k
+
+contInputType :: SimplCont -> OutType
+contInputType (Stop ty _)             = ty
+contInputType (CoerceIt co _)         = pFst (coercionKind co)
+contInputType (Select d b _ se _)     = perhapsSubstTy d se (idType b)
+contInputType (StrictBind b _ _ se _) = substTy se (idType b)
+contInputType (StrictArg ai _ _)      = funArgTy (ai_type ai)
+contInputType (ApplyTo d e se k)      = mkFunTy (perhapsSubstTy d se (exprType e)) (contInputType k)
+contInputType (TickIt _ k)            = contInputType k
+
+perhapsSubstTy :: DupFlag -> SimplEnv -> InType -> OutType
+perhapsSubstTy dup_flag se ty
+  | isSimplified dup_flag = ty
+  | otherwise             = substTy se ty
+
+-------------------
+countValArgs :: SimplCont -> Int
+countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ (Coercion _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ _        _ cont) = 1 + countValArgs cont
+countValArgs _                           = 0
+
+countArgs :: SimplCont -> Int
+countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont
+countArgs _                    = 0
+
+contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
+-- Summarises value args, discards type args and coercions
+-- The returned continuation of the call is only used to
+-- answer questions like "are you interesting?"
+contArgs cont
+  | lone cont = (True, [], cont)
+  | otherwise = go [] cont
+  where
+    lone (ApplyTo {})  = False  -- See Note [Lone variables] in CoreUnfold
+    lone (CoerceIt {}) = False
+    lone _             = True
+
+    go args (ApplyTo _ arg se cont)
+      | isTypeArg arg         = go args                           cont
+      | otherwise             = go (is_interesting arg se : args) cont
+    go args (CoerceIt _ cont) = go args cont
+    go args cont              = (False, reverse args, cont)
+
+    is_interesting arg se = interestingArg (substExpr (text "contArgs") se arg)
+                   -- Do *not* use short-cutting substitution here
+                   -- because we want to get as much IdInfo as possible
+
+pushSimplifiedArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
+pushSimplifiedArgs _env []         cont = cont
+pushSimplifiedArgs env  (arg:args) cont = ApplyTo Simplified arg env (pushSimplifiedArgs env args cont)
+                   -- The env has an empty SubstEnv
+
+dropArgs :: Int -> SimplCont -> SimplCont
+dropArgs 0 cont = cont
+dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
+dropArgs n other                = pprPanic "dropArgs" (ppr n <+> ppr other)
+
+{-
+Note [Interesting call context]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want to avoid inlining an expression where there can't possibly be
+any gain, such as in an argument position.  Hence, if the continuation
+is interesting (eg. a case scrutinee, application etc.) then we
+inline, otherwise we don't.
+
+Previously some_benefit used to return True only if the variable was
+applied to some value arguments.  This didn't work:
+
+        let x = _coerce_ (T Int) Int (I# 3) in
+        case _coerce_ Int (T Int) x of
+                I# y -> ....
+
+we want to inline x, but can't see that it's a constructor in a case
+scrutinee position, and some_benefit is False.
+
+Another example:
+
+dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
+
+....  case dMonadST _@_ x0 of (a,b,c) -> ....
+
+we'd really like to inline dMonadST here, but we *don't* want to
+inline if the case expression is just
+
+        case x of y { DEFAULT -> ... }
+
+since we can just eliminate this case instead (x is in WHNF).  Similar
+applies when x is bound to a lambda expression.  Hence
+contIsInteresting looks for case expressions with just a single
+default case.
+-}
+
+interestingCallContext :: SimplCont -> CallCtxt
+-- See Note [Interesting call context]
+interestingCallContext cont
+  = interesting cont
+  where
+    interesting (Select _ _bndr _ _ _) = CaseCtxt
+
+    interesting (ApplyTo _ arg _ cont)
+        | isTypeArg arg = interesting cont
+        | otherwise     = ValAppCtxt    -- Can happen if we have (f Int |> co) y
+                                        -- If f has an INLINE prag we need to give it some
+                                        -- motivation to inline. See Note [Cast then apply]
+                                        -- in CoreUnfold
+
+    interesting (StrictArg _ cci _) = cci
+    interesting (StrictBind {})     = BoringCtxt
+    interesting (Stop _ cci)        = cci
+    interesting (TickIt _ cci)      = interesting cci
+    interesting (CoerceIt _ cont)   = interesting cont
+        -- If this call is the arg of a strict function, the context
+        -- is a bit interesting.  If we inline here, we may get useful
+        -- evaluation information to avoid repeated evals: e.g.
+        --      x + (y * z)
+        -- Here the contIsInteresting makes the '*' keener to inline,
+        -- which in turn exposes a constructor which makes the '+' inline.
+        -- Assuming that +,* aren't small enough to inline regardless.
+        --
+        -- It's also very important to inline in a strict context for things
+        -- like
+        --              foldr k z (f x)
+        -- Here, the context of (f x) is strict, and if f's unfolding is
+        -- a build it's *great* to inline it here.  So we must ensure that
+        -- the context for (f x) is not totally uninteresting.
+
+
+-------------------
+mkArgInfo :: Id
+          -> [CoreRule] -- Rules for function
+          -> Int        -- Number of value args
+          -> SimplCont  -- Context of the call
+          -> ArgInfo
+
+mkArgInfo fun rules n_val_args call_cont
+  | n_val_args < idArity fun            -- Note [Unsaturated functions]
+  = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
+            , ai_rules = rules, ai_encl = False
+            , ai_strs = vanilla_stricts
+            , ai_discs = vanilla_discounts }
+  | otherwise
+  = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
+            , ai_rules = rules
+            , ai_encl = interestingArgContext rules call_cont
+            , ai_strs  = add_type_str fun_ty arg_stricts
+            , ai_discs = arg_discounts }
+  where
+    fun_ty = idType fun
+
+    vanilla_discounts, arg_discounts :: [Int]
+    vanilla_discounts = repeat 0
+    arg_discounts = case idUnfolding fun of
+                        CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
+                              -> discounts ++ vanilla_discounts
+                        _     -> vanilla_discounts
+
+    vanilla_stricts, arg_stricts :: [Bool]
+    vanilla_stricts  = repeat False
+
+    arg_stricts
+      = case splitStrictSig (idStrictness fun) of
+          (demands, result_info)
+                | not (demands `lengthExceeds` n_val_args)
+                ->      -- Enough args, use the strictness given.
+                        -- For bottoming functions we used to pretend that the arg
+                        -- is lazy, so that we don't treat the arg as an
+                        -- interesting context.  This avoids substituting
+                        -- top-level bindings for (say) strings into
+                        -- calls to error.  But now we are more careful about
+                        -- inlining lone variables, so its ok (see SimplUtils.analyseCont)
+                   if isBotRes result_info then
+                        map isStrictDmd demands         -- Finite => result is bottom
+                   else
+                        map isStrictDmd demands ++ vanilla_stricts
+               | otherwise
+               -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
+                                <+> ppr n_val_args <+> ppr demands )
+                   vanilla_stricts      -- Not enough args, or no strictness
+
+    add_type_str :: Type -> [Bool] -> [Bool]
+    -- If the function arg types are strict, record that in the 'strictness bits'
+    -- No need to instantiate because unboxed types (which dominate the strict
+    -- types) can't instantiate type variables.
+    -- add_type_str is done repeatedly (for each call); might be better
+    -- once-for-all in the function
+    -- But beware primops/datacons with no strictness
+    add_type_str _ [] = []
+    add_type_str fun_ty strs            -- Look through foralls
+        | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty       -- Includes coercions
+        = add_type_str fun_ty' strs
+    add_type_str fun_ty (str:strs)      -- Add strict-type info
+        | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
+        = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
+    add_type_str _ strs
+        = strs
+
+{- Note [Unsaturated functions]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (test eyeball/inline4)
+        x = a:as
+        y = f x
+where f has arity 2.  Then we do not want to inline 'x', because
+it'll just be floated out again.  Even if f has lots of discounts
+on its first argument -- it must be saturated for these to kick in
+-}
+
+interestingArgContext :: [CoreRule] -> SimplCont -> Bool
+-- If the argument has form (f x y), where x,y are boring,
+-- and f is marked INLINE, then we don't want to inline f.
+-- But if the context of the argument is
+--      g (f x y)
+-- where g has rules, then we *do* want to inline f, in case it
+-- exposes a rule that might fire.  Similarly, if the context is
+--      h (g (f x x))
+-- where h has rules, then we do want to inline f; hence the
+-- call_cont argument to interestingArgContext
+--
+-- The ai-rules flag makes this happen; if it's
+-- set, the inliner gets just enough keener to inline f
+-- regardless of how boring f's arguments are, if it's marked INLINE
+--
+-- The alternative would be to *always* inline an INLINE function,
+-- regardless of how boring its context is; but that seems overkill
+-- For example, it'd mean that wrapper functions were always inlined
+interestingArgContext rules call_cont
+  = notNull rules || enclosing_fn_has_rules
+  where
+    enclosing_fn_has_rules = go call_cont
+
+    go (Select {})         = False
+    go (ApplyTo {})        = False
+    go (StrictArg _ cci _) = interesting cci
+    go (StrictBind {})     = False      -- ??
+    go (CoerceIt _ c)      = go c
+    go (Stop _ cci)        = interesting cci
+    go (TickIt _ c)        = go c
+
+    interesting RuleArgCtxt = True
+    interesting _           = False
+
+{-
+************************************************************************
+*                                                                      *
+                  SimplifierMode
+*                                                                      *
+************************************************************************
+
+The SimplifierMode controls several switches; see its definition in
+CoreMonad
+        sm_rules      :: Bool     -- Whether RULES are enabled
+        sm_inline     :: Bool     -- Whether inlining is enabled
+        sm_case_case  :: Bool     -- Whether case-of-case is enabled
+        sm_eta_expand :: Bool     -- Whether eta-expansion is enabled
+-}
+
+simplEnvForGHCi :: DynFlags -> SimplEnv
+simplEnvForGHCi dflags
+  = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
+                           , sm_phase = InitialPhase
+                           , sm_rules = rules_on
+                           , sm_inline = False
+                           , sm_eta_expand = eta_expand_on
+                           , sm_case_case = True }
+  where
+    rules_on      = gopt Opt_EnableRewriteRules   dflags
+    eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
+   -- Do not do any inlining, in case we expose some unboxed
+   -- tuple stuff that confuses the bytecode interpreter
+
+updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
+-- See Note [Simplifying inside stable unfoldings]
+updModeForStableUnfoldings inline_rule_act current_mode
+  = current_mode { sm_phase = phaseFromActivation inline_rule_act
+                 , sm_inline = True
+                 , sm_eta_expand = False }
+                 -- For sm_rules, just inherit; sm_rules might be "off"
+                 -- because of -fno-enable-rewrite-rules
+  where
+    phaseFromActivation (ActiveAfter n) = Phase n
+    phaseFromActivation _               = InitialPhase
+
+{-
+Note [Inlining in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Something is inlined if
+   (i)   the sm_inline flag is on, AND
+   (ii)  the thing has an INLINE pragma, AND
+   (iii) the thing is inlinable in the earliest phase.
+
+Example of why (iii) is important:
+  {-# INLINE [~1] g #-}
+  g = ...
+
+  {-# INLINE f #-}
+  f x = g (g x)
+
+If we were to inline g into f's inlining, then an importing module would
+never be able to do
+        f e --> g (g e) ---> RULE fires
+because the stable unfolding for f has had g inlined into it.
+
+On the other hand, it is bad not to do ANY inlining into an
+stable unfolding, because then recursive knots in instance declarations
+don't get unravelled.
+
+However, *sometimes* SimplGently must do no call-site inlining at all
+(hence sm_inline = False).  Before full laziness we must be careful
+not to inline wrappers, because doing so inhibits floating
+    e.g. ...(case f x of ...)...
+    ==> ...(case (case x of I# x# -> fw x#) of ...)...
+    ==> ...(case x of I# x# -> case fw x# of ...)...
+and now the redex (f x) isn't floatable any more.
+
+The no-inlining thing is also important for Template Haskell.  You might be
+compiling in one-shot mode with -O2; but when TH compiles a splice before
+running it, we don't want to use -O2.  Indeed, we don't want to inline
+anything, because the byte-code interpreter might get confused about
+unboxed tuples and suchlike.
+
+Note [Simplifying inside stable unfoldings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We must take care with simplification inside stable unfoldings (which come from
+INLINE pragmas).
+
+First, consider the following example
+        let f = \pq -> BIG
+        in
+        let g = \y -> f y y
+            {-# INLINE g #-}
+        in ...g...g...g...g...g...
+Now, if that's the ONLY occurrence of f, it might be inlined inside g,
+and thence copied multiple times when g is inlined. HENCE we treat
+any occurrence in a stable unfolding as a multiple occurrence, not a single
+one; see OccurAnal.addRuleUsage.
+
+Second, we do want *do* to some modest rules/inlining stuff in stable
+unfoldings, partly to eliminate senseless crap, and partly to break
+the recursive knots generated by instance declarations.
+
+However, suppose we have
+        {-# INLINE <act> f #-}
+        f = <rhs>
+meaning "inline f in phases p where activation <act>(p) holds".
+Then what inlinings/rules can we apply to the copy of <rhs> captured in
+f's stable unfolding?  Our model is that literally <rhs> is substituted for
+f when it is inlined.  So our conservative plan (implemented by
+updModeForStableUnfoldings) is this:
+
+  -------------------------------------------------------------
+  When simplifying the RHS of an stable unfolding, set the phase
+  to the phase in which the stable unfolding first becomes active
+  -------------------------------------------------------------
+
+That ensures that
+
+  a) Rules/inlinings that *cease* being active before p will
+     not apply to the stable unfolding, consistent with it being
+     inlined in its *original* form in phase p.
+
+  b) Rules/inlinings that only become active *after* p will
+     not apply to the stable unfolding, again to be consistent with
+     inlining the *original* rhs in phase p.
+
+For example,
+        {-# INLINE f #-}
+        f x = ...g...
+
+        {-# NOINLINE [1] g #-}
+        g y = ...
+
+        {-# RULE h g = ... #-}
+Here we must not inline g into f's RHS, even when we get to phase 0,
+because when f is later inlined into some other module we want the
+rule for h to fire.
+
+Similarly, consider
+        {-# INLINE f #-}
+        f x = ...g...
+
+        g y = ...
+and suppose that there are auto-generated specialisations and a strictness
+wrapper for g.  The specialisations get activation AlwaysActive, and the
+strictness wrapper get activation (ActiveAfter 0).  So the strictness
+wrepper fails the test and won't be inlined into f's stable unfolding. That
+means f can inline, expose the specialised call to g, so the specialisation
+rules can fire.
+
+A note about wrappers
+~~~~~~~~~~~~~~~~~~~~~
+It's also important not to inline a worker back into a wrapper.
+A wrapper looks like
+        wraper = inline_me (\x -> ...worker... )
+Normally, the inline_me prevents the worker getting inlined into
+the wrapper (initially, the worker's only call site!).  But,
+if the wrapper is sure to be called, the strictness analyser will
+mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
+continuation.
+-}
+
+activeUnfolding :: SimplEnv -> Id -> Bool
+activeUnfolding env
+  | not (sm_inline mode) = active_unfolding_minimal
+  | otherwise            = case sm_phase mode of
+                             InitialPhase -> active_unfolding_gentle
+                             Phase n      -> active_unfolding n
+  where
+    mode = getMode env
+
+getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
+-- When matching in RULE, we want to "look through" an unfolding
+-- (to see a constructor) if *rules* are on, even if *inlinings*
+-- are not.  A notable example is DFuns, which really we want to
+-- match in rules like (op dfun) in gentle mode. Another example
+-- is 'otherwise' which we want exprIsConApp_maybe to be able to
+-- see very early on
+getUnfoldingInRuleMatch env
+  = (in_scope, id_unf)
+  where
+    in_scope = seInScope env
+    mode = getMode env
+    id_unf id | unf_is_active id = idUnfolding id
+              | otherwise        = NoUnfolding
+    unf_is_active id
+     | not (sm_rules mode) = active_unfolding_minimal id
+     | otherwise           = isActive (sm_phase mode) (idInlineActivation id)
+
+active_unfolding_minimal :: Id -> Bool
+-- Compuslory unfoldings only
+-- Ignore SimplGently, because we want to inline regardless;
+-- the Id has no top-level binding at all
+--
+-- NB: we used to have a second exception, for data con wrappers.
+-- On the grounds that we use gentle mode for rule LHSs, and
+-- they match better when data con wrappers are inlined.
+-- But that only really applies to the trivial wrappers (like (:)),
+-- and they are now constructed as Compulsory unfoldings (in MkId)
+-- so they'll happen anyway.
+active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
+
+active_unfolding :: PhaseNum -> Id -> Bool
+active_unfolding n id = isActiveIn n (idInlineActivation id)
+
+active_unfolding_gentle :: Id -> Bool
+-- Anything that is early-active
+-- See Note [Gentle mode]
+active_unfolding_gentle id
+  =  isInlinePragma prag
+  && isEarlyActive (inlinePragmaActivation prag)
+       -- NB: wrappers are not early-active
+  where
+    prag = idInlinePragma id
+
+----------------------
+activeRule :: SimplEnv -> Activation -> Bool
+-- Nothing => No rules at all
+activeRule env
+  | not (sm_rules mode) = \_ -> False     -- Rewriting is off
+  | otherwise           = isActive (sm_phase mode)
+  where
+    mode = getMode env
+
+{-
+************************************************************************
+*                                                                      *
+                  preInlineUnconditionally
+*                                                                      *
+************************************************************************
+
+preInlineUnconditionally
+~~~~~~~~~~~~~~~~~~~~~~~~
+@preInlineUnconditionally@ examines a bndr to see if it is used just
+once in a completely safe way, so that it is safe to discard the
+binding inline its RHS at the (unique) usage site, REGARDLESS of how
+big the RHS might be.  If this is the case we don't simplify the RHS
+first, but just inline it un-simplified.
+
+This is much better than first simplifying a perhaps-huge RHS and then
+inlining and re-simplifying it.  Indeed, it can be at least quadratically
+better.  Consider
+
+        x1 = e1
+        x2 = e2[x1]
+        x3 = e3[x2]
+        ...etc...
+        xN = eN[xN-1]
+
+We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
+This can happen with cascades of functions too:
+
+        f1 = \x1.e1
+        f2 = \xs.e2[f1]
+        f3 = \xs.e3[f3]
+        ...etc...
+
+THE MAIN INVARIANT is this:
+
+        ----  preInlineUnconditionally invariant -----
+   IF preInlineUnconditionally chooses to inline x = <rhs>
+   THEN doing the inlining should not change the occurrence
+        info for the free vars of <rhs>
+        ----------------------------------------------
+
+For example, it's tempting to look at trivial binding like
+        x = y
+and inline it unconditionally.  But suppose x is used many times,
+but this is the unique occurrence of y.  Then inlining x would change
+y's occurrence info, which breaks the invariant.  It matters: y
+might have a BIG rhs, which will now be dup'd at every occurrenc of x.
+
+
+Even RHSs labelled InlineMe aren't caught here, because there might be
+no benefit from inlining at the call site.
+
+[Sept 01] Don't unconditionally inline a top-level thing, because that
+can simply make a static thing into something built dynamically.  E.g.
+        x = (a,b)
+        main = \s -> h x
+
+[Remember that we treat \s as a one-shot lambda.]  No point in
+inlining x unless there is something interesting about the call site.
+
+But watch out: if you aren't careful, some useful foldr/build fusion
+can be lost (most notably in spectral/hartel/parstof) because the
+foldr didn't see the build.  Doing the dynamic allocation isn't a big
+deal, in fact, but losing the fusion can be.  But the right thing here
+seems to be to do a callSiteInline based on the fact that there is
+something interesting about the call site (it's strict).  Hmm.  That
+seems a bit fragile.
+
+Conclusion: inline top level things gaily until Phase 0 (the last
+phase), at which point don't.
+
+Note [pre/postInlineUnconditionally in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in gentle mode we want to do preInlineUnconditionally.  The
+reason is that too little clean-up happens if you don't inline
+use-once things.  Also a bit of inlining is *good* for full laziness;
+it can expose constant sub-expressions.  Example in
+spectral/mandel/Mandel.hs, where the mandelset function gets a useful
+let-float if you inline windowToViewport
+
+However, as usual for Gentle mode, do not inline things that are
+inactive in the intial stages.  See Note [Gentle mode].
+
+Note [Stable unfoldings and preInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
+Example
+
+   {-# INLINE f #-}
+   f :: Eq a => a -> a
+   f x = ...
+
+   fInt :: Int -> Int
+   fInt = f Int dEqInt
+
+   ...fInt...fInt...fInt...
+
+Here f occurs just once, in the RHS of f1. But if we inline it there
+we'll lose the opportunity to inline at each of fInt's call sites.
+The INLINE pragma will only inline when the application is saturated
+for exactly this reason; and we don't want PreInlineUnconditionally
+to second-guess it.  A live example is Trac #3736.
+    c.f. Note [Stable unfoldings and postInlineUnconditionally]
+
+Note [Top-level botomming Ids]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't inline top-level Ids that are bottoming, even if they are used just
+once, because FloatOut has gone to some trouble to extract them out.
+Inlining them won't make the program run faster!
+
+Note [Do not inline CoVars unconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Coercion variables appear inside coercions, and the RHS of a let-binding
+is a term (not a coercion) so we can't necessarily inline the latter in
+the former.
+-}
+
+preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
+-- Precondition: rhs satisfies the let/app invariant
+-- See Note [CoreSyn let/app invariant] in CoreSyn
+-- Reason: we don't want to inline single uses, or discard dead bindings,
+--         for unlifted, side-effect-full bindings
+preInlineUnconditionally dflags env top_lvl bndr rhs
+  | not active                               = False
+  | isStableUnfolding (idUnfolding bndr)     = False -- Note [Stable unfoldings and preInlineUnconditionally]
+  | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
+  | not (gopt Opt_SimplPreInlining dflags)   = False
+  | isCoVar bndr                             = False -- Note [Do not inline CoVars unconditionally]
+  | otherwise = case idOccInfo bndr of
+                  IAmDead                    -> True -- Happens in ((\x.1) v)
+                  OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
+                  _                          -> False
+  where
+    mode = getMode env
+    active = isActive (sm_phase mode) act
+             -- See Note [pre/postInlineUnconditionally in gentle mode]
+    act = idInlineActivation bndr
+    try_once in_lam int_cxt     -- There's one textual occurrence
+        | not in_lam = isNotTopLevel top_lvl || early_phase
+        | otherwise  = int_cxt && canInlineInLam rhs
+
+-- Be very careful before inlining inside a lambda, because (a) we must not
+-- invalidate occurrence information, and (b) we want to avoid pushing a
+-- single allocation (here) into multiple allocations (inside lambda).
+-- Inlining a *function* with a single *saturated* call would be ok, mind you.
+--      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
+--      where
+--              is_cheap = exprIsCheap rhs
+--              ok = is_cheap && int_cxt
+
+        --      int_cxt         The context isn't totally boring
+        -- E.g. let f = \ab.BIG in \y. map f xs
+        --      Don't want to substitute for f, because then we allocate
+        --      its closure every time the \y is called
+        -- But: let f = \ab.BIG in \y. map (f y) xs
+        --      Now we do want to substitute for f, even though it's not
+        --      saturated, because we're going to allocate a closure for
+        --      (f y) every time round the loop anyhow.
+
+        -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
+        -- so substituting rhs inside a lambda doesn't change the occ info.
+        -- Sadly, not quite the same as exprIsHNF.
+    canInlineInLam (Lit _)              = True
+    canInlineInLam (Lam b e)            = isRuntimeVar b || canInlineInLam e
+    canInlineInLam _                    = False
+      -- not ticks.  Counting ticks cannot be duplicated, and non-counting
+      -- ticks around a Lam will disappear anyway.
+
+    early_phase = case sm_phase mode of
+                    Phase 0 -> False
+                    _       -> True
+-- If we don't have this early_phase test, consider
+--      x = length [1,2,3]
+-- The full laziness pass carefully floats all the cons cells to
+-- top level, and preInlineUnconditionally floats them all back in.
+-- Result is (a) static allocation replaced by dynamic allocation
+--           (b) many simplifier iterations because this tickles
+--               a related problem; only one inlining per pass
+--
+-- On the other hand, I have seen cases where top-level fusion is
+-- lost if we don't inline top level thing (e.g. string constants)
+-- Hence the test for phase zero (which is the phase for all the final
+-- simplifications).  Until phase zero we take no special notice of
+-- top level things, but then we become more leery about inlining
+-- them.
+
+{-
+************************************************************************
+*                                                                      *
+                  postInlineUnconditionally
+*                                                                      *
+************************************************************************
+
+postInlineUnconditionally
+~~~~~~~~~~~~~~~~~~~~~~~~~
+@postInlineUnconditionally@ decides whether to unconditionally inline
+a thing based on the form of its RHS; in particular if it has a
+trivial RHS.  If so, we can inline and discard the binding altogether.
+
+NB: a loop breaker has must_keep_binding = True and non-loop-breakers
+only have *forward* references. Hence, it's safe to discard the binding
+
+NOTE: This isn't our last opportunity to inline.  We're at the binding
+site right now, and we'll get another opportunity when we get to the
+ocurrence(s)
+
+Note that we do this unconditional inlining only for trival RHSs.
+Don't inline even WHNFs inside lambdas; doing so may simply increase
+allocation when the function is called. This isn't the last chance; see
+NOTE above.
+
+NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
+Because we don't even want to inline them into the RHS of constructor
+arguments. See NOTE above
+
+NB: At one time even NOINLINE was ignored here: if the rhs is trivial
+it's best to inline it anyway.  We often get a=E; b=a from desugaring,
+with both a and b marked NOINLINE.  But that seems incompatible with
+our new view that inlining is like a RULE, so I'm sticking to the 'active'
+story for now.
+-}
+
+postInlineUnconditionally
+    :: DynFlags -> SimplEnv -> TopLevelFlag
+    -> OutId            -- The binder (an InId would be fine too)
+                        --            (*not* a CoVar)
+    -> OccInfo          -- From the InId
+    -> OutExpr
+    -> Unfolding
+    -> Bool
+-- Precondition: rhs satisfies the let/app invariant
+-- See Note [CoreSyn let/app invariant] in CoreSyn
+-- Reason: we don't want to inline single uses, or discard dead bindings,
+--         for unlifted, side-effect-full bindings
+postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
+  | not active                  = False
+  | isWeakLoopBreaker occ_info  = False -- If it's a loop-breaker of any kind, don't inline
+                                        -- because it might be referred to "earlier"
+  | isExportedId bndr           = False
+  | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
+  | isTopLevel top_lvl          = False -- Note [Top level and postInlineUnconditionally]
+  | exprIsTrivial rhs           = True
+  | otherwise
+  = case occ_info of
+        -- The point of examining occ_info here is that for *non-values*
+        -- that occur outside a lambda, the call-site inliner won't have
+        -- a chance (because it doesn't know that the thing
+        -- only occurs once).   The pre-inliner won't have gotten
+        -- it either, if the thing occurs in more than one branch
+        -- So the main target is things like
+        --      let x = f y in
+        --      case v of
+        --         True  -> case x of ...
+        --         False -> case x of ...
+        -- This is very important in practice; e.g. wheel-seive1 doubles
+        -- in allocation if you miss this out
+      OneOcc in_lam _one_br int_cxt     -- OneOcc => no code-duplication issue
+        ->     smallEnoughToInline dflags unfolding     -- Small enough to dup
+                        -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
+                        --
+                        -- NB: Do NOT inline arbitrarily big things, even if one_br is True
+                        -- Reason: doing so risks exponential behaviour.  We simplify a big
+                        --         expression, inline it, and simplify it again.  But if the
+                        --         very same thing happens in the big expression, we get
+                        --         exponential cost!
+                        -- PRINCIPLE: when we've already simplified an expression once,
+                        -- make sure that we only inline it if it's reasonably small.
+
+           && (not in_lam ||
+                        -- Outside a lambda, we want to be reasonably aggressive
+                        -- about inlining into multiple branches of case
+                        -- e.g. let x = <non-value>
+                        --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
+                        -- Inlining can be a big win if C3 is the hot-spot, even if
+                        -- the uses in C1, C2 are not 'interesting'
+                        -- An example that gets worse if you add int_cxt here is 'clausify'
+
+                (isCheapUnfolding unfolding && int_cxt))
+                        -- isCheap => acceptable work duplication; in_lam may be true
+                        -- int_cxt to prevent us inlining inside a lambda without some
+                        -- good reason.  See the notes on int_cxt in preInlineUnconditionally
+
+      IAmDead -> True   -- This happens; for example, the case_bndr during case of
+                        -- known constructor:  case (a,b) of x { (p,q) -> ... }
+                        -- Here x isn't mentioned in the RHS, so we don't want to
+                        -- create the (dead) let-binding  let x = (a,b) in ...
+
+      _ -> False
+
+-- Here's an example that we don't handle well:
+--      let f = if b then Left (\x.BIG) else Right (\y.BIG)
+--      in \y. ....case f of {...} ....
+-- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
+-- But
+--  - We can't preInlineUnconditionally because that woud invalidate
+--    the occ info for b.
+--  - We can't postInlineUnconditionally because the RHS is big, and
+--    that risks exponential behaviour
+--  - We can't call-site inline, because the rhs is big
+-- Alas!
+
+  where
+    active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
+        -- See Note [pre/postInlineUnconditionally in gentle mode]
+
+{-
+Note [Top level and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do postInlineUnconditionally for top-level things (even for
+ones that are trivial):
+
+  * Doing so will inline top-level error expressions that have been
+    carefully floated out by FloatOut.  More generally, it might
+    replace static allocation with dynamic.
+
+  * Even for trivial expressions there's a problem.  Consider
+      {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
+      blah xs = reverse xs
+      ruggle = sort
+    In one simplifier pass we might fire the rule, getting
+      blah xs = ruggle xs
+    but in *that* simplifier pass we must not do postInlineUnconditionally
+    on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
+
+    If the rhs is trivial it'll be inlined by callSiteInline, and then
+    the binding will be dead and discarded by the next use of OccurAnal
+
+  * There is less point, because the main goal is to get rid of local
+    bindings used in multiple case branches.
+
+  * The inliner should inline trivial things at call sites anyway.
+
+Note [Stable unfoldings and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Do not do postInlineUnconditionally if the Id has an stable unfolding,
+otherwise we lose the unfolding.  Example
+
+     -- f has stable unfolding with rhs (e |> co)
+     --   where 'e' is big
+     f = e |> co
+
+Then there's a danger we'll optimise to
+
+     f' = e
+     f = f' |> co
+
+and now postInlineUnconditionally, losing the stable unfolding on f.  Now f'
+won't inline because 'e' is too big.
+
+    c.f. Note [Stable unfoldings and preInlineUnconditionally]
+
+
+************************************************************************
+*                                                                      *
+        Rebuilding a lambda
+*                                                                      *
+************************************************************************
+-}
+
+mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
+-- mkLam tries three things
+--      a) eta reduction, if that gives a trivial expression
+--      b) eta expansion [only if there are some value lambdas]
+
+mkLam [] body _cont
+  = return body
+mkLam bndrs body cont
+  = do  { dflags <- getDynFlags
+        ; mkLam' dflags bndrs body }
+  where
+    mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
+    mkLam' dflags bndrs (Cast body co)
+      | not (any bad bndrs)
+        -- Note [Casts and lambdas]
+      = do { lam <- mkLam' dflags bndrs body
+           ; return (mkCast lam (mkPiCos Representational bndrs co)) }
+      where
+        co_vars  = tyCoVarsOfCo co
+        bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
+
+    mkLam' dflags bndrs body@(Lam {})
+      = mkLam' dflags (bndrs ++ bndrs1) body1
+      where
+        (bndrs1, body1) = collectBinders body
+
+    mkLam' dflags bndrs body
+      | gopt Opt_DoEtaReduction dflags
+      , Just etad_lam <- tryEtaReduce bndrs body
+      = do { tick (EtaReduction (head bndrs))
+           ; return etad_lam }
+
+      | not (contIsRhs cont)   -- See Note [Eta-expanding lambdas]
+      , gopt Opt_DoLambdaEtaExpansion dflags
+      , any isRuntimeVar bndrs
+      , let body_arity = exprEtaExpandArity dflags body
+      , body_arity > 0
+      = do { tick (EtaExpansion (head bndrs))
+           ; return (mkLams bndrs (etaExpand body_arity body)) }
+
+      | otherwise
+      = return (mkLams bndrs body)
+
+{-
+Note [Eta expanding lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In general we *do* want to eta-expand lambdas. Consider
+   f (\x -> case x of (a,b) -> \s -> blah)
+where 's' is a state token, and hence can be eta expanded.  This
+showed up in the code for GHc.IO.Handle.Text.hPutChar, a rather
+important function!
+
+The eta-expansion will never happen unless we do it now.  (Well, it's
+possible that CorePrep will do it, but CorePrep only has a half-baked
+eta-expander that can't deal with casts.  So it's much better to do it
+here.)
+
+However, when the lambda is let-bound, as the RHS of a let, we have a
+better eta-expander (in the form of tryEtaExpandRhs), so we don't
+bother to try expansion in mkLam in that case; hence the contIsRhs
+guard.
+
+Note [Casts and lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+        (\x. (\y. e) `cast` g1) `cast` g2
+There is a danger here that the two lambdas look separated, and the
+full laziness pass might float an expression to between the two.
+
+So this equation in mkLam' floats the g1 out, thus:
+        (\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
+where x:tx.
+
+In general, this floats casts outside lambdas, where (I hope) they
+might meet and cancel with some other cast:
+        \x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
+        /\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
+        /\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
+                          (if not (g `in` co))
+
+Notice that it works regardless of 'e'.  Originally it worked only
+if 'e' was itself a lambda, but in some cases that resulted in
+fruitless iteration in the simplifier.  A good example was when
+compiling Text.ParserCombinators.ReadPrec, where we had a definition
+like    (\x. Get `cast` g)
+where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
+the Get, and the next iteration eta-reduced it, and then eta-expanded
+it again.
+
+Note also the side condition for the case of coercion binders.
+It does not make sense to transform
+        /\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
+because the latter is not well-kinded.
+
+************************************************************************
+*                                                                      *
+              Eta expansion
+*                                                                      *
+************************************************************************
+-}
+
+tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
+-- See Note [Eta-expanding at let bindings]
+tryEtaExpandRhs env bndr rhs
+  = do { dflags <- getDynFlags
+       ; (new_arity, new_rhs) <- try_expand dflags
+
+       ; WARN( new_arity < old_id_arity,
+               (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_id_arity
+                <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
+                        -- Note [Arity decrease] in Simplify
+         return (new_arity, new_rhs) }
+  where
+    try_expand dflags
+      | exprIsTrivial rhs
+      = return (exprArity rhs, rhs)
+
+      | sm_eta_expand (getMode env)      -- Provided eta-expansion is on
+      , let new_arity1 = findRhsArity dflags bndr rhs old_arity
+            new_arity2 = idCallArity bndr
+            new_arity  = max new_arity1 new_arity2
+      , new_arity > old_arity      -- And the current manifest arity isn't enough
+      = do { tick (EtaExpansion bndr)
+           ; return (new_arity, etaExpand new_arity rhs) }
+      | otherwise
+      = return (old_arity, rhs)
+
+    old_arity    = exprArity rhs -- See Note [Do not expand eta-expand PAPs]
+    old_id_arity = idArity bndr
+
+{-
+Note [Eta-expanding at let bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We now eta expand at let-bindings, which is where the payoff comes.
+The most significant thing is that we can do a simple arity analysis
+(in CoreArity.findRhsArity), which we can't do for free-floating lambdas
+
+One useful consequence of not eta-expanding lambdas is this example:
+   genMap :: C a => ...
+   {-# INLINE genMap #-}
+   genMap f xs = ...
+
+   myMap :: D a => ...
+   {-# INLINE myMap #-}
+   myMap = genMap
+
+Notice that 'genMap' should only inline if applied to two arguments.
+In the stable unfolding for myMap we'll have the unfolding
+    (\d -> genMap Int (..d..))
+We do not want to eta-expand to
+    (\d f xs -> genMap Int (..d..) f xs)
+because then 'genMap' will inline, and it really shouldn't: at least
+as far as the programmer is concerned, it's not applied to two
+arguments!
+
+Note [Do not eta-expand PAPs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We used to have old_arity = manifestArity rhs, which meant that we
+would eta-expand even PAPs.  But this gives no particular advantage,
+and can lead to a massive blow-up in code size, exhibited by Trac #9020.
+Suppose we have a PAP
+    foo :: IO ()
+    foo = returnIO ()
+Then we can eta-expand do
+    foo = (\eta. (returnIO () |> sym g) eta) |> g
+where
+    g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
+
+But there is really no point in doing this, and it generates masses of
+coercions and whatnot that eventually disappear again. For T9020, GHC
+allocated 6.6G beore, and 0.8G afterwards; and residency dropped from
+1.8G to 45M.
+
+But note that this won't eta-expand, say
+  f = \g -> map g
+Does it matter not eta-expanding such functions?  I'm not sure.  Perhaps
+strictness analysis will have less to bite on?
+
+
+************************************************************************
+*                                                                      *
+\subsection{Floating lets out of big lambdas}
+*                                                                      *
+************************************************************************
+
+Note [Floating and type abstraction]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this:
+        x = /\a. C e1 e2
+We'd like to float this to
+        y1 = /\a. e1
+        y2 = /\a. e2
+        x  = /\a. C (y1 a) (y2 a)
+for the usual reasons: we want to inline x rather vigorously.
+
+You may think that this kind of thing is rare.  But in some programs it is
+common.  For example, if you do closure conversion you might get:
+
+        data a :-> b = forall e. (e -> a -> b) :$ e
+
+        f_cc :: forall a. a :-> a
+        f_cc = /\a. (\e. id a) :$ ()
+
+Now we really want to inline that f_cc thing so that the
+construction of the closure goes away.
+
+So I have elaborated simplLazyBind to understand right-hand sides that look
+like
+        /\ a1..an. body
+
+and treat them specially. The real work is done in SimplUtils.abstractFloats,
+but there is quite a bit of plumbing in simplLazyBind as well.
+
+The same transformation is good when there are lets in the body:
+
+        /\abc -> let(rec) x = e in b
+   ==>
+        let(rec) x' = /\abc -> let x = x' a b c in e
+        in
+        /\abc -> let x = x' a b c in b
+
+This is good because it can turn things like:
+
+        let f = /\a -> letrec g = ... g ... in g
+into
+        letrec g' = /\a -> ... g' a ...
+        in
+        let f = /\ a -> g' a
+
+which is better.  In effect, it means that big lambdas don't impede
+let-floating.
+
+This optimisation is CRUCIAL in eliminating the junk introduced by
+desugaring mutually recursive definitions.  Don't eliminate it lightly!
+
+[May 1999]  If we do this transformation *regardless* then we can
+end up with some pretty silly stuff.  For example,
+
+        let
+            st = /\ s -> let { x1=r1 ; x2=r2 } in ...
+        in ..
+becomes
+        let y1 = /\s -> r1
+            y2 = /\s -> r2
+            st = /\s -> ...[y1 s/x1, y2 s/x2]
+        in ..
+
+Unless the "..." is a WHNF there is really no point in doing this.
+Indeed it can make things worse.  Suppose x1 is used strictly,
+and is of the form
+
+        x1* = case f y of { (a,b) -> e }
+
+If we abstract this wrt the tyvar we then can't do the case inline
+as we would normally do.
+
+That's why the whole transformation is part of the same process that
+floats let-bindings and constructor arguments out of RHSs.  In particular,
+it is guarded by the doFloatFromRhs call in simplLazyBind.
+-}
+
+abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
+abstractFloats main_tvs body_env body
+  = ASSERT( notNull body_floats )
+    do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
+        ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
+  where
+    main_tv_set = mkVarSet main_tvs
+    body_floats = getFloatBinds body_env
+    empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
+
+    abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
+    abstract subst (NonRec id rhs)
+      = do { (poly_id, poly_app) <- mk_poly tvs_here id
+           ; let poly_rhs = mkLams tvs_here rhs'
+                 subst'   = CoreSubst.extendIdSubst subst id poly_app
+           ; return (subst', (NonRec poly_id poly_rhs)) }
+      where
+        rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
+        tvs_here = varSetElemsKvsFirst (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
+
+                -- Abstract only over the type variables free in the rhs
+                -- wrt which the new binding is abstracted.  But the naive
+                -- approach of abstract wrt the tyvars free in the Id's type
+                -- fails. Consider:
+                --      /\ a b -> let t :: (a,b) = (e1, e2)
+                --                    x :: a     = fst t
+                --                in ...
+                -- Here, b isn't free in x's type, but we must nevertheless
+                -- abstract wrt b as well, because t's type mentions b.
+                -- Since t is floated too, we'd end up with the bogus:
+                --      poly_t = /\ a b -> (e1, e2)
+                --      poly_x = /\ a   -> fst (poly_t a *b*)
+                -- So for now we adopt the even more naive approach of
+                -- abstracting wrt *all* the tyvars.  We'll see if that
+                -- gives rise to problems.   SLPJ June 98
+
+    abstract subst (Rec prs)
+       = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
+            ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
+                  poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
+                              | rhs <- rhss]
+            ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
+       where
+         (ids,rhss) = unzip prs
+                -- For a recursive group, it's a bit of a pain to work out the minimal
+                -- set of tyvars over which to abstract:
+                --      /\ a b c.  let x = ...a... in
+                --                 letrec { p = ...x...q...
+                --                          q = .....p...b... } in
+                --                 ...
+                -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
+                -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
+                -- Since it's a pain, we just use the whole set, which is always safe
+                --
+                -- If you ever want to be more selective, remember this bizarre case too:
+                --      x::a = x
+                -- Here, we must abstract 'x' over 'a'.
+         tvs_here = sortQuantVars main_tvs
+
+    mk_poly tvs_here var
+      = do { uniq <- getUniqueM
+           ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
+                  poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
+                  poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs
+                              mkLocalId poly_name poly_ty
+           ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
+                -- In the olden days, it was crucial to copy the occInfo of the original var,
+                -- because we were looking at occurrence-analysed but as yet unsimplified code!
+                -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
+                -- at already simplified code, so it doesn't matter
+                --
+                -- It's even right to retain single-occurrence or dead-var info:
+                -- Suppose we started with  /\a -> let x = E in B
+                -- where x occurs once in B. Then we transform to:
+                --      let x' = /\a -> E in /\a -> let x* = x' a in B
+                -- where x* has an INLINE prag on it.  Now, once x* is inlined,
+                -- the occurrences of x' will be just the occurrences originally
+                -- pinned on x.
+
+{-
+Note [Abstract over coercions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
+type variable a.  Rather than sort this mess out, we simply bale out and abstract
+wrt all the type variables if any of them are coercion variables.
+
+
+Historical note: if you use let-bindings instead of a substitution, beware of this:
+
+                -- Suppose we start with:
+                --
+                --      x = /\ a -> let g = G in E
+                --
+                -- Then we'll float to get
+                --
+                --      x = let poly_g = /\ a -> G
+                --          in /\ a -> let g = poly_g a in E
+                --
+                -- But now the occurrence analyser will see just one occurrence
+                -- of poly_g, not inside a lambda, so the simplifier will
+                -- PreInlineUnconditionally poly_g back into g!  Badk to square 1!
+                -- (I used to think that the "don't inline lone occurrences" stuff
+                --  would stop this happening, but since it's the *only* occurrence,
+                --  PreInlineUnconditionally kicks in first!)
+                --
+                -- Solution: put an INLINE note on g's RHS, so that poly_g seems
+                --           to appear many times.  (NB: mkInlineMe eliminates
+                --           such notes on trivial RHSs, so do it manually.)
+
+************************************************************************
+*                                                                      *
+                prepareAlts
+*                                                                      *
+************************************************************************
+
+prepareAlts tries these things:
+
+1.  Eliminate alternatives that cannot match, including the
+    DEFAULT alternative.
+
+2.  If the DEFAULT alternative can match only one possible constructor,
+    then make that constructor explicit.
+    e.g.
+        case e of x { DEFAULT -> rhs }
+     ===>
+        case e of x { (a,b) -> rhs }
+    where the type is a single constructor type.  This gives better code
+    when rhs also scrutinises x or e.
+
+3. Returns a list of the constructors that cannot holds in the
+   DEFAULT alternative (if there is one)
+
+Here "cannot match" includes knowledge from GADTs
+
+It's a good idea to do this stuff before simplifying the alternatives, to
+avoid simplifying alternatives we know can't happen, and to come up with
+the list of constructors that are handled, to put into the IdInfo of the
+case binder, for use when simplifying the alternatives.
+
+Eliminating the default alternative in (1) isn't so obvious, but it can
+happen:
+
+data Colour = Red | Green | Blue
+
+f x = case x of
+        Red -> ..
+        Green -> ..
+        DEFAULT -> h x
+
+h y = case y of
+        Blue -> ..
+        DEFAULT -> [ case y of ... ]
+
+If we inline h into f, the default case of the inlined h can't happen.
+If we don't notice this, we may end up filtering out *all* the cases
+of the inner case y, which give us nowhere to go!
+-}
+
+prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+-- The returned alternatives can be empty, none are possible
+prepareAlts scrut case_bndr' alts
+           -- Case binder is needed just for its type. Note that as an
+           --   OutId, it has maximum information; this is important.
+           --   Test simpl013 is an example
+  = do { us <- getUniquesM
+       ; let (imposs_deflt_cons, refined_deflt, alts')
+                = filterAlts us (varType case_bndr') imposs_cons alts
+       ; when refined_deflt $ tick (FillInCaseDefault case_bndr')
+
+       ; alts'' <- combineIdenticalAlts case_bndr' alts'
+       ; return (imposs_deflt_cons, alts'') }
+  where
+    imposs_cons = case scrut of
+                    Var v -> otherCons (idUnfolding v)
+                    _     -> []
+
+{-
+Note [Combine identical alternatives]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ If several alternatives are identical, merge them into
+ a single DEFAULT alternative.  I've occasionally seen this
+ making a big difference:
+
+     case e of               =====>     case e of
+       C _ -> f x                         D v -> ....v....
+       D v -> ....v....                   DEFAULT -> f x
+       DEFAULT -> f x
+
+The point is that we merge common RHSs, at least for the DEFAULT case.
+[One could do something more elaborate but I've never seen it needed.]
+To avoid an expensive test, we just merge branches equal to the *first*
+alternative; this picks up the common cases
+     a) all branches equal
+     b) some branches equal to the DEFAULT (which occurs first)
+
+The case where Combine Identical Alternatives transformation showed up
+was like this (base/Foreign/C/Err/Error.lhs):
+
+        x | p `is` 1 -> e1
+          | p `is` 2 -> e2
+        ...etc...
+
+where @is@ was something like
+
+        p `is` n = p /= (-1) && p == n
+
+This gave rise to a horrible sequence of cases
+
+        case p of
+          (-1) -> $j p
+          1    -> e1
+          DEFAULT -> $j p
+
+and similarly in cascade for all the join points!
+
+NB: it's important that all this is done in [InAlt], *before* we work
+on the alternatives themselves, because Simpify.simplAlt may zap the
+occurrence info on the binders in the alternatives, which in turn
+defeats combineIdenticalAlts (see Trac #7360).
+-}
+
+combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
+-- See Note [Combine identical alternatives]
+combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
+  | all isDeadBinder bndrs1                     -- Remember the default
+  , length filtered_alts < length con_alts      -- alternative comes first
+  = do  { tick (AltMerge case_bndr)
+        ; return ((DEFAULT, [], rhs1) : filtered_alts) }
+  where
+    filtered_alts = filterOut identical_to_alt1 con_alts
+    identical_to_alt1 (_con,bndrs,rhs) = all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1
+
+combineIdenticalAlts _ alts = return alts
+
+{-
+************************************************************************
+*                                                                      *
+                mkCase
+*                                                                      *
+************************************************************************
+
+mkCase tries these things
+
+1.  Merge Nested Cases
+
+       case e of b {             ==>   case e of b {
+         p1 -> rhs1                      p1 -> rhs1
+         ...                             ...
+         pm -> rhsm                      pm -> rhsm
+         _  -> case b of b' {            pn -> let b'=b in rhsn
+                     pn -> rhsn          ...
+                     ...                 po -> let b'=b in rhso
+                     po -> rhso          _  -> let b'=b in rhsd
+                     _  -> rhsd
+       }
+
+    which merges two cases in one case when -- the default alternative of
+    the outer case scrutises the same variable as the outer case. This
+    transformation is called Case Merging.  It avoids that the same
+    variable is scrutinised multiple times.
+
+2.  Eliminate Identity Case
+
+        case e of               ===> e
+                True  -> True;
+                False -> False
+
+    and similar friends.
+-}
+
+mkCase, mkCase1, mkCase2
+   :: DynFlags
+   -> OutExpr -> OutId
+   -> OutType -> [OutAlt]               -- Alternatives in standard (increasing) order
+   -> SimplM OutExpr
+
+--------------------------------------------------
+--      1. Merge Nested Cases
+--------------------------------------------------
+
+mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
+  | gopt Opt_CaseMerge dflags
+  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+  , inner_scrut_var == outer_bndr
+  = do  { tick (CaseMerge outer_bndr)
+
+        ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
+                                          (con, args, wrap_rhs rhs)
+                -- Simplifier's no-shadowing invariant should ensure
+                -- that outer_bndr is not shadowed by the inner patterns
+              wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
+                -- The let is OK even for unboxed binders,
+
+              wrapped_alts | isDeadBinder inner_bndr = inner_alts
+                           | otherwise               = map wrap_alt inner_alts
+
+              merged_alts = mergeAlts outer_alts wrapped_alts
+                -- NB: mergeAlts gives priority to the left
+                --      case x of
+                --        A -> e1
+                --        DEFAULT -> case x of
+                --                      A -> e2
+                --                      B -> e3
+                -- When we merge, we must ensure that e1 takes
+                -- precedence over e2 as the value for A!
+
+        ; mkCase1 dflags scrut outer_bndr alts_ty merged_alts
+        }
+        -- Warning: don't call mkCase recursively!
+        -- Firstly, there's no point, because inner alts have already had
+        -- mkCase applied to them, so they won't have a case in their default
+        -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
+        -- in munge_rhs may put a case into the DEFAULT branch!
+
+mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
+
+--------------------------------------------------
+--      2. Eliminate Identity Case
+--------------------------------------------------
+
+mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _)      -- Identity case
+  | all identity_alt alts
+  = do { tick (CaseIdentity case_bndr)
+       ; return (re_cast scrut rhs1) }
+  where
+    identity_alt (con, args, rhs) = check_eq rhs con args
+
+    check_eq (Cast rhs co) con args         = not (any (`elemVarSet` tyCoVarsOfCo co) args)
+        {- See Note [RHS casts] -}            && check_eq rhs con args
+    check_eq (Lit lit) (LitAlt lit') _      = lit == lit'
+    check_eq (Var v)   _ _ | v == case_bndr = True
+    check_eq (Var v)   (DataAlt con) []     = v == dataConWorkId con   -- Optimisation only
+    check_eq rhs       (DataAlt con) args   = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
+    check_eq _ _ _ = False
+
+    arg_tys = map Type (tyConAppArgs (idType case_bndr))
+
+        -- Note [RHS casts]
+        -- ~~~~~~~~~~~~~~~~
+        -- We've seen this:
+        --      case e of x { _ -> x `cast` c }
+        -- And we definitely want to eliminate this case, to give
+        --      e `cast` c
+        -- So we throw away the cast from the RHS, and reconstruct
+        -- it at the other end.  All the RHS casts must be the same
+        -- if (all identity_alt alts) holds.
+        --
+        -- Don't worry about nested casts, because the simplifier combines them
+
+    re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
+    re_cast scrut _             = scrut
+
+mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
+
+--------------------------------------------------
+--      Catch-all
+--------------------------------------------------
+mkCase2 _dflags scrut bndr alts_ty alts
+  = return (Case scrut bndr alts_ty alts)
+
+{-
+Note [Dead binders]
+~~~~~~~~~~~~~~~~~~~~
+Note that dead-ness is maintained by the simplifier, so that it is
+accurate after simplification as well as before.
+
+
+Note [Cascading case merge]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Case merging should cascade in one sweep, because it
+happens bottom-up
+
+      case e of a {
+        DEFAULT -> case a of b
+                      DEFAULT -> case b of c {
+                                     DEFAULT -> e
+                                     A -> ea
+                      B -> eb
+        C -> ec
+==>
+      case e of a {
+        DEFAULT -> case a of b
+                      DEFAULT -> let c = b in e
+                      A -> let c = b in ea
+                      B -> eb
+        C -> ec
+==>
+      case e of a {
+        DEFAULT -> let b = a in let c = b in e
+        A -> let b = a in let c = b in ea
+        B -> let b = a in eb
+        C -> ec
+
+
+However here's a tricky case that we still don't catch, and I don't
+see how to catch it in one pass:
+
+  case x of c1 { I# a1 ->
+  case a1 of c2 ->
+    0 -> ...
+    DEFAULT -> case x of c3 { I# a2 ->
+               case a2 of ...
+
+After occurrence analysis (and its binder-swap) we get this
+
+  case x of c1 { I# a1 ->
+  let x = c1 in         -- Binder-swap addition
+  case a1 of c2 ->
+    0 -> ...
+    DEFAULT -> case x of c3 { I# a2 ->
+               case a2 of ...
+
+When we simplify the inner case x, we'll see that
+x=c1=I# a1.  So we'll bind a2 to a1, and get
+
+  case x of c1 { I# a1 ->
+  case a1 of c2 ->
+    0 -> ...
+    DEFAULT -> case a1 of ...
+
+This is corect, but we can't do a case merge in this sweep
+because c2 /= a1.  Reason: the binding c1=I# a1 went inwards
+without getting changed to c1=I# c2.
+
+I don't think this is worth fixing, even if I knew how. It'll
+all come out in the next pass anyway.
+-}