Diffstat (limited to 'compiler/coreSyn/CoreUnfold.hs')
-rw-r--r-- | compiler/coreSyn/CoreUnfold.hs | 1432 |
1 files changed, 1432 insertions, 0 deletions
diff --git a/compiler/coreSyn/CoreUnfold.hs b/compiler/coreSyn/CoreUnfold.hs new file mode 100644 index 0000000000..dc9f95e73a --- /dev/null +++ b/compiler/coreSyn/CoreUnfold.hs @@ -0,0 +1,1432 @@ +{- +(c) The University of Glasgow 2006 +(c) The AQUA Project, Glasgow University, 1994-1998 + + +Core-syntax unfoldings + +Unfoldings (which can travel across module boundaries) are in Core +syntax (namely @CoreExpr@s). + +The type @Unfolding@ sits ``above'' simply-Core-expressions +unfoldings, capturing ``higher-level'' things we know about a binding, +usually things that the simplifier found out (e.g., ``it's a +literal''). In the corner of a @CoreUnfolding@ unfolding, you will +find, unsurprisingly, a Core expression. +-} + +{-# LANGUAGE CPP #-} + +module CoreUnfold ( + Unfolding, UnfoldingGuidance, -- Abstract types + + noUnfolding, mkImplicitUnfolding, + mkUnfolding, mkCoreUnfolding, + mkTopUnfolding, mkSimpleUnfolding, mkWorkerUnfolding, + mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule, + mkCompulsoryUnfolding, mkDFunUnfolding, + specUnfolding, + + interestingArg, ArgSummary(..), + + couldBeSmallEnoughToInline, inlineBoringOk, + certainlyWillInline, smallEnoughToInline, + + callSiteInline, CallCtxt(..), + + -- Reexport from CoreSubst (it only live there so it can be used + -- by the Very Simple Optimiser) + exprIsConApp_maybe, exprIsLiteral_maybe + ) where + +#include "HsVersions.h" + +import DynFlags +import CoreSyn +import PprCore () -- Instances +import OccurAnal ( occurAnalyseExpr ) +import CoreSubst hiding( substTy ) +import CoreArity ( manifestArity, exprBotStrictness_maybe ) +import CoreUtils +import Id +import DataCon +import Literal +import PrimOp +import IdInfo +import BasicTypes ( Arity ) +import Type +import PrelNames +import TysPrim ( realWorldStatePrimTy ) +import Bag +import Util +import FastTypes +import FastString +import Outputable +import ForeignCall + +import qualified Data.ByteString as BS +import Data.Maybe + +{- +************************************************************************ +* * +\subsection{Making unfoldings} +* * +************************************************************************ +-} + +mkTopUnfolding :: DynFlags -> Bool -> CoreExpr -> Unfolding +mkTopUnfolding dflags = mkUnfolding dflags InlineRhs True {- Top level -} + +mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding +-- For implicit Ids, do a tiny bit of optimising first +mkImplicitUnfolding dflags expr + = mkTopUnfolding dflags False (simpleOptExpr expr) + +-- Note [Top-level flag on inline rules] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Slight hack: note that mk_inline_rules conservatively sets the +-- top-level flag to True. It gets set more accurately by the simplifier +-- Simplify.simplUnfolding. 
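To make the interface above concrete, here is a minimal sketch, not part of this patch, of how a caller might attach a top-level unfolding to a binder; attachTopUnfolding is a hypothetical helper, built only from functions this module already imports:

    attachTopUnfolding :: DynFlags -> Id -> CoreExpr -> Id
    attachTopUnfolding dflags bndr rhs
      = bndr `setIdUnfolding` mkTopUnfolding dflags is_bottoming rhs
      where
        -- mkUnfolding discards the unfolding of a non-trivial top-level
        -- bottoming function;
        -- see Note [Do not inline top-level bottoming functions]
        is_bottoming = isJust (exprBotStrictness_maybe rhs)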
+ +mkSimpleUnfolding :: DynFlags -> CoreExpr -> Unfolding +mkSimpleUnfolding dflags = mkUnfolding dflags InlineRhs False False + +mkDFunUnfolding :: [Var] -> DataCon -> [CoreExpr] -> Unfolding +mkDFunUnfolding bndrs con ops + = DFunUnfolding { df_bndrs = bndrs + , df_con = con + , df_args = map occurAnalyseExpr ops } + -- See Note [Occurrrence analysis of unfoldings] + +mkWwInlineRule :: CoreExpr -> Arity -> Unfolding +mkWwInlineRule expr arity + = mkCoreUnfolding InlineStable True + (simpleOptExpr expr) + (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk + , ug_boring_ok = boringCxtNotOk }) + +mkCompulsoryUnfolding :: CoreExpr -> Unfolding +mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded + = mkCoreUnfolding InlineCompulsory True + (simpleOptExpr expr) + (UnfWhen { ug_arity = 0 -- Arity of unfolding doesn't matter + , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk }) + +mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding +-- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap +mkWorkerUnfolding dflags work_fn + (CoreUnfolding { uf_src = src, uf_tmpl = tmpl + , uf_is_top = top_lvl }) + | isStableSource src + = mkCoreUnfolding src top_lvl new_tmpl guidance + where + new_tmpl = simpleOptExpr (work_fn tmpl) + guidance = calcUnfoldingGuidance dflags new_tmpl + +mkWorkerUnfolding _ _ _ = noUnfolding + +mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding +mkInlineUnfolding mb_arity expr + = mkCoreUnfolding InlineStable + True -- Note [Top-level flag on inline rules] + expr' guide + where + expr' = simpleOptExpr expr + guide = case mb_arity of + Nothing -> UnfWhen { ug_arity = manifestArity expr' + , ug_unsat_ok = unSaturatedOk + , ug_boring_ok = boring_ok } + Just arity -> UnfWhen { ug_arity = arity + , ug_unsat_ok = needSaturated + , ug_boring_ok = boring_ok } + boring_ok = inlineBoringOk expr' + +mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding +mkInlinableUnfolding dflags expr + = mkUnfolding dflags InlineStable True is_bot expr' + where + expr' = simpleOptExpr expr + is_bot = isJust (exprBotStrictness_maybe expr') + +specUnfolding :: DynFlags -> Subst -> [Var] -> [CoreExpr] -> Unfolding -> Unfolding +-- See Note [Specialising unfoldings] +specUnfolding _ subst new_bndrs spec_args + df@(DFunUnfolding { df_bndrs = bndrs, df_con = con , df_args = args }) + = ASSERT2( length bndrs >= length spec_args, ppr df $$ ppr spec_args $$ ppr new_bndrs ) + mkDFunUnfolding (new_bndrs ++ extra_bndrs) con + (map (substExpr spec_doc subst2) args) + where + subst1 = extendSubstList subst (bndrs `zip` spec_args) + (subst2, extra_bndrs) = substBndrs subst1 (dropList spec_args bndrs) + +specUnfolding _dflags subst new_bndrs spec_args + (CoreUnfolding { uf_src = src, uf_tmpl = tmpl + , uf_is_top = top_lvl + , uf_guidance = old_guidance }) + | isStableSource src -- See Note [Specialising unfoldings] + , UnfWhen { ug_arity = old_arity + , ug_unsat_ok = unsat_ok + , ug_boring_ok = boring_ok } <- old_guidance + = let guidance = UnfWhen { ug_arity = old_arity - count isValArg spec_args + + count isId new_bndrs + , ug_unsat_ok = unsat_ok + , ug_boring_ok = boring_ok } + new_tmpl = simpleOptExpr $ mkLams new_bndrs $ + mkApps (substExpr spec_doc subst tmpl) spec_args + -- The beta-redexes created here will be simplified + -- away by simplOptExpr in mkUnfolding + + in mkCoreUnfolding src top_lvl new_tmpl guidance + +specUnfolding _ _ _ _ _ = noUnfolding + +spec_doc :: SDoc +spec_doc = ptext (sLit "specUnfolding") + +{- +Note 
[Specialising unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we specialise a function for some given type-class arguments, we use +specUnfolding to specialise its unfolding. Some important points: + +* If the original function has a DFunUnfolding, the specialised one + must do so too! Otherwise we lose the magic rules that make it + interact with ClassOps + +* There is a bit of a hack for INLINABLE functions: + f :: Ord a => .... + f = <big-rhs> + {-# INLINEABLE f #-} + Now if we specialise f, should the specialised version still have + an INLINEABLE pragma? If it does, we'll capture a specialised copy + of <big-rhs> as its unfolding, and that probably won't inline. But + if we don't, the specialised version of <big-rhs> might be small + enough to inline at a call site. This happens with Control.Monad.liftM3, + and can cause a lot more allocation as a result (nofib n-body shows this). + + Moreover, keeping the INLINEABLE thing isn't much help, because + the specialised function (probably) isn't overloaded any more. + + Conclusion: drop the INLINEABLE pragma. In practice what this means is: + if a stable unfolding has UnfoldingGuidance of UnfWhen, + we keep it (so the specialised thing too will always inline) + if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs + (which arises from INLINEABLE), we discard it +-} + +mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr + -> UnfoldingGuidance -> Unfolding +-- Occurrence-analyses the expression before capturing it +mkCoreUnfolding src top_lvl expr guidance + = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, + -- See Note [Occurrence analysis of unfoldings] + uf_src = src, + uf_is_top = top_lvl, + uf_is_value = exprIsHNF expr, + uf_is_conlike = exprIsConLike expr, + uf_is_work_free = exprIsWorkFree expr, + uf_expandable = exprIsExpandable expr, + uf_guidance = guidance } + +mkUnfolding :: DynFlags -> UnfoldingSource -> Bool -> Bool -> CoreExpr + -> Unfolding +-- Calculates unfolding guidance +-- Occurrence-analyses the expression before capturing it +mkUnfolding dflags src top_lvl is_bottoming expr + | top_lvl && is_bottoming + , not (exprIsTrivial expr) + = NoUnfolding -- See Note [Do not inline top-level bottoming functions] + | otherwise + = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, + -- See Note [Occurrence analysis of unfoldings] + uf_src = src, + uf_is_top = top_lvl, + uf_is_value = exprIsHNF expr, + uf_is_conlike = exprIsConLike expr, + uf_expandable = exprIsExpandable expr, + uf_is_work_free = exprIsWorkFree expr, + uf_guidance = guidance } + where + guidance = calcUnfoldingGuidance dflags expr + -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr expr))! + -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression] + +{- +Note [Occurrence analysis of unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We do occurrence-analysis of unfoldings once and for all, when the +unfolding is built, rather than each time we inline them. + +But given this decision it's vital that we +*always* do it. Consider this unfolding + \x -> letrec { f = ...g...; g* = f } in body +where g* is (for some strange reason) the loop breaker. If we don't +occ-anal it when reading it in, we won't mark g as a loop breaker, and +we may inline g entirely in body, dropping its binding, and leaving +the occurrence of g in f out of scope. This happened in Trac #8892, where +the unfolding in question was a DFun unfolding.
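A source-level sketch of that shape, purely illustrative with made-up names:

    demo :: Int -> Int
    demo x = g x
      where
        -- 'f' and 'g' form a letrec; suppose 'g' is (for whatever reason)
        -- chosen as the loop breaker.  If demo's unfolding is not
        -- occurrence-analysed when read back in, the simplifier may inline
        -- 'g', drop its binding, and leave the 'g' inside 'f' out of scope.
        f y = if y > 0 then g (y - 1) else x
        g = f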
+ +But more generally, the simplifier is designed on the +basis that it is looking at occurrence-analysed expressions, so better +ensure that they acutally are. + +Note [Calculate unfolding guidance on the non-occ-anal'd expression] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Notice that we give the non-occur-analysed expression to +calcUnfoldingGuidance. In some ways it'd be better to occur-analyse +first; for example, sometimes during simplification, there's a large +let-bound thing which has been substituted, and so is now dead; so +'expr' contains two copies of the thing while the occurrence-analysed +expression doesn't. + +Nevertheless, we *don't* and *must not* occ-analyse before computing +the size because + +a) The size computation bales out after a while, whereas occurrence + analysis does not. + +b) Residency increases sharply if you occ-anal first. I'm not + 100% sure why, but it's a large effect. Compiling Cabal went + from residency of 534M to over 800M with this one change. + +This can occasionally mean that the guidance is very pessimistic; +it gets fixed up next round. And it should be rare, because large +let-bound things that are dead are usually caught by preInlineUnconditionally + + +************************************************************************ +* * +\subsection{The UnfoldingGuidance type} +* * +************************************************************************ +-} + +inlineBoringOk :: CoreExpr -> Bool +-- See Note [INLINE for small functions] +-- True => the result of inlining the expression is +-- no bigger than the expression itself +-- eg (\x y -> f y x) +-- This is a quick and dirty version. It doesn't attempt +-- to deal with (\x y z -> x (y z)) +-- The really important one is (x `cast` c) +inlineBoringOk e + = go 0 e + where + go :: Int -> CoreExpr -> Bool + go credit (Lam x e) | isId x = go (credit+1) e + | otherwise = go credit e + go credit (App f (Type {})) = go credit f + go credit (App f a) | credit > 0 + , exprIsTrivial a = go (credit-1) f + go credit (Tick _ e) = go credit e -- dubious + go credit (Cast e _) = go credit e + go _ (Var {}) = boringCxtOk + go _ _ = boringCxtNotOk + +calcUnfoldingGuidance + :: DynFlags + -> CoreExpr -- Expression to look at + -> UnfoldingGuidance +calcUnfoldingGuidance dflags expr + = case sizeExpr dflags (iUnbox bOMB_OUT_SIZE) val_bndrs body of + TooBig -> UnfNever + SizeIs size cased_bndrs scrut_discount + | uncondInline expr n_val_bndrs (iBox size) + -> UnfWhen { ug_unsat_ok = unSaturatedOk + , ug_boring_ok = boringCxtOk + , ug_arity = n_val_bndrs } -- Note [INLINE for small functions] + | otherwise + -> UnfIfGoodArgs { ug_args = map (mk_discount cased_bndrs) val_bndrs + , ug_size = iBox size + , ug_res = iBox scrut_discount } + + where + (bndrs, body) = collectBinders expr + bOMB_OUT_SIZE = ufCreationThreshold dflags + -- Bomb out if size gets bigger than this + val_bndrs = filter isId bndrs + n_val_bndrs = length val_bndrs + + mk_discount :: Bag (Id,Int) -> Id -> Int + mk_discount cbs bndr = foldlBag combine 0 cbs + where + combine acc (bndr', disc) + | bndr == bndr' = acc `plus_disc` disc + | otherwise = acc + + plus_disc :: Int -> Int -> Int + plus_disc | isFunTy (idType bndr) = max + | otherwise = (+) + -- See Note [Function and non-function discounts] + +{- +Note [Computing the size of an expression] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The basic idea of sizeExpr is obvious enough: count nodes. But getting the +heuristics right has taken a long time. 
Here's the basic strategy: + + * Variables, literals: 0 + (Exception for string literals, see litSize.) + + * Function applications (f e1 .. en): 1 + #value args + + * Constructor applications: 1, regardless of #args + + * Let(rec): 1 + size of components + + * Note, cast: 0 + +Examples + + Size Term + -------------- + 0 42# + 0 x + 0 True + 2 f x + 1 Just x + 4 f (g x) + +Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's +a function call to account for. Notice also that constructor applications +are very cheap, because exposing them to a caller is so valuable. + +[25/5/11] All sizes are now multiplied by 10, except for primops +(which have sizes like 1 or 4). This makes primops look fantastically +cheap, and seems to be almost universally beneficial. Done partly as a +result of #4978. + +Note [Do not inline top-level bottoming functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The FloatOut pass has gone to some trouble to float out calls to 'error' +and similar friends. See Note [Bottoming floats] in SetLevels. +Do not re-inline them! But we *do* still inline if they are very small +(the uncondInline stuff). + +Note [INLINE for small functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider {-# INLINE f #-} + f x = Just x + g y = f y +Then f's RHS is no larger than its LHS, so we should inline it into +even the most boring context. In general, if the function is +sufficiently small that its body is as small as the call itself, then +inline unconditionally, regardless of how boring the context is. + +Things to note: + +(1) We inline *unconditionally* if the inlined thing is smaller (using sizeExpr) + than the thing it's replacing. Notice that + (f x) --> (g 3) -- YES, unconditionally + (f x) --> x : [] -- YES, *even though* there are two + -- arguments to the cons + x --> g 3 -- NO + x --> Just v -- NO + + It's very important not to unconditionally replace a variable by + a non-atomic term. + +(2) We do this even if the thing isn't saturated, else we end up with the + silly situation that + f x y = x + ...map (f 3)... + doesn't inline. Even in a boring context, inlining without being + saturated will give a lambda instead of a PAP, and will be more + efficient at runtime. + +(3) However, when the function's arity > 0, we do insist that it + has at least one value argument at the call site. (This check is + made in the UnfWhen case of callSiteInline.) Otherwise we find this: + f = /\a \x:a. x + d = /\b. MkD (f b) + If we inline f here we get + d = /\b. MkD (\x:b. x) + and then prepareRhs floats out the argument, abstracting the type + variables, so we end up with the original again! + +(4) We must be much more cautious about arity-zero things. Consider + let x = y +# z in ... + In *size* terms primops look very small, because they generate a + single instruction, but we do not want to unconditionally replace + every occurrence of x with (y +# z). So we only do the + unconditional-inline thing for *trivial* expressions.
+ + NB: you might think that PostInlineUnconditionally would do this + but it doesn't fire for top-level things; see SimplUtils + Note [Top level and postInlineUnconditionally] +-} + +uncondInline :: CoreExpr -> Arity -> Int -> Bool +-- Inline unconditionally if there no size increase +-- Size of call is arity (+1 for the function) +-- See Note [INLINE for small functions] +uncondInline rhs arity size + | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1) + | otherwise = exprIsTrivial rhs -- See Note [INLINE for small functions] (4) + +sizeExpr :: DynFlags + -> FastInt -- Bomb out if it gets bigger than this + -> [Id] -- Arguments; we're interested in which of these + -- get case'd + -> CoreExpr + -> ExprSize + +-- Note [Computing the size of an expression] + +sizeExpr dflags bOMB_OUT_SIZE top_args expr + = size_up expr + where + size_up (Cast e _) = size_up e + size_up (Tick _ e) = size_up e + size_up (Type _) = sizeZero -- Types cost nothing + size_up (Coercion _) = sizeZero + size_up (Lit lit) = sizeN (litSize lit) + size_up (Var f) | isRealWorldId f = sizeZero + -- Make sure we get constructor discounts even + -- on nullary constructors + | otherwise = size_up_call f [] 0 + + size_up (App fun arg) + | isTyCoArg arg = size_up fun + | otherwise = size_up arg `addSizeNSD` + size_up_app fun [arg] (if isRealWorldExpr arg then 1 else 0) + + size_up (Lam b e) + | isId b && not (isRealWorldId b) = lamScrutDiscount dflags (size_up e `addSizeN` 10) + | otherwise = size_up e + + size_up (Let (NonRec binder rhs) body) + = size_up rhs `addSizeNSD` + size_up body `addSizeN` + (if isUnLiftedType (idType binder) then 0 else 10) + -- For the allocation + -- If the binder has an unlifted type there is no allocation + + size_up (Let (Rec pairs) body) + = foldr (addSizeNSD . size_up . snd) + (size_up body `addSizeN` (10 * length pairs)) -- (length pairs) for the allocation + pairs + + size_up (Case (Var v) _ _ alts) + | v `elem` top_args -- We are scrutinising an argument variable + = alts_size (foldr addAltSize sizeZero alt_sizes) + (foldr maxSize sizeZero alt_sizes) + -- Good to inline if an arg is scrutinised, because + -- that may eliminate allocation in the caller + -- And it eliminates the case itself + where + alt_sizes = map size_up_alt alts + + -- alts_size tries to compute a good discount for + -- the case when we are scrutinising an argument variable + alts_size (SizeIs tot tot_disc tot_scrut) -- Size of all alternatives + (SizeIs max _ _) -- Size of biggest alternative + = SizeIs tot (unitBag (v, iBox (_ILIT(20) +# tot -# max)) `unionBags` tot_disc) tot_scrut + -- If the variable is known, we produce a discount that + -- will take us back to 'max', the size of the largest alternative + -- The 1+ is a little discount for reduced allocation in the caller + -- + -- Notice though, that we return tot_disc, the total discount from + -- all branches. I think that's right. + + alts_size tot_size _ = tot_size + + size_up (Case e _ _ alts) = size_up e `addSizeNSD` + foldr (addAltSize . size_up_alt) case_size alts + where + case_size + | is_inline_scrut e, not (lengthExceeds alts 1) = sizeN (-10) + | otherwise = sizeZero + -- Normally we don't charge for the case itself, but + -- we charge one per alternative (see size_up_alt, + -- below) to account for the cost of the info table + -- and comparisons. + -- + -- However, in certain cases (see is_inline_scrut + -- below), no code is generated for the case unless + -- there are multiple alts. 
In these cases we + -- subtract one, making the first alt free. + -- e.g. case x# +# y# of _ -> ... should cost 1 + -- case touch# x# of _ -> ... should cost 0 + -- (see #4978) + -- + -- I would like to not have the "not (lengthExceeds alts 1)" + -- condition above, but without that some programs got worse + -- (spectral/hartel/event and spectral/para). I don't fully + -- understand why. (SDM 24/5/11) + + -- unboxed variables, inline primops and unsafe foreign calls + -- are all "inline" things: + is_inline_scrut (Var v) = isUnLiftedType (idType v) + is_inline_scrut scrut + | (Var f, _) <- collectArgs scrut + = case idDetails f of + FCallId fc -> not (isSafeForeignCall fc) + PrimOpId op -> not (primOpOutOfLine op) + _other -> False + | otherwise + = False + + ------------ + -- size_up_app is used when there's ONE OR MORE value args + size_up_app (App fun arg) args voids + | isTyCoArg arg = size_up_app fun args voids + | isRealWorldExpr arg = size_up_app fun (arg:args) (voids + 1) + | otherwise = size_up arg `addSizeNSD` + size_up_app fun (arg:args) voids + size_up_app (Var fun) args voids = size_up_call fun args voids + size_up_app other args voids = size_up other `addSizeN` (length args - voids) + + ------------ + size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize + size_up_call fun val_args voids + = case idDetails fun of + FCallId _ -> sizeN (10 * (1 + length val_args)) + DataConWorkId dc -> conSize dc (length val_args) + PrimOpId op -> primOpSize op (length val_args) + ClassOpId _ -> classOpSize dflags top_args val_args + _ -> funSize dflags top_args fun (length val_args) voids + + ------------ + size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10 + -- Don't charge for args, so that wrappers look cheap + -- (See comments about wrappers with Case) + -- + -- IMPORATANT: *do* charge 1 for the alternative, else we + -- find that giant case nests are treated as practically free + -- A good example is Foreign.C.Error.errrnoToIOError + + ------------ + -- These addSize things have to be here because + -- I don't want to give them bOMB_OUT_SIZE as an argument + addSizeN TooBig _ = TooBig + addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d + + -- addAltSize is used to add the sizes of case alternatives + addAltSize TooBig _ = TooBig + addAltSize _ TooBig = TooBig + addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2) + = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) + (xs `unionBags` ys) + (d1 +# d2) -- Note [addAltSize result discounts] + + -- This variant ignores the result discount from its LEFT argument + -- It's used when the second argument isn't part of the result + addSizeNSD TooBig _ = TooBig + addSizeNSD _ TooBig = TooBig + addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2) + = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) + (xs `unionBags` ys) + d2 -- Ignore d1 + + isRealWorldId id = idType id `eqType` realWorldStatePrimTy + + -- an expression of type State# RealWorld must be a variable + isRealWorldExpr (Var id) = isRealWorldId id + isRealWorldExpr _ = False + +-- | Finds a nominal size of a string literal. 
+litSize :: Literal -> Int +-- Used by CoreUnfold.sizeExpr +litSize (LitInteger {}) = 100 -- Note [Size of literal integers] +litSize (MachStr str) = 10 + 10 * ((BS.length str + 3) `div` 4) + -- If size could be 0 then @f "x"@ might be too small + -- [Sept03: make literal strings a bit bigger to avoid fruitless + -- duplication of little strings] +litSize _other = 0 -- Must match size of nullary constructors + -- Key point: if x |-> 4, then x must inline unconditionally + -- (eg via case binding) + +classOpSize :: DynFlags -> [Id] -> [CoreExpr] -> ExprSize +-- See Note [Conlike is interesting] +classOpSize _ _ [] + = sizeZero +classOpSize dflags top_args (arg1 : other_args) + = SizeIs (iUnbox size) arg_discount (_ILIT(0)) + where + size = 20 + (10 * length other_args) + -- If the class op is scrutinising a lambda bound dictionary then + -- give it a discount, to encourage the inlining of this function + -- The actual discount is rather arbitrarily chosen + arg_discount = case arg1 of + Var dict | dict `elem` top_args + -> unitBag (dict, ufDictDiscount dflags) + _other -> emptyBag + +funSize :: DynFlags -> [Id] -> Id -> Int -> Int -> ExprSize +-- Size for functions that are not constructors or primops +-- Note [Function applications] +funSize dflags top_args fun n_val_args voids + | fun `hasKey` buildIdKey = buildSize + | fun `hasKey` augmentIdKey = augmentSize + | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount) + where + some_val_args = n_val_args > 0 + + size | some_val_args = 10 * (1 + n_val_args - voids) + | otherwise = 0 + -- The 1+ is for the function itself + -- Add 1 for each non-trivial arg; + -- the allocation cost, as in let(rec) + + -- DISCOUNTS + -- See Note [Function and non-function discounts] + arg_discount | some_val_args && fun `elem` top_args + = unitBag (fun, ufFunAppDiscount dflags) + | otherwise = emptyBag + -- If the function is an argument and is applied + -- to some values, give it an arg-discount + + res_discount | idArity fun > n_val_args = ufFunAppDiscount dflags + | otherwise = 0 + -- If the function is partially applied, show a result discount + +conSize :: DataCon -> Int -> ExprSize +conSize dc n_val_args + | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(10)) -- Like variables + +-- See Note [Unboxed tuple size and result discount] + | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox (10 * (1 + n_val_args))) + +-- See Note [Constructor size and result discount] + | otherwise = SizeIs (_ILIT(10)) emptyBag (iUnbox (10 * (1 + n_val_args))) + +{- +Note [Constructor size and result discount] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Treat a constructors application as size 10, regardless of how many +arguments it has; we are keen to expose them (and we charge separately +for their args). We can't treat them as size zero, else we find that +(Just x) has size 0, which is the same as a lone variable; and hence +'v' will always be replaced by (Just x), where v is bound to Just x. + +The "result discount" is applied if the result of the call is +scrutinised (say by a case). For a constructor application that will +mean the constructor application will disappear, so we don't need to +charge it to the function. So the discount should at least match the +cost of the constructor application, namely 10. But to give a bit +of extra incentive we give a discount of 10*(1 + n_val_args). 
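For example, reading off the definition of conSize above (constructor names for illustration only):

    conSize nothingDataCon 0  -->  SizeIs 0  ... 10   -- nullary: free, like a variable
    conSize justDataCon    1  -->  SizeIs 10 ... 20   -- size 10, discount 10*(1+1)
    conSize consDataCon    2  -->  SizeIs 10 ... 30   -- size 10, discount 10*(1+2)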
+ +Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)), +and said it was an "unambiguous win", but it's terribly dangerous +because a function with many, many case branches, each finishing with +a constructor, can have an arbitrarily large discount. This led to +terrible code bloat: see Trac #6099. + +Note [Unboxed tuple size and result discount] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +However, unboxed tuples count as size zero. I found occasions where we had + f x y z = case op# x y z of { s -> (# s, () #) } +and f wasn't getting inlined. + +I tried giving unboxed tuples a *result discount* of zero (see the +commented-out line). Why? When returned as a result they do not +allocate, so maybe we don't want to charge so much for them. If you +have a non-zero discount here, we find that workers often get inlined +back into wrappers, because it looks like + f x = case $wf x of (# a,b #) -> (a,b) +and we are keener because of the case. However, while this change +shrank binary sizes by 0.5% it also made spectral/boyer allocate 5% +more. All other changes were very small. So it's not a big deal but I +didn't adopt the idea. + +Note [Function and non-function discounts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We want a discount if the function is applied. A good example is +monadic combinators with continuation arguments, where inlining is +quite important. + +But we don't want a big discount when a function is called many times +(see the detailed comments with Trac #6048) because if the function is +big it won't be inlined at its many call sites and no benefit results. +Indeed, we can get exponentially big inlinings this way; that is what +Trac #6048 is about. + +On the other hand, for data-valued arguments, if there are lots of +case expressions in the body, each one will get smaller if we apply +the function to a constructor application, so we *want* a big discount +if the argument is scrutinised by many case expressions. + +Conclusion: + - For functions, take the max of the discounts + - For data values, take the sum of the discounts + + +Note [Size of literal integers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Literal integers *can* be big (mkInteger [...coefficients...]), but +need not be (S# n). We just use an arbitrary big-ish constant here +so that, in particular, we don't inline top-level defns like + n = S# 5 +There's no point in doing so -- any optimisations will see the S# +through n's unfolding. Nor will a big size inhibit the unfolding of functions +that mention a literal Integer, because the float-out pass will float +all those constants to top level. +-} + +primOpSize :: PrimOp -> Int -> ExprSize +primOpSize op n_val_args + = if primOpOutOfLine op + then sizeN (op_size + n_val_args) + else sizeN op_size + where + op_size = primOpCodeSize op + + +buildSize :: ExprSize +buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40)) + -- We really want to inline applications of build + -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later) + -- Indeed, we should add a result_discount because build is + -- very like a constructor. We don't bother to check that the + -- build is saturated (it usually is). The "-2" discounts for the \c n, + -- The "4" is rather arbitrary. + +augmentSize :: ExprSize +augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40)) + -- Ditto (augment t (\cn -> e) ys) should cost only the cost of + -- e plus ys.
The -2 accounts for the \cn + +-- When we return a lambda, give a discount if it's used (applied) +lamScrutDiscount :: DynFlags -> ExprSize -> ExprSize +lamScrutDiscount dflags (SizeIs n vs _) = SizeIs n vs (iUnbox (ufFunAppDiscount dflags)) +lamScrutDiscount _ TooBig = TooBig + +{- +Note [addAltSize result discounts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When adding the size of alternatives, we *add* the result discounts +too, rather than take the *maximum*. For a multi-branch case, this +gives a discount for each branch that returns a constructor, making us +keener to inline. I did try using 'max' instead, but it makes nofib +'rewrite' and 'puzzle' allocate significantly more, and didn't make +binary sizes shrink significantly either. + +Note [Discounts and thresholds] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Constants for discounts and thesholds are defined in main/DynFlags, +all of form ufXxxx. They are: + +ufCreationThreshold + At a definition site, if the unfolding is bigger than this, we + may discard it altogether + +ufUseThreshold + At a call site, if the unfolding, less discounts, is smaller than + this, then it's small enough inline + +ufKeenessFactor + Factor by which the discounts are multiplied before + subtracting from size + +ufDictDiscount + The discount for each occurrence of a dictionary argument + as an argument of a class method. Should be pretty small + else big functions may get inlined + +ufFunAppDiscount + Discount for a function argument that is applied. Quite + large, because if we inline we avoid the higher-order call. + +ufDearOp + The size of a foreign call or not-dupable PrimOp + + +Note [Function applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In a function application (f a b) + + - If 'f' is an argument to the function being analysed, + and there's at least one value arg, record a FunAppDiscount for f + + - If the application if a PAP (arity > 2 in this example) + record a *result* discount (because inlining + with "extra" args in the call may mean that we now + get a saturated application) + +Code for manipulating sizes +-} + +data ExprSize = TooBig + | SizeIs FastInt -- Size found + !(Bag (Id,Int)) -- Arguments cased herein, and discount for each such + FastInt -- Size to subtract if result is scrutinised + -- by a case expression + +instance Outputable ExprSize where + ppr TooBig = ptext (sLit "TooBig") + ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c)) + +-- subtract the discount before deciding whether to bale out. eg. we +-- want to inline a large constructor application into a selector: +-- tup = (a_1, ..., a_99) +-- x = case tup of ... +-- +mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize +mkSizeIs max n xs d | (n -# d) ># max = TooBig + | otherwise = SizeIs n xs d + +maxSize :: ExprSize -> ExprSize -> ExprSize +maxSize TooBig _ = TooBig +maxSize _ TooBig = TooBig +maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1 + | otherwise = s2 + +sizeZero :: ExprSize +sizeN :: Int -> ExprSize + +sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0)) +sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0)) + +{- +************************************************************************ +* * +\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding} +* * +************************************************************************ + +We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that +we ``couldn't possibly use'' on the other side. Can be overridden w/ +flaggery. 
Just the same as smallEnoughToInline, except that it has no +actual arguments. +-} + +couldBeSmallEnoughToInline :: DynFlags -> Int -> CoreExpr -> Bool +couldBeSmallEnoughToInline dflags threshold rhs + = case sizeExpr dflags (iUnbox threshold) [] body of + TooBig -> False + _ -> True + where + (_, body) = collectBinders rhs + +---------------- +smallEnoughToInline :: DynFlags -> Unfolding -> Bool +smallEnoughToInline dflags (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}}) + = size <= ufUseThreshold dflags +smallEnoughToInline _ _ + = False + +---------------- +certainlyWillInline :: DynFlags -> Unfolding -> Maybe Unfolding +-- Sees if the unfolding is pretty certain to inline +-- If so, return a *stable* unfolding for it, that will always inline +certainlyWillInline dflags unf@(CoreUnfolding { uf_guidance = guidance, uf_tmpl = expr }) + = case guidance of + UnfNever -> Nothing + UnfWhen {} -> Just (unf { uf_src = InlineStable }) + + -- The UnfIfGoodArgs case seems important. If we w/w small functions + -- binary sizes go up by 10%! (This is with SplitObjs.) I'm not totally + -- sure whyy. + UnfIfGoodArgs { ug_size = size, ug_args = args } + | not (null args) -- See Note [certainlyWillInline: be careful of thunks] + , let arity = length args + , size - (10 * (arity + 1)) <= ufUseThreshold dflags + -> Just (unf { uf_src = InlineStable + , uf_guidance = UnfWhen { ug_arity = arity + , ug_unsat_ok = unSaturatedOk + , ug_boring_ok = inlineBoringOk expr } }) + -- Note the "unsaturatedOk". A function like f = \ab. a + -- will certainly inline, even if partially applied (f e), so we'd + -- better make sure that the transformed inlining has the same property + + _ -> Nothing + +certainlyWillInline _ unf@(DFunUnfolding {}) + = Just unf + +certainlyWillInline _ _ + = Nothing + +{- +Note [certainlyWillInline: be careful of thunks] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Don't claim that thunks will certainly inline, because that risks work +duplication. Even if the work duplication is not great (eg is_cheap +holds), it can make a big difference in an inner loop In Trac #5623 we +found that the WorkWrap phase thought that + y = case x of F# v -> F# (v +# v) +was certainlyWillInline, so the addition got duplicated. + + +************************************************************************ +* * +\subsection{callSiteInline} +* * +************************************************************************ + +This is the key function. It decides whether to inline a variable at a call site + +callSiteInline is used at call sites, so it is a bit more generous. +It's a very important function that embodies lots of heuristics. +A non-WHNF can be inlined if it doesn't occur inside a lambda, +and occurs exactly once or + occurs once in each branch of a case and is small + +If the thing is in WHNF, there's no danger of duplicating work, +so we can inline if it occurs once, or is small + +NOTE: we don't want to inline top-level functions that always diverge. +It just makes the code bigger. 
Tt turns out that the convenient way to prevent +them inlining is to give them a NOINLINE pragma, which we do in +StrictAnal.addStrictnessInfoToTopId +-} + +callSiteInline :: DynFlags + -> Id -- The Id + -> Bool -- True <=> unfolding is active + -> Bool -- True if there are are no arguments at all (incl type args) + -> [ArgSummary] -- One for each value arg; True if it is interesting + -> CallCtxt -- True <=> continuation is interesting + -> Maybe CoreExpr -- Unfolding, if any + +instance Outputable ArgSummary where + ppr TrivArg = ptext (sLit "TrivArg") + ppr NonTrivArg = ptext (sLit "NonTrivArg") + ppr ValueArg = ptext (sLit "ValueArg") + +data CallCtxt + = BoringCtxt + | RhsCtxt -- Rhs of a let-binding; see Note [RHS of lets] + | DiscArgCtxt -- Argument of a fuction with non-zero arg discount + | RuleArgCtxt -- We are somewhere in the argument of a function with rules + + | ValAppCtxt -- We're applied to at least one value arg + -- This arises when we have ((f x |> co) y) + -- Then the (f x) has argument 'x' but in a ValAppCtxt + + | CaseCtxt -- We're the scrutinee of a case + -- that decomposes its scrutinee + +instance Outputable CallCtxt where + ppr CaseCtxt = ptext (sLit "CaseCtxt") + ppr ValAppCtxt = ptext (sLit "ValAppCtxt") + ppr BoringCtxt = ptext (sLit "BoringCtxt") + ppr RhsCtxt = ptext (sLit "RhsCtxt") + ppr DiscArgCtxt = ptext (sLit "DiscArgCtxt") + ppr RuleArgCtxt = ptext (sLit "RuleArgCtxt") + +callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info + = case idUnfolding id of + -- idUnfolding checks for loop-breakers, returning NoUnfolding + -- Things with an INLINE pragma may have an unfolding *and* + -- be a loop breaker (maybe the knot is not yet untied) + CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top + , uf_is_work_free = is_wf + , uf_guidance = guidance, uf_expandable = is_exp } + | active_unfolding -> tryUnfolding dflags id lone_variable + arg_infos cont_info unf_template is_top + is_wf is_exp guidance + | otherwise -> traceInline dflags "Inactive unfolding:" (ppr id) Nothing + NoUnfolding -> Nothing + OtherCon {} -> Nothing + DFunUnfolding {} -> Nothing -- Never unfold a DFun + +traceInline :: DynFlags -> String -> SDoc -> a -> a +traceInline dflags str doc result + | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags + = pprTrace str doc result + | otherwise + = result + +tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt + -> CoreExpr -> Bool -> Bool -> Bool -> UnfoldingGuidance + -> Maybe CoreExpr +tryUnfolding dflags id lone_variable + arg_infos cont_info unf_template is_top + is_wf is_exp guidance + = case guidance of + UnfNever -> traceInline dflags str (ptext (sLit "UnfNever")) Nothing + + UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok } + | enough_args && (boring_ok || some_benefit) + -- See Note [INLINE for small functions (3)] + -> traceInline dflags str (mk_doc some_benefit empty True) (Just unf_template) + | otherwise + -> traceInline dflags str (mk_doc some_benefit empty False) Nothing + where + some_benefit = calc_some_benefit uf_arity + enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0) + + UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } + | is_wf && some_benefit && small_enough + -> traceInline dflags str (mk_doc some_benefit extra_doc True) (Just unf_template) + | otherwise + -> traceInline dflags str (mk_doc some_benefit extra_doc False) Nothing + where + some_benefit = calc_some_benefit (length 
arg_discounts) + extra_doc = text "discounted size =" <+> int discounted_size + discounted_size = size - discount + small_enough = discounted_size <= ufUseThreshold dflags + discount = computeDiscount dflags arg_discounts + res_discount arg_infos cont_info + + where + mk_doc some_benefit extra_doc yes_or_no + = vcat [ text "arg infos" <+> ppr arg_infos + , text "interesting continuation" <+> ppr cont_info + , text "some_benefit" <+> ppr some_benefit + , text "is exp:" <+> ppr is_exp + , text "is work-free:" <+> ppr is_wf + , text "guidance" <+> ppr guidance + , extra_doc + , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"] + + str = "Considering inlining: " ++ showSDocDump dflags (ppr id) + n_val_args = length arg_infos + + -- some_benefit is used when the RHS is small enough + -- and the call has enough (or too many) value + -- arguments (ie n_val_args >= arity). But there must + -- be *something* interesting about some argument, or the + -- result context, to make it worth inlining + calc_some_benefit :: Arity -> Bool -- The Arity is the number of args + -- expected by the unfolding + calc_some_benefit uf_arity + | not saturated = interesting_args -- Under-saturated + -- Note [Unsaturated applications] + | otherwise = interesting_args -- Saturated or over-saturated + || interesting_call + where + saturated = n_val_args >= uf_arity + over_saturated = n_val_args > uf_arity + interesting_args = any nonTriv arg_infos + -- NB: (any nonTriv arg_infos) looks at the + -- over-saturated args too which is "wrong"; + -- but if over-saturated we inline anyway. + + interesting_call + | over_saturated + = True + | otherwise + = case cont_info of + CaseCtxt -> not (lone_variable && is_wf) -- Note [Lone variables] + ValAppCtxt -> True -- Note [Cast then apply] + RuleArgCtxt -> uf_arity > 0 -- See Note [Unfold info lazy contexts] + DiscArgCtxt -> uf_arity > 0 -- + RhsCtxt -> uf_arity > 0 -- + _ -> not is_top && uf_arity > 0 -- Note [Nested functions] + -- Note [Inlining in ArgCtxt] + +{- +Note [Unfold into lazy contexts], Note [RHS of lets] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When the call is the argument of a function with a RULE, or the RHS of a let, +we are a little bit keener to inline. For example + f y = (y,y,y) + g y = let x = f y in ...(case x of (a,b,c) -> ...) ... +We'd inline 'f' if the call was in a case context, and it kind-of-is, +only we can't see it. Also + x = f v +could be expensive whereas + x = case v of (a,b) -> a +is patently cheap and may allow more eta expansion. +So we treat the RHS of a let as not-totally-boring. + +Note [Unsaturated applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When a call is not saturated, we *still* inline if one of the +arguments has interesting structure. That's sometimes very important. +A good example is the Ord instance for Bool in Base: + + Rec { + $fOrdBool =GHC.Classes.D:Ord + @ Bool + ... + $cmin_ajX + + $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool + $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool + } + +But the defn of GHC.Classes.$dmmin is: + + $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a + {- Arity: 3, HasNoCafRefs, Strictness: SLL, + Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a -> + case @ a GHC.Classes.<= @ a $dOrd x y of wild { + GHC.Types.False -> y GHC.Types.True -> x }) -} + +We *really* want to inline $dmmin, even though it has arity 3, in +order to unravel the recursion. 
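The saturation logic above can be summarised by a small stand-alone predicate; this is a simplified restatement of calc_some_benefit for exposition only, not how the code is actually factored:

    someBenefitSketch :: Arity -> [ArgSummary] -> Bool -> Bool
    -- uf_arity is the arity the unfolding expects; interesting_call says
    -- whether the continuation (case scrutinee, applied value, ...) is interesting
    someBenefitSketch uf_arity arg_infos interesting_call
      | n_val_args < uf_arity = any nonTriv arg_infos   -- under-saturated: need an
                                                        -- interesting argument
      | otherwise             = any nonTriv arg_infos || interesting_call
      where
        n_val_args = length arg_infos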
+ + +Note [Things to watch] +~~~~~~~~~~~~~~~~~~~~~~ +* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... } + Assume x is exported, so not inlined unconditionally. + Then we want x to inline unconditionally; no reason for it + not to, and doing so avoids an indirection. + +* { x = I# 3; ....f x.... } + Make sure that x does not inline unconditionally! + Lest we get extra allocation. + +Note [Inlining an InlineRule] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An InlineRule is used for + (a) programmer INLINE pragmas + (b) inlinings from worker/wrapper + +For (a) the RHS may be large, and our contract is that we *only* inline +when the function is applied to all the arguments on the LHS of the +source-code defn. (The uf_arity in the rule.) + +However, for worker/wrapper it may be worth inlining even if the +arity is not satisfied (as we do in the CoreUnfolding case) so we don't +require saturation. + + +Note [Nested functions] +~~~~~~~~~~~~~~~~~~~~~~~ +If a function has a nested defn we also record some-benefit, on the +grounds that we are often able to eliminate the binding, and hence the +allocation, for the function altogether; this is good for join points. +But this only makes sense for *functions*; inlining a constructor +doesn't help allocation unless the result is scrutinised. UNLESS the +constructor occurs just once, albeit possibly in multiple case +branches. Then inlining it doesn't increase allocation, but it does +increase the chance that the constructor won't be allocated at all in +the branches that don't use it. + +Note [Cast then apply] +~~~~~~~~~~~~~~~~~~~~~~ +Consider + myIndex = __inline_me ( (/\a. <blah>) |> co ) + co :: (forall a. a -> a) ~ (forall a. T a) + ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... + +We need to inline myIndex to unravel this; but the actual call (myIndex a) has +no value arguments. The ValAppCtxt gives it enough incentive to inline. + +Note [Inlining in ArgCtxt] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +The condition (arity > 0) here is very important, because otherwise +we end up inlining top-level stuff into useless places; eg + x = I# 3# + f = \y. g x +This can make a very big difference: it adds 16% to nofib 'integer' allocs, +and 20% to 'power'. + +At one stage I replaced this condition by 'True' (leading to the above +slow-down). The motivation was test eyeball/inline1.hs; but that seems +to work ok now. + +NOTE: arguably, we should inline in ArgCtxt only if the result of the +call is at least CONLIKE. At least for the cases where we use ArgCtxt +for the RHS of a 'let', we only profit from the inlining if we get a +CONLIKE thing (modulo lets). + +Note [Lone variables] See also Note [Interaction of exprIsWorkFree and lone variables] +~~~~~~~~~~~~~~~~~~~~~ which appears below +The "lone-variable" case is important. I spent ages messing about +with unsatisfactory variants, but this is nice. The idea is that if a +variable appears all alone + + as an arg of lazy fn, or rhs BoringCtxt + as scrutinee of a case CaseCtxt + as arg of a fn ArgCtxt +AND + it is bound to a cheap expression + +then we should not inline it (unless there is some other reason, +e.g. it is the sole occurrence). That is what is happening at +the use of 'lone_variable' in 'interesting_call'. + +Why? At least in the case-scrutinee situation, turning + let x = (a,b) in case x of y -> ... +into + let x = (a,b) in case (a,b) of y -> ... +and thence to + let x = (a,b) in let y = (a,b) in ... +is bad if the binding for x will remain.
+ +Another example: I discovered that strings +were getting inlined straight back into applications of 'error' +because the latter is strict. + s = "foo" + f = \x -> ...(error s)... + +Fundamentally such contexts should not encourage inlining, because the +context can ``see'' the unfolding of the variable (e.g. case or a +RULE) so there's no gain if the thing is bound to a value. + +However, watch out: + + * Consider this: + foo = _inline_ (\n. [n]) + bar = _inline_ (foo 20) + baz = \n. case bar of { (m:_) -> m + n } + Here we really want to inline 'bar' so that we can inline 'foo' + and the whole thing unravels as it should obviously do. This is + important: in the NDP project, 'bar' generates a closure data + structure rather than a list. + + So the non-inlining of lone_variables should only apply if the + unfolding is regarded as cheap; because that is when exprIsConApp_maybe + looks through the unfolding. Hence the "&& is_wf" in the + InlineRule branch. + + * Even a type application or coercion isn't a lone variable. + Consider + case $fMonadST @ RealWorld of { :DMonad a b c -> c } + We had better inline that sucker! The case won't see through it. + + For now, I'm treating a variable applied to types + in a *lazy* context as "lone". The motivating example was + f = /\a. \x. BIG + g = /\a. \y. h (f a) + There's no advantage in inlining f here, and perhaps + a significant disadvantage. Hence some_val_args in the Stop case. + +Note [Interaction of exprIsWorkFree and lone variables] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The lone-variable test says "don't inline if a case expression +scrutinises a lone variable whose unfolding is cheap". It's very +important that, under these circumstances, exprIsConApp_maybe +can spot a constructor application. So, for example, we don't +consider + let x = e in (x,x) +to be cheap, and that's good because exprIsConApp_maybe doesn't +think that expression is a constructor application. + +In the 'not (lone_variable && is_wf)' test, I used to test is_value +rather than is_wf, which was utterly wrong, because the above +expression responds True to exprIsHNF, which is what sets is_value. + +This kind of thing can occur if you have + + {-# INLINE foo #-} + foo = let x = e in (x,x) + +which Roman did. +-} + +computeDiscount :: DynFlags -> [Int] -> Int -> [ArgSummary] -> CallCtxt + -> Int +computeDiscount dflags arg_discounts res_discount arg_infos cont_info + -- We multiply the raw discounts (arg_discounts and res_discount) + -- by ufKeenessFactor because the former have to do with + -- *size* whereas the discounts imply that there's some extra + -- *efficiency* to be gained (e.g. beta reductions, case reductions) + -- by inlining.
+ + = 10 -- Discount of 10 because the result replaces the call + -- so we count 10 for the function itself + + + 10 * length actual_arg_discounts + -- Discount of 10 for each arg supplied, + -- because the result replaces the call + + + round (ufKeenessFactor dflags * + fromIntegral (total_arg_discount + res_discount')) + where + actual_arg_discounts = zipWith mk_arg_discount arg_discounts arg_infos + total_arg_discount = sum actual_arg_discounts + + mk_arg_discount _ TrivArg = 0 + mk_arg_discount _ NonTrivArg = 10 + mk_arg_discount discount ValueArg = discount + + res_discount' + | LT <- arg_discounts `compareLength` arg_infos + = res_discount -- Over-saturated + | otherwise + = case cont_info of + BoringCtxt -> 0 + CaseCtxt -> res_discount -- Presumably a constructor + ValAppCtxt -> res_discount -- Presumably a function + _ -> 40 `min` res_discount + -- ToDo: this 40 `min` res_discount doesn't seem right + -- for DiscArgCtxt it shouldn't matter because the function will + -- get the arg discount for any non-triv arg + -- for RuleArgCtxt we do want to be keener to inline; but not only + -- constructor results + -- for RhsCtxt I suppose that exposing a data con is good in general + -- And 40 seems very arbitrary + -- + -- res_discount can be very large when a function returns + -- constructors; but we only want to invoke that large discount + -- when there's a case continuation. + -- Otherwise we, rather arbitrarily, threshold it. Yuk. + -- But we want to aovid inlining large functions that return + -- constructors into contexts that are simply "interesting" + +{- +************************************************************************ +* * + Interesting arguments +* * +************************************************************************ + +Note [Interesting arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An argument is interesting if it deserves a discount for unfoldings +with a discount in that argument position. The idea is to avoid +unfolding a function that is applied only to variables that have no +unfolding (i.e. they are probably lambda bound): f x y z There is +little point in inlining f here. + +Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But +we must look through lets, eg (let x = e in C a b), because the let will +float, exposing the value, if we inline. That makes it different to +exprIsHNF. + +Before 2009 we said it was interesting if the argument had *any* structure +at all; i.e. (hasSomeUnfolding v). But does too much inlining; see Trac #3016. + +But we don't regard (f x y) as interesting, unless f is unsaturated. +If it's saturated and f hasn't inlined, then it's probably not going +to now! + +Note [Conlike is interesting] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f d = ...((*) d x y)... + ... f (df d')... +where df is con-like. Then we'd really like to inline 'f' so that the +rule for (*) (df d) can fire. To do this + a) we give a discount for being an argument of a class-op (eg (*) d) + b) we say that a con-like argument (eg (df d)) is interesting +-} + +data ArgSummary = TrivArg -- Nothing interesting + | NonTrivArg -- Arg has structure + | ValueArg -- Arg is a con-app or PAP + -- ..or con-like. 
Note [Conlike is interesting] + +interestingArg :: CoreExpr -> ArgSummary +-- See Note [Interesting arguments] +interestingArg e = go e 0 + where + -- n is # value args to which the expression is applied + go (Lit {}) _ = ValueArg + go (Var v) n + | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than + -- data constructors here + | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding + | n > 0 = NonTrivArg -- Saturated or unknown call + | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding + -- See Note [Conlike is interesting] + | otherwise = TrivArg -- n==0, no useful unfolding + where + conlike_unfolding = isConLikeUnfolding (idUnfolding v) + + go (Type _) _ = TrivArg + go (Coercion _) _ = TrivArg + go (App fn (Type _)) n = go fn n + go (App fn (Coercion _)) n = go fn n + go (App fn _) n = go fn (n+1) + go (Tick _ a) n = go a n + go (Cast e _) n = go e n + go (Lam v e) n + | isTyVar v = go e n + | n>0 = go e (n-1) + | otherwise = ValueArg + go (Let _ e) n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg } + go (Case {}) _ = NonTrivArg + +nonTriv :: ArgSummary -> Bool +nonTriv TrivArg = False +nonTriv _ = True
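To illustrate informally (assuming f and x are ordinary lambda-bound Ids with no unfolding and no recorded arity):

    interestingArg (Lit lit)              --> ValueArg    -- literals are values
    interestingArg (Var x)                --> TrivArg     -- bare unknown variable
    interestingArg (App (Var f) (Var x))  --> NonTrivArg  -- saturated unknown call
    interestingArg (Lam b body)           --> ValueArg    -- unapplied value lambda (isId b)
    interestingArg (Let bind con_app)     --> ValueArg    -- the let will float, exposing the con-app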