summaryrefslogtreecommitdiff
path: root/compiler/coreSyn/CoreUnfold.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/coreSyn/CoreUnfold.hs')
-rw-r--r--compiler/coreSyn/CoreUnfold.hs1432
1 files changed, 1432 insertions, 0 deletions
diff --git a/compiler/coreSyn/CoreUnfold.hs b/compiler/coreSyn/CoreUnfold.hs
new file mode 100644
index 0000000000..dc9f95e73a
--- /dev/null
+++ b/compiler/coreSyn/CoreUnfold.hs
@@ -0,0 +1,1432 @@
+{-
+(c) The University of Glasgow 2006
+(c) The AQUA Project, Glasgow University, 1994-1998
+
+
+Core-syntax unfoldings
+
+Unfoldings (which can travel across module boundaries) are in Core
+syntax (namely @CoreExpr@s).
+
+The type @Unfolding@ sits ``above'' simply-Core-expressions
+unfoldings, capturing ``higher-level'' things we know about a binding,
+usually things that the simplifier found out (e.g., ``it's a
+literal''). In the corner of a @CoreUnfolding@ unfolding, you will
+find, unsurprisingly, a Core expression.
+-}
+
+{-# LANGUAGE CPP #-}
+
+module CoreUnfold (
+ Unfolding, UnfoldingGuidance, -- Abstract types
+
+ noUnfolding, mkImplicitUnfolding,
+ mkUnfolding, mkCoreUnfolding,
+ mkTopUnfolding, mkSimpleUnfolding, mkWorkerUnfolding,
+ mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule,
+ mkCompulsoryUnfolding, mkDFunUnfolding,
+ specUnfolding,
+
+ interestingArg, ArgSummary(..),
+
+ couldBeSmallEnoughToInline, inlineBoringOk,
+ certainlyWillInline, smallEnoughToInline,
+
+ callSiteInline, CallCtxt(..),
+
+ -- Reexport from CoreSubst (it only live there so it can be used
+ -- by the Very Simple Optimiser)
+ exprIsConApp_maybe, exprIsLiteral_maybe
+ ) where
+
+#include "HsVersions.h"
+
+import DynFlags
+import CoreSyn
+import PprCore () -- Instances
+import OccurAnal ( occurAnalyseExpr )
+import CoreSubst hiding( substTy )
+import CoreArity ( manifestArity, exprBotStrictness_maybe )
+import CoreUtils
+import Id
+import DataCon
+import Literal
+import PrimOp
+import IdInfo
+import BasicTypes ( Arity )
+import Type
+import PrelNames
+import TysPrim ( realWorldStatePrimTy )
+import Bag
+import Util
+import FastTypes
+import FastString
+import Outputable
+import ForeignCall
+
+import qualified Data.ByteString as BS
+import Data.Maybe
+
+{-
+************************************************************************
+* *
+\subsection{Making unfoldings}
+* *
+************************************************************************
+-}
+
+mkTopUnfolding :: DynFlags -> Bool -> CoreExpr -> Unfolding
+mkTopUnfolding dflags = mkUnfolding dflags InlineRhs True {- Top level -}
+
+mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding
+-- For implicit Ids, do a tiny bit of optimising first
+mkImplicitUnfolding dflags expr
+ = mkTopUnfolding dflags False (simpleOptExpr expr)
+
+-- Note [Top-level flag on inline rules]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Slight hack: note that mk_inline_rules conservatively sets the
+-- top-level flag to True. It gets set more accurately by the simplifier
+-- Simplify.simplUnfolding.
+
+mkSimpleUnfolding :: DynFlags -> CoreExpr -> Unfolding
+mkSimpleUnfolding dflags = mkUnfolding dflags InlineRhs False False
+
+mkDFunUnfolding :: [Var] -> DataCon -> [CoreExpr] -> Unfolding
+mkDFunUnfolding bndrs con ops
+ = DFunUnfolding { df_bndrs = bndrs
+ , df_con = con
+ , df_args = map occurAnalyseExpr ops }
+ -- See Note [Occurrrence analysis of unfoldings]
+
+mkWwInlineRule :: CoreExpr -> Arity -> Unfolding
+mkWwInlineRule expr arity
+ = mkCoreUnfolding InlineStable True
+ (simpleOptExpr expr)
+ (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk
+ , ug_boring_ok = boringCxtNotOk })
+
+mkCompulsoryUnfolding :: CoreExpr -> Unfolding
+mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded
+ = mkCoreUnfolding InlineCompulsory True
+ (simpleOptExpr expr)
+ (UnfWhen { ug_arity = 0 -- Arity of unfolding doesn't matter
+ , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk })
+
+mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding
+-- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap
+mkWorkerUnfolding dflags work_fn
+ (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
+ , uf_is_top = top_lvl })
+ | isStableSource src
+ = mkCoreUnfolding src top_lvl new_tmpl guidance
+ where
+ new_tmpl = simpleOptExpr (work_fn tmpl)
+ guidance = calcUnfoldingGuidance dflags new_tmpl
+
+mkWorkerUnfolding _ _ _ = noUnfolding
+
+mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding
+mkInlineUnfolding mb_arity expr
+ = mkCoreUnfolding InlineStable
+ True -- Note [Top-level flag on inline rules]
+ expr' guide
+ where
+ expr' = simpleOptExpr expr
+ guide = case mb_arity of
+ Nothing -> UnfWhen { ug_arity = manifestArity expr'
+ , ug_unsat_ok = unSaturatedOk
+ , ug_boring_ok = boring_ok }
+ Just arity -> UnfWhen { ug_arity = arity
+ , ug_unsat_ok = needSaturated
+ , ug_boring_ok = boring_ok }
+ boring_ok = inlineBoringOk expr'
+
+mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding
+mkInlinableUnfolding dflags expr
+ = mkUnfolding dflags InlineStable True is_bot expr'
+ where
+ expr' = simpleOptExpr expr
+ is_bot = isJust (exprBotStrictness_maybe expr')
+
+specUnfolding :: DynFlags -> Subst -> [Var] -> [CoreExpr] -> Unfolding -> Unfolding
+-- See Note [Specialising unfoldings]
+specUnfolding _ subst new_bndrs spec_args
+ df@(DFunUnfolding { df_bndrs = bndrs, df_con = con , df_args = args })
+ = ASSERT2( length bndrs >= length spec_args, ppr df $$ ppr spec_args $$ ppr new_bndrs )
+ mkDFunUnfolding (new_bndrs ++ extra_bndrs) con
+ (map (substExpr spec_doc subst2) args)
+ where
+ subst1 = extendSubstList subst (bndrs `zip` spec_args)
+ (subst2, extra_bndrs) = substBndrs subst1 (dropList spec_args bndrs)
+
+specUnfolding _dflags subst new_bndrs spec_args
+ (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
+ , uf_is_top = top_lvl
+ , uf_guidance = old_guidance })
+ | isStableSource src -- See Note [Specialising unfoldings]
+ , UnfWhen { ug_arity = old_arity
+ , ug_unsat_ok = unsat_ok
+ , ug_boring_ok = boring_ok } <- old_guidance
+ = let guidance = UnfWhen { ug_arity = old_arity - count isValArg spec_args
+ + count isId new_bndrs
+ , ug_unsat_ok = unsat_ok
+ , ug_boring_ok = boring_ok }
+ new_tmpl = simpleOptExpr $ mkLams new_bndrs $
+ mkApps (substExpr spec_doc subst tmpl) spec_args
+ -- The beta-redexes created here will be simplified
+ -- away by simplOptExpr in mkUnfolding
+
+ in mkCoreUnfolding src top_lvl new_tmpl guidance
+
+specUnfolding _ _ _ _ _ = noUnfolding
+
+spec_doc :: SDoc
+spec_doc = ptext (sLit "specUnfolding")
+
+{-
+Note [Specialising unfoldings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we specialise a function for some given type-class arguments, we use
+specUnfolding to specialise its unfolding. Some important points:
+
+* If the original function has a DFunUnfolding, the specialised one
+ must do so too! Otherwise we lose the magic rules that make it
+ interact with ClassOps
+
+* There is a bit of hack for INLINABLE functions:
+ f :: Ord a => ....
+ f = <big-rhs>
+ {- INLINEABLE f #-}
+ Now if we specialise f, should the specialised version still have
+ an INLINEABLE pragma? If it does, we'll capture a specialised copy
+ of <big-rhs> as its unfolding, and that probaby won't inline. But
+ if we don't, the specialised version of <big-rhs> might be small
+ enough to inline at a call site. This happens with Control.Monad.liftM3,
+ and can cause a lot more allocation as a result (nofib n-body shows this).
+
+ Moreover, keeping the INLINEABLE thing isn't much help, because
+ the specialised function (probaby) isn't overloaded any more.
+
+ Conclusion: drop the INLINEALE pragma. In practice what this means is:
+ if a stable unfolding has UnfoldingGuidance of UnfWhen,
+ we keep it (so the specialised thing too will always inline)
+ if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs
+ (which arises from INLINEABLE), we discard it
+-}
+
+mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
+ -> UnfoldingGuidance -> Unfolding
+-- Occurrence-analyses the expression before capturing it
+mkCoreUnfolding src top_lvl expr guidance
+ = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
+ -- See Note [Occurrrence analysis of unfoldings]
+ uf_src = src,
+ uf_is_top = top_lvl,
+ uf_is_value = exprIsHNF expr,
+ uf_is_conlike = exprIsConLike expr,
+ uf_is_work_free = exprIsWorkFree expr,
+ uf_expandable = exprIsExpandable expr,
+ uf_guidance = guidance }
+
+mkUnfolding :: DynFlags -> UnfoldingSource -> Bool -> Bool -> CoreExpr
+ -> Unfolding
+-- Calculates unfolding guidance
+-- Occurrence-analyses the expression before capturing it
+mkUnfolding dflags src top_lvl is_bottoming expr
+ | top_lvl && is_bottoming
+ , not (exprIsTrivial expr)
+ = NoUnfolding -- See Note [Do not inline top-level bottoming functions]
+ | otherwise
+ = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
+ -- See Note [Occurrrence analysis of unfoldings]
+ uf_src = src,
+ uf_is_top = top_lvl,
+ uf_is_value = exprIsHNF expr,
+ uf_is_conlike = exprIsConLike expr,
+ uf_expandable = exprIsExpandable expr,
+ uf_is_work_free = exprIsWorkFree expr,
+ uf_guidance = guidance }
+ where
+ guidance = calcUnfoldingGuidance dflags expr
+ -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr expr))!
+ -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression]
+
+{-
+Note [Occurrence analysis of unfoldings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We do occurrence-analysis of unfoldings once and for all, when the
+unfolding is built, rather than each time we inline them.
+
+But given this decision it's vital that we do
+*always* do it. Consider this unfolding
+ \x -> letrec { f = ...g...; g* = f } in body
+where g* is (for some strange reason) the loop breaker. If we don't
+occ-anal it when reading it in, we won't mark g as a loop breaker, and
+we may inline g entirely in body, dropping its binding, and leaving
+the occurrence in f out of scope. This happened in Trac #8892, where
+the unfolding in question was a DFun unfolding.
+
+But more generally, the simplifier is designed on the
+basis that it is looking at occurrence-analysed expressions, so better
+ensure that they acutally are.
+
+Note [Calculate unfolding guidance on the non-occ-anal'd expression]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Notice that we give the non-occur-analysed expression to
+calcUnfoldingGuidance. In some ways it'd be better to occur-analyse
+first; for example, sometimes during simplification, there's a large
+let-bound thing which has been substituted, and so is now dead; so
+'expr' contains two copies of the thing while the occurrence-analysed
+expression doesn't.
+
+Nevertheless, we *don't* and *must not* occ-analyse before computing
+the size because
+
+a) The size computation bales out after a while, whereas occurrence
+ analysis does not.
+
+b) Residency increases sharply if you occ-anal first. I'm not
+ 100% sure why, but it's a large effect. Compiling Cabal went
+ from residency of 534M to over 800M with this one change.
+
+This can occasionally mean that the guidance is very pessimistic;
+it gets fixed up next round. And it should be rare, because large
+let-bound things that are dead are usually caught by preInlineUnconditionally
+
+
+************************************************************************
+* *
+\subsection{The UnfoldingGuidance type}
+* *
+************************************************************************
+-}
+
+inlineBoringOk :: CoreExpr -> Bool
+-- See Note [INLINE for small functions]
+-- True => the result of inlining the expression is
+-- no bigger than the expression itself
+-- eg (\x y -> f y x)
+-- This is a quick and dirty version. It doesn't attempt
+-- to deal with (\x y z -> x (y z))
+-- The really important one is (x `cast` c)
+inlineBoringOk e
+ = go 0 e
+ where
+ go :: Int -> CoreExpr -> Bool
+ go credit (Lam x e) | isId x = go (credit+1) e
+ | otherwise = go credit e
+ go credit (App f (Type {})) = go credit f
+ go credit (App f a) | credit > 0
+ , exprIsTrivial a = go (credit-1) f
+ go credit (Tick _ e) = go credit e -- dubious
+ go credit (Cast e _) = go credit e
+ go _ (Var {}) = boringCxtOk
+ go _ _ = boringCxtNotOk
+
+calcUnfoldingGuidance
+ :: DynFlags
+ -> CoreExpr -- Expression to look at
+ -> UnfoldingGuidance
+calcUnfoldingGuidance dflags expr
+ = case sizeExpr dflags (iUnbox bOMB_OUT_SIZE) val_bndrs body of
+ TooBig -> UnfNever
+ SizeIs size cased_bndrs scrut_discount
+ | uncondInline expr n_val_bndrs (iBox size)
+ -> UnfWhen { ug_unsat_ok = unSaturatedOk
+ , ug_boring_ok = boringCxtOk
+ , ug_arity = n_val_bndrs } -- Note [INLINE for small functions]
+ | otherwise
+ -> UnfIfGoodArgs { ug_args = map (mk_discount cased_bndrs) val_bndrs
+ , ug_size = iBox size
+ , ug_res = iBox scrut_discount }
+
+ where
+ (bndrs, body) = collectBinders expr
+ bOMB_OUT_SIZE = ufCreationThreshold dflags
+ -- Bomb out if size gets bigger than this
+ val_bndrs = filter isId bndrs
+ n_val_bndrs = length val_bndrs
+
+ mk_discount :: Bag (Id,Int) -> Id -> Int
+ mk_discount cbs bndr = foldlBag combine 0 cbs
+ where
+ combine acc (bndr', disc)
+ | bndr == bndr' = acc `plus_disc` disc
+ | otherwise = acc
+
+ plus_disc :: Int -> Int -> Int
+ plus_disc | isFunTy (idType bndr) = max
+ | otherwise = (+)
+ -- See Note [Function and non-function discounts]
+
+{-
+Note [Computing the size of an expression]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The basic idea of sizeExpr is obvious enough: count nodes. But getting the
+heuristics right has taken a long time. Here's the basic strategy:
+
+ * Variables, literals: 0
+ (Exception for string literals, see litSize.)
+
+ * Function applications (f e1 .. en): 1 + #value args
+
+ * Constructor applications: 1, regardless of #args
+
+ * Let(rec): 1 + size of components
+
+ * Note, cast: 0
+
+Examples
+
+ Size Term
+ --------------
+ 0 42#
+ 0 x
+ 0 True
+ 2 f x
+ 1 Just x
+ 4 f (g x)
+
+Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's
+a function call to account for. Notice also that constructor applications
+are very cheap, because exposing them to a caller is so valuable.
+
+[25/5/11] All sizes are now multiplied by 10, except for primops
+(which have sizes like 1 or 4. This makes primops look fantastically
+cheap, and seems to be almost unversally beneficial. Done partly as a
+result of #4978.
+
+Note [Do not inline top-level bottoming functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The FloatOut pass has gone to some trouble to float out calls to 'error'
+and similar friends. See Note [Bottoming floats] in SetLevels.
+Do not re-inline them! But we *do* still inline if they are very small
+(the uncondInline stuff).
+
+Note [INLINE for small functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider {-# INLINE f #-}
+ f x = Just x
+ g y = f y
+Then f's RHS is no larger than its LHS, so we should inline it into
+even the most boring context. In general, f the function is
+sufficiently small that its body is as small as the call itself, the
+inline unconditionally, regardless of how boring the context is.
+
+Things to note:
+
+(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
+ than the thing it's replacing. Notice that
+ (f x) --> (g 3) -- YES, unconditionally
+ (f x) --> x : [] -- YES, *even though* there are two
+ -- arguments to the cons
+ x --> g 3 -- NO
+ x --> Just v -- NO
+
+ It's very important not to unconditionally replace a variable by
+ a non-atomic term.
+
+(2) We do this even if the thing isn't saturated, else we end up with the
+ silly situation that
+ f x y = x
+ ...map (f 3)...
+ doesn't inline. Even in a boring context, inlining without being
+ saturated will give a lambda instead of a PAP, and will be more
+ efficient at runtime.
+
+(3) However, when the function's arity > 0, we do insist that it
+ has at least one value argument at the call site. (This check is
+ made in the UnfWhen case of callSiteInline.) Otherwise we find this:
+ f = /\a \x:a. x
+ d = /\b. MkD (f b)
+ If we inline f here we get
+ d = /\b. MkD (\x:b. x)
+ and then prepareRhs floats out the argument, abstracting the type
+ variables, so we end up with the original again!
+
+(4) We must be much more cautious about arity-zero things. Consider
+ let x = y +# z in ...
+ In *size* terms primops look very small, because the generate a
+ single instruction, but we do not want to unconditionally replace
+ every occurrence of x with (y +# z). So we only do the
+ unconditional-inline thing for *trivial* expressions.
+
+ NB: you might think that PostInlineUnconditionally would do this
+ but it doesn't fire for top-level things; see SimplUtils
+ Note [Top level and postInlineUnconditionally]
+-}
+
+uncondInline :: CoreExpr -> Arity -> Int -> Bool
+-- Inline unconditionally if there no size increase
+-- Size of call is arity (+1 for the function)
+-- See Note [INLINE for small functions]
+uncondInline rhs arity size
+ | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
+ | otherwise = exprIsTrivial rhs -- See Note [INLINE for small functions] (4)
+
+sizeExpr :: DynFlags
+ -> FastInt -- Bomb out if it gets bigger than this
+ -> [Id] -- Arguments; we're interested in which of these
+ -- get case'd
+ -> CoreExpr
+ -> ExprSize
+
+-- Note [Computing the size of an expression]
+
+sizeExpr dflags bOMB_OUT_SIZE top_args expr
+ = size_up expr
+ where
+ size_up (Cast e _) = size_up e
+ size_up (Tick _ e) = size_up e
+ size_up (Type _) = sizeZero -- Types cost nothing
+ size_up (Coercion _) = sizeZero
+ size_up (Lit lit) = sizeN (litSize lit)
+ size_up (Var f) | isRealWorldId f = sizeZero
+ -- Make sure we get constructor discounts even
+ -- on nullary constructors
+ | otherwise = size_up_call f [] 0
+
+ size_up (App fun arg)
+ | isTyCoArg arg = size_up fun
+ | otherwise = size_up arg `addSizeNSD`
+ size_up_app fun [arg] (if isRealWorldExpr arg then 1 else 0)
+
+ size_up (Lam b e)
+ | isId b && not (isRealWorldId b) = lamScrutDiscount dflags (size_up e `addSizeN` 10)
+ | otherwise = size_up e
+
+ size_up (Let (NonRec binder rhs) body)
+ = size_up rhs `addSizeNSD`
+ size_up body `addSizeN`
+ (if isUnLiftedType (idType binder) then 0 else 10)
+ -- For the allocation
+ -- If the binder has an unlifted type there is no allocation
+
+ size_up (Let (Rec pairs) body)
+ = foldr (addSizeNSD . size_up . snd)
+ (size_up body `addSizeN` (10 * length pairs)) -- (length pairs) for the allocation
+ pairs
+
+ size_up (Case (Var v) _ _ alts)
+ | v `elem` top_args -- We are scrutinising an argument variable
+ = alts_size (foldr addAltSize sizeZero alt_sizes)
+ (foldr maxSize sizeZero alt_sizes)
+ -- Good to inline if an arg is scrutinised, because
+ -- that may eliminate allocation in the caller
+ -- And it eliminates the case itself
+ where
+ alt_sizes = map size_up_alt alts
+
+ -- alts_size tries to compute a good discount for
+ -- the case when we are scrutinising an argument variable
+ alts_size (SizeIs tot tot_disc tot_scrut) -- Size of all alternatives
+ (SizeIs max _ _) -- Size of biggest alternative
+ = SizeIs tot (unitBag (v, iBox (_ILIT(20) +# tot -# max)) `unionBags` tot_disc) tot_scrut
+ -- If the variable is known, we produce a discount that
+ -- will take us back to 'max', the size of the largest alternative
+ -- The 1+ is a little discount for reduced allocation in the caller
+ --
+ -- Notice though, that we return tot_disc, the total discount from
+ -- all branches. I think that's right.
+
+ alts_size tot_size _ = tot_size
+
+ size_up (Case e _ _ alts) = size_up e `addSizeNSD`
+ foldr (addAltSize . size_up_alt) case_size alts
+ where
+ case_size
+ | is_inline_scrut e, not (lengthExceeds alts 1) = sizeN (-10)
+ | otherwise = sizeZero
+ -- Normally we don't charge for the case itself, but
+ -- we charge one per alternative (see size_up_alt,
+ -- below) to account for the cost of the info table
+ -- and comparisons.
+ --
+ -- However, in certain cases (see is_inline_scrut
+ -- below), no code is generated for the case unless
+ -- there are multiple alts. In these cases we
+ -- subtract one, making the first alt free.
+ -- e.g. case x# +# y# of _ -> ... should cost 1
+ -- case touch# x# of _ -> ... should cost 0
+ -- (see #4978)
+ --
+ -- I would like to not have the "not (lengthExceeds alts 1)"
+ -- condition above, but without that some programs got worse
+ -- (spectral/hartel/event and spectral/para). I don't fully
+ -- understand why. (SDM 24/5/11)
+
+ -- unboxed variables, inline primops and unsafe foreign calls
+ -- are all "inline" things:
+ is_inline_scrut (Var v) = isUnLiftedType (idType v)
+ is_inline_scrut scrut
+ | (Var f, _) <- collectArgs scrut
+ = case idDetails f of
+ FCallId fc -> not (isSafeForeignCall fc)
+ PrimOpId op -> not (primOpOutOfLine op)
+ _other -> False
+ | otherwise
+ = False
+
+ ------------
+ -- size_up_app is used when there's ONE OR MORE value args
+ size_up_app (App fun arg) args voids
+ | isTyCoArg arg = size_up_app fun args voids
+ | isRealWorldExpr arg = size_up_app fun (arg:args) (voids + 1)
+ | otherwise = size_up arg `addSizeNSD`
+ size_up_app fun (arg:args) voids
+ size_up_app (Var fun) args voids = size_up_call fun args voids
+ size_up_app other args voids = size_up other `addSizeN` (length args - voids)
+
+ ------------
+ size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
+ size_up_call fun val_args voids
+ = case idDetails fun of
+ FCallId _ -> sizeN (10 * (1 + length val_args))
+ DataConWorkId dc -> conSize dc (length val_args)
+ PrimOpId op -> primOpSize op (length val_args)
+ ClassOpId _ -> classOpSize dflags top_args val_args
+ _ -> funSize dflags top_args fun (length val_args) voids
+
+ ------------
+ size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
+ -- Don't charge for args, so that wrappers look cheap
+ -- (See comments about wrappers with Case)
+ --
+ -- IMPORATANT: *do* charge 1 for the alternative, else we
+ -- find that giant case nests are treated as practically free
+ -- A good example is Foreign.C.Error.errrnoToIOError
+
+ ------------
+ -- These addSize things have to be here because
+ -- I don't want to give them bOMB_OUT_SIZE as an argument
+ addSizeN TooBig _ = TooBig
+ addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
+
+ -- addAltSize is used to add the sizes of case alternatives
+ addAltSize TooBig _ = TooBig
+ addAltSize _ TooBig = TooBig
+ addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
+ = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
+ (xs `unionBags` ys)
+ (d1 +# d2) -- Note [addAltSize result discounts]
+
+ -- This variant ignores the result discount from its LEFT argument
+ -- It's used when the second argument isn't part of the result
+ addSizeNSD TooBig _ = TooBig
+ addSizeNSD _ TooBig = TooBig
+ addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
+ = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
+ (xs `unionBags` ys)
+ d2 -- Ignore d1
+
+ isRealWorldId id = idType id `eqType` realWorldStatePrimTy
+
+ -- an expression of type State# RealWorld must be a variable
+ isRealWorldExpr (Var id) = isRealWorldId id
+ isRealWorldExpr _ = False
+
+-- | Finds a nominal size of a string literal.
+litSize :: Literal -> Int
+-- Used by CoreUnfold.sizeExpr
+litSize (LitInteger {}) = 100 -- Note [Size of literal integers]
+litSize (MachStr str) = 10 + 10 * ((BS.length str + 3) `div` 4)
+ -- If size could be 0 then @f "x"@ might be too small
+ -- [Sept03: make literal strings a bit bigger to avoid fruitless
+ -- duplication of little strings]
+litSize _other = 0 -- Must match size of nullary constructors
+ -- Key point: if x |-> 4, then x must inline unconditionally
+ -- (eg via case binding)
+
+classOpSize :: DynFlags -> [Id] -> [CoreExpr] -> ExprSize
+-- See Note [Conlike is interesting]
+classOpSize _ _ []
+ = sizeZero
+classOpSize dflags top_args (arg1 : other_args)
+ = SizeIs (iUnbox size) arg_discount (_ILIT(0))
+ where
+ size = 20 + (10 * length other_args)
+ -- If the class op is scrutinising a lambda bound dictionary then
+ -- give it a discount, to encourage the inlining of this function
+ -- The actual discount is rather arbitrarily chosen
+ arg_discount = case arg1 of
+ Var dict | dict `elem` top_args
+ -> unitBag (dict, ufDictDiscount dflags)
+ _other -> emptyBag
+
+funSize :: DynFlags -> [Id] -> Id -> Int -> Int -> ExprSize
+-- Size for functions that are not constructors or primops
+-- Note [Function applications]
+funSize dflags top_args fun n_val_args voids
+ | fun `hasKey` buildIdKey = buildSize
+ | fun `hasKey` augmentIdKey = augmentSize
+ | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
+ where
+ some_val_args = n_val_args > 0
+
+ size | some_val_args = 10 * (1 + n_val_args - voids)
+ | otherwise = 0
+ -- The 1+ is for the function itself
+ -- Add 1 for each non-trivial arg;
+ -- the allocation cost, as in let(rec)
+
+ -- DISCOUNTS
+ -- See Note [Function and non-function discounts]
+ arg_discount | some_val_args && fun `elem` top_args
+ = unitBag (fun, ufFunAppDiscount dflags)
+ | otherwise = emptyBag
+ -- If the function is an argument and is applied
+ -- to some values, give it an arg-discount
+
+ res_discount | idArity fun > n_val_args = ufFunAppDiscount dflags
+ | otherwise = 0
+ -- If the function is partially applied, show a result discount
+
+conSize :: DataCon -> Int -> ExprSize
+conSize dc n_val_args
+ | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(10)) -- Like variables
+
+-- See Note [Unboxed tuple size and result discount]
+ | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox (10 * (1 + n_val_args)))
+
+-- See Note [Constructor size and result discount]
+ | otherwise = SizeIs (_ILIT(10)) emptyBag (iUnbox (10 * (1 + n_val_args)))
+
+{-
+Note [Constructor size and result discount]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Treat a constructors application as size 10, regardless of how many
+arguments it has; we are keen to expose them (and we charge separately
+for their args). We can't treat them as size zero, else we find that
+(Just x) has size 0, which is the same as a lone variable; and hence
+'v' will always be replaced by (Just x), where v is bound to Just x.
+
+The "result discount" is applied if the result of the call is
+scrutinised (say by a case). For a constructor application that will
+mean the constructor application will disappear, so we don't need to
+charge it to the function. So the discount should at least match the
+cost of the constructor application, namely 10. But to give a bit
+of extra incentive we give a discount of 10*(1 + n_val_args).
+
+Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)),
+and said it was an "unambiguous win", but its terribly dangerous
+because a fuction with many many case branches, each finishing with
+a constructor, can have an arbitrarily large discount. This led to
+terrible code bloat: see Trac #6099.
+
+Note [Unboxed tuple size and result discount]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+However, unboxed tuples count as size zero. I found occasions where we had
+ f x y z = case op# x y z of { s -> (# s, () #) }
+and f wasn't getting inlined.
+
+I tried giving unboxed tuples a *result discount* of zero (see the
+commented-out line). Why? When returned as a result they do not
+allocate, so maybe we don't want to charge so much for them If you
+have a non-zero discount here, we find that workers often get inlined
+back into wrappers, because it look like
+ f x = case $wf x of (# a,b #) -> (a,b)
+and we are keener because of the case. However while this change
+shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
+more. All other changes were very small. So it's not a big deal but I
+didn't adopt the idea.
+
+Note [Function and non-function discounts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want a discount if the function is applied. A good example is
+monadic combinators with continuation arguments, where inlining is
+quite important.
+
+But we don't want a big discount when a function is called many times
+(see the detailed comments with Trac #6048) because if the function is
+big it won't be inlined at its many call sites and no benefit results.
+Indeed, we can get exponentially big inlinings this way; that is what
+Trac #6048 is about.
+
+On the other hand, for data-valued arguments, if there are lots of
+case expressions in the body, each one will get smaller if we apply
+the function to a constructor application, so we *want* a big discount
+if the argument is scrutinised by many case expressions.
+
+Conclusion:
+ - For functions, take the max of the discounts
+ - For data values, take the sum of the discounts
+
+
+Note [Literal integer size]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Literal integers *can* be big (mkInteger [...coefficients...]), but
+need not be (S# n). We just use an aribitrary big-ish constant here
+so that, in particular, we don't inline top-level defns like
+ n = S# 5
+There's no point in doing so -- any optimisations will see the S#
+through n's unfolding. Nor will a big size inhibit unfoldings functions
+that mention a literal Integer, because the float-out pass will float
+all those constants to top level.
+-}
+
+primOpSize :: PrimOp -> Int -> ExprSize
+primOpSize op n_val_args
+ = if primOpOutOfLine op
+ then sizeN (op_size + n_val_args)
+ else sizeN op_size
+ where
+ op_size = primOpCodeSize op
+
+
+buildSize :: ExprSize
+buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
+ -- We really want to inline applications of build
+ -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
+ -- Indeed, we should add a result_discount becuause build is
+ -- very like a constructor. We don't bother to check that the
+ -- build is saturated (it usually is). The "-2" discounts for the \c n,
+ -- The "4" is rather arbitrary.
+
+augmentSize :: ExprSize
+augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
+ -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
+ -- e plus ys. The -2 accounts for the \cn
+
+-- When we return a lambda, give a discount if it's used (applied)
+lamScrutDiscount :: DynFlags -> ExprSize -> ExprSize
+lamScrutDiscount dflags (SizeIs n vs _) = SizeIs n vs (iUnbox (ufFunAppDiscount dflags))
+lamScrutDiscount _ TooBig = TooBig
+
+{-
+Note [addAltSize result discounts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When adding the size of alternatives, we *add* the result discounts
+too, rather than take the *maximum*. For a multi-branch case, this
+gives a discount for each branch that returns a constructor, making us
+keener to inline. I did try using 'max' instead, but it makes nofib
+'rewrite' and 'puzzle' allocate significantly more, and didn't make
+binary sizes shrink significantly either.
+
+Note [Discounts and thresholds]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Constants for discounts and thesholds are defined in main/DynFlags,
+all of form ufXxxx. They are:
+
+ufCreationThreshold
+ At a definition site, if the unfolding is bigger than this, we
+ may discard it altogether
+
+ufUseThreshold
+ At a call site, if the unfolding, less discounts, is smaller than
+ this, then it's small enough inline
+
+ufKeenessFactor
+ Factor by which the discounts are multiplied before
+ subtracting from size
+
+ufDictDiscount
+ The discount for each occurrence of a dictionary argument
+ as an argument of a class method. Should be pretty small
+ else big functions may get inlined
+
+ufFunAppDiscount
+ Discount for a function argument that is applied. Quite
+ large, because if we inline we avoid the higher-order call.
+
+ufDearOp
+ The size of a foreign call or not-dupable PrimOp
+
+
+Note [Function applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In a function application (f a b)
+
+ - If 'f' is an argument to the function being analysed,
+ and there's at least one value arg, record a FunAppDiscount for f
+
+ - If the application if a PAP (arity > 2 in this example)
+ record a *result* discount (because inlining
+ with "extra" args in the call may mean that we now
+ get a saturated application)
+
+Code for manipulating sizes
+-}
+
+data ExprSize = TooBig
+ | SizeIs FastInt -- Size found
+ !(Bag (Id,Int)) -- Arguments cased herein, and discount for each such
+ FastInt -- Size to subtract if result is scrutinised
+ -- by a case expression
+
+instance Outputable ExprSize where
+ ppr TooBig = ptext (sLit "TooBig")
+ ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c))
+
+-- subtract the discount before deciding whether to bale out. eg. we
+-- want to inline a large constructor application into a selector:
+-- tup = (a_1, ..., a_99)
+-- x = case tup of ...
+--
+mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
+mkSizeIs max n xs d | (n -# d) ># max = TooBig
+ | otherwise = SizeIs n xs d
+
+maxSize :: ExprSize -> ExprSize -> ExprSize
+maxSize TooBig _ = TooBig
+maxSize _ TooBig = TooBig
+maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1
+ | otherwise = s2
+
+sizeZero :: ExprSize
+sizeN :: Int -> ExprSize
+
+sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))
+sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0))
+
+{-
+************************************************************************
+* *
+\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
+* *
+************************************************************************
+
+We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
+we ``couldn't possibly use'' on the other side. Can be overridden w/
+flaggery. Just the same as smallEnoughToInline, except that it has no
+actual arguments.
+-}
+
+couldBeSmallEnoughToInline :: DynFlags -> Int -> CoreExpr -> Bool
+couldBeSmallEnoughToInline dflags threshold rhs
+ = case sizeExpr dflags (iUnbox threshold) [] body of
+ TooBig -> False
+ _ -> True
+ where
+ (_, body) = collectBinders rhs
+
+----------------
+smallEnoughToInline :: DynFlags -> Unfolding -> Bool
+smallEnoughToInline dflags (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
+ = size <= ufUseThreshold dflags
+smallEnoughToInline _ _
+ = False
+
+----------------
+certainlyWillInline :: DynFlags -> Unfolding -> Maybe Unfolding
+-- Sees if the unfolding is pretty certain to inline
+-- If so, return a *stable* unfolding for it, that will always inline
+certainlyWillInline dflags unf@(CoreUnfolding { uf_guidance = guidance, uf_tmpl = expr })
+ = case guidance of
+ UnfNever -> Nothing
+ UnfWhen {} -> Just (unf { uf_src = InlineStable })
+
+ -- The UnfIfGoodArgs case seems important. If we w/w small functions
+ -- binary sizes go up by 10%! (This is with SplitObjs.) I'm not totally
+ -- sure whyy.
+ UnfIfGoodArgs { ug_size = size, ug_args = args }
+ | not (null args) -- See Note [certainlyWillInline: be careful of thunks]
+ , let arity = length args
+ , size - (10 * (arity + 1)) <= ufUseThreshold dflags
+ -> Just (unf { uf_src = InlineStable
+ , uf_guidance = UnfWhen { ug_arity = arity
+ , ug_unsat_ok = unSaturatedOk
+ , ug_boring_ok = inlineBoringOk expr } })
+ -- Note the "unsaturatedOk". A function like f = \ab. a
+ -- will certainly inline, even if partially applied (f e), so we'd
+ -- better make sure that the transformed inlining has the same property
+
+ _ -> Nothing
+
+certainlyWillInline _ unf@(DFunUnfolding {})
+ = Just unf
+
+certainlyWillInline _ _
+ = Nothing
+
+{-
+Note [certainlyWillInline: be careful of thunks]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't claim that thunks will certainly inline, because that risks work
+duplication. Even if the work duplication is not great (eg is_cheap
+holds), it can make a big difference in an inner loop In Trac #5623 we
+found that the WorkWrap phase thought that
+ y = case x of F# v -> F# (v +# v)
+was certainlyWillInline, so the addition got duplicated.
+
+
+************************************************************************
+* *
+\subsection{callSiteInline}
+* *
+************************************************************************
+
+This is the key function. It decides whether to inline a variable at a call site
+
+callSiteInline is used at call sites, so it is a bit more generous.
+It's a very important function that embodies lots of heuristics.
+A non-WHNF can be inlined if it doesn't occur inside a lambda,
+and occurs exactly once or
+ occurs once in each branch of a case and is small
+
+If the thing is in WHNF, there's no danger of duplicating work,
+so we can inline if it occurs once, or is small
+
+NOTE: we don't want to inline top-level functions that always diverge.
+It just makes the code bigger. Tt turns out that the convenient way to prevent
+them inlining is to give them a NOINLINE pragma, which we do in
+StrictAnal.addStrictnessInfoToTopId
+-}
+
+callSiteInline :: DynFlags
+ -> Id -- The Id
+ -> Bool -- True <=> unfolding is active
+ -> Bool -- True if there are are no arguments at all (incl type args)
+ -> [ArgSummary] -- One for each value arg; True if it is interesting
+ -> CallCtxt -- True <=> continuation is interesting
+ -> Maybe CoreExpr -- Unfolding, if any
+
+instance Outputable ArgSummary where
+ ppr TrivArg = ptext (sLit "TrivArg")
+ ppr NonTrivArg = ptext (sLit "NonTrivArg")
+ ppr ValueArg = ptext (sLit "ValueArg")
+
+data CallCtxt
+ = BoringCtxt
+ | RhsCtxt -- Rhs of a let-binding; see Note [RHS of lets]
+ | DiscArgCtxt -- Argument of a fuction with non-zero arg discount
+ | RuleArgCtxt -- We are somewhere in the argument of a function with rules
+
+ | ValAppCtxt -- We're applied to at least one value arg
+ -- This arises when we have ((f x |> co) y)
+ -- Then the (f x) has argument 'x' but in a ValAppCtxt
+
+ | CaseCtxt -- We're the scrutinee of a case
+ -- that decomposes its scrutinee
+
+instance Outputable CallCtxt where
+ ppr CaseCtxt = ptext (sLit "CaseCtxt")
+ ppr ValAppCtxt = ptext (sLit "ValAppCtxt")
+ ppr BoringCtxt = ptext (sLit "BoringCtxt")
+ ppr RhsCtxt = ptext (sLit "RhsCtxt")
+ ppr DiscArgCtxt = ptext (sLit "DiscArgCtxt")
+ ppr RuleArgCtxt = ptext (sLit "RuleArgCtxt")
+
+callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
+ = case idUnfolding id of
+ -- idUnfolding checks for loop-breakers, returning NoUnfolding
+ -- Things with an INLINE pragma may have an unfolding *and*
+ -- be a loop breaker (maybe the knot is not yet untied)
+ CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top
+ , uf_is_work_free = is_wf
+ , uf_guidance = guidance, uf_expandable = is_exp }
+ | active_unfolding -> tryUnfolding dflags id lone_variable
+ arg_infos cont_info unf_template is_top
+ is_wf is_exp guidance
+ | otherwise -> traceInline dflags "Inactive unfolding:" (ppr id) Nothing
+ NoUnfolding -> Nothing
+ OtherCon {} -> Nothing
+ DFunUnfolding {} -> Nothing -- Never unfold a DFun
+
+traceInline :: DynFlags -> String -> SDoc -> a -> a
+traceInline dflags str doc result
+ | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
+ = pprTrace str doc result
+ | otherwise
+ = result
+
+tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
+ -> CoreExpr -> Bool -> Bool -> Bool -> UnfoldingGuidance
+ -> Maybe CoreExpr
+tryUnfolding dflags id lone_variable
+ arg_infos cont_info unf_template is_top
+ is_wf is_exp guidance
+ = case guidance of
+ UnfNever -> traceInline dflags str (ptext (sLit "UnfNever")) Nothing
+
+ UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
+ | enough_args && (boring_ok || some_benefit)
+ -- See Note [INLINE for small functions (3)]
+ -> traceInline dflags str (mk_doc some_benefit empty True) (Just unf_template)
+ | otherwise
+ -> traceInline dflags str (mk_doc some_benefit empty False) Nothing
+ where
+ some_benefit = calc_some_benefit uf_arity
+ enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
+
+ UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
+ | is_wf && some_benefit && small_enough
+ -> traceInline dflags str (mk_doc some_benefit extra_doc True) (Just unf_template)
+ | otherwise
+ -> traceInline dflags str (mk_doc some_benefit extra_doc False) Nothing
+ where
+ some_benefit = calc_some_benefit (length arg_discounts)
+ extra_doc = text "discounted size =" <+> int discounted_size
+ discounted_size = size - discount
+ small_enough = discounted_size <= ufUseThreshold dflags
+ discount = computeDiscount dflags arg_discounts
+ res_discount arg_infos cont_info
+
+ where
+ mk_doc some_benefit extra_doc yes_or_no
+ = vcat [ text "arg infos" <+> ppr arg_infos
+ , text "interesting continuation" <+> ppr cont_info
+ , text "some_benefit" <+> ppr some_benefit
+ , text "is exp:" <+> ppr is_exp
+ , text "is work-free:" <+> ppr is_wf
+ , text "guidance" <+> ppr guidance
+ , extra_doc
+ , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
+
+ str = "Considering inlining: " ++ showSDocDump dflags (ppr id)
+ n_val_args = length arg_infos
+
+ -- some_benefit is used when the RHS is small enough
+ -- and the call has enough (or too many) value
+ -- arguments (ie n_val_args >= arity). But there must
+ -- be *something* interesting about some argument, or the
+ -- result context, to make it worth inlining
+ calc_some_benefit :: Arity -> Bool -- The Arity is the number of args
+ -- expected by the unfolding
+ calc_some_benefit uf_arity
+ | not saturated = interesting_args -- Under-saturated
+ -- Note [Unsaturated applications]
+ | otherwise = interesting_args -- Saturated or over-saturated
+ || interesting_call
+ where
+ saturated = n_val_args >= uf_arity
+ over_saturated = n_val_args > uf_arity
+ interesting_args = any nonTriv arg_infos
+ -- NB: (any nonTriv arg_infos) looks at the
+ -- over-saturated args too which is "wrong";
+ -- but if over-saturated we inline anyway.
+
+ interesting_call
+ | over_saturated
+ = True
+ | otherwise
+ = case cont_info of
+ CaseCtxt -> not (lone_variable && is_wf) -- Note [Lone variables]
+ ValAppCtxt -> True -- Note [Cast then apply]
+ RuleArgCtxt -> uf_arity > 0 -- See Note [Unfold info lazy contexts]
+ DiscArgCtxt -> uf_arity > 0 --
+ RhsCtxt -> uf_arity > 0 --
+ _ -> not is_top && uf_arity > 0 -- Note [Nested functions]
+ -- Note [Inlining in ArgCtxt]
+
+{-
+Note [Unfold into lazy contexts], Note [RHS of lets]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When the call is the argument of a function with a RULE, or the RHS of a let,
+we are a little bit keener to inline. For example
+ f y = (y,y,y)
+ g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
+We'd inline 'f' if the call was in a case context, and it kind-of-is,
+only we can't see it. Also
+ x = f v
+could be expensive whereas
+ x = case v of (a,b) -> a
+is patently cheap and may allow more eta expansion.
+So we treat the RHS of a let as not-totally-boring.
+
+Note [Unsaturated applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When a call is not saturated, we *still* inline if one of the
+arguments has interesting structure. That's sometimes very important.
+A good example is the Ord instance for Bool in Base:
+
+ Rec {
+ $fOrdBool =GHC.Classes.D:Ord
+ @ Bool
+ ...
+ $cmin_ajX
+
+ $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
+ $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
+ }
+
+But the defn of GHC.Classes.$dmmin is:
+
+ $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
+ {- Arity: 3, HasNoCafRefs, Strictness: SLL,
+ Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
+ case @ a GHC.Classes.<= @ a $dOrd x y of wild {
+ GHC.Types.False -> y GHC.Types.True -> x }) -}
+
+We *really* want to inline $dmmin, even though it has arity 3, in
+order to unravel the recursion.
+
+
+Note [Things to watch]
+~~~~~~~~~~~~~~~~~~~~~~
+* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
+ Assume x is exported, so not inlined unconditionally.
+ Then we want x to inline unconditionally; no reason for it
+ not to, and doing so avoids an indirection.
+
+* { x = I# 3; ....f x.... }
+ Make sure that x does not inline unconditionally!
+ Lest we get extra allocation.
+
+Note [Inlining an InlineRule]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+An InlineRules is used for
+ (a) programmer INLINE pragmas
+ (b) inlinings from worker/wrapper
+
+For (a) the RHS may be large, and our contract is that we *only* inline
+when the function is applied to all the arguments on the LHS of the
+source-code defn. (The uf_arity in the rule.)
+
+However for worker/wrapper it may be worth inlining even if the
+arity is not satisfied (as we do in the CoreUnfolding case) so we don't
+require saturation.
+
+
+Note [Nested functions]
+~~~~~~~~~~~~~~~~~~~~~~~
+If a function has a nested defn we also record some-benefit, on the
+grounds that we are often able to eliminate the binding, and hence the
+allocation, for the function altogether; this is good for join points.
+But this only makes sense for *functions*; inlining a constructor
+doesn't help allocation unless the result is scrutinised. UNLESS the
+constructor occurs just once, albeit possibly in multiple case
+branches. Then inlining it doesn't increase allocation, but it does
+increase the chance that the constructor won't be allocated at all in
+the branches that don't use it.
+
+Note [Cast then apply]
+~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ myIndex = __inline_me ( (/\a. <blah>) |> co )
+ co :: (forall a. a -> a) ~ (forall a. T a)
+ ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
+
+We need to inline myIndex to unravel this; but the actual call (myIndex a) has
+no value arguments. The ValAppCtxt gives it enough incentive to inline.
+
+Note [Inlining in ArgCtxt]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The condition (arity > 0) here is very important, because otherwise
+we end up inlining top-level stuff into useless places; eg
+ x = I# 3#
+ f = \y. g x
+This can make a very big difference: it adds 16% to nofib 'integer' allocs,
+and 20% to 'power'.
+
+At one stage I replaced this condition by 'True' (leading to the above
+slow-down). The motivation was test eyeball/inline1.hs; but that seems
+to work ok now.
+
+NOTE: arguably, we should inline in ArgCtxt only if the result of the
+call is at least CONLIKE. At least for the cases where we use ArgCtxt
+for the RHS of a 'let', we only profit from the inlining if we get a
+CONLIKE thing (modulo lets).
+
+Note [Lone variables] See also Note [Interaction of exprIsWorkFree and lone variables]
+~~~~~~~~~~~~~~~~~~~~~ which appears below
+The "lone-variable" case is important. I spent ages messing about
+with unsatisfactory varaints, but this is nice. The idea is that if a
+variable appears all alone
+
+ as an arg of lazy fn, or rhs BoringCtxt
+ as scrutinee of a case CaseCtxt
+ as arg of a fn ArgCtxt
+AND
+ it is bound to a cheap expression
+
+then we should not inline it (unless there is some other reason,
+e.g. is is the sole occurrence). That is what is happening at
+the use of 'lone_variable' in 'interesting_call'.
+
+Why? At least in the case-scrutinee situation, turning
+ let x = (a,b) in case x of y -> ...
+into
+ let x = (a,b) in case (a,b) of y -> ...
+and thence to
+ let x = (a,b) in let y = (a,b) in ...
+is bad if the binding for x will remain.
+
+Another example: I discovered that strings
+were getting inlined straight back into applications of 'error'
+because the latter is strict.
+ s = "foo"
+ f = \x -> ...(error s)...
+
+Fundamentally such contexts should not encourage inlining because the
+context can ``see'' the unfolding of the variable (e.g. case or a
+RULE) so there's no gain. If the thing is bound to a value.
+
+However, watch out:
+
+ * Consider this:
+ foo = _inline_ (\n. [n])
+ bar = _inline_ (foo 20)
+ baz = \n. case bar of { (m:_) -> m + n }
+ Here we really want to inline 'bar' so that we can inline 'foo'
+ and the whole thing unravels as it should obviously do. This is
+ important: in the NDP project, 'bar' generates a closure data
+ structure rather than a list.
+
+ So the non-inlining of lone_variables should only apply if the
+ unfolding is regarded as cheap; because that is when exprIsConApp_maybe
+ looks through the unfolding. Hence the "&& is_wf" in the
+ InlineRule branch.
+
+ * Even a type application or coercion isn't a lone variable.
+ Consider
+ case $fMonadST @ RealWorld of { :DMonad a b c -> c }
+ We had better inline that sucker! The case won't see through it.
+
+ For now, I'm treating treating a variable applied to types
+ in a *lazy* context "lone". The motivating example was
+ f = /\a. \x. BIG
+ g = /\a. \y. h (f a)
+ There's no advantage in inlining f here, and perhaps
+ a significant disadvantage. Hence some_val_args in the Stop case
+
+Note [Interaction of exprIsWorkFree and lone variables]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The lone-variable test says "don't inline if a case expression
+scrutines a lone variable whose unfolding is cheap". It's very
+important that, under these circumstances, exprIsConApp_maybe
+can spot a constructor application. So, for example, we don't
+consider
+ let x = e in (x,x)
+to be cheap, and that's good because exprIsConApp_maybe doesn't
+think that expression is a constructor application.
+
+In the 'not (lone_variable && is_wf)' test, I used to test is_value
+rather than is_wf, which was utterly wrong, because the above
+expression responds True to exprIsHNF, which is what sets is_value.
+
+This kind of thing can occur if you have
+
+ {-# INLINE foo #-}
+ foo = let x = e in (x,x)
+
+which Roman did.
+-}
+
+computeDiscount :: DynFlags -> [Int] -> Int -> [ArgSummary] -> CallCtxt
+ -> Int
+computeDiscount dflags arg_discounts res_discount arg_infos cont_info
+ -- We multiple the raw discounts (args_discount and result_discount)
+ -- ty opt_UnfoldingKeenessFactor because the former have to do with
+ -- *size* whereas the discounts imply that there's some extra
+ -- *efficiency* to be gained (e.g. beta reductions, case reductions)
+ -- by inlining.
+
+ = 10 -- Discount of 10 because the result replaces the call
+ -- so we count 10 for the function itself
+
+ + 10 * length actual_arg_discounts
+ -- Discount of 10 for each arg supplied,
+ -- because the result replaces the call
+
+ + round (ufKeenessFactor dflags *
+ fromIntegral (total_arg_discount + res_discount'))
+ where
+ actual_arg_discounts = zipWith mk_arg_discount arg_discounts arg_infos
+ total_arg_discount = sum actual_arg_discounts
+
+ mk_arg_discount _ TrivArg = 0
+ mk_arg_discount _ NonTrivArg = 10
+ mk_arg_discount discount ValueArg = discount
+
+ res_discount'
+ | LT <- arg_discounts `compareLength` arg_infos
+ = res_discount -- Over-saturated
+ | otherwise
+ = case cont_info of
+ BoringCtxt -> 0
+ CaseCtxt -> res_discount -- Presumably a constructor
+ ValAppCtxt -> res_discount -- Presumably a function
+ _ -> 40 `min` res_discount
+ -- ToDo: this 40 `min` res_discount doesn't seem right
+ -- for DiscArgCtxt it shouldn't matter because the function will
+ -- get the arg discount for any non-triv arg
+ -- for RuleArgCtxt we do want to be keener to inline; but not only
+ -- constructor results
+ -- for RhsCtxt I suppose that exposing a data con is good in general
+ -- And 40 seems very arbitrary
+ --
+ -- res_discount can be very large when a function returns
+ -- constructors; but we only want to invoke that large discount
+ -- when there's a case continuation.
+ -- Otherwise we, rather arbitrarily, threshold it. Yuk.
+ -- But we want to aovid inlining large functions that return
+ -- constructors into contexts that are simply "interesting"
+
+{-
+************************************************************************
+* *
+ Interesting arguments
+* *
+************************************************************************
+
+Note [Interesting arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+An argument is interesting if it deserves a discount for unfoldings
+with a discount in that argument position. The idea is to avoid
+unfolding a function that is applied only to variables that have no
+unfolding (i.e. they are probably lambda bound): f x y z There is
+little point in inlining f here.
+
+Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
+we must look through lets, eg (let x = e in C a b), because the let will
+float, exposing the value, if we inline. That makes it different to
+exprIsHNF.
+
+Before 2009 we said it was interesting if the argument had *any* structure
+at all; i.e. (hasSomeUnfolding v). But does too much inlining; see Trac #3016.
+
+But we don't regard (f x y) as interesting, unless f is unsaturated.
+If it's saturated and f hasn't inlined, then it's probably not going
+to now!
+
+Note [Conlike is interesting]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ f d = ...((*) d x y)...
+ ... f (df d')...
+where df is con-like. Then we'd really like to inline 'f' so that the
+rule for (*) (df d) can fire. To do this
+ a) we give a discount for being an argument of a class-op (eg (*) d)
+ b) we say that a con-like argument (eg (df d)) is interesting
+-}
+
+data ArgSummary = TrivArg -- Nothing interesting
+ | NonTrivArg -- Arg has structure
+ | ValueArg -- Arg is a con-app or PAP
+ -- ..or con-like. Note [Conlike is interesting]
+
+interestingArg :: CoreExpr -> ArgSummary
+-- See Note [Interesting arguments]
+interestingArg e = go e 0
+ where
+ -- n is # value args to which the expression is applied
+ go (Lit {}) _ = ValueArg
+ go (Var v) n
+ | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that
+ -- data constructors here
+ | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
+ | n > 0 = NonTrivArg -- Saturated or unknown call
+ | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
+ -- See Note [Conlike is interesting]
+ | otherwise = TrivArg -- n==0, no useful unfolding
+ where
+ conlike_unfolding = isConLikeUnfolding (idUnfolding v)
+
+ go (Type _) _ = TrivArg
+ go (Coercion _) _ = TrivArg
+ go (App fn (Type _)) n = go fn n
+ go (App fn (Coercion _)) n = go fn n
+ go (App fn _) n = go fn (n+1)
+ go (Tick _ a) n = go a n
+ go (Cast e _) n = go e n
+ go (Lam v e) n
+ | isTyVar v = go e n
+ | n>0 = go e (n-1)
+ | otherwise = ValueArg
+ go (Let _ e) n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg }
+ go (Case {}) _ = NonTrivArg
+
+nonTriv :: ArgSummary -> Bool
+nonTriv TrivArg = False
+nonTriv _ = True