summaryrefslogtreecommitdiff
path: root/compiler/coreSyn
diff options
context:
space:
mode:
authorDimitrios Vytiniotis <dimitris@microsoft.com>2011-11-16 15:28:43 +0000
committerDimitrios Vytiniotis <dimitris@microsoft.com>2011-11-16 15:28:43 +0000
commit7ec5404a3fd277251a1ab353aa398adfc02b6d34 (patch)
tree78ff33800fad55d7dbb4e1b1732d4f82c4e092a2 /compiler/coreSyn
parentdb892577a2effc2266533e355dad2c40f9fd3be1 (diff)
parent1bbb89f3ab009367fcca84b73b351ddcf5be16a4 (diff)
downloadhaskell-ghc-constraint-solver.tar.gz
Merge branch 'master' of http://darcs.haskell.org/ghc into ghc-constraint-solverghc-constraint-solver
Diffstat (limited to 'compiler/coreSyn')
-rw-r--r--compiler/coreSyn/CoreArity.lhs193
-rw-r--r--compiler/coreSyn/CoreLint.lhs13
-rw-r--r--compiler/coreSyn/CoreSyn.lhs30
-rw-r--r--compiler/coreSyn/CoreUnfold.lhs106
-rw-r--r--compiler/coreSyn/CoreUtils.lhs84
5 files changed, 268 insertions, 158 deletions
diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs
index f8565cb4c8..249861a4e4 100644
--- a/compiler/coreSyn/CoreArity.lhs
+++ b/compiler/coreSyn/CoreArity.lhs
@@ -34,6 +34,7 @@ import TyCon ( isRecursiveTyCon, isClassTyCon )
import Coercion
import BasicTypes
import Unique
+import DynFlags ( DynFlags, DynFlag(..), dopt )
import Outputable
import FastString
import Pair
@@ -128,11 +129,12 @@ exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, StrictSig)
-- and gives them a suitable strictness signatures. It's used during
-- float-out
exprBotStrictness_maybe e
- = case getBotArity (arityType is_cheap e) of
+ = case getBotArity (arityType env e) of
Nothing -> Nothing
Just ar -> Just (ar, mkStrictSig (mkTopDmdType (replicate ar topDmd) BotRes))
where
- is_cheap _ _ = False -- Irrelevant for this purpose
+ env = AE { ae_bndrs = [], ae_ped_bot = True, ae_cheap_fn = \ _ _ -> False }
+ -- For this purpose we can be very simple
\end{code}
Note [exprArity invariant]
@@ -251,34 +253,33 @@ Or, to put it another way, in any context C
It's all a bit more subtle than it looks:
-Note [Arity of case expressions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We treat the arity of
- case x of p -> \s -> ...
-as 1 (or more) because for I/O ish things we really want to get that
-\s to the top. We are prepared to evaluate x each time round the loop
-in order to get that.
+Note [One-shot lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~
+Consider one-shot lambdas
+ let x = expensive in \y z -> E
+We want this to have arity 1 if the \y-abstraction is a 1-shot lambda.
+
+Note [Dealing with bottom]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+A Big Deal with computing arities is expressions like
+
+ f = \x -> case x of
+ True -> \s -> e1
+ False -> \s -> e2
+
+This happens all the time when f :: Bool -> IO ()
+In this case we do eta-expand, in order to get that \s to the
+top, and give f arity 2.
This isn't really right in the presence of seq. Consider
- f = \x -> case x of
- True -> \y -> x+y
- False -> \y -> x-y
-Can we eta-expand here? At first the answer looks like "yes of course", but
-consider
(f bot) `seq` 1
-This should diverge! But if we eta-expand, it won't. Again, we ignore this
-"problem", because being scrupulous would lose an important transformation for
-many programs.
-1. Note [One-shot lambdas]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider one-shot lambdas
- let x = expensive in \y z -> E
-We want this to have arity 1 if the \y-abstraction is a 1-shot lambda.
+This should diverge! But if we eta-expand, it won't. We ignore this
+"problem" (unless -fpedantic-bottoms is on), because being scrupulous
+would lose an important transformation for many programs. (See
+Trac #5587 for an example.)
-3. Note [Dealing with bottom]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
+Consider also
f = \x -> error "foo"
Here, arity 1 is fine. But if it is
f = \x -> case x of
@@ -290,22 +291,31 @@ should diverge, but it'll converge if we eta-expand f. Nevertheless, we
do so; it improves some programs significantly, and increasing convergence
isn't a bad thing. Hence the ABot/ATop in ArityType.
-However, this really isn't always the Right Thing, and we have several
-tickets reporting unexpected bahaviour resulting from this
-transformation. So we try to limit it as much as possible:
+So these two transformations aren't always the Right Thing, and we
+have several tickets reporting unexpected bahaviour resulting from
+this transformation. So we try to limit it as much as possible:
+
+ (1) Do NOT move a lambda outside a known-bottom case expression
+ case undefined of { (a,b) -> \y -> e }
+ This showed up in Trac #5557
- * Do NOT move a lambda outside a known-bottom case expression
- case undefined of { (a,b) -> \y -> e }
- This showed up in Trac #5557
+ (2) Do NOT move a lambda outside a case if all the branches of
+ the case are known to return bottom.
+ case x of { (a,b) -> \y -> error "urk" }
+ This case is less important, but the idea is that if the fn is
+ going to diverge eventually anyway then getting the best arity
+ isn't an issue, so we might as well play safe
- * Do NOT move a lambda outside a case if all the branches of
- the case are known to return bottom.
- case x of { (a,b) -> \y -> error "urk" }
- This case is less important, but the idea is that if the fn is
- going to diverge eventually anyway then getting the best arity
- isn't an issue, so we might as well play safe
+ (3) Do NOT move a lambda outside a case unless
+ (a) The scrutinee is ok-for-speculation, or
+ (b) There is an enclosing value \x, and the scrutinee is x
+ E.g. let x = case y of ( DEFAULT -> \v -> blah }
+ We don't move the \y out. This is pretty arbitrary; but it
+ catches the common case of doing `seq` on y.
+ This is the reason for the under_lam argument to arityType.
+ See Trac #5625
-Of course both these are readily defeated by disguising the bottoms.
+Of course both (1) and (2) are readily defeated by disguising the bottoms.
4. Note [Newtype arity]
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -463,17 +473,21 @@ vanillaArityType = ATop [] -- Totally uninformative
-- ^ The Arity returned is the number of value args the
-- expression can be applied to without doing much work
-exprEtaExpandArity :: CheapFun -> CoreExpr -> Arity
+exprEtaExpandArity :: DynFlags -> CheapAppFun -> CoreExpr -> Arity
-- exprEtaExpandArity is used when eta expanding
-- e ==> \xy -> e x y
-exprEtaExpandArity cheap_fun e
- = case (arityType cheap_fun e) of
+exprEtaExpandArity dflags cheap_app e
+ = case (arityType env e) of
ATop (os:oss)
| os || has_lam e -> 1 + length oss -- Note [Eta expanding thunks]
| otherwise -> 0
ATop [] -> 0
ABot n -> n
where
+ env = AE { ae_bndrs = []
+ , ae_cheap_fn = mk_cheap_fn dflags cheap_app
+ , ae_ped_bot = dopt Opt_PedanticBottoms dflags }
+
has_lam (Tick _ e) = has_lam e
has_lam (Lam b e) = isId b || has_lam e
has_lam _ = False
@@ -482,8 +496,40 @@ getBotArity :: ArityType -> Maybe Arity
-- Arity of a divergent function
getBotArity (ABot n) = Just n
getBotArity _ = Nothing
+
+mk_cheap_fn :: DynFlags -> CheapAppFun -> CheapFun
+mk_cheap_fn dflags cheap_app
+ | not (dopt Opt_DictsCheap dflags)
+ = \e _ -> exprIsCheap' cheap_app e
+ | otherwise
+ = \e mb_ty -> exprIsCheap' cheap_app e
+ || case mb_ty of
+ Nothing -> False
+ Just ty -> isDictLikeTy ty
\end{code}
+Note [Eta expanding through dictionaries]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If the experimental -fdicts-cheap flag is on, we eta-expand through
+dictionary bindings. This improves arities. Thereby, it also
+means that full laziness is less prone to floating out the
+application of a function to its dictionary arguments, which
+can thereby lose opportunities for fusion. Example:
+ foo :: Ord a => a -> ...
+ foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+ -- So foo has arity 1
+
+ f = \x. foo dInt $ bar x
+
+The (foo DInt) is floated out, and makes ineffective a RULE
+ foo (bar x) = ...
+
+One could go further and make exprIsCheap reply True to any
+dictionary-typed expression, but that's more work.
+
+See Note [Dictionary-like types] in TcType.lhs for why we use
+isDictLikeTy here rather than isDictTy
+
Note [Eta expanding thunks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
When we see
@@ -558,10 +604,17 @@ type CheapFun = CoreExpr -> Maybe Type -> Bool
-- If the Maybe is Just, the type is the type
-- of the expression; Nothing means "don't know"
-arityType :: CheapFun -> CoreExpr -> ArityType
+data ArityEnv
+ = AE { ae_bndrs :: [Id] -- Enclosing value-lambda Ids
+ -- See Note [Dealing with bottom (3)]
+ , ae_cheap_fn :: CheapFun
+ , ae_ped_bot :: Bool -- True <=> be pedantic about bottoms
+ }
+
+arityType :: ArityEnv -> CoreExpr -> ArityType
-arityType cheap_fn (Cast e co)
- = case arityType cheap_fn e of
+arityType env (Cast e co)
+ = case arityType env e of
ATop os -> ATop (take co_arity os)
ABot n -> ABot (n `min` co_arity)
where
@@ -586,15 +639,20 @@ arityType _ (Var v)
one_shots = typeArity (idType v)
-- Lambdas; increase arity
-arityType cheap_fn (Lam x e)
- | isId x = arityLam x (arityType cheap_fn e)
- | otherwise = arityType cheap_fn e
+arityType env (Lam x e)
+ | isId x = arityLam x (arityType env' e)
+ | otherwise = arityType env e
+ where
+ env' = env { ae_bndrs = x : ae_bndrs env }
-- Applications; decrease arity, except for types
-arityType cheap_fn (App fun (Type _))
- = arityType cheap_fn fun
-arityType cheap_fn (App fun arg )
- = arityApp (arityType cheap_fn fun) (cheap_fn arg Nothing)
+arityType env (App fun (Type _))
+ = arityType env fun
+arityType env (App fun arg )
+ = arityApp (arityType env' fun) (ae_cheap_fn env arg Nothing)
+ where
+ env' = env { ae_bndrs = case ae_bndrs env of
+ { [] -> []; (_:xs) -> xs } }
-- Case/Let; keep arity if either the expression is cheap
-- or it's a 1-shot lambda
@@ -604,31 +662,40 @@ arityType cheap_fn (App fun arg )
-- f x y = case x of { (a,b) -> e }
-- The difference is observable using 'seq'
--
-arityType cheap_fn (Case scrut _ _ alts)
+arityType env (Case scrut _ _ alts)
| exprIsBottom scrut
= ABot 0 -- Do not eta expand
- -- See Note [Dealing with bottom]
+ -- See Note [Dealing with bottom (1)]
| otherwise
= case alts_type of
ABot n | n>0 -> ATop [] -- Don't eta expand
| otherwise -> ABot 0 -- if RHS is bottomming
- -- See Note [Dealing with bottom]
- ATop as | exprIsTrivial scrut -> ATop as
- | otherwise -> ATop (takeWhile id as)
+ -- See Note [Dealing with bottom (2)]
+
+ ATop as | not (ae_ped_bot env) -- Check -fpedantic-bottoms
+ , is_under scrut -> ATop as
+ | exprOkForSpeculation scrut -> ATop as
+ | otherwise -> ATop (takeWhile id as)
where
- alts_type = foldr1 andArityType [arityType cheap_fn rhs | (_,_,rhs) <- alts]
+ -- is_under implements Note [Dealing with bottom (3)]
+ is_under (Var f) = f `elem` ae_bndrs env
+ is_under (App f (Type {})) = is_under f
+ is_under (Cast f _) = is_under f
+ is_under _ = False
+
+ alts_type = foldr1 andArityType [arityType env rhs | (_,_,rhs) <- alts]
-arityType cheap_fn (Let b e)
- = floatIn (cheap_bind b) (arityType cheap_fn e)
+arityType env (Let b e)
+ = floatIn (cheap_bind b) (arityType env e)
where
cheap_bind (NonRec b e) = is_cheap (b,e)
cheap_bind (Rec prs) = all is_cheap prs
- is_cheap (b,e) = cheap_fn e (Just (idType b))
+ is_cheap (b,e) = ae_cheap_fn env e (Just (idType b))
-arityType cheap_fn (Tick t e)
- | not (tickishIsCode t) = arityType cheap_fn e
+arityType env (Tick t e)
+ | not (tickishIsCode t) = arityType env e
-arityType _ _ = vanillaArityType
+arityType _ _ = vanillaArityType
\end{code}
diff --git a/compiler/coreSyn/CoreLint.lhs b/compiler/coreSyn/CoreLint.lhs
index 7bd61fa351..77747aabf3 100644
--- a/compiler/coreSyn/CoreLint.lhs
+++ b/compiler/coreSyn/CoreLint.lhs
@@ -41,7 +41,6 @@ import Kind
import Type
import TypeRep
import TyCon
-import TcType
import BasicTypes
import StaticFlags
import ListSetOps
@@ -562,12 +561,12 @@ lintCoreAlt _ alt_ty (DEFAULT, args, rhs) =
; checkAltExpr rhs alt_ty }
lintCoreAlt scrut_ty alt_ty (LitAlt lit, args, rhs)
- | isIntegerTy scrut_ty
- = failWithL integerScrutinisedMsg
+ | litIsLifted lit
+ = failWithL integerScrutinisedMsg
| otherwise
- = do { checkL (null args) (mkDefaultArgsMsg args)
- ; checkTys lit_ty scrut_ty (mkBadPatMsg lit_ty scrut_ty)
- ; checkAltExpr rhs alt_ty }
+ = do { checkL (null args) (mkDefaultArgsMsg args)
+ ; checkTys lit_ty scrut_ty (mkBadPatMsg lit_ty scrut_ty)
+ ; checkAltExpr rhs alt_ty }
where
lit_ty = literalType lit
@@ -1196,7 +1195,7 @@ mkBadPatMsg con_result_ty scrut_ty
integerScrutinisedMsg :: Message
integerScrutinisedMsg
- = text "In a case alternative, scrutinee type is Integer"
+ = text "In a LitAlt, the literal is lifted (probably Integer)"
mkBadAltMsg :: Type -> CoreAlt -> Message
mkBadAltMsg scrut_ty alt
diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs
index ea0ef2242f..a8dbbceb36 100644
--- a/compiler/coreSyn/CoreSyn.lhs
+++ b/compiler/coreSyn/CoreSyn.lhs
@@ -278,11 +278,16 @@ type Arg b = Expr b
type Alt b = (AltCon, [b], Expr b)
-- | A case alternative constructor (i.e. pattern match)
-data AltCon = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
- -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
- | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
- | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
- deriving (Eq, Ord, Data, Typeable)
+data AltCon
+ = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
+ -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
+
+ | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
+ -- Invariant: always an *unlifted* literal
+ -- See Note [Literal alternatives]
+
+ | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
+ deriving (Eq, Ord, Data, Typeable)
-- | Binding, used for top level bindings in a module and local bindings in a @let@.
data Bind b = NonRec b (Expr b)
@@ -290,6 +295,21 @@ data Bind b = NonRec b (Expr b)
deriving (Data, Typeable)
\end{code}
+Note [Literal alternatives]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
+We have one literal, a literal Integer, that is lifted, and we don't
+allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
+(see Trac #5603) if you say
+ case 3 of
+ S# x -> ...
+ J# _ _ -> ...
+(where S#, J# are the constructors for Integer) we don't want the
+simplifier calling findAlt with argument (LitAlt 3). No no. Integer
+literals are an opaque encoding of an algebraic data type, not of
+an unlifted literal, like all the others.
+
+
-------------------------- CoreSyn INVARIANTS ---------------------------
Note [CoreSyn top-level invariant]
diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs
index 4f1dee3da3..930041dea4 100644
--- a/compiler/coreSyn/CoreUnfold.lhs
+++ b/compiler/coreSyn/CoreUnfold.lhs
@@ -174,8 +174,7 @@ mkUnfolding src top_lvl is_bottoming expr
uf_guidance = guidance }
where
is_cheap = exprIsCheap expr
- (arity, guidance) = calcUnfoldingGuidance is_cheap
- opt_UF_CreationThreshold expr
+ (arity, guidance) = calcUnfoldingGuidance expr
-- Sometimes during simplification, there's a large let-bound thing
-- which has been substituted, and so is now dead; so 'expr' contains
-- two copies of the thing while the occurrence-analysed expression doesn't
@@ -217,14 +216,13 @@ inlineBoringOk e
go _ _ = boringCxtNotOk
calcUnfoldingGuidance
- :: Bool -- True <=> the rhs is cheap, or we want to treat it
- -- as cheap (INLINE things)
- -> Int -- Bomb out if size gets bigger than this
- -> CoreExpr -- Expression to look at
+ :: CoreExpr -- Expression to look at
-> (Arity, UnfoldingGuidance)
-calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
+calcUnfoldingGuidance expr
= case collectBinders expr of { (bndrs, body) ->
let
+ bOMB_OUT_SIZE = opt_UF_CreationThreshold
+ -- Bomb out if size gets bigger than this
val_bndrs = filter isId bndrs
n_val_bndrs = length val_bndrs
@@ -232,8 +230,7 @@ calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
= case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
TooBig -> UnfNever
SizeIs size cased_bndrs scrut_discount
- | uncondInline n_val_bndrs (iBox size)
- , expr_is_cheap
+ | uncondInline expr n_val_bndrs (iBox size)
-> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions]
| otherwise
-> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs
@@ -278,9 +275,10 @@ Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's
a function call to account for. Notice also that constructor applications
are very cheap, because exposing them to a caller is so valuable.
-[25/5/11] All sizes are now multiplied by 10, except for primops.
-This makes primops look cheap, and seems to be almost unversally
-beneficial. Done partly as a result of #4978.
+[25/5/11] All sizes are now multiplied by 10, except for primops
+(which have sizes like 1 or 4. This makes primops look fantastically
+cheap, and seems to be almost unversally beneficial. Done partly as a
+result of #4978.
Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -289,7 +287,6 @@ and similar friends. See Note [Bottoming floats] in SetLevels.
Do not re-inline them! But we *do* still inline if they are very small
(the uncondInline stuff).
-
Note [INLINE for small functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider {-# INLINE f #-}
@@ -302,43 +299,54 @@ inline unconditionally, regardless of how boring the context is.
Things to note:
- * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
- than the thing it's replacing. Notice that
+(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
+ than the thing it's replacing. Notice that
(f x) --> (g 3) -- YES, unconditionally
(f x) --> x : [] -- YES, *even though* there are two
-- arguments to the cons
x --> g 3 -- NO
x --> Just v -- NO
- It's very important not to unconditionally replace a variable by
- a non-atomic term.
-
-* We do this even if the thing isn't saturated, else we end up with the
- silly situation that
- f x y = x
- ...map (f 3)...
- doesn't inline. Even in a boring context, inlining without being
- saturated will give a lambda instead of a PAP, and will be more
- efficient at runtime.
-
-* However, when the function's arity > 0, we do insist that it
- has at least one value argument at the call site. Otherwise we find this:
- f = /\a \x:a. x
- d = /\b. MkD (f b)
- If we inline f here we get
- d = /\b. MkD (\x:b. x)
- and then prepareRhs floats out the argument, abstracting the type
- variables, so we end up with the original again!
-
+ It's very important not to unconditionally replace a variable by
+ a non-atomic term.
+
+(2) We do this even if the thing isn't saturated, else we end up with the
+ silly situation that
+ f x y = x
+ ...map (f 3)...
+ doesn't inline. Even in a boring context, inlining without being
+ saturated will give a lambda instead of a PAP, and will be more
+ efficient at runtime.
+
+(3) However, when the function's arity > 0, we do insist that it
+ has at least one value argument at the call site. (This check is
+ made in the UnfWhen case of callSiteInline.) Otherwise we find this:
+ f = /\a \x:a. x
+ d = /\b. MkD (f b)
+ If we inline f here we get
+ d = /\b. MkD (\x:b. x)
+ and then prepareRhs floats out the argument, abstracting the type
+ variables, so we end up with the original again!
+
+(4) We must be much more cautious about arity-zero things. Consider
+ let x = y +# z in ...
+ In *size* terms primops look very small, because the generate a
+ single instruction, but we do not want to unconditionally replace
+ every occurrence of x with (y +# z). So we only do the
+ unconditional-inline thing for *trivial* expressions.
+
+ NB: you might think that PostInlineUnconditionally would do this
+ but it doesn't fire for top-level things; see SimplUtils
+ Note [Top level and postInlineUnconditionally]
\begin{code}
-uncondInline :: Arity -> Int -> Bool
+uncondInline :: CoreExpr -> Arity -> Int -> Bool
-- Inline unconditionally if there no size increase
-- Size of call is arity (+1 for the function)
-- See Note [INLINE for small functions]
-uncondInline arity size
- | arity == 0 = size == 0
- | otherwise = size <= 10 * (arity + 1)
+uncondInline rhs arity size
+ | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
+ | otherwise = exprIsTrivial rhs -- See Note [INLINE for small functions] (4)
\end{code}
@@ -747,17 +755,28 @@ smallEnoughToInline _
----------------
certainlyWillInline :: Unfolding -> Bool
-- Sees if the unfolding is pretty certain to inline
-certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
+certainlyWillInline (CoreUnfolding { uf_arity = n_vals, uf_guidance = guidance })
= case guidance of
UnfNever -> False
UnfWhen {} -> True
UnfIfGoodArgs { ug_size = size}
- -> is_cheap && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold
+ -> n_vals > 0 -- See Note [certainlyWillInline: be caseful of thunks]
+ && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold
certainlyWillInline _
= False
\end{code}
+Note [certainlyWillInline: be caseful of thunks]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't claim that thunks will certainly inline, because that risks work
+duplication. Even if the work duplication is not great (eg is_cheap
+holds), it can make a big difference in an inner loop In Trac #5623 we
+found that the WorkWrap phase thought that
+ y = case x of F# v -> F# (v +# v)
+was certainlyWillInline, so the addition got duplicated.
+
+
%************************************************************************
%* *
\subsection{callSiteInline}
@@ -894,7 +913,7 @@ tryUnfolding dflags id lone_variable
UnfWhen unsat_ok boring_ok
-> (enough_args && (boring_ok || some_benefit), empty )
- where -- See Note [INLINE for small functions]
+ where -- See Note [INLINE for small functions (3)]
enough_args = saturated || (unsat_ok && n_val_args > 0)
UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
@@ -1084,7 +1103,8 @@ to be cheap, and that's good because exprIsConApp_maybe doesn't
think that expression is a constructor application.
I used to test is_value rather than is_cheap, which was utterly
-wrong, because the above expression responds True to exprIsHNF.
+wrong, because the above expression responds True to exprIsHNF,
+which is what sets is_value.
This kind of thing can occur if you have
diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs
index f0aa71133c..27026b2353 100644
--- a/compiler/coreSyn/CoreUtils.lhs
+++ b/compiler/coreSyn/CoreUtils.lhs
@@ -729,6 +729,7 @@ it's applied only to dictionaries.
%************************************************************************
\begin{code}
+-----------------------------
-- | 'exprOkForSpeculation' returns True of an expression that is:
--
-- * Safe to evaluate even if normal order eval might not
@@ -769,12 +770,8 @@ exprOkForSpeculation :: Expr b -> Bool
exprOkForSpeculation (Lit _) = True
exprOkForSpeculation (Type _) = True
exprOkForSpeculation (Coercion _) = True
-
-exprOkForSpeculation (Var v)
- = isUnLiftedType (idType v) -- c.f. the Var case of exprIsHNF
- || isDataConWorkId v -- Nullary constructors
- || idArity v > 0 -- Functions
- || isEvaldUnfolding (idUnfolding v) -- Let-bound values
+exprOkForSpeculation (Var v) = appOkForSpeculation v []
+exprOkForSpeculation (Cast e _) = exprOkForSpeculation e
-- Tick annotations that *tick* cannot be speculated, because these
-- are meant to identify whether or not (and how often) the particular
@@ -783,8 +780,6 @@ exprOkForSpeculation (Tick tickish e)
| tickishCounts tickish = False
| otherwise = exprOkForSpeculation e
-exprOkForSpeculation (Cast e _) = exprOkForSpeculation e
-
exprOkForSpeculation (Case e _ _ alts)
= exprOkForSpeculation e -- Note [exprOkForSpeculation: case expressions]
&& all (\(_,_,rhs) -> exprOkForSpeculation rhs) alts
@@ -792,37 +787,46 @@ exprOkForSpeculation (Case e _ _ alts)
exprOkForSpeculation other_expr
= case collectArgs other_expr of
- (Var f, args) -> spec_ok (idDetails f) args
+ (Var f, args) -> appOkForSpeculation f args
_ -> False
- where
- spec_ok (DataConWorkId _) _
- = True -- The strictness of the constructor has already
+-----------------------------
+appOkForSpeculation :: Id -> [Expr b] -> Bool
+appOkForSpeculation fun args
+ = case idDetails fun of
+ DFunId new_type -> not new_type
+ -- DFuns terminate, unless the dict is implemented
+ -- with a newtype in which case they may not
+
+ DataConWorkId {} -> True
+ -- The strictness of the constructor has already
-- been expressed by its "wrapper", so we don't need
-- to take the arguments into account
- spec_ok (PrimOpId op) args
- | isDivOp op, -- Special case for dividing operations that fail
- [arg1, Lit lit] <- args -- only if the divisor is zero
- = not (isZeroLit lit) && exprOkForSpeculation arg1
- -- Often there is a literal divisor, and this
- -- can get rid of a thunk in an inner looop
-
- | DataToTagOp <- op -- See Note [dataToTag speculation]
- = True
-
- | otherwise
- = primOpOkForSpeculation op &&
- all exprOkForSpeculation args
- -- A bit conservative: we don't really need
- -- to care about lazy arguments, but this is easy
-
- spec_ok (DFunId new_type) _ = not new_type
- -- DFuns terminate, unless the dict is implemented with a newtype
- -- in which case they may not
-
- spec_ok _ _ = False
-
+ PrimOpId op
+ | isDivOp op -- Special case for dividing operations that fail
+ , [arg1, Lit lit] <- args -- only if the divisor is zero
+ -> not (isZeroLit lit) && exprOkForSpeculation arg1
+ -- Often there is a literal divisor, and this
+ -- can get rid of a thunk in an inner looop
+
+ | DataToTagOp <- op -- See Note [dataToTag speculation]
+ -> True
+
+ | otherwise
+ -> primOpOkForSpeculation op &&
+ all exprOkForSpeculation args
+ -- A bit conservative: we don't really need
+ -- to care about lazy arguments, but this is easy
+
+ _other -> isUnLiftedType (idType fun) -- c.f. the Var case of exprIsHNF
+ || idArity fun > n_val_args -- Partial apps
+ || (n_val_args ==0 &&
+ isEvaldUnfolding (idUnfolding fun)) -- Let-bound values
+ where
+ n_val_args = valArgCount args
+
+-----------------------------
altsAreExhaustive :: [Alt b] -> Bool
-- True <=> the case alterantives are definiely exhaustive
-- False <=> they may or may not be
@@ -991,19 +995,19 @@ exprIsHNFlike is_con is_con_unf = is_hnf_like
-- we could get an infinite loop
is_hnf_like (Lit _) = True
- is_hnf_like (Type _) = True -- Types are honorary Values;
+ is_hnf_like (Type _) = True -- Types are honorary Values;
-- we don't mind copying them
is_hnf_like (Coercion _) = True -- Same for coercions
is_hnf_like (Lam b e) = isRuntimeVar b || is_hnf_like e
is_hnf_like (Tick tickish e) = not (tickishCounts tickish)
&& is_hnf_like e
-- See Note [exprIsHNF Tick]
- is_hnf_like (Cast e _) = is_hnf_like e
- is_hnf_like (App e (Type _)) = is_hnf_like e
+ is_hnf_like (Cast e _) = is_hnf_like e
+ is_hnf_like (App e (Type _)) = is_hnf_like e
is_hnf_like (App e (Coercion _)) = is_hnf_like e
- is_hnf_like (App e a) = app_is_value e [a]
- is_hnf_like (Let _ e) = is_hnf_like e -- Lazy let(rec)s don't affect us
- is_hnf_like _ = False
+ is_hnf_like (App e a) = app_is_value e [a]
+ is_hnf_like (Let _ e) = is_hnf_like e -- Lazy let(rec)s don't affect us
+ is_hnf_like _ = False
-- There is at least one value argument
app_is_value :: CoreExpr -> [CoreArg] -> Bool