Diffstat (limited to 'compiler/GHC/Core/Opt/Arity.hs')
-rw-r--r--  compiler/GHC/Core/Opt/Arity.hs | 1273
1 file changed, 1012 insertions, 261 deletions
diff --git a/compiler/GHC/Core/Opt/Arity.hs b/compiler/GHC/Core/Opt/Arity.hs index 811beb6c0a..5858ff91e0 100644 --- a/compiler/GHC/Core/Opt/Arity.hs +++ b/compiler/GHC/Core/Opt/Arity.hs @@ -11,16 +11,29 @@ -- | Arity and eta expansion module GHC.Core.Opt.Arity - ( manifestArity, joinRhsArity, exprArity - , typeArity, typeOneShots - , exprEtaExpandArity, findRhsArity - , etaExpand, etaExpandAT - , exprBotStrictness_maybe + ( -- Finding arity + manifestArity, joinRhsArity, exprArity + , findRhsArity, exprBotStrictness_maybe , ArityOpts(..) + -- ** Eta expansion + , exprEtaExpandArity, etaExpand, etaExpandAT + + -- ** Eta reduction + , tryEtaReduce + -- ** ArityType - , ArityType(..), mkBotArityType, mkTopArityType, expandableArityType - , arityTypeArity, maxWithArity, minWithArity, idArityType + , ArityType, mkBotArityType, mkManifestArityType + , arityTypeArity, idArityType, getBotArity + + -- ** typeArity and the state hack + , typeArity, typeOneShots, typeOneShot + , isOneShotBndr + , isStateHackType + + -- * Lambdas + , zapLamBndrs + -- ** Join points , etaExpandToJoinPoint, etaExpandToJoinPointRule @@ -39,7 +52,7 @@ import GHC.Core.Utils import GHC.Core.DataCon import GHC.Core.TyCon ( tyConArity ) import GHC.Core.TyCon.RecWalk ( initRecTc, checkRecTc ) -import GHC.Core.Predicate ( isDictTy, isCallStackPredTy ) +import GHC.Core.Predicate ( isDictTy, isEvVar, isCallStackPredTy ) import GHC.Core.Multiplicity -- We have two sorts of substitution: @@ -50,17 +63,19 @@ import GHC.Core.Type as Type import GHC.Core.Coercion as Type import GHC.Types.Demand -import GHC.Types.Var -import GHC.Types.Var.Env import GHC.Types.Id +import GHC.Types.Var.Env import GHC.Types.Var.Set import GHC.Types.Basic import GHC.Types.Tickish +import GHC.Builtin.Types.Prim import GHC.Builtin.Uniques + import GHC.Data.FastString import GHC.Data.Pair +import GHC.Utils.GlobalVars( unsafeHasNoStateHack ) import GHC.Utils.Constants (debugIsOn) import GHC.Utils.Outputable import GHC.Utils.Panic @@ -123,7 +138,8 @@ exprArity :: CoreExpr -> Arity -- We do /not/ guarantee that exprArity e <= typeArity e -- You may need to do arity trimming after calling exprArity -- See Note [Arity trimming] --- (If we do arity trimming here we have to do it at every cast. +-- Reason: if we do arity trimming here we have take exprType +-- and that can be expensive if there is a large cast exprArity e = go e where go (Var v) = idArity v @@ -139,13 +155,50 @@ exprArity e = go e go _ = 0 --------------- +exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, DmdSig) +-- A cheap and cheerful function that identifies bottoming functions +-- and gives them a suitable strictness signatures. It's used during +-- float-out +exprBotStrictness_maybe e + = case getBotArity (arityType botStrictnessArityEnv e) of + Nothing -> Nothing + Just ar -> Just (ar, mkVanillaDmdSig ar botDiv) + +{- Note [exprArity for applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we come to an application we check that the arg is trivial. + eg f (fac x) does not have arity 2, + even if f has arity 3! + +* We require that is trivial rather merely cheap. Suppose f has arity 2. + Then f (Just y) + has arity 0, because if we gave it arity 1 and then inlined f we'd get + let v = Just y in \w. <f-body> + which has arity 0. And we try to maintain the invariant that we don't + have arity decreases. + +* The `max 0` is important! 
(\x y -> f x) has arity 2, even if f is + unknown, hence arity 0 + + +************************************************************************ +* * + typeArity and the "state hack" +* * +********************************************************************* -} + + typeArity :: Type -> Arity +-- ^ (typeArity ty) says how many arrows GHC can expose in 'ty', after +-- looking through newtypes. More generally, (typeOneShots ty) returns +-- ty's [OneShotInfo], based only on the type itself, using typeOneShot +-- on the argument type to access the "state hack". typeArity = length . typeOneShots typeOneShots :: Type -> [OneShotInfo] -- How many value arrows are visible in the type? -- We look through foralls, and newtypes --- See Note [typeArity invariants] +-- See Note [Arity invariants for bindings] typeOneShots ty = go initRecTc ty where @@ -174,64 +227,121 @@ typeOneShots ty | otherwise = [] ---------------- -exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, DmdSig) --- A cheap and cheerful function that identifies bottoming functions --- and gives them a suitable strictness signatures. It's used during --- float-out -exprBotStrictness_maybe e - = case getBotArity (arityType botStrictnessArityEnv e) of - Nothing -> Nothing - Just ar -> Just (ar, sig ar) - where - sig ar = mkClosedDmdSig (replicate ar topDmd) botDiv +typeOneShot :: Type -> OneShotInfo +typeOneShot ty + | isStateHackType ty = OneShotLam + | otherwise = NoOneShotInfo + +-- | Like 'idOneShotInfo', but taking the Horrible State Hack in to account +-- See Note [The state-transformer hack] in "GHC.Core.Opt.Arity" +idStateHackOneShotInfo :: Id -> OneShotInfo +idStateHackOneShotInfo id + | isStateHackType (idType id) = OneShotLam + | otherwise = idOneShotInfo id + +-- | Returns whether the lambda associated with the 'Id' is +-- certainly applied at most once +-- This one is the "business end", called externally. +-- It works on type variables as well as Ids, returning True +-- Its main purpose is to encapsulate the Horrible State Hack +-- See Note [The state-transformer hack] in "GHC.Core.Opt.Arity" +isOneShotBndr :: Var -> Bool +isOneShotBndr var + | isTyVar var = True + | OneShotLam <- idStateHackOneShotInfo var = True + | otherwise = False + +isStateHackType :: Type -> Bool +isStateHackType ty + | unsafeHasNoStateHack -- Switch off with -fno-state-hack + = False + | otherwise + = case tyConAppTyCon_maybe ty of + Just tycon -> tycon == statePrimTyCon + _ -> False + -- This is a gross hack. It claims that + -- every function over realWorldStatePrimTy is a one-shot + -- function. This is pretty true in practice, and makes a big + -- difference. For example, consider + -- a `thenST` \ r -> ...E... + -- The early full laziness pass, if it doesn't know that r is one-shot + -- will pull out E (let's say it doesn't mention r) to give + -- let lvl = E in a `thenST` \ r -> ...lvl... + -- When `thenST` gets inlined, we end up with + -- let lvl = E in \s -> case a s of (r, s') -> ...lvl... + -- and we don't re-inline E. + -- + -- It would be better to spot that r was one-shot to start with, but + -- I don't want to rely on that. + -- + -- Another good example is in fill_in in PrelPack.hs. We should be able to + -- spot that fill_in has arity 2 (and when Keith is done, we will) but we can't yet. 
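
For a concrete feel for this classification, here is a minimal standalone sketch with a toy type representation and hypothetical names (typeOneShotsSketch, typeAritySketch). Unlike the real typeOneShots/isStateHackType above, it does not look through newtypes or foralls and ignores the -fno-state-hack switch:

  -- Minimal standalone sketch (hypothetical names, toy 'Ty' type); the real
  -- typeOneShots works on GHC's Type, looks through newtypes and foralls,
  -- and respects -fno-state-hack.
  data OneShotInfo = NoOneShotInfo | OneShotLam deriving Show

  data Ty = TyCon String [Ty]   -- e.g. TyCon "State#" [TyCon "RealWorld" []]
          | FunTy Ty Ty         -- argument type -> result type

  typeOneShotsSketch :: Ty -> [OneShotInfo]
  typeOneShotsSketch (FunTy arg res) = one_shot arg : typeOneShotsSketch res
    where
      -- the "state hack": a State#-typed argument marks a one-shot lambda
      one_shot (TyCon "State#" _) = OneShotLam
      one_shot _                  = NoOneShotInfo
  typeOneShotsSketch _ = []

  typeAritySketch :: Ty -> Int
  typeAritySketch = length . typeOneShotsSketch

  -- e.g. for Int -> State# RealWorld -> Int the sketch gives
  -- [NoOneShotInfo, OneShotLam]: arity 2, with a one-shot inner lambda.
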
-{- -Note [typeArity invariants] -~~~~~~~~~~~~~~~~~~~~~~~~~~ -We have the following invariants around typeArity - (1) In any binding x = e, - idArity f <= typeArity (idType f) +{- Note [Arity invariants for bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We have the following invariants for let-bindings + + (1) In any binding f = e, + idArity f <= typeArity (idType f) + We enforce this with trimArityType, called in findRhsArity; + see Note [Arity trimming]. + + Note that we enforce this only for /bindings/. We do /not/ insist that + arityTypeArity (arityType e) <= typeArity (exprType e) + because that is quite a bit more expensive to guaranteed; it would + mean checking at every Cast in the recursive arityType, for example. (2) If typeArity (exprType e) = n, then manifestArity (etaExpand e n) = n That is, etaExpand can always expand as much as typeArity says - So the case analysis in etaExpand and in typeArity must match + (or less, of course). So the case analysis in etaExpand and in + typeArity must match. -Why is this important? Because + Consequence: because of (1), if we eta-expand to (idArity f), we will + end up with n manifest lambdas. - - In GHC.Iface.Tidy we use exprArity to fix the *final arity* of - each top-level Id, and in + (3) In any binding f = e, + idArity f <= arityTypeArity (safeArityType (arityType e)) + That is, we call safeArityType before attributing e's arityType to f. + See Note [SafeArityType]. - - In CorePrep we use etaExpand on each rhs, so that the visible lambdas - actually match that arity, which in turn means - that the StgRhs has the right number of lambdas + So we call safeArityType in findRhsArity. Suppose we have f :: Int -> Int -> Int f x y = x+y -- Arity 2 g :: F Int - g = case x of { True -> f |> co1 - ; False -> g |> co2 } + g = case <cond> of { True -> f |> co1 + ; False -> g |> co2 } -Now, we can't eta-expand g to have arity 2, because etaExpand, which works -off the /type/ of the expression, doesn't know how to make an eta-expanded -binding +where F is a type family. Now, we can't eta-expand g to have arity 2, +because etaExpand, which works off the /type/ of the expression +(albeit looking through newtypes), doesn't know how to make an +eta-expanded binding g = (\a b. case x of ...) |> co -because can't make up `co` or the types of `a` and `b`. +because it can't make up `co` or the types of `a` and `b`. So invariant (1) ensures that every binding has an arity that is no greater than the typeArity of the RHS; and invariant (2) ensures that etaExpand and handle what typeArity says. +Why is this important? Because + + - In GHC.Iface.Tidy we use exprArity/manifestArity to fix the *final + arity* of each top-level Id, and in + + - In CorePrep we use etaExpand on each rhs, so that the visible + lambdas actually match that arity, which in turn means that the + StgRhs has a number of lambdas that precisely matches the arity. + Note [Arity trimming] ~~~~~~~~~~~~~~~~~~~~~ -Arity trimming, implemented by minWithArity, directly implements -invariant (1) of Note [typeArity invariants]. Failing to do so, and -hence breaking invariant (1) led to #5441. +Invariant (1) of Note [Arity invariants for bindings] is upheld by findRhsArity, +which calls trimArityType to trim the ArityType to match the Arity of the +binding. Failing to do so, and hence breaking invariant (1) led to #5441. How to trim? If we end in topDiv, it's easy. But we must take great care with dead ends (i.e. botDiv). Suppose the expression was (\x y. 
error "urk"), @@ -293,26 +403,34 @@ trying to *make* it hold, but it's tricky and I gave up. The test simplCore/should_compile/T3722 is an excellent example. -------- End of old out of date comments, just for interest ----------- +-} +{- ******************************************************************** +* * + Zapping lambda binders +* * +********************************************************************* -} -Note [exprArity for applications] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When we come to an application we check that the arg is trivial. - eg f (fac x) does not have arity 2, - even if f has arity 3! - -* We require that is trivial rather merely cheap. Suppose f has arity 2. - Then f (Just y) - has arity 0, because if we gave it arity 1 and then inlined f we'd get - let v = Just y in \w. <f-body> - which has arity 0. And we try to maintain the invariant that we don't - have arity decreases. +zapLamBndrs :: FullArgCount -> [Var] -> [Var] +-- If (\xyz. t) appears under-applied to only two arguments, +-- we must zap the occ-info on x,y, because they appear (in 't') under the \z. +-- See Note [Occurrence analysis for lambda binders] in GHc.Core.Opt.OccurAnal +-- +-- NB: both `arg_count` and `bndrs` include both type and value args/bndrs +zapLamBndrs arg_count bndrs + | no_need_to_zap = bndrs + | otherwise = zap_em arg_count bndrs + where + no_need_to_zap = all isOneShotBndr (drop arg_count bndrs) -* The `max 0` is important! (\x y -> f x) has arity 2, even if f is - unknown, hence arity 0 + zap_em :: FullArgCount -> [Var] -> [Var] + zap_em 0 bs = bs + zap_em _ [] = [] + zap_em n (b:bs) | isTyVar b = b : zap_em (n-1) bs + | otherwise = zapLamIdInfo b : zap_em (n-1) bs -************************************************************************ +{- ********************************************************************* * * Computing the "arity" of an expression * * @@ -490,34 +608,72 @@ but not to introduce a new lambda. Note [ArityType] ~~~~~~~~~~~~~~~~ +ArityType can be thought of as an abstraction of an expression. +The ArityType + AT [ (IsCheap, NoOneShotInfo) + , (IsExpensive, OneShotLam) + , (IsCheap, OneShotLam) ] Dunno) + +abstracts an expression like + \x. let <expensive> in + \y{os}. + \z{os}. blah + +In general we have (AT lams div). Then +* In lams :: [(Cost,OneShotInfo)] + * The Cost flag describes the part of the expression down + to the first (value) lambda. + * The OneShotInfo flag gives the one-shot info on that lambda. + +* If 'div' is dead-ending ('isDeadEndDiv'), then application to + 'length lams' arguments will surely diverge, similar to the situation + with 'DmdType'. + ArityType is the result of a compositional analysis on expressions, from which we can decide the real arity of the expression (extracted with function exprEtaExpandArity). We use the following notation: - at ::= \o1..on.div + at ::= \p1..pn.div div ::= T | x | ⊥ - o ::= ? | 1 -And omit the \. if n = 0. Examples: - \?11.T stands for @AT [NoOneShotInfo,OneShotLam,OneShotLam] topDiv@ - ⊥ stands for @AT [] botDiv@ + p ::= (c o) + c ::= X | C -- Expensive or Cheap + o ::= ? | 1 -- NotOneShot or OneShotLam +We may omit the \. if n = 0. +And ⊥ stands for `AT [] botDiv` + +Here is an example demonstrating the notation: + \(C?)(X1)(C1).T +stands for + AT [ (IsCheap,NoOneShotInfo) + , (IsExpensive,OneShotLam) + , (IsCheap,OneShotLam) ] + topDiv + See the 'Outputable' instance for more information. It's pretty simple. +How can we use ArityType? Example: + f = \x\y. let v = <expensive> in + \s(one-shot) \t(one-shot). 
blah + 'f' has arity type \(C?)(C?)(X1)(C1).T + The one-shot-ness means we can, in effect, push that + 'let' inside the \st, and expand to arity 4 + +Suppose f = \xy. x+y +Then f :: \(C?)(C?).T + f v :: \(C?).T + f <expensive> :: \(X?).T + Here is what the fields mean. If an arbitrary expression 'f' has ArityType 'at', then * If @at = AT [o1,..,on] botDiv@ (notation: \o1..on.⊥), then @f x1..xn@ definitely diverges. Partial applications to fewer than n args may *or - may not* diverge. + may not* diverge. Ditto exnDiv. - We allow ourselves to eta-expand bottoming functions, even - if doing so may lose some `seq` sharing, - let x = <expensive> in \y. error (g x y) - ==> \y. let x = <expensive> in error (g x y) - - * If @at = AT [o1,..,on] topDiv@ (notation: \o1..on.T), then expanding 'f' - to @\x1..xn. f x1..xn@ loses no sharing, assuming the calls of f respect - the one-shot-ness o1..on of its definition. + * If `f` has ArityType `at` we can eta-expand `f` to have (aritTypeOneShots at) + arguments without losing sharing. This function checks that the either + there are no expensive expressions, or the lambdas are one-shots. NB 'f' is an arbitrary expression, eg @f = g e1 e2@. This 'f' can have arity type @AT oss _@, with @length oss > 0@, only if e1 e2 are themselves @@ -530,20 +686,45 @@ ArityType 'at', then So eta expansion is dynamically ok; see Note [State hack and bottoming functions], the part about catch# -Example: - f = \x\y. let v = <expensive> in - \s(one-shot) \t(one-shot). blah - 'f' has arity type \??11.T - The one-shot-ness means we can, in effect, push that - 'let' inside the \st. +Wrinkles +* Wrinkle [Bottoming functions]: see function 'arityLam'. + We treat bottoming functions as one-shot, because there is no point + in floating work outside the lambda, and it's fine to float it inside. -Suppose f = \xy. x+y -Then f :: \??.T - f v :: \?.T - f <expensive> :: T --} + For example, this is fine (see test stranal/sigs/BottomFromInnerLambda) + let x = <expensive> in \y. error (g x y) + ==> \y. let x = <expensive> in error (g x y) + Idea: perhaps we could enforce this invariant with + data Arity Type = TopAT [(Cost, OneShotInfo)] | DivAT [Cost] + + +Note [SafeArityType] +~~~~~~~~~~~~~~~~~~~~ +The function safeArityType trims an ArityType to return a "safe" ArityType, +for which we use a type synonym SafeArityType. It is "safe" in the sense +that (arityTypeArity at) really reflects the arity of the expression, whereas +a regular ArityType might have more lambdas in its [ATLamInfo] that the +(cost-free) arity of the expression. + +For example + \x.\y.let v = expensive in \z. blah +has + arityType = AT [C?, C?, X?, C?] Top +But the expression actually has arity 2, not 4, because of the X. +So safeArityType will trim it to (AT [C?, C?] Top), whose [ATLamInfo] +now reflects the (cost-free) arity of the expression + +Why do we ever need an "unsafe" ArityType, such as the example above? +Because its (cost-free) arity may increased by combineWithDemandOneShots +in findRhsArity. See Note [Combining arity type with demand info]. + +Thus the function `arityType` returns a regular "unsafe" ArityType, that +goes deeply into the lambdas (including under IsExpensive). But that is +very local; most ArityTypes are indeed "safe". We use the type synonym +SafeArityType to indicate where we believe the ArityType is safe. +-} -- | The analysis lattice of arity analysis. 
It is isomorphic to -- @@ -574,22 +755,33 @@ Then f :: \??.T -- -- We rely on this lattice structure for fixed-point iteration in -- 'findRhsArity'. For the semantics of 'ArityType', see Note [ArityType]. -data ArityType - = AT ![OneShotInfo] !Divergence - -- ^ @AT oss div@ means this value can safely be eta-expanded @length oss@ - -- times, provided use sites respect the 'OneShotInfo's in @oss@. - -- A 'OneShotLam' annotation can come from two sources: - -- * The user annotated a lambda as one-shot with 'GHC.Exts.oneShot' - -- * It's from a lambda binder of a type affected by `-fstate-hack`. - -- See 'idStateHackOneShotInfo'. - -- In both cases, 'OneShotLam' should win over 'NoOneShotInfo', see - -- Note [Combining case branches]. - -- - -- If @div@ is dead-ending ('isDeadEndDiv'), then application to - -- @length os@ arguments will surely diverge, similar to the situation - -- with 'DmdType'. +data ArityType -- See Note [ArityType] + = AT ![ATLamInfo] !Divergence + -- ^ `AT oss div` is an abstraction of the expression, which describes + -- its lambdas, and how much work appears where. + -- See Note [ArityType] for more information + -- + -- If `div` is dead-ending ('isDeadEndDiv'), then application to + -- `length os` arguments will surely diverge, similar to the situation + -- with 'DmdType'. deriving Eq +type ATLamInfo = (Cost,OneShotInfo) + -- ^ Info about one lambda in an ArityType + -- See Note [ArityType] + +type SafeArityType = ArityType -- See Note [SafeArityType] + +data Cost = IsCheap | IsExpensive + deriving( Eq ) + +allCosts :: (a -> Cost) -> [a] -> Cost +allCosts f xs = foldr (addCost . f) IsCheap xs + +addCost :: Cost -> Cost -> Cost +addCost IsCheap IsCheap = IsCheap +addCost _ _ = IsExpensive + -- | This is the BNF of the generated output: -- -- @ @@ -608,57 +800,56 @@ instance Outputable ArityType where pp_div Diverges = char '⊥' pp_div ExnOrDiv = char 'x' pp_div Dunno = char 'T' - pp_os OneShotLam = char '1' - pp_os NoOneShotInfo = char '?' + pp_os (IsCheap, OneShotLam) = text "(C1)" + pp_os (IsExpensive, OneShotLam) = text "(X1)" + pp_os (IsCheap, NoOneShotInfo) = text "(C?)" + pp_os (IsExpensive, NoOneShotInfo) = text "(X?)" mkBotArityType :: [OneShotInfo] -> ArityType -mkBotArityType oss = AT oss botDiv +mkBotArityType oss = AT [(IsCheap,os) | os <- oss] botDiv botArityType :: ArityType botArityType = mkBotArityType [] -mkTopArityType :: [OneShotInfo] -> ArityType -mkTopArityType oss = AT oss topDiv +mkManifestArityType :: [OneShotInfo] -> ArityType +mkManifestArityType oss = AT [(IsCheap,os) | os <- oss] topDiv topArityType :: ArityType -topArityType = mkTopArityType [] +topArityType = AT [] topDiv -- | The number of value args for the arity type -arityTypeArity :: ArityType -> Arity -arityTypeArity (AT oss _) = length oss - --- | True <=> eta-expansion will add at least one lambda -expandableArityType :: ArityType -> Bool -expandableArityType at = arityTypeArity at > 0 - --- | See Note [Dead ends] in "GHC.Types.Demand". --- Bottom implies a dead end. -isDeadEndArityType :: ArityType -> Bool -isDeadEndArityType (AT _ div) = isDeadEndDiv div - ------------------------ -infixl 2 `maxWithArity`, `minWithArity` - --- | Expand a non-bottoming arity type so that it has at least the given arity. -maxWithArity :: ArityType -> Arity -> ArityType -maxWithArity at@(AT oss div) !ar - | isDeadEndArityType at = at - | oss `lengthAtLeast` ar = at - | otherwise = AT (take ar $ oss ++ repeat NoOneShotInfo) div - --- | Trim an arity type so that it has at most the given arity. 
--- Any excess 'OneShotInfo's are truncated to 'topDiv', even if they end in --- 'ABot'. See Note [Arity trimming] -minWithArity :: ArityType -> Arity -> ArityType -minWithArity at@(AT oss _) ar - | oss `lengthAtMost` ar = at - | otherwise = AT (take ar oss) topDiv - ----------------------- -takeWhileOneShot :: ArityType -> ArityType -takeWhileOneShot (AT oss div) - | isDeadEndDiv div = AT (takeWhile isOneShotInfo oss) topDiv - | otherwise = AT (takeWhile isOneShotInfo oss) div +arityTypeArity :: SafeArityType -> Arity +arityTypeArity (AT lams _) = length lams + +arityTypeOneShots :: SafeArityType -> [OneShotInfo] +-- Returns a list only as long as the arity should be +arityTypeOneShots (AT lams _) = map snd lams + +safeArityType :: ArityType -> SafeArityType +-- ^ Assuming this ArityType is all we know, find the arity of +-- the function, and trim the argument info (and Divergenge) +-- to match that arity. See Note [SafeArityType] +safeArityType at@(AT lams _) + = case go 0 IsCheap lams of + Nothing -> at -- No trimming needed + Just ar -> AT (take ar lams) topDiv + where + go :: Arity -> Cost -> [(Cost,OneShotInfo)] -> Maybe Arity + go _ _ [] = Nothing + go ar ch1 ((ch2,os):lams) + = case (ch1 `addCost` ch2, os) of + (IsExpensive, NoOneShotInfo) -> Just ar + (ch, _) -> go (ar+1) ch lams + +infixl 2 `trimArityType` + +trimArityType :: Arity -> ArityType -> ArityType +-- ^ Trim an arity type so that it has at most the given arity. +-- Any excess 'OneShotInfo's are truncated to 'topDiv', even if +-- they end in 'ABot'. See Note [Arity trimming] +trimArityType max_arity at@(AT lams _) + | lams `lengthAtMost` max_arity = at + | otherwise = AT (take max_arity lams) topDiv data ArityOpts = ArityOpts { ao_ped_bot :: !Bool -- See Note [Dealing with bottom] @@ -667,10 +858,17 @@ data ArityOpts = ArityOpts -- | The Arity returned is the number of value args the -- expression can be applied to without doing much work -exprEtaExpandArity :: ArityOpts -> CoreExpr -> ArityType +exprEtaExpandArity :: ArityOpts -> CoreExpr -> Maybe SafeArityType -- exprEtaExpandArity is used when eta expanding -- e ==> \xy -> e x y -exprEtaExpandArity opts e = arityType (etaExpandArityEnv opts) e +-- Nothing if the expression has arity 0 +exprEtaExpandArity opts e + | AT [] _ <- arity_type + = Nothing + | otherwise + = Just arity_type + where + arity_type = safeArityType (arityType (etaExpandArityEnv opts) e) getBotArity :: ArityType -> Maybe Arity -- Arity of a divergent function @@ -678,29 +876,54 @@ getBotArity (AT oss div) | isDeadEndDiv div = Just $ length oss | otherwise = Nothing ----------------------- -findRhsArity :: ArityOpts -> RecFlag -> Id -> CoreExpr -> Arity -> ArityType + +{- ********************************************************************* +* * + findRhsArity +* * +********************************************************************* -} + +findRhsArity :: ArityOpts -> RecFlag -> Id -> CoreExpr -> Arity -> SafeArityType -- This implements the fixpoint loop for arity analysis -- See Note [Arity analysis] -- If findRhsArity e = (n, is_bot) then -- (a) any application of e to <n arguments will not do much work, -- so it is safe to expand e ==> (\x1..xn. e x1 .. 
xn) -- (b) if is_bot=True, then e applied to n args is guaranteed bottom - -findRhsArity opts NonRecursive _ rhs _ - = arityType (findRhsArityEnv opts) rhs - -findRhsArity opts Recursive bndr rhs old_arity - = go 0 botArityType - -- We always do one step, but usually that produces a result equal to - -- old_arity, and then we stop right away, because old_arity is assumed - -- to be sound. In other words, arities should never decrease. - -- Result: the common case is that there is just one iteration +-- +-- Returns an ArityType that is guaranteed trimmed to typeArity of 'bndr' +-- See Note [Arity trimming] +findRhsArity opts is_rec bndr rhs old_arity + = case is_rec of + Recursive -> go 0 botArityType + NonRecursive -> step init_env where - go :: Int -> ArityType -> ArityType - go !n cur_at@(AT oss div) + init_env :: ArityEnv + init_env = findRhsArityEnv opts + + ty_arity = typeArity (idType bndr) + id_one_shots = idDemandOneShots bndr + + step :: ArityEnv -> SafeArityType + step env = trimArityType ty_arity $ + safeArityType $ -- See Note [Arity invariants for bindings], item (3) + arityType env rhs `combineWithDemandOneShots` id_one_shots + -- trimArityType: see Note [Trim arity inside the loop] + -- combineWithDemandOneShots: take account of the demand on the + -- binder. Perhaps it is always called with 2 args + -- let f = \x. blah in (f 3 4, f 1 9) + -- f's demand-info says how many args it is called with + + -- The fixpoint iteration (go), done for recursive bindings. We + -- always do one step, but usually that produces a result equal + -- to old_arity, and then we stop right away, because old_arity + -- is assumed to be sound. In other words, arities should never + -- decrease. Result: the common case is that there is just one + -- iteration + go :: Int -> SafeArityType -> SafeArityType + go !n cur_at@(AT lams div) | not (isDeadEndDiv div) -- the "stop right away" case - , length oss <= old_arity = cur_at -- from above + , length lams <= old_arity = cur_at -- from above | next_at == cur_at = cur_at | otherwise = -- Warn if more than 2 iterations. Why 2? 
See Note [Exciting arity] @@ -709,20 +932,49 @@ findRhsArity opts Recursive bndr rhs old_arity (nest 2 (ppr bndr <+> ppr cur_at <+> ppr next_at $$ ppr rhs)) $ go (n+1) next_at where - next_at = step cur_at - - step :: ArityType -> ArityType - step at = -- pprTrace "step" (vcat [ ppr bndr <+> ppr at <+> ppr (arityType env rhs) - -- , ppr (idType bndr) - -- , ppr (typeArity (idType bndr)) ]) $ - arityType env rhs - where - env = extendSigEnv (findRhsArityEnv opts) bndr at + next_at = step (extendSigEnv init_env bndr cur_at) +infixl 2 `combineWithDemandOneShots` -{- -Note [Arity analysis] -~~~~~~~~~~~~~~~~~~~~~ +combineWithDemandOneShots :: ArityType -> [OneShotInfo] -> ArityType +-- See Note [Combining arity type with demand info] +combineWithDemandOneShots at@(AT lams div) oss + | null lams = at + | otherwise = AT (zip_lams lams oss) div + where + zip_lams :: [ATLamInfo] -> [OneShotInfo] -> [ATLamInfo] + zip_lams lams [] = lams + zip_lams [] oss = [ (IsExpensive,OneShotLam) + | _ <- takeWhile isOneShotInfo oss] + zip_lams ((ch,os1):lams) (os2:oss) + = (ch, os1 `bestOneShot` os2) : zip_lams lams oss + +idDemandOneShots :: Id -> [OneShotInfo] +idDemandOneShots bndr + = call_arity_one_shots `zip_lams` dmd_one_shots + where + call_arity_one_shots :: [OneShotInfo] + call_arity_one_shots + | call_arity == 0 = [] + | otherwise = NoOneShotInfo : replicate (call_arity-1) OneShotLam + -- Call Arity analysis says the function is always called + -- applied to this many arguments. The first NoOneShotInfo is because + -- if Call Arity says "always applied to 3 args" then the one-shot info + -- we get is [NoOneShotInfo, OneShotLam, OneShotLam] + call_arity = idCallArity bndr + + dmd_one_shots :: [OneShotInfo] + -- If the demand info is Cx(C1(C1(.))) then we know that an + -- application to one arg is also an application to three + dmd_one_shots = argOneShots (idDemandInfo bndr) + + -- Take the *longer* list + zip_lams (lam1:lams1) (lam2:lams2) = (lam1 `bestOneShot` lam2) : zip_lams lams1 lams2 + zip_lams [] lams2 = lams2 + zip_lams lams1 [] = lams1 + +{- Note [Arity analysis] +~~~~~~~~~~~~~~~~~~~~~~~~ The motivating example for arity analysis is this: f = \x. let g = f (x+1) @@ -784,57 +1036,118 @@ to floatIn the non-cheap let-binding. Which is all perfectly benign, but means we do two iterations (well, actually 3 'step's to detect we are stable) and don't want to emit the warning. -Note [Eta expanding through dictionaries] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If the experimental -fdicts-cheap flag is on, we eta-expand through -dictionary bindings. This improves arities. Thereby, it also -means that full laziness is less prone to floating out the -application of a function to its dictionary arguments, which -can thereby lose opportunities for fusion. Example: - foo :: Ord a => a -> ... - foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). .... - -- So foo has arity 1 +Note [Trim arity inside the loop] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Here's an example (from gadt/nbe.hs) which caused trouble. + data Exp g t where + Lam :: Ty a -> Exp (g,a) b -> Exp g (a->b) - f = \x. foo dInt $ bar x + eval :: Exp g t -> g -> t + eval (Lam _ e) g = \a -> eval e (g,a) -The (foo DInt) is floated out, and makes ineffective a RULE - foo (bar x) = ... +The danger is that we get arity 3 from analysing this; and the +next time arity 4, and so on for ever. Solution: use trimArityType +on each iteration. -One could go further and make exprIsCheap reply True to any -dictionary-typed expression, but that's more work. 
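
To make the demand-combination step in findRhsArity above concrete, here is a small standalone sketch with a hypothetical name (combineSketch) and simplified local copies of Cost/OneShotInfo/bestOneShot; the real code is combineWithDemandOneShots and idDemandOneShots above, and the worked example matches Note [Combining arity type with demand info] below:

  -- Standalone sketch (hypothetical names, simplified types): zip the RHS's
  -- per-lambda info with one-shot info derived from the binder's demand,
  -- letting OneShotLam win at each position, and extend past the end of the
  -- lambdas only while the demand says "called once".
  data OneShotInfo = NoOneShotInfo | OneShotLam deriving (Eq, Show)
  data Cost        = IsCheap | IsExpensive      deriving (Eq, Show)

  bestOneShot :: OneShotInfo -> OneShotInfo -> OneShotInfo
  bestOneShot OneShotLam _  = OneShotLam
  bestOneShot _          os = os

  combineSketch :: [(Cost, OneShotInfo)] -> [OneShotInfo] -> [(Cost, OneShotInfo)]
  combineSketch lams []  = lams
  combineSketch []   oss = [ (IsExpensive, OneShotLam)
                           | _ <- takeWhile (== OneShotLam) oss ]
  combineSketch ((c, os1):lams) (os2:oss)
    = (c, os1 `bestOneShot` os2) : combineSketch lams oss

  -- Example (cf. Note [Combining arity type with demand info] below):
  --   combineSketch [(IsCheap,NoOneShotInfo),(IsExpensive,NoOneShotInfo),(IsCheap,OneShotLam)]
  --                 [NoOneShotInfo, OneShotLam, OneShotLam]
  --   ==> [(IsCheap,NoOneShotInfo),(IsExpensive,OneShotLam),(IsCheap,OneShotLam)]
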
+Note [Combining arity type with demand info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + let f = \x. let y = <expensive> in \p \q{os}. blah + in ...(f a b)...(f c d)... + +* From the RHS we get an ArityType like + AT [ (IsCheap,?), (IsExpensive,?), (IsCheap,OneShotLam) ] Dunno + where "?" means NoOneShotInfo + +* From the body, the demand analyser (or Call Arity) will tell us + that the function is always applied to at least two arguments. + +Combining these two pieces of info, we can get the final ArityType + AT [ (IsCheap,?), (IsExpensive,OneShotLam), (IsCheap,OneShotLam) ] Dunno +result: arity=3, which is better than we could do from either +source alone. + +The "combining" part is done by combineWithDemandOneShots. It +uses info from both Call Arity and demand analysis. + +We may have /more/ call demands from the calls than we have lambdas +in the binding. E.g. + let f1 = \x. g x x in ...(f1 p q r)... + -- Demand on f1 is Cx(C1(C1(L))) + + let f2 = \y. error y in ...(f2 p q r)... + -- Demand on f2 is Cx(C1(C1(L))) + +In both these cases we can eta expand f1 and f2 to arity 3. +But /only/ for called-once demands. Suppose we had + let f1 = \y. g x x in ...let h = f1 p q in ...(h r1)...(h r2)... + +Now we don't want to eta-expand f1 to have 3 args; only two. +Nor, in the case of f2, do we want to push that error call under +a lambda. Hence the takeWhile in combineWithDemandDoneShots. -} + +{- ********************************************************************* +* * + arityType +* * +********************************************************************* -} + arityLam :: Id -> ArityType -> ArityType -arityLam id (AT oss div) = AT (idStateHackOneShotInfo id : oss) div +arityLam id (AT oss div) + = AT ((IsCheap, one_shot) : oss) div + where + one_shot | isDeadEndDiv div = OneShotLam + | otherwise = idStateHackOneShotInfo id + -- If the body diverges, treat it as one-shot: no point + -- in floating out, and no penalty for floating in + -- See Wrinkle [Bottoming functions] in Note [ArityType] -floatIn :: Bool -> ArityType -> ArityType +floatIn :: Cost -> ArityType -> ArityType -- We have something like (let x = E in b), -- where b has the given arity type. -floatIn cheap at - | isDeadEndArityType at || cheap = at - -- If E is not cheap, keep arity only for one-shots - | otherwise = takeWhileOneShot at +floatIn IsCheap at = at +floatIn IsExpensive at = addWork at + +addWork :: ArityType -> ArityType +addWork at@(AT lams div) + = case lams of + [] -> at + lam:lams' -> AT (add_work lam : lams') div + where + add_work :: ATLamInfo -> ATLamInfo + add_work (_,os) = (IsExpensive,os) -arityApp :: ArityType -> Bool -> ArityType +arityApp :: ArityType -> Cost -> ArityType -- Processing (fun arg) where at is the ArityType of fun, -- Knock off an argument and behave like 'let' -arityApp (AT (_:oss) div) cheap = floatIn cheap (AT oss div) -arityApp at _ = at +arityApp (AT ((ch1,_):oss) div) ch2 = floatIn (ch1 `addCost` ch2) (AT oss div) +arityApp at _ = at -- | Least upper bound in the 'ArityType' lattice. -- See the haddocks on 'ArityType' for the lattice. -- -- Used for branches of a @case@. 
andArityType :: ArityType -> ArityType -> ArityType -andArityType (AT (os1:oss1) div1) (AT (os2:oss2) div2) - | AT oss' div' <- andArityType (AT oss1 div1) (AT oss2 div2) - = AT ((os1 `bestOneShot` os2) : oss') div' -- See Note [Combining case branches] -andArityType (AT [] div1) at2 - | isDeadEndDiv div1 = at2 -- Note [ABot branches: max arity wins] - | otherwise = takeWhileOneShot at2 -- See Note [Combining case branches] -andArityType at1 (AT [] div2) - | isDeadEndDiv div2 = at1 -- Note [ABot branches: max arity wins] - | otherwise = takeWhileOneShot at1 -- See Note [Combining case branches] +andArityType (AT (lam1:lams1) div1) (AT (lam2:lams2) div2) + | AT lams' div' <- andArityType (AT lams1 div1) (AT lams2 div2) + = AT ((lam1 `and_lam` lam2) : lams') div' -- See Note [Combining case branches] + where + (ch1,os1) `and_lam` (ch2,os2) + = ( ch1 `addCost` ch2, os1 `bestOneShot` os2) + +andArityType (AT [] div1) at2 = andWithTail div1 at2 +andArityType at1 (AT [] div2) = andWithTail div2 at1 + +andWithTail :: Divergence -> ArityType -> ArityType +andWithTail div1 at2@(AT oss2 _) + | isDeadEndDiv div1 -- case x of { T -> error; F -> \y.e } + = at2 + | otherwise -- case x of { T -> plusInt <expensive>; F -> \y.e } + = addWork (AT oss2 topDiv) -- We know div1 = topDiv + -- Note [ABot branches: max arity wins] + -- See Note [Combining case branches] {- Note [ABot branches: max arity wins] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -866,29 +1179,6 @@ basis that if we know one branch is one-shot, then they all must be. Surprisingly, this means that the one-shot arity type is effectively the top element of the lattice. -Note [Arity trimming] -~~~~~~~~~~~~~~~~~~~~~ -Consider ((\x y. blah) |> co), where co :: (Int->Int->Int) ~ (Int -> F a) , and -F is some type family. - -Because of Note [exprArity invariant], item (2), we must return with arity at -most 1, because typeArity (Int -> F a) = 1. So we have to trim the result of -calling arityType on (\x y. blah). Failing to do so, and hence breaking the -exprArity invariant, led to #5441. - -How to trim? If we end in topDiv, it's easy. But we must take great care with -dead ends (i.e. botDiv). Suppose the expression was (\x y. error "urk"), -we'll get \??.⊥. We absolutely must not trim that to \?.⊥, because that -claims that ((\x y. error "urk") |> co) diverges when given one argument, -which it absolutely does not. And Bad Things happen if we think something -returns bottom when it doesn't (#16066). - -So, if we need to trim a dead-ending arity type, switch (conservatively) to -topDiv. - -Historical note: long ago, we unconditionally switched to topDiv when we -encountered a cast, but that is far too conservative: see #5475 - Note [Eta expanding through CallStacks] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Just as it's good to eta-expand through dictionaries, so it is good to @@ -899,6 +1189,25 @@ do so through CallStacks. #20103 is a case in point, where we got We really want to eta-expand this! #20103 is quite convincing! We do this regardless of -fdicts-cheap; it's not really a dictionary. + +Note [Eta expanding through dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the experimental -fdicts-cheap flag is on, we eta-expand through +dictionary bindings. This improves arities. Thereby, it also +means that full laziness is less prone to floating out the +application of a function to its dictionary arguments, which +can thereby lose opportunities for fusion. Example: + foo :: Ord a => a -> ... + foo = /\a \(d:Ord a). let d' = ...d... 
in \(x:a). .... + -- So foo has arity 1 + + f = \x. foo dInt $ bar x + +The (foo DInt) is floated out, and makes ineffective a RULE + foo (bar x) = ... + +One could go further and make exprIsCheap reply True to any +dictionary-typed expression, but that's more work. -} --------------------------- @@ -921,14 +1230,18 @@ We do this regardless of -fdicts-cheap; it's not really a dictionary. data AnalysisMode = BotStrictness -- ^ Used during 'exprBotStrictness_maybe'. + | EtaExpandArity { am_opts :: !ArityOpts } - -- ^ Used for finding an expression's eta-expanding arity quickly, without - -- fixed-point iteration ('exprEtaExpandArity'). - | FindRhsArity { am_opts :: !ArityOpts - , am_sigs :: !(IdEnv ArityType) } + -- ^ Used for finding an expression's eta-expanding arity quickly, + -- without fixed-point iteration ('exprEtaExpandArity'). + + | FindRhsArity { am_opts :: !ArityOpts + , am_sigs :: !(IdEnv SafeArityType) } -- ^ Used for regular, fixed-point arity analysis ('findRhsArity'). -- See Note [Arity analysis] for details about fixed-point iteration. - -- INVARIANT: Disjoint with 'ae_joins'. + -- am_dicts_cheap: see Note [Eta expanding through dictionaries] + -- am_sigs: note `SafeArityType` so we can use this in myIsCheapApp + -- INVARIANT: am_sigs is disjoint with 'ae_joins'. data ArityEnv = AE @@ -991,9 +1304,11 @@ extendJoinEnv env@(AE { ae_joins = joins }) join_ids = del_sig_env_list join_ids $ env { ae_joins = joins `extendVarSetList` join_ids } -extendSigEnv :: ArityEnv -> Id -> ArityType -> ArityEnv +extendSigEnv :: ArityEnv -> Id -> SafeArityType -> ArityEnv extendSigEnv env id ar_ty - = del_join_env id (modifySigEnv (\sigs -> extendVarEnv sigs id ar_ty) env) + = del_join_env id $ + modifySigEnv (\sigs -> extendVarEnv sigs id ar_ty) $ + env delInScope :: ArityEnv -> Id -> ArityEnv delInScope env id = del_join_env id $ del_sig_env id env @@ -1001,7 +1316,7 @@ delInScope env id = del_join_env id $ del_sig_env id env delInScopeList :: ArityEnv -> [Id] -> ArityEnv delInScopeList env ids = del_join_env_list ids $ del_sig_env_list ids env -lookupSigEnv :: ArityEnv -> Id -> Maybe ArityType +lookupSigEnv :: ArityEnv -> Id -> Maybe SafeArityType lookupSigEnv AE{ ae_mode = mode } id = case mode of BotStrictness -> Nothing EtaExpandArity{} -> Nothing @@ -1015,6 +1330,11 @@ pedanticBottoms AE{ ae_mode = mode } = case mode of EtaExpandArity{ am_opts = ArityOpts{ ao_ped_bot = ped_bot } } -> ped_bot FindRhsArity{ am_opts = ArityOpts{ ao_ped_bot = ped_bot } } -> ped_bot +exprCost :: ArityEnv -> CoreExpr -> Maybe Type -> Cost +exprCost env e mb_ty + | myExprIsCheap env e mb_ty = IsCheap + | otherwise = IsExpensive + -- | A version of 'exprIsCheap' that considers results from arity analysis -- and optionally the expression's type. -- Under 'exprBotStrictness_maybe', no expressions are cheap. @@ -1040,17 +1360,20 @@ myExprIsCheap AE{ae_mode = mode} e mb_ty = case mode of -- | A version of 'isCheapApp' that considers results from arity analysis. -- See Note [Arity analysis] for what's in the signature environment and why -- it's important. -myIsCheapApp :: IdEnv ArityType -> CheapAppFun +myIsCheapApp :: IdEnv SafeArityType -> CheapAppFun myIsCheapApp sigs fn n_val_args = case lookupVarEnv sigs fn of + -- Nothing means not a local function, fall back to regular -- 'GHC.Core.Utils.isCheapApp' - Nothing -> isCheapApp fn n_val_args - -- @Just at@ means local function with @at@ as current ArityType. 
+ Nothing -> isCheapApp fn n_val_args + + -- `Just at` means local function with `at` as current SafeArityType. -- Roughly approximate what 'isCheapApp' is doing. - Just (AT oss div) + Just (AT lams div) | isDeadEndDiv div -> True -- See Note [isCheapApp: bottoming functions] in GHC.Core.Utils - | n_val_args < length oss -> True -- Essentially isWorkFreeApp - | otherwise -> False + | n_val_args == 0 -> True -- Essentially + | n_val_args < length lams -> True -- isWorkFreeApp + | otherwise -> False ---------------- arityType :: ArityEnv -> CoreExpr -> ArityType @@ -1077,7 +1400,10 @@ arityType env (Lam x e) arityType env (App fun (Type _)) = arityType env fun arityType env (App fun arg ) - = arityApp (arityType env fun) (myExprIsCheap env arg Nothing) + = arityApp fun_at arg_cost + where + fun_at = arityType env fun + arg_cost = exprCost env arg Nothing -- Case/Let; keep arity if either the expression is cheap -- or it's a 1-shot lambda @@ -1098,9 +1424,8 @@ arityType env (Case scrut bndr _ alts) | exprOkForSpeculation scrut = alts_type - | otherwise -- In the remaining cases we may not push - = takeWhileOneShot alts_type -- evaluation of the scrutinee in - + | otherwise -- In the remaining cases we may not push + = addWork alts_type -- evaluation of the scrutinee in where env' = delInScope env bndr arity_type_alt (Alt _con bndrs rhs) = arityType (delInScopeList env' bndrs) rhs @@ -1128,17 +1453,17 @@ arityType env (Let (Rec pairs) body) | otherwise = pprPanic "arityType:joinrec" (ppr pairs) -arityType env (Let (NonRec b r) e) - = floatIn cheap_rhs (arityType env' e) +arityType env (Let (NonRec b rhs) e) + = floatIn rhs_cost (arityType env' e) where - cheap_rhs = myExprIsCheap env r (Just (idType b)) - env' = extendSigEnv env b (arityType env r) + rhs_cost = exprCost env rhs (Just (idType b)) + env' = extendSigEnv env b (safeArityType (arityType env rhs)) arityType env (Let (Rec prs) e) - = floatIn (all is_cheap prs) (arityType env' e) + = floatIn (allCosts bind_cost prs) (arityType env' e) where - env' = delInScopeList env (map fst prs) - is_cheap (b,e) = myExprIsCheap env' e (Just (idType b)) + env' = delInScopeList env (map fst prs) + bind_cost (b,e) = exprCost env' e (Just (idType b)) arityType env (Tick t e) | not (tickishIsCode t) = arityType env e @@ -1201,7 +1526,7 @@ environment mapping let-bound Ids to their ArityType. idArityType :: Id -> ArityType idArityType v | strict_sig <- idDmdSig v - , not $ isTopSig strict_sig + , not $ isNopSig strict_sig , (ds, div) <- splitDmdSig strict_sig , let arity = length ds -- Every strictness signature admits an arity signature! @@ -1209,8 +1534,8 @@ idArityType v | otherwise = AT (take (idArity v) one_shots) topDiv where - one_shots :: [OneShotInfo] -- One-shot-ness derived from the type - one_shots = typeOneShots (idType v) + one_shots :: [(Cost,OneShotInfo)] -- One-shot-ness derived from the type + one_shots = repeat IsCheap `zip` typeOneShots (idType v) {- %************************************************************************ @@ -1319,7 +1644,7 @@ Consider We'll get an ArityType for foo of \?1.T. Then we want to eta-expand to - foo = (\x. \eta{os}. (case x of ...as before...) eta)) |> some_co + foo = (\x. \eta{os}. (case x of ...as before...) eta) |> some_co That 'eta' binder is fresh, and we really want it to have the one-shot flag from the inner \s{os}. 
By expanding with the @@ -1347,14 +1672,14 @@ etaExpand n orig_expr in_scope = {-#SCC "eta_expand:in-scopeX" #-} mkInScopeSet (exprFreeVars orig_expr) -etaExpandAT :: InScopeSet -> ArityType -> CoreExpr -> CoreExpr +etaExpandAT :: InScopeSet -> SafeArityType -> CoreExpr -> CoreExpr -- See Note [Eta expansion with ArityType] -- -- We pass in the InScopeSet from the simplifier to avoid recomputing -- it here, which can be jolly expensive if the casts are big -- In #18223 it took 10% of compile time just to do the exprFreeVars! -etaExpandAT in_scope (AT oss _) orig_expr - = eta_expand in_scope oss orig_expr +etaExpandAT in_scope at orig_expr + = eta_expand in_scope (arityTypeOneShots at) orig_expr -- etaExpand arity e = res -- Then 'res' has at least 'arity' lambdas at the top @@ -1369,7 +1694,11 @@ etaExpandAT in_scope (AT oss _) orig_expr eta_expand :: InScopeSet -> [OneShotInfo] -> CoreExpr -> CoreExpr eta_expand in_scope one_shots (Cast expr co) - = Cast (eta_expand in_scope one_shots expr) co + = mkCast (eta_expand in_scope one_shots expr) co + -- This mkCast is important, because eta_expand might return an + -- expression with a cast at the outside; and tryCastWorkerWrapper + -- asssumes that we don't have nested casts. Makes a difference + -- in compile-time for T18223 eta_expand in_scope one_shots orig_expr = go in_scope one_shots [] orig_expr @@ -1440,7 +1769,7 @@ casts complicate the question. If we have and e :: N (N Int) then the eta-expansion should look like - (\(x::S) (y::S) -> e |> co x y) |> sym co + (\(x::S) (y::S) -> (e |> co) x y) |> sym co where co :: N (N Int) ~ S -> S -> Int co = axN @(N Int) ; (S -> axN @Int) @@ -1619,11 +1948,11 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty -- (go [o1,..,on] subst ty) = (in_scope, EI [b1,..,bn] co) -- co :: subst(ty) ~ b1_ty -> ... -> bn_ty -> tr - go _ [] subst _ -- See Note [exprArity invariant] + go _ [] subst _ ----------- Done! No more expansion needed = (getTCvInScope subst, EI [] MRefl) - go n oss@(one_shot:oss1) subst ty -- See Note [exprArity invariant] + go n oss@(one_shot:oss1) subst ty ----------- Forall types (forall a. ty) | Just (tcv,ty') <- splitForAllTyCoVar_maybe ty , (subst', tcv') <- Type.substVarBndr subst tcv @@ -1676,6 +2005,428 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty -- with an explicit lambda having a non-function type +{- +************************************************************************ +* * + Eta reduction +* * +************************************************************************ + +Note [Eta reduction makes sense] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +GHC's eta reduction transforms + \x y. <fun> x y ---> <fun> +We discuss when this is /sound/ in Note [Eta reduction soundness]. +But even assuming it is sound, when is it /desirable/. That +is what we discuss here. + +This test is made by `ok_fun` in tryEtaReduce. + +1. We want to eta-reduce only if we get all the way to a trivial + expression; we don't want to remove extra lambdas unless we are + going to avoid allocating this thing altogether. + + Trivial means *including* casts and type lambdas: + * `\x. f x |> co --> f |> (ty(x) -> co)` (provided `co` doesn't mention `x`) + * `/\a. \x. f @(Maybe a) x --> /\a. f @(Maybe a)` + See Note [Do not eta reduce PAPs] for why we insist on a trivial head. + +2. Type and dictionary abstraction. 
Regardless of whether 'f' is a value, it + is always sound to reduce /type lambdas/, thus: + (/\a -> f a) --> f + Moreover, we always want to, because it makes RULEs apply more often: + This RULE: `forall g. foldr (build (/\a -> g a))` + should match `foldr (build (/\b -> ...something complex...))` + and the simplest way to do so is eta-reduce `/\a -> g a` in the RULE to `g`. + + The type checker can insert these eta-expanded versions, + with both type and dictionary lambdas; hence the slightly + ad-hoc (all ok_lam bndrs) + +3. (See fun_arity in tryEtaReduce.) We have to hide `f`'s `idArity` in + its own RHS, lest we suffer from the last point of Note [Arity + robustness] in GHC.Core.Opt.Simplify.Env. There we have `f = \x. f x` + and we should not eta-reduce to `f=f`. Which might change a + terminating program (think @f `seq` e@) to a non-terminating one. + So we check for being a loop breaker first. However for GlobalIds + we can look at the arity; and for primops we must, since they have + no unfolding. [SG: Perhaps this is rather a soundness subtlety?] + +Of course, eta reduction is not always sound. See Note [Eta reduction soundness] +for when it is. + +When there are multiple arguments, we might get multiple eta-redexes. Example: + \x y. e x y + ==> { reduce \y. (e x) y in context \x._ } + \x. e x + ==> { reduce \x. e x in context _ } + e +And (1) implies that we never want to stop with `\x. e x`, because that is not a +trivial expression. So in practice, the implementation works by considering a +whole group of leading lambdas to reduce. + +These delicacies are why we don't simply use 'exprIsTrivial' and 'exprIsHNF' +in 'tryEtaReduce'. Alas. + +Note [Eta reduction soundness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +GHC's eta reduction transforms + \x y. <fun> x y ---> <fun> +For soundness, we obviously require that `x` and `y` +to not occur free. But what /other/ restrictions are there for +eta reduction to be sound? + +We discuss separately what it means for eta reduction to be +/desirable/, in Note [Eta reduction makes sense]. + +Eta reduction is *not* a sound transformation in general, because it +may change termination behavior if *value* lambdas are involved: + `bot` /= `\x. bot x` (as can be observed by a simple `seq`) +The past has shown that oversight of this fact can not only lead to endless +loops or exceptions, but also straight out *segfaults*. + +Nevertheless, we can give the following criteria for when it is sound to +perform eta reduction on an expression with n leading lambdas `\xs. e xs` +(checked in 'is_eta_reduction_sound' in 'tryEtaReduce', which focuses on the +case where `e` is trivial): + + A. It is sound to eta-reduce n arguments as long as n does not exceed the + `exprArity` of `e`. (Needs Arity analysis.) + This criterion exploits information about how `e` is *defined*. + + Example: If `e = \x. bot` then we know it won't diverge until it is called + with one argument. Hence it is safe to eta-reduce `\x. e x` to `e`. + By contrast, it would be *unsound* to eta-reduce 2 args, `\x y. e x y` to `e`: + `e 42` diverges when `(\x y. e x y) 42` does not. + + S. It is sound to eta-reduce n arguments in an evaluation context in which all + calls happen with at least n arguments. (Needs Strictness analysis.) + NB: This treats evaluations like a call with 0 args. + NB: This criterion exploits information about how `e` is *used*. + + Example: Given a function `g` like + `g c = Just (c 1 2 + c 2 3)` + it is safe to eta-reduce the arg in `g (\x y. 
e x y)` to `g e` without + knowing *anything* about `e` (perhaps it's a parameter occ itself), simply + because `g` always calls its parameter with 2 arguments. + It is also safe to eta-reduce just one arg, e.g., `g (\x. e x)` to `g e`. + By contrast, it would *unsound* to eta-reduce 3 args in a call site + like `g (\x y z. e x y z)` to `g e`, because that diverges when + `e = \x y. bot`. + + Could we relax to "*At least one call in the same trace* is with n args"? + (NB: Strictness analysis can only answer this relaxed question, not the + original formulation.) + Consider what happens for + ``g2 c = c True `seq` c False 42`` + Here, `g2` will call `c` with 2 arguments (if there is a call at all). + But it is unsound to eta-reduce the arg in `g2 (\x y. e x y)` to `g2 e` + when `e = \x. if x then bot else id`, because the latter will diverge when + the former would not. + + On the other hand, with `-fno-pendantic-bottoms` , we will have eta-expanded + the definition of `e` and then eta-reduction is sound + (see Note [Dealing with bottom]). + Consequence: We have to check that `-fpedantic-bottoms` is off; otherwise + eta-reduction based on demands is in fact unsound. + + See Note [Eta reduction based on evaluation context] for the implementation + details. This criterion is tested extensively in T21261. + + E. (See fun_arity in tryEtaReduce.) As a perhaps special case on the + boundary of (A) and (S), when we know that a fun binder `f` is in + WHNF, we simply assume it has arity 1 and apply (A). Example: + g f = f `seq` \x. f x + Here it's sound eta-reduce `\x. f x` to `f`, because `f` can't be bottom + after the `seq`. This turned up in #7542. + +And here are a few more technical criteria for when it is *not* sound to +eta-reduce that are specific to Core and GHC: + + L. With linear types, eta-reduction can break type-checking: + f :: A ⊸ B + g :: A -> B + g = \x. f x + The above is correct, but eta-reducing g would yield g=f, the linter will + complain that g and f don't have the same type. NB: Not unsound in the + dynamic semantics, but unsound according to the static semantics of Core. + + J. We may not undersaturate join points. + See Note [Invariants on join points] in GHC.Core, and #20599. + + B. We may not undersaturate functions with no binding. + See Note [Eta expanding primops]. + + W. We may not undersaturate StrictWorkerIds. + See Note [Strict Worker Ids] in GHC.CoreToStg.Prep. + +Here is a list of historic accidents surrounding unsound eta-reduction: + +* Consider + f = \x.f x + h y = case (case y of { True -> f `seq` True; False -> False }) of + True -> ...; False -> ... + If we (unsoundly) eta-reduce f to get f=f, the strictness analyser + says f=bottom, and replaces the (f `seq` True) with just + (f `cast` unsafe-co). + [SG in 2022: I don't think worker/wrapper would do this today.] + BUT, as things stand, 'f' got arity 1, and it *keeps* arity 1 (perhaps also + wrongly). So CorePrep eta-expands the definition again, so that it does not + terminate after all. + Result: seg-fault because the boolean case actually gets a function value. + See #1947. + +* Never *reduce* arity. For example + f = \xy. g x y + Then if h has arity 1 we don't want to eta-reduce because then + f's arity would decrease, and that is bad + [SG in 2022: I don't understand this point. There is no `h`, perhaps that + should have been `g`. Even then, this proposed eta-reduction is invalid by + criterion (A), which might actually be the point this anecdote is trying to + make. 
Perhaps the "no arity decrease" idea is also related to + Note [Arity robustness]?] + +Note [Do not eta reduce PAPs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +I considered eta-reducing if the result is a PAP: + \x. f e1 e2 x ==> f e1 e2 + +This reduces clutter, sometimes a lot. See Note [Do not eta-expand PAPs] +in GHC.Core.Opt.Simplify.Utils, where we are careful not to eta-expand +a PAP. If eta-expanding is bad, then eta-reducing is good! + +Also the code generator likes eta-reduced PAPs; see GHC.CoreToStg.Prep +Note [No eta reduction needed in rhsToBody]. + +But note that we don't want to eta-reduce + \x y. f <expensive> x y +to + f <expensive> +The former has arity 2, and repeats <expensive> for every call of the +function; the latter has arity 0, and shares <expensive>. We don't want +to change behaviour. Hence the call to exprIsCheap in ok_fun. + +I noticed this when examining #18993 and, although it is delicate, +eta-reducing to a PAP happens to fix the regression in #18993. + +HOWEVER, if we transform + \x. f y x ==> f y +that might mean that f isn't saturated any more, and does not inline. +This led to some other regressions. + +TL;DR currrently we do /not/ eta reduce if the result is a PAP. + +Note [Eta reduction with casted arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + (\(x:t3). f (x |> g)) :: t3 -> t2 + where + f :: t1 -> t2 + g :: t3 ~ t1 +This should be eta-reduced to + + f |> (sym g -> t2) + +So we need to accumulate a coercion, pushing it inward (past +variable arguments only) thus: + f (x |> co_arg) |> co --> (f |> (sym co_arg -> co)) x + f (x:t) |> co --> (f |> (t -> co)) x + f @ a |> co --> (f |> (forall a.co)) @ a + f @ (g:t1~t2) |> co --> (f |> (t1~t2 => co)) @ (g:t1~t2) +These are the equations for ok_arg. + +Note [Eta reduction with casted function] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Since we are pushing a coercion inwards, it is easy to accommodate + (\xy. (f x |> g) y) + (\xy. (f x y) |> g) + +See the `(Cast e co)` equation for `go` in `tryEtaReduce`. The +eta-expander pushes those casts outwards, so you might think we won't +ever see a cast here, but if we have + \xy. (f x y |> g) +we will call tryEtaReduce [x,y] (f x y |> g), and we'd like that to +work. This happens in GHC.Core.Opt.Simplify.Utils.mkLam, where +eta-expansion may be turned off (by sm_eta_expand). + +Note [Eta reduction based on evaluation context] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Eta reduction soundness], criterion (S) allows us to eta-reduce +`g (\x y. e x y)` to `g e` when we know that `g` always calls its parameter with +at least 2 arguments. So how do we read that off `g`'s demand signature? + +Let's take the simple example of #21261, where `g` (actually, `f`) is defined as + g c = c 1 2 + c 3 4 +Then this is how the pieces are put together: + + * Demand analysis infers `<SCS(C1(L))>` for `g`'s demand signature + + * When the Simplifier next simplifies the argument in `g (\x y. e x y)`, it + looks up the *evaluation context* of the argument in the form of the + sub-demand `CS(C1(L))` and stores it in the 'SimplCont'. + (Why does it drop the outer evaluation cardinality of the demand, `S`? + Because it's irrelevant! When we simplify an expression, we do so under the + assumption that it is currently under evaluation.) + This sub-demand literally says "Whenever this expression is evaluated, it + is also called with two arguments, potentially multiple times". 
+ + * Then the simplifier takes apart the lambda and simplifies the lambda group + and then calls 'tryEtaReduce' when rebuilding the lambda, passing the + evaluation context `CS(C1(L))` along. Then we simply peel off 2 call + sub-demands `Cn` and see whether all of the n's (here: `S=C_1N` and + `1=C_11`) were strict. And strict they are! Thus, it will eta-reduce + `\x y. e x y` to `e`. +-} + +-- | `tryEtaReduce [x,y,z] e sd` returns `Just e'` if `\x y z -> e` is evaluated +-- according to `sd` and can soundly and gainfully be eta-reduced to `e'`. +-- See Note [Eta reduction soundness] +-- and Note [Eta reduction makes sense] when that is the case. +tryEtaReduce :: [Var] -> CoreExpr -> SubDemand -> Maybe CoreExpr +-- Return an expression equal to (\bndrs. body) +tryEtaReduce bndrs body eval_sd + = go (reverse bndrs) body (mkRepReflCo (exprType body)) + where + incoming_arity = count isId bndrs -- See Note [Eta reduction makes sense], point (2) + + go :: [Var] -- Binders, innermost first, types [a3,a2,a1] + -> CoreExpr -- Of type tr + -> Coercion -- Of type tr ~ ts + -> Maybe CoreExpr -- Of type a1 -> a2 -> a3 -> ts + -- See Note [Eta reduction with casted arguments] + -- for why we have an accumulating coercion + -- + -- Invariant: (go bs body co) returns an expression + -- equivalent to (\(reverse bs). body |> co) + + -- See Note [Eta reduction with casted function] + go bs (Cast e co1) co2 + = go bs e (co1 `mkTransCo` co2) + + go bs (Tick t e) co + | tickishFloatable t + = fmap (Tick t) $ go bs e co + -- Float app ticks: \x -> Tick t (e x) ==> Tick t e + + go (b : bs) (App fun arg) co + | Just (co', ticks) <- ok_arg b arg co (exprType fun) + = fmap (flip (foldr mkTick) ticks) $ go bs fun co' + -- Float arg ticks: \x -> e (Tick t x) ==> Tick t e + + go remaining_bndrs fun co + | all isTyVar remaining_bndrs + -- If all the remaining_bnrs are tyvars, then the etad_exp + -- will be trivial, which is what we want. + -- e.g. We might have /\a \b. f [a] b, and we want to + -- eta-reduce to /\a. f [a] + -- We don't want to give up on this one: see #20040 + -- See Note [Eta reduction makes sense], point (1) + , remaining_bndrs `ltLength` bndrs + -- Only reply Just if /something/ has happened + , ok_fun fun + , let etad_expr = mkLams (reverse remaining_bndrs) (mkCast fun co) + used_vars = exprFreeVars etad_expr + reduced_bndrs = mkVarSet (dropList remaining_bndrs bndrs) + , used_vars `disjointVarSet` reduced_bndrs + -- Check for any of the binders free in the result, + -- including the accumulated coercion + -- See Note [Eta reduction makes sense], intro and point (1) + = Just etad_expr + + go _remaining_bndrs _fun _ = -- pprTrace "tER fail" (ppr _fun $$ ppr _remaining_bndrs) $ + Nothing + + --------------- + -- See Note [Eta reduction makes sense], point (1) + ok_fun (App fun (Type {})) = ok_fun fun + ok_fun (Cast fun _) = ok_fun fun + ok_fun (Tick _ expr) = ok_fun expr + ok_fun (Var fun_id) = is_eta_reduction_sound fun_id || all ok_lam bndrs + ok_fun _fun = False + + --------------- + -- See Note [Eta reduction soundness], this is THE place to check soundness! + is_eta_reduction_sound fun = + -- Check that eta-reduction won't make the program stricter... + (fun_arity fun >= incoming_arity -- criterion (A) and (E) + || all_calls_with_arity incoming_arity) -- criterion (S) + -- ... 
and that the function can be eta reduced to arity 0 + -- without violating invariants of Core and GHC + && canEtaReduceToArity fun 0 0 -- criteria (L), (J), (W), (B) + all_calls_with_arity n = isStrict (peelManyCalls n eval_sd) + -- See Note [Eta reduction based on evaluation context] + + --------------- + fun_arity fun + | isLocalId fun + , isStrongLoopBreaker (idOccInfo fun) = 0 + -- See Note [Eta reduction makes sense], point (3) + | arity > 0 = arity + | isEvaldUnfolding (idUnfolding fun) = 1 + -- See Note [Eta reduction soundness], criterion (E) + | otherwise = 0 + where + arity = idArity fun + + --------------- + ok_lam v = isTyVar v || isEvVar v + -- See Note [Eta reduction makes sense], point (2) + + --------------- + ok_arg :: Var -- Of type bndr_t + -> CoreExpr -- Of type arg_t + -> Coercion -- Of kind (t1~t2) + -> Type -- Type (arg_t -> t1) of the function + -- to which the argument is supplied + -> Maybe (Coercion -- Of type (arg_t -> t1 ~ bndr_t -> t2) + -- (and similarly for tyvars, coercion args) + , [CoreTickish]) + -- See Note [Eta reduction with casted arguments] + ok_arg bndr (Type ty) co _ + | Just tv <- getTyVar_maybe ty + , bndr == tv = Just (mkHomoForAllCos [tv] co, []) + ok_arg bndr (Var v) co fun_ty + | bndr == v + , let mult = idMult bndr + , Just (fun_mult, _, _) <- splitFunTy_maybe fun_ty + , mult `eqType` fun_mult -- There is no change in multiplicity, otherwise we must abort + = Just (mkFunResCo Representational (idScaledType bndr) co, []) + ok_arg bndr (Cast e co_arg) co fun_ty + | (ticks, Var v) <- stripTicksTop tickishFloatable e + , Just (fun_mult, _, _) <- splitFunTy_maybe fun_ty + , bndr == v + , fun_mult `eqType` idMult bndr + = Just (mkFunCo Representational (multToCo fun_mult) (mkSymCo co_arg) co, ticks) + -- The simplifier combines multiple casts into one, + -- so we can have a simple-minded pattern match here + ok_arg bndr (Tick t arg) co fun_ty + | tickishFloatable t, Just (co', ticks) <- ok_arg bndr arg co fun_ty + = Just (co', t:ticks) + + ok_arg _ _ _ _ = Nothing + +-- | Can we eta-reduce the given function to the specified arity? +-- See Note [Eta reduction soundness], criteria (B), (J), (W) and (L). +canEtaReduceToArity :: Id -> JoinArity -> Arity -> Bool +canEtaReduceToArity fun dest_join_arity dest_arity = + not $ + hasNoBinding fun -- (B) + -- Don't undersaturate functions with no binding. + + || ( isJoinId fun && dest_join_arity < idJoinArity fun ) -- (J) + -- Don't undersaturate join points. + -- See Note [Invariants on join points] in GHC.Core, and #20599 + + || ( dest_arity < idCbvMarkArity fun ) -- (W) + -- Don't undersaturate StrictWorkerIds. + -- See Note [Strict Worker Ids] in GHC.CoreToStg.Prep. + + || isLinearType (idType fun) -- (L) + -- Don't perform eta reduction on linear types. + -- If `f :: A %1-> B` and `g :: A -> B`, + -- then `g x = f x` is OK but `g = f` is not. + + {- ********************************************************************* * * The "push rules" |