Diffstat (limited to 'compiler/GHC/Core/Opt/Arity.hs')
-rw-r--r--  compiler/GHC/Core/Opt/Arity.hs | 1273
1 file changed, 1012 insertions, 261 deletions
diff --git a/compiler/GHC/Core/Opt/Arity.hs b/compiler/GHC/Core/Opt/Arity.hs index 811beb6c0a..5858ff91e0 100644 --- a/compiler/GHC/Core/Opt/Arity.hs +++ b/compiler/GHC/Core/Opt/Arity.hs @@ -11,16 +11,29 @@ -- | Arity and eta expansion module GHC.Core.Opt.Arity - ( manifestArity, joinRhsArity, exprArity - , typeArity, typeOneShots - , exprEtaExpandArity, findRhsArity - , etaExpand, etaExpandAT - , exprBotStrictness_maybe + ( -- Finding arity + manifestArity, joinRhsArity, exprArity + , findRhsArity, exprBotStrictness_maybe , ArityOpts(..) + -- ** Eta expansion + , exprEtaExpandArity, etaExpand, etaExpandAT + + -- ** Eta reduction + , tryEtaReduce + -- ** ArityType - , ArityType(..), mkBotArityType, mkTopArityType, expandableArityType - , arityTypeArity, maxWithArity, minWithArity, idArityType + , ArityType, mkBotArityType, mkManifestArityType + , arityTypeArity, idArityType, getBotArity + + -- ** typeArity and the state hack + , typeArity, typeOneShots, typeOneShot + , isOneShotBndr + , isStateHackType + + -- * Lambdas + , zapLamBndrs + -- ** Join points , etaExpandToJoinPoint, etaExpandToJoinPointRule @@ -39,7 +52,7 @@ import GHC.Core.Utils import GHC.Core.DataCon import GHC.Core.TyCon ( tyConArity ) import GHC.Core.TyCon.RecWalk ( initRecTc, checkRecTc ) -import GHC.Core.Predicate ( isDictTy, isCallStackPredTy ) +import GHC.Core.Predicate ( isDictTy, isEvVar, isCallStackPredTy ) import GHC.Core.Multiplicity -- We have two sorts of substitution: @@ -50,17 +63,19 @@ import GHC.Core.Type as Type import GHC.Core.Coercion as Type import GHC.Types.Demand -import GHC.Types.Var -import GHC.Types.Var.Env import GHC.Types.Id +import GHC.Types.Var.Env import GHC.Types.Var.Set import GHC.Types.Basic import GHC.Types.Tickish +import GHC.Builtin.Types.Prim import GHC.Builtin.Uniques + import GHC.Data.FastString import GHC.Data.Pair +import GHC.Utils.GlobalVars( unsafeHasNoStateHack ) import GHC.Utils.Constants (debugIsOn) import GHC.Utils.Outputable import GHC.Utils.Panic @@ -123,7 +138,8 @@ exprArity :: CoreExpr -> Arity -- We do /not/ guarantee that exprArity e <= typeArity e -- You may need to do arity trimming after calling exprArity -- See Note [Arity trimming] --- (If we do arity trimming here we have to do it at every cast. +-- Reason: if we do arity trimming here we have take exprType +-- and that can be expensive if there is a large cast exprArity e = go e where go (Var v) = idArity v @@ -139,13 +155,50 @@ exprArity e = go e go _ = 0 --------------- +exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, DmdSig) +-- A cheap and cheerful function that identifies bottoming functions +-- and gives them a suitable strictness signatures. It's used during +-- float-out +exprBotStrictness_maybe e + = case getBotArity (arityType botStrictnessArityEnv e) of + Nothing -> Nothing + Just ar -> Just (ar, mkVanillaDmdSig ar botDiv) + +{- Note [exprArity for applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we come to an application we check that the arg is trivial. + eg f (fac x) does not have arity 2, + even if f has arity 3! + +* We require that is trivial rather merely cheap. Suppose f has arity 2. + Then f (Just y) + has arity 0, because if we gave it arity 1 and then inlined f we'd get + let v = Just y in \w. <f-body> + which has arity 0. And we try to maintain the invariant that we don't + have arity decreases. + +* The `max 0` is important! 
(\x y -> f x) has arity 2, even if f is + unknown, hence arity 0 + + +************************************************************************ +* * + typeArity and the "state hack" +* * +********************************************************************* -} + + typeArity :: Type -> Arity +-- ^ (typeArity ty) says how many arrows GHC can expose in 'ty', after +-- looking through newtypes. More generally, (typeOneShots ty) returns +-- ty's [OneShotInfo], based only on the type itself, using typeOneShot +-- on the argument type to access the "state hack". typeArity = length . typeOneShots typeOneShots :: Type -> [OneShotInfo] -- How many value arrows are visible in the type? -- We look through foralls, and newtypes --- See Note [typeArity invariants] +-- See Note [Arity invariants for bindings] typeOneShots ty = go initRecTc ty where @@ -174,64 +227,121 @@ typeOneShots ty | otherwise = [] ---------------- -exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, DmdSig) --- A cheap and cheerful function that identifies bottoming functions --- and gives them a suitable strictness signatures. It's used during --- float-out -exprBotStrictness_maybe e - = case getBotArity (arityType botStrictnessArityEnv e) of - Nothing -> Nothing - Just ar -> Just (ar, sig ar) - where - sig ar = mkClosedDmdSig (replicate ar topDmd) botDiv +typeOneShot :: Type -> OneShotInfo +typeOneShot ty + | isStateHackType ty = OneShotLam + | otherwise = NoOneShotInfo + +-- | Like 'idOneShotInfo', but taking the Horrible State Hack in to account +-- See Note [The state-transformer hack] in "GHC.Core.Opt.Arity" +idStateHackOneShotInfo :: Id -> OneShotInfo +idStateHackOneShotInfo id + | isStateHackType (idType id) = OneShotLam + | otherwise = idOneShotInfo id + +-- | Returns whether the lambda associated with the 'Id' is +-- certainly applied at most once +-- This one is the "business end", called externally. +-- It works on type variables as well as Ids, returning True +-- Its main purpose is to encapsulate the Horrible State Hack +-- See Note [The state-transformer hack] in "GHC.Core.Opt.Arity" +isOneShotBndr :: Var -> Bool +isOneShotBndr var + | isTyVar var = True + | OneShotLam <- idStateHackOneShotInfo var = True + | otherwise = False + +isStateHackType :: Type -> Bool +isStateHackType ty + | unsafeHasNoStateHack -- Switch off with -fno-state-hack + = False + | otherwise + = case tyConAppTyCon_maybe ty of + Just tycon -> tycon == statePrimTyCon + _ -> False + -- This is a gross hack. It claims that + -- every function over realWorldStatePrimTy is a one-shot + -- function. This is pretty true in practice, and makes a big + -- difference. For example, consider + -- a `thenST` \ r -> ...E... + -- The early full laziness pass, if it doesn't know that r is one-shot + -- will pull out E (let's say it doesn't mention r) to give + -- let lvl = E in a `thenST` \ r -> ...lvl... + -- When `thenST` gets inlined, we end up with + -- let lvl = E in \s -> case a s of (r, s') -> ...lvl... + -- and we don't re-inline E. + -- + -- It would be better to spot that r was one-shot to start with, but + -- I don't want to rely on that. + -- + -- Another good example is in fill_in in PrelPack.hs. We should be able to + -- spot that fill_in has arity 2 (and when Keith is done, we will) but we can't yet. 
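
For a concrete feel for this classification, here is a minimal standalone sketch with a toy type representation and hypothetical names (typeOneShotsSketch, typeAritySketch). Unlike the real typeOneShots/isStateHackType above, it does not look through newtypes or foralls and ignores the -fno-state-hack switch:

  -- Minimal standalone sketch (hypothetical names, toy 'Ty' type); the real
  -- typeOneShots works on GHC's Type, looks through newtypes and foralls,
  -- and respects -fno-state-hack.
  data OneShotInfo = NoOneShotInfo | OneShotLam deriving Show

  data Ty = TyCon String [Ty]   -- e.g. TyCon "State#" [TyCon "RealWorld" []]
          | FunTy Ty Ty         -- argument type -> result type

  typeOneShotsSketch :: Ty -> [OneShotInfo]
  typeOneShotsSketch (FunTy arg res) = one_shot arg : typeOneShotsSketch res
    where
      -- the "state hack": a State#-typed argument marks a one-shot lambda
      one_shot (TyCon "State#" _) = OneShotLam
      one_shot _                  = NoOneShotInfo
  typeOneShotsSketch _ = []

  typeAritySketch :: Ty -> Int
  typeAritySketch = length . typeOneShotsSketch

  -- e.g. for Int -> State# RealWorld -> Int the sketch gives
  -- [NoOneShotInfo, OneShotLam]: arity 2, with a one-shot inner lambda.
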
-{- -Note [typeArity invariants] -~~~~~~~~~~~~~~~~~~~~~~~~~~ -We have the following invariants around typeArity - (1) In any binding x = e, - idArity f <= typeArity (idType f) +{- Note [Arity invariants for bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We have the following invariants for let-bindings + + (1) In any binding f = e, + idArity f <= typeArity (idType f) + We enforce this with trimArityType, called in findRhsArity; + see Note [Arity trimming]. + + Note that we enforce this only for /bindings/. We do /not/ insist that + arityTypeArity (arityType e) <= typeArity (exprType e) + because that is quite a bit more expensive to guaranteed; it would + mean checking at every Cast in the recursive arityType, for example. (2) If typeArity (exprType e) = n, then manifestArity (etaExpand e n) = n That is, etaExpand can always expand as much as typeArity says - So the case analysis in etaExpand and in typeArity must match + (or less, of course). So the case analysis in etaExpand and in + typeArity must match. -Why is this important? Because + Consequence: because of (1), if we eta-expand to (idArity f), we will + end up with n manifest lambdas. - - In GHC.Iface.Tidy we use exprArity to fix the *final arity* of - each top-level Id, and in + (3) In any binding f = e, + idArity f <= arityTypeArity (safeArityType (arityType e)) + That is, we call safeArityType before attributing e's arityType to f. + See Note [SafeArityType]. - - In CorePrep we use etaExpand on each rhs, so that the visible lambdas - actually match that arity, which in turn means - that the StgRhs has the right number of lambdas + So we call safeArityType in findRhsArity. Suppose we have f :: Int -> Int -> Int f x y = x+y -- Arity 2 g :: F Int - g = case x of { True -> f |> co1 - ; False -> g |> co2 } + g = case <cond> of { True -> f |> co1 + ; False -> g |> co2 } -Now, we can't eta-expand g to have arity 2, because etaExpand, which works -off the /type/ of the expression, doesn't know how to make an eta-expanded -binding +where F is a type family. Now, we can't eta-expand g to have arity 2, +because etaExpand, which works off the /type/ of the expression +(albeit looking through newtypes), doesn't know how to make an +eta-expanded binding g = (\a b. case x of ...) |> co -because can't make up `co` or the types of `a` and `b`. +because it can't make up `co` or the types of `a` and `b`. So invariant (1) ensures that every binding has an arity that is no greater than the typeArity of the RHS; and invariant (2) ensures that etaExpand and handle what typeArity says. +Why is this important? Because + + - In GHC.Iface.Tidy we use exprArity/manifestArity to fix the *final + arity* of each top-level Id, and in + + - In CorePrep we use etaExpand on each rhs, so that the visible + lambdas actually match that arity, which in turn means that the + StgRhs has a number of lambdas that precisely matches the arity. + Note [Arity trimming] ~~~~~~~~~~~~~~~~~~~~~ -Arity trimming, implemented by minWithArity, directly implements -invariant (1) of Note [typeArity invariants]. Failing to do so, and -hence breaking invariant (1) led to #5441. +Invariant (1) of Note [Arity invariants for bindings] is upheld by findRhsArity, +which calls trimArityType to trim the ArityType to match the Arity of the +binding. Failing to do so, and hence breaking invariant (1) led to #5441. How to trim? If we end in topDiv, it's easy. But we must take great care with dead ends (i.e. botDiv). Suppose the expression was (\x y. 
error "urk"), @@ -293,26 +403,34 @@ trying to *make* it hold, but it's tricky and I gave up. The test simplCore/should_compile/T3722 is an excellent example. -------- End of old out of date comments, just for interest ----------- +-} +{- ******************************************************************** +* * + Zapping lambda binders +* * +********************************************************************* -} -Note [exprArity for applications] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When we come to an application we check that the arg is trivial. - eg f (fac x) does not have arity 2, - even if f has arity 3! - -* We require that is trivial rather merely cheap. Suppose f has arity 2. - Then f (Just y) - has arity 0, because if we gave it arity 1 and then inlined f we'd get - let v = Just y in \w. <f-body> - which has arity 0. And we try to maintain the invariant that we don't - have arity decreases. +zapLamBndrs :: FullArgCount -> [Var] -> [Var] +-- If (\xyz. t) appears under-applied to only two arguments, +-- we must zap the occ-info on x,y, because they appear (in 't') under the \z. +-- See Note [Occurrence analysis for lambda binders] in GHc.Core.Opt.OccurAnal +-- +-- NB: both `arg_count` and `bndrs` include both type and value args/bndrs +zapLamBndrs arg_count bndrs + | no_need_to_zap = bndrs + | otherwise = zap_em arg_count bndrs + where + no_need_to_zap = all isOneShotBndr (drop arg_count bndrs) -* The `max 0` is important! (\x y -> f x) has arity 2, even if f is - unknown, hence arity 0 + zap_em :: FullArgCount -> [Var] -> [Var] + zap_em 0 bs = bs + zap_em _ [] = [] + zap_em n (b:bs) | isTyVar b = b : zap_em (n-1) bs + | otherwise = zapLamIdInfo b : zap_em (n-1) bs -************************************************************************ +{- ********************************************************************* * * Computing the "arity" of an expression * * @@ -490,34 +608,72 @@ but not to introduce a new lambda. Note [ArityType] ~~~~~~~~~~~~~~~~ +ArityType can be thought of as an abstraction of an expression. +The ArityType + AT [ (IsCheap, NoOneShotInfo) + , (IsExpensive, OneShotLam) + , (IsCheap, OneShotLam) ] Dunno) + +abstracts an expression like + \x. let <expensive> in + \y{os}. + \z{os}. blah + +In general we have (AT lams div). Then +* In lams :: [(Cost,OneShotInfo)] + * The Cost flag describes the part of the expression down + to the first (value) lambda. + * The OneShotInfo flag gives the one-shot info on that lambda. + +* If 'div' is dead-ending ('isDeadEndDiv'), then application to + 'length lams' arguments will surely diverge, similar to the situation + with 'DmdType'. + ArityType is the result of a compositional analysis on expressions, from which we can decide the real arity of the expression (extracted with function exprEtaExpandArity). We use the following notation: - at ::= \o1..on.div + at ::= \p1..pn.div div ::= T | x | ⊥ - o ::= ? | 1 -And omit the \. if n = 0. Examples: - \?11.T stands for @AT [NoOneShotInfo,OneShotLam,OneShotLam] topDiv@ - ⊥ stands for @AT [] botDiv@ + p ::= (c o) + c ::= X | C -- Expensive or Cheap + o ::= ? | 1 -- NotOneShot or OneShotLam +We may omit the \. if n = 0. +And ⊥ stands for `AT [] botDiv` + +Here is an example demonstrating the notation: + \(C?)(X1)(C1).T +stands for + AT [ (IsCheap,NoOneShotInfo) + , (IsExpensive,OneShotLam) + , (IsCheap,OneShotLam) ] + topDiv + See the 'Outputable' instance for more information. It's pretty simple. +How can we use ArityType? Example: + f = \x\y. let v = <expensive> in + \s(one-shot) \t(one-shot). 
blah + 'f' has arity type \(C?)(C?)(X1)(C1).T + The one-shot-ness means we can, in effect, push that + 'let' inside the \st, and expand to arity 4 + +Suppose f = \xy. x+y +Then f :: \(C?)(C?).T + f v :: \(C?).T + f <expensive> :: \(X?).T + Here is what the fields mean. If an arbitrary expression 'f' has ArityType 'at', then * If @at = AT [o1,..,on] botDiv@ (notation: \o1..on.⊥), then @f x1..xn@ definitely diverges. Partial applications to fewer than n args may *or - may not* diverge. + may not* diverge. Ditto exnDiv. - We allow ourselves to eta-expand bottoming functions, even - if doing so may lose some `seq` sharing, - let x = <expensive> in \y. error (g x y) - ==> \y. let x = <expensive> in error (g x y) - - * If @at = AT [o1,..,on] topDiv@ (notation: \o1..on.T), then expanding 'f' - to @\x1..xn. f x1..xn@ loses no sharing, assuming the calls of f respect - the one-shot-ness o1..on of its definition. + * If `f` has ArityType `at` we can eta-expand `f` to have (aritTypeOneShots at) + arguments without losing sharing. This function checks that the either + there are no expensive expressions, or the lambdas are one-shots. NB 'f' is an arbitrary expression, eg @f = g e1 e2@. This 'f' can have arity type @AT oss _@, with @length oss > 0@, only if e1 e2 are themselves @@ -530,20 +686,45 @@ ArityType 'at', then So eta expansion is dynamically ok; see Note [State hack and bottoming functions], the part about catch# -Example: - f = \x\y. let v = <expensive> in - \s(one-shot) \t(one-shot). blah - 'f' has arity type \??11.T - The one-shot-ness means we can, in effect, push that - 'let' inside the \st. +Wrinkles +* Wrinkle [Bottoming functions]: see function 'arityLam'. + We treat bottoming functions as one-shot, because there is no point + in floating work outside the lambda, and it's fine to float it inside. -Suppose f = \xy. x+y -Then f :: \??.T - f v :: \?.T - f <expensive> :: T --} + For example, this is fine (see test stranal/sigs/BottomFromInnerLambda) + let x = <expensive> in \y. error (g x y) + ==> \y. let x = <expensive> in error (g x y) + Idea: perhaps we could enforce this invariant with + data Arity Type = TopAT [(Cost, OneShotInfo)] | DivAT [Cost] + + +Note [SafeArityType] +~~~~~~~~~~~~~~~~~~~~ +The function safeArityType trims an ArityType to return a "safe" ArityType, +for which we use a type synonym SafeArityType. It is "safe" in the sense +that (arityTypeArity at) really reflects the arity of the expression, whereas +a regular ArityType might have more lambdas in its [ATLamInfo] that the +(cost-free) arity of the expression. + +For example + \x.\y.let v = expensive in \z. blah +has + arityType = AT [C?, C?, X?, C?] Top +But the expression actually has arity 2, not 4, because of the X. +So safeArityType will trim it to (AT [C?, C?] Top), whose [ATLamInfo] +now reflects the (cost-free) arity of the expression + +Why do we ever need an "unsafe" ArityType, such as the example above? +Because its (cost-free) arity may increased by combineWithDemandOneShots +in findRhsArity. See Note [Combining arity type with demand info]. + +Thus the function `arityType` returns a regular "unsafe" ArityType, that +goes deeply into the lambdas (including under IsExpensive). But that is +very local; most ArityTypes are indeed "safe". We use the type synonym +SafeArityType to indicate where we believe the ArityType is safe. +-} -- | The analysis lattice of arity analysis. 
It is isomorphic to -- @@ -574,22 +755,33 @@ Then f :: \??.T -- -- We rely on this lattice structure for fixed-point iteration in -- 'findRhsArity'. For the semantics of 'ArityType', see Note [ArityType]. -data ArityType - = AT ![OneShotInfo] !Divergence - -- ^ @AT oss div@ means this value can safely be eta-expanded @length oss@ - -- times, provided use sites respect the 'OneShotInfo's in @oss@. - -- A 'OneShotLam' annotation can come from two sources: - -- * The user annotated a lambda as one-shot with 'GHC.Exts.oneShot' - -- * It's from a lambda binder of a type affected by `-fstate-hack`. - -- See 'idStateHackOneShotInfo'. - -- In both cases, 'OneShotLam' should win over 'NoOneShotInfo', see - -- Note [Combining case branches]. - -- - -- If @div@ is dead-ending ('isDeadEndDiv'), then application to - -- @length os@ arguments will surely diverge, similar to the situation - -- with 'DmdType'. +data ArityType -- See Note [ArityType] + = AT ![ATLamInfo] !Divergence + -- ^ `AT oss div` is an abstraction of the expression, which describes + -- its lambdas, and how much work appears where. + -- See Note [ArityType] for more information + -- + -- If `div` is dead-ending ('isDeadEndDiv'), then application to + -- `length os` arguments will surely diverge, similar to the situation + -- with 'DmdType'. deriving Eq +type ATLamInfo = (Cost,OneShotInfo) + -- ^ Info about one lambda in an ArityType + -- See Note [ArityType] + +type SafeArityType = ArityType -- See Note [SafeArityType] + +data Cost = IsCheap | IsExpensive + deriving( Eq ) + +allCosts :: (a -> Cost) -> [a] -> Cost +allCosts f xs = foldr (addCost . f) IsCheap xs + +addCost :: Cost -> Cost -> Cost +addCost IsCheap IsCheap = IsCheap +addCost _ _ = IsExpensive + -- | This is the BNF of the generated output: -- -- @ @@ -608,57 +800,56 @@ instance Outputable ArityType where pp_div Diverges = char '⊥' pp_div ExnOrDiv = char 'x' pp_div Dunno = char 'T' - pp_os OneShotLam = char '1' - pp_os NoOneShotInfo = char '?' + pp_os (IsCheap, OneShotLam) = text "(C1)" + pp_os (IsExpensive, OneShotLam) = text "(X1)" + pp_os (IsCheap, NoOneShotInfo) = text "(C?)" + pp_os (IsExpensive, NoOneShotInfo) = text "(X?)" mkBotArityType :: [OneShotInfo] -> ArityType -mkBotArityType oss = AT oss botDiv +mkBotArityType oss = AT [(IsCheap,os) | os <- oss] botDiv botArityType :: ArityType botArityType = mkBotArityType [] -mkTopArityType :: [OneShotInfo] -> ArityType -mkTopArityType oss = AT oss topDiv +mkManifestArityType :: [OneShotInfo] -> ArityType +mkManifestArityType oss = AT [(IsCheap,os) | os <- oss] topDiv topArityType :: ArityType -topArityType = mkTopArityType [] +topArityType = AT [] topDiv -- | The number of value args for the arity type -arityTypeArity :: ArityType -> Arity -arityTypeArity (AT oss _) = length oss - --- | True <=> eta-expansion will add at least one lambda -expandableArityType :: ArityType -> Bool -expandableArityType at = arityTypeArity at > 0 - --- | See Note [Dead ends] in "GHC.Types.Demand". --- Bottom implies a dead end. -isDeadEndArityType :: ArityType -> Bool -isDeadEndArityType (AT _ div) = isDeadEndDiv div - ------------------------ -infixl 2 `maxWithArity`, `minWithArity` - --- | Expand a non-bottoming arity type so that it has at least the given arity. -maxWithArity :: ArityType -> Arity -> ArityType -maxWithArity at@(AT oss div) !ar - | isDeadEndArityType at = at - | oss `lengthAtLeast` ar = at - | otherwise = AT (take ar $ oss ++ repeat NoOneShotInfo) div - --- | Trim an arity type so that it has at most the given arity. 
--- Any excess 'OneShotInfo's are truncated to 'topDiv', even if they end in --- 'ABot'. See Note [Arity trimming] -minWithArity :: ArityType -> Arity -> ArityType -minWithArity at@(AT oss _) ar - | oss `lengthAtMost` ar = at - | otherwise = AT (take ar oss) topDiv - ----------------------- -takeWhileOneShot :: ArityType -> ArityType -takeWhileOneShot (AT oss div) - | isDeadEndDiv div = AT (takeWhile isOneShotInfo oss) topDiv - | otherwise = AT (takeWhile isOneShotInfo oss) div +arityTypeArity :: SafeArityType -> Arity +arityTypeArity (AT lams _) = length lams + +arityTypeOneShots :: SafeArityType -> [OneShotInfo] +-- Returns a list only as long as the arity should be +arityTypeOneShots (AT lams _) = map snd lams + +safeArityType :: ArityType -> SafeArityType +-- ^ Assuming this ArityType is all we know, find the arity of +-- the function, and trim the argument info (and Divergenge) +-- to match that arity. See Note [SafeArityType] +safeArityType at@(AT lams _) + = case go 0 IsCheap lams of + Nothing -> at -- No trimming needed + Just ar -> AT (take ar lams) topDiv + where + go :: Arity -> Cost -> [(Cost,OneShotInfo)] -> Maybe Arity + go _ _ [] = Nothing + go ar ch1 ((ch2,os):lams) + = case (ch1 `addCost` ch2, os) of + (IsExpensive, NoOneShotInfo) -> Just ar + (ch, _) -> go (ar+1) ch lams + +infixl 2 `trimArityType` + +trimArityType :: Arity -> ArityType -> ArityType +-- ^ Trim an arity type so that it has at most the given arity. +-- Any excess 'OneShotInfo's are truncated to 'topDiv', even if +-- they end in 'ABot'. See Note [Arity trimming] +trimArityType max_arity at@(AT lams _) + | lams `lengthAtMost` max_arity = at + | otherwise = AT (take max_arity lams) topDiv data ArityOpts = ArityOpts { ao_ped_bot :: !Bool -- See Note [Dealing with bottom] @@ -667,10 +858,17 @@ data ArityOpts = ArityOpts -- | The Arity returned is the number of value args the -- expression can be applied to without doing much work -exprEtaExpandArity :: ArityOpts -> CoreExpr -> ArityType +exprEtaExpandArity :: ArityOpts -> CoreExpr -> Maybe SafeArityType -- exprEtaExpandArity is used when eta expanding -- e ==> \xy -> e x y -exprEtaExpandArity opts e = arityType (etaExpandArityEnv opts) e +-- Nothing if the expression has arity 0 +exprEtaExpandArity opts e + | AT [] _ <- arity_type + = Nothing + | otherwise + = Just arity_type + where + arity_type = safeArityType (arityType (etaExpandArityEnv opts) e) getBotArity :: ArityType -> Maybe Arity -- Arity of a divergent function @@ -678,29 +876,54 @@ getBotArity (AT oss div) | isDeadEndDiv div = Just $ length oss | otherwise = Nothing ----------------------- -findRhsArity :: ArityOpts -> RecFlag -> Id -> CoreExpr -> Arity -> ArityType + +{- ********************************************************************* +* * + findRhsArity +* * +********************************************************************* -} + +findRhsArity :: ArityOpts -> RecFlag -> Id -> CoreExpr -> Arity -> SafeArityType -- This implements the fixpoint loop for arity analysis -- See Note [Arity analysis] -- If findRhsArity e = (n, is_bot) then -- (a) any application of e to <n arguments will not do much work, -- so it is safe to expand e ==> (\x1..xn. e x1 .. 
xn) -- (b) if is_bot=True, then e applied to n args is guaranteed bottom - -findRhsArity opts NonRecursive _ rhs _ - = arityType (findRhsArityEnv opts) rhs - -findRhsArity opts Recursive bndr rhs old_arity - = go 0 botArityType - -- We always do one step, but usually that produces a result equal to - -- old_arity, and then we stop right away, because old_arity is assumed - -- to be sound. In other words, arities should never decrease. - -- Result: the common case is that there is just one iteration +-- +-- Returns an ArityType that is guaranteed trimmed to typeArity of 'bndr' +-- See Note [Arity trimming] +findRhsArity opts is_rec bndr rhs old_arity + = case is_rec of + Recursive -> go 0 botArityType + NonRecursive -> step init_env where - go :: Int -> ArityType -> ArityType - go !n cur_at@(AT oss div) + init_env :: ArityEnv + init_env = findRhsArityEnv opts + + ty_arity = typeArity (idType bndr) + id_one_shots = idDemandOneShots bndr + + step :: ArityEnv -> SafeArityType + step env = trimArityType ty_arity $ + safeArityType $ -- See Note [Arity invariants for bindings], item (3) + arityType env rhs `combineWithDemandOneShots` id_one_shots + -- trimArityType: see Note [Trim arity inside the loop] + -- combineWithDemandOneShots: take account of the demand on the + -- binder. Perhaps it is always called with 2 args + -- let f = \x. blah in (f 3 4, f 1 9) + -- f's demand-info says how many args it is called with + + -- The fixpoint iteration (go), done for recursive bindings. We + -- always do one step, but usually that produces a result equal + -- to old_arity, and then we stop right away, because old_arity + -- is assumed to be sound. In other words, arities should never + -- decrease. Result: the common case is that there is just one + -- iteration + go :: Int -> SafeArityType -> SafeArityType + go !n cur_at@(AT lams div) | not (isDeadEndDiv div) -- the "stop right away" case - , length oss <= old_arity = cur_at -- from above + , length lams <= old_arity = cur_at -- from above | next_at == cur_at = cur_at | otherwise = -- Warn if more than 2 iterations. Why 2? 
See Note [Exciting arity] @@ -709,20 +932,49 @@ findRhsArity opts Recursive bndr rhs old_arity (nest 2 (ppr bndr <+> ppr cur_at <+> ppr next_at $$ ppr rhs)) $ go (n+1) next_at where - next_at = step cur_at - - step :: ArityType -> ArityType - step at = -- pprTrace "step" (vcat [ ppr bndr <+> ppr at <+> ppr (arityType env rhs) - -- , ppr (idType bndr) - -- , ppr (typeArity (idType bndr)) ]) $ - arityType env rhs - where - env = extendSigEnv (findRhsArityEnv opts) bndr at + next_at = step (extendSigEnv init_env bndr cur_at) +infixl 2 `combineWithDemandOneShots` -{- -Note [Arity analysis] -~~~~~~~~~~~~~~~~~~~~~ +combineWithDemandOneShots :: ArityType -> [OneShotInfo] -> ArityType +-- See Note [Combining arity type with demand info] +combineWithDemandOneShots at@(AT lams div) oss + | null lams = at + | otherwise = AT (zip_lams lams oss) div + where + zip_lams :: [ATLamInfo] -> [OneShotInfo] -> [ATLamInfo] + zip_lams lams [] = lams + zip_lams [] oss = [ (IsExpensive,OneShotLam) + | _ <- takeWhile isOneShotInfo oss] + zip_lams ((ch,os1):lams) (os2:oss) + = (ch, os1 `bestOneShot` os2) : zip_lams lams oss + +idDemandOneShots :: Id -> [OneShotInfo] +idDemandOneShots bndr + = call_arity_one_shots `zip_lams` dmd_one_shots + where + call_arity_one_shots :: [OneShotInfo] + call_arity_one_shots + | call_arity == 0 = [] + | otherwise = NoOneShotInfo : replicate (call_arity-1) OneShotLam + -- Call Arity analysis says the function is always called + -- applied to this many arguments. The first NoOneShotInfo is because + -- if Call Arity says "always applied to 3 args" then the one-shot info + -- we get is [NoOneShotInfo, OneShotLam, OneShotLam] + call_arity = idCallArity bndr + + dmd_one_shots :: [OneShotInfo] + -- If the demand info is Cx(C1(C1(.))) then we know that an + -- application to one arg is also an application to three + dmd_one_shots = argOneShots (idDemandInfo bndr) + + -- Take the *longer* list + zip_lams (lam1:lams1) (lam2:lams2) = (lam1 `bestOneShot` lam2) : zip_lams lams1 lams2 + zip_lams [] lams2 = lams2 + zip_lams lams1 [] = lams1 + +{- Note [Arity analysis] +~~~~~~~~~~~~~~~~~~~~~~~~ The motivating example for arity analysis is this: f = \x. let g = f (x+1) @@ -784,57 +1036,118 @@ to floatIn the non-cheap let-binding. Which is all perfectly benign, but means we do two iterations (well, actually 3 'step's to detect we are stable) and don't want to emit the warning. -Note [Eta expanding through dictionaries] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If the experimental -fdicts-cheap flag is on, we eta-expand through -dictionary bindings. This improves arities. Thereby, it also -means that full laziness is less prone to floating out the -application of a function to its dictionary arguments, which -can thereby lose opportunities for fusion. Example: - foo :: Ord a => a -> ... - foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). .... - -- So foo has arity 1 +Note [Trim arity inside the loop] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Here's an example (from gadt/nbe.hs) which caused trouble. + data Exp g t where + Lam :: Ty a -> Exp (g,a) b -> Exp g (a->b) - f = \x. foo dInt $ bar x + eval :: Exp g t -> g -> t + eval (Lam _ e) g = \a -> eval e (g,a) -The (foo DInt) is floated out, and makes ineffective a RULE - foo (bar x) = ... +The danger is that we get arity 3 from analysing this; and the +next time arity 4, and so on for ever. Solution: use trimArityType +on each iteration. -One could go further and make exprIsCheap reply True to any -dictionary-typed expression, but that's more work. 
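
To make the demand-combination step in findRhsArity above concrete, here is a small standalone sketch with a hypothetical name (combineSketch) and simplified local copies of Cost/OneShotInfo/bestOneShot; the real code is combineWithDemandOneShots and idDemandOneShots above, and the worked example matches Note [Combining arity type with demand info] below:

  -- Standalone sketch (hypothetical names, simplified types): zip the RHS's
  -- per-lambda info with one-shot info derived from the binder's demand,
  -- letting OneShotLam win at each position, and extend past the end of the
  -- lambdas only while the demand says "called once".
  data OneShotInfo = NoOneShotInfo | OneShotLam deriving (Eq, Show)
  data Cost        = IsCheap | IsExpensive      deriving (Eq, Show)

  bestOneShot :: OneShotInfo -> OneShotInfo -> OneShotInfo
  bestOneShot OneShotLam _  = OneShotLam
  bestOneShot _          os = os

  combineSketch :: [(Cost, OneShotInfo)] -> [OneShotInfo] -> [(Cost, OneShotInfo)]
  combineSketch lams []  = lams
  combineSketch []   oss = [ (IsExpensive, OneShotLam)
                           | _ <- takeWhile (== OneShotLam) oss ]
  combineSketch ((c, os1):lams) (os2:oss)
    = (c, os1 `bestOneShot` os2) : combineSketch lams oss

  -- Example (cf. Note [Combining arity type with demand info] below):
  --   combineSketch [(IsCheap,NoOneShotInfo),(IsExpensive,NoOneShotInfo),(IsCheap,OneShotLam)]
  --                 [NoOneShotInfo, OneShotLam, OneShotLam]
  --   ==> [(IsCheap,NoOneShotInfo),(IsExpensive,OneShotLam),(IsCheap,OneShotLam)]
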
+Note [Combining arity type with demand info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + let f = \x. let y = <expensive> in \p \q{os}. blah + in ...(f a b)...(f c d)... + +* From the RHS we get an ArityType like + AT [ (IsCheap,?), (IsExpensive,?), (IsCheap,OneShotLam) ] Dunno + where "?" means NoOneShotInfo + +* From the body, the demand analyser (or Call Arity) will tell us + that the function is always applied to at least two arguments. + +Combining these two pieces of info, we can get the final ArityType + AT [ (IsCheap,?), (IsExpensive,OneShotLam), (IsCheap,OneShotLam) ] Dunno +result: arity=3, which is better than we could do from either +source alone. + +The "combining" part is done by combineWithDemandOneShots. It +uses info from both Call Arity and demand analysis. + +We may have /more/ call demands from the calls than we have lambdas +in the binding. E.g. + let f1 = \x. g x x in ...(f1 p q r)... + -- Demand on f1 is Cx(C1(C1(L))) + + let f2 = \y. error y in ...(f2 p q r)... + -- Demand on f2 is Cx(C1(C1(L))) + +In both these cases we can eta expand f1 and f2 to arity 3. +But /only/ for called-once demands. Suppose we had + let f1 = \y. g x x in ...let h = f1 p q in ...(h r1)...(h r2)... + +Now we don't want to eta-expand f1 to have 3 args; only two. +Nor, in the case of f2, do we want to push that error call under +a lambda. Hence the takeWhile in combineWithDemandDoneShots. -} + +{- ********************************************************************* +* * + arityType +* * +********************************************************************* -} + arityLam :: Id -> ArityType -> ArityType -arityLam id (AT oss div) = AT (idStateHackOneShotInfo id : oss) div +arityLam id (AT oss div) + = AT ((IsCheap, one_shot) : oss) div + where + one_shot | isDeadEndDiv div = OneShotLam + | otherwise = idStateHackOneShotInfo id + -- If the body diverges, treat it as one-shot: no point + -- in floating out, and no penalty for floating in + -- See Wrinkle [Bottoming functions] in Note [ArityType] -floatIn :: Bool -> ArityType -> ArityType +floatIn :: Cost -> ArityType -> ArityType -- We have something like (let x = E in b), -- where b has the given arity type. -floatIn cheap at - | isDeadEndArityType at || cheap = at - -- If E is not cheap, keep arity only for one-shots - | otherwise = takeWhileOneShot at +floatIn IsCheap at = at +floatIn IsExpensive at = addWork at + +addWork :: ArityType -> ArityType +addWork at@(AT lams div) + = case lams of + [] -> at + lam:lams' -> AT (add_work lam : lams') div + where + add_work :: ATLamInfo -> ATLamInfo + add_work (_,os) = (IsExpensive,os) -arityApp :: ArityType -> Bool -> ArityType +arityApp :: ArityType -> Cost -> ArityType -- Processing (fun arg) where at is the ArityType of fun, -- Knock off an argument and behave like 'let' -arityApp (AT (_:oss) div) cheap = floatIn cheap (AT oss div) -arityApp at _ = at +arityApp (AT ((ch1,_):oss) div) ch2 = floatIn (ch1 `addCost` ch2) (AT oss div) +arityApp at _ = at -- | Least upper bound in the 'ArityType' lattice. -- See the haddocks on 'ArityType' for the lattice. -- -- Used for branches of a @case@. 
andArityType :: ArityType -> ArityType -> ArityType -andArityType (AT (os1:oss1) div1) (AT (os2:oss2) div2) - | AT oss' div' <- andArityType (AT oss1 div1) (AT oss2 div2) - = AT ((os1 `bestOneShot` os2) : oss') div' -- See Note [Combining case branches] -andArityType (AT [] div1) at2 - | isDeadEndDiv div1 = at2 -- Note [ABot branches: max arity wins] - | otherwise = takeWhileOneShot at2 -- See Note [Combining case branches] -andArityType at1 (AT [] div2) - | isDeadEndDiv div2 = at1 -- Note [ABot branches: max arity wins] - | otherwise = takeWhileOneShot at1 -- See Note [Combining case branches] +andArityType (AT (lam1:lams1) div1) (AT (lam2:lams2) div2) + | AT lams' div' <- andArityType (AT lams1 div1) (AT lams2 div2) + = AT ((lam1 `and_lam` lam2) : lams') div' -- See Note [Combining case branches] + where + (ch1,os1) `and_lam` (ch2,os2) + = ( ch1 `addCost` ch2, os1 `bestOneShot` os2) + +andArityType (AT [] div1) at2 = andWithTail div1 at2 +andArityType at1 (AT [] div2) = andWithTail div2 at1 + +andWithTail :: Divergence -> ArityType -> ArityType +andWithTail div1 at2@(AT oss2 _) + | isDeadEndDiv div1 -- case x of { T -> error; F -> \y.e } + = at2 + | otherwise -- case x of { T -> plusInt <expensive>; F -> \y.e } + = addWork (AT oss2 topDiv) -- We know div1 = topDiv + -- Note [ABot branches: max arity wins] + -- See Note [Combining case branches] {- Note [ABot branches: max arity wins] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -866,29 +1179,6 @@ basis that if we know one branch is one-shot, then they all must be. Surprisingly, this means that the one-shot arity type is effectively the top element of the lattice. -Note [Arity trimming] -~~~~~~~~~~~~~~~~~~~~~ -Consider ((\x y. blah) |> co), where co :: (Int->Int->Int) ~ (Int -> F a) , and -F is some type family. - -Because of Note [exprArity invariant], item (2), we must return with arity at -most 1, because typeArity (Int -> F a) = 1. So we have to trim the result of -calling arityType on (\x y. blah). Failing to do so, and hence breaking the -exprArity invariant, led to #5441. - -How to trim? If we end in topDiv, it's easy. But we must take great care with -dead ends (i.e. botDiv). Suppose the expression was (\x y. error "urk"), -we'll get \??.⊥. We absolutely must not trim that to \?.⊥, because that -claims that ((\x y. error "urk") |> co) diverges when given one argument, -which it absolutely does not. And Bad Things happen if we think something -returns bottom when it doesn't (#16066). - -So, if we need to trim a dead-ending arity type, switch (conservatively) to -topDiv. - -Historical note: long ago, we unconditionally switched to topDiv when we -encountered a cast, but that is far too conservative: see #5475 - Note [Eta expanding through CallStacks] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Just as it's good to eta-expand through dictionaries, so it is good to @@ -899,6 +1189,25 @@ do so through CallStacks. #20103 is a case in point, where we got We really want to eta-expand this! #20103 is quite convincing! We do this regardless of -fdicts-cheap; it's not really a dictionary. + +Note [Eta expanding through dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the experimental -fdicts-cheap flag is on, we eta-expand through +dictionary bindings. This improves arities. Thereby, it also +means that full laziness is less prone to floating out the +application of a function to its dictionary arguments, which +can thereby lose opportunities for fusion. Example: + foo :: Ord a => a -> ... + foo = /\a \(d:Ord a). let d' = ...d... 
in \(x:a). .... + -- So foo has arity 1 + + f = \x. foo dInt $ bar x + +The (foo DInt) is floated out, and makes ineffective a RULE + foo (bar x) = ... + +One could go further and make exprIsCheap reply True to any +dictionary-typed expression, but that's more work. -} --------------------------- @@ -921,14 +1230,18 @@ We do this regardless of -fdicts-cheap; it's not really a dictionary. data AnalysisMode = BotStrictness -- ^ Used during 'exprBotStrictness_maybe'. + | EtaExpandArity { am_opts :: !ArityOpts } - -- ^ Used for finding an expression's eta-expanding arity quickly, without - -- fixed-point iteration ('exprEtaExpandArity'). - | FindRhsArity { am_opts :: !ArityOpts - , am_sigs :: !(IdEnv ArityType) } + -- ^ Used for finding an expression's eta-expanding arity quickly, + -- without fixed-point iteration ('exprEtaExpandArity'). + + | FindRhsArity { am_opts :: !ArityOpts + , am_sigs :: !(IdEnv SafeArityType) } -- ^ Used for regular, fixed-point arity analysis ('findRhsArity'). -- See Note [Arity analysis] for details about fixed-point iteration. - -- INVARIANT: Disjoint with 'ae_joins'. + -- am_dicts_cheap: see Note [Eta expanding through dictionaries] + -- am_sigs: note `SafeArityType` so we can use this in myIsCheapApp + -- INVARIANT: am_sigs is disjoint with 'ae_joins'. data ArityEnv = AE @@ -991,9 +1304,11 @@ extendJoinEnv env@(AE { ae_joins = joins }) join_ids = del_sig_env_list join_ids $ env { ae_joins = joins `extendVarSetList` join_ids } -extendSigEnv :: ArityEnv -> Id -> ArityType -> ArityEnv +extendSigEnv :: ArityEnv -> Id -> SafeArityType -> ArityEnv extendSigEnv env id ar_ty - = del_join_env id (modifySigEnv (\sigs -> extendVarEnv sigs id ar_ty) env) + = del_join_env id $ + modifySigEnv (\sigs -> extendVarEnv sigs id ar_ty) $ + env delInScope :: ArityEnv -> Id -> ArityEnv delInScope env id = del_join_env id $ del_sig_env id env @@ -1001,7 +1316,7 @@ delInScope env id = del_join_env id $ del_sig_env id env delInScopeList :: ArityEnv -> [Id] -> ArityEnv delInScopeList env ids = del_join_env_list ids $ del_sig_env_list ids env -lookupSigEnv :: ArityEnv -> Id -> Maybe ArityType +lookupSigEnv :: ArityEnv -> Id -> Maybe SafeArityType lookupSigEnv AE{ ae_mode = mode } id = case mode of BotStrictness -> Nothing EtaExpandArity{} -> Nothing @@ -1015,6 +1330,11 @@ pedanticBottoms AE{ ae_mode = mode } = case mode of EtaExpandArity{ am_opts = ArityOpts{ ao_ped_bot = ped_bot } } -> ped_bot FindRhsArity{ am_opts = ArityOpts{ ao_ped_bot = ped_bot } } -> ped_bot +exprCost :: ArityEnv -> CoreExpr -> Maybe Type -> Cost +exprCost env e mb_ty + | myExprIsCheap env e mb_ty = IsCheap + | otherwise = IsExpensive + -- | A version of 'exprIsCheap' that considers results from arity analysis -- and optionally the expression's type. -- Under 'exprBotStrictness_maybe', no expressions are cheap. @@ -1040,17 +1360,20 @@ myExprIsCheap AE{ae_mode = mode} e mb_ty = case mode of -- | A version of 'isCheapApp' that considers results from arity analysis. -- See Note [Arity analysis] for what's in the signature environment and why -- it's important. -myIsCheapApp :: IdEnv ArityType -> CheapAppFun +myIsCheapApp :: IdEnv SafeArityType -> CheapAppFun myIsCheapApp sigs fn n_val_args = case lookupVarEnv sigs fn of + -- Nothing means not a local function, fall back to regular -- 'GHC.Core.Utils.isCheapApp' - Nothing -> isCheapApp fn n_val_args - -- @Just at@ means local function with @at@ as current ArityType. 
+ Nothing -> isCheapApp fn n_val_args + + -- `Just at` means local function with `at` as current SafeArityType. -- Roughly approximate what 'isCheapApp' is doing. - Just (AT oss div) + Just (AT lams div) | isDeadEndDiv div -> True -- See Note [isCheapApp: bottoming functions] in GHC.Core.Utils - | n_val_args < length oss -> True -- Essentially isWorkFreeApp - | otherwise -> False + | n_val_args == 0 -> True -- Essentially + | n_val_args < length lams -> True -- isWorkFreeApp + | otherwise -> False ---------------- arityType :: ArityEnv -> CoreExpr -> ArityType @@ -1077,7 +1400,10 @@ arityType env (Lam x e) arityType env (App fun (Type _)) = arityType env fun arityType env (App fun arg ) - = arityApp (arityType env fun) (myExprIsCheap env arg Nothing) + = arityApp fun_at arg_cost + where + fun_at = arityType env fun + arg_cost = exprCost env arg Nothing -- Case/Let; keep arity if either the expression is cheap -- or it's a 1-shot lambda @@ -1098,9 +1424,8 @@ arityType env (Case scrut bndr _ alts) | exprOkForSpeculation scrut = alts_type - | otherwise -- In the remaining cases we may not push - = takeWhileOneShot alts_type -- evaluation of the scrutinee in - + | otherwise -- In the remaining cases we may not push + = addWork alts_type -- evaluation of the scrutinee in where env' = delInScope env bndr arity_type_alt (Alt _con bndrs rhs) = arityType (delInScopeList env' bndrs) rhs @@ -1128,17 +1453,17 @@ arityType env (Let (Rec pairs) body) | otherwise = pprPanic "arityType:joinrec" (ppr pairs) -arityType env (Let (NonRec b r) e) - = floatIn cheap_rhs (arityType env' e) +arityType env (Let (NonRec b rhs) e) + = floatIn rhs_cost (arityType env' e) where - cheap_rhs = myExprIsCheap env r (Just (idType b)) - env' = extendSigEnv env b (arityType env r) + rhs_cost = exprCost env rhs (Just (idType b)) + env' = extendSigEnv env b (safeArityType (arityType env rhs)) arityType env (Let (Rec prs) e) - = floatIn (all is_cheap prs) (arityType env' e) + = floatIn (allCosts bind_cost prs) (arityType env' e) where - env' = delInScopeList env (map fst prs) - is_cheap (b,e) = myExprIsCheap env' e (Just (idType b)) + env' = delInScopeList env (map fst prs) + bind_cost (b,e) = exprCost env' e (Just (idType b)) arityType env (Tick t e) | not (tickishIsCode t) = arityType env e @@ -1201,7 +1526,7 @@ environment mapping let-bound Ids to their ArityType. idArityType :: Id -> ArityType idArityType v | strict_sig <- idDmdSig v - , not $ isTopSig strict_sig + , not $ isNopSig strict_sig , (ds, div) <- splitDmdSig strict_sig , let arity = length ds -- Every strictness signature admits an arity signature! @@ -1209,8 +1534,8 @@ idArityType v | otherwise = AT (take (idArity v) one_shots) topDiv where - one_shots :: [OneShotInfo] -- One-shot-ness derived from the type - one_shots = typeOneShots (idType v) + one_shots :: [(Cost,OneShotInfo)] -- One-shot-ness derived from the type + one_shots = repeat IsCheap `zip` typeOneShots (idType v) {- %************************************************************************ @@ -1319,7 +1644,7 @@ Consider We'll get an ArityType for foo of \?1.T. Then we want to eta-expand to - foo = (\x. \eta{os}. (case x of ...as before...) eta)) |> some_co + foo = (\x. \eta{os}. (case x of ...as before...) eta) |> some_co That 'eta' binder is fresh, and we really want it to have the one-shot flag from the inner \s{os}. 
By expanding with the @@ -1347,14 +1672,14 @@ etaExpand n orig_expr in_scope = {-#SCC "eta_expand:in-scopeX" #-} mkInScopeSet (exprFreeVars orig_expr) -etaExpandAT :: InScopeSet -> ArityType -> CoreExpr -> CoreExpr +etaExpandAT :: InScopeSet -> SafeArityType -> CoreExpr -> CoreExpr -- See Note [Eta expansion with ArityType] -- -- We pass in the InScopeSet from the simplifier to avoid recomputing -- it here, which can be jolly expensive if the casts are big -- In #18223 it took 10% of compile time just to do the exprFreeVars! -etaExpandAT in_scope (AT oss _) orig_expr - = eta_expand in_scope oss orig_expr +etaExpandAT in_scope at orig_expr + = eta_expand in_scope (arityTypeOneShots at) orig_expr -- etaExpand arity e = res -- Then 'res' has at least 'arity' lambdas at the top @@ -1369,7 +1694,11 @@ etaExpandAT in_scope (AT oss _) orig_expr eta_expand :: InScopeSet -> [OneShotInfo] -> CoreExpr -> CoreExpr eta_expand in_scope one_shots (Cast expr co) - = Cast (eta_expand in_scope one_shots expr) co + = mkCast (eta_expand in_scope one_shots expr) co + -- This mkCast is important, because eta_expand might return an + -- expression with a cast at the outside; and tryCastWorkerWrapper + -- asssumes that we don't have nested casts. Makes a difference + -- in compile-time for T18223 eta_expand in_scope one_shots orig_expr = go in_scope one_shots [] orig_expr @@ -1440,7 +1769,7 @@ casts complicate the question. If we have and e :: N (N Int) then the eta-expansion should look like - (\(x::S) (y::S) -> e |> co x y) |> sym co + (\(x::S) (y::S) -> (e |> co) x y) |> sym co where co :: N (N Int) ~ S -> S -> Int co = axN @(N Int) ; (S -> axN @Int) @@ -1619,11 +1948,11 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty -- (go [o1,..,on] subst ty) = (in_scope, EI [b1,..,bn] co) -- co :: subst(ty) ~ b1_ty -> ... -> bn_ty -> tr - go _ [] subst _ -- See Note [exprArity invariant] + go _ [] subst _ ----------- Done! No more expansion needed = (getTCvInScope subst, EI [] MRefl) - go n oss@(one_shot:oss1) subst ty -- See Note [exprArity invariant] + go n oss@(one_shot:oss1) subst ty ----------- Forall types (forall a. ty) | Just (tcv,ty') <- splitForAllTyCoVar_maybe ty , (subst', tcv') <- Type.substVarBndr subst tcv @@ -1676,6 +2005,428 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty -- with an explicit lambda having a non-function type +{- +************************************************************************ +* * + Eta reduction +* * +************************************************************************ + +Note [Eta reduction makes sense] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +GHC's eta reduction transforms + \x y. <fun> x y ---> <fun> +We discuss when this is /sound/ in Note [Eta reduction soundness]. +But even assuming it is sound, when is it /desirable/. That +is what we discuss here. + +This test is made by `ok_fun` in tryEtaReduce. + +1. We want to eta-reduce only if we get all the way to a trivial + expression; we don't want to remove extra lambdas unless we are + going to avoid allocating this thing altogether. + + Trivial means *including* casts and type lambdas: + * `\x. f x |> co --> f |> (ty(x) -> co)` (provided `co` doesn't mention `x`) + * `/\a. \x. f @(Maybe a) x --> /\a. f @(Maybe a)` + See Note [Do not eta reduce PAPs] for why we insist on a trivial head. + +2. Type and dictionary abstraction. 
Regardless of whether 'f' is a value, it + is always sound to reduce /type lambdas/, thus: + (/\a -> f a) --> f + Moreover, we always want to, because it makes RULEs apply more often: + This RULE: `forall g. foldr (build (/\a -> g a))` + should match `foldr (build (/\b -> ...something complex...))` + and the simplest way to do so is eta-reduce `/\a -> g a` in the RULE to `g`. + + The type checker can insert these eta-expanded versions, + with both type and dictionary lambdas; hence the slightly + ad-hoc (all ok_lam bndrs) + +3. (See fun_arity in tryEtaReduce.) We have to hide `f`'s `idArity` in + its own RHS, lest we suffer from the last point of Note [Arity + robustness] in GHC.Core.Opt.Simplify.Env. There we have `f = \x. f x` + and we should not eta-reduce to `f=f`. Which might change a + terminating program (think @f `seq` e@) to a non-terminating one. + So we check for being a loop breaker first. However for GlobalIds + we can look at the arity; and for primops we must, since they have + no unfolding. [SG: Perhaps this is rather a soundness subtlety?] + +Of course, eta reduction is not always sound. See Note [Eta reduction soundness] +for when it is. + +When there are multiple arguments, we might get multiple eta-redexes. Example: + \x y. e x y + ==> { reduce \y. (e x) y in context \x._ } + \x. e x + ==> { reduce \x. e x in context _ } + e +And (1) implies that we never want to stop with `\x. e x`, because that is not a +trivial expression. So in practice, the implementation works by considering a +whole group of leading lambdas to reduce. + +These delicacies are why we don't simply use 'exprIsTrivial' and 'exprIsHNF' +in 'tryEtaReduce'. Alas. + +Note [Eta reduction soundness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +GHC's eta reduction transforms + \x y. <fun> x y ---> <fun> +For soundness, we obviously require that `x` and `y` +to not occur free. But what /other/ restrictions are there for +eta reduction to be sound? + +We discuss separately what it means for eta reduction to be +/desirable/, in Note [Eta reduction makes sense]. + +Eta reduction is *not* a sound transformation in general, because it +may change termination behavior if *value* lambdas are involved: + `bot` /= `\x. bot x` (as can be observed by a simple `seq`) +The past has shown that oversight of this fact can not only lead to endless +loops or exceptions, but also straight out *segfaults*. + +Nevertheless, we can give the following criteria for when it is sound to +perform eta reduction on an expression with n leading lambdas `\xs. e xs` +(checked in 'is_eta_reduction_sound' in 'tryEtaReduce', which focuses on the +case where `e` is trivial): + + A. It is sound to eta-reduce n arguments as long as n does not exceed the + `exprArity` of `e`. (Needs Arity analysis.) + This criterion exploits information about how `e` is *defined*. + + Example: If `e = \x. bot` then we know it won't diverge until it is called + with one argument. Hence it is safe to eta-reduce `\x. e x` to `e`. + By contrast, it would be *unsound* to eta-reduce 2 args, `\x y. e x y` to `e`: + `e 42` diverges when `(\x y. e x y) 42` does not. + + S. It is sound to eta-reduce n arguments in an evaluation context in which all + calls happen with at least n arguments. (Needs Strictness analysis.) + NB: This treats evaluations like a call with 0 args. + NB: This criterion exploits information about how `e` is *used*. + + Example: Given a function `g` like + `g c = Just (c 1 2 + c 2 3)` + it is safe to eta-reduce the arg in `g (\x y. 
e x y)` to `g e` without + knowing *anything* about `e` (perhaps it's a parameter occ itself), simply + because `g` always calls its parameter with 2 arguments. + It is also safe to eta-reduce just one arg, e.g., `g (\x. e x)` to `g e`. + By contrast, it would *unsound* to eta-reduce 3 args in a call site + like `g (\x y z. e x y z)` to `g e`, because that diverges when + `e = \x y. bot`. + + Could we relax to "*At least one call in the same trace* is with n args"? + (NB: Strictness analysis can only answer this relaxed question, not the + original formulation.) + Consider what happens for + ``g2 c = c True `seq` c False 42`` + Here, `g2` will call `c` with 2 arguments (if there is a call at all). + But it is unsound to eta-reduce the arg in `g2 (\x y. e x y)` to `g2 e` + when `e = \x. if x then bot else id`, because the latter will diverge when + the former would not. + + On the other hand, with `-fno-pendantic-bottoms` , we will have eta-expanded + the definition of `e` and then eta-reduction is sound + (see Note [Dealing with bottom]). + Consequence: We have to check that `-fpedantic-bottoms` is off; otherwise + eta-reduction based on demands is in fact unsound. + + See Note [Eta reduction based on evaluation context] for the implementation + details. This criterion is tested extensively in T21261. + + E. (See fun_arity in tryEtaReduce.) As a perhaps special case on the + boundary of (A) and (S), when we know that a fun binder `f` is in + WHNF, we simply assume it has arity 1 and apply (A). Example: + g f = f `seq` \x. f x + Here it's sound eta-reduce `\x. f x` to `f`, because `f` can't be bottom + after the `seq`. This turned up in #7542. + +And here are a few more technical criteria for when it is *not* sound to +eta-reduce that are specific to Core and GHC: + + L. With linear types, eta-reduction can break type-checking: + f :: A ⊸ B + g :: A -> B + g = \x. f x + The above is correct, but eta-reducing g would yield g=f, the linter will + complain that g and f don't have the same type. NB: Not unsound in the + dynamic semantics, but unsound according to the static semantics of Core. + + J. We may not undersaturate join points. + See Note [Invariants on join points] in GHC.Core, and #20599. + + B. We may not undersaturate functions with no binding. + See Note [Eta expanding primops]. + + W. We may not undersaturate StrictWorkerIds. + See Note [Strict Worker Ids] in GHC.CoreToStg.Prep. + +Here is a list of historic accidents surrounding unsound eta-reduction: + +* Consider + f = \x.f x + h y = case (case y of { True -> f `seq` True; False -> False }) of + True -> ...; False -> ... + If we (unsoundly) eta-reduce f to get f=f, the strictness analyser + says f=bottom, and replaces the (f `seq` True) with just + (f `cast` unsafe-co). + [SG in 2022: I don't think worker/wrapper would do this today.] + BUT, as things stand, 'f' got arity 1, and it *keeps* arity 1 (perhaps also + wrongly). So CorePrep eta-expands the definition again, so that it does not + terminate after all. + Result: seg-fault because the boolean case actually gets a function value. + See #1947. + +* Never *reduce* arity. For example + f = \xy. g x y + Then if h has arity 1 we don't want to eta-reduce because then + f's arity would decrease, and that is bad + [SG in 2022: I don't understand this point. There is no `h`, perhaps that + should have been `g`. Even then, this proposed eta-reduction is invalid by + criterion (A), which might actually be the point this anecdote is trying to + make. 
Perhaps the "no arity decrease" idea is also related to + Note [Arity robustness]?] + +Note [Do not eta reduce PAPs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +I considered eta-reducing if the result is a PAP: + \x. f e1 e2 x ==> f e1 e2 + +This reduces clutter, sometimes a lot. See Note [Do not eta-expand PAPs] +in GHC.Core.Opt.Simplify.Utils, where we are careful not to eta-expand +a PAP. If eta-expanding is bad, then eta-reducing is good! + +Also the code generator likes eta-reduced PAPs; see GHC.CoreToStg.Prep +Note [No eta reduction needed in rhsToBody]. + +But note that we don't want to eta-reduce + \x y. f <expensive> x y +to + f <expensive> +The former has arity 2, and repeats <expensive> for every call of the +function; the latter has arity 0, and shares <expensive>. We don't want +to change behaviour. Hence the call to exprIsCheap in ok_fun. + +I noticed this when examining #18993 and, although it is delicate, +eta-reducing to a PAP happens to fix the regression in #18993. + +HOWEVER, if we transform + \x. f y x ==> f y +that might mean that f isn't saturated any more, and does not inline. +This led to some other regressions. + +TL;DR currrently we do /not/ eta reduce if the result is a PAP. + +Note [Eta reduction with casted arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + (\(x:t3). f (x |> g)) :: t3 -> t2 + where + f :: t1 -> t2 + g :: t3 ~ t1 +This should be eta-reduced to + + f |> (sym g -> t2) + +So we need to accumulate a coercion, pushing it inward (past +variable arguments only) thus: + f (x |> co_arg) |> co --> (f |> (sym co_arg -> co)) x + f (x:t) |> co --> (f |> (t -> co)) x + f @ a |> co --> (f |> (forall a.co)) @ a + f @ (g:t1~t2) |> co --> (f |> (t1~t2 => co)) @ (g:t1~t2) +These are the equations for ok_arg. + +Note [Eta reduction with casted function] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Since we are pushing a coercion inwards, it is easy to accommodate + (\xy. (f x |> g) y) + (\xy. (f x y) |> g) + +See the `(Cast e co)` equation for `go` in `tryEtaReduce`. The +eta-expander pushes those casts outwards, so you might think we won't +ever see a cast here, but if we have + \xy. (f x y |> g) +we will call tryEtaReduce [x,y] (f x y |> g), and we'd like that to +work. This happens in GHC.Core.Opt.Simplify.Utils.mkLam, where +eta-expansion may be turned off (by sm_eta_expand). + +Note [Eta reduction based on evaluation context] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Eta reduction soundness], criterion (S) allows us to eta-reduce +`g (\x y. e x y)` to `g e` when we know that `g` always calls its parameter with +at least 2 arguments. So how do we read that off `g`'s demand signature? + +Let's take the simple example of #21261, where `g` (actually, `f`) is defined as + g c = c 1 2 + c 3 4 +Then this is how the pieces are put together: + + * Demand analysis infers `<SCS(C1(L))>` for `g`'s demand signature + + * When the Simplifier next simplifies the argument in `g (\x y. e x y)`, it + looks up the *evaluation context* of the argument in the form of the + sub-demand `CS(C1(L))` and stores it in the 'SimplCont'. + (Why does it drop the outer evaluation cardinality of the demand, `S`? + Because it's irrelevant! When we simplify an expression, we do so under the + assumption that it is currently under evaluation.) + This sub-demand literally says "Whenever this expression is evaluated, it + is also called with two arguments, potentially multiple times". 
+ + * Then the simplifier takes apart the lambda and simplifies the lambda group + and then calls 'tryEtaReduce' when rebuilding the lambda, passing the + evaluation context `CS(C1(L))` along. Then we simply peel off 2 call + sub-demands `Cn` and see whether all of the n's (here: `S=C_1N` and + `1=C_11`) were strict. And strict they are! Thus, it will eta-reduce + `\x y. e x y` to `e`. +-} + +-- | `tryEtaReduce [x,y,z] e sd` returns `Just e'` if `\x y z -> e` is evaluated +-- according to `sd` and can soundly and gainfully be eta-reduced to `e'`. +-- See Note [Eta reduction soundness] +-- and Note [Eta reduction makes sense] when that is the case. +tryEtaReduce :: [Var] -> CoreExpr -> SubDemand -> Maybe CoreExpr +-- Return an expression equal to (\bndrs. body) +tryEtaReduce bndrs body eval_sd + = go (reverse bndrs) body (mkRepReflCo (exprType body)) + where + incoming_arity = count isId bndrs -- See Note [Eta reduction makes sense], point (2) + + go :: [Var] -- Binders, innermost first, types [a3,a2,a1] + -> CoreExpr -- Of type tr + -> Coercion -- Of type tr ~ ts + -> Maybe CoreExpr -- Of type a1 -> a2 -> a3 -> ts + -- See Note [Eta reduction with casted arguments] + -- for why we have an accumulating coercion + -- + -- Invariant: (go bs body co) returns an expression + -- equivalent to (\(reverse bs). body |> co) + + -- See Note [Eta reduction with casted function] + go bs (Cast e co1) co2 + = go bs e (co1 `mkTransCo` co2) + + go bs (Tick t e) co + | tickishFloatable t + = fmap (Tick t) $ go bs e co + -- Float app ticks: \x -> Tick t (e x) ==> Tick t e + + go (b : bs) (App fun arg) co + | Just (co', ticks) <- ok_arg b arg co (exprType fun) + = fmap (flip (foldr mkTick) ticks) $ go bs fun co' + -- Float arg ticks: \x -> e (Tick t x) ==> Tick t e + + go remaining_bndrs fun co + | all isTyVar remaining_bndrs + -- If all the remaining_bnrs are tyvars, then the etad_exp + -- will be trivial, which is what we want. + -- e.g. We might have /\a \b. f [a] b, and we want to + -- eta-reduce to /\a. f [a] + -- We don't want to give up on this one: see #20040 + -- See Note [Eta reduction makes sense], point (1) + , remaining_bndrs `ltLength` bndrs + -- Only reply Just if /something/ has happened + , ok_fun fun + , let etad_expr = mkLams (reverse remaining_bndrs) (mkCast fun co) + used_vars = exprFreeVars etad_expr + reduced_bndrs = mkVarSet (dropList remaining_bndrs bndrs) + , used_vars `disjointVarSet` reduced_bndrs + -- Check for any of the binders free in the result, + -- including the accumulated coercion + -- See Note [Eta reduction makes sense], intro and point (1) + = Just etad_expr + + go _remaining_bndrs _fun _ = -- pprTrace "tER fail" (ppr _fun $$ ppr _remaining_bndrs) $ + Nothing + + --------------- + -- See Note [Eta reduction makes sense], point (1) + ok_fun (App fun (Type {})) = ok_fun fun + ok_fun (Cast fun _) = ok_fun fun + ok_fun (Tick _ expr) = ok_fun expr + ok_fun (Var fun_id) = is_eta_reduction_sound fun_id || all ok_lam bndrs + ok_fun _fun = False + + --------------- + -- See Note [Eta reduction soundness], this is THE place to check soundness! + is_eta_reduction_sound fun = + -- Check that eta-reduction won't make the program stricter... + (fun_arity fun >= incoming_arity -- criterion (A) and (E) + || all_calls_with_arity incoming_arity) -- criterion (S) + -- ... 
and that the function can be eta reduced to arity 0 + -- without violating invariants of Core and GHC + && canEtaReduceToArity fun 0 0 -- criteria (L), (J), (W), (B) + all_calls_with_arity n = isStrict (peelManyCalls n eval_sd) + -- See Note [Eta reduction based on evaluation context] + + --------------- + fun_arity fun + | isLocalId fun + , isStrongLoopBreaker (idOccInfo fun) = 0 + -- See Note [Eta reduction makes sense], point (3) + | arity > 0 = arity + | isEvaldUnfolding (idUnfolding fun) = 1 + -- See Note [Eta reduction soundness], criterion (E) + | otherwise = 0 + where + arity = idArity fun + + --------------- + ok_lam v = isTyVar v || isEvVar v + -- See Note [Eta reduction makes sense], point (2) + + --------------- + ok_arg :: Var -- Of type bndr_t + -> CoreExpr -- Of type arg_t + -> Coercion -- Of kind (t1~t2) + -> Type -- Type (arg_t -> t1) of the function + -- to which the argument is supplied + -> Maybe (Coercion -- Of type (arg_t -> t1 ~ bndr_t -> t2) + -- (and similarly for tyvars, coercion args) + , [CoreTickish]) + -- See Note [Eta reduction with casted arguments] + ok_arg bndr (Type ty) co _ + | Just tv <- getTyVar_maybe ty + , bndr == tv = Just (mkHomoForAllCos [tv] co, []) + ok_arg bndr (Var v) co fun_ty + | bndr == v + , let mult = idMult bndr + , Just (fun_mult, _, _) <- splitFunTy_maybe fun_ty + , mult `eqType` fun_mult -- There is no change in multiplicity, otherwise we must abort + = Just (mkFunResCo Representational (idScaledType bndr) co, []) + ok_arg bndr (Cast e co_arg) co fun_ty + | (ticks, Var v) <- stripTicksTop tickishFloatable e + , Just (fun_mult, _, _) <- splitFunTy_maybe fun_ty + , bndr == v + , fun_mult `eqType` idMult bndr + = Just (mkFunCo Representational (multToCo fun_mult) (mkSymCo co_arg) co, ticks) + -- The simplifier combines multiple casts into one, + -- so we can have a simple-minded pattern match here + ok_arg bndr (Tick t arg) co fun_ty + | tickishFloatable t, Just (co', ticks) <- ok_arg bndr arg co fun_ty + = Just (co', t:ticks) + + ok_arg _ _ _ _ = Nothing + +-- | Can we eta-reduce the given function to the specified arity? +-- See Note [Eta reduction soundness], criteria (B), (J), (W) and (L). +canEtaReduceToArity :: Id -> JoinArity -> Arity -> Bool +canEtaReduceToArity fun dest_join_arity dest_arity = + not $ + hasNoBinding fun -- (B) + -- Don't undersaturate functions with no binding. + + || ( isJoinId fun && dest_join_arity < idJoinArity fun ) -- (J) + -- Don't undersaturate join points. + -- See Note [Invariants on join points] in GHC.Core, and #20599 + + || ( dest_arity < idCbvMarkArity fun ) -- (W) + -- Don't undersaturate StrictWorkerIds. + -- See Note [Strict Worker Ids] in GHC.CoreToStg.Prep. + + || isLinearType (idType fun) -- (L) + -- Don't perform eta reduction on linear types. + -- If `f :: A %1-> B` and `g :: A -> B`, + -- then `g x = f x` is OK but `g = f` is not. + + {- ********************************************************************* * * The "push rules" |