4 files changed, 329 insertions, 204 deletions
diff --git a/compiler/coreSyn/CoreUtils.hs b/compiler/coreSyn/CoreUtils.hs
index 4eef079b32..4a9e136e5c 100644
--- a/compiler/coreSyn/CoreUtils.hs
+++ b/compiler/coreSyn/CoreUtils.hs
@@ -1586,11 +1586,12 @@ don't want to discard a seq on it.
 -}
 
 -- | Can we bind this 'CoreExpr' at the top level?
-exprIsTopLevelBindable :: CoreExpr -> Bool
+exprIsTopLevelBindable :: CoreExpr -> Type -> Bool
 -- See Note [CoreSyn top-level string literals]
-exprIsTopLevelBindable expr
+-- Precondition: exprType expr = ty
+exprIsTopLevelBindable expr ty
   = exprIsLiteralString expr
-  || not (isUnliftedType (exprType expr))
+  || not (isUnliftedType ty)
 
 exprIsLiteralString :: CoreExpr -> Bool
 exprIsLiteralString (Lit (MachStr _)) = True
diff --git a/compiler/simplCore/SetLevels.hs b/compiler/simplCore/SetLevels.hs
index d1ff3fc18b..76ac48bd75 100644
--- a/compiler/simplCore/SetLevels.hs
+++ b/compiler/simplCore/SetLevels.hs
@@ -64,10 +64,10 @@ module SetLevels (
 
 import CoreSyn
 import CoreMonad        ( FloatOutSwitches(..) )
-import CoreUtils        ( exprType
-                        , isExprLevPoly
+import CoreUtils        ( exprType, exprIsCheap, exprIsHNF
                         , exprOkForSpeculation
                         , exprIsTopLevelBindable
+                        , isExprLevPoly
                         , collectMakeStaticArgs
                         )
 import CoreArity        ( exprBotStrictness_maybe )
@@ -81,7 +81,7 @@ import Var
 import VarSet
 import VarEnv
 import Literal          ( litIsTrivial )
-import Demand           ( StrictSig, increaseStrictSigArity )
+import Demand           ( StrictSig, isStrictDmd, splitStrictSig, increaseStrictSigArity )
 import Name             ( getOccName, mkSystemVarName )
 import OccName          ( occNameString )
 import Type             ( isUnliftedType, Type, mkLamTypes, splitTyConApp_maybe )
@@ -95,7 +95,6 @@ import FastString
 import UniqDFM
 import FV
 import Data.Maybe
-
 import Control.Monad    ( zipWithM )
 
 {-
@@ -274,14 +273,15 @@ setLevels float_lams binds us
 
 lvlTopBind :: LevelEnv -> Bind Id -> LvlM (LevelledBind, LevelEnv)
 lvlTopBind env (NonRec bndr rhs)
-  = do { rhs' <- lvlNonTailExpr env (freeVars rhs)
+  = do { rhs' <- lvlRhs env NonRecursive Nothing -- Not a join point
+                        (freeVars rhs)
        ; let (env', [bndr']) = substAndLvlBndrs NonRecursive env tOP_LEVEL [bndr]
        ; return (NonRec bndr' rhs', env') }
 
 lvlTopBind env (Rec pairs)
   = do let (bndrs,rhss) = unzip pairs
            (env', bndrs') = substAndLvlBndrs Recursive env tOP_LEVEL bndrs
-       rhss' <- mapM (lvlNonTailExpr env' . freeVars) rhss
+       rhss' <- mapM (lvlRhs env' Recursive Nothing . freeVars) rhss
        return (Rec (bndrs' `zip` rhss'), env')
 
 {-
@@ -341,39 +341,7 @@ lvlExpr env (_, AnnTick tickish expr) = do
     let tickish' = substTickish (le_subst env) tickish
     return (Tick tickish' expr')
 
-lvlExpr env expr@(_, AnnApp _ _) = do
-    let
-      (fun, args) = collectAnnArgs expr
-    --
-    case fun of
-      (_, AnnVar f) | floatOverSat env   -- See Note [Floating over-saturated applications]
-                    , arity > 0
-                    , arity < n_val_args
-                    , Nothing <- isClassOpId_maybe f ->
-        do
-         let (lapp, rargs) = left (n_val_args - arity) expr []
-         rargs' <- mapM (lvlNonTailMFE False env) rargs
-         lapp' <- lvlNonTailMFE False env lapp
-         return (foldl App lapp' rargs')
-        where
-         n_val_args = count (isValArg . deAnnotate) args
-         arity = idArity f
-
-         -- separate out the PAP that we are floating from the extra
-         -- arguments, by traversing the spine until we have collected
-         -- (n_val_args - arity) value arguments.
-         left 0 e               rargs = (e, rargs)
-         left n (_, AnnApp f a) rargs
-            | isValArg (deAnnotate a) = left (n-1) f (a:rargs)
-            | otherwise               = left n     f (a:rargs)
-         left _ _ _                   = panic "SetLevels.lvlExpr.left"
-
-         -- No PAPs that we can float: just carry on with the
-         -- arguments and the function.
-      _otherwise -> do
-         args' <- mapM (lvlNonTailMFE False env) args
-         fun'  <- lvlNonTailExpr env fun
-         return (foldl App fun' args')
+lvlExpr env expr@(_, AnnApp _ _) = lvlApp env expr (collectAnnArgs expr)
 
 -- We don't split adjacent lambdas.  That is, given
 --      \x y -> (x+1,y)
@@ -383,7 +351,7 @@ lvlExpr env expr@(_, AnnApp _ _) = do
 -- lambdas makes them more expensive.
 
 lvlExpr env expr@(_, AnnLam {})
-  = do { new_body <- lvlNonTailMFE True new_env body
+  = do { new_body <- lvlNonTailMFE new_env True body
        ; return (mkLams new_bndrs new_body) }
   where
     (bndrs, body)        = collectAnnBndrs expr
@@ -405,7 +373,7 @@ lvlExpr env (_, AnnLet bind body)
        ; return (Let bind' body') }
 
 lvlExpr env (_, AnnCase scrut case_bndr ty alts)
-  = do { scrut' <- lvlNonTailMFE True env scrut
+  = do { scrut' <- lvlNonTailMFE env True scrut
        ; lvlCase env (freeVarsOf scrut) scrut' case_bndr ty alts }
 
 lvlNonTailExpr :: LevelEnv             -- Context
@@ -415,6 +383,54 @@ lvlNonTailExpr env expr
   = lvlExpr (placeJoinCeiling env) expr
 
 -------------------------------------------
+lvlApp :: LevelEnv
+       -> CoreExprWithFVs
+       -> (CoreExprWithFVs, [CoreExprWithFVs]) -- Input application
+        -> LvlM LevelledExpr                   -- Result expression
+lvlApp env orig_expr ((_,AnnVar fn), args)
+  | floatOverSat env   -- See Note [Floating over-saturated applications]
+  , arity > 0
+  , arity < n_val_args
+  , Nothing <- isClassOpId_maybe fn
+  =  do { rargs' <- mapM (lvlNonTailMFE env False) rargs
+        ; lapp'  <- lvlNonTailMFE env False lapp
+        ; return (foldl App lapp' rargs') }
+
+  | otherwise
+  = do { args' <- zipWithM (lvlMFE env) stricts args
+            -- Take account of argument strictness; see
+            -- Note [Floating to the top]
+       ; return (foldl App (lookupVar env fn) args') }
+  where
+    n_val_args = count (isValArg . deAnnotate) args
+    arity      = idArity fn
+
+    stricts :: [Bool]   -- True for strict argument
+    stricts = case splitStrictSig (idStrictness fn) of
+                (arg_ds, _) | not (arg_ds `lengthExceeds` n_val_args)
+                            -> map isStrictDmd arg_ds ++ repeat False
+                            | otherwise
+                            -> repeat False
+
+    -- Separate out the PAP that we are floating from the extra
+    -- arguments, by traversing the spine until we have collected
+    -- (n_val_args - arity) value arguments.
+    (lapp, rargs) = left (n_val_args - arity) orig_expr []
+
+    left 0 e               rargs = (e, rargs)
+    left n (_, AnnApp f a) rargs
+       | isValArg (deAnnotate a) = left (n-1) f (a:rargs)
+       | otherwise               = left n     f (a:rargs)
+    left _ _ _                   = panic "SetLevels.lvlExpr.left"
+
+lvlApp env _ (fun, args)
+  =  -- No PAPs that we can float: just carry on with the
+     -- arguments and the function.
+     do { args' <- mapM (lvlNonTailMFE env False) args
+        ; fun'  <- lvlNonTailExpr env fun
+        ; return (foldl App fun' args') }
+
+-------------------------------------------
 lvlCase :: LevelEnv             -- Level of in-scope names/tyvars
         -> DVarSet              -- Free vars of input scrutinee
         -> LevelledExpr         -- Processed scrutinee
@@ -431,8 +447,8 @@ lvlCase env scrut_fvs scrut' case_bndr ty alts
         -- Unlike lets we don't insist that it escapes a value lambda
     do { (env1, (case_bndr' : bs')) <- cloneCaseBndrs env dest_lvl (case_bndr : bs)
        ; let rhs_env = extendCaseBndrEnv env1 case_bndr scrut'
-       ; body' <- lvlMFE True rhs_env body
-       ; let alt' = (con, [TB b (StayPut dest_lvl) | b <- bs'], body')
+       ; body' <- lvlMFE rhs_env True body
+       ; let alt' = (con, map (stayPut dest_lvl) bs', body')
        ; return (Case scrut' (TB case_bndr' (FloatMe dest_lvl)) ty' [alt']) }
 
   | otherwise     -- Stays put
@@ -448,7 +464,7 @@ lvlCase env scrut_fvs scrut' case_bndr ty alts
             -- Don't abstact over type variables, hence const True
 
     lvl_alt alts_env (con, bs, rhs)
-      = do { rhs' <- lvlMFE True new_env rhs
+      = do { rhs' <- lvlMFE new_env True rhs
            ; return (con, bs', rhs') }
       where
         (new_env, bs') = substAndLvlBndrs NonRecursive alts_env incd_lvl bs
@@ -499,33 +515,41 @@ binding site.
 That's why we apply exprOkForSpeculation to scrut' and not to scrut.
 -}
 
-lvlMFE ::  Bool                 -- True <=> strict context [body of case or let]
-        -> LevelEnv             -- Level of in-scope names/tyvars
+lvlNonTailMFE :: LevelEnv             -- Level of in-scope names/tyvars
+              -> Bool                 -- True <=> strict context [body of case
+                                      --   or let]
+              -> CoreExprWithFVs      -- input expression
+              -> LvlM LevelledExpr    -- Result expression
+lvlNonTailMFE env strict_ctxt ann_expr
+  = lvlMFE (placeJoinCeiling env) strict_ctxt ann_expr
+
+lvlMFE ::  LevelEnv             -- Level of in-scope names/tyvars
+        -> Bool                 -- True <=> strict context [body of case or let]
         -> CoreExprWithFVs      -- input expression
         -> LvlM LevelledExpr    -- Result expression
 -- lvlMFE is just like lvlExpr, except that it might let-bind
 -- the expression, so that it can itself be floated.
 
-lvlMFE _ env (_, AnnType ty)
+lvlMFE env _ (_, AnnType ty)
   = return (Type (CoreSubst.substTy (le_subst env) ty))
 
 -- No point in floating out an expression wrapped in a coercion or note
 -- If we do we'll transform  lvl = e |> co
 --                       to  lvl' = e; lvl = lvl' |> co
 -- and then inline lvl.  Better just to float out the payload.
-lvlMFE strict_ctxt env (_, AnnTick t e)
-  = do { e' <- lvlMFE strict_ctxt env e
+lvlMFE env strict_ctxt (_, AnnTick t e)
+  = do { e' <- lvlMFE env strict_ctxt e
        ; return (Tick t e') }
 
-lvlMFE strict_ctxt env (_, AnnCast e (_, co))
-  = do  { e' <- lvlMFE strict_ctxt env e
+lvlMFE env strict_ctxt (_, AnnCast e (_, co))
+  = do  { e' <- lvlMFE env strict_ctxt e
         ; return (Cast e' (substCo (le_subst env) co)) }
 
--- Note [Case MFEs]
-lvlMFE True env e@(_, AnnCase {})
-  = lvlExpr env e     -- Don't share cases
+lvlMFE env strict_ctxt e@(_, AnnCase {})
+  | strict_ctxt       -- Don't share cases in a strict context
+  = lvlExpr env e     -- See Note [Case MFEs]
 
-lvlMFE strict_ctxt env ann_expr
+lvlMFE env strict_ctxt ann_expr
   |  floatTopLvlOnly env && not (isTopLvl dest_lvl)
          -- Only floating to the top level is allowed.
   || isTopLvl dest_lvl && need_join -- Can't put join point at top level
@@ -537,31 +561,64 @@ lvlMFE strict_ctxt env ann_expr
   =     -- Don't float it out
     lvlExpr env ann_expr
 
-  | Just (wrap_float, wrap_use)
-       <- canFloat_maybe rhs_env strict_ctxt (float_is_lam || need_join) expr
-  = do { expr1 <- if need_join then lvlExpr rhs_env ann_expr
-                               else lvlNonTailExpr rhs_env ann_expr
-       ; let abs_expr = mkLams abs_vars_w_lvls (wrap_float expr1)
-       ; var <- newLvlVar abs_expr join_arity_maybe
+  | float_is_new_lam || need_join || exprIsTopLevelBindable expr expr_ty
+         -- No wrapping needed if the type is lifted, or is a literal string
+         -- or if we are wrapping it in one or more value lambdas
+  = do { expr1 <- lvlFloatRhs abs_vars dest_lvl rhs_env NonRecursive join_arity_maybe ann_expr
+                  -- Treat the expr just like a right-hand side
+       ; var <- newLvlVar expr1 join_arity_maybe
        ; let var2 = annotateBotStr var float_n_lams mb_bot_str
-       ; return (Let (NonRec (TB var2 (FloatMe dest_lvl)) abs_expr)
-                     (wrap_use (mkVarApps (Var var2) abs_vars))) }
+       ; return (Let (NonRec (TB var2 (FloatMe dest_lvl)) expr1)
+                     (mkVarApps (Var var2) abs_vars)) }
+
+  -- OK, so the float has an unlifted type
+  --     and no new value lambdas (float_is_new_lam is False)
+  -- Try for the boxing strategy
+  -- See Note [Floating MFEs of unlifted type]
+  | escapes_value_lam
+  , not (exprIsCheap expr)  -- Boxing/unboxing isn't worth
+                            -- it for cheap expressions
+  , Just (tc, _) <- splitTyConApp_maybe expr_ty
+  , Just dc <- boxingDataCon_maybe tc
+  , let dc_res_ty = dataConOrigResTy dc  -- No free type variables
+        [bx_bndr, ubx_bndr] = mkTemplateLocals [dc_res_ty, expr_ty]
+  = do { expr1 <- lvlExpr rhs_env ann_expr
+       ; let l1r       = incMinorLvlFrom rhs_env
+             float_rhs = mkLams abs_vars_w_lvls $
+                         Case expr1 (stayPut l1r ubx_bndr) dc_res_ty
+                             [(DEFAULT, [], mkConApp dc [Var ubx_bndr])]
+
+       ; var <- newLvlVar float_rhs Nothing
+       ; let l1u      = incMinorLvlFrom env
+             use_expr = Case (mkVarApps (Var var) abs_vars)
+                             (stayPut l1u bx_bndr) expr_ty
+                             [(DataAlt dc, [stayPut l1u ubx_bndr], Var ubx_bndr)]
+       ; return (Let (NonRec (TB var (FloatMe dest_lvl)) float_rhs)
+                     use_expr) }
 
-  | otherwise
+  | otherwise          -- e.g. do not float unboxed tuples
   = lvlExpr env ann_expr
 
   where
     expr         = deAnnotate ann_expr
+    expr_ty      = exprType expr
     fvs          = freeVarsOf ann_expr
-    is_bot       = isJust mb_bot_str
+    is_bot       = isBottomThunk mb_bot_str
+    is_function  = isFunction ann_expr
     mb_bot_str   = exprBotStrictness_maybe expr
                            -- See Note [Bottoming floats]
                            -- esp Bottoming floats (2)
-    dest_lvl     = destLevel env fvs (isFunction ann_expr) is_bot need_join
+    dest_lvl     = destLevel env fvs is_function is_bot need_join
     abs_vars     = abstractVars dest_lvl env fvs
-    float_is_lam = float_n_lams > 0       -- The floated thing will be a value lambda
-    float_n_lams = count isId abs_vars    -- so nothing is shared; the only benefit
-                                          -- is getting it to the top level
+
+    -- float_is_new_lam: the floated thing will be a new value lambda
+    -- replacing, say (g (x+4)) by (lvl x).  No work is saved, nor is
+    -- allocation saved.  The benefit is to get it to the top level
+    -- and hence out of the body of this function altogether, making
+    -- it smaller and more inlinable
+    float_is_new_lam = float_n_lams > 0
+    float_n_lams     = count isId abs_vars
+
     (rhs_env, abs_vars_w_lvls) = lvlLamBndrs env dest_lvl abs_vars
 
         -- Note [Join points and MFEs]
@@ -571,69 +628,88 @@ lvlMFE strict_ctxt env ann_expr
 
         -- A decision to float entails let-binding this thing, and we only do
         -- that if we'll escape a value lambda, or will go to the top level.
-    float_me = (dest_lvl `ltMajLvl` (le_ctxt_lvl env) -- Escapes a value lambda
-                && not float_is_lam)                  -- See Note [Escaping a value lambda]
-
-            || (isTopLvl dest_lvl       -- Only float if we are going to the top level
-                && floatConsts env      --   and the floatConsts flag is on
-                && not strict_ctxt)     -- Don't float from a strict context
-          -- We are keen to float something to the top level, even if it does not
-          -- escape a lambda, because then it needs no allocation.  But it's controlled
-          -- by a flag, because doing this too early loses opportunities for RULES
-          -- which (needless to say) are important in some nofib programs
-          -- (gcd is an example).
-          --
-          -- Beware:
-          --    concat = /\ a -> foldr ..a.. (++) []
-          -- was getting turned into
-          --    lvl    = /\ a -> foldr ..a.. (++) []
-          --    concat = /\ a -> lvl a
-          -- which is pretty stupid.  Hence the strict_ctxt test
-
-lvlNonTailMFE :: Bool                 -- True <=> strict context [body of case
-                                      --   or let]
-              -> LevelEnv             -- Level of in-scope names/tyvars
-              -> CoreExprWithFVs      -- input expression
-              -> LvlM LevelledExpr    -- Result expression
-lvlNonTailMFE strict_ctxt env ann_expr
-  = lvlMFE strict_ctxt (placeJoinCeiling env) ann_expr
-
-canFloat_maybe :: LevelEnv
-               -> Bool      -- Strict context
-               -> Bool      -- The float has a value lambda
-               -> CoreExpr
-               -> Maybe ( LevelledExpr -> LevelledExpr   -- Wrep the flaot
-                        , LevelledExpr -> LevelledExpr)  -- Wrap the use
--- See Note [Floating MFEs of unlifted type]
-canFloat_maybe env strict_ctxt float_is_lam expr
-  | float_is_lam || exprIsTopLevelBindable expr
-  = Just (id, id) -- No wrapping needed if the type is lifted, or
-                  -- if we are wrapping it in one or more value lambdas
-                  -- or making it a join point
-
-  -- OK, so the float has an unlifted type and no value lambdas
-  | strict_ctxt
-  , Just (tc, _) <- splitTyConApp_maybe expr_ty
-  , Just dc <- boxingDataCon_maybe tc
-  , let dc_res_ty = dataConOrigResTy dc  -- No free type variables
-        [bx_bndr, ubx_bndr] = mkTemplateLocals [dc_res_ty, expr_ty]
-        l1 = incMinorLvl (le_ctxt_lvl env)
-        l2 = incMinorLvl l1
-  = Just ( \e -> Case e (TB ubx_bndr (StayPut l1)) dc_res_ty
-                   [(DEFAULT, [], mkConApp dc [Var ubx_bndr])]
-         , \e -> Case e (TB bx_bndr (StayPut l1)) expr_ty
-                   [(DataAlt dc, [TB ubx_bndr (StayPut l2)], Var ubx_bndr)] )
+    float_me = saves_work || saves_alloc
 
-  | otherwise          -- e.g. do not float unboxed tuples
-  = Nothing
-  where expr_ty = exprType expr
+    -- We can save work if we can move a redex outside a value lambda
+    -- But if float_is_new_lam is True, then the redex is wrapped in a
+    -- a new lambda, so no work is saved
+    saves_work = escapes_value_lam && not float_is_new_lam
 
-{- Note [Floating MFEs of unlifted type]
+    escapes_value_lam = dest_lvl `ltMajLvl` (le_ctxt_lvl env)
+                  -- See Note [Escaping a value lambda]
+
+    -- See Note [Floating to the top]
+    saves_alloc =  isTopLvl dest_lvl
+                && floatConsts env
+                && (not strict_ctxt || is_bot || exprIsHNF expr)
+
+isBottomThunk :: Maybe (Arity, s) -> Bool
+-- See Note [Bottoming floats] (2)
+isBottomThunk (Just (0, _)) = True   -- Zero arity
+isBottomThunk _             = False
+
+{- Note [Floating to the top]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We are keen to float something to the top level, even if it does not
+escape a value lambda (and hence save work), for two reasons:
+
+  * Doing so makes the function smaller, by floating out
+    bottoming expressions, or integer or string literals.  That in
+    turn makes it easier to inline, with less duplication.
+
+  * (Minor) Doing so may turn a dynamic allocation (done by machine
+    instructions) into a static one. Minor because we are assuming
+    we are not escaping a value lambda
+
+But do not so if:
+     - the context is a strict, and
+     - the expression is not a HNF, and
+     - the expression is not bottoming
+
+Exammples:
+
+* Bottoming
+      f x = case x of
+              0 -> error <big thing>
+              _ -> x+1
+  Here we want to float (error <big thing>) to top level, abstracting
+  over 'x', so as to make f's RHS smaller.
+
+* HNF
+      f = case y of
+            True  -> p:q
+            False -> blah
+  We may as well float the (p:q) so it becomes a static data structure.
+
+* Case scrutinee
+      f = case g True of ....
+  Don't float (g True) to top level; then we have the admin of a
+  top-level thunk to worry about, with zero gain.
+
+* Case alternative
+      h = case y of
+             True  -> g True
+             False -> False
+  Don't float (g True) to the top level
+
+* Arguments
+     t = f (g True)
+  If f is lazy, we /do/ float (g True) because then we can allocate
+  the thunk statically rather than dynamically.  But if f is strict
+  we don't (see the use of idStrictness in lvlApp).  It's not clear
+  if this test is worth the bother: it's only about CAFs!
+
+It's controlled by a flag (floatConsts) , because doing this too
+early loses opportunities for RULES which (needless to say) are
+important in some nofib programs (gcd is an example).  [SPJ note:
+I think this is obselete; the flag seems always on.]
+
+Note [Floating MFEs of unlifted type]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Suppose we have
    case f x of (r::Int#) -> blah
 we'd like to float (f x). But it's not trivial because it has type
-Int#, and we don't want to evaluate it to early.  But we can instead
+Int#, and we don't want to evaluate it too early.  But we can instead
 float a boxed version
    y = case f x of r -> I# r
 and replace the original (f x) with
@@ -648,10 +724,6 @@ convenient boxing constructor (see boxingDataCon_maybe).  In
 particular we /don't/ do it for unboxed tuples; it's better to float
 the components of the tuple individually.
 
-The work is done by canFloat_maybe, which constructs both the code
-that wraps the floating binding, and the code to appear at the
-original use site.
-
 I did experiment with a form of boxing that works for any type, namely
 wrapping in a function.  In our example
 
@@ -678,14 +750,14 @@ we'd like to float the call to error, to get
         f = \x. g (lvl x)
   To achieve this we pass is_bot to destLevel
 
-* Bottoming floats (2): And we'd like to do this even if it's a
-  function that guarantees to return bottom:
+* Bottoming floats (2): we do not do this for functions that return
+  bottom.  Instead we treat the /body/ of such a function specially,
+  via point (1).  For example:
         f = \x. ....(\y z. if x then error y else error z)....
   ===>
-        lvl = \x y z. if b then error y else error z
-        f = \x. ...(lvl x)...
-  To achieve this we use exprBotStrictness_maybe, which spots
-  an expression that diverges after applying some arguments
+        lvl = \x z y. if b then error y else error z
+        f = \x. ...(\y z. lvl x z y)...
+  (There is no guarantee that we'll choose the perfect argument order.)
 
 See Maessen's paper 1999 "Bottom extraction: factoring error handling out
 of functional programs" (unpublished I think).
@@ -726,6 +798,8 @@ in exchange we build a thunk, which is bad.  This case reduces allocation
 by 7% in spectral/puzzle (a rather strange benchmark) and 1.2% in real/fem.
 Doesn't change any other allocation at all.
 
+We will make a separate decision for the scrutinees and alterantives.
+
 Note [Join points and MFEs]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -796,12 +870,12 @@ notWorthFloating e abs_vars
     go (Tick t e) n  = not (tickishIsCode t) && go e n
     go (Cast e _)  n = go e n
     go (App e arg) n
-       | (Type {}) <- arg     = go e n
-       | (Coercion {}) <- arg = go e n
-       | n==0                 = False
-       | is_triv arg          = go e (n-1)
-       | otherwise            = False
-    go _ _                    = False
+       | Type {}     <- arg = go e n
+       | Coercion {} <- arg = go e n
+       | n==0               = False
+       | is_triv arg        = go e (n-1)
+       | otherwise          = False
+    go _ _                  = False
 
     is_triv (Lit {})              = True        -- Treat all literals as trivial
     is_triv (Var {})              = True        -- (ie not worth floating)
@@ -818,8 +892,8 @@ It's important to float Integer literals, so that they get shared,
 rather than being allocated every time round the loop.
 Hence the litIsTrivial.
 
-We'd *like* to share MachStr literal strings too, mainly so we could
-CSE them, but alas can't do so directly because they are unlifted.
+Ditto literal strings (MachStr), which we'd like to float to top
+level, which is now possible.
 
 
 Note [Escaping a value lambda]
@@ -875,7 +949,7 @@ lvlBind env (AnnNonRec bndr rhs)
           -- aren't expensive either
 
   = -- No float
-    do { rhs' <- lvlRhs env NonRecursive False mb_join_arity rhs
+    do { rhs' <- lvlRhs env NonRecursive mb_join_arity rhs
        ; let  bind_lvl        = incMinorLvl (le_ctxt_lvl env)
               (env', [bndr']) = substAndLvlBndrs NonRecursive env bind_lvl [bndr]
        ; return (NonRec bndr' rhs', env') }
@@ -884,18 +958,18 @@ lvlBind env (AnnNonRec bndr rhs)
   | null abs_vars
   = do {  -- No type abstraction; clone existing binder
          rhs' <- lvlRhs (setCtxtLvl env dest_lvl) NonRecursive
-                        zapping_join mb_join_arity rhs
+                        zapped_join rhs
        ; (env', [bndr']) <- cloneLetVars NonRecursive env dest_lvl
-                                         zapping_join [bndr]
+                                         need_zap [bndr]
        ; let bndr2 = annotateBotStr bndr' 0 mb_bot_str
        ; return (NonRec (TB bndr2 (FloatMe dest_lvl)) rhs', env') }
 
   | otherwise
   = do {  -- Yes, type abstraction; create a new binder, extend substitution, etc
          rhs' <- lvlFloatRhs abs_vars dest_lvl env NonRecursive
-                             zapping_join mb_join_arity rhs
+                             zapped_join rhs
        ; (env', [bndr']) <- newPolyBndrs dest_lvl env abs_vars
-                                         zapping_join [bndr]
+                                         need_zap [bndr]
        ; let bndr2 = annotateBotStr bndr' n_extra mb_bot_str
        ; return (NonRec (TB bndr2 (FloatMe dest_lvl)) rhs', env') }
 
@@ -908,14 +982,16 @@ lvlBind env (AnnNonRec bndr rhs)
     mb_bot_str = exprBotStrictness_maybe (deAnnotate rhs)
                            -- See Note [Bottoming floats]
                            -- esp Bottoming floats (2)
-    is_bot     = isJust mb_bot_str
+    is_bot     = isBottomThunk mb_bot_str
     n_extra    = count isId abs_vars
 
     mb_join_arity = isJoinId_maybe bndr
     is_unfloatable_join = case mb_join_arity of Just ar -> ar > 0
                                                 Nothing -> False
       -- See Note [When to ruin a join point]
-    zapping_join = dest_lvl `ltLvl` joinCeilingLevel env
+    need_zap = dest_lvl `ltLvl` joinCeilingLevel env
+    zapped_join | need_zap  = Nothing    -- Zap the join point
+                | otherwise = mb_join_arity
 
 lvlBind env (AnnRec pairs)
   |  floatTopLvlOnly env && not (isTopLvl dest_lvl)
@@ -923,15 +999,15 @@ lvlBind env (AnnRec pairs)
   || not (profitableFloat env dest_lvl)
   = do { let bind_lvl = incMinorLvl (le_ctxt_lvl env)
              (env', bndrs') = substAndLvlBndrs Recursive env bind_lvl bndrs
-       ; rhss' <- zipWithM (lvlRhs env' Recursive False) mb_join_arities rhss
+       ; rhss' <- zipWithM (lvlRhs env' Recursive) mb_join_arities rhss
        ; return (Rec (bndrs' `zip` rhss'), env') }
 
   | null abs_vars
   = do { (new_env, new_bndrs) <- cloneLetVars Recursive env dest_lvl
-                                              zapping_joins bndrs
+                                              need_zap bndrs
        ; let env_rhs = setCtxtLvl new_env dest_lvl
-       ; new_rhss <- zipWithM (lvlRhs env_rhs Recursive zapping_joins)
-                              mb_join_arities rhss
+       ; new_rhss <- zipWithM (lvlRhs env_rhs Recursive)
+                              (map zap_join mb_join_arities) rhss
        ; return ( Rec ([TB b (FloatMe dest_lvl) | b <- new_bndrs] `zip` new_rhss)
                 , new_env) }
 
@@ -953,16 +1029,16 @@ lvlBind env (AnnRec pairs)
         rhs_lvl = le_ctxt_lvl rhs_env
 
     (rhs_env', [new_bndr]) <- cloneLetVars Recursive rhs_env rhs_lvl
-                                           zapping_joins [bndr]
+                                           need_zap [bndr]
     let
         (lam_bndrs, rhs_body)   = collectAnnBndrs rhs
         (body_env1, lam_bndrs1) = substBndrsSL NonRecursive rhs_env' lam_bndrs
         (body_env2, lam_bndrs2) = lvlLamBndrs body_env1 rhs_lvl lam_bndrs1
         mb_join_arity           = isJoinId_maybe bndr
-    new_rhs_body <- lvlRhs body_env2 Recursive zapping_joins
-                           mb_join_arity rhs_body
+    new_rhs_body <- lvlRhs body_env2 Recursive
+                           (zap_join mb_join_arity) rhs_body
     (poly_env, [poly_bndr]) <- newPolyBndrs dest_lvl env abs_vars
-                                            zapping_joins [bndr]
+                                            need_zap [bndr]
     return (Rec [(TB poly_bndr (FloatMe dest_lvl)
                  , mkLams abs_vars_w_lvls $
                    mkLams lam_bndrs2 $
@@ -973,10 +1049,9 @@ lvlBind env (AnnRec pairs)
 
   | otherwise  -- Non-null abs_vars
   = do { (new_env, new_bndrs) <- newPolyBndrs dest_lvl env abs_vars
-                                              zapping_joins bndrs
-       ; new_rhss <- zipWithM (lvlFloatRhs abs_vars dest_lvl new_env
-                                           Recursive zapping_joins)
-                              mb_join_arities rhss
+                                              need_zap bndrs
+       ; new_rhss <- zipWithM (lvlFloatRhs abs_vars dest_lvl new_env Recursive)
+                              (map zap_join mb_join_arities) rhss
        ; return ( Rec ([TB b (FloatMe dest_lvl) | b <- new_bndrs] `zip` new_rhss)
                 , new_env) }
 
@@ -999,17 +1074,18 @@ lvlBind env (AnnRec pairs)
     has_unfloatable_join
       = any (\mb_ar -> case mb_ar of Just ar -> ar > 0
                                      Nothing -> False) mb_join_arities
-    zapping_joins = dest_lvl `ltLvl` joinCeilingLevel env
+
+    need_zap = dest_lvl `ltLvl` joinCeilingLevel env
+    zap_join mb_join_arity | need_zap  = Nothing
+                           | otherwise = mb_join_arity
 
 lvlRhs :: LevelEnv
        -> RecFlag
-       -> Bool -- True <=> we're zapping a join point back to a value
        -> Maybe JoinArity
        -> CoreExprWithFVs
        -> LvlM LevelledExpr
-lvlRhs env rec_flag zapping_join mb_join_arity expr
-  = lvlFloatRhs [] (le_ctxt_lvl env) env rec_flag zapping_join
-                mb_join_arity expr
+lvlRhs env rec_flag mb_join_arity expr
+  = lvlFloatRhs [] (le_ctxt_lvl env) env rec_flag mb_join_arity expr
 
 profitableFloat :: LevelEnv -> Level -> Bool
 profitableFloat env dest_lvl
@@ -1038,26 +1114,25 @@ demanded.
 ----------------------------------------------------
 -- Three help functions for the type-abstraction case
 
-lvlFloatRhs :: [OutVar] -> Level -> LevelEnv -> RecFlag -> Bool
+lvlFloatRhs :: [OutVar] -> Level -> LevelEnv -> RecFlag
             -> Maybe JoinArity -> CoreExprWithFVs
             -> LvlM (Expr LevelledBndr)
-lvlFloatRhs abs_vars dest_lvl env rec zapping_joins mb_join_arity rhs
-  = do { body' <- if | Just _ <- mb_join_arity, not zapping_joins
-                       -> lvlExpr rhs_env body
-                     | otherwise
-                       -> lvlNonTailExpr rhs_env body
-       ; return (mkLams all_bndrs_w_lvls body') }
+lvlFloatRhs abs_vars dest_lvl env rec mb_join_arity rhs
+  = do { body' <- if any isId bndrs  -- See Note [Floating from a RHS]
+                  then lvlMFE  body_env True body
+                  else lvlExpr body_env      body
+       ; return (mkLams bndrs' body') }
   where
-    (bndrs, body)               | Just join_arity <- mb_join_arity
-                                = collectNAnnBndrs join_arity rhs
-                                | otherwise
-                                = collectAnnBndrs rhs
-    (env1, bndrs1)              = substBndrsSL NonRecursive env bndrs
-    all_bndrs                   = abs_vars ++ bndrs1
-    (rhs_env, all_bndrs_w_lvls) | Just _ <- mb_join_arity
-                                = lvlJoinBndrs env1 dest_lvl rec all_bndrs
-                                | otherwise
-                                = lvlLamBndrs env1 dest_lvl all_bndrs
+    (bndrs, body)     | Just join_arity <- mb_join_arity
+                      = collectNAnnBndrs join_arity rhs
+                      | otherwise
+                      = collectAnnBndrs rhs
+    (env1, bndrs1)    = substBndrsSL NonRecursive env bndrs
+    all_bndrs         = abs_vars ++ bndrs1
+    (body_env, bndrs') | Just _ <- mb_join_arity
+                      = lvlJoinBndrs env1 dest_lvl rec all_bndrs
+                      | otherwise
+                      = lvlLamBndrs (placeJoinCeiling env1) dest_lvl all_bndrs
         -- The important thing here is that we call lvlLamBndrs on
         -- all these binders at once (abs_vars and bndrs), so they
         -- all get the same major level.  Otherwise we create stupid
@@ -1065,6 +1140,37 @@ lvlFloatRhs abs_vars dest_lvl env rec zapping_joins mb_join_arity rhs
         -- in the end they don't because we never float bindings in
         -- between lambdas
 
+{- Note [Floating from a RHS]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When float the RHS of a let-binding, we don't always want to apply
+lvlMFE to the body of a lambda, as we usually do, because the entire
+binding body is already going to the right place (dest_lvl)
+
+A particular example is the top level.  Consider
+   concat = /\ a -> foldr ..a.. (++) []
+We don't want to float the body of the lambda to get
+   lvl    = /\ a -> foldr ..a.. (++) []
+   concat = /\ a -> lvl a
+That would be stupid.
+
+Previously this was avoided in a much nastier way, by testing strict_ctxt
+in float_me in lvlMFE.  But that wasn't even right because it would fail
+to float out the error sub-expression in
+    f = \x. case x of
+              True  -> error ("blah" ++ show x)
+              False -> ...
+
+But we must be careful!  If we had
+  f = \x -> factorial 20
+we /would/ want to float that (factorial 20) out!  Functions are treated
+differently: see the use of isFunction in the calls to destLevel. If
+there are only type lambdas, then destLevel will say "go to top, and
+abstract over the free tyars" and we don't want that here.
+
+Conclusion: use lvlMFE if there are any value lambdas, lvlExpr
+otherwise.  A little subtle, and I got it wrong to start with.
+-}
+
 {-
 ************************************************************************
 *                                                                      *
@@ -1125,9 +1231,10 @@ lvlBndrs :: LevelEnv -> Level -> [CoreBndr] -> (LevelEnv, [LevelledBndr])
 lvlBndrs env@(LE { le_lvl_env = lvl_env }) new_lvl bndrs
   = ( env { le_ctxt_lvl = new_lvl
           , le_lvl_env  = addLvls new_lvl lvl_env bndrs }
-    , lvld_bndrs)
-  where
-    lvld_bndrs    = [TB bndr (StayPut new_lvl) | bndr <- bndrs]
+    , map (stayPut new_lvl) bndrs)
+
+stayPut :: Level -> OutVar -> LevelledBndr
+stayPut new_lvl bndr = TB bndr (StayPut new_lvl)
 
   -- Destination level is the max Id level of the expression
   -- (We'll abstract the type variables, if any.)
@@ -1137,25 +1244,30 @@ destLevel :: LevelEnv -> DVarSet
           -> Bool   -- True <=> is join point (or can be floated anyway)
           -> Level
 destLevel env fvs is_function is_bot is_join
-  | is_bot = tOP_LEVEL  -- Send bottoming bindings to the top
-                        -- regardless; see Note [Bottoming floats]
+  | is_bot              -- Send bottoming bindings to the top
+  = tOP_LEVEL           -- regardless; see Note [Bottoming floats]
                         -- Esp Bottoming floats (1)
+
   | Just n_args <- floatLams env
   , n_args > 0  -- n=0 case handled uniformly by the 'otherwise' case
   , is_function
   , countFreeIds fvs <= n_args
   = tOP_LEVEL   -- Send functions to top level; see
                 -- the comments with isFunction
-  | is_join, hits_ceiling = join_ceiling
+
+  | is_join
+  , hits_ceiling
+  = join_ceiling
+
   | otherwise = max_fv_level
   where
     max_fv_level = maxFvLevel isId env fvs -- Max over Ids only; the tyvars
                                            -- will be abstracted
 
+    join_ceiling = joinCeilingLevel env
     hits_ceiling = max_fv_level `ltLvl` join_ceiling &&
                    not (isTopLvl max_fv_level)
                      -- Note [When to ruin a join point]
-    join_ceiling = joinCeilingLevel env
 
 isFunction :: CoreExprWithFVs -> Bool
 -- The idea here is that we want to float *functions* to
@@ -1255,6 +1367,9 @@ floatTopLvlOnly le = floatToTopLevelOnly (le_switches le)
 setCtxtLvl :: LevelEnv -> Level -> LevelEnv
 setCtxtLvl env lvl = env { le_ctxt_lvl = lvl }
 
+incMinorLvlFrom :: LevelEnv -> Level
+incMinorLvlFrom env = incMinorLvl (le_ctxt_lvl env)
+
 -- extendCaseBndrEnv adds the mapping case-bndr->scrut-var if it can
 -- See Note [Binder-swap during float-out]
 extendCaseBndrEnv :: LevelEnv
diff --git a/compiler/simplCore/Simplify.hs b/compiler/simplCore/Simplify.hs
index 7357e32338..7b684f95fd 100644
--- a/compiler/simplCore/Simplify.hs
+++ b/compiler/simplCore/Simplify.hs
@@ -654,7 +654,7 @@ makeTrivialWithInfo :: TopLevelFlag -> SimplEnv
 -- Returned SimplEnv has same substitution as incoming one
 makeTrivialWithInfo top_lvl env context info expr
   | exprIsTrivial expr                          -- Already trivial
-  || not (bindingOk top_lvl expr)               -- Cannot trivialise
+  || not (bindingOk top_lvl expr expr_ty)       -- Cannot trivialise
                                                 --   See Note [Cannot trivialise]
   = return (env, expr)
   | otherwise           -- See Note [Take care] below
@@ -676,11 +676,11 @@ makeTrivialWithInfo top_lvl env context info expr
   where
     expr_ty = exprType expr
 
-bindingOk :: TopLevelFlag -> CoreExpr -> Bool
+bindingOk :: TopLevelFlag -> CoreExpr -> Type -> Bool
 -- True iff we can have a binding of this expression at this level
 -- Precondition: the type is the type of the expression
-bindingOk top_lvl expr
-  | isTopLevel top_lvl = exprIsTopLevelBindable expr
+bindingOk top_lvl expr expr_ty
+  | isTopLevel top_lvl = exprIsTopLevelBindable expr expr_ty
   | otherwise          = True
 
 {-
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 491fa19969..a9464fcb6b 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -608,7 +608,7 @@ test('T5837',
              # 2014-12-08: 115905208  Constraint solver perf improvements (esp kick-out)
              # 2016-04-06: 24199320  (x86/Linux, 64-bit machine) TypeInType
 
-           (wordsize(64), 57861352, 10)])
+           (wordsize(64), 50253880, 5)])
              # sample: 3926235424 (amd64/Linux, 15/2/2012)
              # 2012-10-02 81879216
              # 2012-09-20 87254264 amd64/Linux
@@ -634,6 +634,10 @@ test('T5837',
              #                          compilation pipeline
              # 2017-01-24 57861352  amd64/Linux, very likely due to the top-level strings
              #                          in Core patch.
+             # 2017-02-07 50253880  Another improvement in SetLevels.  I don't think
+             #                      all the gain here is from this patch, but I think it
+             #                      just pushed it over the edge, so I'm re-centreing, and
+             #                      changing to 5% tolerance
       ],
       compile, ['-freduction-depth=50'])
 
@@ -827,12 +831,16 @@ test('T9961',
 test('T9233',
     [ only_ways(['normal']),
       compiler_stats_num_field('bytes allocated',
-        [(wordsize(64), 861862608, 5),
-
+        [(wordsize(64), 884436192, 5),
          # 2015-08-04    999826288     initial value
          # 2016-04-14   1066246248     Final demand analyzer run
          # 2016-06-18    984268712     shuffling around of Data.Functor.Identity
-         # 2017-0123     861862608     worker/wrapper evald-ness flags; 10% improvement!
+         # 2017-01-20    920101608     Improvement to SetLevels apparently saved 4.2% in
+         #                             compiler allocation.  Program size seems virtually
+         #                             unchanged; maybe the compiler itself is a little faster
+         # 2017-01-23    861862608     worker/wrapper evald-ness flags; another 5% improvement!
+         # 2017-02-01    894486272     Join points
+         # 2017-02-07    884436192     Another improvement to SetLevels
 
          (wordsize(32),  515672240, 5)   # Put in your value here if you hit this
          # 2016-04-06    515672240     (x86/Linux) initial value
@@ -942,9 +950,10 @@ test('T13035',
 test('T13056',
      [ only_ways(['optasm']),
        compiler_stats_num_field('bytes allocated',
-         [(wordsize(64), 546800240, 5),
+         [(wordsize(64), 524611224, 5),
          # 2017-01-06    520166912 initial
          # 2017-01-31    546800240 Join points (#12988)
+         # 2017-02-07    524611224 new SetLevels
          ]),
      ],
      compile,