Float out partial applications

This fixes at least one case of performance regression in 7.0, and is nice win on nofib: Program Size Allocs Runtime Elapsed Min +0.3% -63.0% -38.5% -38.7% Max +1.2% +0.2% +0.9% +0.9% Geometric Mean +0.6% -3.0% -6.4% -6.6%
author: Simon Marlow <marlowsd@gmail.com> 2010-10-08 09:27:09 +0000
committer: Simon Marlow <marlowsd@gmail.com> 2010-10-08 09:27:09 +0000
commit: a66541af84d102f32b73fb7f89f48008c01092a6 (patch)
tree: 367cdd29dc93f8f83cbee620e3a5be4b88c12a5c /compiler
parent: 5e86045ae5f90d9138e395fde5792e50ac8f8afd (diff)
download: haskell-a66541af84d102f32b73fb7f89f48008c01092a6.tar.gz
3 files changed, 82 insertions, 22 deletions
diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs
index c74de06b24..5e03e4d64e 100644
--- a/compiler/coreSyn/CoreSyn.lhs
+++ b/compiler/coreSyn/CoreSyn.lhs
@@ -59,6 +59,9 @@ module CoreSyn (
 	-- * Annotated expression data types
 	AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
 	
+        -- ** Operations on annotated expressions
+        collectAnnArgs,
+
 	-- ** Operations on annotations
 	deAnnotate, deAnnotate', deAnnAlt, collectAnnBndrs,
 
@@ -1142,6 +1145,17 @@ data AnnBind bndr annot
 \end{code}
 
 \begin{code}
+-- | Takes a nested application expression and returns the the function
+-- being applied and the arguments to which it is applied
+collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
+collectAnnArgs expr
+  = go expr []
+  where
+    go (_, AnnApp f a) as = go f (a:as)
+    go e 	       as = (e, as)
+\end{code}
+
+\begin{code}
 deAnnotate :: AnnExpr bndr annot -> Expr bndr
 deAnnotate (_, e) = deAnnotate' e
 
diff --git a/compiler/simplCore/CoreMonad.lhs b/compiler/simplCore/CoreMonad.lhs
index 00dedffa38..e3dbf3a304 100644
--- a/compiler/simplCore/CoreMonad.lhs
+++ b/compiler/simplCore/CoreMonad.lhs
@@ -304,8 +304,10 @@ data SimplifierSwitch
 \begin{code}
 data FloatOutSwitches = FloatOutSwitches {
         floatOutLambdas :: Bool,     -- ^ True <=> float lambdas to top level
-        floatOutConstants :: Bool    -- ^ True <=> float constants to top level,
+        floatOutConstants :: Bool,   -- ^ True <=> float constants to top level,
                                      --            even if they do not escape a lambda
+        floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications
+                                            --            based on arity information.
     }
 instance Outputable FloatOutSwitches where
     ppr = pprFloatOutSwitches
@@ -320,10 +322,6 @@ pprFloatOutSwitches sw = pp_not (floatOutLambdas sw) <+> text "lambdas" <> comma
 -- | Switches that specify the minimum amount of floating out
 -- gentleFloatOutSwitches :: FloatOutSwitches
 -- gentleFloatOutSwitches = FloatOutSwitches False False
-
--- | Switches that do not specify floating out of lambdas, just of constants
-constantsOnlyFloatOutSwitches :: FloatOutSwitches
-constantsOnlyFloatOutSwitches = FloatOutSwitches False True
 \end{code}
 
 
@@ -420,14 +418,28 @@ getCoreToDo dflags
         -- so that overloaded functions have all their dictionary lambdas manifest
         runWhen do_specialise CoreDoSpecialising,
 
-        runWhen full_laziness (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
+        runWhen full_laziness $
+           CoreDoFloatOutwards FloatOutSwitches {
+                                 floatOutLambdas   = False,
+                                 floatOutConstants = True,
+                                 floatOutPartialApplications = False },
       		-- Was: gentleFloatOutSwitches	
-		-- I have no idea why, but not floating constants to top level is
-		-- very bad in some cases. 
+                --
+		-- I have no idea why, but not floating constants to
+		-- top level is very bad in some cases.
+                --
 		-- Notably: p_ident in spectral/rewrite
-		-- 	    Changing from "gentle" to "constantsOnly" improved
-		-- 	    rewrite's allocation by 19%, and made  0.0% difference
-		-- 	    to any other nofib benchmark
+		-- 	    Changing from "gentle" to "constantsOnly"
+		-- 	    improved rewrite's allocation by 19%, and
+		-- 	    made 0.0% difference to any other nofib
+		-- 	    benchmark
+                --
+                -- Not doing floatOutPartialApplications yet, we'll do
+                -- that later on when we've had a chance to get more
+                -- accurate arity information.  In fact it makes no
+                -- difference at all to performance if we do it here,
+                -- but maybe we save some unnecessary to-and-fro in
+                -- the simplifier.
 
         runWhen do_float_in CoreDoFloatInwards,
 
@@ -452,8 +464,11 @@ getCoreToDo dflags
                 simpl_phase 0 ["post-worker-wrapper"] max_iter
                 ]),
 
-        runWhen full_laziness
-          (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
+        runWhen full_laziness $
+           CoreDoFloatOutwards FloatOutSwitches {
+                                 floatOutLambdas   = False,
+                                 floatOutConstants = True,
+                                 floatOutPartialApplications = True },
                 -- nofib/spectral/hartel/wang doubles in speed if you
                 -- do full laziness late in the day.  It only happens
                 -- after fusion and other stuff, so the early pass doesn't
diff --git a/compiler/simplCore/SetLevels.lhs b/compiler/simplCore/SetLevels.lhs
index 23874bfafc..ebfc27ea6e 100644
--- a/compiler/simplCore/SetLevels.lhs
+++ b/compiler/simplCore/SetLevels.lhs
@@ -60,11 +60,7 @@ import CoreArity	( exprBotStrictness_maybe )
 import CoreFVs		-- all of it
 import CoreSubst	( Subst, emptySubst, extendInScope, extendInScopeList,
 			  extendIdSubst, cloneIdBndr, cloneRecIdBndrs )
-import Id		( idType, mkLocalIdWithInfo, mkSysLocal, isOneShotLambda,
-			  zapDemandIdInfo, transferPolyIdInfo,
-			  idSpecialisation, idUnfolding, setIdInfo, 
-			  setIdStrictness, setIdArity
-			)
+import Id
 import IdInfo
 import Var
 import VarSet
@@ -250,10 +246,42 @@ lvlExpr _ _ (  _, AnnType ty) = return (Type ty)
 lvlExpr _ env (_, AnnVar v)   = return (lookupVar env v)
 lvlExpr _ _   (_, AnnLit lit) = return (Lit lit)
 
-lvlExpr ctxt_lvl env (_, AnnApp fun arg) = do
-    fun' <- lvlExpr ctxt_lvl env fun   -- We don't do MFE on partial applications
-    arg' <- lvlMFE  False ctxt_lvl env arg
-    return (App fun' arg')
+lvlExpr ctxt_lvl env expr@(_, AnnApp _ _) = do
+    let
+      (fun, args) = collectAnnArgs expr
+    --
+    case fun of
+         -- float out partial applications.  This is very beneficial
+         -- in some cases (-7% runtime -4% alloc over nofib -O2).
+         -- In order to float a PAP, there must be a function at the
+         -- head of the application, and the application must be
+         -- over-saturated with respect to the function's arity.
+      (_, AnnVar f) | floatPAPs env &&
+                      arity > 0 && arity < n_val_args ->
+        do
+         let (lapp, rargs) = left (n_val_args - arity) expr []
+         rargs' <- mapM (lvlMFE False ctxt_lvl env) rargs
+         lapp' <- lvlMFE False ctxt_lvl env lapp
+         return (foldl App lapp' rargs')
+        where
+         n_val_args = count (isValArg . deAnnotate) args
+         arity = idArity f
+
+         -- separate out the PAP that we are floating from the extra
+         -- arguments, by traversing the spine until we have collected
+         -- (n_val_args - arity) value arguments.
+         left 0 e               rargs = (e, rargs)
+         left n (_, AnnApp f a) rargs
+            | isValArg (deAnnotate a) = left (n-1) f (a:rargs)
+            | otherwise               = left n     f (a:rargs)
+         left _ _ _                   = panic "SetLevels.lvlExpr.left"
+
+         -- No PAPs that we can float: just carry on with the
+         -- arguments and the function.
+      _otherwise -> do
+         args' <- mapM (lvlMFE False ctxt_lvl env) args
+         fun'  <- lvlExpr ctxt_lvl env fun
+         return (foldl App fun' args')
 
 lvlExpr ctxt_lvl env (_, AnnNote note expr) = do
     expr' <- lvlExpr ctxt_lvl env expr
@@ -741,6 +769,9 @@ floatLams (fos, _, _, _) = floatOutLambdas fos
 floatConsts :: LevelEnv -> Bool
 floatConsts (fos, _, _, _) = floatOutConstants fos
 
+floatPAPs :: LevelEnv -> Bool
+floatPAPs (fos, _, _, _) = floatOutPartialApplications fos
+
 extendLvlEnv :: LevelEnv -> [TaggedBndr Level] -> LevelEnv
 -- Used when *not* cloning
 extendLvlEnv (float_lams, lvl_env, subst, id_env) prs
author	Simon Marlow <marlowsd@gmail.com>	2010-10-08 09:27:09 +0000
committer	Simon Marlow <marlowsd@gmail.com>	2010-10-08 09:27:09 +0000
commit	a66541af84d102f32b73fb7f89f48008c01092a6 (patch)
tree	367cdd29dc93f8f83cbee620e3a5be4b88c12a5c /compiler
parent	5e86045ae5f90d9138e395fde5792e50ac8f8afd (diff)
download	haskell-a66541af84d102f32b73fb7f89f48008c01092a6.tar.gz