summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2010-10-08 09:27:09 +0000
committerSimon Marlow <marlowsd@gmail.com>2010-10-08 09:27:09 +0000
commita66541af84d102f32b73fb7f89f48008c01092a6 (patch)
tree367cdd29dc93f8f83cbee620e3a5be4b88c12a5c /compiler
parent5e86045ae5f90d9138e395fde5792e50ac8f8afd (diff)
downloadhaskell-a66541af84d102f32b73fb7f89f48008c01092a6.tar.gz
Float out partial applications
This fixes at least one case of performance regression in 7.0, and is nice win on nofib: Program Size Allocs Runtime Elapsed Min +0.3% -63.0% -38.5% -38.7% Max +1.2% +0.2% +0.9% +0.9% Geometric Mean +0.6% -3.0% -6.4% -6.6%
Diffstat (limited to 'compiler')
-rw-r--r--compiler/coreSyn/CoreSyn.lhs14
-rw-r--r--compiler/simplCore/CoreMonad.lhs41
-rw-r--r--compiler/simplCore/SetLevels.lhs49
3 files changed, 82 insertions, 22 deletions
diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs
index c74de06b24..5e03e4d64e 100644
--- a/compiler/coreSyn/CoreSyn.lhs
+++ b/compiler/coreSyn/CoreSyn.lhs
@@ -59,6 +59,9 @@ module CoreSyn (
-- * Annotated expression data types
AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
+ -- ** Operations on annotated expressions
+ collectAnnArgs,
+
-- ** Operations on annotations
deAnnotate, deAnnotate', deAnnAlt, collectAnnBndrs,
@@ -1142,6 +1145,17 @@ data AnnBind bndr annot
\end{code}
\begin{code}
+-- | Takes a nested application expression and returns the the function
+-- being applied and the arguments to which it is applied
+collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
+collectAnnArgs expr
+ = go expr []
+ where
+ go (_, AnnApp f a) as = go f (a:as)
+ go e as = (e, as)
+\end{code}
+
+\begin{code}
deAnnotate :: AnnExpr bndr annot -> Expr bndr
deAnnotate (_, e) = deAnnotate' e
diff --git a/compiler/simplCore/CoreMonad.lhs b/compiler/simplCore/CoreMonad.lhs
index 00dedffa38..e3dbf3a304 100644
--- a/compiler/simplCore/CoreMonad.lhs
+++ b/compiler/simplCore/CoreMonad.lhs
@@ -304,8 +304,10 @@ data SimplifierSwitch
\begin{code}
data FloatOutSwitches = FloatOutSwitches {
floatOutLambdas :: Bool, -- ^ True <=> float lambdas to top level
- floatOutConstants :: Bool -- ^ True <=> float constants to top level,
+ floatOutConstants :: Bool, -- ^ True <=> float constants to top level,
-- even if they do not escape a lambda
+ floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications
+ -- based on arity information.
}
instance Outputable FloatOutSwitches where
ppr = pprFloatOutSwitches
@@ -320,10 +322,6 @@ pprFloatOutSwitches sw = pp_not (floatOutLambdas sw) <+> text "lambdas" <> comma
-- | Switches that specify the minimum amount of floating out
-- gentleFloatOutSwitches :: FloatOutSwitches
-- gentleFloatOutSwitches = FloatOutSwitches False False
-
--- | Switches that do not specify floating out of lambdas, just of constants
-constantsOnlyFloatOutSwitches :: FloatOutSwitches
-constantsOnlyFloatOutSwitches = FloatOutSwitches False True
\end{code}
@@ -420,14 +418,28 @@ getCoreToDo dflags
-- so that overloaded functions have all their dictionary lambdas manifest
runWhen do_specialise CoreDoSpecialising,
- runWhen full_laziness (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
+ runWhen full_laziness $
+ CoreDoFloatOutwards FloatOutSwitches {
+ floatOutLambdas = False,
+ floatOutConstants = True,
+ floatOutPartialApplications = False },
-- Was: gentleFloatOutSwitches
- -- I have no idea why, but not floating constants to top level is
- -- very bad in some cases.
+ --
+ -- I have no idea why, but not floating constants to
+ -- top level is very bad in some cases.
+ --
-- Notably: p_ident in spectral/rewrite
- -- Changing from "gentle" to "constantsOnly" improved
- -- rewrite's allocation by 19%, and made 0.0% difference
- -- to any other nofib benchmark
+ -- Changing from "gentle" to "constantsOnly"
+ -- improved rewrite's allocation by 19%, and
+ -- made 0.0% difference to any other nofib
+ -- benchmark
+ --
+ -- Not doing floatOutPartialApplications yet, we'll do
+ -- that later on when we've had a chance to get more
+ -- accurate arity information. In fact it makes no
+ -- difference at all to performance if we do it here,
+ -- but maybe we save some unnecessary to-and-fro in
+ -- the simplifier.
runWhen do_float_in CoreDoFloatInwards,
@@ -452,8 +464,11 @@ getCoreToDo dflags
simpl_phase 0 ["post-worker-wrapper"] max_iter
]),
- runWhen full_laziness
- (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
+ runWhen full_laziness $
+ CoreDoFloatOutwards FloatOutSwitches {
+ floatOutLambdas = False,
+ floatOutConstants = True,
+ floatOutPartialApplications = True },
-- nofib/spectral/hartel/wang doubles in speed if you
-- do full laziness late in the day. It only happens
-- after fusion and other stuff, so the early pass doesn't
diff --git a/compiler/simplCore/SetLevels.lhs b/compiler/simplCore/SetLevels.lhs
index 23874bfafc..ebfc27ea6e 100644
--- a/compiler/simplCore/SetLevels.lhs
+++ b/compiler/simplCore/SetLevels.lhs
@@ -60,11 +60,7 @@ import CoreArity ( exprBotStrictness_maybe )
import CoreFVs -- all of it
import CoreSubst ( Subst, emptySubst, extendInScope, extendInScopeList,
extendIdSubst, cloneIdBndr, cloneRecIdBndrs )
-import Id ( idType, mkLocalIdWithInfo, mkSysLocal, isOneShotLambda,
- zapDemandIdInfo, transferPolyIdInfo,
- idSpecialisation, idUnfolding, setIdInfo,
- setIdStrictness, setIdArity
- )
+import Id
import IdInfo
import Var
import VarSet
@@ -250,10 +246,42 @@ lvlExpr _ _ ( _, AnnType ty) = return (Type ty)
lvlExpr _ env (_, AnnVar v) = return (lookupVar env v)
lvlExpr _ _ (_, AnnLit lit) = return (Lit lit)
-lvlExpr ctxt_lvl env (_, AnnApp fun arg) = do
- fun' <- lvlExpr ctxt_lvl env fun -- We don't do MFE on partial applications
- arg' <- lvlMFE False ctxt_lvl env arg
- return (App fun' arg')
+lvlExpr ctxt_lvl env expr@(_, AnnApp _ _) = do
+ let
+ (fun, args) = collectAnnArgs expr
+ --
+ case fun of
+ -- float out partial applications. This is very beneficial
+ -- in some cases (-7% runtime -4% alloc over nofib -O2).
+ -- In order to float a PAP, there must be a function at the
+ -- head of the application, and the application must be
+ -- over-saturated with respect to the function's arity.
+ (_, AnnVar f) | floatPAPs env &&
+ arity > 0 && arity < n_val_args ->
+ do
+ let (lapp, rargs) = left (n_val_args - arity) expr []
+ rargs' <- mapM (lvlMFE False ctxt_lvl env) rargs
+ lapp' <- lvlMFE False ctxt_lvl env lapp
+ return (foldl App lapp' rargs')
+ where
+ n_val_args = count (isValArg . deAnnotate) args
+ arity = idArity f
+
+ -- separate out the PAP that we are floating from the extra
+ -- arguments, by traversing the spine until we have collected
+ -- (n_val_args - arity) value arguments.
+ left 0 e rargs = (e, rargs)
+ left n (_, AnnApp f a) rargs
+ | isValArg (deAnnotate a) = left (n-1) f (a:rargs)
+ | otherwise = left n f (a:rargs)
+ left _ _ _ = panic "SetLevels.lvlExpr.left"
+
+ -- No PAPs that we can float: just carry on with the
+ -- arguments and the function.
+ _otherwise -> do
+ args' <- mapM (lvlMFE False ctxt_lvl env) args
+ fun' <- lvlExpr ctxt_lvl env fun
+ return (foldl App fun' args')
lvlExpr ctxt_lvl env (_, AnnNote note expr) = do
expr' <- lvlExpr ctxt_lvl env expr
@@ -741,6 +769,9 @@ floatLams (fos, _, _, _) = floatOutLambdas fos
floatConsts :: LevelEnv -> Bool
floatConsts (fos, _, _, _) = floatOutConstants fos
+floatPAPs :: LevelEnv -> Bool
+floatPAPs (fos, _, _, _) = floatOutPartialApplications fos
+
extendLvlEnv :: LevelEnv -> [TaggedBndr Level] -> LevelEnv
-- Used when *not* cloning
extendLvlEnv (float_lams, lvl_env, subst, id_env) prs