summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Eisenberg <eir@cis.upenn.edu>2014-12-16 16:35:43 -0500
committerRichard Eisenberg <eir@cis.upenn.edu>2014-12-17 10:47:17 -0500
commit922168fda3b3a3b96033a9c5d38f3fe70a99fd63 (patch)
treecaa9d5a6e1995b42ca933623166a357f0e8dcc81
parentea22a8f721f440458554c7500686baef57da4d4d (diff)
downloadhaskell-922168fda3b3a3b96033a9c5d38f3fe70a99fd63.tar.gz
Performance enhancements in TcFlatten.
This commit fixes some performance regressions introduced by 0cc47eb, adding more `Coercible` magic to the solver. See Note [flatten_many performance] in TcFlatten for more info. The improvements do not quite restore the old numbers. Given that the solver is really more involved now, I am accepting this regression. The way forward (I believe) would be to have *two* flatteners: one that deals only with nominal equalities and thus never checks roles, and the more general one. A nice design of keeping this performant without duplicating code eludes me, but someone else is welcome to take a stab.
-rw-r--r--compiler/typecheck/TcFlatten.hs83
-rw-r--r--compiler/utils/MonadUtils.hs3
-rw-r--r--testsuite/tests/perf/compiler/all.T16
3 files changed, 87 insertions, 15 deletions
diff --git a/compiler/typecheck/TcFlatten.hs b/compiler/typecheck/TcFlatten.hs
index 34c2c4a04a..818965d647 100644
--- a/compiler/typecheck/TcFlatten.hs
+++ b/compiler/typecheck/TcFlatten.hs
@@ -28,10 +28,11 @@ import TcSMonad as TcS
import DynFlags( DynFlags )
import Util
-import MonadUtils ( zipWithAndUnzipM )
import Bag
import FastString
import Control.Monad( when, liftM )
+import MonadUtils ( zipWithAndUnzipM )
+import GHC.Exts ( inline )
{-
Note [The flattening story]
@@ -643,6 +644,37 @@ soon throw out the phantoms when decomposing a TyConApp. (Or, the
canonicaliser will emit an insoluble, in which case the unflattened version
yields a better error message anyway.)
+Note [flatten_many performance]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In programs with lots of type-level evaluation, flatten_many becomes
+part of a tight loop. For example, see test perf/compiler/T9872a, which
+calls flatten_many a whopping 7,106,808 times. It is thus important
+that flatten_many be efficient.
+
+Performance testing showed that the current implementation is indeed
+efficient. It's critically important that zipWithAndUnzipM be
+specialized to TcS, and it's also quite helpful to actually `inline`
+it. On test T9872a, here are the allocation stats (Dec 16, 2014):
+
+ * Unspecialized, uninlined: 8,472,613,440 bytes allocated in the heap
+ * Specialized, uninlined: 6,639,253,488 bytes allocated in the heap
+ * Specialized, inlined: 6,281,539,792 bytes allocated in the heap
+
+To improve performance even further, flatten_many_nom is split off
+from flatten_many, as nominal equality is the common case. This would
+be natural to write using mapAndUnzipM, but even inlined, that function
+is not as performant as a hand-written loop.
+
+ * mapAndUnzipM, inlined: 7,463,047,432 bytes allocated in the heap
+ * hand-written recursion: 5,848,602,848 bytes allocated in the heap
+
+If you make any change here, pay close attention to the T9872{a,b,c} tests
+and T5321Fun.
+
+If we need to make this yet more performant, a possible way forward is to
+duplicate the flattener code for the nominal case, and make that case
+faster. This doesn't seem quite worth it, yet.
+
-}
------------------
@@ -676,13 +708,24 @@ flatten_many :: FlattenEnv -> [Role] -> [Type] -> TcS ([Xi], [TcCoercion])
-- we merely want (a) Given/Solved/Derived/Wanted info
-- (b) the GivenLoc/WantedLoc for when we create new evidence
flatten_many fmode roles tys
- = zipWithAndUnzipM go roles tys
+-- See Note [flatten_many performance]
+ = inline zipWithAndUnzipM go roles tys
where
- go Nominal ty = flatten_one (fmode { fe_eq_rel = NomEq }) ty
- go Representational ty = flatten_one (fmode { fe_eq_rel = ReprEq }) ty
+ go Nominal ty = flatten_one (setFEEqRel fmode NomEq) ty
+ go Representational ty = flatten_one (setFEEqRel fmode ReprEq) ty
go Phantom ty = -- See Note [Phantoms in the flattener]
return (ty, mkTcPhantomCo ty ty)
+-- | Like 'flatten_many', but assumes that every role is nominal.
+flatten_many_nom :: FlattenEnv -> [Type] -> TcS ([Xi], [TcCoercion])
+flatten_many_nom _ [] = return ([], [])
+-- See Note [flatten_many performance]
+flatten_many_nom fmode (ty:tys)
+ = ASSERT( fe_eq_rel fmode == NomEq )
+ do { (xi, co) <- flatten_one fmode ty
+ ; (xis, cos) <- flatten_many_nom fmode tys
+ ; return (xi:xis, co:cos) }
+
------------------
flatten_one :: FlattenEnv -> TcType -> TcS (Xi, TcCoercion)
-- Flatten a type to get rid of type function applications, returning
@@ -707,7 +750,7 @@ flatten_one fmode (AppTy ty1 ty2)
return (mkAppTy xi1 ty2, co1 `mkTcAppCo` mkTcNomReflCo ty2) }
where
flatten_rhs xi1 co1 eq_rel2
- = do { (xi2,co2) <- flatten_one (fmode { fe_eq_rel = eq_rel2 }) ty2
+ = do { (xi2,co2) <- flatten_one (setFEEqRel fmode eq_rel2) ty2
; traceTcS "flatten/appty"
(ppr ty1 $$ ppr ty2 $$ ppr xi1 $$
ppr co1 $$ ppr xi2 $$ ppr co2)
@@ -757,14 +800,16 @@ flatten_one fmode ty@(ForAllTy {})
-- We allow for-alls when, but only when, no type function
-- applications inside the forall involve the bound type variables.
= do { let (tvs, rho) = splitForAllTys ty
- ; (rho', co) <- flatten_one (fmode { fe_mode = FM_SubstOnly }) rho
+ ; (rho', co) <- flatten_one (setFEMode fmode FM_SubstOnly) rho
-- Substitute only under a forall
-- See Note [Flattening under a forall]
; return (mkForAllTys tvs rho', foldr mkTcForAllCo co tvs) }
flattenTyConApp :: FlattenEnv -> TyCon -> [TcType] -> TcS (Xi, TcCoercion)
flattenTyConApp fmode tc tys
- = do { (xis, cos) <- flatten_many fmode (tyConRolesX role tc) tys
+ = do { (xis, cos) <- case fe_eq_rel fmode of
+ NomEq -> flatten_many_nom fmode tys
+ ReprEq -> flatten_many fmode (tyConRolesX role tc) tys
; return (mkTyConApp tc xis, mkTcTyConAppCo role tc cos) }
where
role = feRole fmode
@@ -855,8 +900,7 @@ flatten_exact_fam_app fmode tc tys
roles = tyConRolesX (feRole fmode) tc
flatten_exact_fam_app_fully fmode tc tys
- = do { let roles = tyConRolesX (feRole fmode) tc
- ; (xis, cos) <- flatten_many (fmode { fe_mode = FM_FlattenAll }) roles tys
+ = do { (xis, cos) <- flatten_many_nom (setFEEqRel (setFEMode fmode FM_FlattenAll) NomEq) tys
; let ret_co = mkTcTyConAppCo (feRole fmode) tc cos
-- ret_co :: F xis ~ F tys
@@ -1222,7 +1266,7 @@ flattenTyVarFinal :: FlattenEnv -> TcTyVar -> TcS TyVar
flattenTyVarFinal fmode tv
= -- Done, but make sure the kind is zonked
do { let kind = tyVarKind tv
- kind_fmode = fmode { fe_mode = FM_SubstOnly }
+ kind_fmode = setFEMode fmode FM_SubstOnly
; (new_knd, _kind_co) <- flatten_one kind_fmode kind
; return (setVarType tv new_knd) }
@@ -1506,3 +1550,22 @@ unsolved constraints. The flat form will be
Flatten using the fun-eqs first.
-}
+
+-- | Change the 'EqRel' in a 'FlattenEnv'. Avoids allocating a
+-- new 'FlattenEnv' where possible.
+setFEEqRel :: FlattenEnv -> EqRel -> FlattenEnv
+setFEEqRel fmode@(FE { fe_eq_rel = old_eq_rel }) new_eq_rel
+ | old_eq_rel == new_eq_rel = fmode
+ | otherwise = fmode { fe_eq_rel = new_eq_rel }
+
+-- | Change the 'FlattenMode' in a 'FlattenEnv'. Avoids allocating
+-- a new 'FlattenEnv' where possible.
+setFEMode :: FlattenEnv -> FlattenMode -> FlattenEnv
+setFEMode fmode@(FE { fe_mode = old_mode }) new_mode
+ | old_mode `eq` new_mode = fmode
+ | otherwise = fmode { fe_mode = new_mode }
+ where
+ FM_FlattenAll `eq` FM_FlattenAll = True
+ FM_SubstOnly `eq` FM_SubstOnly = True
+ FM_Avoid tv1 b1 `eq` FM_Avoid tv2 b2 = tv1 == tv2 && b1 == b2
+ _ `eq` _ = False
diff --git a/compiler/utils/MonadUtils.hs b/compiler/utils/MonadUtils.hs
index edc863ab0c..0850ff43c4 100644
--- a/compiler/utils/MonadUtils.hs
+++ b/compiler/utils/MonadUtils.hs
@@ -77,6 +77,9 @@ zipWith3M_ f as bs cs = do { _ <- zipWith3M f as bs cs
zipWithAndUnzipM :: Monad m
=> (a -> b -> m (c, d)) -> [a] -> [b] -> m ([c], [d])
+{-# INLINE zipWithAndUnzipM #-}
+-- See Note [flatten_many performance] in TcFlatten for why this
+-- pragma is essential.
zipWithAndUnzipM f (x:xs) (y:ys)
= do { (c, d) <- f x y
; (cs, ds) <- zipWithAndUnzipM f xs ys
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index b98a9bc9d5..14826dff09 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -406,7 +406,7 @@ test('T5321Fun',
# (increase due to new codegen)
# 2014-09-03: 299656164 (specialisation and inlining)
# 10/12/2014: 206406188 # Improvements in constraint solver
- (wordsize(64), 408110888, 10)])
+ (wordsize(64), 429921312, 10)])
# prev: 585521080
# 29/08/2012: 713385808 # (increase due to new codegen)
# 15/05/2013: 628341952 # (reason for decrease unknown)
@@ -415,6 +415,7 @@ test('T5321Fun',
# 10/09/2014: 601629032 # post-AMP-cleanup
# 06/11/2014: 541287000 # Simon's flat-skol changes to the constraint solver
# 10/12/2014: 408110888 # Improvements in constraint solver
+ # 16/12/2014: 429921312 # Flattener parameterized over roles
],
compile,[''])
@@ -477,7 +478,7 @@ test('T5837',
# 2014-12-01: 135914136 (Windows laptop, regression see below)
# 2014-12-08 115905208 Constraint solver perf improvements (esp kick-out)
- (wordsize(64), 234790312, 10)])
+ (wordsize(64), 231155640, 10)])
# sample: 3926235424 (amd64/Linux, 15/2/2012)
# 2012-10-02 81879216
# 2012-09-20 87254264 amd64/Linux
@@ -489,6 +490,8 @@ test('T5837',
# 2014-11-06 271028976 Linux, Accept big regression;
# See Note [An alternative story for the inert substitution] in TcFlatten
# 2014-12-08 234790312 Constraint solver perf improvements (esp kick-out)
+ # 2014-12-16 231155640 Mac Flattener parameterized over roles;
+ # some optimization
],
compile_fail,['-ftype-function-depth=50'])
@@ -556,8 +559,9 @@ test('T9675',
test('T9872a',
[ only_ways(['normal']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 5521332656, 5)
+ [(wordsize(64), 5848657456, 5)
# 2014-12-10 5521332656 Initally created
+ # 2014-12-16 5848657456 Flattener parameterized over roles
]),
],
compile_fail,
@@ -566,8 +570,9 @@ test('T9872a',
test('T9872b',
[ only_ways(['normal']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 6483306280, 5)
+ [(wordsize(64), 6892251912, 5)
# 2014-12-10 6483306280 Initally created
+ # 2014-12-16 6892251912 Flattener parameterized over roles
]),
],
compile_fail,
@@ -575,8 +580,9 @@ test('T9872b',
test('T9872c',
[ only_ways(['normal']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 5495850096, 5)
+ [(wordsize(64), 5842024784, 5)
# 2014-12-10 5495850096 Initally created
+ # 2014-12-16 5842024784 Flattener parameterized over roles
]),
],
compile_fail,