summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTobias Dammers <tdammers@gmail.com>2018-04-30 09:33:59 +0200
committerTobias Dammers <tdammers@gmail.com>2018-04-30 11:11:40 +0200
commit5a157f8032633804d9c2f7ba7d558e9d99ad1e78 (patch)
treed4fd6f2dffc1414effc7982e3f2a25001c4e3e17
parentbfc1fc2566944a455572303cbb2cbbf0c539c871 (diff)
downloadhaskell-wip/tdammers/T15019.tar.gz
Fix simplCast perf issues from #14737wip/tdammers/T15019
See Trac #15019. Reflexive coercion performance is a surprisingly delicate matter. On the one hand, we want to eliminate reflexive coercions as early as possible, to avoid performance bottlenecks later on; on the other hand, conclusively doing so involves calling `eqType`, which is expensive. In #14737, we addressed the `eqType` part by simply removing the `eqType` check. This, however, causes regressions, because the check no longer catches some of the reflexive coercions that we want to catch. So we need to find a balance between getting rid of reflexivity as soon as it appears (see Trac #11735, #14737, #15019), and avoiding calls to `isReflexiveCo` (which is expensive on big types, since it involves `eqType`). In particular, we want to behave well on * e |> co1 |> co2 where the two happent to cancel out entirely. That is quite common; e.g. a newtype wrapping and unwrapping cancel * (f |> co) @t1 @t2 ... @tn x1 .. xm Here we wil use pushCoTyArg and pushCoValArg successively, which build up NthCo stacks. Silly to do that if co is reflexive. A good compromise (determined experimentally) seems to be to call isReflexiveCo * when composing casts, and * at the end
-rw-r--r--compiler/coreSyn/CoreOpt.hs6
-rw-r--r--compiler/simplCore/Simplify.hs70
-rw-r--r--testsuite/tests/perf/compiler/all.T12
3 files changed, 63 insertions, 25 deletions
diff --git a/compiler/coreSyn/CoreOpt.hs b/compiler/coreSyn/CoreOpt.hs
index f1ff68d133..552cc3b191 100644
--- a/compiler/coreSyn/CoreOpt.hs
+++ b/compiler/coreSyn/CoreOpt.hs
@@ -963,6 +963,9 @@ pushCoTyArg co ty
-- -- | tyL `eqType` tyR
-- -- = Just (ty, Nothing)
+ | isReflCo co
+ = Just (ty, Nothing)
+
| isForAllTy tyL
= ASSERT2( isForAllTy tyR, ppr co $$ ppr ty )
Just (ty `mkCastTy` mkSymCo co1, Just co2)
@@ -998,6 +1001,9 @@ pushCoValArg co
-- -- | tyL `eqType` tyR
-- -- = Just (mkRepReflCo arg, Nothing)
+ | isReflCo co
+ = Just (mkRepReflCo arg, Nothing)
+
| isFunTy tyL
, (co1, co2) <- decomposeFunCo Representational co
-- If co :: (tyL1 -> tyL2) ~ (tyR1 -> tyR2)
diff --git a/compiler/simplCore/Simplify.hs b/compiler/simplCore/Simplify.hs
index d92f6d7e44..c8b2451a45 100644
--- a/compiler/simplCore/Simplify.hs
+++ b/compiler/simplCore/Simplify.hs
@@ -1209,40 +1209,73 @@ rebuild env expr cont
************************************************************************
-}
+{- Note [Optimising reflexivity]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It's important (for compiler performance) to get rid of reflexivity as soon
+as it appears. See Trac #11735, #14737, and #15019.
+
+In particular, we want to behave well on
+
+ * e |> co1 |> co2
+ where the two happen to cancel out entirely. That is quite common;
+ e.g. a newtype wrapping and unwrapping cancel.
+
+
+ * (f |> co) @t1 @t2 ... @tn x1 .. xm
+ Here we wil use pushCoTyArg and pushCoValArg successively, which
+ build up NthCo stacks. Silly to do that if co is reflexive.
+
+However, we don't want to call isReflexiveCo too much, because it uses
+type equality which is expensive on big types (Trac #14737 comment:7).
+
+A good compromise (determined experimentally) seems to be to call
+isReflexiveCo
+ * when composing casts, and
+ * at the end
+
+In investigating this I saw missed opportunities for on-the-fly
+coercion shrinkage. See Trac #15090.
+-}
+
+
simplCast :: SimplEnv -> InExpr -> Coercion -> SimplCont
-> SimplM (SimplFloats, OutExpr)
simplCast env body co0 cont0
= do { co1 <- {-#SCC "simplCast-simplCoercion" #-} simplCoercion env co0
- ; cont1 <- {-#SCC "simplCast-addCoerce" #-} addCoerce co1 cont0
+ ; cont1 <- {-#SCC "simplCast-addCoerce" #-}
+ if isReflCo co1
+ then return cont0 -- See Note [Optimising reflexivity]
+ else addCoerce co1 cont0
; {-#SCC "simplCast-simplExprF" #-} simplExprF env body cont1 }
where
-- If the first parameter is Nothing, then simplifying revealed a
-- reflexive coercion. Omit.
- addCoerce0 :: Maybe OutCoercion -> SimplCont -> SimplM SimplCont
- addCoerce0 Nothing cont = return cont
- addCoerce0 (Just co) cont = addCoerce co cont
+ addCoerceM :: Maybe OutCoercion -> SimplCont -> SimplM SimplCont
+ addCoerceM Nothing cont = return cont
+ addCoerceM (Just co) cont = addCoerce co cont
addCoerce :: OutCoercion -> SimplCont -> SimplM SimplCont
-
- addCoerce co1 (CastIt co2 cont)
- = {-#SCC "addCoerce-simple-recursion" #-}
- addCoerce (mkTransCo co1 co2) cont
+ addCoerce co1 (CastIt co2 cont) -- See Note [Optimising reflexivity]
+ | isReflexiveCo co' = return cont
+ | otherwise = addCoerce co' cont
+ where
+ co' = mkTransCo co1 co2
addCoerce co cont@(ApplyToTy { sc_arg_ty = arg_ty, sc_cont = tail })
| Just (arg_ty', m_co') <- pushCoTyArg co arg_ty
= {-#SCC "addCoerce-pushCoTyArg" #-}
- do { tail' <- addCoerce0 m_co' tail
+ do { tail' <- addCoerceM m_co' tail
; return (cont { sc_arg_ty = arg_ty', sc_cont = tail' }) }
addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se
- , sc_dup = dup, sc_cont = tail })
+ , sc_dup = dup, sc_cont = tail })
| Just (co1, m_co2) <- pushCoValArg co
, Pair _ new_ty <- coercionKind co1
- , not (isTypeLevPoly new_ty) -- without this check, we get a lev-poly arg
+ , not (isTypeLevPoly new_ty) -- Without this check, we get a lev-poly arg
-- See Note [Levity polymorphism invariants] in CoreSyn
-- test: typecheck/should_run/EtaExpandLevPoly
= {-#SCC "addCoerce-pushCoValArg" #-}
- do { tail' <- addCoerce0 m_co2 tail
+ do { tail' <- addCoerceM m_co2 tail
; if isReflCo co1
then return (cont { sc_cont = tail' })
-- Avoid simplifying if possible;
@@ -1260,15 +1293,10 @@ simplCast env body co0 cont0
, sc_cont = tail' }) } }
addCoerce co cont
- | isReflexiveCo co = {-#SCC "addCoerce-reflexive" #-}
- return cont
- | otherwise = {-#SCC "addCoerce-other" #-}
- return (CastIt co cont)
- -- It's worth checking isReflexiveCo.
- -- For example, in the initial form of a worker
- -- we may find (coerce T (coerce S (\x.e))) y
- -- and we'd like it to simplify to e[y/x] in one round
- -- of simplification
+ | isReflexiveCo co = return cont -- Having this at the end makes a huge
+ -- difference in T12227, for some reason
+ -- See Note [Optimising reflexivity]
+ | otherwise = return (CastIt co cont)
simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr
-> SimplM (DupFlag, StaticEnv, OutExpr)
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 2001cda637..9df0989777 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -595,7 +595,7 @@ test('T5321FD',
# (due to better optCoercion, 5e7406d9, #9233)
# 2016-04-06: 250757460 (x86/Linux)
- (wordsize(64), 415136648, 10)])
+ (wordsize(64), 367567168, 10)])
# prev: 418306336
# 29/08/2012: 492905640
# (increase due to new codegen)
@@ -617,6 +617,7 @@ test('T5321FD',
# 2016-07-16: 477840432
# Optimize handling of built-in OccNames
# 2017-05-14: 415136648 (amd64/Linux) Two-pass CmmLayoutStack
+ # 2018-04-30: 367567168 improved simplCast performance #15019
],
compile,[''])
@@ -747,7 +748,7 @@ test('T9020',
# 2014-07-31: 343005716 (Windows) (general round of updates)
# 2017-03-24: 249904136 (x86/Linux, 64-bit machine)
- (wordsize(64), 562206104, 10)])
+ (wordsize(64), 392559256, 10)])
# prev: 795469104
# 2014-07-17: 728263536 (general round of updates)
# 2014-09-10: 785871680 post-AMP-cleanup
@@ -762,6 +763,7 @@ test('T9020',
# 2017-03-31: 493596312 Fix memory leak in simplifier
# 2017-04-28: 423163832 Remove exponential behaviour in simplifier
# 2018-04-09: 562206104 Inexplicable, collateral of #14737
+ # 2018-04-30: 392559256 improved simplCast performance #15019
],
compile,[''])
@@ -1040,7 +1042,7 @@ test('T12227',
test('T12425',
[ only_ways(['optasm']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 141952368, 5),
+ [(wordsize(64), 130646336, 5),
# initial: 125831400
# 2017-01-18: 133380960 Allow top-level string literals in Core
# 2017-02-17: 153611448 Type-indexed Typeable
@@ -1049,6 +1051,7 @@ test('T12425',
# 2017-04-28: 127500136 Remove exponential behaviour in simplifier
# 2017-05-23: 134780272 Addition of llvm-targets in dynflags (D3352)
# 2018-04-15: 141952368 Collateral of #14737
+ # 2018-04-30: 130646336 improved simplCast performance #15019
]),
],
compile,
@@ -1118,7 +1121,7 @@ test('T13056',
test('T12707',
[ compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 1237898376, 5),
+ [(wordsize(64), 1141555816, 5),
# initial: 1271577192
# 2017-01-22: 1348865648 Allow top-level strings in Core
# 2017-01-31: 1280336112 Join points (#12988)
@@ -1127,6 +1130,7 @@ test('T12707',
# 2017-03-02: 1231809592 Drift from recent simplifier improvements
# 2017-05-14: 1163821528 (amd64/Linux) Two-pass CmmLayoutStack
# 2018-04-09: 1237898376 Inexplicable, collateral of #14737
+ # 2018-04-30: 1141555816 improved simplCast performance #15019
]),
],
compile,