summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2018-09-05 15:54:48 +0100
committerSimon Peyton Jones <simonpj@microsoft.com>2018-09-05 17:25:57 +0100
commit3addf72a6f40747cff213653382eb4476bdb53da (patch)
tree2dda31bb6858711f32c40769a49b6d842e1530e5
parent1152a3bee1aef3e24a03e0c2e4e5272ca926f7ab (diff)
downloadhaskell-3addf72a6f40747cff213653382eb4476bdb53da.tar.gz
Preserve specialisations despite CSE
Trac #15445 showed that, as a result of CSE, a function with an automatically generated specialisation RULE could be inlined before the RULE had a chance to fire. This patch attaches a NOINLINE[2] activation to the Id, during CSE, to stop this happening. See Note [Delay inlining after CSE] ---- Historical note --- This patch is simpler and more direct than an earlier version: commit 2110738b280543698407924a16ac92b6d804dc36 Author: Simon Peyton Jones <simonpj@microsoft.com> Date: Mon Jul 30 13:43:56 2018 +0100 Don't inline functions with RULES too early We had to revert this patch because it made GHC itself slower. Why? It delayed inlining of /all/ functions with RULES, and that was very bad in TcFlatten.flatten_ty_con_app * It delayed inlining of liftM * That delayed the unravelling of the recursion in some dictionary bindings. * That delayed some eta expansion, leaving flatten_ty_con_app = \x y. let <stuff> in \z. blah * That allowed the float-out pass to put sguff between the \y and \z. * And that permanently stopped eta expasion of the function, even once <stuff> was simplified. -- End of historical note ---
-rw-r--r--compiler/simplCore/CSE.hs68
-rw-r--r--testsuite/tests/numeric/should_compile/T14465.stdout2
-rw-r--r--testsuite/tests/numeric/should_compile/T7116.stdout4
-rw-r--r--testsuite/tests/simplCore/should_compile/T15445.hs8
-rw-r--r--testsuite/tests/simplCore/should_compile/T15445.stderr13
-rw-r--r--testsuite/tests/simplCore/should_compile/T15445a.hs10
-rw-r--r--testsuite/tests/simplCore/should_compile/all.T4
7 files changed, 94 insertions, 15 deletions
diff --git a/compiler/simplCore/CSE.hs b/compiler/simplCore/CSE.hs
index 7e44e2e14d..96fbd07454 100644
--- a/compiler/simplCore/CSE.hs
+++ b/compiler/simplCore/CSE.hs
@@ -15,7 +15,8 @@ import GhcPrelude
import CoreSubst
import Var ( Var )
import VarEnv ( elemInScopeSet, mkInScopeSet )
-import Id ( Id, idType, idInlineActivation, isDeadBinder
+import Id ( Id, idType, isDeadBinder
+ , idInlineActivation, setInlineActivation
, zapIdOccInfo, zapIdUsageInfo, idInlinePragma
, isJoinId, isJoinId_maybe )
import CoreUtils ( mkAltExpr, eqExpr
@@ -25,9 +26,7 @@ import CoreFVs ( exprFreeVars )
import Type ( tyConAppArgs )
import CoreSyn
import Outputable
-import BasicTypes ( TopLevelFlag(..), isTopLevel
- , isAlwaysActive, isAnyInlinePragma,
- inlinePragmaSpec, noUserInlineSpec )
+import BasicTypes
import CoreMap
import Util ( filterOut )
import Data.List ( mapAccumL )
@@ -335,14 +334,16 @@ cseBind toplevel env (NonRec b e)
(env1, b1) = addBinder env b
(env2, (b2, e2)) = cse_bind toplevel env1 (b,e) b1
-cseBind _ env (Rec [(in_id, rhs)])
+cseBind toplevel env (Rec [(in_id, rhs)])
| noCSE in_id
= (env1, Rec [(out_id, rhs')])
-- See Note [CSE for recursive bindings]
| Just previous <- lookupCSRecEnv env out_id rhs''
, let previous' = mkTicks ticks previous
- = (extendCSSubst env1 in_id previous', NonRec out_id previous')
+ out_id' = delayInlining toplevel out_id
+ = -- We have a hit in the recursive-binding cache
+ (extendCSSubst env1 in_id previous', NonRec out_id' previous')
| otherwise
= (extendCSRecEnv env1 out_id rhs'' id_expr', Rec [(zapped_id, rhs')])
@@ -382,10 +383,21 @@ cse_bind toplevel env (in_id, in_rhs) out_id
in (env, (out_id, mkLams params' out_body))
| otherwise
- = (env', (out_id', out_rhs))
+ = (env', (out_id'', out_rhs))
where
(env', out_id') = addBinding env in_id out_id out_rhs
- out_rhs = tryForCSE env in_rhs
+ (cse_done, out_rhs) = try_for_cse env in_rhs
+ out_id'' | cse_done = delayInlining toplevel out_id'
+ | otherwise = out_id'
+
+delayInlining :: TopLevelFlag -> Id -> Id
+-- Add a NOINLINE[2] if the Id doesn't have an INLNE pragma already
+delayInlining top_lvl bndr
+ | isTopLevel top_lvl
+ , isAlwaysActive (idInlineActivation bndr)
+ = bndr `setInlineActivation` activeAfterInitial
+ | otherwise
+ = bndr
addBinding :: CSEnv -- Includes InId->OutId cloning
-> InVar -- Could be a let-bound type
@@ -464,12 +476,46 @@ The net effect is that for the y-binding we want to
- but leave the original binding for y undisturbed
This is done by cse_bind. I got it wrong the first time (Trac #13367).
+
+Note [Delay inlining after CSE]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose (Trac #15445) we have
+ f,g :: Num a => a -> a
+ f x = ...f (x-1).....
+ g y = ...g (y-1) ....
+
+and we make some specialisations of 'g', either automatically, or via
+a SPECIALISE pragma. Then CSE kicks in and notices that the RHSs of
+'f' and 'g' are identical, so we get
+ f x = ...f (x-1)...
+ g = f
+ {-# RULES g @Int _ = $sg #-}
+
+Now there is terrible danger that, in an importing module, we'll inline
+'g' before we have a chance to run its specialisation!
+
+Solution: during CSE, when adding a top-level
+ g = f
+binding after a "hit" in the CSE cache, add a NOINLINE[2] activation
+to it, to ensure it's not inlined right away.
+
+Why top level only? Because for nested bindings we are already past
+phase 2 and will never return there.
-}
tryForCSE :: CSEnv -> InExpr -> OutExpr
-tryForCSE env expr
- | Just e <- lookupCSEnv env expr'' = mkTicks ticks e
- | otherwise = expr'
+tryForCSE env expr = snd (try_for_cse env expr)
+
+try_for_cse :: CSEnv -> InExpr -> (Bool, OutExpr)
+-- (False, e') => We did not CSE the entire expression,
+-- but we might have CSE'd some sub-expressions,
+-- yielding e'
+--
+-- (True, te') => We CSE'd the entire expression,
+-- yielding the trivial expression te'
+try_for_cse env expr
+ | Just e <- lookupCSEnv env expr'' = (True, mkTicks ticks e)
+ | otherwise = (False, expr')
-- The varToCoreExpr is needed if we have
-- case e of xco { ...case e of yco { ... } ... }
-- Then CSE will substitute yco -> xco;
diff --git a/testsuite/tests/numeric/should_compile/T14465.stdout b/testsuite/tests/numeric/should_compile/T14465.stdout
index 32cf35639c..e6dfa5fc6c 100644
--- a/testsuite/tests/numeric/should_compile/T14465.stdout
+++ b/testsuite/tests/numeric/should_compile/T14465.stdout
@@ -93,7 +93,7 @@ plusOne :: Natural -> Natural
plusOne = \ (n :: Natural) -> plusNatural n M.minusOne1
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-one :: Natural
+one [InlPrag=NOUSERINLINE[2]] :: Natural
[GblId,
Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
WorkFree=True, Expandable=True,
diff --git a/testsuite/tests/numeric/should_compile/T7116.stdout b/testsuite/tests/numeric/should_compile/T7116.stdout
index 681d171350..fc0159a1ec 100644
--- a/testsuite/tests/numeric/should_compile/T7116.stdout
+++ b/testsuite/tests/numeric/should_compile/T7116.stdout
@@ -65,7 +65,7 @@ dr
case x of { GHC.Types.D# x1 -> GHC.Types.D# (GHC.Prim.+## x1 x1) }
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-dl :: Double -> Double
+dl [InlPrag=NOUSERINLINE[2]] :: Double -> Double
[GblId,
Arity=1,
Caf=NoCafRefs,
@@ -97,7 +97,7 @@ fr
}
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-fl :: Float -> Float
+fl [InlPrag=NOUSERINLINE[2]] :: Float -> Float
[GblId,
Arity=1,
Caf=NoCafRefs,
diff --git a/testsuite/tests/simplCore/should_compile/T15445.hs b/testsuite/tests/simplCore/should_compile/T15445.hs
new file mode 100644
index 0000000000..36bf61dbbb
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T15445.hs
@@ -0,0 +1,8 @@
+module T15445 where
+
+import T15445a
+
+
+foo :: IO ()
+foo = do { print (plusTwoRec [1..10 :: Int])
+ ; print (plusTwoRec' [1..20 :: Int]) }
diff --git a/testsuite/tests/simplCore/should_compile/T15445.stderr b/testsuite/tests/simplCore/should_compile/T15445.stderr
new file mode 100644
index 0000000000..d5deac5a59
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T15445.stderr
@@ -0,0 +1,13 @@
+Rule fired: Class op + (BUILTIN)
+Rule fired: Class op fromInteger (BUILTIN)
+Rule fired: integerToInt (BUILTIN)
+Rule fired: SPEC plusTwoRec (T15445a)
+Rule fired: SPEC $fShow[] (GHC.Show)
+Rule fired: Class op >> (BUILTIN)
+Rule fired: Class op show (BUILTIN)
+Rule fired: SPEC plusTwoRec (T15445a)
+Rule fired: Class op enumFromTo (BUILTIN)
+Rule fired: Class op show (BUILTIN)
+Rule fired: Class op enumFromTo (BUILTIN)
+Rule fired: eftIntList (GHC.Enum)
+Rule fired: eftIntList (GHC.Enum)
diff --git a/testsuite/tests/simplCore/should_compile/T15445a.hs b/testsuite/tests/simplCore/should_compile/T15445a.hs
new file mode 100644
index 0000000000..02e5baceb5
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T15445a.hs
@@ -0,0 +1,10 @@
+module T15445a where
+
+{-# SPECIALIZE plusTwoRec :: [Int] -> [Int] #-}
+plusTwoRec :: Num a => [a] -> [a]
+plusTwoRec [] = []
+plusTwoRec (x:xs) = x+2:plusTwoRec xs
+
+plusTwoRec' :: Num a => [a] -> [a]
+plusTwoRec' [] = []
+plusTwoRec' (x:xs) = x+2:plusTwoRec' xs
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 188f6432fa..1284b7c3d4 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -318,6 +318,8 @@ test('T15005', normal, compile, ['-O'])
# we omit profiling because it affects the optimiser and makes the test fail
test('T15056', [extra_files(['T15056a.hs']), omit_ways(['profasm'])], multimod_compile, ['T15056', '-O -v0 -ddump-rule-firings'])
test('T15186', normal, multimod_compile, ['T15186', '-v0'])
-test('T15453', normal, compile, ['-O1'])
test('T15517', normal, compile, ['-O0'])
test('T15517a', normal, compile, ['-O0'])
+test('T15453', normal, compile, ['-dcore-lint -O1'])
+test('T15445', normal, multimod_compile, ['T15445', '-v0 -O -ddump-rule-firings'])
+