summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/simplCore/Simplify.hs68
-rw-r--r--compiler/specialise/Rules.hs9
-rw-r--r--testsuite/tests/simplCore/should_compile/T3234.stderr13
3 files changed, 74 insertions, 16 deletions
diff --git a/compiler/simplCore/Simplify.hs b/compiler/simplCore/Simplify.hs
index 4ef299440e..d18eda7f18 100644
--- a/compiler/simplCore/Simplify.hs
+++ b/compiler/simplCore/Simplify.hs
@@ -15,6 +15,7 @@ import SimplMonad
import Type hiding ( substTy, substTyVar, extendTvSubst, extendCvSubst )
import SimplEnv
import SimplUtils
+import OccurAnal ( occurAnalyseExpr )
import FamInstEnv ( FamInstEnv )
import Literal ( litIsLifted ) --, mkMachInt ) -- temporalily commented out. See #8326
import Id
@@ -1809,9 +1810,13 @@ tryRules env rules fn args call_cont
; let cont' = pushSimplifiedArgs env
(drop (ruleArity rule) args)
call_cont
- -- (ruleArity rule) says how many args the rule consumed
+ -- (ruleArity rule) says how
+ -- many args the rule consumed
+
+ occ_anald_rhs = occurAnalyseExpr rule_rhs
+ -- See Note [Occurence-analyse after rule firing]
; dump dflags rule rule_rhs
- ; return (Just (rule_rhs, cont')) }}}
+ ; return (Just (occ_anald_rhs, cont')) }}}
where
dump dflags rule rule_rhs
| dopt Opt_D_dump_rule_rewrites dflags
@@ -1842,7 +1847,64 @@ tryRules env rules fn args call_cont
= liftIO . dumpSDoc dflags alwaysQualify flag "" $
sep [text hdr, nest 4 details]
-{-
+{- Note [Occurence-analyse after rule firing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+After firing a rule, we occurrence-analyse the instantiated RHS before
+simplifying it. Usually this doesn't make much difference, but it can
+be huge. Here's an example (simplCore/should_compile/T7785)
+
+ map f (map f (map f xs)
+
+= -- Use build/fold form of map, twice
+ map f (build (\cn. foldr (mapFB c f) n
+ (build (\cn. foldr (mapFB c f) n xs))))
+
+= -- Apply fold/build rule
+ map f (build (\cn. (\cn. foldr (mapFB c f) n xs) (mapFB c f) n))
+
+= -- Beta-reduce
+ -- Alas we have no occurrence-analysed, so we don't know
+ -- that c is used exactly once
+ map f (build (\cn. let c1 = mapFB c f in
+ foldr (mapFB c1 f) n xs))
+
+= -- Use mapFB rule: mapFB (mapFB c f) g = mapFB c (f.g)
+ -- We can do this becuase (mapFB c n) is a PAP and hence expandable
+ map f (build (\cn. let c1 = mapFB c n in
+ foldr (mapFB c (f.f)) n x))
+
+This is not too bad. But now do the same with the outer map, and
+we get another use of mapFB, and t can interact with /both/ remaining
+mapFB calls in the above expression. This is stupid because actually
+that 'c1' binding is dead. The outer map introduces another c2. If
+there is a deep stack of maps we get lots of dead bindings, and lots
+of redundant work as we repeatedly simplify the result of firing rules.
+
+The easy thing to do is simply to occurrence analyse the result of
+the rule firing. Note that this occ-anals not only the RHS of the
+rule, but also the function arguments, which by now are OutExprs.
+E.g.
+ RULE f (g x) = x+1
+
+Call f (g BIG) --> (\x. x+1) BIG
+
+The rule binders are lambda-bound and applied to the OutExpr arguments
+(here BIG) which lack all internal occurrence info.
+
+Is this inefficient? Not really: we are about to walk over the result
+of the rule firing to simplify it, so occurrence analysis is at most
+a constant factor.
+
+Possible improvement: occ-anal the rules when putting them in the
+database; and in the simplifier just occ-anal the OutExpr arguments.
+But that's more complicated and the rule RHS is usually tiny; so I'm
+just doing the simple thing.
+
+Historical note: previously we did occ-anal the rules in Rule.hs,
+but failed to occ-anal the OutExpr arguments, which led to the
+nasty performance problem described above.
+
+
Note [Optimising tagToEnum#]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If we have an enumeration data type:
diff --git a/compiler/specialise/Rules.hs b/compiler/specialise/Rules.hs
index ae0798ac2b..2ad4e1cd4e 100644
--- a/compiler/specialise/Rules.hs
+++ b/compiler/specialise/Rules.hs
@@ -31,7 +31,6 @@ module Rules (
import CoreSyn -- All of it
import Module ( Module, ModuleSet, elemModuleSet )
import CoreSubst
-import OccurAnal ( occurAnalyseExpr )
import CoreFVs ( exprFreeVars, exprsFreeVars, bindFreeVars
, rulesFreeVarsDSet, exprsOrphNames, exprFreeVarsList )
import CoreUtils ( exprType, eqExpr, mkTick, mkTicks,
@@ -172,7 +171,7 @@ mkRule :: Module -> Bool -> Bool -> RuleName -> Activation
mkRule this_mod is_auto is_local name act fn bndrs args rhs
= Rule { ru_name = name, ru_fn = fn, ru_act = act,
ru_bndrs = bndrs, ru_args = args,
- ru_rhs = occurAnalyseExpr rhs,
+ ru_rhs = rhs,
ru_rough = roughTopNames args,
ru_origin = this_mod,
ru_orphan = orph,
@@ -508,8 +507,7 @@ matchRule dflags rule_env _is_active fn args _rough_args
-- Built-in rules can't be switched off, it seems
= case match_fn dflags rule_env fn args of
Nothing -> Nothing
- Just expr -> Just (occurAnalyseExpr expr)
- -- We could do this when putting things into the rulebase, I guess
+ Just expr -> Just expr
matchRule _ in_scope is_active _ args rough_args
(Rule { ru_name = rule_name, ru_act = act, ru_rough = tpl_tops
@@ -522,8 +520,7 @@ matchRule _ in_scope is_active _ args rough_args
Just (bind_wrapper, tpl_vals) -> Just (bind_wrapper $
rule_fn `mkApps` tpl_vals)
where
- rule_fn = occurAnalyseExpr (mkLams tpl_vars rhs)
- -- We could do this when putting things into the rulebase, I guess
+ rule_fn = mkLams tpl_vars rhs
---------------------------------------
matchN :: InScopeEnv
diff --git a/testsuite/tests/simplCore/should_compile/T3234.stderr b/testsuite/tests/simplCore/should_compile/T3234.stderr
index ad31846bf6..e79bfbbc92 100644
--- a/testsuite/tests/simplCore/should_compile/T3234.stderr
+++ b/testsuite/tests/simplCore/should_compile/T3234.stderr
@@ -12,12 +12,15 @@
==================== Grand total simplifier statistics ====================
Total ticks: 55
-15 PreInlineUnconditionally
+18 PreInlineUnconditionally
+ 1 c
1 n
1 g
1 a
1 xs
1 ys
+ 1 c
+ 1 n
1 k
1 z
1 g
@@ -28,11 +31,7 @@ Total ticks: 55
1 lvl
1 lvl
1 lvl
-4 PostInlineUnconditionally
- 1 c
- 1 n
- 1 c
- 1 c
+1 PostInlineUnconditionally 1 c
1 UnfoldingDone 1 GHC.Base.build
5 RuleFired
1 ++
@@ -67,6 +66,6 @@ Total ticks: 55
1 c
1 n
1 a
-11 SimplifierDone 11
+10 SimplifierDone 10