summaryrefslogtreecommitdiff
path: root/compiler/simplCore
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2017-02-28 16:07:20 -0500
committerDavid Feuer <David.Feuer@gmail.com>2017-02-28 16:07:21 -0500
commit2effe18ab51d66474724d38b20e49cc1b8738f60 (patch)
tree8e69360fe2b24408778944f2450f3903e8236168 /compiler/simplCore
parent55efc9718b520ef354e32c15c4b49cdfecce412f (diff)
downloadhaskell-2effe18ab51d66474724d38b20e49cc1b8738f60.tar.gz
The Early Inline Patch
This very small patch switches on sm_inline even in the InitialPhase (aka "gentle" phase). There is no reason not to... and the results are astonishing. I think the performance of GHC itself improves by about 5%; and some programs get much smaller, quicker. Result: across the board improvements in compile time performance. Here are the changes in perf/compiler; the numbers are decreases in compiler bytes-allocated: 3% T5837 7% parsing001 9% T12234 35% T9020 9% T3064 13% T9961 20% T13056 5% T9872d 5% T9872c 5% T9872b 7% T9872a 5% T783 35% T12227 20% T1969 Plus in perf/should_run 5% lazy-bs-alloc It wasn't as easy as it sounds: I did a raft of preparatory work in earlier patches. But it's great! Reviewers: austin, bgamari Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D3203
Diffstat (limited to 'compiler/simplCore')
-rw-r--r--compiler/simplCore/SimplCore.hs38
-rw-r--r--compiler/simplCore/SimplUtils.hs60
2 files changed, 57 insertions, 41 deletions
diff --git a/compiler/simplCore/SimplCore.hs b/compiler/simplCore/SimplCore.hs
index 7c89dc9099..34f49ad074 100644
--- a/compiler/simplCore/SimplCore.hs
+++ b/compiler/simplCore/SimplCore.hs
@@ -132,6 +132,7 @@ getCoreToDo dflags
rules_on = gopt Opt_EnableRewriteRules dflags
eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
ww_on = gopt Opt_WorkerWrapper dflags
+ vectorise_on = gopt Opt_Vectorise dflags
static_ptrs = xopt LangExt.StaticPointers dflags
maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
@@ -160,12 +161,12 @@ getCoreToDo dflags
-- We need to eliminate these common sub expressions before their definitions
-- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
-- so we also run simpl_gently to inline them.
- ++ (if gopt Opt_Vectorise dflags && phase == 3
+ ++ (if vectorise_on && phase == 3
then [CoreCSE, simpl_gently]
else [])
vectorisation
- = runWhen (gopt Opt_Vectorise dflags) $
+ = runWhen vectorise_on $
CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
-- By default, we have 2 phases before phase 0.
@@ -188,7 +189,8 @@ getCoreToDo dflags
(base_mode { sm_phase = InitialPhase
, sm_names = ["Gentle"]
, sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
- , sm_inline = False
+ , sm_inline = not vectorise_on
+ -- See Note [Inline in InitialPhase]
, sm_case_case = False })
-- Don't do case-of-case transformations.
-- This makes full laziness work better
@@ -381,7 +383,35 @@ addPluginPasses builtin_passes
query_plug todos (_, plug, options) = installCoreToDos plug options todos
#endif
-{-
+{- Note [Inline in InitialPhase]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is
+confusing for users because when they say INLINE they expect the function to inline
+right away.
+
+So now we do inlining immediately, even in the InitialPhase, assuming that the
+Id's Activation allows it.
+
+This is a surprisingly big deal. Compiler performance improved a lot
+when I made this change:
+
+ perf/compiler/T5837.run T5837 [stat too good] (normal)
+ perf/compiler/parsing001.run parsing001 [stat too good] (normal)
+ perf/compiler/T12234.run T12234 [stat too good] (optasm)
+ perf/compiler/T9020.run T9020 [stat too good] (optasm)
+ perf/compiler/T3064.run T3064 [stat too good] (normal)
+ perf/compiler/T9961.run T9961 [stat too good] (normal)
+ perf/compiler/T13056.run T13056 [stat too good] (optasm)
+ perf/compiler/T9872d.run T9872d [stat too good] (normal)
+ perf/compiler/T783.run T783 [stat too good] (normal)
+ perf/compiler/T12227.run T12227 [stat too good] (normal)
+ perf/should_run/lazy-bs-alloc.run lazy-bs-alloc [stat too good] (normal)
+ perf/compiler/T1969.run T1969 [stat too good] (normal)
+ perf/compiler/T9872a.run T9872a [stat too good] (normal)
+ perf/compiler/T9872c.run T9872c [stat too good] (normal)
+ perf/compiler/T9872b.run T9872b [stat too good] (normal)
+ perf/compiler/T9872d.run T9872d [stat too good] (normal)
+
Note [RULEs enabled in SimplGently]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RULES are enabled when doing "gentle" simplification. Two reasons:
diff --git a/compiler/simplCore/SimplUtils.hs b/compiler/simplCore/SimplUtils.hs
index 79a6c610e7..0fe262b2c7 100644
--- a/compiler/simplCore/SimplUtils.hs
+++ b/compiler/simplCore/SimplUtils.hs
@@ -721,7 +721,8 @@ updModeForRules current_mode
{- Note [Simplifying rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-When simplifying a rule, refrain from any inlining or applying of other RULES.
+When simplifying a rule LHS, refrain from /any/ inlining or applying
+of other RULES.
Doing anything to the LHS is plain confusing, because it means that what the
rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
@@ -868,11 +869,17 @@ continuation.
-}
activeUnfolding :: SimplEnv -> Id -> Bool
-activeUnfolding env
- | not (sm_inline mode) = active_unfolding_minimal
- | otherwise = case sm_phase mode of
- InitialPhase -> active_unfolding_gentle
- Phase n -> active_unfolding n
+activeUnfolding env id
+ | isCompulsoryUnfolding (realIdUnfolding id)
+ = True -- Even sm_inline can't override compulsory unfoldings
+ | otherwise
+ = isActive (sm_phase mode) (idInlineActivation id)
+ && sm_inline mode
+ -- `or` isStableUnfolding (realIdUnfolding id)
+ -- Inline things when
+ -- (a) they are active
+ -- (b) sm_inline says so, except that for stable unfoldings
+ -- (ie pragmas) we inline anyway
where
mode = getMode env
@@ -891,35 +898,13 @@ getUnfoldingInRuleMatch env
id_unf id | unf_is_active id = idUnfolding id
| otherwise = NoUnfolding
unf_is_active id
- | not (sm_rules mode) = active_unfolding_minimal id
+ | not (sm_rules mode) = -- active_unfolding_minimal id
+ isStableUnfolding (realIdUnfolding id)
+ -- Do we even need to test this? I think this InScopeEnv
+ -- is only consulted if activeRule returns True, which
+ -- never happens if sm_rules is False
| otherwise = isActive (sm_phase mode) (idInlineActivation id)
-active_unfolding_minimal :: Id -> Bool
--- Compuslory unfoldings only
--- Ignore SimplGently, because we want to inline regardless;
--- the Id has no top-level binding at all
---
--- NB: we used to have a second exception, for data con wrappers.
--- On the grounds that we use gentle mode for rule LHSs, and
--- they match better when data con wrappers are inlined.
--- But that only really applies to the trivial wrappers (like (:)),
--- and they are now constructed as Compulsory unfoldings (in MkId)
--- so they'll happen anyway.
-active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
-
-active_unfolding :: PhaseNum -> Id -> Bool
-active_unfolding n id = isActiveIn n (idInlineActivation id)
-
-active_unfolding_gentle :: Id -> Bool
--- Anything that is early-active
--- See Note [Gentle mode]
-active_unfolding_gentle id
- = isInlinePragma prag
- && isEarlyActive (inlinePragmaActivation prag)
- -- NB: wrappers are not early-active
- where
- prag = idInlinePragma id
-
----------------------
activeRule :: SimplEnv -> Activation -> Bool
-- Nothing => No rules at all
@@ -1027,10 +1012,11 @@ Example
...fInt...fInt...fInt...
Here f occurs just once, in the RHS of fInt. But if we inline it there
-we'll lose the opportunity to inline at each of fInt's call sites.
-The INLINE pragma will only inline when the application is saturated
-for exactly this reason; and we don't want PreInlineUnconditionally
-to second-guess it. A live example is Trac #3736.
+it might make fInt look big, and we'll lose the opportunity to inline f
+at each of fInt's call sites. The INLINE pragma will only inline when
+the application is saturated for exactly this reason; and we don't
+want PreInlineUnconditionally to second-guess it. A live example is
+Trac #3736.
c.f. Note [Stable unfoldings and postInlineUnconditionally]
Note [Top-level bottoming Ids]