summaryrefslogtreecommitdiff
path: root/compiler/GHC/Core/Opt/Driver.hs
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2020-05-21 12:53:35 +0100
committerSebastian Graf <sebastian.graf@kit.edu>2020-06-09 12:04:31 +0200
commitc221207aa69092e9dff21310e797d0d35c47afde (patch)
treec1e9f290c7990a8166975d5e9ec6d652be49cc95 /compiler/GHC/Core/Opt/Driver.hs
parentbacad81aeafe410572b26da8a2de2cc412c15a06 (diff)
downloadhaskell-wip/T18078.tar.gz
Implement cast worker/wrapper properlywip/T18078
The cast worker/wrapper transformation transforms x = e |> co into y = e; x = y |> co. This is done by the simplifier, but we were being careless about transferring IdInfo from x to y, and about what to do if x is a NOINLINE function. This resulted in a series of bugs: #17673, #18093, #18078. This patch fixes all that: * Main change is in GHC.Core.Opt.Simplify, and the new prepareBinding function, which does this cast worker/wrapper transform. See Note [Cast worker/wrappers]. * There is quite a bit of refactoring around prepareRhs, makeTrivial etc. It's nicer now. * Some wrappers from strictness and cast w/w, notably those for a function with a NOINLINE, should inline very late. There wasn't really a mechanism for that, which was an existing bug really; so I invented a new finalPhase = Phase (-1). It's used for all simplifier runs after the user-visible phases 2,1,0 have run. (No new runs of the simplifier are introduced thereby.) See new Note [Compiler phases] in GHC.Types.Basic; the main changes are in GHC.Core.Opt.Driver * Doing this made me trip over two places where the AnonArgFlag on a FunTy was being lost so we could end up with (Num a -> ty) rather than (Num a => ty) - In coercionLKind/coercionRKind - In contHoleType in the Simplifier I fixed the former by defining mkFunctionType and using it in coercionLKind/RKind. I could have done the same for the latter, but the information is almost to hand. So I fixed the latter by - adding sc_hole_ty to ApplyToVal (like ApplyToTy), - adding as_hole_ty to ValArg (like TyArg) - adding sc_fun_ty to StrictArg Turned out I could then remove ai_type from ArgInfo. This is just moving the deck chairs around, but it worked out nicely. See the new Note [AnonArgFlag] in GHC.Types.Var * When looking at the 'arity decrease' thing (#18093) I discovered that stable unfoldings had a much lower arity than the actual optimised function. That's what led to the arity-decrease message. Simple solution: eta-expand. 
It's described in Note [Eta-expand stable unfoldings] in GHC.Core.Opt.Simplify * I also discovered that unsafeCoerce wasn't being inlined if the context was boring. So (\x. f (unsafeCoerce x)) would create a thunk -- yikes! I fixed that by making inlineBoringOK a bit cleverer: see Note [Inline unsafeCoerce] in GHC.Core.Unfold. I also found that unsafeCoerceName was unused, so I removed it. I made a test case for #18078, and a very similar one for #17673. The net effect of all this on nofib is very modest, but positive: -------------------------------------------------------------------------------- Program Size Allocs Runtime Elapsed TotalMem -------------------------------------------------------------------------------- anna -0.4% -0.1% -3.1% -3.1% 0.0% fannkuch-redux -0.4% -0.3% -0.1% -0.1% 0.0% maillist -0.4% -0.1% -7.8% -1.0% -14.3% primetest -0.4% -15.6% -7.1% -6.6% 0.0% -------------------------------------------------------------------------------- Min -0.9% -15.6% -13.3% -14.2% -14.3% Max -0.3% 0.0% +12.1% +12.4% 0.0% Geometric Mean -0.4% -0.2% -2.3% -2.2% -0.1% All following metric decreases are compile-time allocation decreases between -1% and -3%: Metric Decrease: T5631 T13701 T14697 T15164
Diffstat (limited to 'compiler/GHC/Core/Opt/Driver.hs')
-rw-r--r--compiler/GHC/Core/Opt/Driver.hs53
1 files changed, 28 insertions, 25 deletions
diff --git a/compiler/GHC/Core/Opt/Driver.hs b/compiler/GHC/Core/Opt/Driver.hs
index 082eb9d326..07714aafaa 100644
--- a/compiler/GHC/Core/Opt/Driver.hs
+++ b/compiler/GHC/Core/Opt/Driver.hs
@@ -37,7 +37,7 @@ import GHC.Core.Opt.FloatOut ( floatOutwards )
import GHC.Core.FamInstEnv
import GHC.Types.Id
import GHC.Utils.Error ( withTiming, withTimingD, DumpFormat (..) )
-import GHC.Types.Basic ( CompilerPhase(..), isDefaultInlinePragma, defaultInlinePragma )
+import GHC.Types.Basic
import GHC.Types.Var.Set
import GHC.Types.Var.Env
import GHC.Core.Opt.LiberateCase ( liberateCase )
@@ -141,8 +141,10 @@ getCoreToDo dflags
maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
- maybe_strictness_before phase
- = runWhen (phase `elem` strictnessBefore dflags) CoreDoDemand
+ maybe_strictness_before (Phase phase)
+ | phase `elem` strictnessBefore dflags = CoreDoDemand
+ maybe_strictness_before _
+ = CoreDoNothing
base_mode = SimplMode { sm_phase = panic "base_mode"
, sm_names = []
@@ -152,20 +154,20 @@ getCoreToDo dflags
, sm_inline = True
, sm_case_case = True }
- simpl_phase phase names iter
+ simpl_phase phase name iter
= CoreDoPasses
$ [ maybe_strictness_before phase
, CoreDoSimplify iter
- (base_mode { sm_phase = Phase phase
- , sm_names = names })
+ (base_mode { sm_phase = phase
+ , sm_names = [name] })
- , maybe_rule_check (Phase phase) ]
+ , maybe_rule_check phase ]
- simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
- | phase <- [phases, phases-1 .. 1] ]
+ -- Run GHC's internal simplification phase, after all rules have run.
+ -- See Note [Compiler phases] in GHC.Types.Basic
+ simplify name = simpl_phase FinalPhase name max_iter
-
- -- initial simplify: mk specialiser happy: minimum effort please
+ -- initial simplify: mk specialiser happy: minimum effort please
simpl_gently = CoreDoSimplify max_iter
(base_mode { sm_phase = InitialPhase
, sm_names = ["Gentle"]
@@ -182,7 +184,7 @@ getCoreToDo dflags
demand_analyser = (CoreDoPasses (
dmd_cpr_ww ++
- [simpl_phase 0 ["post-worker-wrapper"] max_iter]
+ [simplify "post-worker-wrapper"]
))
-- Static forms are moved to the top level with the FloatOut pass.
@@ -203,7 +205,7 @@ getCoreToDo dflags
if opt_level == 0 then
[ static_ptrs_float_outwards,
CoreDoSimplify max_iter
- (base_mode { sm_phase = Phase 0
+ (base_mode { sm_phase = FinalPhase
, sm_names = ["Non-opt simplification"] })
]
@@ -251,8 +253,10 @@ getCoreToDo dflags
-- GHC.Iface.Tidy.StaticPtrTable.
static_ptrs_float_outwards,
- simpl_phases,
-
+ -- Run the simplifier phases 2,1,0 to allow rewrite rules to fire
+ CoreDoPasses [ simpl_phase (Phase phase) "main" max_iter
+ | phase <- [phases, phases-1 .. 1] ],
+ simpl_phase (Phase 0) "main" (max max_iter 3),
-- Phase 0: allow all Ids to be inlined now
-- This gets foldr inlined before strictness analysis
@@ -263,7 +267,6 @@ getCoreToDo dflags
-- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
-- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
-- Don't stop now!
- simpl_phase 0 ["main"] (max max_iter 3),
runWhen do_float_in CoreDoFloatInwards,
-- Run float-inwards immediately before the strictness analyser
@@ -274,9 +277,10 @@ getCoreToDo dflags
runWhen call_arity $ CoreDoPasses
[ CoreDoCallArity
- , simpl_phase 0 ["post-call-arity"] max_iter
+ , simplify "post-call-arity"
],
+ -- Strictness analysis
runWhen strictness demand_analyser,
runWhen exitification CoreDoExitify,
@@ -302,24 +306,24 @@ getCoreToDo dflags
runWhen do_float_in CoreDoFloatInwards,
- maybe_rule_check (Phase 0),
+ maybe_rule_check FinalPhase,
-- Case-liberation for -O2. This should be after
-- strictness analysis and the simplification which follows it.
runWhen liberate_case (CoreDoPasses [
CoreLiberateCase,
- simpl_phase 0 ["post-liberate-case"] max_iter
+ simplify "post-liberate-case"
]), -- Run the simplifier after LiberateCase to vastly
-- reduce the possibility of shadowing
-- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
runWhen spec_constr CoreDoSpecConstr,
- maybe_rule_check (Phase 0),
+ maybe_rule_check FinalPhase,
runWhen late_specialise
(CoreDoPasses [ CoreDoSpecialising
- , simpl_phase 0 ["post-late-spec"] max_iter]),
+ , simplify "post-late-spec"]),
-- LiberateCase can yield new CSE opportunities because it peels
-- off one layer of a recursive function (concretely, I saw this
@@ -328,11 +332,10 @@ getCoreToDo dflags
runWhen ((liberate_case || spec_constr) && cse) CoreCSE,
-- Final clean-up simplification:
- simpl_phase 0 ["final"] max_iter,
+ simplify "final",
runWhen late_dmd_anal $ CoreDoPasses (
- dmd_cpr_ww ++
- [simpl_phase 0 ["post-late-ww"] max_iter]
+ dmd_cpr_ww ++ [simplify "post-late-ww"]
),
-- Final run of the demand_analyser, ensures that one-shot thunks are
@@ -342,7 +345,7 @@ getCoreToDo dflags
-- can become /exponentially/ more expensive. See #11731, #12996.
runWhen (strictness || late_dmd_anal) CoreDoDemand,
- maybe_rule_check (Phase 0)
+ maybe_rule_check FinalPhase
]
-- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.