summaryrefslogtreecommitdiff
path: root/compiler/GHC/Core/Opt/Specialise.hs
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2020-08-24 14:36:57 +0100
committerSimon Peyton Jones <simonpj@microsoft.com>2020-09-21 16:15:14 +0100
commitff5a843e2003abed15f99d10eb1195cf9d572e06 (patch)
tree74e015a51530a05adb25daab808f97dd5b050a54 /compiler/GHC/Core/Opt/Specialise.hs
parent9df77fed8918bb335874a584a829ee32325cefb5 (diff)
downloadhaskell-wip/T18223.tar.gz
Better eta-expansion (again) and don't specilise DFunswip/T18223
This patch fixes #18223, which made GHC generate an exponential amount of code. There are three quite separate changes in here 1. Re-engineer eta-expansion (again). The eta-expander was generating lots of intermediate stuff, which could be optimised away, but which choked the simplifier meanwhile. Relatively easy to kill it off at source. See Note [The EtaInfo mechanism] in GHC.Core.Opt.Arity. The main new thing is the use of pushCoArg in getArg_maybe. 2. Stop Specialise specalising DFuns. This is the cause of a huge (and utterly unnecessary) blowup in program size in #18223. See Note [Do not specialise DFuns] in GHC.Core.Opt.Specialise. I also refactored the Specialise monad a bit... it was silly, because it passed on unchanging values as if they were mutable state. 3. Do an extra Simplifer run, after SpecConstra and before late-Specialise. I found (investigating perf/compiler/T16473) that failing to do this was crippling *both* SpecConstr *and* Specialise. See Note [Simplify after SpecConstr] in GHC.Core.Opt.Pipeline. This change does mean an extra run of the Simplifier, but only with -O2, and I think that's acceptable. T16473 allocates *three* times less with this change. (I changed it to check runtime rather than compile time.) Some smaller consequences * I moved pushCoercion, pushCoArg and friends from SimpleOpt to Arity, because it was needed by the new etaInfoApp. And pushCoValArg now returns a MCoercion rather than Coercion for the argument Coercion. * A minor, incidental improvement to Core pretty-printing This does fix #18223, (which was otherwise uncompilable. Hooray. But there is still a big intermediate because there are some very deeply nested types in that program. Modest reductions in compile-time allocation on a couple of benchmarks T12425 -2.0% T13253 -10.3% Metric increase with -O2, due to extra simplifier run T9233 +5.8% T12227 +1.8% T15630 +5.0% There is a spurious apparent increase on heap residency on T9630, on some architectures at least. I tried it with -G1 and the residency is essentially unchanged. Metric Increase T9233 T12227 T9630 Metric Decrease T12425 T13253
Diffstat (limited to 'compiler/GHC/Core/Opt/Specialise.hs')
-rw-r--r--compiler/GHC/Core/Opt/Specialise.hs279
1 files changed, 141 insertions, 138 deletions
diff --git a/compiler/GHC/Core/Opt/Specialise.hs b/compiler/GHC/Core/Opt/Specialise.hs
index 8e9e35d208..ba5679778b 100644
--- a/compiler/GHC/Core/Opt/Specialise.hs
+++ b/compiler/GHC/Core/Opt/Specialise.hs
@@ -21,22 +21,21 @@ import GHC.Tc.Utils.TcType hiding( substTy )
import GHC.Core.Type hiding( substTy, extendTvSubstList )
import GHC.Core.Multiplicity
import GHC.Core.Predicate
-import GHC.Unit.Module( Module, HasModule(..) )
+import GHC.Unit.Module( Module )
import GHC.Core.Coercion( Coercion )
import GHC.Core.Opt.Monad
import qualified GHC.Core.Subst as Core
-import GHC.Core.Unfold
import GHC.Core.Unfold.Make
import GHC.Types.Var ( isLocalVar )
import GHC.Types.Var.Set
import GHC.Types.Var.Env
import GHC.Core
import GHC.Core.Rules
-import GHC.Core.SimpleOpt ( collectBindersPushingCo )
import GHC.Core.Utils ( exprIsTrivial, getIdFromTrivialExpr_maybe
, mkCast, exprType )
import GHC.Core.FVs
-import GHC.Core.Opt.Arity ( etaExpandToJoinPointRule )
+import GHC.Core.Opt.Arity ( collectBindersPushingCo
+ , etaExpandToJoinPointRule )
import GHC.Types.Unique.Supply
import GHC.Types.Name
import GHC.Types.Id.Make ( voidArgId, voidPrimId )
@@ -53,12 +52,9 @@ import GHC.Utils.Misc
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Data.FastString
-import GHC.Utils.Monad.State
import GHC.Types.Unique.DFM
import GHC.Core.TyCo.Rep (TyCoBinder (..))
-import Control.Monad
-
{-
************************************************************************
* *
@@ -592,28 +588,29 @@ specProgram guts@(ModGuts { mg_module = this_mod
, mg_binds = binds })
= do { dflags <- getDynFlags
+ -- We need to start with a Subst that knows all the things
+ -- that are in scope, so that the substitution engine doesn't
+ -- accidentally re-use a unique that's already in use
+ -- Easiest thing is to do it all at once, as if all the top-level
+ -- decls were mutually recursive
+ ; let top_env = SE { se_subst = Core.mkEmptySubst $ mkInScopeSet $ mkVarSet $
+ bindersOfBinds binds
+ , se_interesting = emptyVarSet
+ , se_module = this_mod
+ , se_dflags = dflags }
+
+ go [] = return ([], emptyUDs)
+ go (bind:binds) = do (binds', uds) <- go binds
+ (bind', uds') <- specBind top_env bind uds
+ return (bind' ++ binds', uds')
+
-- Specialise the bindings of this module
- ; (binds', uds) <- runSpecM dflags this_mod (go binds)
+ ; (binds', uds) <- runSpecM (go binds)
- ; (spec_rules, spec_binds) <- specImports dflags this_mod top_env
- local_rules uds
+ ; (spec_rules, spec_binds) <- specImports top_env local_rules uds
; return (guts { mg_binds = spec_binds ++ binds'
, mg_rules = spec_rules ++ local_rules }) }
- where
- -- We need to start with a Subst that knows all the things
- -- that are in scope, so that the substitution engine doesn't
- -- accidentally re-use a unique that's already in use
- -- Easiest thing is to do it all at once, as if all the top-level
- -- decls were mutually recursive
- top_env = SE { se_subst = Core.mkEmptySubst $ mkInScopeSet $ mkVarSet $
- bindersOfBinds binds
- , se_interesting = emptyVarSet }
-
- go [] = return ([], emptyUDs)
- go (bind:binds) = do (binds', uds) <- go binds
- (bind', uds') <- specBind top_env bind uds
- return (bind' ++ binds', uds')
{-
Note [Wrap bindings returned by specImports]
@@ -643,13 +640,13 @@ See #10491
* *
********************************************************************* -}
-specImports :: DynFlags -> Module -> SpecEnv
+specImports :: SpecEnv
-> [CoreRule]
-> UsageDetails
-> CoreM ([CoreRule], [CoreBind])
-specImports dflags this_mod top_env local_rules
+specImports top_env local_rules
(MkUD { ud_binds = dict_binds, ud_calls = calls })
- | not $ gopt Opt_CrossModuleSpecialise dflags
+ | not $ gopt Opt_CrossModuleSpecialise (se_dflags top_env)
-- See Note [Disabling cross-module specialisation]
= return ([], wrapDictBinds dict_binds [])
@@ -657,8 +654,7 @@ specImports dflags this_mod top_env local_rules
= do { hpt_rules <- getRuleBase
; let rule_base = extendRuleBaseList hpt_rules local_rules
- ; (spec_rules, spec_binds) <- spec_imports dflags this_mod top_env
- [] rule_base
+ ; (spec_rules, spec_binds) <- spec_imports top_env [] rule_base
dict_binds calls
-- Don't forget to wrap the specialized bindings with
@@ -674,9 +670,7 @@ specImports dflags this_mod top_env local_rules
}
-- | Specialise a set of calls to imported bindings
-spec_imports :: DynFlags
- -> Module
- -> SpecEnv -- Passed in so that all top-level Ids are in scope
+spec_imports :: SpecEnv -- Passed in so that all top-level Ids are in scope
-> [Id] -- Stack of imported functions being specialised
-- See Note [specImport call stack]
-> RuleBase -- Rules from this module and the home package
@@ -686,8 +680,7 @@ spec_imports :: DynFlags
-> CallDetails -- Calls for imported things
-> CoreM ( [CoreRule] -- New rules
, [CoreBind] ) -- Specialised bindings
-spec_imports dflags this_mod top_env
- callers rule_base dict_binds calls
+spec_imports top_env callers rule_base dict_binds calls
= do { let import_calls = dVarEnvElts calls
-- ; debugTraceMsg (text "specImports {" <+>
-- vcat [ text "calls:" <+> ppr import_calls
@@ -701,16 +694,13 @@ spec_imports dflags this_mod top_env
go _ [] = return ([], [])
go rb (cis : other_calls)
= do { -- debugTraceMsg (text "specImport {" <+> ppr cis)
- ; (rules1, spec_binds1) <- spec_import dflags this_mod top_env
- callers rb dict_binds cis
+ ; (rules1, spec_binds1) <- spec_import top_env callers rb dict_binds cis
-- ; debugTraceMsg (text "specImport }" <+> ppr cis)
; (rules2, spec_binds2) <- go (extendRuleBaseList rb rules1) other_calls
; return (rules1 ++ rules2, spec_binds1 ++ spec_binds2) }
-spec_import :: DynFlags
- -> Module
- -> SpecEnv -- Passed in so that all top-level Ids are in scope
+spec_import :: SpecEnv -- Passed in so that all top-level Ids are in scope
-> [Id] -- Stack of imported functions being specialised
-- See Note [specImport call stack]
-> RuleBase -- Rules from this module
@@ -719,8 +709,7 @@ spec_import :: DynFlags
-> CallInfoSet -- Imported function and calls for it
-> CoreM ( [CoreRule] -- New rules
, [CoreBind] ) -- Specialised bindings
-spec_import dflags this_mod top_env callers
- rb dict_binds cis@(CIS fn _)
+spec_import top_env callers rb dict_binds cis@(CIS fn _)
| isIn "specImport" fn callers
= return ([], []) -- No warning. This actually happens all the time
-- when specialising a recursive function, because
@@ -731,8 +720,7 @@ spec_import dflags this_mod top_env callers
= do { -- debugTraceMsg (text "specImport:no valid calls")
; return ([], []) }
- | wantSpecImport dflags unfolding
- , Just rhs <- maybeUnfoldingTemplate unfolding
+ | Just rhs <- canSpecImport dflags fn
= do { -- Get rules from the external package state
-- We keep doing this in case we "page-fault in"
-- more rules as we go along
@@ -744,8 +732,8 @@ spec_import dflags this_mod top_env callers
; (rules1, spec_pairs, MkUD { ud_binds = dict_binds1, ud_calls = new_calls })
<- do { -- debugTraceMsg (text "specImport1" <+> vcat [ppr fn, ppr good_calls, ppr rhs])
- ; runSpecM dflags this_mod $
- specCalls (Just this_mod) top_env rules_for_fn good_calls fn rhs }
+ ; runSpecM $
+ specCalls True top_env rules_for_fn good_calls fn rhs }
; let spec_binds1 = [NonRec b r | (b,r) <- spec_pairs]
-- After the rules kick in we may get recursion, but
-- we rely on a global GlomBinds to sort that out later
@@ -753,7 +741,7 @@ spec_import dflags this_mod top_env callers
-- Now specialise any cascaded calls
-- ; debugTraceMsg (text "specImport 2" <+> (ppr fn $$ ppr rules1 $$ ppr spec_binds1))
- ; (rules2, spec_binds2) <- spec_imports dflags this_mod top_env
+ ; (rules2, spec_binds2) <- spec_imports top_env
(fn:callers)
(extendRuleBaseList rb rules1)
(dict_binds `unionBags` dict_binds1)
@@ -769,11 +757,34 @@ spec_import dflags this_mod top_env callers
; return ([], [])}
where
- unfolding = realIdUnfolding fn -- We want to see the unfolding even for loop breakers
+ dflags = se_dflags top_env
good_calls = filterCalls cis dict_binds
-- SUPER IMPORTANT! Drop calls that (directly or indirectly) refer to fn
-- See Note [Avoiding loops in specImports]
+canSpecImport :: DynFlags -> Id -> Maybe CoreExpr
+-- See Note [Specialise imported INLINABLE things]
+canSpecImport dflags fn
+ | CoreUnfolding { uf_src = src, uf_tmpl = rhs } <- unf
+ , isStableSource src
+ = Just rhs -- By default, specialise only imported things that have a stable
+ -- unfolding; that is, have an INLINE or INLINABLE pragma
+ -- Specialise even INLINE things; it hasn't inlined yet,
+ -- so perhaps it never will. Moreover it may have calls
+ -- inside it that we want to specialise
+
+ -- CoreUnfolding case does /not/ include DFunUnfoldings;
+ -- We only specialise DFunUnfoldings with -fspecialise-aggressively
+ -- See Note [Do not specialise imported DFuns]
+
+ | gopt Opt_SpecialiseAggressively dflags
+ = maybeUnfoldingTemplate unf -- With -fspecialise-aggressively, specialise anything
+ -- with an unfolding, stable or not, DFun or not
+
+ | otherwise = Nothing
+ where
+ unf = realIdUnfolding fn -- We want to see the unfolding even for loop breakers
+
-- | Returns whether or not to show a missed-spec warning.
-- If -Wall-missed-specializations is on, show the warning.
-- Otherwise, if -Wmissed-specializations is on, only show a warning
@@ -798,24 +809,47 @@ tryWarnMissingSpecs dflags callers fn calls_for_fn
, whenPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn))
, text "Probable fix: add INLINABLE pragma on" <+> quotes (ppr fn) ])
-wantSpecImport :: DynFlags -> Unfolding -> Bool
--- See Note [Specialise imported INLINABLE things]
-wantSpecImport dflags unf
- = case unf of
- NoUnfolding -> False
- BootUnfolding -> False
- OtherCon {} -> False
- DFunUnfolding {} -> True
- CoreUnfolding { uf_src = src, uf_guidance = _guidance }
- | gopt Opt_SpecialiseAggressively dflags -> True
- | isStableSource src -> True
- -- Specialise even INLINE things; it hasn't inlined yet,
- -- so perhaps it never will. Moreover it may have calls
- -- inside it that we want to specialise
- | otherwise -> False -- Stable, not INLINE, hence INLINABLE
-{- Note [Avoiding loops in specImports]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+{- Note [Do not specialise imported DFuns]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Ticket #18223 shows that specialising calls of DFuns is can cause a huge
+and entirely unnecessary blowup in program size. Consider a call to
+ f @[[[[[[[[T]]]]]]]] d1 x
+where df :: C a => C [a]
+ d1 :: C [[[[[[[[T]]]]]]]] = dfC[] @[[[[[[[T]]]]]]] d1
+ d2 :: C [[[[[[[T]]]]]]] = dfC[] @[[[[[[T]]]]]] d3
+ ...
+Now we'll specialise f's RHS, which may give rise to calls to 'g',
+also overloaded, which we will specialise, and so on. However, if
+we specialise the calls to dfC[], we'll generate specialised copies of
+all methods of C, at all types; and the same for C's superclasses.
+
+And many of these specialised functions will never be called. We are
+going to call the specialised 'f', and the specialised 'g', but DFuns
+group functions into a tuple, many of whose elements may never be used.
+
+With deeply-nested types this can lead to a simply overwhelming number
+of specialisations: see #18223 for a simple example (from the wild).
+I measured the number of specialisations for various numbers of calls
+of `flip evalStateT ()`, and got this
+
+ Size after one simplification
+ #calls #SPEC rules Terms Types
+ 5 56 3100 10600
+ 9 108 13660 77206
+
+The real tests case has 60+ calls, which blew GHC out of the water.
+
+Solution: don't specialise DFuns. The downside is that if we end
+up with (h (dfun d)), /and/ we don't specialise 'h', then we won't
+pass to 'h' a tuple of specialised functions.
+
+However, the flag -fspecialise-aggressively (experimental, off by default)
+allows DFuns to specialise as well.
+
+Note [Avoiding loops in specImports]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We must take great care when specialising instance declarations
(functions like $fOrdList) lest we accidentally build a recursive
dictionary. See Note [Avoiding loops].
@@ -1003,6 +1037,9 @@ data SpecEnv
-- Dict Ids that we know something about
-- and hence may be worth specialising against
-- See Note [Interesting dictionary arguments]
+
+ , se_module :: Module
+ , se_dflags :: DynFlags
}
instance Outputable SpecEnv where
@@ -1310,7 +1347,7 @@ specDefn :: SpecEnv
specDefn env body_uds fn rhs
= do { let (body_uds_without_me, calls_for_me) = callsForMe fn body_uds
rules_for_me = idCoreRules fn
- ; (rules, spec_defns, spec_uds) <- specCalls Nothing env rules_for_me
+ ; (rules, spec_defns, spec_uds) <- specCalls False env rules_for_me
calls_for_me fn rhs
; return ( fn `addIdSpecialisations` rules
, spec_defns
@@ -1323,8 +1360,8 @@ specDefn env body_uds fn rhs
-- body_uds_without_me
---------------------------
-specCalls :: Maybe Module -- Just this_mod => specialising imported fn
- -- Nothing => specialising local fn
+specCalls :: Bool -- True => specialising imported fn
+ -- False => specialising local fn
-> SpecEnv
-> [CoreRule] -- Existing RULES for the fn
-> [CallInfo]
@@ -1339,7 +1376,7 @@ type SpecInfo = ( [CoreRule] -- Specialisation rules
, [(Id,CoreExpr)] -- Specialised definition
, UsageDetails ) -- Usage details from specialised RHSs
-specCalls mb_mod env existing_rules calls_for_me fn rhs
+specCalls spec_imp env existing_rules calls_for_me fn rhs
-- The first case is the interesting one
| notNull calls_for_me -- And there are some calls to specialise
&& not (isNeverActive (idInlineActivation fn))
@@ -1370,7 +1407,9 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
inl_act = inlinePragmaActivation inl_prag
is_local = isLocalId fn
is_dfun = isDFunId fn
-
+ dflags = se_dflags env
+ ropts = initRuleOpts dflags
+ this_mod = se_module env
-- Figure out whether the function has an INLINE pragma
-- See Note [Inline specialisations]
@@ -1412,8 +1451,6 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
-- , ppr dx_binds ]) $
-- return ()
- ; dflags <- getDynFlags
- ; let ropts = initRuleOpts dflags
; if not useful -- No useful specialisation
|| already_covered ropts rules_acc rule_lhs_args
then return spec_acc
@@ -1441,17 +1478,15 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
= Nothing
; spec_fn <- newSpecIdSM fn spec_fn_ty spec_join_arity
- ; this_mod <- getModule
; let
-- The rule to put in the function's specialisation is:
-- forall x @b d1' d2'.
-- f x @T1 @b @T2 d1' d2' = f1 x @b
-- See Note [Specialising Calls]
- herald = case mb_mod of
- Nothing -- Specialising local fn
- -> text "SPEC"
- Just this_mod -- Specialising imported fn
- -> text "SPEC/" <> ppr this_mod
+ herald | spec_imp = -- Specialising imported fn
+ text "SPEC/" <> ppr this_mod
+ | otherwise = -- Specialising local fn
+ text "SPEC"
rule_name = mkFastString $ showSDoc dflags $
herald <+> ftext (occNameFS (getOccName fn))
@@ -2480,15 +2515,15 @@ mkCallUDs env f args
res = mkCallUDs' env f args
mkCallUDs' env f args
- | not (want_calls_for f) -- Imported from elsewhere
- || null ci_key -- No useful specialisation
- -- See also Note [Specialisations already covered]
+ | wantCallsFor env f -- We want it, and...
+ , not (null ci_key) -- this call site has a useful specialisation
+ = -- pprTrace "mkCallUDs: keeping" _trace_doc
+ singleCall f ci_key
+
+ | otherwise -- See also Note [Specialisations already covered]
= -- pprTrace "mkCallUDs: discarding" _trace_doc
emptyUDs
- | otherwise
- = -- pprTrace "mkCallUDs: keeping" _trace_doc
- singleCall f ci_key
where
_trace_doc = vcat [ppr f, ppr args, ppr ci_key]
pis = fst $ splitPiTys $ idType f
@@ -2525,12 +2560,23 @@ mkCallUDs' env f args
mk_spec_arg _ (Anon VisArg _)
= UnspecArg
- want_calls_for f = isLocalId f || isJust (maybeUnfoldingTemplate (realIdUnfolding f))
- -- For imported things, we gather call instances if
- -- there is an unfolding that we could in principle specialise
- -- We might still decide not to use it (consulting dflags)
- -- in specImports
- -- Use 'realIdUnfolding' to ignore the loop-breaker flag!
+wantCallsFor :: SpecEnv -> Id -> Bool
+wantCallsFor _env _f = True
+ -- We could reduce the size of the UsageDetails by being less eager
+ -- about collecting calls for LocalIds: there is no point for
+ -- ones that are lambda-bound. We can't decide this by looking at
+ -- the (absence of an) unfolding, because unfoldings for local
+ -- functions are discarded by cloneBindSM, so no local binder will
+ -- have an unfolding at this stage. We'd have to keep a candidate
+ -- set of let-binders.
+ --
+ -- Not many lambda-bound variables have dictionary arguments, so
+ -- this would make little difference anyway.
+ --
+ -- For imported Ids we could check for an unfolding, but we have to
+ -- do so anyway in canSpecImport, and it seems better to have it
+ -- all in one place. So we simply collect usage info for imported
+ -- overloaded functions.
{- Note [Type determines value]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2780,55 +2826,12 @@ deleteCallsFor bs calls = delDVarEnvList calls bs
************************************************************************
-}
-newtype SpecM a = SpecM (State SpecState a) deriving (Functor)
-
-data SpecState = SpecState {
- spec_uniq_supply :: UniqSupply,
- spec_module :: Module,
- spec_dflags :: DynFlags
- }
-
-instance Applicative SpecM where
- pure x = SpecM $ return x
- (<*>) = ap
-
-instance Monad SpecM where
- SpecM x >>= f = SpecM $ do y <- x
- case f y of
- SpecM z ->
- z
-
-instance MonadFail SpecM where
- fail str = SpecM $ error str
-
-instance MonadUnique SpecM where
- getUniqueSupplyM
- = SpecM $ do st <- get
- let (us1, us2) = splitUniqSupply $ spec_uniq_supply st
- put $ st { spec_uniq_supply = us2 }
- return us1
-
- getUniqueM
- = SpecM $ do st <- get
- let (u,us') = takeUniqFromSupply $ spec_uniq_supply st
- put $ st { spec_uniq_supply = us' }
- return u
-
-instance HasDynFlags SpecM where
- getDynFlags = SpecM $ liftM spec_dflags get
-
-instance HasModule SpecM where
- getModule = SpecM $ liftM spec_module get
-
-runSpecM :: DynFlags -> Module -> SpecM a -> CoreM a
-runSpecM dflags this_mod (SpecM spec)
- = do us <- getUniqueSupplyM
- let initialState = SpecState {
- spec_uniq_supply = us,
- spec_module = this_mod,
- spec_dflags = dflags
- }
- return $ evalState spec initialState
+type SpecM a = UniqSM a
+
+runSpecM :: SpecM a -> CoreM a
+runSpecM thing_inside
+ = do { us <- getUniqueSupplyM
+ ; return (initUs_ us thing_inside) }
mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails)
mapAndCombineSM _ [] = return ([], emptyUDs)