diff options
Diffstat (limited to 'compiler/GHC/Core/Opt')
-rw-r--r-- | compiler/GHC/Core/Opt/DmdAnal.hs | 408 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Pipeline.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap.hs | 71 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap/Utils.hs | 334 |
4 files changed, 442 insertions, 376 deletions
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs index fa4bed48f0..e6b404ff61 100644 --- a/compiler/GHC/Core/Opt/DmdAnal.hs +++ b/compiler/GHC/Core/Opt/DmdAnal.hs @@ -31,6 +31,7 @@ import GHC.Types.Id import GHC.Core.Utils import GHC.Core.TyCon import GHC.Core.Type +import GHC.Core.Predicate ( isClassPred ) import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds ) import GHC.Core.Coercion ( Coercion ) import GHC.Core.TyCo.FVs ( coVarsOfCos ) @@ -39,7 +40,7 @@ import GHC.Core.Opt.Arity ( typeArity ) import GHC.Utils.Misc import GHC.Utils.Panic import GHC.Utils.Panic.Plain -import GHC.Data.Maybe ( isJust ) +import GHC.Data.Maybe ( isJust, orElse ) import GHC.Builtin.PrimOps import GHC.Builtin.Types.Prim ( realWorldStatePrimTy ) import GHC.Types.Unique.Set @@ -57,8 +58,9 @@ _ = pprTrace -- Tired of commenting out the import all the time -- | Options for the demand analysis data DmdAnalOpts = DmdAnalOpts - { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries - , dmd_unbox_width :: !Int -- ^ Use strict dictionaries + { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries + , dmd_unbox_width :: !Int -- ^ Use strict dictionaries + , dmd_max_worker_args :: !Int } -- This is a strict alternative to (,) @@ -278,8 +280,9 @@ dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec where WithDmdType body_ty body' = anal_body env WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id - -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils" - id_dmd' = finaliseBoxity (ae_fam_envs env) NotInsideInlineableFun (idType id) id_dmd + -- See Note [Finalising boxity for demand signatures] + + id_dmd' = finaliseLetBoxity (ae_fam_envs env) (idType id) id_dmd !id' = setBindIdDemandInfo top_lvl id id_dmd' (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd') rhs @@ -867,7 +870,7 @@ dmdAnalRhsSig -- See Note [NOINLINE and strictness] dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs = -- pprTrace "dmdAnalRhsSig" (ppr id $$ ppr let_dmd $$ ppr sig $$ ppr lazy_fv) $ - (env', lazy_fv, id', rhs') + (final_env, lazy_fv, final_id, final_rhs) where rhs_arity = idArity id -- See Note [Demand signatures are computed for a threshold demand based on idArity] @@ -885,13 +888,15 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs = unboxedWhenSmall (ae_opts env) (unboxableResultWidth env id) topSubDmd -- See Note [Do not unbox class dictionaries] - WithDmdType rhs_dmd_ty rhs' = dmdAnal (adjustInlFun id env) rhs_dmd rhs - DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty + WithDmdType rhs_dmd_ty rhs' = dmdAnal env rhs_dmd rhs + DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty + (final_rhs_dmds, final_rhs) = finaliseArgBoxities env id rhs_arity rhs' + `orElse` (rhs_dmds, rhs') - sig = mkDmdSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div) + sig = mkDmdSigForArity rhs_arity (DmdType sig_fv final_rhs_dmds rhs_div) - id' = id `setIdDmdSig` sig - !env' = extendAnalEnv top_lvl env id' sig + final_id = id `setIdDmdSig` sig + !final_env = extendAnalEnv top_lvl env final_id sig -- See Note [Aggregated demand for cardinality] -- FIXME: That Note doesn't explain the following lines at all. The reason @@ -1156,6 +1161,369 @@ this, that actually happened in practice. {- ********************************************************************* * * + Finalising boxity +* * +********************************************************************* -} + +{- Note [Finalising boxity for demand signatures] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The worker/wrapper pass must strictly adhere to the boxity decisions +encoded in the demand signature, because that is the information that +demand analysis propagates throughout the program. Failing to +implement the strategy laid out in the signature can result in +reboxing in unexpected places. Hence, we must completely anticipate +unboxing decisions during demand analysis and reflect these decicions +in demand annotations. That is the job of 'finaliseArgBoxities', +which is defined here and called from demand analysis. + +Here is a list of different Notes it has to take care of: + + * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in + general, but still allow Note [Unboxing evaluated arguments] + * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)` + * Implement fixes for corner cases Note [Do not unbox class dictionaries] + and Note [mkWWstr and unsafeCoerce] + +Then, in worker/wrapper blindly trusts the boxity info in the demand signature +and will not look at strictness info *at all*, in 'wantToUnboxArg'. + +Note [Finalising boxity for let-bound Ids] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + let x = e in body +where the demand on 'x' is 1!P(blah). We want to unbox x according to +Note [Thunk splitting] in GHC.Core.Opt.WorkWrap. We must do this becuase +worker/wrapper ignores stricness and looks only at boxity flags; so if +x's demand is L!P(blah) we might still split it (wrongly). We want to +switch to Boxed on any lazy demand. + +That is what finaliseLetBoxity does. It has no worker-arg budget, so it +is much simpler than finaliseArgBoxities. + +Note [No nested Unboxed inside Boxed in demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider +``` +f p@(x,y) + | even (x+y) = [] + | otherwise = [p] +``` +Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)` +on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper +can't unbox the pair components without unboxing the pair! So we better say +`1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info. +That happens via the call to trimBoxity in 'finaliseArgBoxities'/'finaliseLetBoxity'. + +Note [No lazy, Unboxed demands in demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider T19407: + + data Huge = Huge Bool () ... () -- think: DynFlags + data T = T { h :: Huge, n :: Int } + f t@(T h _) = g h t + g (H b _ ... _) t = if b then 1 else n t + +The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better +not put that demand in `g`'s demand signature, because worker/wrapper will not +in general unbox a lazy-and-unboxed demand like `L!P(..)`. +(The exception are known-to-be-evaluated arguments like strict fields, +see Note [Unboxing evaluated arguments].) + +The program above is an example where spreading misinformed boxity through the +signature is particularly egregious. If we give `g` that signature, then `f` +puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and +we get + + f (T (H b _ ... _) n) = $wf b n + $wf b n = $wg b (T (H b x ... x) n) + $wg = ... + +Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in +'trimBoxity', modulo Note [Unboxing evaluated arguments]. + +Note [Unboxing evaluated arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this program (due to Roman): + + data X a = X !a + + foo :: X Int -> Int -> Int + foo x@(X a) n = go 0 + where + go i | i < n = a + go (i+1) + | otherwise = 0 + +We want the worker for 'foo' to look like this: + + $wfoo :: Int# -> Int# -> Int# + +with the first argument unboxed, so that it is not eval'd each time around the +'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It +is sound for the wrapper to pass an unboxed arg because X is strict +(see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument +must be evaluated. And if we *don't* pass an unboxed argument, we can't even +repair it by adding a `seq` thus: + + foo (X a) n = a `seq` go 0 + +because the seq is discarded (very early) since X is strict! + +So here's what we do + +* Since this has nothing to do with how 'foo' uses 'a', we leave demand + analysis alone, but account for the additional evaluatedness when + annotating the binder 'finaliseArgBoxities', which will retain the Unboxed + boxity on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning + it's used lazily but unboxed nonetheless. This seems to contradict Note + [No lazy, Unboxed demands in demand signature], but we know that 'a' is + evaluated and thus can be unboxed. + +* When 'finaliseArgBoxities' decides to unbox a record, it will zip the field demands + together with the respective 'StrictnessMark'. In case of 'x', it will pair + up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for + the strict field. + +* Said 'StrictnessMark' is passed to the recursive invocation of 'go_args' in + 'finaliseArgBoxities' when deciding whether to unbox 'a'. 'a' was used lazily, but + since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'. + +* Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will + /not/ look at the strictness bits of the demand, only at Boxity flags. As such, + it will happily unbox 'a' despite the lazy demand on it. + +The net effect is that boxity analysis and the w/w transformation are more +aggressive about unboxing the strict arguments of a data constructor than when +looking at strictness info exclusively. It is very much like (Nested) CPR, which +needs its nested fields to be evaluated in order for it to unbox nestedly. + +There is the usual danger of reboxing, which as usual we ignore. But +if X is monomorphic, and has an UNPACK pragma, then this optimisation +is even more important. We don't want the wrapper to rebox an unboxed +argument, and pass an Int to $wfoo! + +This works in nested situations like T10482 + + data family Bar a + data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) + newtype instance Bar Int = Bar Int + + foo :: Bar ((Int, Int), Int) -> Int -> Int + foo f k = case f of BarPair x y -> + case burble of + True -> case x of + BarPair p q -> ... + False -> ... + +The extra eagerness lets us produce a worker of type: + $wfoo :: Int# -> Int# -> Int# -> Int -> Int + $wfoo p# q# y# = ... + +even though the `case x` is only lazily evaluated. + +--------- Historical note ------------ +We used to add data-con strictness demands when demand analysing case +expression. However, it was noticed in #15696 that this misses some cases. For +instance, consider the program (from T10482) + + data family Bar a + data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) + newtype instance Bar Int = Bar Int + + foo :: Bar ((Int, Int), Int) -> Int -> Int + foo f k = + case f of + BarPair x y -> case burble of + True -> case x of + BarPair p q -> ... + False -> ... + +We really should be able to assume that `p` is already evaluated since it came +from a strict field of BarPair. This strictness would allow us to produce a +worker of type: + + $wfoo :: Int# -> Int# -> Int# -> Int -> Int + $wfoo p# q# y# = ... + +even though the `case x` is only lazily evaluated + +Indeed before we fixed #15696 this would happen since we would float the inner +`case x` through the `case burble` to get: + + foo f k = + case f of + BarPair x y -> case x of + BarPair p q -> case burble of + True -> ... + False -> ... + +However, after fixing #15696 this could no longer happen (for the reasons +discussed in ticket:15696#comment:76). This means that the demand placed on `f` +would then be significantly weaker (since the False branch of the case on +`burble` is not strict in `p` or `q`). + +Consequently, we now instead account for data-con strictness in mkWWstr_one, +applying the strictness demands to the final result of DmdAnal. The result is +that we get the strict demand signature we wanted even if we can't float +the case on `x` up through the case on `burble`. + +Note [Do not unbox class dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + f :: Ord a => [a] -> Int -> a + {-# INLINABLE f #-} +and we worker/wrapper f, we'll get a worker with an INLINABLE pragma +(see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap), +which can still be specialised by the type-class specialiser, something like + fw :: Ord a => [a] -> Int# -> a + +BUT if f is strict in the Ord dictionary, we might unpack it, to get + fw :: (a->a->Bool) -> [a] -> Int# -> a +and the type-class specialiser can't specialise that. An example is #6056. + +But in any other situation, a dictionary is just an ordinary value, +and can be unpacked. So we track the INLINABLE pragma, and discard the boxity +flag in finaliseArgBoxities (see the isClassPred test). + +Historical note: #14955 describes how I got this fix wrong the first time. + +Note that the simplicity of this fix implies that INLINE functions (such as +wrapper functions after the WW run) will never say that they unbox class +dictionaries. That's not ideal, but not worth losing sleep over, as INLINE +functions will have been inlined by the time we run demand analysis so we'll +see the unboxing around the worker in client modules. I got aware of the issue +in T5075 by the change in boxity of loop between demand analysis runs. + +Note [Worker argument budget] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In 'finaliseArgBoxities' we don't want to generate workers with zillions of +argument when, say given a strict record with zillions of fields. So we +limit the maximum number of worker args to the maximum of + - -fmax-worker-args=N + - The number of args in the original function; if it already has has + zillions of arguments we don't want to seek /fewer/ args in the worker. +(Maybe we should /add/ them instead of maxing?) + +We pursue a "layered" strategy for unboxing: we unbox the top level of the +argument(s), subject to budget; if there are any arguments left we unbox the +next layer, using that depleted budget. + +To achieve this, we use the classic almost-circular programming technique in +which we we write one pass that takes a lazy list of the Budgets for every +layer. +-} + +data Budgets = MkB Arity Budgets -- An infinite list of arity budgets + +incTopBudget :: Budgets -> Budgets +incTopBudget (MkB n bg) = MkB (n+1) bg + +positiveTopBudget :: Budgets -> Bool +positiveTopBudget (MkB n _) = n >= 0 + +finaliseArgBoxities :: AnalEnv -> Id -> Arity -> CoreExpr + -> Maybe ([Demand], CoreExpr) +finaliseArgBoxities env fn arity rhs + | arity > count isId bndrs -- Can't find enough binders + = Nothing -- This happens if we have f = g + -- Then there are no binders; we don't worker/wrapper; and we + -- simply want to give f the same demand signature as g + + | otherwise + = Just (arg_dmds', add_demands arg_dmds' rhs) + -- add_demands: we must attach the final boxities to the lambda-binders + -- of the function, both because that's kosher, and because CPR analysis + -- uses the info on the binders directly. + where + opts = ae_opts env + fam_envs = ae_fam_envs env + is_inlinable_fn = isStableUnfolding (realIdUnfolding fn) + (bndrs, _body) = collectBinders rhs + max_wkr_args = dmd_max_worker_args opts `max` arity + -- See Note [Worker argument budget] + + -- This is the key line, which uses almost-circular programming + -- The remaining budget from one layer becomes the initial + -- budget for the next layer down. See Note [Worker argument budget] + (remaining_budget, arg_dmds') = go_args (MkB max_wkr_args remaining_budget) arg_triples + + arg_triples :: [(Type, StrictnessMark, Demand)] + arg_triples = take arity $ + map mk_triple $ + filter isRuntimeVar bndrs + + mk_triple :: Id -> (Type,StrictnessMark,Demand) + mk_triple bndr | is_cls_arg ty = (ty, NotMarkedStrict, trimBoxity dmd) + | otherwise = (ty, NotMarkedStrict, dmd) + where + ty = idType bndr + dmd = idDemandInfo bndr + + -- is_cls_arg: see Note [Do not unbox class dictionaries] + is_cls_arg arg_ty = is_inlinable_fn && isClassPred arg_ty + + go_args :: Budgets -> [(Type,StrictnessMark,Demand)] -> (Budgets, [Demand]) + go_args bg triples = mapAccumL go_arg bg triples + + go_arg :: Budgets -> (Type,StrictnessMark,Demand) -> (Budgets, Demand) + go_arg bg@(MkB bg_top bg_inner) (ty, str_mark, dmd@(n :* _)) + = case wantToUnboxArg fam_envs ty dmd of + DropAbsent -> (bg, dmd) + StopUnboxing -> (MkB (bg_top-1) bg_inner, trimBoxity dmd) + + Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds + -> (MkB (bg_top-1) final_bg_inner, final_dmd) + where + dc_arity = dataConRepArity dc + arg_tys = dubiousDataConInstArgTys dc tc_args + (bg_inner', dmds') = go_args (incTopBudget bg_inner) $ + zip3 arg_tys (dataConRepStrictness dc) dmds + dmd' = n :* (mkProd Unboxed $! dmds') + (final_bg_inner, final_dmd) + | dmds `lengthIs` dc_arity + , isStrict n || isMarkedStrict str_mark + -- isStrict: see Note [No lazy, Unboxed demands in demand signature] + -- isMarkedStrict: see Note [Unboxing evaluated arguments] + , positiveTopBudget bg_inner' + = (bg_inner', dmd') + | otherwise + = (bg_inner, trimBoxity dmd) + + add_demands :: [Demand] -> CoreExpr -> CoreExpr + -- Attach the demands to the outer lambdas of this expression + add_demands [] e = e + add_demands (dmd:dmds) (Lam v e) + | isTyVar v = Lam v (add_demands (dmd:dmds) e) + | otherwise = Lam (v `setIdDemandInfo` dmd) (add_demands dmds e) + add_demands dmds e = pprPanic "add_demands" (ppr dmds $$ ppr e) + +finaliseLetBoxity + :: FamInstEnvs + -> Type -- ^ Type of the let-bound Id + -> Demand -- ^ How the Id is used + -> Demand +-- See Note [Finalising boxity for let-bound Ids] +-- This function is like finaliseArgBoxities, but much simpler because +-- it has no "budget". It simply unboxes strict demands, and stops +-- when it reaches a lazy one. +finaliseLetBoxity env ty dmd + = go ty NotMarkedStrict dmd + where + go ty mark dmd@(n :* _) = + case wantToUnboxArg env ty dmd of + DropAbsent -> dmd + StopUnboxing -> trimBoxity dmd + Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds + | isStrict n || isMarkedStrict mark + , dmds `lengthIs` dataConRepArity dc + , let arg_tys = dubiousDataConInstArgTys dc tc_args + dmds' = strictZipWith3 go arg_tys (dataConRepStrictness dc) dmds + -> n :* (mkProd Unboxed $! dmds') + | otherwise + -> trimBoxity dmd + + +{- ********************************************************************* +* * Fixpoints * * ********************************************************************* -} @@ -1366,11 +1734,8 @@ annotateLamIdBndr env dmd_ty id -- pprTrace "annLamBndr" (vcat [ppr id, ppr dmd_ty, ppr final_ty]) $ WithDmdType main_ty new_id where - -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils" - -- and Note [Do not unbox class dictionaries] - dmd' = finaliseBoxity (ae_fam_envs env) (ae_inl_fun env) (idType id) dmd - new_id = setIdDemandInfo id dmd' - main_ty = addDemand dmd' dmd_ty' + new_id = setIdDemandInfo id dmd + main_ty = addDemand dmd dmd_ty' WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id {- Note [NOINLINE and strictness] @@ -1455,9 +1820,6 @@ data AnalEnv = AE , ae_virgin :: !Bool -- ^ True on first iteration only -- See Note [Initialising strictness] , ae_fam_envs :: !FamInstEnvs - , ae_inl_fun :: !InsideInlineableFun - -- ^ Whether we analyse the body of an inlineable fun. - -- See Note [Do not unbox class dictionaries]. } -- We use the se_env to tell us whether to @@ -1481,7 +1843,6 @@ emptyAnalEnv opts fam_envs , ae_sigs = emptySigEnv , ae_virgin = True , ae_fam_envs = fam_envs - , ae_inl_fun = NotInsideInlineableFun } emptySigEnv :: SigEnv @@ -1509,13 +1870,6 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id nonVirgin :: AnalEnv -> AnalEnv nonVirgin env = env { ae_virgin = False } --- | Sets 'ae_inl_fun' according to whether the given 'Id' has an inlineable --- unfolding. See Note [Do not unbox class dictionaries]. -adjustInlFun :: Id -> AnalEnv -> AnalEnv -adjustInlFun id env - | isStableUnfolding (realIdUnfolding id) = env { ae_inl_fun = InsideInlineableFun } - | otherwise = env { ae_inl_fun = NotInsideInlineableFun } - findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> WithDmdType [Demand] -- Return the demands on the Ids in the [Var] findBndrsDmds env dmd_ty bndrs diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index 62a40fbcb2..3f8e3fc186 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -1065,8 +1065,9 @@ transferIdInfo exported_id local_id dmdAnal :: Logger -> DynFlags -> FamInstEnvs -> [CoreRule] -> CoreProgram -> IO CoreProgram dmdAnal logger dflags fam_envs rules binds = do let !opts = DmdAnalOpts - { dmd_strict_dicts = gopt Opt_DictsStrict dflags - , dmd_unbox_width = dmdUnboxWidth dflags + { dmd_strict_dicts = gopt Opt_DictsStrict dflags + , dmd_unbox_width = dmdUnboxWidth dflags + , dmd_max_worker_args = maxWorkerArgs dflags } binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $ diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs index 6180a69ab8..3e4770a997 100644 --- a/compiler/GHC/Core/Opt/WorkWrap.hs +++ b/compiler/GHC/Core/Opt/WorkWrap.hs @@ -11,7 +11,6 @@ import GHC.Prelude import GHC.Driver.Session -import GHC.Core.Opt.Arity ( manifestArity ) import GHC.Core import GHC.Core.Unfold.Make import GHC.Core.Utils ( exprType, exprIsHNF ) @@ -552,7 +551,7 @@ tryWW ww_opts is_rec fn_id rhs | isRecordSelector fn_id = return [ (new_fn_id, rhs ) ] - | is_fun && is_eta_exp + | is_fun = splitFun ww_opts new_fn_id rhs -- See Note [Thunk splitting] @@ -576,8 +575,6 @@ tryWW ww_opts is_rec fn_id rhs | otherwise = id -- See Note [Don't w/w join points for CPR] - -- is_eta_exp: see Note [Don't eta expand in w/w] - is_eta_exp = length wrap_dmds == manifestArity rhs is_fun = notNull wrap_dmds || isJoinId fn_id is_thunk = not is_fun && not (exprIsHNF rhs) && not (isJoinId fn_id) && not (isUnliftedType (idType fn_id)) @@ -722,6 +719,11 @@ by LitRubbish (see Note [Drop absent bindings]) so there is no great harm. --------------------- splitFun :: WwOpts -> Id -> CoreExpr -> UniqSM [(Id, CoreExpr)] splitFun ww_opts fn_id rhs + | not (wrap_dmds `lengthIs` count isId arg_vars) + -- See Note [Don't eta expand in w/w] + = return [(fn_id, rhs)] + + | otherwise = warnPprTrace (not (wrap_dmds `lengthIs` (arityInfo fn_info))) "splitFun" (ppr fn_id <+> (ppr wrap_dmds $$ ppr cpr)) $ @@ -907,41 +909,60 @@ in w/w so that we don't pass the argument at all. Note [Thunk splitting] ~~~~~~~~~~~~~~~~~~~~~~ -Suppose x is used strictly (never mind whether it has the CPR -property). +Suppose x is used strictly; never mind whether it has the CPR +property. I'll use a '*' to mean "x* is demanded strictly". let x* = x-rhs in body splitThunk transforms like this: - let - x* = case x-rhs of { I# a -> I# a } + x* = let x = x-rhs in + case x of { I# a -> I# a } in body -Now simplifier will transform to - +This is a little strange: we are re-using the same `x` in the RHS; and +the RHS takes `x` apart and reboxes it. But because the outer 'let' is +strict, and the inner let mentions `x` only once, the simplifier +transform it to case x-rhs of I# a -> let x* = I# a in body -which is what we want. Now suppose x-rhs is itself a case: - - x-rhs = case e of { T -> I# a; F -> I# b } +That is good: in `body` we know the form of `x`, which + * gives the CPR property, and + * allows case-of-case to happen on x -The join point will abstract over a, rather than over (which is -what would have happened before) which is fine. - -Notice that x certainly has the CPR property now! - -In fact, splitThunk uses the function argument w/w splitting -function, so that if x's demand is deeper (say U(U(L,L),L)) -then the splitting will go deeper too. - -NB: For recursive thunks, the Simplifier is unable to float `x-rhs` out of -`x*`'s RHS, because `x*` occurs freely in `x-rhs`, and will just change it -back to the original definition, so we just split non-recursive thunks. +Notes +* I tried transforming like this: + let + x* = let x = x-rhs in + case x of { I# a -> x } + in body + where I return `x` itself, rather than reboxing it. But this + turned out to cause some regressions, which I never fully + investigated. + +* Suppose x-rhs is itself a case: + x-rhs = case e of { T -> I# e1; F -> I# e2 } + Then we'll get + join j a = let x* = I# a in body + in case e of { T -> j e1; F -> j e2 } + which is good (no boxing). But in the original, unsplit program + we would transform + let x* = case e of ... in body + ==> join j2 x = body + in case e of { T -> j2 (I# e1); F -> j (I# e2) } + which is not good (boxing). + +* In fact, splitThunk uses the function argument w/w splitting + function, mkWWstr_one, so that if x's demand is deeper (say U(U(L,L),L)) + then the splitting will go deeper too. + +* For recursive thunks, the Simplifier is unable to float `x-rhs` out of + `x*`'s RHS, because `x*` occurs freely in `x-rhs`, and will just change it + back to the original definition, so we just split non-recursive thunks. Note [Thunk splitting for top-level binders] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs index 86e57286c1..1b2d3ca1ba 100644 --- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs +++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs @@ -10,10 +10,10 @@ A library for the ``worker\/wrapper'' back-end to the strictness analyser module GHC.Core.Opt.WorkWrap.Utils ( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs , DataConPatContext(..) - , UnboxingDecision(..), InsideInlineableFun(..), wantToUnboxArg - , findTypeShape, IsRecDataConResult(..), isRecDataCon, finaliseBoxity + , UnboxingDecision(..), wantToUnboxArg + , findTypeShape, IsRecDataConResult(..), isRecDataCon , mkAbsentFiller - , isWorkerSmallEnough + , isWorkerSmallEnough, dubiousDataConInstArgTys ) where @@ -29,7 +29,6 @@ import GHC.Core.Make import GHC.Core.Subst import GHC.Core.Type import GHC.Core.Multiplicity -import GHC.Core.Predicate ( isClassPred ) import GHC.Core.Coercion import GHC.Core.Reduction import GHC.Core.FamInstEnv @@ -142,7 +141,7 @@ data WwOpts , wo_simple_opts :: !SimpleOpts , wo_cpr_anal :: !Bool , wo_fun_to_thunk :: !Bool - , wo_max_worker_args :: !Int + -- Used for absent argument error message , wo_module :: !Module } @@ -153,7 +152,6 @@ initWwOpts this_mod dflags fam_envs = MkWwOpts , wo_simple_opts = initSimpleOpts dflags , wo_cpr_anal = gopt Opt_CprAnal dflags , wo_fun_to_thunk = gopt Opt_FunToThunk dflags - , wo_max_worker_args = maxWorkerArgs dflags , wo_module = this_mod } @@ -245,9 +243,7 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr worker_body = mkLams work_lam_args . work_fn_cpr . call_rhs worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v] - ; if isWorkerSmallEnough (wo_max_worker_args opts) (length demands) work_args - && not (too_many_args_for_join_point arg_vars) - && ((useful1 && not only_one_void_argument) || useful2) + ; if ((useful1 && not only_one_void_argument) || useful2) then return (Just (worker_args_dmds, length work_call_args, wrapper_body, worker_body)) else return Nothing @@ -265,8 +261,6 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr zap_info info -- See Note [Zap IdInfo on worker args] = info `setOccInfo` noOccInfo - mb_join_arity = isJoinId_maybe fun_id - -- Note [Do not split void functions] only_one_void_argument | [d] <- demands @@ -276,17 +270,6 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr | otherwise = False - -- Note [Join points returning functions] - too_many_args_for_join_point wrap_args - | Just join_arity <- mb_join_arity - , wrap_args `lengthExceeds` join_arity - = warnPprTrace True "Unable to worker/wrapper join point" - (text "arity" <+> int join_arity <+> text "but" <+> - int (length wrap_args) <+> text "args") $ - True - | otherwise - = False - -- | Version of 'GHC.Core.mkApps' that does beta reduction on-the-fly. -- PRECONDITION: The arg expressions are not free in any of the lambdas binders. mkAppsBeta :: CoreExpr -> [CoreArg] -> CoreExpr @@ -454,36 +437,6 @@ occurrences, so $wj2 here is wrong. But of course, this is easy enough to fix: Hence we simply do the beta-reduction here. (This would be harder if we had to worry about hygiene, but luckily wy is freshly generated.) -Note [Join points returning functions] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It is crucial that the arity of a join point depends on its *callers,* not its -own syntax. What this means is that a join point can have "extra lambdas": - -f :: Int -> Int -> (Int, Int) -> Int -f x y = join j (z, w) = \(u, v) -> ... - in jump j (x, y) - -Typically this happens with functions that are seen as computing functions, -rather than being curried. (The real-life example was GHC.Data.Graph.Ops.addConflicts.) - -When we create the wrapper, it *must* be in "eta-contracted" form so that the -jump has the right number of arguments: - -f x y = join $wj z' w' = \u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ... - j (z, w) = jump $wj z w - -(See Note [Join points and beta-redexes] for where the lets come from.) If j -were a function, we would instead say - -f x y = let $wj = \z' w' u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ... - j (z, w) (u, v) = $wj z w u v - -Notice that the worker ends up with the same lambdas; it's only the wrapper we -have to be concerned about. - -FIXME Currently the functionality to produce "eta-contracted" wrappers is -unimplemented; we simply give up. - Note [Freshen WW arguments] ~~~~~~~~~~~~~~~~~~~~~~~~~~~ When we do a worker/wrapper split, we must freshen the arg vars of the original @@ -577,8 +530,8 @@ wantToUnboxArg fam_envs ty (n :* sd) , Just dc <- tyConSingleAlgDataCon_maybe tc , let arity = dataConRepArity dc , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity analysis] - -- NB: No strictness or evaluatedness checks here. That is done by - -- 'finaliseBoxity'! + -- NB: No strictness or evaluatedness checks here. + -- That is done by 'finaliseArgBoxities'! = Unbox (DataConPatContext dc tc_args co) ds | otherwise @@ -657,33 +610,6 @@ Note that the data constructor /can/ have evidence arguments: equality constraints, type classes etc. So it can be GADT. These evidence arguments are simply value arguments, and should not get in the way. -Note [Do not unbox class dictionaries] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If we have - f :: Ord a => [a] -> Int -> a - {-# INLINABLE f #-} -and we worker/wrapper f, we'll get a worker with an INLINABLE pragma -(see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap), -which can still be specialised by the type-class specialiser, something like - fw :: Ord a => [a] -> Int# -> a - -BUT if f is strict in the Ord dictionary, we might unpack it, to get - fw :: (a->a->Bool) -> [a] -> Int# -> a -and the type-class specialiser can't specialise that. An example is #6056. - -But in any other situation, a dictionary is just an ordinary value, -and can be unpacked. So we track the INLINABLE pragma, and discard the boxity -flag in finaliseBoxity (see the isClassPred test). - -Historical note: #14955 describes how I got this fix wrong the first time. - -Note that the simplicity of this fix implies that INLINE functions (such as -wrapper functions after the WW run) will never say that they unbox class -dictionaries. That's not ideal, but not worth losing sleep over, as INLINE -functions will have been inlined by the time we run demand analysis so we'll -see the unboxing around the worker in client modules. I got aware of the issue -in T5075 by the change in boxity of loop between demand analysis runs. - Note [mkWWstr and unsafeCoerce] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By using unsafeCoerce, it is possible to make the number of demands fail to @@ -691,193 +617,6 @@ match the number of constructor arguments; this happened in #8037. If so, the worker/wrapper split doesn't work right and we get a Core Lint bug. The fix here is simply to decline to do w/w if that happens. -Note [Unboxing evaluated arguments] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider this program (due to Roman): - - data X a = X !a - - foo :: X Int -> Int -> Int - foo x@(X a) n = go 0 - where - go i | i < n = a + go (i+1) - | otherwise = 0 - -We want the worker for 'foo' too look like this: - - $wfoo :: Int# -> Int# -> Int# - -with the first argument unboxed, so that it is not eval'd each time around the -'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It -is sound for the wrapper to pass an unboxed arg because X is strict -(see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument -must be evaluated. And if we *don't* pass an unboxed argument, we can't even -repair it by adding a `seq` thus: - - foo (X a) n = a `seq` go 0 - -because the seq is discarded (very early) since X is strict! - -So here's what we do - -* Since this has nothing to do with how 'foo' uses 'a', we leave demand analysis - alone, but account for the additional evaluatedness when annotating the binder - in 'annotateLamIdBndr' via 'finaliseBoxity', which will retain the Unboxed boxity - on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning it's used - lazily but unboxed nonetheless. This seems to contradict - Note [No lazy, Unboxed demands in demand signature], but we know that 'a' is - evaluated and thus can be unboxed. - -* When 'finaliseBoxity' decides to unbox a record, it will zip the field demands - together with the respective 'StrictnessMark'. In case of 'x', it will pair - up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for - the strict field. - -* Said 'StrictnessMark' is passed to the recursive invocation of - 'finaliseBoxity' when deciding whether to unbox 'a'. 'a' was used lazily, but - since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'. - -* Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will - /not/ look at the strictness bits of the demand, only at Boxity flags. As such, - it will happily unbox 'a' despite the lazy demand on it. - -The net effect is that boxity analysis and the w/w transformation are more -aggressive about unboxing the strict arguments of a data constructor than when -looking at strictness info exclusively. It is very much like (Nested) CPR, which -needs its nested fields to be evaluated in order for it to unbox nestedly. - -There is the usual danger of reboxing, which as usual we ignore. But -if X is monomorphic, and has an UNPACK pragma, then this optimisation -is even more important. We don't want the wrapper to rebox an unboxed -argument, and pass an Int to $wfoo! - -This works in nested situations like T10482 - - data family Bar a - data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) - newtype instance Bar Int = Bar Int - - foo :: Bar ((Int, Int), Int) -> Int -> Int - foo f k = case f of BarPair x y -> - case burble of - True -> case x of - BarPair p q -> ... - False -> ... - -The extra eagerness lets us produce a worker of type: - $wfoo :: Int# -> Int# -> Int# -> Int -> Int - $wfoo p# q# y# = ... - -even though the `case x` is only lazily evaluated. - ---------- Historical note ------------ -We used to add data-con strictness demands when demand analysing case -expression. However, it was noticed in #15696 that this misses some cases. For -instance, consider the program (from T10482) - - data family Bar a - data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) - newtype instance Bar Int = Bar Int - - foo :: Bar ((Int, Int), Int) -> Int -> Int - foo f k = - case f of - BarPair x y -> case burble of - True -> case x of - BarPair p q -> ... - False -> ... - -We really should be able to assume that `p` is already evaluated since it came -from a strict field of BarPair. This strictness would allow us to produce a -worker of type: - - $wfoo :: Int# -> Int# -> Int# -> Int -> Int - $wfoo p# q# y# = ... - -even though the `case x` is only lazily evaluated - -Indeed before we fixed #15696 this would happen since we would float the inner -`case x` through the `case burble` to get: - - foo f k = - case f of - BarPair x y -> case x of - BarPair p q -> case burble of - True -> ... - False -> ... - -However, after fixing #15696 this could no longer happen (for the reasons -discussed in ticket:15696#comment:76). This means that the demand placed on `f` -would then be significantly weaker (since the False branch of the case on -`burble` is not strict in `p` or `q`). - -Consequently, we now instead account for data-con strictness in mkWWstr_one, -applying the strictness demands to the final result of DmdAnal. The result is -that we get the strict demand signature we wanted even if we can't float -the case on `x` up through the case on `burble`. - -Note [No nested Unboxed inside Boxed in demand signature] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider -``` -f p@(x,y) - | even (x+y) = [] - | otherwise = [p] -``` -Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)` -on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper -can't unbox the pair components without unboxing the pair! So we better say -`1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info. -That happens in 'finaliseBoxity'. - -Note [No lazy, Unboxed demands in demand signature] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider T19407: - - data Huge = Huge Bool () ... () -- think: DynFlags - data T = T { h :: Huge, n :: Int } - f t@(T h _) = g h t - g (H b _ ... _) t = if b then 1 else n t - -The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better -not put that demand in `g`'s demand signature, because worker/wrapper will not -in general unbox a lazy-and-unboxed demand like `L!P(..)`. -(The exception are known-to-be-evaluated arguments like strict fields, -see Note [Unboxing evaluated arguments].) - -The program above is an example where spreading misinformed boxity through the -signature is particularly egregious. If we give `g` that signature, then `f` -puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and -we get - - f (T (H b _ ... _) n) = $wf b n - $wf b n = $wg b (T (H b x ... x) n) - $wg = ... - -Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in -'finaliseBoxity', modulo Note [Unboxing evaluated arguments]. - -Note [Finalising boxity for demand signature] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The worker/wrapper pass must strictly adhere to the boxity decisions encoded -in the demand signature, because that is the information that demand analysis -propagates throughout the program. Failing to implement the strategy laid out -in the signature can result in reboxing in unexpected places. Hence, we must -completely anticipate unboxing decisions during demand analysis and reflect -these decicions in demand annotations. That is the job of 'finaliseBoxity', -which is defined here and called from demand analysis. - -Here is a list of different Notes it has to take care of: - - * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in - general, but still allow Note [Unboxing evaluated arguments] - * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)` - * Implement fixes for corner cases Note [Do not unbox class dictionaries] - and Note [mkWWstr and unsafeCoerce] - -Then, in worker/wrapper blindly trusts the boxity info in the demand signature -and will not look at strictness info *at all*, in 'wantToUnboxArg'. - Note [non-algebraic or open body type warning] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are a few cases where the W/W transformation is told that something @@ -1031,6 +770,7 @@ function is worthy for splitting: E.g. B comes from a function like f x = error "urk" and the absent demand A can come from Note [Unboxing evaluated arguments] + in GHC.Core.Opt.DmdAnal. 2. If the argument is evaluated strictly (or known to be eval'd), we can take a view into the product demand ('viewProd'). In accordance @@ -1070,9 +810,9 @@ function is worthy for splitting: in GHC itself where the tuple was DynFlags 3. In all other cases (e.g., lazy, used demand and not eval'd), - 'finaliseBoxity' will have cleared the Boxity flag to 'Boxed' - (see Note [Finalising boxity for demand signature]) and - 'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one' + 'finaliseArgBoxities' will have cleared the Boxity flag to 'Boxed' + (see Note [Finalising boxity for demand signatures] in GHC.Core.Opt.DmdAnal) + and 'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one' stops unboxing. Note [Worker/wrapper for bottoming functions] @@ -1177,7 +917,7 @@ Needless to say, there are some wrinkles: Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field, but it's quite nasty to thread the marks though 'mkWWstr' and 'mkWWstr_one'. So we rather look out for a necessary condition for strict fields: - Note [Unboxing evaluated arguments] makes it so that the demand on + Note [Unboxing evaluated arguments] in DmdAnal makes it so that the demand on 'zs' is absent and /strict/: It will get cardinality 'C_10', the empty interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It guarantees we never fill in an error-thunk for an absent strict field. @@ -1410,56 +1150,6 @@ isRecDataCon fam_envs fuel dc -> combineIRDCRs (map (\dc -> go_dc (subWithInf fuel 1) rec_tc' dc) dcs) -- See Note [Detecting recursive data constructors], point (4) --- | A specialised Bool for an argument to 'finaliseBoxity'. --- See Note [Do not unbox class dictionaries]. -data InsideInlineableFun - = NotInsideInlineableFun -- ^ Not in an inlineable fun. - | InsideInlineableFun -- ^ We are in an inlineable fun, so we won't - -- unbox dictionary args. - deriving Eq - --- | This function makes sure that the demand only says 'Unboxed' where --- worker/wrapper should actually unbox and trims any boxity beyond that. --- Called for every demand annotation during DmdAnal. --- --- > data T a = T !a --- > f :: (T (Int,Int), Int) -> () --- > f p = ... -- demand on p: 1!P(L!P(L!P(L), L!P(L)), L!P(L)) --- --- 'finaliseBoxity' will trim the demand on 'p' to 1!P(L!P(LP(L), LP(L)), LP(L)). --- This is done when annotating lambdas and thunk bindings. --- See Note [Finalising boxity for demand signature] -finaliseBoxity - :: FamInstEnvs - -> InsideInlineableFun -- ^ See the haddocks on 'InsideInlineableFun' - -> Type -- ^ Type of the argument - -> Demand -- ^ How the arg was used - -> Demand -finaliseBoxity env in_inl_fun ty dmd = go NotMarkedStrict ty dmd - where - go mark ty dmd@(n :* _) = - case wantToUnboxArg env ty dmd of - DropAbsent -> dmd - Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} ds - -- See Note [No lazy, Unboxed demands in demand signature] - -- See Note [Unboxing evaluated arguments] - | isStrict n || isMarkedStrict mark - -- See Note [Do not unbox class dictionaries] - , in_inl_fun == NotInsideInlineableFun || not (isClassPred ty) - -- See Note [mkWWstr and unsafeCoerce] - , ds `lengthIs` dataConRepArity dc - , let arg_tys = dubiousDataConInstArgTys dc tc_args - -> -- pprTrace "finaliseBoxity:Unbox" (ppr ty $$ ppr dmd $$ ppr ds) $ - n :* (mkProd Unboxed $! zip_go_with_marks dc arg_tys ds) - -- See Note [No nested Unboxed inside Boxed in demand signature] - _ -> trimBoxity dmd - - -- See Note [Unboxing evaluated arguments] - zip_go_with_marks dc arg_tys ds = case dataConWrapId_maybe dc of - Nothing -> strictZipWith (go NotMarkedStrict) arg_tys ds - -- Shortcut when DataCon worker=wrapper - Just _ -> strictZipWith3 go (dataConRepStrictness dc) arg_tys ds - {- ************************************************************************ * * |