summaryrefslogtreecommitdiff
path: root/compiler/GHC/Core/Opt/DmdAnal.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/Core/Opt/DmdAnal.hs')
-rw-r--r--compiler/GHC/Core/Opt/DmdAnal.hs408
1 files changed, 381 insertions, 27 deletions
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs
index fa4bed48f0..e6b404ff61 100644
--- a/compiler/GHC/Core/Opt/DmdAnal.hs
+++ b/compiler/GHC/Core/Opt/DmdAnal.hs
@@ -31,6 +31,7 @@ import GHC.Types.Id
import GHC.Core.Utils
import GHC.Core.TyCon
import GHC.Core.Type
+import GHC.Core.Predicate ( isClassPred )
import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds )
import GHC.Core.Coercion ( Coercion )
import GHC.Core.TyCo.FVs ( coVarsOfCos )
@@ -39,7 +40,7 @@ import GHC.Core.Opt.Arity ( typeArity )
import GHC.Utils.Misc
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
-import GHC.Data.Maybe ( isJust )
+import GHC.Data.Maybe ( isJust, orElse )
import GHC.Builtin.PrimOps
import GHC.Builtin.Types.Prim ( realWorldStatePrimTy )
import GHC.Types.Unique.Set
@@ -57,8 +58,9 @@ _ = pprTrace -- Tired of commenting out the import all the time
-- | Options for the demand analysis
data DmdAnalOpts = DmdAnalOpts
- { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries
- , dmd_unbox_width :: !Int -- ^ Use strict dictionaries
+ { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries
+ , dmd_unbox_width :: !Int -- ^ Use strict dictionaries
+ , dmd_max_worker_args :: !Int
}
-- This is a strict alternative to (,)
@@ -278,8 +280,9 @@ dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec
where
WithDmdType body_ty body' = anal_body env
WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id
- -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils"
- id_dmd' = finaliseBoxity (ae_fam_envs env) NotInsideInlineableFun (idType id) id_dmd
+ -- See Note [Finalising boxity for demand signatures]
+
+ id_dmd' = finaliseLetBoxity (ae_fam_envs env) (idType id) id_dmd
!id' = setBindIdDemandInfo top_lvl id id_dmd'
(rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd') rhs
@@ -867,7 +870,7 @@ dmdAnalRhsSig
-- See Note [NOINLINE and strictness]
dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs
= -- pprTrace "dmdAnalRhsSig" (ppr id $$ ppr let_dmd $$ ppr sig $$ ppr lazy_fv) $
- (env', lazy_fv, id', rhs')
+ (final_env, lazy_fv, final_id, final_rhs)
where
rhs_arity = idArity id
-- See Note [Demand signatures are computed for a threshold demand based on idArity]
@@ -885,13 +888,15 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs
= unboxedWhenSmall (ae_opts env) (unboxableResultWidth env id) topSubDmd
-- See Note [Do not unbox class dictionaries]
- WithDmdType rhs_dmd_ty rhs' = dmdAnal (adjustInlFun id env) rhs_dmd rhs
- DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty
+ WithDmdType rhs_dmd_ty rhs' = dmdAnal env rhs_dmd rhs
+ DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty
+ (final_rhs_dmds, final_rhs) = finaliseArgBoxities env id rhs_arity rhs'
+ `orElse` (rhs_dmds, rhs')
- sig = mkDmdSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div)
+ sig = mkDmdSigForArity rhs_arity (DmdType sig_fv final_rhs_dmds rhs_div)
- id' = id `setIdDmdSig` sig
- !env' = extendAnalEnv top_lvl env id' sig
+ final_id = id `setIdDmdSig` sig
+ !final_env = extendAnalEnv top_lvl env final_id sig
-- See Note [Aggregated demand for cardinality]
-- FIXME: That Note doesn't explain the following lines at all. The reason
@@ -1156,6 +1161,369 @@ this, that actually happened in practice.
{- *********************************************************************
* *
+ Finalising boxity
+* *
+********************************************************************* -}
+
+{- Note [Finalising boxity for demand signatures]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The worker/wrapper pass must strictly adhere to the boxity decisions
+encoded in the demand signature, because that is the information that
+demand analysis propagates throughout the program. Failing to
+implement the strategy laid out in the signature can result in
+reboxing in unexpected places. Hence, we must completely anticipate
+unboxing decisions during demand analysis and reflect these decicions
+in demand annotations. That is the job of 'finaliseArgBoxities',
+which is defined here and called from demand analysis.
+
+Here is a list of different Notes it has to take care of:
+
+ * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in
+ general, but still allow Note [Unboxing evaluated arguments]
+ * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)`
+ * Implement fixes for corner cases Note [Do not unbox class dictionaries]
+ and Note [mkWWstr and unsafeCoerce]
+
+Then, in worker/wrapper blindly trusts the boxity info in the demand signature
+and will not look at strictness info *at all*, in 'wantToUnboxArg'.
+
+Note [Finalising boxity for let-bound Ids]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ let x = e in body
+where the demand on 'x' is 1!P(blah). We want to unbox x according to
+Note [Thunk splitting] in GHC.Core.Opt.WorkWrap. We must do this becuase
+worker/wrapper ignores stricness and looks only at boxity flags; so if
+x's demand is L!P(blah) we might still split it (wrongly). We want to
+switch to Boxed on any lazy demand.
+
+That is what finaliseLetBoxity does. It has no worker-arg budget, so it
+is much simpler than finaliseArgBoxities.
+
+Note [No nested Unboxed inside Boxed in demand signature]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+```
+f p@(x,y)
+ | even (x+y) = []
+ | otherwise = [p]
+```
+Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)`
+on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper
+can't unbox the pair components without unboxing the pair! So we better say
+`1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info.
+That happens via the call to trimBoxity in 'finaliseArgBoxities'/'finaliseLetBoxity'.
+
+Note [No lazy, Unboxed demands in demand signature]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider T19407:
+
+ data Huge = Huge Bool () ... () -- think: DynFlags
+ data T = T { h :: Huge, n :: Int }
+ f t@(T h _) = g h t
+ g (H b _ ... _) t = if b then 1 else n t
+
+The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better
+not put that demand in `g`'s demand signature, because worker/wrapper will not
+in general unbox a lazy-and-unboxed demand like `L!P(..)`.
+(The exception are known-to-be-evaluated arguments like strict fields,
+see Note [Unboxing evaluated arguments].)
+
+The program above is an example where spreading misinformed boxity through the
+signature is particularly egregious. If we give `g` that signature, then `f`
+puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and
+we get
+
+ f (T (H b _ ... _) n) = $wf b n
+ $wf b n = $wg b (T (H b x ... x) n)
+ $wg = ...
+
+Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in
+'trimBoxity', modulo Note [Unboxing evaluated arguments].
+
+Note [Unboxing evaluated arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this program (due to Roman):
+
+ data X a = X !a
+
+ foo :: X Int -> Int -> Int
+ foo x@(X a) n = go 0
+ where
+ go i | i < n = a + go (i+1)
+ | otherwise = 0
+
+We want the worker for 'foo' to look like this:
+
+ $wfoo :: Int# -> Int# -> Int#
+
+with the first argument unboxed, so that it is not eval'd each time around the
+'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It
+is sound for the wrapper to pass an unboxed arg because X is strict
+(see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument
+must be evaluated. And if we *don't* pass an unboxed argument, we can't even
+repair it by adding a `seq` thus:
+
+ foo (X a) n = a `seq` go 0
+
+because the seq is discarded (very early) since X is strict!
+
+So here's what we do
+
+* Since this has nothing to do with how 'foo' uses 'a', we leave demand
+ analysis alone, but account for the additional evaluatedness when
+ annotating the binder 'finaliseArgBoxities', which will retain the Unboxed
+ boxity on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning
+ it's used lazily but unboxed nonetheless. This seems to contradict Note
+ [No lazy, Unboxed demands in demand signature], but we know that 'a' is
+ evaluated and thus can be unboxed.
+
+* When 'finaliseArgBoxities' decides to unbox a record, it will zip the field demands
+ together with the respective 'StrictnessMark'. In case of 'x', it will pair
+ up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for
+ the strict field.
+
+* Said 'StrictnessMark' is passed to the recursive invocation of 'go_args' in
+ 'finaliseArgBoxities' when deciding whether to unbox 'a'. 'a' was used lazily, but
+ since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'.
+
+* Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will
+ /not/ look at the strictness bits of the demand, only at Boxity flags. As such,
+ it will happily unbox 'a' despite the lazy demand on it.
+
+The net effect is that boxity analysis and the w/w transformation are more
+aggressive about unboxing the strict arguments of a data constructor than when
+looking at strictness info exclusively. It is very much like (Nested) CPR, which
+needs its nested fields to be evaluated in order for it to unbox nestedly.
+
+There is the usual danger of reboxing, which as usual we ignore. But
+if X is monomorphic, and has an UNPACK pragma, then this optimisation
+is even more important. We don't want the wrapper to rebox an unboxed
+argument, and pass an Int to $wfoo!
+
+This works in nested situations like T10482
+
+ data family Bar a
+ data instance Bar (a, b) = BarPair !(Bar a) !(Bar b)
+ newtype instance Bar Int = Bar Int
+
+ foo :: Bar ((Int, Int), Int) -> Int -> Int
+ foo f k = case f of BarPair x y ->
+ case burble of
+ True -> case x of
+ BarPair p q -> ...
+ False -> ...
+
+The extra eagerness lets us produce a worker of type:
+ $wfoo :: Int# -> Int# -> Int# -> Int -> Int
+ $wfoo p# q# y# = ...
+
+even though the `case x` is only lazily evaluated.
+
+--------- Historical note ------------
+We used to add data-con strictness demands when demand analysing case
+expression. However, it was noticed in #15696 that this misses some cases. For
+instance, consider the program (from T10482)
+
+ data family Bar a
+ data instance Bar (a, b) = BarPair !(Bar a) !(Bar b)
+ newtype instance Bar Int = Bar Int
+
+ foo :: Bar ((Int, Int), Int) -> Int -> Int
+ foo f k =
+ case f of
+ BarPair x y -> case burble of
+ True -> case x of
+ BarPair p q -> ...
+ False -> ...
+
+We really should be able to assume that `p` is already evaluated since it came
+from a strict field of BarPair. This strictness would allow us to produce a
+worker of type:
+
+ $wfoo :: Int# -> Int# -> Int# -> Int -> Int
+ $wfoo p# q# y# = ...
+
+even though the `case x` is only lazily evaluated
+
+Indeed before we fixed #15696 this would happen since we would float the inner
+`case x` through the `case burble` to get:
+
+ foo f k =
+ case f of
+ BarPair x y -> case x of
+ BarPair p q -> case burble of
+ True -> ...
+ False -> ...
+
+However, after fixing #15696 this could no longer happen (for the reasons
+discussed in ticket:15696#comment:76). This means that the demand placed on `f`
+would then be significantly weaker (since the False branch of the case on
+`burble` is not strict in `p` or `q`).
+
+Consequently, we now instead account for data-con strictness in mkWWstr_one,
+applying the strictness demands to the final result of DmdAnal. The result is
+that we get the strict demand signature we wanted even if we can't float
+the case on `x` up through the case on `burble`.
+
+Note [Do not unbox class dictionaries]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+ f :: Ord a => [a] -> Int -> a
+ {-# INLINABLE f #-}
+and we worker/wrapper f, we'll get a worker with an INLINABLE pragma
+(see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap),
+which can still be specialised by the type-class specialiser, something like
+ fw :: Ord a => [a] -> Int# -> a
+
+BUT if f is strict in the Ord dictionary, we might unpack it, to get
+ fw :: (a->a->Bool) -> [a] -> Int# -> a
+and the type-class specialiser can't specialise that. An example is #6056.
+
+But in any other situation, a dictionary is just an ordinary value,
+and can be unpacked. So we track the INLINABLE pragma, and discard the boxity
+flag in finaliseArgBoxities (see the isClassPred test).
+
+Historical note: #14955 describes how I got this fix wrong the first time.
+
+Note that the simplicity of this fix implies that INLINE functions (such as
+wrapper functions after the WW run) will never say that they unbox class
+dictionaries. That's not ideal, but not worth losing sleep over, as INLINE
+functions will have been inlined by the time we run demand analysis so we'll
+see the unboxing around the worker in client modules. I got aware of the issue
+in T5075 by the change in boxity of loop between demand analysis runs.
+
+Note [Worker argument budget]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In 'finaliseArgBoxities' we don't want to generate workers with zillions of
+argument when, say given a strict record with zillions of fields. So we
+limit the maximum number of worker args to the maximum of
+ - -fmax-worker-args=N
+ - The number of args in the original function; if it already has has
+ zillions of arguments we don't want to seek /fewer/ args in the worker.
+(Maybe we should /add/ them instead of maxing?)
+
+We pursue a "layered" strategy for unboxing: we unbox the top level of the
+argument(s), subject to budget; if there are any arguments left we unbox the
+next layer, using that depleted budget.
+
+To achieve this, we use the classic almost-circular programming technique in
+which we we write one pass that takes a lazy list of the Budgets for every
+layer.
+-}
+
+data Budgets = MkB Arity Budgets -- An infinite list of arity budgets
+
+incTopBudget :: Budgets -> Budgets
+incTopBudget (MkB n bg) = MkB (n+1) bg
+
+positiveTopBudget :: Budgets -> Bool
+positiveTopBudget (MkB n _) = n >= 0
+
+finaliseArgBoxities :: AnalEnv -> Id -> Arity -> CoreExpr
+ -> Maybe ([Demand], CoreExpr)
+finaliseArgBoxities env fn arity rhs
+ | arity > count isId bndrs -- Can't find enough binders
+ = Nothing -- This happens if we have f = g
+ -- Then there are no binders; we don't worker/wrapper; and we
+ -- simply want to give f the same demand signature as g
+
+ | otherwise
+ = Just (arg_dmds', add_demands arg_dmds' rhs)
+ -- add_demands: we must attach the final boxities to the lambda-binders
+ -- of the function, both because that's kosher, and because CPR analysis
+ -- uses the info on the binders directly.
+ where
+ opts = ae_opts env
+ fam_envs = ae_fam_envs env
+ is_inlinable_fn = isStableUnfolding (realIdUnfolding fn)
+ (bndrs, _body) = collectBinders rhs
+ max_wkr_args = dmd_max_worker_args opts `max` arity
+ -- See Note [Worker argument budget]
+
+ -- This is the key line, which uses almost-circular programming
+ -- The remaining budget from one layer becomes the initial
+ -- budget for the next layer down. See Note [Worker argument budget]
+ (remaining_budget, arg_dmds') = go_args (MkB max_wkr_args remaining_budget) arg_triples
+
+ arg_triples :: [(Type, StrictnessMark, Demand)]
+ arg_triples = take arity $
+ map mk_triple $
+ filter isRuntimeVar bndrs
+
+ mk_triple :: Id -> (Type,StrictnessMark,Demand)
+ mk_triple bndr | is_cls_arg ty = (ty, NotMarkedStrict, trimBoxity dmd)
+ | otherwise = (ty, NotMarkedStrict, dmd)
+ where
+ ty = idType bndr
+ dmd = idDemandInfo bndr
+
+ -- is_cls_arg: see Note [Do not unbox class dictionaries]
+ is_cls_arg arg_ty = is_inlinable_fn && isClassPred arg_ty
+
+ go_args :: Budgets -> [(Type,StrictnessMark,Demand)] -> (Budgets, [Demand])
+ go_args bg triples = mapAccumL go_arg bg triples
+
+ go_arg :: Budgets -> (Type,StrictnessMark,Demand) -> (Budgets, Demand)
+ go_arg bg@(MkB bg_top bg_inner) (ty, str_mark, dmd@(n :* _))
+ = case wantToUnboxArg fam_envs ty dmd of
+ DropAbsent -> (bg, dmd)
+ StopUnboxing -> (MkB (bg_top-1) bg_inner, trimBoxity dmd)
+
+ Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds
+ -> (MkB (bg_top-1) final_bg_inner, final_dmd)
+ where
+ dc_arity = dataConRepArity dc
+ arg_tys = dubiousDataConInstArgTys dc tc_args
+ (bg_inner', dmds') = go_args (incTopBudget bg_inner) $
+ zip3 arg_tys (dataConRepStrictness dc) dmds
+ dmd' = n :* (mkProd Unboxed $! dmds')
+ (final_bg_inner, final_dmd)
+ | dmds `lengthIs` dc_arity
+ , isStrict n || isMarkedStrict str_mark
+ -- isStrict: see Note [No lazy, Unboxed demands in demand signature]
+ -- isMarkedStrict: see Note [Unboxing evaluated arguments]
+ , positiveTopBudget bg_inner'
+ = (bg_inner', dmd')
+ | otherwise
+ = (bg_inner, trimBoxity dmd)
+
+ add_demands :: [Demand] -> CoreExpr -> CoreExpr
+ -- Attach the demands to the outer lambdas of this expression
+ add_demands [] e = e
+ add_demands (dmd:dmds) (Lam v e)
+ | isTyVar v = Lam v (add_demands (dmd:dmds) e)
+ | otherwise = Lam (v `setIdDemandInfo` dmd) (add_demands dmds e)
+ add_demands dmds e = pprPanic "add_demands" (ppr dmds $$ ppr e)
+
+finaliseLetBoxity
+ :: FamInstEnvs
+ -> Type -- ^ Type of the let-bound Id
+ -> Demand -- ^ How the Id is used
+ -> Demand
+-- See Note [Finalising boxity for let-bound Ids]
+-- This function is like finaliseArgBoxities, but much simpler because
+-- it has no "budget". It simply unboxes strict demands, and stops
+-- when it reaches a lazy one.
+finaliseLetBoxity env ty dmd
+ = go ty NotMarkedStrict dmd
+ where
+ go ty mark dmd@(n :* _) =
+ case wantToUnboxArg env ty dmd of
+ DropAbsent -> dmd
+ StopUnboxing -> trimBoxity dmd
+ Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds
+ | isStrict n || isMarkedStrict mark
+ , dmds `lengthIs` dataConRepArity dc
+ , let arg_tys = dubiousDataConInstArgTys dc tc_args
+ dmds' = strictZipWith3 go arg_tys (dataConRepStrictness dc) dmds
+ -> n :* (mkProd Unboxed $! dmds')
+ | otherwise
+ -> trimBoxity dmd
+
+
+{- *********************************************************************
+* *
Fixpoints
* *
********************************************************************* -}
@@ -1366,11 +1734,8 @@ annotateLamIdBndr env dmd_ty id
-- pprTrace "annLamBndr" (vcat [ppr id, ppr dmd_ty, ppr final_ty]) $
WithDmdType main_ty new_id
where
- -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils"
- -- and Note [Do not unbox class dictionaries]
- dmd' = finaliseBoxity (ae_fam_envs env) (ae_inl_fun env) (idType id) dmd
- new_id = setIdDemandInfo id dmd'
- main_ty = addDemand dmd' dmd_ty'
+ new_id = setIdDemandInfo id dmd
+ main_ty = addDemand dmd dmd_ty'
WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id
{- Note [NOINLINE and strictness]
@@ -1455,9 +1820,6 @@ data AnalEnv = AE
, ae_virgin :: !Bool -- ^ True on first iteration only
-- See Note [Initialising strictness]
, ae_fam_envs :: !FamInstEnvs
- , ae_inl_fun :: !InsideInlineableFun
- -- ^ Whether we analyse the body of an inlineable fun.
- -- See Note [Do not unbox class dictionaries].
}
-- We use the se_env to tell us whether to
@@ -1481,7 +1843,6 @@ emptyAnalEnv opts fam_envs
, ae_sigs = emptySigEnv
, ae_virgin = True
, ae_fam_envs = fam_envs
- , ae_inl_fun = NotInsideInlineableFun
}
emptySigEnv :: SigEnv
@@ -1509,13 +1870,6 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id
nonVirgin :: AnalEnv -> AnalEnv
nonVirgin env = env { ae_virgin = False }
--- | Sets 'ae_inl_fun' according to whether the given 'Id' has an inlineable
--- unfolding. See Note [Do not unbox class dictionaries].
-adjustInlFun :: Id -> AnalEnv -> AnalEnv
-adjustInlFun id env
- | isStableUnfolding (realIdUnfolding id) = env { ae_inl_fun = InsideInlineableFun }
- | otherwise = env { ae_inl_fun = NotInsideInlineableFun }
-
findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> WithDmdType [Demand]
-- Return the demands on the Ids in the [Var]
findBndrsDmds env dmd_ty bndrs