diff options
author | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
---|---|---|
committer | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
commit | 84c2ad99582391005b5e873198b15e9e9eb4f78d (patch) | |
tree | caa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/specialise | |
parent | 8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff) | |
parent | e68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff) | |
download | haskell-wip/T13904.tar.gz |
update to current master againwip/T13904
Diffstat (limited to 'compiler/specialise')
-rw-r--r-- | compiler/specialise/Rules.hs | 196 | ||||
-rw-r--r-- | compiler/specialise/SpecConstr.hs | 166 | ||||
-rw-r--r-- | compiler/specialise/Specialise.hs | 19 |
3 files changed, 252 insertions, 129 deletions
diff --git a/compiler/specialise/Rules.hs b/compiler/specialise/Rules.hs index b5606754e6..ad6a0757cb 100644 --- a/compiler/specialise/Rules.hs +++ b/compiler/specialise/Rules.hs @@ -28,6 +28,8 @@ module Rules ( #include "HsVersions.h" +import GhcPrelude + import CoreSyn -- All of it import Module ( Module, ModuleSet, elemModuleSet ) import CoreSubst @@ -38,7 +40,7 @@ import CoreUtils ( exprType, eqExpr, mkTick, mkTicks, stripTicksTopT, stripTicksTopE, isJoinBind ) import PprCore ( pprRules ) -import Type ( Type, substTy, mkTCvSubst ) +import Type ( Type, Kind, substTy, mkTCvSubst ) import TcType ( tcSplitTyConApp_maybe ) import TysWiredIn ( anyTypeOfKind ) import Coercion @@ -53,7 +55,7 @@ import NameSet import NameEnv import UniqFM import Unify ( ruleMatchTyKiX ) -import BasicTypes ( Activation, CompilerPhase, isActive, pprRuleName ) +import BasicTypes import DynFlags ( DynFlags ) import Outputable import FastString @@ -288,9 +290,10 @@ addRuleInfo (RuleInfo rs1 fvs1) (RuleInfo rs2 fvs2) = RuleInfo (rs1 ++ rs2) (fvs1 `unionDVarSet` fvs2) addIdSpecialisations :: Id -> [CoreRule] -> Id -addIdSpecialisations id [] - = id addIdSpecialisations id rules + | null rules + = id + | otherwise = setIdSpecialisation id $ extendRuleInfo (idSpecialisation id) rules @@ -310,9 +313,8 @@ ruleIsVisible _ BuiltinRule{} = True ruleIsVisible vis_orphs Rule { ru_orphan = orph, ru_origin = origin } = notOrphan orph || origin `elemModuleSet` vis_orphs -{- -Note [Where rules are found] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{- Note [Where rules are found] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The rules for an Id come from two places: (a) the ones it is born with, stored inside the Id iself (idCoreRules fn), (b) rules added in other modules, stored in the global RuleBase (imp_rules) @@ -348,7 +350,7 @@ mkRuleBase rules = extendRuleBaseList emptyRuleBase rules extendRuleBaseList :: RuleBase -> [CoreRule] -> RuleBase extendRuleBaseList rule_base new_guys - = foldl extendRuleBase rule_base new_guys + = foldl' extendRuleBase rule_base new_guys unionRuleBase :: RuleBase -> RuleBase -> RuleBase unionRuleBase rb1 rb2 = plusNameEnv_C (++) rb1 rb2 @@ -411,21 +413,20 @@ lookupRule dflags in_scope is_active fn args rules findBest :: (Id, [CoreExpr]) -> (CoreRule,CoreExpr) -> [(CoreRule,CoreExpr)] -> (CoreRule,CoreExpr) -- All these pairs matched the expression --- Return the pair the the most specific rule +-- Return the pair the most specific rule -- The (fn,args) is just for overlap reporting findBest _ (rule,ans) [] = (rule,ans) findBest target (rule1,ans1) ((rule2,ans2):prs) | rule1 `isMoreSpecific` rule2 = findBest target (rule1,ans1) prs | rule2 `isMoreSpecific` rule1 = findBest target (rule2,ans2) prs - | debugIsOn = let pp_rule rule = sdocWithPprDebug $ \dbg -> if dbg - then ppr rule - else doubleQuotes (ftext (ruleName rule)) + | debugIsOn = let pp_rule rule + = ifPprDebug (ppr rule) + (doubleQuotes (ftext (ruleName rule))) in pprTrace "Rules.findBest: rule overlap (Rule 1 wins)" - (vcat [ sdocWithPprDebug $ \dbg -> if dbg - then text "Expression to match:" <+> ppr fn - <+> sep (map ppr args) - else empty + (vcat [ whenPprDebug $ + text "Expression to match:" <+> ppr fn + <+> sep (map ppr args) , text "Rule 1:" <+> pp_rule rule1 , text "Rule 2:" <+> pp_rule rule2]) $ findBest target (rule1,ans1) prs @@ -517,7 +518,7 @@ matchRule _ in_scope is_active _ args rough_args | ruleCantMatch tpl_tops rough_args = Nothing | otherwise = case matchN in_scope rule_name tpl_vars tpl_args args of - Nothing -> Nothing + Nothing -> Nothing Just (bind_wrapper, tpl_vals) -> Just (bind_wrapper $ rule_fn `mkApps` tpl_vals) where @@ -535,58 +536,82 @@ matchN :: InScopeEnv matchN (in_scope, id_unf) rule_name tmpl_vars tmpl_es target_es = do { subst <- go init_menv emptyRuleSubst tmpl_es target_es - ; let (_, matched_es) = mapAccumL lookup_tmpl subst tmpl_vars + ; let (_, matched_es) = mapAccumL lookup_tmpl subst $ + tmpl_vars `zip` tmpl_vars1 ; return (rs_binds subst, matched_es) } where - init_rn_env = mkRnEnv2 (extendInScopeSetList in_scope tmpl_vars) - -- See Note [Template binders] + (init_rn_env, tmpl_vars1) = mapAccumL rnBndrL (mkRnEnv2 in_scope) tmpl_vars + -- See Note [Cloning the template binders] - init_menv = RV { rv_tmpls = mkVarSet tmpl_vars, rv_lcl = init_rn_env - , rv_fltR = mkEmptySubst (rnInScopeSet init_rn_env) - , rv_unf = id_unf } + init_menv = RV { rv_tmpls = mkVarSet tmpl_vars1 + , rv_lcl = init_rn_env + , rv_fltR = mkEmptySubst (rnInScopeSet init_rn_env) + , rv_unf = id_unf } go _ subst [] _ = Just subst go _ _ _ [] = Nothing -- Fail if too few actual args go menv subst (t:ts) (e:es) = do { subst1 <- match menv subst t e ; go menv subst1 ts es } - lookup_tmpl :: RuleSubst -> Var -> (RuleSubst, CoreExpr) - lookup_tmpl rs@(RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst }) tmpl_var - | isId tmpl_var - = case lookupVarEnv id_subst tmpl_var of + lookup_tmpl :: RuleSubst -> (InVar,OutVar) -> (RuleSubst, CoreExpr) + -- Need to return a RuleSubst solely for the benefit of mk_fake_ty + lookup_tmpl rs@(RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst }) + (tmpl_var, tmpl_var1) + | isId tmpl_var1 + = case lookupVarEnv id_subst tmpl_var1 of Just e -> (rs, e) - Nothing | Just refl_co <- isReflCoVar_maybe tmpl_var - , let co_expr = Coercion refl_co - -> (rs { rs_id_subst = extendVarEnv id_subst tmpl_var co_expr }, co_expr) + Nothing | Just refl_co <- isReflCoVar_maybe tmpl_var1 + , let co_expr = Coercion refl_co + id_subst' = extendVarEnv id_subst tmpl_var1 co_expr + rs' = rs { rs_id_subst = id_subst' } + -> (rs', co_expr) -- See Note [Unbound RULE binders] | otherwise -> unbound tmpl_var | otherwise - = case lookupVarEnv tv_subst tmpl_var of + = case lookupVarEnv tv_subst tmpl_var1 of Just ty -> (rs, Type ty) - Nothing -> (rs { rs_tv_subst = extendVarEnv tv_subst tmpl_var fake_ty }, Type fake_ty) - -- See Note [Unbound RULE binders] + Nothing -> (rs', Type fake_ty) -- See Note [Unbound RULE binders] where - fake_ty = anyTypeOfKind kind - cv_subst = to_co_env id_subst - kind = Type.substTy (mkTCvSubst in_scope (tv_subst, cv_subst)) - (tyVarKind tmpl_var) - - to_co_env env = nonDetFoldUFM_Directly to_co emptyVarEnv env - -- It's OK to use nonDetFoldUFM_Directly because we forget the - -- order immediately by creating a new env - to_co uniq expr env - | Just co <- exprToCoercion_maybe expr - = extendVarEnv_Directly env uniq co - - | otherwise - = env - - unbound var = pprPanic "Template variable unbound in rewrite rule" $ - vcat [ text "Variable:" <+> ppr var - , text "Rule" <+> pprRuleName rule_name - , text "Rule bndrs:" <+> ppr tmpl_vars - , text "LHS args:" <+> ppr tmpl_es - , text "Actual args:" <+> ppr target_es ] + rs' = rs { rs_tv_subst = extendVarEnv tv_subst tmpl_var1 fake_ty } + fake_ty = mk_fake_ty in_scope rs tmpl_var1 + -- This call is the sole reason we accumulate + -- RuleSubst in lookup_tmpl + + unbound tmpl_var + = pprPanic "Template variable unbound in rewrite rule" $ + vcat [ text "Variable:" <+> ppr tmpl_var <+> dcolon <+> ppr (varType tmpl_var) + , text "Rule" <+> pprRuleName rule_name + , text "Rule bndrs:" <+> ppr tmpl_vars + , text "LHS args:" <+> ppr tmpl_es + , text "Actual args:" <+> ppr target_es ] + + +mk_fake_ty :: InScopeSet -> RuleSubst -> TyVar -> Kind +-- Roughly: +-- mk_fake_ty subst tv = Any @(subst (tyVarKind tv)) +-- That is: apply the substitution to the kind of the given tyvar, +-- and make an 'any' type of that kind. +-- Tiresomely, the RuleSubst is not well adapted to substTy, leading to +-- horrible impedence matching. +-- +-- Happily, this function is seldom called +mk_fake_ty in_scope (RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst }) tmpl_var1 + = anyTypeOfKind kind + where + kind = Type.substTy (mkTCvSubst in_scope (tv_subst, cv_subst)) + (tyVarKind tmpl_var1) + + cv_subst = to_co_env id_subst + + to_co_env :: IdSubstEnv -> CvSubstEnv + to_co_env env = nonDetFoldUFM_Directly to_co emptyVarEnv env + -- It's OK to use nonDetFoldUFM_Directly because we forget the + -- order immediately by creating a new env + + to_co uniq expr env + = case exprToCoercion_maybe expr of + Just co -> extendVarEnv_Directly env uniq co + Nothing -> env {- Note [Unbound RULE binders] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -631,8 +656,8 @@ bound on the LHS: in Trac #13410, and also in test T10602. -Note [Template binders] -~~~~~~~~~~~~~~~~~~~~~~~ +Note [Cloning the template binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider the following match (example 1): Template: forall x. f x Target: f (x+1) @@ -643,21 +668,19 @@ Likewise this one (example 2): Template: forall x. f (\x.x) Target: f (\y.y) -We achieve this simply by: - * Adding forall'd template binders to the in-scope set - -This works even if the template binder are already in scope -(in the target) because +We achieve this simply by using rnBndrL to clone the template +binders if they are already in scope. - * The RuleSubst rs_tv_subst, rs_id_subst maps LHS template vars to - the target world. It is not applied recursively. - - * Having the template vars in the in-scope set ensures that in - example 2 above, the (\x.x) is cloned to (\x'. x'). - -In the past we used rnBndrL to clone the template variables if -they were already in scope. But (a) that's not necessary and (b) -it complicate the fancy footwork for Note [Unbound template type variables] +------ Historical note ------- +At one point I tried simply adding the template binders to the +in-scope set /without/ cloning them, but that failed in a horribly +obscure way in Trac #14777. Problem was that during matching we look +up target-term variables in the in-scope set (see Note [Lookup +in-scope]). If a target-term variable happens to name-clash with a +template variable, that lookup will find the template variable, which +is /utterly/ bogus. In Trac #14777, this transformed a term variable +into a type variable, and then crashed when we wanted its idInfo. +------ End of historical note ------- ************************************************************************ @@ -673,11 +696,12 @@ it complicate the fancy footwork for Note [Unbound template type variables] -- from nested matches; see the Let case of match, below -- data RuleMatchEnv - = RV { rv_tmpls :: VarSet -- Template variables - , rv_lcl :: RnEnv2 -- Renamings for *local bindings* + = RV { rv_lcl :: RnEnv2 -- Renamings for *local bindings* -- (lambda/case) + , rv_tmpls :: VarSet -- Template variables + -- (after applying envL of rv_lcl) , rv_fltR :: Subst -- Renamings for floated let-bindings - -- domain disjoint from envR of rv_lcl + -- (domain disjoint from envR of rv_lcl) -- See Note [Matching lets] , rv_unf :: IdUnfoldingFun } @@ -707,7 +731,6 @@ emptyRuleSubst = RS { rs_tv_subst = emptyVarEnv, rs_id_subst = emptyVarEnv -- For a start, in general eta expansion wastes work. -- SLPJ July 99 - match :: RuleMatchEnv -> RuleSubst -> CoreExpr -- Template @@ -738,7 +761,8 @@ match _ _ e@Tick{} _ -- succeed in matching what looks like the template variable 'a' against 3. -- The Var case follows closely what happens in Unify.match -match renv subst (Var v1) e2 = match_var renv subst v1 e2 +match renv subst (Var v1) e2 + = match_var renv subst v1 e2 match renv subst e1 (Var v2) -- Note [Expanding variables] | not (inRnEnvR rn_env v2) -- Note [Do not expand locally-bound variables] @@ -883,7 +907,7 @@ match_alts renv subst ((c1,vs1,r1):alts1) ((c2,vs2,r2):alts2) = do { subst1 <- match renv' subst r1 r2 ; match_alts renv subst1 alts1 alts2 } where - renv' = foldl mb renv (vs1 `zip` vs2) + renv' = foldl' mb renv (vs1 `zip` vs2) mb renv (v1,v2) = rnMatchBndr2 renv subst v1 v2 match_alts _ _ _ _ @@ -1110,19 +1134,19 @@ SpecConstr sees this fragment: Data.Maybe.Nothing -> lvl_smf; Data.Maybe.Just n_acT [Just S(L)] -> case n_acT of wild1_ams [Just A] { GHC.Base.I# y_amr [Just L] -> - \$wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf + $wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf }}; and correctly generates the rule RULES: "SC:$wfoo1" [0] __forall {y_amr [Just L] :: GHC.Prim.Int# sc_snn :: GHC.Prim.Int#} - \$wfoo_smW sc_snn (Data.Maybe.Just @ GHC.Base.Int (GHC.Base.I# y_amr)) - = \$s\$wfoo_sno y_amr sc_snn ;] + $wfoo_smW sc_snn (Data.Maybe.Just @ GHC.Base.Int (GHC.Base.I# y_amr)) + = $s$wfoo_sno y_amr sc_snn ;] BUT we must ensure that this rule matches in the original function! -Note that the call to \$wfoo is - \$wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf +Note that the call to $wfoo is + $wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf During matching we expand wild_Xf to (Just n_acT). But then we must also expand n_acT to (I# y_amr). And we can only do that if we look up n_acT @@ -1147,10 +1171,10 @@ is so important. -- string for the purposes of error reporting ruleCheckProgram :: CompilerPhase -- ^ Rule activation test -> String -- ^ Rule pattern - -> RuleEnv -- ^ Database of rules + -> (Id -> [CoreRule]) -- ^ Rules for an Id -> CoreProgram -- ^ Bindings to check in -> SDoc -- ^ Resulting check message -ruleCheckProgram phase rule_pat rule_base binds +ruleCheckProgram phase rule_pat rules binds | isEmptyBag results = text "Rule check results: no rule application sites" | otherwise @@ -1163,7 +1187,7 @@ ruleCheckProgram phase rule_pat rule_base binds , rc_id_unf = idUnfolding -- Not quite right -- Should use activeUnfolding , rc_pattern = rule_pat - , rc_rule_base = rule_base } + , rc_rules = rules } results = unionManyBags (map (ruleCheckBind env) binds) line = text (replicate 20 '-') @@ -1171,7 +1195,7 @@ data RuleCheckEnv = RuleCheckEnv { rc_is_active :: Activation -> Bool, rc_id_unf :: IdUnfoldingFun, rc_pattern :: String, - rc_rule_base :: RuleEnv + rc_rules :: Id -> [CoreRule] } ruleCheckBind :: RuleCheckEnv -> CoreBind -> Bag SDoc @@ -1205,7 +1229,7 @@ ruleCheckFun env fn args | null name_match_rules = emptyBag | otherwise = unitBag (ruleAppCheck_help env fn args name_match_rules) where - name_match_rules = filter match (getRules (rc_rule_base env) fn) + name_match_rules = filter match (rc_rules env fn) match rule = (rc_pattern env) `isPrefixOf` unpackFS (ruleName rule) ruleAppCheck_help :: RuleCheckEnv -> Id -> [CoreExpr] -> [CoreRule] -> SDoc diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs index e5af0b8a3c..f6d27ccba5 100644 --- a/compiler/specialise/SpecConstr.hs +++ b/compiler/specialise/SpecConstr.hs @@ -19,6 +19,8 @@ module SpecConstr( #include "HsVersions.h" +import GhcPrelude + import CoreSyn import CoreSubst import CoreUtils @@ -36,7 +38,6 @@ import TyCon ( tyConName ) import Id import PprCore ( pprParendExpr ) import MkCore ( mkImpossibleExpr ) -import Var import VarEnv import VarSet import Name @@ -57,9 +58,6 @@ import Control.Monad ( zipWithM ) import Data.List import PrelNames ( specTyConName ) import Module - --- See Note [Forcing specialisation] - import TyCon ( TyCon ) import GHC.Exts( SpecConstrAnnotation(..) ) import Data.Ord( comparing ) @@ -502,31 +500,46 @@ This is all quite ugly; we ought to come up with a better design. ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set sc_force to True when calling specLoop. This flag does four things: + * Ignore specConstrThreshold, to specialise functions of arbitrary size (see scTopBind) * Ignore specConstrCount, to make arbitrary numbers of specialisations (see specialise) * Specialise even for arguments that are not scrutinised in the loop - (see argToPat; Trac #4488) + (see argToPat; Trac #4448) * Only specialise on recursive types a finite number of times (see is_too_recursive; Trac #5550; Note [Limit recursive specialisation]) -This flag is inherited for nested non-recursive bindings (which are likely to -be join points and hence should be fully specialised) but reset for nested -recursive bindings. - -What alternatives did I consider? Annotating the loop itself doesn't -work because (a) it is local and (b) it will be w/w'ed and having -w/w propagating annotations somehow doesn't seem like a good idea. The -types of the loop arguments really seem to be the most persistent -thing. - -Annotating the types that make up the loop state doesn't work, -either, because (a) it would prevent us from using types like Either -or tuples here, (b) we don't want to restrict the set of types that -can be used in Stream states and (c) some types are fixed by the user -(e.g., the accumulator here) but we still want to specialise as much -as possible. +The flag holds only for specialising a single binding group, and NOT +for nested bindings. (So really it should be passed around explicitly +and not stored in ScEnv.) Trac #14379 turned out to be caused by + f SPEC x = let g1 x = ... + in ... +We force-specialise f (because of the SPEC), but that generates a specialised +copy of g1 (as well as the original). Alas g1 has a nested binding g2; and +in each copy of g1 we get an unspecialised and specialised copy of g2; and so +on. Result, exponential. So the force-spec flag now only applies to one +level of bindings at a time. + +Mechanism for this one-level-only thing: + + - Switch it on at the call to specRec, in scExpr and scTopBinds + - Switch it off when doing the RHSs; + this can be done very conveniently in decreaseSpecCount + +What alternatives did I consider? + +* Annotating the loop itself doesn't work because (a) it is local and + (b) it will be w/w'ed and having w/w propagating annotations somehow + doesn't seem like a good idea. The types of the loop arguments + really seem to be the most persistent thing. + +* Annotating the types that make up the loop state doesn't work, + either, because (a) it would prevent us from using types like Either + or tuples here, (b) we don't want to restrict the set of types that + can be used in Stream states and (c) some types are fixed by the + user (e.g., the accumulator here) but we still want to specialise as + much as possible. Alternatives to ForceSpecConstr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -586,7 +599,7 @@ more than N times (controlled by -fspec-constr-recursive=N) we check specialisations. If sc_count is "no limit" then we arbitrarily choose 10 as the limit (ugh). -See Trac #5550. Also Trac #13623, where this test had become over-agressive, +See Trac #5550. Also Trac #13623, where this test had become over-aggressive, and we lost a wonderful specialisation that we really wanted! Note [NoSpecConstr] @@ -597,7 +610,7 @@ to mean "don't specialise on arguments of this type". It was added before we had ForceSpecConstr. Lacking ForceSpecConstr we specialised regardless of size; and then we needed a way to turn that *off*. Now that we have ForceSpecConstr, this NoSpecConstr is probably redundant. -(Used only for PArray.) +(Used only for PArray, TODO: remove?) ----------------------------------------------------- Stuff not yet handled @@ -975,7 +988,8 @@ extendCaseBndrs env scrut case_bndr con alt_bndrs decreaseSpecCount :: ScEnv -> Int -> ScEnv -- See Note [Avoiding exponential blowup] decreaseSpecCount env n_specs - = env { sc_count = case sc_count env of + = env { sc_force = False -- See Note [Forcing specialisation] + , sc_count = case sc_count env of Nothing -> Nothing Just n -> Just (n `div` (n_specs + 1)) } -- The "+1" takes account of the original function; @@ -1545,7 +1559,11 @@ specRec top_lvl env body_usg rhs_infos return (usg_so_far, spec_infos) | otherwise - = do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos + = -- pprTrace "specRec3" (vcat [ text "bndrs" <+> ppr (map ri_fn rhs_infos) + -- , text "iteration" <+> int n_iter + -- , text "spec_infos" <+> ppr (map (map os_pat . si_specs) spec_infos) + -- ]) $ + do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos ; let (extra_usg_s, new_spec_infos) = unzip specs_w_usg extra_usg = combineUsages extra_usg_s all_usg = usg_so_far `combineUsage` extra_usg @@ -1792,7 +1810,7 @@ that specialisations didn't fire inside wrappers; see test simplCore/should_compile/spec-inline. So now I just use the inline-activation of the parent Id, as the -activation for the specialiation RULE, just like the main specialiser; +activation for the specialisation RULE, just like the main specialiser; This in turn means there is no point in specialising NOINLINE things, so we test for that. @@ -1881,6 +1899,69 @@ by trim_pats. * Otherwise we sort the patterns to choose the most general ones first; more general => more widely applicable. + +Note [SpecConstr and casts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider (Trac #14270) a call like + + let f = e + in ... f (K @(a |> co)) ... + +where 'co' is a coercion variable not in scope at f's definition site. +If we aren't caereful we'll get + + let $sf a co = e (K @(a |> co)) + RULE "SC:f" forall a co. f (K @(a |> co)) = $sf a co + f = e + in ... + +But alas, when we match the call we won't bind 'co', because type-matching +(for good reasons) discards casts). + +I don't know how to solve this, so for now I'm just discarding any +call patterns that + * Mentions a coercion variable in a type argument + * That is not in scope at the binding of the function + +I think this is very rare. + +It is important (e.g. Trac #14936) that this /only/ applies to +coercions mentioned in casts. We don't want to be discombobulated +by casts in terms! For example, consider + f ((e1,e2) |> sym co) +where, say, + f :: Foo -> blah + co :: Foo ~R (Int,Int) + +Here we definitely do want to specialise for that pair! We do not +match on the structre of the coercion; instead we just match on a +coercion variable, so the RULE looks like + + forall (x::Int, y::Int, co :: (Int,Int) ~R Foo) + f ((x,y) |> co) = $sf x y co + +Often the body of f looks like + f arg = ...(case arg |> co' of + (x,y) -> blah)... + +so that the specialised f will turn into + $sf x y co = let arg = (x,y) |> co + in ...(case arg>| co' of + (x,y) -> blah).... + +which will simplify to not use 'co' at all. But we can't guarantee +that co will end up unused, so we still pass it. Absence analysis +may remove it later. + +Note that this /also/ discards the call pattern if we have a cast in a +/term/, although in fact Rules.match does make a very flaky and +fragile attempt to match coercions. e.g. a call like + f (Maybe Age) (Nothing |> co) blah + where co :: Maybe Int ~ Maybe Age +will be discarded. It's extremely fragile to match on the form of a +coercion, so I think it's better just not to try. A more complicated +alternative would be to discard calls that mention coercion variables +only in kind-casts, but I'm doing the simple thing for now. -} type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments @@ -1918,7 +1999,8 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls -- Discard specialisations if there are too many of them trimmed_pats = trim_pats env fn spec_info small_pats --- ; pprTrace "callsToPats" (vcat [ text "calls:" <+> ppr calls +-- ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls +-- , text "done_specs:" <+> ppr (map os_pat done_specs) -- , text "good_pats:" <+> ppr good_pats ]) $ -- return () @@ -1931,7 +2013,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats | sc_force env || isNothing mb_scc || n_remaining >= n_pats - = pats -- No need to trim + = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats) + pats -- No need to trim | otherwise = emit_trace $ -- Need to trim, so keep the best ones @@ -1975,6 +2058,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats speakNOf spec_count' (text "call pattern") <> comma <+> text "but the limit is" <+> int max_specs) ] , text "Use -fspec-constr-count=n to set the bound" + , text "done_spec_count =" <+> int done_spec_count + , text "Keeping " <+> int n_remaining <> text ", out of" <+> int n_pats , text "Discarding:" <+> ppr (drop n_remaining sorted_pats) ] @@ -1983,21 +2068,23 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat) -- Type variables come first, since they may scope -- over the following term variables -- The [CoreExpr] are the argument patterns for the rule -callToPats env bndr_occs (Call _ args con_env) +callToPats env bndr_occs call@(Call _ args con_env) | args `ltLength` bndr_occs -- Check saturated = return Nothing | otherwise - = do { let in_scope = substInScope (sc_subst env) + = do { let in_scope = substInScope (sc_subst env) ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs - ; let pat_fvs = exprsFreeVarsList pats + ; let pat_fvs = exprsFreeVarsList pats -- To get determinism we need the list of free variables in -- deterministic order. Otherwise we end up creating -- lambdas with different argument orders. See -- determinism/simplCore/should_compile/spec-inline-determ.hs -- for an example. For explanation of determinism -- considerations See Note [Unique Determinism] in Unique. + in_scope_vars = getInScopeVars in_scope - qvars = filterOut (`elemVarSet` in_scope_vars) pat_fvs + is_in_scope v = v `elemVarSet` in_scope_vars + qvars = filterOut is_in_scope pat_fvs -- Quantify over variables that are not in scope -- at the call site -- See Note [Free type variables of the qvar types] @@ -2012,8 +2099,21 @@ callToPats env bndr_occs (Call _ args con_env) sanitise id = id `setIdType` expandTypeSynonyms (idType id) -- See Note [Free type variables of the qvar types] + -- Bad coercion variables: see Note [SpecConstr and casts] + bad_covars :: CoVarSet + bad_covars = mapUnionVarSet get_bad_covars pats + get_bad_covars :: CoreArg -> CoVarSet + get_bad_covars (Type ty) + = filterVarSet (\v -> isId v && not (is_in_scope v)) $ + tyCoVarsOfType ty + get_bad_covars _ + = emptyVarSet + ; -- pprTrace "callToPats" (ppr args $$ ppr bndr_occs) $ - if interesting + WARN( not (isEmptyVarSet bad_covars) + , text "SpecConstr: bad covars:" <+> ppr bad_covars + $$ ppr call ) + if interesting && isEmptyVarSet bad_covars then return (Just (qvars', pats)) else return Nothing } diff --git a/compiler/specialise/Specialise.hs b/compiler/specialise/Specialise.hs index 869da640ea..6f775dfdcb 100644 --- a/compiler/specialise/Specialise.hs +++ b/compiler/specialise/Specialise.hs @@ -9,6 +9,8 @@ module Specialise ( specProgram, specUnfolding ) where #include "HsVersions.h" +import GhcPrelude + import Id import TcType hiding( substTy ) import Type hiding( substTy, extendTvSubstList ) @@ -43,9 +45,7 @@ import State import UniqDFM import Control.Monad -#if __GLASGOW_HASKELL__ > 710 import qualified Control.Monad.Fail as MonadFail -#endif {- ************************************************************************ @@ -147,7 +147,7 @@ becomes in fl -We still have recusion for non-overloaded functions which we +We still have recursion for non-overloaded functions which we specialise, but the recursive call should get specialised to the same recursive version. @@ -735,7 +735,7 @@ specImport dflags this_mod top_env done callers rb fn calls_for_fn = do { warnMsg (vcat [ hang (text "Could not specialise imported function" <+> quotes (ppr fn)) 2 (vcat [ text "when specialising" <+> quotes (ppr caller) | caller <- callers]) - , ifPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn)) + , whenPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn)) , text "Probable fix: add INLINABLE pragma on" <+> quotes (ppr fn) ]) ; return ([], []) } @@ -1343,10 +1343,10 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs -- See Note [Specialising imported functions] in OccurAnal | InlinePragma { inl_inline = Inlinable } <- inl_prag - = (inl_prag { inl_inline = EmptyInlineSpec }, noUnfolding) + = (inl_prag { inl_inline = NoUserInline }, noUnfolding) | otherwise - = (inl_prag, specUnfolding poly_tyvars spec_app + = (inl_prag, specUnfolding dflags poly_tyvars spec_app arity_decrease fn_unf) arity_decrease = length spec_dict_args @@ -2011,6 +2011,7 @@ mkCallUDs' env f args EqPred {} -> True IrredPred {} -> True -- Things like (D []) where D is a -- Constraint-ranged family; Trac #7785 + ForAllPred {} -> True {- Note [Type determines value] @@ -2095,7 +2096,7 @@ mkDB bind = (bind, bind_fvs bind) -- | Identify the free variables of a 'CoreBind' bind_fvs :: CoreBind -> VarSet bind_fvs (NonRec bndr rhs) = pair_fvs (bndr,rhs) -bind_fvs (Rec prs) = foldl delVarSet rhs_fvs bndrs +bind_fvs (Rec prs) = foldl' delVarSet rhs_fvs bndrs where bndrs = map fst prs rhs_fvs = unionVarSets (map pair_fvs prs) @@ -2287,12 +2288,10 @@ instance Monad SpecM where case f y of SpecM z -> z - fail str = SpecM $ fail str + fail = MonadFail.fail -#if __GLASGOW_HASKELL__ > 710 instance MonadFail.MonadFail SpecM where fail str = SpecM $ fail str -#endif instance MonadUnique SpecM where getUniqueSupplyM |