summaryrefslogtreecommitdiff
path: root/compiler/specialise
diff options
context:
space:
mode:
authorKavon Farvardin <kavon@farvard.in>2018-09-23 15:29:37 -0500
committerKavon Farvardin <kavon@farvard.in>2018-09-23 15:29:37 -0500
commit84c2ad99582391005b5e873198b15e9e9eb4f78d (patch)
treecaa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/specialise
parent8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff)
parente68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff)
downloadhaskell-wip/T13904.tar.gz
update to current master againwip/T13904
Diffstat (limited to 'compiler/specialise')
-rw-r--r--compiler/specialise/Rules.hs196
-rw-r--r--compiler/specialise/SpecConstr.hs166
-rw-r--r--compiler/specialise/Specialise.hs19
3 files changed, 252 insertions, 129 deletions
diff --git a/compiler/specialise/Rules.hs b/compiler/specialise/Rules.hs
index b5606754e6..ad6a0757cb 100644
--- a/compiler/specialise/Rules.hs
+++ b/compiler/specialise/Rules.hs
@@ -28,6 +28,8 @@ module Rules (
#include "HsVersions.h"
+import GhcPrelude
+
import CoreSyn -- All of it
import Module ( Module, ModuleSet, elemModuleSet )
import CoreSubst
@@ -38,7 +40,7 @@ import CoreUtils ( exprType, eqExpr, mkTick, mkTicks,
stripTicksTopT, stripTicksTopE,
isJoinBind )
import PprCore ( pprRules )
-import Type ( Type, substTy, mkTCvSubst )
+import Type ( Type, Kind, substTy, mkTCvSubst )
import TcType ( tcSplitTyConApp_maybe )
import TysWiredIn ( anyTypeOfKind )
import Coercion
@@ -53,7 +55,7 @@ import NameSet
import NameEnv
import UniqFM
import Unify ( ruleMatchTyKiX )
-import BasicTypes ( Activation, CompilerPhase, isActive, pprRuleName )
+import BasicTypes
import DynFlags ( DynFlags )
import Outputable
import FastString
@@ -288,9 +290,10 @@ addRuleInfo (RuleInfo rs1 fvs1) (RuleInfo rs2 fvs2)
= RuleInfo (rs1 ++ rs2) (fvs1 `unionDVarSet` fvs2)
addIdSpecialisations :: Id -> [CoreRule] -> Id
-addIdSpecialisations id []
- = id
addIdSpecialisations id rules
+ | null rules
+ = id
+ | otherwise
= setIdSpecialisation id $
extendRuleInfo (idSpecialisation id) rules
@@ -310,9 +313,8 @@ ruleIsVisible _ BuiltinRule{} = True
ruleIsVisible vis_orphs Rule { ru_orphan = orph, ru_origin = origin }
= notOrphan orph || origin `elemModuleSet` vis_orphs
-{-
-Note [Where rules are found]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [Where rules are found]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The rules for an Id come from two places:
(a) the ones it is born with, stored inside the Id iself (idCoreRules fn),
(b) rules added in other modules, stored in the global RuleBase (imp_rules)
@@ -348,7 +350,7 @@ mkRuleBase rules = extendRuleBaseList emptyRuleBase rules
extendRuleBaseList :: RuleBase -> [CoreRule] -> RuleBase
extendRuleBaseList rule_base new_guys
- = foldl extendRuleBase rule_base new_guys
+ = foldl' extendRuleBase rule_base new_guys
unionRuleBase :: RuleBase -> RuleBase -> RuleBase
unionRuleBase rb1 rb2 = plusNameEnv_C (++) rb1 rb2
@@ -411,21 +413,20 @@ lookupRule dflags in_scope is_active fn args rules
findBest :: (Id, [CoreExpr])
-> (CoreRule,CoreExpr) -> [(CoreRule,CoreExpr)] -> (CoreRule,CoreExpr)
-- All these pairs matched the expression
--- Return the pair the the most specific rule
+-- Return the pair the most specific rule
-- The (fn,args) is just for overlap reporting
findBest _ (rule,ans) [] = (rule,ans)
findBest target (rule1,ans1) ((rule2,ans2):prs)
| rule1 `isMoreSpecific` rule2 = findBest target (rule1,ans1) prs
| rule2 `isMoreSpecific` rule1 = findBest target (rule2,ans2) prs
- | debugIsOn = let pp_rule rule = sdocWithPprDebug $ \dbg -> if dbg
- then ppr rule
- else doubleQuotes (ftext (ruleName rule))
+ | debugIsOn = let pp_rule rule
+ = ifPprDebug (ppr rule)
+ (doubleQuotes (ftext (ruleName rule)))
in pprTrace "Rules.findBest: rule overlap (Rule 1 wins)"
- (vcat [ sdocWithPprDebug $ \dbg -> if dbg
- then text "Expression to match:" <+> ppr fn
- <+> sep (map ppr args)
- else empty
+ (vcat [ whenPprDebug $
+ text "Expression to match:" <+> ppr fn
+ <+> sep (map ppr args)
, text "Rule 1:" <+> pp_rule rule1
, text "Rule 2:" <+> pp_rule rule2]) $
findBest target (rule1,ans1) prs
@@ -517,7 +518,7 @@ matchRule _ in_scope is_active _ args rough_args
| ruleCantMatch tpl_tops rough_args = Nothing
| otherwise
= case matchN in_scope rule_name tpl_vars tpl_args args of
- Nothing -> Nothing
+ Nothing -> Nothing
Just (bind_wrapper, tpl_vals) -> Just (bind_wrapper $
rule_fn `mkApps` tpl_vals)
where
@@ -535,58 +536,82 @@ matchN :: InScopeEnv
matchN (in_scope, id_unf) rule_name tmpl_vars tmpl_es target_es
= do { subst <- go init_menv emptyRuleSubst tmpl_es target_es
- ; let (_, matched_es) = mapAccumL lookup_tmpl subst tmpl_vars
+ ; let (_, matched_es) = mapAccumL lookup_tmpl subst $
+ tmpl_vars `zip` tmpl_vars1
; return (rs_binds subst, matched_es) }
where
- init_rn_env = mkRnEnv2 (extendInScopeSetList in_scope tmpl_vars)
- -- See Note [Template binders]
+ (init_rn_env, tmpl_vars1) = mapAccumL rnBndrL (mkRnEnv2 in_scope) tmpl_vars
+ -- See Note [Cloning the template binders]
- init_menv = RV { rv_tmpls = mkVarSet tmpl_vars, rv_lcl = init_rn_env
- , rv_fltR = mkEmptySubst (rnInScopeSet init_rn_env)
- , rv_unf = id_unf }
+ init_menv = RV { rv_tmpls = mkVarSet tmpl_vars1
+ , rv_lcl = init_rn_env
+ , rv_fltR = mkEmptySubst (rnInScopeSet init_rn_env)
+ , rv_unf = id_unf }
go _ subst [] _ = Just subst
go _ _ _ [] = Nothing -- Fail if too few actual args
go menv subst (t:ts) (e:es) = do { subst1 <- match menv subst t e
; go menv subst1 ts es }
- lookup_tmpl :: RuleSubst -> Var -> (RuleSubst, CoreExpr)
- lookup_tmpl rs@(RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst }) tmpl_var
- | isId tmpl_var
- = case lookupVarEnv id_subst tmpl_var of
+ lookup_tmpl :: RuleSubst -> (InVar,OutVar) -> (RuleSubst, CoreExpr)
+ -- Need to return a RuleSubst solely for the benefit of mk_fake_ty
+ lookup_tmpl rs@(RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst })
+ (tmpl_var, tmpl_var1)
+ | isId tmpl_var1
+ = case lookupVarEnv id_subst tmpl_var1 of
Just e -> (rs, e)
- Nothing | Just refl_co <- isReflCoVar_maybe tmpl_var
- , let co_expr = Coercion refl_co
- -> (rs { rs_id_subst = extendVarEnv id_subst tmpl_var co_expr }, co_expr)
+ Nothing | Just refl_co <- isReflCoVar_maybe tmpl_var1
+ , let co_expr = Coercion refl_co
+ id_subst' = extendVarEnv id_subst tmpl_var1 co_expr
+ rs' = rs { rs_id_subst = id_subst' }
+ -> (rs', co_expr) -- See Note [Unbound RULE binders]
| otherwise
-> unbound tmpl_var
| otherwise
- = case lookupVarEnv tv_subst tmpl_var of
+ = case lookupVarEnv tv_subst tmpl_var1 of
Just ty -> (rs, Type ty)
- Nothing -> (rs { rs_tv_subst = extendVarEnv tv_subst tmpl_var fake_ty }, Type fake_ty)
- -- See Note [Unbound RULE binders]
+ Nothing -> (rs', Type fake_ty) -- See Note [Unbound RULE binders]
where
- fake_ty = anyTypeOfKind kind
- cv_subst = to_co_env id_subst
- kind = Type.substTy (mkTCvSubst in_scope (tv_subst, cv_subst))
- (tyVarKind tmpl_var)
-
- to_co_env env = nonDetFoldUFM_Directly to_co emptyVarEnv env
- -- It's OK to use nonDetFoldUFM_Directly because we forget the
- -- order immediately by creating a new env
- to_co uniq expr env
- | Just co <- exprToCoercion_maybe expr
- = extendVarEnv_Directly env uniq co
-
- | otherwise
- = env
-
- unbound var = pprPanic "Template variable unbound in rewrite rule" $
- vcat [ text "Variable:" <+> ppr var
- , text "Rule" <+> pprRuleName rule_name
- , text "Rule bndrs:" <+> ppr tmpl_vars
- , text "LHS args:" <+> ppr tmpl_es
- , text "Actual args:" <+> ppr target_es ]
+ rs' = rs { rs_tv_subst = extendVarEnv tv_subst tmpl_var1 fake_ty }
+ fake_ty = mk_fake_ty in_scope rs tmpl_var1
+ -- This call is the sole reason we accumulate
+ -- RuleSubst in lookup_tmpl
+
+ unbound tmpl_var
+ = pprPanic "Template variable unbound in rewrite rule" $
+ vcat [ text "Variable:" <+> ppr tmpl_var <+> dcolon <+> ppr (varType tmpl_var)
+ , text "Rule" <+> pprRuleName rule_name
+ , text "Rule bndrs:" <+> ppr tmpl_vars
+ , text "LHS args:" <+> ppr tmpl_es
+ , text "Actual args:" <+> ppr target_es ]
+
+
+mk_fake_ty :: InScopeSet -> RuleSubst -> TyVar -> Kind
+-- Roughly:
+-- mk_fake_ty subst tv = Any @(subst (tyVarKind tv))
+-- That is: apply the substitution to the kind of the given tyvar,
+-- and make an 'any' type of that kind.
+-- Tiresomely, the RuleSubst is not well adapted to substTy, leading to
+-- horrible impedence matching.
+--
+-- Happily, this function is seldom called
+mk_fake_ty in_scope (RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst }) tmpl_var1
+ = anyTypeOfKind kind
+ where
+ kind = Type.substTy (mkTCvSubst in_scope (tv_subst, cv_subst))
+ (tyVarKind tmpl_var1)
+
+ cv_subst = to_co_env id_subst
+
+ to_co_env :: IdSubstEnv -> CvSubstEnv
+ to_co_env env = nonDetFoldUFM_Directly to_co emptyVarEnv env
+ -- It's OK to use nonDetFoldUFM_Directly because we forget the
+ -- order immediately by creating a new env
+
+ to_co uniq expr env
+ = case exprToCoercion_maybe expr of
+ Just co -> extendVarEnv_Directly env uniq co
+ Nothing -> env
{- Note [Unbound RULE binders]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -631,8 +656,8 @@ bound on the LHS:
in Trac #13410, and also in test T10602.
-Note [Template binders]
-~~~~~~~~~~~~~~~~~~~~~~~
+Note [Cloning the template binders]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider the following match (example 1):
Template: forall x. f x
Target: f (x+1)
@@ -643,21 +668,19 @@ Likewise this one (example 2):
Template: forall x. f (\x.x)
Target: f (\y.y)
-We achieve this simply by:
- * Adding forall'd template binders to the in-scope set
-
-This works even if the template binder are already in scope
-(in the target) because
+We achieve this simply by using rnBndrL to clone the template
+binders if they are already in scope.
- * The RuleSubst rs_tv_subst, rs_id_subst maps LHS template vars to
- the target world. It is not applied recursively.
-
- * Having the template vars in the in-scope set ensures that in
- example 2 above, the (\x.x) is cloned to (\x'. x').
-
-In the past we used rnBndrL to clone the template variables if
-they were already in scope. But (a) that's not necessary and (b)
-it complicate the fancy footwork for Note [Unbound template type variables]
+------ Historical note -------
+At one point I tried simply adding the template binders to the
+in-scope set /without/ cloning them, but that failed in a horribly
+obscure way in Trac #14777. Problem was that during matching we look
+up target-term variables in the in-scope set (see Note [Lookup
+in-scope]). If a target-term variable happens to name-clash with a
+template variable, that lookup will find the template variable, which
+is /utterly/ bogus. In Trac #14777, this transformed a term variable
+into a type variable, and then crashed when we wanted its idInfo.
+------ End of historical note -------
************************************************************************
@@ -673,11 +696,12 @@ it complicate the fancy footwork for Note [Unbound template type variables]
-- from nested matches; see the Let case of match, below
--
data RuleMatchEnv
- = RV { rv_tmpls :: VarSet -- Template variables
- , rv_lcl :: RnEnv2 -- Renamings for *local bindings*
+ = RV { rv_lcl :: RnEnv2 -- Renamings for *local bindings*
-- (lambda/case)
+ , rv_tmpls :: VarSet -- Template variables
+ -- (after applying envL of rv_lcl)
, rv_fltR :: Subst -- Renamings for floated let-bindings
- -- domain disjoint from envR of rv_lcl
+ -- (domain disjoint from envR of rv_lcl)
-- See Note [Matching lets]
, rv_unf :: IdUnfoldingFun
}
@@ -707,7 +731,6 @@ emptyRuleSubst = RS { rs_tv_subst = emptyVarEnv, rs_id_subst = emptyVarEnv
-- For a start, in general eta expansion wastes work.
-- SLPJ July 99
-
match :: RuleMatchEnv
-> RuleSubst
-> CoreExpr -- Template
@@ -738,7 +761,8 @@ match _ _ e@Tick{} _
-- succeed in matching what looks like the template variable 'a' against 3.
-- The Var case follows closely what happens in Unify.match
-match renv subst (Var v1) e2 = match_var renv subst v1 e2
+match renv subst (Var v1) e2
+ = match_var renv subst v1 e2
match renv subst e1 (Var v2) -- Note [Expanding variables]
| not (inRnEnvR rn_env v2) -- Note [Do not expand locally-bound variables]
@@ -883,7 +907,7 @@ match_alts renv subst ((c1,vs1,r1):alts1) ((c2,vs2,r2):alts2)
= do { subst1 <- match renv' subst r1 r2
; match_alts renv subst1 alts1 alts2 }
where
- renv' = foldl mb renv (vs1 `zip` vs2)
+ renv' = foldl' mb renv (vs1 `zip` vs2)
mb renv (v1,v2) = rnMatchBndr2 renv subst v1 v2
match_alts _ _ _ _
@@ -1110,19 +1134,19 @@ SpecConstr sees this fragment:
Data.Maybe.Nothing -> lvl_smf;
Data.Maybe.Just n_acT [Just S(L)] ->
case n_acT of wild1_ams [Just A] { GHC.Base.I# y_amr [Just L] ->
- \$wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf
+ $wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf
}};
and correctly generates the rule
RULES: "SC:$wfoo1" [0] __forall {y_amr [Just L] :: GHC.Prim.Int#
sc_snn :: GHC.Prim.Int#}
- \$wfoo_smW sc_snn (Data.Maybe.Just @ GHC.Base.Int (GHC.Base.I# y_amr))
- = \$s\$wfoo_sno y_amr sc_snn ;]
+ $wfoo_smW sc_snn (Data.Maybe.Just @ GHC.Base.Int (GHC.Base.I# y_amr))
+ = $s$wfoo_sno y_amr sc_snn ;]
BUT we must ensure that this rule matches in the original function!
-Note that the call to \$wfoo is
- \$wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf
+Note that the call to $wfoo is
+ $wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf
During matching we expand wild_Xf to (Just n_acT). But then we must also
expand n_acT to (I# y_amr). And we can only do that if we look up n_acT
@@ -1147,10 +1171,10 @@ is so important.
-- string for the purposes of error reporting
ruleCheckProgram :: CompilerPhase -- ^ Rule activation test
-> String -- ^ Rule pattern
- -> RuleEnv -- ^ Database of rules
+ -> (Id -> [CoreRule]) -- ^ Rules for an Id
-> CoreProgram -- ^ Bindings to check in
-> SDoc -- ^ Resulting check message
-ruleCheckProgram phase rule_pat rule_base binds
+ruleCheckProgram phase rule_pat rules binds
| isEmptyBag results
= text "Rule check results: no rule application sites"
| otherwise
@@ -1163,7 +1187,7 @@ ruleCheckProgram phase rule_pat rule_base binds
, rc_id_unf = idUnfolding -- Not quite right
-- Should use activeUnfolding
, rc_pattern = rule_pat
- , rc_rule_base = rule_base }
+ , rc_rules = rules }
results = unionManyBags (map (ruleCheckBind env) binds)
line = text (replicate 20 '-')
@@ -1171,7 +1195,7 @@ data RuleCheckEnv = RuleCheckEnv {
rc_is_active :: Activation -> Bool,
rc_id_unf :: IdUnfoldingFun,
rc_pattern :: String,
- rc_rule_base :: RuleEnv
+ rc_rules :: Id -> [CoreRule]
}
ruleCheckBind :: RuleCheckEnv -> CoreBind -> Bag SDoc
@@ -1205,7 +1229,7 @@ ruleCheckFun env fn args
| null name_match_rules = emptyBag
| otherwise = unitBag (ruleAppCheck_help env fn args name_match_rules)
where
- name_match_rules = filter match (getRules (rc_rule_base env) fn)
+ name_match_rules = filter match (rc_rules env fn)
match rule = (rc_pattern env) `isPrefixOf` unpackFS (ruleName rule)
ruleAppCheck_help :: RuleCheckEnv -> Id -> [CoreExpr] -> [CoreRule] -> SDoc
diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
index e5af0b8a3c..f6d27ccba5 100644
--- a/compiler/specialise/SpecConstr.hs
+++ b/compiler/specialise/SpecConstr.hs
@@ -19,6 +19,8 @@ module SpecConstr(
#include "HsVersions.h"
+import GhcPrelude
+
import CoreSyn
import CoreSubst
import CoreUtils
@@ -36,7 +38,6 @@ import TyCon ( tyConName )
import Id
import PprCore ( pprParendExpr )
import MkCore ( mkImpossibleExpr )
-import Var
import VarEnv
import VarSet
import Name
@@ -57,9 +58,6 @@ import Control.Monad ( zipWithM )
import Data.List
import PrelNames ( specTyConName )
import Module
-
--- See Note [Forcing specialisation]
-
import TyCon ( TyCon )
import GHC.Exts( SpecConstrAnnotation(..) )
import Data.Ord( comparing )
@@ -502,31 +500,46 @@ This is all quite ugly; we ought to come up with a better design.
ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set
sc_force to True when calling specLoop. This flag does four things:
+
* Ignore specConstrThreshold, to specialise functions of arbitrary size
(see scTopBind)
* Ignore specConstrCount, to make arbitrary numbers of specialisations
(see specialise)
* Specialise even for arguments that are not scrutinised in the loop
- (see argToPat; Trac #4488)
+ (see argToPat; Trac #4448)
* Only specialise on recursive types a finite number of times
(see is_too_recursive; Trac #5550; Note [Limit recursive specialisation])
-This flag is inherited for nested non-recursive bindings (which are likely to
-be join points and hence should be fully specialised) but reset for nested
-recursive bindings.
-
-What alternatives did I consider? Annotating the loop itself doesn't
-work because (a) it is local and (b) it will be w/w'ed and having
-w/w propagating annotations somehow doesn't seem like a good idea. The
-types of the loop arguments really seem to be the most persistent
-thing.
-
-Annotating the types that make up the loop state doesn't work,
-either, because (a) it would prevent us from using types like Either
-or tuples here, (b) we don't want to restrict the set of types that
-can be used in Stream states and (c) some types are fixed by the user
-(e.g., the accumulator here) but we still want to specialise as much
-as possible.
+The flag holds only for specialising a single binding group, and NOT
+for nested bindings. (So really it should be passed around explicitly
+and not stored in ScEnv.) Trac #14379 turned out to be caused by
+ f SPEC x = let g1 x = ...
+ in ...
+We force-specialise f (because of the SPEC), but that generates a specialised
+copy of g1 (as well as the original). Alas g1 has a nested binding g2; and
+in each copy of g1 we get an unspecialised and specialised copy of g2; and so
+on. Result, exponential. So the force-spec flag now only applies to one
+level of bindings at a time.
+
+Mechanism for this one-level-only thing:
+
+ - Switch it on at the call to specRec, in scExpr and scTopBinds
+ - Switch it off when doing the RHSs;
+ this can be done very conveniently in decreaseSpecCount
+
+What alternatives did I consider?
+
+* Annotating the loop itself doesn't work because (a) it is local and
+ (b) it will be w/w'ed and having w/w propagating annotations somehow
+ doesn't seem like a good idea. The types of the loop arguments
+ really seem to be the most persistent thing.
+
+* Annotating the types that make up the loop state doesn't work,
+ either, because (a) it would prevent us from using types like Either
+ or tuples here, (b) we don't want to restrict the set of types that
+ can be used in Stream states and (c) some types are fixed by the
+ user (e.g., the accumulator here) but we still want to specialise as
+ much as possible.
Alternatives to ForceSpecConstr
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -586,7 +599,7 @@ more than N times (controlled by -fspec-constr-recursive=N) we check
specialisations. If sc_count is "no limit" then we arbitrarily
choose 10 as the limit (ugh).
-See Trac #5550. Also Trac #13623, where this test had become over-agressive,
+See Trac #5550. Also Trac #13623, where this test had become over-aggressive,
and we lost a wonderful specialisation that we really wanted!
Note [NoSpecConstr]
@@ -597,7 +610,7 @@ to mean "don't specialise on arguments of this type". It was added
before we had ForceSpecConstr. Lacking ForceSpecConstr we specialised
regardless of size; and then we needed a way to turn that *off*. Now
that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
-(Used only for PArray.)
+(Used only for PArray, TODO: remove?)
-----------------------------------------------------
Stuff not yet handled
@@ -975,7 +988,8 @@ extendCaseBndrs env scrut case_bndr con alt_bndrs
decreaseSpecCount :: ScEnv -> Int -> ScEnv
-- See Note [Avoiding exponential blowup]
decreaseSpecCount env n_specs
- = env { sc_count = case sc_count env of
+ = env { sc_force = False -- See Note [Forcing specialisation]
+ , sc_count = case sc_count env of
Nothing -> Nothing
Just n -> Just (n `div` (n_specs + 1)) }
-- The "+1" takes account of the original function;
@@ -1545,7 +1559,11 @@ specRec top_lvl env body_usg rhs_infos
return (usg_so_far, spec_infos)
| otherwise
- = do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
+ = -- pprTrace "specRec3" (vcat [ text "bndrs" <+> ppr (map ri_fn rhs_infos)
+ -- , text "iteration" <+> int n_iter
+ -- , text "spec_infos" <+> ppr (map (map os_pat . si_specs) spec_infos)
+ -- ]) $
+ do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
; let (extra_usg_s, new_spec_infos) = unzip specs_w_usg
extra_usg = combineUsages extra_usg_s
all_usg = usg_so_far `combineUsage` extra_usg
@@ -1792,7 +1810,7 @@ that specialisations didn't fire inside wrappers; see test
simplCore/should_compile/spec-inline.
So now I just use the inline-activation of the parent Id, as the
-activation for the specialiation RULE, just like the main specialiser;
+activation for the specialisation RULE, just like the main specialiser;
This in turn means there is no point in specialising NOINLINE things,
so we test for that.
@@ -1881,6 +1899,69 @@ by trim_pats.
* Otherwise we sort the patterns to choose the most general
ones first; more general => more widely applicable.
+
+Note [SpecConstr and casts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (Trac #14270) a call like
+
+ let f = e
+ in ... f (K @(a |> co)) ...
+
+where 'co' is a coercion variable not in scope at f's definition site.
+If we aren't caereful we'll get
+
+ let $sf a co = e (K @(a |> co))
+ RULE "SC:f" forall a co. f (K @(a |> co)) = $sf a co
+ f = e
+ in ...
+
+But alas, when we match the call we won't bind 'co', because type-matching
+(for good reasons) discards casts).
+
+I don't know how to solve this, so for now I'm just discarding any
+call patterns that
+ * Mentions a coercion variable in a type argument
+ * That is not in scope at the binding of the function
+
+I think this is very rare.
+
+It is important (e.g. Trac #14936) that this /only/ applies to
+coercions mentioned in casts. We don't want to be discombobulated
+by casts in terms! For example, consider
+ f ((e1,e2) |> sym co)
+where, say,
+ f :: Foo -> blah
+ co :: Foo ~R (Int,Int)
+
+Here we definitely do want to specialise for that pair! We do not
+match on the structre of the coercion; instead we just match on a
+coercion variable, so the RULE looks like
+
+ forall (x::Int, y::Int, co :: (Int,Int) ~R Foo)
+ f ((x,y) |> co) = $sf x y co
+
+Often the body of f looks like
+ f arg = ...(case arg |> co' of
+ (x,y) -> blah)...
+
+so that the specialised f will turn into
+ $sf x y co = let arg = (x,y) |> co
+ in ...(case arg>| co' of
+ (x,y) -> blah)....
+
+which will simplify to not use 'co' at all. But we can't guarantee
+that co will end up unused, so we still pass it. Absence analysis
+may remove it later.
+
+Note that this /also/ discards the call pattern if we have a cast in a
+/term/, although in fact Rules.match does make a very flaky and
+fragile attempt to match coercions. e.g. a call like
+ f (Maybe Age) (Nothing |> co) blah
+ where co :: Maybe Int ~ Maybe Age
+will be discarded. It's extremely fragile to match on the form of a
+coercion, so I think it's better just not to try. A more complicated
+alternative would be to discard calls that mention coercion variables
+only in kind-casts, but I'm doing the simple thing for now.
-}
type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments
@@ -1918,7 +1999,8 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls
-- Discard specialisations if there are too many of them
trimmed_pats = trim_pats env fn spec_info small_pats
--- ; pprTrace "callsToPats" (vcat [ text "calls:" <+> ppr calls
+-- ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls
+-- , text "done_specs:" <+> ppr (map os_pat done_specs)
-- , text "good_pats:" <+> ppr good_pats ]) $
-- return ()
@@ -1931,7 +2013,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
| sc_force env
|| isNothing mb_scc
|| n_remaining >= n_pats
- = pats -- No need to trim
+ = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats)
+ pats -- No need to trim
| otherwise
= emit_trace $ -- Need to trim, so keep the best ones
@@ -1975,6 +2058,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
speakNOf spec_count' (text "call pattern") <> comma <+>
text "but the limit is" <+> int max_specs) ]
, text "Use -fspec-constr-count=n to set the bound"
+ , text "done_spec_count =" <+> int done_spec_count
+ , text "Keeping " <+> int n_remaining <> text ", out of" <+> int n_pats
, text "Discarding:" <+> ppr (drop n_remaining sorted_pats) ]
@@ -1983,21 +2068,23 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
-- Type variables come first, since they may scope
-- over the following term variables
-- The [CoreExpr] are the argument patterns for the rule
-callToPats env bndr_occs (Call _ args con_env)
+callToPats env bndr_occs call@(Call _ args con_env)
| args `ltLength` bndr_occs -- Check saturated
= return Nothing
| otherwise
- = do { let in_scope = substInScope (sc_subst env)
+ = do { let in_scope = substInScope (sc_subst env)
; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs
- ; let pat_fvs = exprsFreeVarsList pats
+ ; let pat_fvs = exprsFreeVarsList pats
-- To get determinism we need the list of free variables in
-- deterministic order. Otherwise we end up creating
-- lambdas with different argument orders. See
-- determinism/simplCore/should_compile/spec-inline-determ.hs
-- for an example. For explanation of determinism
-- considerations See Note [Unique Determinism] in Unique.
+
in_scope_vars = getInScopeVars in_scope
- qvars = filterOut (`elemVarSet` in_scope_vars) pat_fvs
+ is_in_scope v = v `elemVarSet` in_scope_vars
+ qvars = filterOut is_in_scope pat_fvs
-- Quantify over variables that are not in scope
-- at the call site
-- See Note [Free type variables of the qvar types]
@@ -2012,8 +2099,21 @@ callToPats env bndr_occs (Call _ args con_env)
sanitise id = id `setIdType` expandTypeSynonyms (idType id)
-- See Note [Free type variables of the qvar types]
+ -- Bad coercion variables: see Note [SpecConstr and casts]
+ bad_covars :: CoVarSet
+ bad_covars = mapUnionVarSet get_bad_covars pats
+ get_bad_covars :: CoreArg -> CoVarSet
+ get_bad_covars (Type ty)
+ = filterVarSet (\v -> isId v && not (is_in_scope v)) $
+ tyCoVarsOfType ty
+ get_bad_covars _
+ = emptyVarSet
+
; -- pprTrace "callToPats" (ppr args $$ ppr bndr_occs) $
- if interesting
+ WARN( not (isEmptyVarSet bad_covars)
+ , text "SpecConstr: bad covars:" <+> ppr bad_covars
+ $$ ppr call )
+ if interesting && isEmptyVarSet bad_covars
then return (Just (qvars', pats))
else return Nothing }
diff --git a/compiler/specialise/Specialise.hs b/compiler/specialise/Specialise.hs
index 869da640ea..6f775dfdcb 100644
--- a/compiler/specialise/Specialise.hs
+++ b/compiler/specialise/Specialise.hs
@@ -9,6 +9,8 @@ module Specialise ( specProgram, specUnfolding ) where
#include "HsVersions.h"
+import GhcPrelude
+
import Id
import TcType hiding( substTy )
import Type hiding( substTy, extendTvSubstList )
@@ -43,9 +45,7 @@ import State
import UniqDFM
import Control.Monad
-#if __GLASGOW_HASKELL__ > 710
import qualified Control.Monad.Fail as MonadFail
-#endif
{-
************************************************************************
@@ -147,7 +147,7 @@ becomes
in
fl
-We still have recusion for non-overloaded functions which we
+We still have recursion for non-overloaded functions which we
specialise, but the recursive call should get specialised to the
same recursive version.
@@ -735,7 +735,7 @@ specImport dflags this_mod top_env done callers rb fn calls_for_fn
= do { warnMsg (vcat [ hang (text "Could not specialise imported function" <+> quotes (ppr fn))
2 (vcat [ text "when specialising" <+> quotes (ppr caller)
| caller <- callers])
- , ifPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn))
+ , whenPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn))
, text "Probable fix: add INLINABLE pragma on" <+> quotes (ppr fn) ])
; return ([], []) }
@@ -1343,10 +1343,10 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
-- See Note [Specialising imported functions] in OccurAnal
| InlinePragma { inl_inline = Inlinable } <- inl_prag
- = (inl_prag { inl_inline = EmptyInlineSpec }, noUnfolding)
+ = (inl_prag { inl_inline = NoUserInline }, noUnfolding)
| otherwise
- = (inl_prag, specUnfolding poly_tyvars spec_app
+ = (inl_prag, specUnfolding dflags poly_tyvars spec_app
arity_decrease fn_unf)
arity_decrease = length spec_dict_args
@@ -2011,6 +2011,7 @@ mkCallUDs' env f args
EqPred {} -> True
IrredPred {} -> True -- Things like (D []) where D is a
-- Constraint-ranged family; Trac #7785
+ ForAllPred {} -> True
{-
Note [Type determines value]
@@ -2095,7 +2096,7 @@ mkDB bind = (bind, bind_fvs bind)
-- | Identify the free variables of a 'CoreBind'
bind_fvs :: CoreBind -> VarSet
bind_fvs (NonRec bndr rhs) = pair_fvs (bndr,rhs)
-bind_fvs (Rec prs) = foldl delVarSet rhs_fvs bndrs
+bind_fvs (Rec prs) = foldl' delVarSet rhs_fvs bndrs
where
bndrs = map fst prs
rhs_fvs = unionVarSets (map pair_fvs prs)
@@ -2287,12 +2288,10 @@ instance Monad SpecM where
case f y of
SpecM z ->
z
- fail str = SpecM $ fail str
+ fail = MonadFail.fail
-#if __GLASGOW_HASKELL__ > 710
instance MonadFail.MonadFail SpecM where
fail str = SpecM $ fail str
-#endif
instance MonadUnique SpecM where
getUniqueSupplyM