diff options
Diffstat (limited to 'compiler/specialise/SpecConstr.hs')
-rw-r--r-- | compiler/specialise/SpecConstr.hs | 166 |
1 files changed, 133 insertions, 33 deletions
diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs index e5af0b8a3c..f6d27ccba5 100644 --- a/compiler/specialise/SpecConstr.hs +++ b/compiler/specialise/SpecConstr.hs @@ -19,6 +19,8 @@ module SpecConstr( #include "HsVersions.h" +import GhcPrelude + import CoreSyn import CoreSubst import CoreUtils @@ -36,7 +38,6 @@ import TyCon ( tyConName ) import Id import PprCore ( pprParendExpr ) import MkCore ( mkImpossibleExpr ) -import Var import VarEnv import VarSet import Name @@ -57,9 +58,6 @@ import Control.Monad ( zipWithM ) import Data.List import PrelNames ( specTyConName ) import Module - --- See Note [Forcing specialisation] - import TyCon ( TyCon ) import GHC.Exts( SpecConstrAnnotation(..) ) import Data.Ord( comparing ) @@ -502,31 +500,46 @@ This is all quite ugly; we ought to come up with a better design. ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set sc_force to True when calling specLoop. This flag does four things: + * Ignore specConstrThreshold, to specialise functions of arbitrary size (see scTopBind) * Ignore specConstrCount, to make arbitrary numbers of specialisations (see specialise) * Specialise even for arguments that are not scrutinised in the loop - (see argToPat; Trac #4488) + (see argToPat; Trac #4448) * Only specialise on recursive types a finite number of times (see is_too_recursive; Trac #5550; Note [Limit recursive specialisation]) -This flag is inherited for nested non-recursive bindings (which are likely to -be join points and hence should be fully specialised) but reset for nested -recursive bindings. - -What alternatives did I consider? Annotating the loop itself doesn't -work because (a) it is local and (b) it will be w/w'ed and having -w/w propagating annotations somehow doesn't seem like a good idea. The -types of the loop arguments really seem to be the most persistent -thing. 
- -Annotating the types that make up the loop state doesn't work, -either, because (a) it would prevent us from using types like Either -or tuples here, (b) we don't want to restrict the set of types that -can be used in Stream states and (c) some types are fixed by the user -(e.g., the accumulator here) but we still want to specialise as much -as possible. +The flag holds only for specialising a single binding group, and NOT +for nested bindings. (So really it should be passed around explicitly +and not stored in ScEnv.) Trac #14379 turned out to be caused by + f SPEC x = let g1 x = ... + in ... +We force-specialise f (because of the SPEC), but that generates a specialised +copy of g1 (as well as the original). Alas g1 has a nested binding g2; and +in each copy of g1 we get an unspecialised and specialised copy of g2; and so +on. Result, exponential. So the force-spec flag now only applies to one +level of bindings at a time. + +Mechanism for this one-level-only thing: + + - Switch it on at the call to specRec, in scExpr and scTopBinds + - Switch it off when doing the RHSs; + this can be done very conveniently in decreaseSpecCount + +What alternatives did I consider? + +* Annotating the loop itself doesn't work because (a) it is local and + (b) it will be w/w'ed and having w/w propagating annotations somehow + doesn't seem like a good idea. The types of the loop arguments + really seem to be the most persistent thing. + +* Annotating the types that make up the loop state doesn't work, + either, because (a) it would prevent us from using types like Either + or tuples here, (b) we don't want to restrict the set of types that + can be used in Stream states and (c) some types are fixed by the + user (e.g., the accumulator here) but we still want to specialise as + much as possible. Alternatives to ForceSpecConstr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -586,7 +599,7 @@ more than N times (controlled by -fspec-constr-recursive=N) we check specialisations. 
If sc_count is "no limit" then we arbitrarily choose 10 as the limit (ugh). -See Trac #5550. Also Trac #13623, where this test had become over-agressive, +See Trac #5550. Also Trac #13623, where this test had become over-aggressive, and we lost a wonderful specialisation that we really wanted! Note [NoSpecConstr] @@ -597,7 +610,7 @@ to mean "don't specialise on arguments of this type". It was added before we had ForceSpecConstr. Lacking ForceSpecConstr we specialised regardless of size; and then we needed a way to turn that *off*. Now that we have ForceSpecConstr, this NoSpecConstr is probably redundant. -(Used only for PArray.) +(Used only for PArray, TODO: remove?) ----------------------------------------------------- Stuff not yet handled @@ -975,7 +988,8 @@ extendCaseBndrs env scrut case_bndr con alt_bndrs decreaseSpecCount :: ScEnv -> Int -> ScEnv -- See Note [Avoiding exponential blowup] decreaseSpecCount env n_specs - = env { sc_count = case sc_count env of + = env { sc_force = False -- See Note [Forcing specialisation] + , sc_count = case sc_count env of Nothing -> Nothing Just n -> Just (n `div` (n_specs + 1)) } -- The "+1" takes account of the original function; @@ -1545,7 +1559,11 @@ specRec top_lvl env body_usg rhs_infos return (usg_so_far, spec_infos) | otherwise - = do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos + = -- pprTrace "specRec3" (vcat [ text "bndrs" <+> ppr (map ri_fn rhs_infos) + -- , text "iteration" <+> int n_iter + -- , text "spec_infos" <+> ppr (map (map os_pat . si_specs) spec_infos) + -- ]) $ + do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos ; let (extra_usg_s, new_spec_infos) = unzip specs_w_usg extra_usg = combineUsages extra_usg_s all_usg = usg_so_far `combineUsage` extra_usg @@ -1792,7 +1810,7 @@ that specialisations didn't fire inside wrappers; see test simplCore/should_compile/spec-inline. 
So now I just use the inline-activation of the parent Id, as the -activation for the specialiation RULE, just like the main specialiser; +activation for the specialisation RULE, just like the main specialiser; This in turn means there is no point in specialising NOINLINE things, so we test for that. @@ -1881,6 +1899,69 @@ by trim_pats. * Otherwise we sort the patterns to choose the most general ones first; more general => more widely applicable. + +Note [SpecConstr and casts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider (Trac #14270) a call like + + let f = e + in ... f (K @(a |> co)) ... + +where 'co' is a coercion variable not in scope at f's definition site. +If we aren't careful we'll get + + let $sf a co = e (K @(a |> co)) + RULE "SC:f" forall a co. f (K @(a |> co)) = $sf a co + f = e + in ... + +But alas, when we match the call we won't bind 'co', because type-matching +(for good reasons) discards casts. + +I don't know how to solve this, so for now I'm just discarding any +call patterns that + * Mention a coercion variable in a type argument + * That is not in scope at the binding of the function + +I think this is very rare. + +It is important (e.g. Trac #14936) that this /only/ applies to +coercions mentioned in casts. We don't want to be discombobulated +by casts in terms! For example, consider + f ((e1,e2) |> sym co) +where, say, + f :: Foo -> blah + co :: Foo ~R (Int,Int) + +Here we definitely do want to specialise for that pair! We do not +match on the structure of the coercion; instead we just match on a +coercion variable, so the RULE looks like + + forall (x::Int, y::Int, co :: (Int,Int) ~R Foo) + f ((x,y) |> co) = $sf x y co + +Often the body of f looks like + f arg = ...(case arg |> co' of + (x,y) -> blah)... + +so that the specialised f will turn into + $sf x y co = let arg = (x,y) |> co + in ...(case arg |> co' of + (x,y) -> blah).... + +which will simplify to not use 'co' at all. 
But we can't guarantee +that co will end up unused, so we still pass it. Absence analysis +may remove it later. + +Note that this /also/ discards the call pattern if we have a cast in a +/term/, although in fact Rules.match does make a very flaky and +fragile attempt to match coercions. e.g. a call like + f (Maybe Age) (Nothing |> co) blah + where co :: Maybe Int ~ Maybe Age +will be discarded. It's extremely fragile to match on the form of a +coercion, so I think it's better just not to try. A more complicated +alternative would be to discard calls that mention coercion variables +only in kind-casts, but I'm doing the simple thing for now. -} type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments @@ -1918,7 +1999,8 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls -- Discard specialisations if there are too many of them trimmed_pats = trim_pats env fn spec_info small_pats --- ; pprTrace "callsToPats" (vcat [ text "calls:" <+> ppr calls +-- ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls +-- , text "done_specs:" <+> ppr (map os_pat done_specs) -- , text "good_pats:" <+> ppr good_pats ]) $ -- return () @@ -1931,7 +2013,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats | sc_force env || isNothing mb_scc || n_remaining >= n_pats - = pats -- No need to trim + = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats) + pats -- No need to trim | otherwise = emit_trace $ -- Need to trim, so keep the best ones @@ -1975,6 +2058,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats speakNOf spec_count' (text "call pattern") <> comma <+> text "but the limit is" <+> int max_specs) ] , text "Use -fspec-constr-count=n to set the bound" + , text "done_spec_count =" <+> int done_spec_count + , text "Keeping " <+> int n_remaining <> text ", out of" <+> int n_pats , text "Discarding:" <+> ppr (drop n_remaining sorted_pats) ] @@ 
-1983,21 +2068,23 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat) -- Type variables come first, since they may scope -- over the following term variables -- The [CoreExpr] are the argument patterns for the rule -callToPats env bndr_occs (Call _ args con_env) +callToPats env bndr_occs call@(Call _ args con_env) | args `ltLength` bndr_occs -- Check saturated = return Nothing | otherwise - = do { let in_scope = substInScope (sc_subst env) + = do { let in_scope = substInScope (sc_subst env) ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs - ; let pat_fvs = exprsFreeVarsList pats + ; let pat_fvs = exprsFreeVarsList pats -- To get determinism we need the list of free variables in -- deterministic order. Otherwise we end up creating -- lambdas with different argument orders. See -- determinism/simplCore/should_compile/spec-inline-determ.hs -- for an example. For explanation of determinism -- considerations See Note [Unique Determinism] in Unique. + in_scope_vars = getInScopeVars in_scope - qvars = filterOut (`elemVarSet` in_scope_vars) pat_fvs + is_in_scope v = v `elemVarSet` in_scope_vars + qvars = filterOut is_in_scope pat_fvs -- Quantify over variables that are not in scope -- at the call site -- See Note [Free type variables of the qvar types] @@ -2012,8 +2099,21 @@ callToPats env bndr_occs (Call _ args con_env) sanitise id = id `setIdType` expandTypeSynonyms (idType id) -- See Note [Free type variables of the qvar types] + -- Bad coercion variables: see Note [SpecConstr and casts] + bad_covars :: CoVarSet + bad_covars = mapUnionVarSet get_bad_covars pats + get_bad_covars :: CoreArg -> CoVarSet + get_bad_covars (Type ty) + = filterVarSet (\v -> isId v && not (is_in_scope v)) $ + tyCoVarsOfType ty + get_bad_covars _ + = emptyVarSet + ; -- pprTrace "callToPats" (ppr args $$ ppr bndr_occs) $ - if interesting + WARN( not (isEmptyVarSet bad_covars) + , text "SpecConstr: bad covars:" <+> ppr bad_covars + $$ ppr call ) + if 
interesting && isEmptyVarSet bad_covars then return (Just (qvars', pats)) else return Nothing } |