summaryrefslogtreecommitdiff
path: root/compiler/specialise/SpecConstr.hs
diff options
context:
space:
mode:
authorKavon Farvardin <kavon@farvard.in>2018-09-23 15:29:37 -0500
committerKavon Farvardin <kavon@farvard.in>2018-09-23 15:29:37 -0500
commit84c2ad99582391005b5e873198b15e9e9eb4f78d (patch)
treecaa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/specialise/SpecConstr.hs
parent8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff)
parente68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff)
downloadhaskell-wip/T13904.tar.gz
update to current master againwip/T13904
Diffstat (limited to 'compiler/specialise/SpecConstr.hs')
-rw-r--r--compiler/specialise/SpecConstr.hs166
1 files changed, 133 insertions, 33 deletions
diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
index e5af0b8a3c..f6d27ccba5 100644
--- a/compiler/specialise/SpecConstr.hs
+++ b/compiler/specialise/SpecConstr.hs
@@ -19,6 +19,8 @@ module SpecConstr(
#include "HsVersions.h"
+import GhcPrelude
+
import CoreSyn
import CoreSubst
import CoreUtils
@@ -36,7 +38,6 @@ import TyCon ( tyConName )
import Id
import PprCore ( pprParendExpr )
import MkCore ( mkImpossibleExpr )
-import Var
import VarEnv
import VarSet
import Name
@@ -57,9 +58,6 @@ import Control.Monad ( zipWithM )
import Data.List
import PrelNames ( specTyConName )
import Module
-
--- See Note [Forcing specialisation]
-
import TyCon ( TyCon )
import GHC.Exts( SpecConstrAnnotation(..) )
import Data.Ord( comparing )
@@ -502,31 +500,46 @@ This is all quite ugly; we ought to come up with a better design.
ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set
sc_force to True when calling specLoop. This flag does four things:
+
* Ignore specConstrThreshold, to specialise functions of arbitrary size
(see scTopBind)
* Ignore specConstrCount, to make arbitrary numbers of specialisations
(see specialise)
* Specialise even for arguments that are not scrutinised in the loop
- (see argToPat; Trac #4488)
+ (see argToPat; Trac #4448)
* Only specialise on recursive types a finite number of times
(see is_too_recursive; Trac #5550; Note [Limit recursive specialisation])
-This flag is inherited for nested non-recursive bindings (which are likely to
-be join points and hence should be fully specialised) but reset for nested
-recursive bindings.
-
-What alternatives did I consider? Annotating the loop itself doesn't
-work because (a) it is local and (b) it will be w/w'ed and having
-w/w propagating annotations somehow doesn't seem like a good idea. The
-types of the loop arguments really seem to be the most persistent
-thing.
-
-Annotating the types that make up the loop state doesn't work,
-either, because (a) it would prevent us from using types like Either
-or tuples here, (b) we don't want to restrict the set of types that
-can be used in Stream states and (c) some types are fixed by the user
-(e.g., the accumulator here) but we still want to specialise as much
-as possible.
+The flag holds only for specialising a single binding group, and NOT
+for nested bindings. (So really it should be passed around explicitly
+and not stored in ScEnv.) Trac #14379 turned out to be caused by
+ f SPEC x = let g1 x = ...
+ in ...
+We force-specialise f (because of the SPEC), but that generates a specialised
+copy of g1 (as well as the original). Alas g1 has a nested binding g2; and
+in each copy of g1 we get an unspecialised and specialised copy of g2; and so
+on. Result, exponential. So the force-spec flag now only applies to one
+level of bindings at a time.
+
+Mechanism for this one-level-only thing:
+
+ - Switch it on at the call to specRec, in scExpr and scTopBinds
+ - Switch it off when doing the RHSs;
+ this can be done very conveniently in decreaseSpecCount
+
+What alternatives did I consider?
+
+* Annotating the loop itself doesn't work because (a) it is local and
+ (b) it will be w/w'ed and having w/w propagating annotations somehow
+ doesn't seem like a good idea. The types of the loop arguments
+ really seem to be the most persistent thing.
+
+* Annotating the types that make up the loop state doesn't work,
+ either, because (a) it would prevent us from using types like Either
+ or tuples here, (b) we don't want to restrict the set of types that
+ can be used in Stream states and (c) some types are fixed by the
+ user (e.g., the accumulator here) but we still want to specialise as
+ much as possible.
Alternatives to ForceSpecConstr
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -586,7 +599,7 @@ more than N times (controlled by -fspec-constr-recursive=N) we check
specialisations. If sc_count is "no limit" then we arbitrarily
choose 10 as the limit (ugh).
-See Trac #5550. Also Trac #13623, where this test had become over-agressive,
+See Trac #5550. Also Trac #13623, where this test had become over-aggressive,
and we lost a wonderful specialisation that we really wanted!
Note [NoSpecConstr]
@@ -597,7 +610,7 @@ to mean "don't specialise on arguments of this type". It was added
before we had ForceSpecConstr. Lacking ForceSpecConstr we specialised
regardless of size; and then we needed a way to turn that *off*. Now
that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
-(Used only for PArray.)
+(Used only for PArray, TODO: remove?)
-----------------------------------------------------
Stuff not yet handled
@@ -975,7 +988,8 @@ extendCaseBndrs env scrut case_bndr con alt_bndrs
decreaseSpecCount :: ScEnv -> Int -> ScEnv
-- See Note [Avoiding exponential blowup]
decreaseSpecCount env n_specs
- = env { sc_count = case sc_count env of
+ = env { sc_force = False -- See Note [Forcing specialisation]
+ , sc_count = case sc_count env of
Nothing -> Nothing
Just n -> Just (n `div` (n_specs + 1)) }
-- The "+1" takes account of the original function;
@@ -1545,7 +1559,11 @@ specRec top_lvl env body_usg rhs_infos
return (usg_so_far, spec_infos)
| otherwise
- = do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
+ = -- pprTrace "specRec3" (vcat [ text "bndrs" <+> ppr (map ri_fn rhs_infos)
+ -- , text "iteration" <+> int n_iter
+ -- , text "spec_infos" <+> ppr (map (map os_pat . si_specs) spec_infos)
+ -- ]) $
+ do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
; let (extra_usg_s, new_spec_infos) = unzip specs_w_usg
extra_usg = combineUsages extra_usg_s
all_usg = usg_so_far `combineUsage` extra_usg
@@ -1792,7 +1810,7 @@ that specialisations didn't fire inside wrappers; see test
simplCore/should_compile/spec-inline.
So now I just use the inline-activation of the parent Id, as the
-activation for the specialiation RULE, just like the main specialiser;
+activation for the specialisation RULE, just like the main specialiser;
This in turn means there is no point in specialising NOINLINE things,
so we test for that.
@@ -1881,6 +1899,69 @@ by trim_pats.
* Otherwise we sort the patterns to choose the most general
ones first; more general => more widely applicable.
+
+Note [SpecConstr and casts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (Trac #14270) a call like
+
+ let f = e
+ in ... f (K @(a |> co)) ...
+
+where 'co' is a coercion variable not in scope at f's definition site.
+If we aren't careful we'll get
+
+ let $sf a co = e (K @(a |> co))
+ RULE "SC:f" forall a co. f (K @(a |> co)) = $sf a co
+ f = e
+ in ...
+
+But alas, when we match the call we won't bind 'co', because type-matching
+(for good reasons) discards casts.
+
+I don't know how to solve this, so for now I'm just discarding any
+call patterns that
+ * Mention a coercion variable in a type argument
+ * That is not in scope at the binding of the function
+
+I think this is very rare.
+
+It is important (e.g. Trac #14936) that this /only/ applies to
+coercions mentioned in casts. We don't want to be discombobulated
+by casts in terms! For example, consider
+ f ((e1,e2) |> sym co)
+where, say,
+ f :: Foo -> blah
+ co :: Foo ~R (Int,Int)
+
+Here we definitely do want to specialise for that pair! We do not
+match on the structure of the coercion; instead we just match on a
+coercion variable, so the RULE looks like
+
+ forall (x::Int, y::Int, co :: (Int,Int) ~R Foo)
+ f ((x,y) |> co) = $sf x y co
+
+Often the body of f looks like
+ f arg = ...(case arg |> co' of
+ (x,y) -> blah)...
+
+so that the specialised f will turn into
+ $sf x y co = let arg = (x,y) |> co
+ in ...(case arg |> co' of
+ (x,y) -> blah)....
+
+which will simplify to not use 'co' at all. But we can't guarantee
+that co will end up unused, so we still pass it. Absence analysis
+may remove it later.
+
+Note that this /also/ discards the call pattern if we have a cast in a
+/term/, although in fact Rules.match does make a very flaky and
+fragile attempt to match coercions. e.g. a call like
+ f (Maybe Age) (Nothing |> co) blah
+ where co :: Maybe Int ~ Maybe Age
+will be discarded. It's extremely fragile to match on the form of a
+coercion, so I think it's better just not to try. A more complicated
+alternative would be to discard calls that mention coercion variables
+only in kind-casts, but I'm doing the simple thing for now.
-}
type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments
@@ -1918,7 +1999,8 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls
-- Discard specialisations if there are too many of them
trimmed_pats = trim_pats env fn spec_info small_pats
--- ; pprTrace "callsToPats" (vcat [ text "calls:" <+> ppr calls
+-- ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls
+-- , text "done_specs:" <+> ppr (map os_pat done_specs)
-- , text "good_pats:" <+> ppr good_pats ]) $
-- return ()
@@ -1931,7 +2013,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
| sc_force env
|| isNothing mb_scc
|| n_remaining >= n_pats
- = pats -- No need to trim
+ = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats)
+ pats -- No need to trim
| otherwise
= emit_trace $ -- Need to trim, so keep the best ones
@@ -1975,6 +2058,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
speakNOf spec_count' (text "call pattern") <> comma <+>
text "but the limit is" <+> int max_specs) ]
, text "Use -fspec-constr-count=n to set the bound"
+ , text "done_spec_count =" <+> int done_spec_count
+ , text "Keeping " <+> int n_remaining <> text ", out of" <+> int n_pats
, text "Discarding:" <+> ppr (drop n_remaining sorted_pats) ]
@@ -1983,21 +2068,23 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
-- Type variables come first, since they may scope
-- over the following term variables
-- The [CoreExpr] are the argument patterns for the rule
-callToPats env bndr_occs (Call _ args con_env)
+callToPats env bndr_occs call@(Call _ args con_env)
| args `ltLength` bndr_occs -- Check saturated
= return Nothing
| otherwise
- = do { let in_scope = substInScope (sc_subst env)
+ = do { let in_scope = substInScope (sc_subst env)
; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs
- ; let pat_fvs = exprsFreeVarsList pats
+ ; let pat_fvs = exprsFreeVarsList pats
-- To get determinism we need the list of free variables in
-- deterministic order. Otherwise we end up creating
-- lambdas with different argument orders. See
-- determinism/simplCore/should_compile/spec-inline-determ.hs
-- for an example. For explanation of determinism
-- considerations See Note [Unique Determinism] in Unique.
+
in_scope_vars = getInScopeVars in_scope
- qvars = filterOut (`elemVarSet` in_scope_vars) pat_fvs
+ is_in_scope v = v `elemVarSet` in_scope_vars
+ qvars = filterOut is_in_scope pat_fvs
-- Quantify over variables that are not in scope
-- at the call site
-- See Note [Free type variables of the qvar types]
@@ -2012,8 +2099,21 @@ callToPats env bndr_occs (Call _ args con_env)
sanitise id = id `setIdType` expandTypeSynonyms (idType id)
-- See Note [Free type variables of the qvar types]
+ -- Bad coercion variables: see Note [SpecConstr and casts]
+ bad_covars :: CoVarSet
+ bad_covars = mapUnionVarSet get_bad_covars pats
+ get_bad_covars :: CoreArg -> CoVarSet
+ get_bad_covars (Type ty)
+ = filterVarSet (\v -> isId v && not (is_in_scope v)) $
+ tyCoVarsOfType ty
+ get_bad_covars _
+ = emptyVarSet
+
; -- pprTrace "callToPats" (ppr args $$ ppr bndr_occs) $
- if interesting
+ WARN( not (isEmptyVarSet bad_covars)
+ , text "SpecConstr: bad covars:" <+> ppr bad_covars
+ $$ ppr call )
+ if interesting && isEmptyVarSet bad_covars
then return (Just (qvars', pats))
else return Nothing }