1 files changed, 133 insertions, 33 deletions
diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
index e5af0b8a3c..f6d27ccba5 100644
--- a/compiler/specialise/SpecConstr.hs
+++ b/compiler/specialise/SpecConstr.hs
@@ -19,6 +19,8 @@ module SpecConstr(
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CoreSyn
 import CoreSubst
 import CoreUtils
@@ -36,7 +38,6 @@ import TyCon            ( tyConName )
 import Id
 import PprCore          ( pprParendExpr )
 import MkCore           ( mkImpossibleExpr )
-import Var
 import VarEnv
 import VarSet
 import Name
@@ -57,9 +58,6 @@ import Control.Monad    ( zipWithM )
 import Data.List
 import PrelNames        ( specTyConName )
 import Module
-
--- See Note [Forcing specialisation]
-
 import TyCon ( TyCon )
 import GHC.Exts( SpecConstrAnnotation(..) )
 import Data.Ord( comparing )
@@ -502,31 +500,46 @@ This is all quite ugly; we ought to come up with a better design.
 
 ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set
 sc_force to True when calling specLoop. This flag does four things:
+
   * Ignore specConstrThreshold, to specialise functions of arbitrary size
         (see scTopBind)
   * Ignore specConstrCount, to make arbitrary numbers of specialisations
         (see specialise)
   * Specialise even for arguments that are not scrutinised in the loop
-        (see argToPat; Trac #4488)
+        (see argToPat; Trac #4448)
   * Only specialise on recursive types a finite number of times
         (see is_too_recursive; Trac #5550; Note [Limit recursive specialisation])
 
-This flag is inherited for nested non-recursive bindings (which are likely to
-be join points and hence should be fully specialised) but reset for nested
-recursive bindings.
-
-What alternatives did I consider? Annotating the loop itself doesn't
-work because (a) it is local and (b) it will be w/w'ed and having
-w/w propagating annotations somehow doesn't seem like a good idea. The
-types of the loop arguments really seem to be the most persistent
-thing.
-
-Annotating the types that make up the loop state doesn't work,
-either, because (a) it would prevent us from using types like Either
-or tuples here, (b) we don't want to restrict the set of types that
-can be used in Stream states and (c) some types are fixed by the user
-(e.g., the accumulator here) but we still want to specialise as much
-as possible.
+The flag holds only for specialising a single binding group, and NOT
+for nested bindings.  (So really it should be passed around explicitly
+and not stored in ScEnv.)  Trac #14379 turned out to be caused by
+   f SPEC x = let g1 x = ...
+              in ...
+We force-specialise f (because of the SPEC), but that generates a specialised
+copy of g1 (as well as the original).  Alas g1 has a nested binding g2; and
+in each copy of g1 we get an unspecialised and specialised copy of g2; and so
+on. Result, exponential.  So the force-spec flag now only applies to one
+level of bindings at a time.
+
+Mechanism for this one-level-only thing:
+
+ - Switch it on at the call to specRec, in scExpr and scTopBinds
+ - Switch it off when doing the RHSs;
+   this can be done very conveniently in decreaseSpecCount
+
+What alternatives did I consider?
+
+* Annotating the loop itself doesn't work because (a) it is local and
+  (b) it will be w/w'ed and having w/w propagating annotations somehow
+  doesn't seem like a good idea. The types of the loop arguments
+  really seem to be the most persistent thing.
+
+* Annotating the types that make up the loop state doesn't work,
+  either, because (a) it would prevent us from using types like Either
+  or tuples here, (b) we don't want to restrict the set of types that
+  can be used in Stream states and (c) some types are fixed by the
+  user (e.g., the accumulator here) but we still want to specialise as
+  much as possible.
 
 Alternatives to ForceSpecConstr
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -586,7 +599,7 @@ more than N times (controlled by -fspec-constr-recursive=N) we check
     specialisations.  If sc_count is "no limit" then we arbitrarily
     choose 10 as the limit (ugh).
 
-See Trac #5550.   Also Trac #13623, where this test had become over-agressive,
+See Trac #5550.   Also Trac #13623, where this test had become over-aggressive,
 and we lost a wonderful specialisation that we really wanted!
 
 Note [NoSpecConstr]
@@ -597,7 +610,7 @@ to mean "don't specialise on arguments of this type".  It was added
 before we had ForceSpecConstr.  Lacking ForceSpecConstr we specialised
 regardless of size; and then we needed a way to turn that *off*.  Now
 that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
-(Used only for PArray.)
+(Used only for PArray, TODO: remove?)
 
 -----------------------------------------------------
                 Stuff not yet handled
@@ -975,7 +988,8 @@ extendCaseBndrs env scrut case_bndr con alt_bndrs
 decreaseSpecCount :: ScEnv -> Int -> ScEnv
 -- See Note [Avoiding exponential blowup]
 decreaseSpecCount env n_specs
-  = env { sc_count = case sc_count env of
+  = env { sc_force = False   -- See Note [Forcing specialisation]
+        , sc_count = case sc_count env of
                        Nothing -> Nothing
                        Just n  -> Just (n `div` (n_specs + 1)) }
         -- The "+1" takes account of the original function;
@@ -1545,7 +1559,11 @@ specRec top_lvl env body_usg rhs_infos
         return (usg_so_far, spec_infos)
 
       | otherwise
-      = do  { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
+      = -- pprTrace "specRec3" (vcat [ text "bndrs" <+> ppr (map ri_fn rhs_infos)
+        --                           , text "iteration" <+> int n_iter
+        --                          , text "spec_infos" <+> ppr (map (map os_pat . si_specs) spec_infos)
+        --                    ]) $
+        do  { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
             ; let (extra_usg_s, new_spec_infos) = unzip specs_w_usg
                   extra_usg = combineUsages extra_usg_s
                   all_usg   = usg_so_far `combineUsage` extra_usg
@@ -1792,7 +1810,7 @@ that specialisations didn't fire inside wrappers; see test
 simplCore/should_compile/spec-inline.
 
 So now I just use the inline-activation of the parent Id, as the
-activation for the specialiation RULE, just like the main specialiser;
+activation for the specialisation RULE, just like the main specialiser;
 
 This in turn means there is no point in specialising NOINLINE things,
 so we test for that.
@@ -1881,6 +1899,69 @@ by trim_pats.
 
 * Otherwise we sort the patterns to choose the most general
   ones first; more general => more widely applicable.
+
+Note [SpecConstr and casts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (Trac #14270) a call like
+
+    let f = e
+    in ... f (K @(a |> co)) ...
+
+where 'co' is a coercion variable not in scope at f's definition site.
+If we aren't caereful we'll get
+
+    let $sf a co = e (K @(a |> co))
+        RULE "SC:f" forall a co.  f (K @(a |> co)) = $sf a co
+        f = e
+    in ...
+
+But alas, when we match the call we won't bind 'co', because type-matching
+(for good reasons) discards casts).
+
+I don't know how to solve this, so for now I'm just discarding any
+call patterns that
+  * Mentions a coercion variable in a type argument
+  * That is not in scope at the binding of the function
+
+I think this is very rare.
+
+It is important (e.g. Trac #14936) that this /only/ applies to
+coercions mentioned in casts.  We don't want to be discombobulated
+by casts in terms!  For example, consider
+   f ((e1,e2) |> sym co)
+where, say,
+   f  :: Foo -> blah
+   co :: Foo ~R (Int,Int)
+
+Here we definitely do want to specialise for that pair!  We do not
+match on the structre of the coercion; instead we just match on a
+coercion variable, so the RULE looks like
+
+   forall (x::Int, y::Int, co :: (Int,Int) ~R Foo)
+     f ((x,y) |> co) = $sf x y co
+
+Often the body of f looks like
+   f arg = ...(case arg |> co' of
+                (x,y) -> blah)...
+
+so that the specialised f will turn into
+   $sf x y co = let arg = (x,y) |> co
+                in ...(case arg>| co' of
+                         (x,y) -> blah)....
+
+which will simplify to not use 'co' at all.  But we can't guarantee
+that co will end up unused, so we still pass it.  Absence analysis
+may remove it later.
+
+Note that this /also/ discards the call pattern if we have a cast in a
+/term/, although in fact Rules.match does make a very flaky and
+fragile attempt to match coercions.  e.g. a call like
+    f (Maybe Age) (Nothing |> co) blah
+    where co :: Maybe Int ~ Maybe Age
+will be discarded.  It's extremely fragile to match on the form of a
+coercion, so I think it's better just not to try.  A more complicated
+alternative would be to discard calls that mention coercion variables
+only in kind-casts, but I'm doing the simple thing for now.
 -}
 
 type CallPat = ([Var], [CoreExpr])      -- Quantified variables and arguments
@@ -1918,7 +1999,8 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls
                 -- Discard specialisations if there are too many of them
               trimmed_pats = trim_pats env fn spec_info small_pats
 
---        ; pprTrace "callsToPats" (vcat [ text "calls:" <+> ppr calls
+--        ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls
+--                                       , text "done_specs:" <+> ppr (map os_pat done_specs)
 --                                       , text "good_pats:" <+> ppr good_pats ]) $
 --          return ()
 
@@ -1931,7 +2013,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
   | sc_force env
     || isNothing mb_scc
     || n_remaining >= n_pats
-  = pats          -- No need to trim
+  = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats)
+    pats          -- No need to trim
 
   | otherwise
   = emit_trace $  -- Need to trim, so keep the best ones
@@ -1975,6 +2058,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
                                speakNOf spec_count' (text "call pattern") <> comma <+>
                                text "but the limit is" <+> int max_specs) ]
                , text "Use -fspec-constr-count=n to set the bound"
+               , text "done_spec_count =" <+> int done_spec_count
+               , text "Keeping " <+> int n_remaining <> text ", out of" <+> int n_pats
                , text "Discarding:" <+> ppr (drop n_remaining sorted_pats) ]
 
 
@@ -1983,21 +2068,23 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
         --      Type variables come first, since they may scope
         --      over the following term variables
         -- The [CoreExpr] are the argument patterns for the rule
-callToPats env bndr_occs (Call _ args con_env)
+callToPats env bndr_occs call@(Call _ args con_env)
   | args `ltLength` bndr_occs      -- Check saturated
   = return Nothing
   | otherwise
-  = do  { let in_scope      = substInScope (sc_subst env)
+  = do  { let in_scope = substInScope (sc_subst env)
         ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs
-        ; let pat_fvs       = exprsFreeVarsList pats
+        ; let pat_fvs = exprsFreeVarsList pats
                 -- To get determinism we need the list of free variables in
                 -- deterministic order. Otherwise we end up creating
                 -- lambdas with different argument orders. See
                 -- determinism/simplCore/should_compile/spec-inline-determ.hs
                 -- for an example. For explanation of determinism
                 -- considerations See Note [Unique Determinism] in Unique.
+
               in_scope_vars = getInScopeVars in_scope
-              qvars         = filterOut (`elemVarSet` in_scope_vars) pat_fvs
+              is_in_scope v = v `elemVarSet` in_scope_vars
+              qvars         = filterOut is_in_scope pat_fvs
                 -- Quantify over variables that are not in scope
                 -- at the call site
                 -- See Note [Free type variables of the qvar types]
@@ -2012,8 +2099,21 @@ callToPats env bndr_occs (Call _ args con_env)
               sanitise id   = id `setIdType` expandTypeSynonyms (idType id)
                 -- See Note [Free type variables of the qvar types]
 
+              -- Bad coercion variables: see Note [SpecConstr and casts]
+              bad_covars :: CoVarSet
+              bad_covars = mapUnionVarSet get_bad_covars pats
+              get_bad_covars :: CoreArg -> CoVarSet
+              get_bad_covars (Type ty)
+                = filterVarSet (\v -> isId v && not (is_in_scope v)) $
+                  tyCoVarsOfType ty
+              get_bad_covars _
+                = emptyVarSet
+
         ; -- pprTrace "callToPats"  (ppr args $$ ppr bndr_occs) $
-          if interesting
+          WARN( not (isEmptyVarSet bad_covars)
+              , text "SpecConstr: bad covars:" <+> ppr bad_covars
+                $$ ppr call )
+          if interesting && isEmptyVarSet bad_covars
           then return (Just (qvars', pats))
           else return Nothing }