update to current master againwip/T13904

author: Kavon Farvardin <kavon@farvard.in> 2018-09-23 15:29:37 -0500
committer: Kavon Farvardin <kavon@farvard.in> 2018-09-23 15:29:37 -0500
commit: 84c2ad99582391005b5e873198b15e9e9eb4f78d (patch)
tree: caa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/specialise
parent: 8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff)
parent: e68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff)
download: haskell-wip/T13904.tar.gz
3 files changed, 252 insertions, 129 deletions
diff --git a/compiler/specialise/Rules.hs b/compiler/specialise/Rules.hs
index b5606754e6..ad6a0757cb 100644
--- a/compiler/specialise/Rules.hs
+++ b/compiler/specialise/Rules.hs
@@ -28,6 +28,8 @@ module Rules (
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CoreSyn          -- All of it
 import Module           ( Module, ModuleSet, elemModuleSet )
 import CoreSubst
@@ -38,7 +40,7 @@ import CoreUtils        ( exprType, eqExpr, mkTick, mkTicks,
                           stripTicksTopT, stripTicksTopE,
                           isJoinBind )
 import PprCore          ( pprRules )
-import Type             ( Type, substTy, mkTCvSubst )
+import Type             ( Type, Kind, substTy, mkTCvSubst )
 import TcType           ( tcSplitTyConApp_maybe )
 import TysWiredIn       ( anyTypeOfKind )
 import Coercion
@@ -53,7 +55,7 @@ import NameSet
 import NameEnv
 import UniqFM
 import Unify            ( ruleMatchTyKiX )
-import BasicTypes       ( Activation, CompilerPhase, isActive, pprRuleName )
+import BasicTypes
 import DynFlags         ( DynFlags )
 import Outputable
 import FastString
@@ -288,9 +290,10 @@ addRuleInfo (RuleInfo rs1 fvs1) (RuleInfo rs2 fvs2)
   = RuleInfo (rs1 ++ rs2) (fvs1 `unionDVarSet` fvs2)
 
 addIdSpecialisations :: Id -> [CoreRule] -> Id
-addIdSpecialisations id []
-  = id
 addIdSpecialisations id rules
+  | null rules
+  = id
+  | otherwise
   = setIdSpecialisation id $
     extendRuleInfo (idSpecialisation id) rules
 
@@ -310,9 +313,8 @@ ruleIsVisible _ BuiltinRule{} = True
 ruleIsVisible vis_orphs Rule { ru_orphan = orph, ru_origin = origin }
     = notOrphan orph || origin `elemModuleSet` vis_orphs
 
-{-
-Note [Where rules are found]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [Where rules are found]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The rules for an Id come from two places:
   (a) the ones it is born with, stored inside the Id iself (idCoreRules fn),
   (b) rules added in other modules, stored in the global RuleBase (imp_rules)
@@ -348,7 +350,7 @@ mkRuleBase rules = extendRuleBaseList emptyRuleBase rules
 
 extendRuleBaseList :: RuleBase -> [CoreRule] -> RuleBase
 extendRuleBaseList rule_base new_guys
-  = foldl extendRuleBase rule_base new_guys
+  = foldl' extendRuleBase rule_base new_guys
 
 unionRuleBase :: RuleBase -> RuleBase -> RuleBase
 unionRuleBase rb1 rb2 = plusNameEnv_C (++) rb1 rb2
@@ -411,21 +413,20 @@ lookupRule dflags in_scope is_active fn args rules
 findBest :: (Id, [CoreExpr])
          -> (CoreRule,CoreExpr) -> [(CoreRule,CoreExpr)] -> (CoreRule,CoreExpr)
 -- All these pairs matched the expression
--- Return the pair the the most specific rule
+-- Return the pair the most specific rule
 -- The (fn,args) is just for overlap reporting
 
 findBest _      (rule,ans)   [] = (rule,ans)
 findBest target (rule1,ans1) ((rule2,ans2):prs)
   | rule1 `isMoreSpecific` rule2 = findBest target (rule1,ans1) prs
   | rule2 `isMoreSpecific` rule1 = findBest target (rule2,ans2) prs
-  | debugIsOn = let pp_rule rule = sdocWithPprDebug $ \dbg -> if dbg
-                        then ppr rule
-                        else doubleQuotes (ftext (ruleName rule))
+  | debugIsOn = let pp_rule rule
+                      = ifPprDebug (ppr rule)
+                                   (doubleQuotes (ftext (ruleName rule)))
                 in pprTrace "Rules.findBest: rule overlap (Rule 1 wins)"
-                         (vcat [ sdocWithPprDebug $ \dbg -> if dbg
-                                   then text "Expression to match:" <+> ppr fn
-                                        <+> sep (map ppr args)
-                                   else empty
+                         (vcat [ whenPprDebug $
+                                 text "Expression to match:" <+> ppr fn
+                                 <+> sep (map ppr args)
                                , text "Rule 1:" <+> pp_rule rule1
                                , text "Rule 2:" <+> pp_rule rule2]) $
                 findBest target (rule1,ans1) prs
@@ -517,7 +518,7 @@ matchRule _ in_scope is_active _ args rough_args
   | ruleCantMatch tpl_tops rough_args = Nothing
   | otherwise
   = case matchN in_scope rule_name tpl_vars tpl_args args of
-        Nothing                        -> Nothing
+        Nothing                       -> Nothing
         Just (bind_wrapper, tpl_vals) -> Just (bind_wrapper $
                                                rule_fn `mkApps` tpl_vals)
   where
@@ -535,58 +536,82 @@ matchN  :: InScopeEnv
 
 matchN (in_scope, id_unf) rule_name tmpl_vars tmpl_es target_es
   = do  { subst <- go init_menv emptyRuleSubst tmpl_es target_es
-        ; let (_, matched_es) = mapAccumL lookup_tmpl subst tmpl_vars
+        ; let (_, matched_es) = mapAccumL lookup_tmpl subst $
+                                tmpl_vars `zip` tmpl_vars1
         ; return (rs_binds subst, matched_es) }
   where
-    init_rn_env = mkRnEnv2 (extendInScopeSetList in_scope tmpl_vars)
-                  -- See Note [Template binders]
+    (init_rn_env, tmpl_vars1) = mapAccumL rnBndrL (mkRnEnv2 in_scope) tmpl_vars
+                  -- See Note [Cloning the template binders]
 
-    init_menv = RV { rv_tmpls = mkVarSet tmpl_vars, rv_lcl = init_rn_env
-                   , rv_fltR = mkEmptySubst (rnInScopeSet init_rn_env)
-                   , rv_unf = id_unf }
+    init_menv = RV { rv_tmpls = mkVarSet tmpl_vars1
+                   , rv_lcl   = init_rn_env
+                   , rv_fltR  = mkEmptySubst (rnInScopeSet init_rn_env)
+                   , rv_unf   = id_unf }
 
     go _    subst []     _      = Just subst
     go _    _     _      []     = Nothing       -- Fail if too few actual args
     go menv subst (t:ts) (e:es) = do { subst1 <- match menv subst t e
                                      ; go menv subst1 ts es }
 
-    lookup_tmpl :: RuleSubst -> Var -> (RuleSubst, CoreExpr)
-    lookup_tmpl rs@(RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst }) tmpl_var
-        | isId tmpl_var
-        = case lookupVarEnv id_subst tmpl_var of
+    lookup_tmpl :: RuleSubst -> (InVar,OutVar) -> (RuleSubst, CoreExpr)
+                   -- Need to return a RuleSubst solely for the benefit of mk_fake_ty
+    lookup_tmpl rs@(RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst })
+                (tmpl_var, tmpl_var1)
+        | isId tmpl_var1
+        = case lookupVarEnv id_subst tmpl_var1 of
              Just e -> (rs, e)
-             Nothing | Just refl_co <- isReflCoVar_maybe tmpl_var
-                     , let co_expr = Coercion refl_co
-                     -> (rs { rs_id_subst = extendVarEnv id_subst tmpl_var co_expr }, co_expr)
+             Nothing | Just refl_co <- isReflCoVar_maybe tmpl_var1
+                     , let co_expr   = Coercion refl_co
+                           id_subst' = extendVarEnv id_subst tmpl_var1 co_expr
+                           rs'       = rs { rs_id_subst = id_subst' }
+                     -> (rs', co_expr) -- See Note [Unbound RULE binders]
                      | otherwise
                      -> unbound tmpl_var
         | otherwise
-        = case lookupVarEnv tv_subst tmpl_var of
+        = case lookupVarEnv tv_subst tmpl_var1 of
              Just ty -> (rs, Type ty)
-             Nothing -> (rs { rs_tv_subst = extendVarEnv tv_subst tmpl_var fake_ty }, Type fake_ty)
-                        -- See Note [Unbound RULE binders]
+             Nothing -> (rs', Type fake_ty) -- See Note [Unbound RULE binders]
         where
-          fake_ty = anyTypeOfKind kind
-          cv_subst = to_co_env id_subst
-          kind = Type.substTy (mkTCvSubst in_scope (tv_subst, cv_subst))
-                              (tyVarKind tmpl_var)
-
-          to_co_env env = nonDetFoldUFM_Directly to_co emptyVarEnv env
-            -- It's OK to use nonDetFoldUFM_Directly because we forget the
-            -- order immediately by creating a new env
-          to_co uniq expr env
-            | Just co <- exprToCoercion_maybe expr
-            = extendVarEnv_Directly env uniq co
-
-            | otherwise
-            = env
-
-    unbound var = pprPanic "Template variable unbound in rewrite rule" $
-                  vcat [ text "Variable:" <+> ppr var
-                       , text "Rule" <+> pprRuleName rule_name
-                       , text "Rule bndrs:" <+> ppr tmpl_vars
-                       , text "LHS args:" <+> ppr tmpl_es
-                       , text "Actual args:" <+> ppr target_es ]
+          rs'     = rs { rs_tv_subst = extendVarEnv tv_subst tmpl_var1 fake_ty }
+          fake_ty = mk_fake_ty in_scope rs tmpl_var1
+                    -- This call is the sole reason we accumulate
+                    -- RuleSubst in lookup_tmpl
+
+    unbound tmpl_var
+       = pprPanic "Template variable unbound in rewrite rule" $
+         vcat [ text "Variable:" <+> ppr tmpl_var <+> dcolon <+> ppr (varType tmpl_var)
+              , text "Rule" <+> pprRuleName rule_name
+              , text "Rule bndrs:" <+> ppr tmpl_vars
+              , text "LHS args:" <+> ppr tmpl_es
+              , text "Actual args:" <+> ppr target_es ]
+
+
+mk_fake_ty :: InScopeSet -> RuleSubst -> TyVar -> Kind
+-- Roughly:
+--    mk_fake_ty subst tv = Any @(subst (tyVarKind tv))
+-- That is: apply the substitution to the kind of the given tyvar,
+-- and make an 'any' type of that kind.
+-- Tiresomely, the RuleSubst is not well adapted to substTy, leading to
+-- horrible impedence matching.
+--
+-- Happily, this function is seldom called
+mk_fake_ty in_scope (RS { rs_tv_subst = tv_subst, rs_id_subst = id_subst }) tmpl_var1
+  = anyTypeOfKind kind
+  where
+    kind = Type.substTy (mkTCvSubst in_scope (tv_subst, cv_subst))
+                        (tyVarKind tmpl_var1)
+
+    cv_subst = to_co_env id_subst
+
+    to_co_env :: IdSubstEnv -> CvSubstEnv
+    to_co_env env = nonDetFoldUFM_Directly to_co emptyVarEnv env
+      -- It's OK to use nonDetFoldUFM_Directly because we forget the
+      -- order immediately by creating a new env
+
+    to_co uniq expr env
+      = case exprToCoercion_maybe expr of
+          Just co -> extendVarEnv_Directly env uniq co
+          Nothing -> env
 
 {- Note [Unbound RULE binders]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -631,8 +656,8 @@ bound on the LHS:
   in Trac #13410, and also in test T10602.
 
 
-Note [Template binders]
-~~~~~~~~~~~~~~~~~~~~~~~
+Note [Cloning the template binders]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider the following match (example 1):
         Template:  forall x.  f x
         Target:               f (x+1)
@@ -643,21 +668,19 @@ Likewise this one (example 2):
         Template:  forall x. f (\x.x)
         Target:              f (\y.y)
 
-We achieve this simply by:
-  * Adding forall'd template binders to the in-scope set
-
-This works even if the template binder are already in scope
-(in the target) because
+We achieve this simply by using rnBndrL to clone the template
+binders if they are already in scope.
 
-  * The RuleSubst rs_tv_subst, rs_id_subst maps LHS template vars to
-    the target world.  It is not applied recursively.
-
-  * Having the template vars in the in-scope set ensures that in
-    example 2 above, the (\x.x) is cloned to (\x'. x').
-
-In the past we used rnBndrL to clone the template variables if
-they were already in scope.  But (a) that's not necessary and (b)
-it complicate the fancy footwork for Note [Unbound template type variables]
+------ Historical note -------
+At one point I tried simply adding the template binders to the
+in-scope set /without/ cloning them, but that failed in a horribly
+obscure way in Trac #14777.  Problem was that during matching we look
+up target-term variables in the in-scope set (see Note [Lookup
+in-scope]).  If a target-term variable happens to name-clash with a
+template variable, that lookup will find the template variable, which
+is /utterly/ bogus.  In Trac #14777, this transformed a term variable
+into a type variable, and then crashed when we wanted its idInfo.
+------ End of historical note -------
 
 
 ************************************************************************
@@ -673,11 +696,12 @@ it complicate the fancy footwork for Note [Unbound template type variables]
 --   from nested matches; see the Let case of match, below
 --
 data RuleMatchEnv
-  = RV { rv_tmpls :: VarSet          -- Template variables
-       , rv_lcl   :: RnEnv2          -- Renamings for *local bindings*
+  = RV { rv_lcl   :: RnEnv2          -- Renamings for *local bindings*
                                      --   (lambda/case)
+       , rv_tmpls :: VarSet          -- Template variables
+                                     --   (after applying envL of rv_lcl)
        , rv_fltR  :: Subst           -- Renamings for floated let-bindings
-                                     --   domain disjoint from envR of rv_lcl
+                                     --   (domain disjoint from envR of rv_lcl)
                                      -- See Note [Matching lets]
        , rv_unf :: IdUnfoldingFun
        }
@@ -707,7 +731,6 @@ emptyRuleSubst = RS { rs_tv_subst = emptyVarEnv, rs_id_subst = emptyVarEnv
 --      For a start, in general eta expansion wastes work.
 --      SLPJ July 99
 
-
 match :: RuleMatchEnv
       -> RuleSubst
       -> CoreExpr               -- Template
@@ -738,7 +761,8 @@ match _ _ e@Tick{} _
 -- succeed in matching what looks like the template variable 'a' against 3.
 
 -- The Var case follows closely what happens in Unify.match
-match renv subst (Var v1)    e2 = match_var renv subst v1 e2
+match renv subst (Var v1) e2
+  = match_var renv subst v1 e2
 
 match renv subst e1 (Var v2)      -- Note [Expanding variables]
   | not (inRnEnvR rn_env v2) -- Note [Do not expand locally-bound variables]
@@ -883,7 +907,7 @@ match_alts renv subst ((c1,vs1,r1):alts1) ((c2,vs2,r2):alts2)
   = do  { subst1 <- match renv' subst r1 r2
         ; match_alts renv subst1 alts1 alts2 }
   where
-    renv' = foldl mb renv (vs1 `zip` vs2)
+    renv' = foldl' mb renv (vs1 `zip` vs2)
     mb renv (v1,v2) = rnMatchBndr2 renv subst v1 v2
 
 match_alts _ _ _ _
@@ -1110,19 +1134,19 @@ SpecConstr sees this fragment:
           Data.Maybe.Nothing -> lvl_smf;
           Data.Maybe.Just n_acT [Just S(L)] ->
             case n_acT of wild1_ams [Just A] { GHC.Base.I# y_amr [Just L] ->
-            \$wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf
+              $wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf
             }};
 
 and correctly generates the rule
 
         RULES: "SC:$wfoo1" [0] __forall {y_amr [Just L] :: GHC.Prim.Int#
                                           sc_snn :: GHC.Prim.Int#}
-          \$wfoo_smW sc_snn (Data.Maybe.Just @ GHC.Base.Int (GHC.Base.I# y_amr))
-          = \$s\$wfoo_sno y_amr sc_snn ;]
+          $wfoo_smW sc_snn (Data.Maybe.Just @ GHC.Base.Int (GHC.Base.I# y_amr))
+          = $s$wfoo_sno y_amr sc_snn ;]
 
 BUT we must ensure that this rule matches in the original function!
-Note that the call to \$wfoo is
-            \$wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf
+Note that the call to $wfoo is
+            $wfoo_smW (GHC.Prim.-# ds_Xmb y_amr) wild_Xf
 
 During matching we expand wild_Xf to (Just n_acT).  But then we must also
 expand n_acT to (I# y_amr).  And we can only do that if we look up n_acT
@@ -1147,10 +1171,10 @@ is so important.
 -- string for the purposes of error reporting
 ruleCheckProgram :: CompilerPhase               -- ^ Rule activation test
                  -> String                      -- ^ Rule pattern
-                 -> RuleEnv                     -- ^ Database of rules
+                 -> (Id -> [CoreRule])          -- ^ Rules for an Id
                  -> CoreProgram                 -- ^ Bindings to check in
                  -> SDoc                        -- ^ Resulting check message
-ruleCheckProgram phase rule_pat rule_base binds
+ruleCheckProgram phase rule_pat rules binds
   | isEmptyBag results
   = text "Rule check results: no rule application sites"
   | otherwise
@@ -1163,7 +1187,7 @@ ruleCheckProgram phase rule_pat rule_base binds
                        , rc_id_unf    = idUnfolding     -- Not quite right
                                                         -- Should use activeUnfolding
                        , rc_pattern   = rule_pat
-                       , rc_rule_base = rule_base }
+                       , rc_rules = rules }
     results = unionManyBags (map (ruleCheckBind env) binds)
     line = text (replicate 20 '-')
 
@@ -1171,7 +1195,7 @@ data RuleCheckEnv = RuleCheckEnv {
     rc_is_active :: Activation -> Bool,
     rc_id_unf  :: IdUnfoldingFun,
     rc_pattern :: String,
-    rc_rule_base :: RuleEnv
+    rc_rules :: Id -> [CoreRule]
 }
 
 ruleCheckBind :: RuleCheckEnv -> CoreBind -> Bag SDoc
@@ -1205,7 +1229,7 @@ ruleCheckFun env fn args
   | null name_match_rules = emptyBag
   | otherwise             = unitBag (ruleAppCheck_help env fn args name_match_rules)
   where
-    name_match_rules = filter match (getRules (rc_rule_base env) fn)
+    name_match_rules = filter match (rc_rules env fn)
     match rule = (rc_pattern env) `isPrefixOf` unpackFS (ruleName rule)
 
 ruleAppCheck_help :: RuleCheckEnv -> Id -> [CoreExpr] -> [CoreRule] -> SDoc
diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
index e5af0b8a3c..f6d27ccba5 100644
--- a/compiler/specialise/SpecConstr.hs
+++ b/compiler/specialise/SpecConstr.hs
@@ -19,6 +19,8 @@ module SpecConstr(
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CoreSyn
 import CoreSubst
 import CoreUtils
@@ -36,7 +38,6 @@ import TyCon            ( tyConName )
 import Id
 import PprCore          ( pprParendExpr )
 import MkCore           ( mkImpossibleExpr )
-import Var
 import VarEnv
 import VarSet
 import Name
@@ -57,9 +58,6 @@ import Control.Monad    ( zipWithM )
 import Data.List
 import PrelNames        ( specTyConName )
 import Module
-
--- See Note [Forcing specialisation]
-
 import TyCon ( TyCon )
 import GHC.Exts( SpecConstrAnnotation(..) )
 import Data.Ord( comparing )
@@ -502,31 +500,46 @@ This is all quite ugly; we ought to come up with a better design.
 
 ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set
 sc_force to True when calling specLoop. This flag does four things:
+
   * Ignore specConstrThreshold, to specialise functions of arbitrary size
         (see scTopBind)
   * Ignore specConstrCount, to make arbitrary numbers of specialisations
         (see specialise)
   * Specialise even for arguments that are not scrutinised in the loop
-        (see argToPat; Trac #4488)
+        (see argToPat; Trac #4448)
   * Only specialise on recursive types a finite number of times
         (see is_too_recursive; Trac #5550; Note [Limit recursive specialisation])
 
-This flag is inherited for nested non-recursive bindings (which are likely to
-be join points and hence should be fully specialised) but reset for nested
-recursive bindings.
-
-What alternatives did I consider? Annotating the loop itself doesn't
-work because (a) it is local and (b) it will be w/w'ed and having
-w/w propagating annotations somehow doesn't seem like a good idea. The
-types of the loop arguments really seem to be the most persistent
-thing.
-
-Annotating the types that make up the loop state doesn't work,
-either, because (a) it would prevent us from using types like Either
-or tuples here, (b) we don't want to restrict the set of types that
-can be used in Stream states and (c) some types are fixed by the user
-(e.g., the accumulator here) but we still want to specialise as much
-as possible.
+The flag holds only for specialising a single binding group, and NOT
+for nested bindings.  (So really it should be passed around explicitly
+and not stored in ScEnv.)  Trac #14379 turned out to be caused by
+   f SPEC x = let g1 x = ...
+              in ...
+We force-specialise f (because of the SPEC), but that generates a specialised
+copy of g1 (as well as the original).  Alas g1 has a nested binding g2; and
+in each copy of g1 we get an unspecialised and specialised copy of g2; and so
+on. Result, exponential.  So the force-spec flag now only applies to one
+level of bindings at a time.
+
+Mechanism for this one-level-only thing:
+
+ - Switch it on at the call to specRec, in scExpr and scTopBinds
+ - Switch it off when doing the RHSs;
+   this can be done very conveniently in decreaseSpecCount
+
+What alternatives did I consider?
+
+* Annotating the loop itself doesn't work because (a) it is local and
+  (b) it will be w/w'ed and having w/w propagating annotations somehow
+  doesn't seem like a good idea. The types of the loop arguments
+  really seem to be the most persistent thing.
+
+* Annotating the types that make up the loop state doesn't work,
+  either, because (a) it would prevent us from using types like Either
+  or tuples here, (b) we don't want to restrict the set of types that
+  can be used in Stream states and (c) some types are fixed by the
+  user (e.g., the accumulator here) but we still want to specialise as
+  much as possible.
 
 Alternatives to ForceSpecConstr
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -586,7 +599,7 @@ more than N times (controlled by -fspec-constr-recursive=N) we check
     specialisations.  If sc_count is "no limit" then we arbitrarily
     choose 10 as the limit (ugh).
 
-See Trac #5550.   Also Trac #13623, where this test had become over-agressive,
+See Trac #5550.   Also Trac #13623, where this test had become over-aggressive,
 and we lost a wonderful specialisation that we really wanted!
 
 Note [NoSpecConstr]
@@ -597,7 +610,7 @@ to mean "don't specialise on arguments of this type".  It was added
 before we had ForceSpecConstr.  Lacking ForceSpecConstr we specialised
 regardless of size; and then we needed a way to turn that *off*.  Now
 that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
-(Used only for PArray.)
+(Used only for PArray, TODO: remove?)
 
 -----------------------------------------------------
                 Stuff not yet handled
@@ -975,7 +988,8 @@ extendCaseBndrs env scrut case_bndr con alt_bndrs
 decreaseSpecCount :: ScEnv -> Int -> ScEnv
 -- See Note [Avoiding exponential blowup]
 decreaseSpecCount env n_specs
-  = env { sc_count = case sc_count env of
+  = env { sc_force = False   -- See Note [Forcing specialisation]
+        , sc_count = case sc_count env of
                        Nothing -> Nothing
                        Just n  -> Just (n `div` (n_specs + 1)) }
         -- The "+1" takes account of the original function;
@@ -1545,7 +1559,11 @@ specRec top_lvl env body_usg rhs_infos
         return (usg_so_far, spec_infos)
 
       | otherwise
-      = do  { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
+      = -- pprTrace "specRec3" (vcat [ text "bndrs" <+> ppr (map ri_fn rhs_infos)
+        --                           , text "iteration" <+> int n_iter
+        --                          , text "spec_infos" <+> ppr (map (map os_pat . si_specs) spec_infos)
+        --                    ]) $
+        do  { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
             ; let (extra_usg_s, new_spec_infos) = unzip specs_w_usg
                   extra_usg = combineUsages extra_usg_s
                   all_usg   = usg_so_far `combineUsage` extra_usg
@@ -1792,7 +1810,7 @@ that specialisations didn't fire inside wrappers; see test
 simplCore/should_compile/spec-inline.
 
 So now I just use the inline-activation of the parent Id, as the
-activation for the specialiation RULE, just like the main specialiser;
+activation for the specialisation RULE, just like the main specialiser;
 
 This in turn means there is no point in specialising NOINLINE things,
 so we test for that.
@@ -1881,6 +1899,69 @@ by trim_pats.
 
 * Otherwise we sort the patterns to choose the most general
   ones first; more general => more widely applicable.
+
+Note [SpecConstr and casts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (Trac #14270) a call like
+
+    let f = e
+    in ... f (K @(a |> co)) ...
+
+where 'co' is a coercion variable not in scope at f's definition site.
+If we aren't caereful we'll get
+
+    let $sf a co = e (K @(a |> co))
+        RULE "SC:f" forall a co.  f (K @(a |> co)) = $sf a co
+        f = e
+    in ...
+
+But alas, when we match the call we won't bind 'co', because type-matching
+(for good reasons) discards casts).
+
+I don't know how to solve this, so for now I'm just discarding any
+call patterns that
+  * Mentions a coercion variable in a type argument
+  * That is not in scope at the binding of the function
+
+I think this is very rare.
+
+It is important (e.g. Trac #14936) that this /only/ applies to
+coercions mentioned in casts.  We don't want to be discombobulated
+by casts in terms!  For example, consider
+   f ((e1,e2) |> sym co)
+where, say,
+   f  :: Foo -> blah
+   co :: Foo ~R (Int,Int)
+
+Here we definitely do want to specialise for that pair!  We do not
+match on the structre of the coercion; instead we just match on a
+coercion variable, so the RULE looks like
+
+   forall (x::Int, y::Int, co :: (Int,Int) ~R Foo)
+     f ((x,y) |> co) = $sf x y co
+
+Often the body of f looks like
+   f arg = ...(case arg |> co' of
+                (x,y) -> blah)...
+
+so that the specialised f will turn into
+   $sf x y co = let arg = (x,y) |> co
+                in ...(case arg>| co' of
+                         (x,y) -> blah)....
+
+which will simplify to not use 'co' at all.  But we can't guarantee
+that co will end up unused, so we still pass it.  Absence analysis
+may remove it later.
+
+Note that this /also/ discards the call pattern if we have a cast in a
+/term/, although in fact Rules.match does make a very flaky and
+fragile attempt to match coercions.  e.g. a call like
+    f (Maybe Age) (Nothing |> co) blah
+    where co :: Maybe Int ~ Maybe Age
+will be discarded.  It's extremely fragile to match on the form of a
+coercion, so I think it's better just not to try.  A more complicated
+alternative would be to discard calls that mention coercion variables
+only in kind-casts, but I'm doing the simple thing for now.
 -}
 
 type CallPat = ([Var], [CoreExpr])      -- Quantified variables and arguments
@@ -1918,7 +1999,8 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls
                 -- Discard specialisations if there are too many of them
               trimmed_pats = trim_pats env fn spec_info small_pats
 
---        ; pprTrace "callsToPats" (vcat [ text "calls:" <+> ppr calls
+--        ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls
+--                                       , text "done_specs:" <+> ppr (map os_pat done_specs)
 --                                       , text "good_pats:" <+> ppr good_pats ]) $
 --          return ()
 
@@ -1931,7 +2013,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
   | sc_force env
     || isNothing mb_scc
     || n_remaining >= n_pats
-  = pats          -- No need to trim
+  = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats)
+    pats          -- No need to trim
 
   | otherwise
   = emit_trace $  -- Need to trim, so keep the best ones
@@ -1975,6 +2058,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
                                speakNOf spec_count' (text "call pattern") <> comma <+>
                                text "but the limit is" <+> int max_specs) ]
                , text "Use -fspec-constr-count=n to set the bound"
+               , text "done_spec_count =" <+> int done_spec_count
+               , text "Keeping " <+> int n_remaining <> text ", out of" <+> int n_pats
                , text "Discarding:" <+> ppr (drop n_remaining sorted_pats) ]
 
 
@@ -1983,21 +2068,23 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
         --      Type variables come first, since they may scope
         --      over the following term variables
         -- The [CoreExpr] are the argument patterns for the rule
-callToPats env bndr_occs (Call _ args con_env)
+callToPats env bndr_occs call@(Call _ args con_env)
   | args `ltLength` bndr_occs      -- Check saturated
   = return Nothing
   | otherwise
-  = do  { let in_scope      = substInScope (sc_subst env)
+  = do  { let in_scope = substInScope (sc_subst env)
         ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs
-        ; let pat_fvs       = exprsFreeVarsList pats
+        ; let pat_fvs = exprsFreeVarsList pats
                 -- To get determinism we need the list of free variables in
                 -- deterministic order. Otherwise we end up creating
                 -- lambdas with different argument orders. See
                 -- determinism/simplCore/should_compile/spec-inline-determ.hs
                 -- for an example. For explanation of determinism
                 -- considerations See Note [Unique Determinism] in Unique.
+
               in_scope_vars = getInScopeVars in_scope
-              qvars         = filterOut (`elemVarSet` in_scope_vars) pat_fvs
+              is_in_scope v = v `elemVarSet` in_scope_vars
+              qvars         = filterOut is_in_scope pat_fvs
                 -- Quantify over variables that are not in scope
                 -- at the call site
                 -- See Note [Free type variables of the qvar types]
@@ -2012,8 +2099,21 @@ callToPats env bndr_occs (Call _ args con_env)
               sanitise id   = id `setIdType` expandTypeSynonyms (idType id)
                 -- See Note [Free type variables of the qvar types]
 
+              -- Bad coercion variables: see Note [SpecConstr and casts]
+              bad_covars :: CoVarSet
+              bad_covars = mapUnionVarSet get_bad_covars pats
+              get_bad_covars :: CoreArg -> CoVarSet
+              get_bad_covars (Type ty)
+                = filterVarSet (\v -> isId v && not (is_in_scope v)) $
+                  tyCoVarsOfType ty
+              get_bad_covars _
+                = emptyVarSet
+
         ; -- pprTrace "callToPats"  (ppr args $$ ppr bndr_occs) $
-          if interesting
+          WARN( not (isEmptyVarSet bad_covars)
+              , text "SpecConstr: bad covars:" <+> ppr bad_covars
+                $$ ppr call )
+          if interesting && isEmptyVarSet bad_covars
           then return (Just (qvars', pats))
           else return Nothing }
 
diff --git a/compiler/specialise/Specialise.hs b/compiler/specialise/Specialise.hs
index 869da640ea..6f775dfdcb 100644
--- a/compiler/specialise/Specialise.hs
+++ b/compiler/specialise/Specialise.hs
@@ -9,6 +9,8 @@ module Specialise ( specProgram, specUnfolding ) where
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import Id
 import TcType hiding( substTy )
 import Type   hiding( substTy, extendTvSubstList )
@@ -43,9 +45,7 @@ import State
 import UniqDFM
 
 import Control.Monad
-#if __GLASGOW_HASKELL__ > 710
 import qualified Control.Monad.Fail as MonadFail
-#endif
 
 {-
 ************************************************************************
@@ -147,7 +147,7 @@ becomes
                          in
                          fl
 
-We still have recusion for non-overloaded functions which we
+We still have recursion for non-overloaded functions which we
 specialise, but the recursive call should get specialised to the
 same recursive version.
 
@@ -735,7 +735,7 @@ specImport dflags this_mod top_env done callers rb fn calls_for_fn
   = do { warnMsg (vcat [ hang (text "Could not specialise imported function" <+> quotes (ppr fn))
                             2 (vcat [ text "when specialising" <+> quotes (ppr caller)
                                     | caller <- callers])
-                      , ifPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn))
+                      , whenPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn))
                       , text "Probable fix: add INLINABLE pragma on" <+> quotes (ppr fn) ])
        ; return ([], []) }
 
@@ -1343,10 +1343,10 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
                         -- See Note [Specialising imported functions] in OccurAnal
 
                   | InlinePragma { inl_inline = Inlinable } <- inl_prag
-                  = (inl_prag { inl_inline = EmptyInlineSpec }, noUnfolding)
+                  = (inl_prag { inl_inline = NoUserInline }, noUnfolding)
 
                   | otherwise
-                  = (inl_prag, specUnfolding poly_tyvars spec_app
+                  = (inl_prag, specUnfolding dflags poly_tyvars spec_app
                                              arity_decrease fn_unf)
 
                 arity_decrease = length spec_dict_args
@@ -2011,6 +2011,7 @@ mkCallUDs' env f args
             EqPred {}       -> True
             IrredPred {}    -> True   -- Things like (D []) where D is a
                                       -- Constraint-ranged family; Trac #7785
+            ForAllPred {}   -> True
 
 {-
 Note [Type determines value]
@@ -2095,7 +2096,7 @@ mkDB bind = (bind, bind_fvs bind)
 -- | Identify the free variables of a 'CoreBind'
 bind_fvs :: CoreBind -> VarSet
 bind_fvs (NonRec bndr rhs) = pair_fvs (bndr,rhs)
-bind_fvs (Rec prs)         = foldl delVarSet rhs_fvs bndrs
+bind_fvs (Rec prs)         = foldl' delVarSet rhs_fvs bndrs
                            where
                              bndrs = map fst prs
                              rhs_fvs = unionVarSets (map pair_fvs prs)
@@ -2287,12 +2288,10 @@ instance Monad SpecM where
                                case f y of
                                    SpecM z ->
                                        z
-    fail str = SpecM $ fail str
+    fail = MonadFail.fail
 
-#if __GLASGOW_HASKELL__ > 710
 instance MonadFail.MonadFail SpecM where
     fail str = SpecM $ fail str
-#endif
 
 instance MonadUnique SpecM where
     getUniqueSupplyM
author	Kavon Farvardin <kavon@farvard.in>	2018-09-23 15:29:37 -0500
committer	Kavon Farvardin <kavon@farvard.in>	2018-09-23 15:29:37 -0500
commit	84c2ad99582391005b5e873198b15e9e9eb4f78d (patch)
tree	caa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/specialise
parent	8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff)
parent	e68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff)
download	haskell-wip/T13904.tar.gz