summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2020-06-02 16:01:40 -0400
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-08-18 15:38:55 -0400
commitf4cc57fa2df08f5b33a4cf86c3e041b8de9f6ebf (patch)
tree53a11791ee7a0cf14c00e9e8f75d74209c207ae2
parent55fd1dc55990623dcf3b2e6143e766242315d757 (diff)
downloadhaskell-f4cc57fa2df08f5b33a4cf86c3e041b8de9f6ebf.tar.gz
Allow unsaturated runRW# applications
Previously we had a very aggressive Core Lint check which caught unsaturated applications of runRW#. However, there is nothing wrong with such applications and they may naturally arise in desugared Core. For instance, the desugared Core of Data.Primitive.Array.runArray# from the `primitive` package contains: case ($) (runRW# @_ @_) (\s -> ...) of ... In this case it's almost certain that ($) will be inlined, turning the application into a saturated application. However, even if this weren't the case there isn't a problem: CorePrep (after deleting an unnecessary case) can simply generate code in its usual way, resulting in a call to the Haskell definition of runRW#. Fixes #18291.
-rw-r--r--compiler/GHC/Core/Lint.hs50
-rw-r--r--compiler/GHC/Core/Opt/Simplify.hs4
-rw-r--r--compiler/GHC/CoreToStg/Prep.hs124
-rw-r--r--libraries/ghc-prim/GHC/Magic.hs2
4 files changed, 111 insertions, 69 deletions
diff --git a/compiler/GHC/Core/Lint.hs b/compiler/GHC/Core/Lint.hs
index a3582de953..9054ca086c 100644
--- a/compiler/GHC/Core/Lint.hs
+++ b/compiler/GHC/Core/Lint.hs
@@ -731,8 +731,6 @@ lintJoinLams join_arity enforce rhs
where
go 0 expr = lintCoreExpr expr
go n (Lam var body) = lintLambda var $ go (n-1) body
- -- N.B. join points can be cast. e.g. we consider ((\x -> ...) `cast` ...)
- -- to be a join point at join arity 1.
go n expr | Just bndr <- enforce -- Join point with too few RHS lambdas
= failWithL $ mkBadJoinArityMsg bndr join_arity n rhs
| otherwise -- Future join point, not yet eta-expanded
@@ -781,36 +779,26 @@ hurts us here.
Note [Linting of runRW#]
~~~~~~~~~~~~~~~~~~~~~~~~
-runRW# has some very peculiar behavior (see Note [runRW magic] in
-GHC.CoreToStg.Prep) which CoreLint must accommodate.
+runRW# has some very special behavior (see Note [runRW magic] in
+GHC.CoreToStg.Prep) which CoreLint must accommodate, by allowing
+join points in its argument. For example, this is fine:
-As described in Note [Casts and lambdas] in
-GHC.Core.Opt.Simplify.Utils, the simplifier pushes casts out of
-lambdas. Concretely, the simplifier will transform
+ join j x = ...
+ in runRW# (\s. case v of
+ A -> j 3
+ B -> j 4)
- runRW# @r @ty (\s -> expr `cast` co)
+Usually those calls to the join point 'j' would not be valid tail calls,
+because they occur in a function argument. But in the case of runRW#
+they are fine, because runRW# (\s.e) behaves operationally just like e.
+(runRW# is ultimately inlined in GHC.CoreToStg.Prep.)
-into
-
- runRW# @r @ty ((\s -> expr) `cast` co)
-
-Consequently we need to handle the case that the continuation is a
-cast of a lambda. See Note [Casts and lambdas] in
-GHC.Core.Opt.Simplify.Utils.
-
-In the event that the continuation is headed by a lambda (which
-will bind the State# token) we can safely allow calls to join
-points since CorePrep is going to apply the continuation to
-RealWorld.
-
-In the case that the continuation is not a lambda we lint the
-continuation disallowing join points, to rule out things like,
+In the case that the continuation is /not/ a lambda we simply disable this
+special behaviour. For example, this is /not/ fine:
join j = ...
- in runRW# @r @ty (
- let x = jump j
- in x
- )
+ in runRW# @r @ty (jump j)
+
************************************************************************
@@ -931,10 +919,6 @@ lintCoreExpr e@(App _ _)
; (fun_ty2, ue2) <- lintCoreArg fun_pair1 arg_ty2
-- See Note [Linting of runRW#]
; let lintRunRWCont :: CoreArg -> LintM (LintedType, UsageEnv)
- lintRunRWCont (Cast expr co) = do
- (ty, ue) <- lintRunRWCont expr
- new_ty <- lintCastExpr expr ty co
- return (new_ty, ue)
lintRunRWCont expr@(Lam _ _) = do
lintJoinLams 1 (Just fun) expr
lintRunRWCont other = markAllJoinsBad $ lintCoreExpr other
@@ -943,10 +927,6 @@ lintCoreExpr e@(App _ _)
; app_ty <- lintValApp arg3 fun_ty2 arg3_ty ue2 ue3
; lintCoreArgs app_ty rest }
- | Var fun <- fun
- , fun `hasKey` runRWKey
- = failWithL (text "Invalid runRW# application")
-
| otherwise
= do { pair <- lintCoreFun fun (length args)
; lintCoreArgs pair args }
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index a529a8b0a4..4895b312db 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -1972,8 +1972,10 @@ rebuildCall env info (ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_c
= rebuildCall env (addTyArgTo info arg_ty hole_ty) cont
---------- The runRW# rule. Do this after absorbing all arguments ------
+-- See Note [Simplification of runRW#] in GHC.CoreToSTG.Prep.
+--
-- runRW# :: forall (r :: RuntimeRep) (o :: TYPE r). (State# RealWorld -> o) -> o
--- K[ runRW# rr ty body ] --> runRW rr' ty' (\s. K[ body s ])
+-- K[ runRW# rr ty body ] --> runRW rr' ty' (\s. K[ body s ])
rebuildCall env (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
(ApplyToVal { sc_arg = arg, sc_env = arg_se
, sc_cont = cont, sc_hole_ty = fun_ty })
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index 4ecb29da7a..e5fc09522d 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -723,18 +723,6 @@ instance Outputable ArgInfo where
ppr (CpeCast co) = text "cast" <+> ppr co
ppr (CpeTick tick) = text "tick" <+> ppr tick
-{-
- Note [runRW arg]
-~~~~~~~~~~~~~~~~~~~
-If we got, say
- runRW# (case bot of {})
-which happened in #11291, we do /not/ want to turn it into
- (case bot of {}) realWorldPrimId#
-because that gives a panic in CoreToStg.myCollectArgs, which expects
-only variables in function position. But if we are sure to make
-runRW# strict (which we do in GHC.Types.Id.Make), this can't happen
--}
-
cpeApp :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeRhs)
-- May return a CpeRhs because of saturating primops
cpeApp top_env expr
@@ -800,10 +788,6 @@ cpeApp top_env expr
_ -> cpe_app env arg (CpeApp (Var realWorldPrimId) : rest) (n-1)
-- TODO: What about casts?
- cpe_app _env (Var f) args n
- | f `hasKey` runRWKey
- = pprPanic "cpe_app(runRW#)" (ppr args $$ ppr n)
-
cpe_app env (Var v) args depth
= do { v1 <- fiddleCCall v
; let e2 = lookupCorePrepEnv env v1
@@ -925,34 +909,96 @@ optimization (right before lowering to STG, in CorePrep), we can ensure that
no further floating will occur. This allows us to safely inline things like
@runST@, which are otherwise needlessly expensive (see #10678 and #5916).
-'runRW' is defined (for historical reasons) in GHC.Magic, with a NOINLINE
-pragma. It is levity-polymorphic.
+'runRW' has a variety of quirks:
+
+ * 'runRW' is known-key with a NOINLINE definition in
+ GHC.Magic. This definition is used in cases where runRW is curried.
+
+ * In addition to its normal Haskell definition in GHC.Magic, we give it
+ a special late inlining here in CorePrep and GHC.CoreToByteCode, avoiding
+ the incorrect sharing due to float-out noted above.
+
+ * It is levity-polymorphic:
runRW# :: forall (r1 :: RuntimeRep). (o :: TYPE r)
=> (State# RealWorld -> (# State# RealWorld, o #))
- -> (# State# RealWorld, o #)
+ -> (# State# RealWorld, o #)
+
+ * It has some special simplification logic to allow unboxing of results when
+ runRW# appears in a strict context. See Note [Simplification of runRW#]
+ below.
+
+ * Since its body is inlined, we allow runRW#'s argument to contain jumps to
+ join points. That is, the following is allowed:
+
+ join j x = ...
+ in runRW# @_ @_ (\s -> ... jump j 42 ...)
+
+ The Core Linter knows about this. See Note [Linting of runRW#] in
+ GHC.Core.Lint for details.
+
+ The occurrence analyser and SetLevels also know about this, as described in
+ Note [Simplification of runRW#].
+
+Other relevant Notes:
-It's correctness needs no special treatment in GHC except this special inlining
-here in CorePrep (and in GHC.CoreToByteCode).
+ * Note [Simplification of runRW#] below, describing a transformation of runRW
+ applications in strict contexts performed by the simplifier.
+ * Note [Linting of runRW#] in GHC.Core.Lint
+ * Note [runRW arg] below, describing a non-obvious case where the
+ late-inlining could go wrong.
-However, there are a variety of optimisation opportunities that the simplifier
-takes advantage of. See Note [Simplification of runRW#].
+
+ Note [runRW arg]
+~~~~~~~~~~~~~~~~~~~
+Consider the Core program (from #11291),
+
+ runRW# (case bot of {})
+
+The late inlining logic in cpe_app would transform this into:
+
+ (case bot of {}) realWorldPrimId#
+
+Which would rise to a panic in CoreToStg.myCollectArgs, which expects only
+variables in function position.
+
+However, as runRW#'s strictness signature captures the fact that it will call
+its argument this can't happen: the simplifier will transform the bottoming
+application into simply (case bot of {}).
+
+Note that this reasoning does *not* apply to non-bottoming continuations like:
+
+ hello :: Bool -> Int
+ hello n =
+ runRW# (
+ case n of
+ True -> \s -> 23
+ _ -> \s -> 10)
+
+Why? The difference is that (case bot of {}) is considered by okCpeArg to be
+trivial, consequently cpeArg (which the catch-all case of cpe_app calls on both
+the function and the arguments) will forgo binding it to a variable. By
+contrast, in the non-bottoming case of `hello` above the function will be
+deemed non-trivial and consequently will be case-bound.
Note [Simplification of runRW#]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider the program,
- case runRW# (\s -> let n = I# 42# in n) of
+ case runRW# (\s -> I# 42#) of
I# n# -> f n#
There is no reason why we should allocate an I# constructor given that we
-immediately destructure it. To avoid this the simplifier will push strict
-contexts into runRW's continuation. That is, it transforms
+immediately destructure it.
+
+To avoid this the simplifier has a special transformation rule, specific to
+runRW#, that pushes a strict context into runRW#'s continuation. See the
+`runRW#` guard in `GHC.Core.Opt.Simplify.rebuildCall`. That is, it transforms
K[ runRW# @r @ty cont ]
~>
- runRW# @r @ty K[cont]
+ runRW# @r @ty (\s -> K[cont s])
This has a few interesting implications. Consider, for instance, this program:
@@ -971,15 +1017,29 @@ Performing the transform described above would result in:
If runRW# were a "normal" function this call to join point j would not be
allowed in its continuation argument. However, since runRW# is inlined (as
described in Note [runRW magic] above), such join point occurences are
-completely fine. Both occurrence analysis and Core Lint have special treatment
-for runRW# applications. See Note [Linting of runRW#] for details on the latter.
+completely fine. Both occurrence analysis (see the runRW guard in occAnalApp)
+and Core Lint (see the App case of lintCoreExpr) have special treatment for
+runRW# applications. See Note [Linting of runRW#] for details on the latter.
Moreover, it's helpful to ensure that runRW's continuation isn't floated out
-(since doing so would then require a call, whereas we would otherwise end up
-with straight-line). Consequently, GHC.Core.Opt.SetLevels.lvlApp has special
-treatment for runRW# applications, ensure the arguments are not floated if
+For instance, if we have
+
+ runRW# (\s -> do_something)
+
+where do_something contains only top-level free variables, we may be tempted to
+float the argument to the top-level. However, we must resist this urge as since
+doing so would then require that runRW# produce an allocation and call, e.g.:
+
+ let lvl = \s -> do_somethign
+ in
+ ....(runRW# lvl)....
+
+whereas without floating the inlining of the definition of runRW would result
+in straight-line code. Consequently, GHC.Core.Opt.SetLevels.lvlApp has special
+treatment for runRW# applications, ensure the arguments are not floated as
MFEs.
+
Other considered designs
------------------------
diff --git a/libraries/ghc-prim/GHC/Magic.hs b/libraries/ghc-prim/GHC/Magic.hs
index 77cfd00a54..d1f2650a96 100644
--- a/libraries/ghc-prim/GHC/Magic.hs
+++ b/libraries/ghc-prim/GHC/Magic.hs
@@ -122,7 +122,7 @@ oneShot f = f
runRW# :: forall (r :: RuntimeRep) (o :: TYPE r).
(State# RealWorld -> o) -> o
--- See Note [runRW magic] in CorePrep
+-- See Note [runRW magic] in GHC.CoreToStg.Prep.
{-# NOINLINE runRW# #-} -- runRW# is inlined manually in CorePrep
#if !defined(__HADDOCK_VERSION__)
runRW# m = m realWorld#