summaryrefslogtreecommitdiff
path: root/compiler/GHC/StgToCmm
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/StgToCmm')
-rw-r--r--compiler/GHC/StgToCmm/Bind.hs9
-rw-r--r--compiler/GHC/StgToCmm/Closure.hs31
-rw-r--r--compiler/GHC/StgToCmm/Config.hs1
-rw-r--r--compiler/GHC/StgToCmm/Expr.hs110
-rw-r--r--compiler/GHC/StgToCmm/Heap.hs32
-rw-r--r--compiler/GHC/StgToCmm/Monad.hs17
6 files changed, 12 insertions, 188 deletions
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 177c3f2912..1ddc1675ff 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -41,7 +41,6 @@ import GHC.StgToCmm.Closure
import GHC.StgToCmm.Foreign (emitPrimCall)
import GHC.Cmm.Graph
-import GHC.Cmm.BlockId
import GHC.Cmm
import GHC.Cmm.Info
import GHC.Cmm.Utils
@@ -511,12 +510,6 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
; platform <- getPlatform
; let node_points = nodeMustPointToIt profile lf_info
node' = if node_points then Just node else Nothing
- ; loop_header_id <- newBlockId
- -- Extend reader monad with information that
- -- self-recursive tail calls can be optimized into local
- -- jumps. See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr.
- ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
- {
-- Main payload
; entryHeapCheck cl_info node' arity arg_regs $ do
{ -- emit LDV code when profiling
@@ -533,7 +526,7 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
; when node_points $ load_fvs node lf_info fv_bindings
; checkFunctionArgTags (text "TagCheck failed - Argument to local function:" <> ppr bndr) bndr args
; void $ cgExpr body
- }}}
+ }}
}
diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs
index fc76664d94..0b2640bb54 100644
--- a/compiler/GHC/StgToCmm/Closure.hs
+++ b/compiler/GHC/StgToCmm/Closure.hs
@@ -77,7 +77,6 @@ import GHC.Runtime.Heap.Layout
import GHC.Cmm
import GHC.Cmm.Utils
import GHC.StgToCmm.Types
-import GHC.StgToCmm.Sequel
import GHC.Types.CostCentre
import GHC.Cmm.BlockId
@@ -95,7 +94,6 @@ import GHC.Types.Basic
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
-import GHC.Utils.Misc
import Data.Coerce (coerce)
import qualified Data.ByteString.Char8 as BS8
@@ -511,22 +509,9 @@ getCallMethod :: StgToCmmConfig
-- tail calls using the same data constructor,
-- JumpToIt. This saves us one case branch in
-- cgIdApp
- -> Maybe SelfLoopInfo -- can we perform a self-recursive tail-call
-> CallMethod
-getCallMethod cfg _ id _ n_args v_args _cg_loc (Just (self_loop_id, block_id, args))
- | stgToCmmLoopification cfg
- , id == self_loop_id
- , args `lengthIs` (n_args - v_args)
- -- If these patterns match then we know that:
- -- * loopification optimisation is turned on
- -- * function is performing a self-recursive call in a tail position
- -- * number of non-void parameters of the function matches functions arity.
- -- See Note [Self-recursive tail calls] and Note [Void arguments in
- -- self-recursive tail calls] in GHC.StgToCmm.Expr for more details
- = JumpToIt block_id args
-
-getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self_loop_info
+getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc
| n_args == 0 -- No args at all
&& not (profileIsProfiling (stgToCmmProfile cfg))
-- See Note [Evaluating functions with profiling] in rts/Apply.cmm
@@ -534,16 +519,16 @@ getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self
| n_args < arity = SlowCall -- Not enough args
| otherwise = DirectEntry (enterIdLabel (stgToCmmPlatform cfg) name (idCafInfo id)) arity
-getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info
+getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc
= assert (n_args == 0) ReturnIt
-getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
+getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc
= assert (n_args == 0) ReturnIt
-- n_args=0 because it'd be ill-typed to apply a saturated
-- constructor application to anything
getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
- n_args _v_args _cg_loc _self_loop_info
+ n_args _v_args _cg_loc
| Just sig <- idTagSig_maybe id
, isTaggedSig sig -- Infered to be already evaluated by Tag Inference
@@ -581,7 +566,7 @@ getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
updatable) 0
-- Imported(Unknown) Ids
-getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs _self_loop_info
+getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs
| n_args == 0
, Just sig <- idTagSig_maybe id
, isTaggedSig sig -- Infered to be already evaluated by Tag Inference
@@ -598,14 +583,14 @@ getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_loc
EnterIt -- Not a function
-- TODO: Redundant with above match?
--- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
+-- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc
-- = assertPpr (n_args == 0) (ppr name <+> ppr n_args)
-- EnterIt -- Not a function
-getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) _self_loop_info
+getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs)
= JumpToIt blk_id lne_regs
-getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method"
+getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method"
-----------------------------------------------------------------------------
-- Data types for closure information
diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs
index f2bd349ae7..e5a91b0a7f 100644
--- a/compiler/GHC/StgToCmm/Config.hs
+++ b/compiler/GHC/StgToCmm/Config.hs
@@ -43,7 +43,6 @@ data StgToCmmConfig = StgToCmmConfig
-- dynamic thunks
, stgToCmmTickyTag :: !Bool -- ^ True indicates ticky will count number of avoided tag checks by tag inference.
---------------------------------- Flags --------------------------------------
- , stgToCmmLoopification :: !Bool -- ^ Loopification enabled (cf @-floopification@)
, stgToCmmAlignCheck :: !Bool -- ^ Insert alignment check (cf @-falignment-sanitisation@)
, stgToCmmOptHpc :: !Bool -- ^ perform code generation for code coverage
, stgToCmmFastPAPCalls :: !Bool -- ^
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index fcf91b4509..a7b9677c8e 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -996,7 +996,6 @@ cgIdApp fun_id args = do
platform <- getPlatform
fun_info <- getCgIdInfo fun_id
cfg <- getStgToCmmConfig
- self_loop <- getSelfLoop
let profile = stgToCmmProfile cfg
fun_arg = StgVarArg fun_id
fun_name = idName fun_id
@@ -1004,7 +1003,7 @@ cgIdApp fun_id args = do
lf_info = cg_lf fun_info
n_args = length args
v_args = length $ filter (isZeroBitTy . stgArgType) args
- case getCallMethod cfg fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop of
+ case getCallMethod cfg fun_name fun_id lf_info n_args v_args (cg_loc fun_info) of
-- A value in WHNF, so we can just return it.
ReturnIt
| isZeroBitTy (idType fun_id) -> emitReturn []
@@ -1052,113 +1051,6 @@ cgIdApp fun_id args = do
; emit (mkBranch blk_id)
; return AssignedDirectly }
--- Note [Self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Self-recursive tail calls can be optimized into a local jump in the same
--- way as let-no-escape bindings (see Note [What is a non-escaping let] in
--- "GHC.CoreToStg"). Consider this:
---
--- foo.info:
--- a = R1 // calling convention
--- b = R2
--- goto L1
--- L1: ...
--- ...
--- ...
--- L2: R1 = x
--- R2 = y
--- call foo(R1,R2)
---
--- Instead of putting x and y into registers (or other locations required by the
--- calling convention) and performing a call we can put them into local
--- variables a and b and perform jump to L1:
---
--- foo.info:
--- a = R1
--- b = R2
--- goto L1
--- L1: ...
--- ...
--- ...
--- L2: a = x
--- b = y
--- goto L1
---
--- This can be done only when function is calling itself in a tail position
--- and only if the call passes number of parameters equal to function's arity.
--- Note that this cannot be performed if a function calls itself with a
--- continuation.
---
--- This in fact implements optimization known as "loopification". It was
--- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
--- by Krzysztof Woś, though we use different approach. Krzysztof performed his
--- optimization at the Cmm level, whereas we perform ours during code generation
--- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is
--- generated in the first place.
---
--- Implementation is spread across a couple of places in the code:
---
--- * FCode monad stores additional information in its reader environment
--- (stgToCmmSelfLoop field). This information tells us which function can
--- tail call itself in an optimized way (it is the function currently
--- being compiled), what is the label of a loop header (L1 in example above)
--- and information about local registers in which we should arguments
--- before making a call (this would be a and b in example above).
---
--- * Whenever we are compiling a function, we set that information to reflect
--- the fact that function currently being compiled can be jumped to, instead
--- of called. This is done in closureCodyBody in GHC.StgToCmm.Bind.
---
--- * We also have to emit a label to which we will be jumping. We make sure
--- that the label is placed after a stack check but before the heap
--- check. The reason is that making a recursive tail-call does not increase
--- the stack so we only need to check once. But it may grow the heap, so we
--- have to repeat the heap check in every self-call. This is done in
--- do_checks in GHC.StgToCmm.Heap.
---
--- * When we begin compilation of another closure we remove the additional
--- information from the environment. This is done by forkClosureBody
--- in GHC.StgToCmm.Monad. Other functions that duplicate the environment -
--- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--- words, we only need to clean the environment of the self-loop information
--- when compiling right hand side of a closure (binding).
---
--- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--- of call will be generated. getCallMethod decides to generate a self
--- recursive tail call when (a) environment stores information about
--- possible self tail-call; (b) that tail call is to a function currently
--- being compiled; (c) number of passed non-void arguments is equal to
--- function's arity. (d) loopification is turned on via -floopification
--- command-line option.
---
--- * Command line option to turn loopification on and off is implemented in
--- DynFlags, then passed to StgToCmmConfig for this phase.
---
---
--- Note [Void arguments in self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- State# tokens can get in the way of the loopification optimization as seen in
--- #11372. Consider this:
---
--- foo :: [a]
--- -> (a -> State# s -> (# State s, Bool #))
--- -> State# s
--- -> (# State# s, Maybe a #)
--- foo [] f s = (# s, Nothing #)
--- foo (x:xs) f s = case f x s of
--- (# s', b #) -> case b of
--- True -> (# s', Just x #)
--- False -> foo xs f s'
---
--- We would like to compile the call to foo as a local jump instead of a call
--- (see Note [Self-recursive tail calls]). However, the generated function has
--- an arity of 2 while we apply it to 3 arguments, one of them being of void
--- type. Thus, we mustn't count arguments of void type when checking whether
--- we can turn a call into a self-recursive jump.
---
-
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
{ platform <- getPlatform
diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs
index a7e7f23e9d..88d48c1897 100644
--- a/compiler/GHC/StgToCmm/Heap.hs
+++ b/compiler/GHC/StgToCmm/Heap.hs
@@ -635,15 +635,6 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
Just stk_hwm -> tickyStackCheck
>> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )
- -- Emit new label that might potentially be a header
- -- of a self-recursive tail call.
- -- See Note [Self-recursive loop header].
- self_loop_info <- getSelfLoop
- case self_loop_info of
- Just (_, loop_header_id, _)
- | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
- _otherwise -> return ()
-
if isJust mb_alloc_lit
then do
tickyHeapCheck
@@ -667,26 +658,3 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
-- stack check succeeds. Otherwise we might end up
-- with slop at the end of the current block, which can
-- confuse the LDV profiler.
-
--- Note [Self-recursive loop header]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- Self-recursive loop header is required by loopification optimization (See
--- Note [Self-recursive tail calls] in GHC.StgToCmm.Expr). We emit it if:
---
--- 1. There is information about self-loop in the FCode environment. We don't
--- check the binder (first component of the self_loop_info) because we are
--- certain that if the self-loop info is present then we are compiling the
--- binder body. Reason: the only possible way to get here with the
--- self_loop_info present is from closureCodeBody.
---
--- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--- to preempt the heap check (see #367 for motivation behind this check). It
--- is True for heap checks placed at the entry to a function and
--- let-no-escape heap checks but false for other heap checks (eg. in case
--- alternatives or created from hand-written high-level Cmm). The second
--- check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--- function and some heap checks created in hand-written Cmm. Otherwise it
--- is Nothing. In other words the only situation when both conditions are
--- true is when compiling stack and heap checks at the entry to a
--- function. This is the only situation when we want to emit a self-loop
--- label.
diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs
index 9f9d292937..50043bc01f 100644
--- a/compiler/GHC/StgToCmm/Monad.hs
+++ b/compiler/GHC/StgToCmm/Monad.hs
@@ -54,7 +54,7 @@ module GHC.StgToCmm.Monad (
getModuleName,
-- ideally we wouldn't export these, but some other modules access internal state
- getState, setState, getSelfLoop, withSelfLoop, getStgToCmmConfig,
+ getState, setState, getStgToCmmConfig,
-- more localised access to monad state
CgIdInfo(..),
@@ -296,8 +296,6 @@ data FCodeState =
-- else the RTS will deadlock _and_ also experience a severe
-- performance degradation
, fcs_sequel :: !Sequel -- ^ What to do at end of basic block
- , fcs_selfloop :: Maybe SelfLoopInfo -- ^ Which tail calls can be compiled as local jumps?
- -- See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr
, fcs_ticky :: !CLabel -- ^ Destination for ticky counts
, fcs_tickscope :: !CmmTickScope -- ^ Tick scope for new blocks & ticks
}
@@ -455,7 +453,6 @@ initFCodeState :: Platform -> FCodeState
initFCodeState p =
MkFCodeState { fcs_upframeoffset = platformWordSizeInBytes p
, fcs_sequel = Return
- , fcs_selfloop = Nothing
, fcs_ticky = mkTopTickyCtrLabel
, fcs_tickscope = GlobalScope
}
@@ -467,22 +464,13 @@ getFCodeState = FCode $ \_ fstate state -> (fstate,state)
withFCodeState :: FCode a -> FCodeState -> FCode a
withFCodeState (FCode fcode) fst = FCode $ \cfg _ state -> fcode cfg fst state
-getSelfLoop :: FCode (Maybe SelfLoopInfo)
-getSelfLoop = fcs_selfloop <$> getFCodeState
-
-withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a
-withSelfLoop self_loop code = do
- fstate <- getFCodeState
- withFCodeState code (fstate {fcs_selfloop = Just self_loop})
-
-- ----------------------------------------------------------------------------
-- Get/set the end-of-block info
withSequel :: Sequel -> FCode a -> FCode a
withSequel sequel code
= do { fstate <- getFCodeState
- ; withFCodeState code (fstate { fcs_sequel = sequel
- , fcs_selfloop = Nothing }) }
+ ; withFCodeState code (fstate { fcs_sequel = sequel }) }
getSequel :: FCode Sequel
getSequel = fcs_sequel <$> getFCodeState
@@ -578,7 +566,6 @@ forkClosureBody body_code
; state <- getState
; let fcs = fstate { fcs_sequel = Return
, fcs_upframeoffset = platformWordSizeInBytes platform
- , fcs_selfloop = Nothing
}
fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
((),fork_state_out) = doFCode body_code cfg fcs fork_state_in