summaryrefslogtreecommitdiff
path: root/compiler/codeGen
diff options
context:
space:
mode:
authorRyan Newton <rrnewton@gmail.com>2013-08-31 15:28:02 -0400
committerRyan Newton <rrnewton@gmail.com>2013-08-31 15:28:02 -0400
commite251a51a990c3a9c95dabab139d42ad69479f61c (patch)
tree94243fe32bb64d0cab79074ef8de8cb3530d2973 /compiler/codeGen
parent6fd60b2382efa357fe99fa017fd343db9724d43a (diff)
parentea87014a7ad4454f18bb15f6f0ee4b6e61b148be (diff)
downloadhaskell-e251a51a990c3a9c95dabab139d42ad69479f61c.tar.gz
Merge branch 'master' into atomics
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--compiler/codeGen/StgCmm.hs7
-rw-r--r--compiler/codeGen/StgCmmBind.hs16
-rw-r--r--compiler/codeGen/StgCmmClosure.hs147
-rw-r--r--compiler/codeGen/StgCmmExpr.hs111
-rw-r--r--compiler/codeGen/StgCmmMonad.hs125
-rw-r--r--compiler/codeGen/StgCmmProf.hs71
6 files changed, 302 insertions, 175 deletions
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
index 8b3bac3b4f..9b1bce4b57 100644
--- a/compiler/codeGen/StgCmm.hs
+++ b/compiler/codeGen/StgCmm.hs
@@ -257,13 +257,10 @@ cgDataCon data_con
-- Stuff to support splitting
---------------------------------------------------------------
--- If we're splitting the object, we need to externalise all the
--- top-level names (and then make sure we only use the externalised
--- one in any C label we use which refers to this name).
-
maybeExternaliseId :: DynFlags -> Id -> FCode Id
maybeExternaliseId dflags id
- | gopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs
+ | gopt Opt_SplitObjs dflags, -- See Note [Externalise when splitting]
+ -- in StgCmmMonad
isInternalName name = do { mod <- getModuleName
; returnFC (setIdName id (externalise mod)) }
| otherwise = returnFC id
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index ce5491dc10..dccefd0fb0 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -30,6 +30,7 @@ import StgCmmForeign (emitPrimCall)
import MkGraph
import CoreSyn ( AltCon(..) )
import SMRep
+import BlockId
import Cmm
import CmmInfo
import CmmUtils
@@ -476,7 +477,17 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
; let node_points = nodeMustPointToIt dflags lf_info
node' = if node_points then Just node else Nothing
; when node_points (ldvEnterClosure cl_info)
-
+ -- Emit new label that might potentially be a header
+ -- of a self-recursive tail call. See Note
+ -- [Self-recursive tail calls] in StgCmmExpr
+ ; u <- newUnique
+ ; let loop_header_id = mkBlockId u
+ ; emitLabel loop_header_id
+ -- Extend reader monad with information that
+ -- self-recursive tail calls can be optimized into local
+ -- jumps
+ ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
+ {
-- Main payload
; entryHeapCheck cl_info node' arity arg_regs $ do
{ -- ticky after heap check to avoid double counting
@@ -490,7 +501,8 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
-- heap check, to reduce live vars over check
; when node_points $ load_fvs node lf_info fv_bindings
; void $ cgExpr body
- }}
+ }}}
+
}
-- A function closure pointer may be tagged, so we
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
index 611a570d70..04297b4258 100644
--- a/compiler/codeGen/StgCmmClosure.hs
+++ b/compiler/codeGen/StgCmmClosure.hs
@@ -27,10 +27,9 @@ module StgCmmClosure (
lfDynTag,
maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable,
- nodeMustPointToIt,
- CallMethod(..), getCallMethod,
-
- isKnownFun, funTag, tagForArity,
+ -- * Used by other modules
+ CgLoc(..), SelfLoopInfo, CallMethod(..),
+ nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod,
-- * ClosureInfo
ClosureInfo,
@@ -69,11 +68,14 @@ module StgCmmClosure (
import StgSyn
import SMRep
import Cmm
+import PprCmmExpr()
+import BlockId
import CLabel
import Id
import IdInfo
import DataCon
+import FastString
import Name
import Type
import TypeRep
@@ -85,6 +87,37 @@ import DynFlags
import Util
-----------------------------------------------------------------------------
+-- Data types and synonyms
+-----------------------------------------------------------------------------
+
+-- These data types are mostly used by other modules, especially StgCmmMonad,
+-- but we define them here because some functions in this module need to
+-- have access to them as well
+
+data CgLoc
+ = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning
+ -- Hp, so that it remains valid across calls
+
+ | LneLoc BlockId [LocalReg] -- A join point
+ -- A join point (= let-no-escape) should only
+ -- be tail-called, and in a saturated way.
+ -- To tail-call it, assign to these locals,
+ -- and branch to the block id
+
+instance Outputable CgLoc where
+ ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e
+ ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs
+
+type SelfLoopInfo = (Id, BlockId, [LocalReg])
+
+-- used by ticky profiling
+isKnownFun :: LambdaFormInfo -> Bool
+isKnownFun (LFReEntrant _ _ _ _) = True
+isKnownFun LFLetNoEscape = True
+isKnownFun _ = False
+
+
+-----------------------------------------------------------------------------
-- Representations
-----------------------------------------------------------------------------
@@ -122,23 +155,23 @@ isGcPtrRep _ = False
-- tail call or return that identifier.
data LambdaFormInfo
- = LFReEntrant -- Reentrant closure (a function)
- TopLevelFlag -- True if top level
- !RepArity -- Arity. Invariant: always > 0
- !Bool -- True <=> no fvs
+ = LFReEntrant -- Reentrant closure (a function)
+ TopLevelFlag -- True if top level
+ !RepArity -- Arity. Invariant: always > 0
+ !Bool -- True <=> no fvs
ArgDescr -- Argument descriptor (should really be in ClosureInfo)
- | LFThunk -- Thunk (zero arity)
+ | LFThunk -- Thunk (zero arity)
TopLevelFlag
- !Bool -- True <=> no free vars
- !Bool -- True <=> updatable (i.e., *not* single-entry)
+ !Bool -- True <=> no free vars
+ !Bool -- True <=> updatable (i.e., *not* single-entry)
StandardFormInfo
- !Bool -- True <=> *might* be a function type
+ !Bool -- True <=> *might* be a function type
- | LFCon -- A saturated constructor application
- DataCon -- The constructor
+ | LFCon -- A saturated constructor application
+ DataCon -- The constructor
- | LFUnknown -- Used for function arguments and imported things.
+ | LFUnknown -- Used for function arguments and imported things.
-- We know nothing about this closure.
-- Treat like updatable "LFThunk"...
-- Imported things which we *do* know something about use
@@ -149,10 +182,10 @@ data LambdaFormInfo
-- because then we know the entry code will do
-- For a function, the entry code is the fast entry point
- | LFUnLifted -- A value of unboxed type;
+ | LFUnLifted -- A value of unboxed type;
-- always a value, needs evaluation
- | LFLetNoEscape -- See LetNoEscape module for precise description
+ | LFLetNoEscape -- See LetNoEscape module for precise description
| LFBlackHole -- Used for the closures allocated to hold the result
-- of a CAF. We want the target of the update frame to
@@ -175,7 +208,7 @@ data StandardFormInfo
-- case x of
-- con a1,..,an -> ak
-- and the constructor is from a single-constr type.
- WordOff -- 0-origin offset of ak within the "goods" of
+ WordOff -- 0-origin offset of ak within the "goods" of
-- constructor (Recall that the a1,...,an may be laid
-- out in the heap in a non-obvious order.)
@@ -205,9 +238,9 @@ mkLFLetNoEscape :: LambdaFormInfo
mkLFLetNoEscape = LFLetNoEscape
-------------
-mkLFReEntrant :: TopLevelFlag -- True of top level
- -> [Id] -- Free vars
- -> [Id] -- Args
+mkLFReEntrant :: TopLevelFlag -- True of top level
+ -> [Id] -- Free vars
+ -> [Id] -- Args
-> ArgDescr -- Argument descriptor
-> LambdaFormInfo
@@ -256,7 +289,7 @@ mkLFImported :: Id -> LambdaFormInfo
mkLFImported id
| Just con <- isDataConWorkId_maybe id
, isNullaryRepDataCon con
- = LFCon con -- An imported nullary constructor
+ = LFCon con -- An imported nullary constructor
-- We assume that the constructor is evaluated so that
-- the id really does point directly to the constructor
@@ -465,49 +498,65 @@ When black-holing, single-entry closures could also be entered via node
(rather than directly) to catch double-entry. -}
data CallMethod
- = EnterIt -- No args, not a function
+ = EnterIt -- No args, not a function
- | JumpToIt -- A join point
+ | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
- | ReturnIt -- It's a value (function, unboxed value,
+ | ReturnIt -- It's a value (function, unboxed value,
-- or constructor), so just return it.
| SlowCall -- Unknown fun, or known fun with
-- too few args.
| DirectEntry -- Jump directly, with args in regs
- CLabel -- The code label
- RepArity -- Its arity
+ CLabel -- The code label
+ RepArity -- Its arity
getCallMethod :: DynFlags
-> Name -- Function being applied
- -> CafInfo -- Can it refer to CAF's?
- -> LambdaFormInfo -- Its info
- -> RepArity -- Number of available arguments
+ -> Id -- Function Id used to chech if it can refer to
+ -- CAF's and whether the function is tail-calling
+ -- itself
+ -> LambdaFormInfo -- Its info
+ -> RepArity -- Number of available arguments
+ -> CgLoc -- Passed in from cgIdApp so that we can
+ -- handle let-no-escape bindings and self-recursive
+ -- tail calls using the same data constructor,
+ -- JumpToIt. This saves us one case branch in
+ -- cgIdApp
+ -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call?
-> CallMethod
-getCallMethod dflags _name _ lf_info _n_args
+getCallMethod _ _ id _ n_args _cg_loc (Just (self_loop_id, block_id, args))
+ | id == self_loop_id, n_args == length args
+ -- If these patterns match then we know that:
+ -- * function is performing a self-recursive call in a tail position
+ -- * number of parameters of the function matches functions arity.
+ -- See Note [Self-recursive tail calls] in StgCmmExpr for more details
+ = JumpToIt block_id args
+
+getCallMethod dflags _name _ lf_info _n_args _cg_loc _self_loop_info
| nodeMustPointToIt dflags lf_info && gopt Opt_Parallel dflags
- = -- If we're parallel, then we must always enter via node.
+ = -- If we're parallel, then we must always enter via node.
-- The reason is that the closure may have been
-- fetched since we allocated it.
EnterIt
-getCallMethod dflags name caf (LFReEntrant _ arity _ _) n_args
+getCallMethod dflags name id (LFReEntrant _ arity _ _) n_args _cg_loc _self_loop_info
| n_args == 0 = ASSERT( arity /= 0 )
ReturnIt -- No args at all
| n_args < arity = SlowCall -- Not enough args
- | otherwise = DirectEntry (enterIdLabel dflags name caf) arity
+ | otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity
-getCallMethod _ _name _ LFUnLifted n_args
+getCallMethod _ _name _ LFUnLifted n_args _cg_loc _self_loop_info
= ASSERT( n_args == 0 ) ReturnIt
-getCallMethod _ _name _ (LFCon _) n_args
+getCallMethod _ _name _ (LFCon _) n_args _cg_loc _self_loop_info
= ASSERT( n_args == 0 ) ReturnIt
-getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_args
- | is_fun -- it *might* be a function, so we must "call" it (which is always safe)
- = SlowCall -- We cannot just enter it [in eval/apply, the entry code
+getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun) n_args _cg_loc _self_loop_info
+ | is_fun -- it *might* be a function, so we must "call" it (which is always safe)
+ = SlowCall -- We cannot just enter it [in eval/apply, the entry code
-- is the fast-entry code]
-- Since is_fun is False, we are *definitely* looking at a data value
@@ -527,27 +576,24 @@ getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_arg
| otherwise -- Jump direct to code for single-entry thunks
= ASSERT( n_args == 0 )
- DirectEntry (thunkEntryLabel dflags name caf std_form_info updatable) 0
+ DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info updatable) 0
-getCallMethod _ _name _ (LFUnknown True) _n_args
+getCallMethod _ _name _ (LFUnknown True) _n_arg _cg_locs _self_loop_info
= SlowCall -- might be a function
-getCallMethod _ name _ (LFUnknown False) n_args
+getCallMethod _ name _ (LFUnknown False) n_args _cg_loc _self_loop_info
= ASSERT2( n_args == 0, ppr name <+> ppr n_args )
EnterIt -- Not a function
-getCallMethod _ _name _ LFBlackHole _n_args
- = SlowCall -- Presumably the black hole has by now
+getCallMethod _ _name _ LFBlackHole _n_args _cg_loc _self_loop_info
+ = SlowCall -- Presumably the black hole has by now
-- been updated, but we don't know with
-- what, so we slow call it
-getCallMethod _ _name _ LFLetNoEscape _n_args
- = JumpToIt
+getCallMethod _ _name _ LFLetNoEscape _n_args (LneLoc blk_id lne_regs) _self_loop_info
+ = JumpToIt blk_id lne_regs
-isKnownFun :: LambdaFormInfo -> Bool
-isKnownFun (LFReEntrant _ _ _ _) = True
-isKnownFun LFLetNoEscape = True
-isKnownFun _ = False
+getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method"
-----------------------------------------------------------------------------
-- staticClosureRequired
@@ -680,7 +726,6 @@ mkCmmInfo ClosureInfo {..}
, cit_prof = closureProf
, cit_srt = NoC_SRT }
-
--------------------------------------
-- Building ClosureInfos
--------------------------------------
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 24b12f7237..331e65819f 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -160,7 +160,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
return ( lneIdInfo dflags bndr args
, code )
where
- code = forkProc $ do {
+ code = forkLneBody $ do {
; withNewTickyCounterLNE (idName bndr) args $ do
; restoreCurrentCostCentre cc_slot
; arg_regs <- bindArgsToRegs args
@@ -632,14 +632,20 @@ cgConApp con stg_args
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
cgIdApp fun_id args = do
- dflags <- getDynFlags
- fun_info <- getCgIdInfo fun_id
- let fun_arg = StgVarArg fun_id
- fun_name = idName fun_id
- fun = idInfoToAmode fun_info
- lf_info = cg_lf fun_info
+ dflags <- getDynFlags
+ fun_info <- getCgIdInfo fun_id
+ self_loop_info <- getSelfLoop
+ let cg_fun_id = cg_id fun_info
+ -- NB: use (cg_id fun_info) instead of fun_id, because
+ -- the former may be externalised for -split-objs.
+ -- See Note [Externalise when splitting] in StgCmmMonad
+
+ fun_arg = StgVarArg cg_fun_id
+ fun_name = idName cg_fun_id
+ fun = idInfoToAmode fun_info
+ lf_info = cg_lf fun_info
node_points dflags = nodeMustPointToIt dflags lf_info
- case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of
+ case (getCallMethod dflags fun_name cg_fun_id lf_info (length args) (cg_loc fun_info) self_loop_info) of
-- A value in WHNF, so we can just return it.
ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged?
@@ -659,14 +665,87 @@ cgIdApp fun_id args = do
then directCall NativeNodeCall lbl arity (fun_arg:args)
else directCall NativeDirectCall lbl arity args }
- -- Let-no-escape call
- JumpToIt -> let (LneLoc blk_id lne_regs) = cg_loc fun_info
- in do
- { adjustHpBackwards -- always do this before a tail-call
- ; cmm_args <- getNonVoidArgAmodes args
- ; emitMultiAssign lne_regs cmm_args
- ; emit (mkBranch blk_id)
- ; return AssignedDirectly }
+ -- Let-no-escape call or self-recursive tail-call
+ JumpToIt blk_id lne_regs -> do
+ { adjustHpBackwards -- always do this before a tail-call
+ ; cmm_args <- getNonVoidArgAmodes args
+ ; emitMultiAssign lne_regs cmm_args
+ ; emit (mkBranch blk_id)
+ ; return AssignedDirectly }
+
+-- Note [Self-recursive tail calls]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Self-recursive tail calls can be optimized into a local jump in the same
+-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
+-- stgSyn/CoreToStg.lhs). Consider this:
+--
+-- foo.info:
+-- a = R1 // calling convention
+-- b = R2
+-- goto L1
+-- L1: ...
+-- ...
+-- ...
+-- L2: R1 = x
+-- R2 = y
+-- call foo(R1,R2)
+--
+-- Instead of putting x and y into registers (or other locations required by the
+-- calling convention) and performing a call we can put them into local
+-- variables a and b and perform jump to L1:
+--
+-- foo.info:
+-- a = R1
+-- b = R2
+-- goto L1
+-- L1: ...
+-- ...
+-- ...
+-- L2: a = x
+-- b = y
+-- goto L1
+--
+-- This can be done only when function is calling itself in a tail position
+-- and only if the call passes number of parameters equal to function's arity.
+-- Note that this cannot be performed if a function calls itself with a
+-- continuation.
+--
+-- This in fact implements optimization known as "loopification". It was
+-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
+-- by Krzysztof Woś, though we use different approach. Krzysztof performed his
+-- optimization at the Cmm level, whereas we perform ours during code generation
+-- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is
+-- generated in the first place.
+--
+-- Implementation is spread across a couple of places in the code:
+--
+-- * FCode monad stores additional information in its reader environment
+-- (cgd_self_loop field). This information tells us which function can
+-- tail call itself in an optimized way (it is the function currently
+-- being compiled), what is the label of a loop header (L1 in example above)
+-- and information about local registers in which we should arguments
+-- before making a call (this would be a and b in example above).
+--
+-- * Whenever we are compiling a function, we set that information to reflect
+-- the fact that function currently being compiled can be jumped to, instead
+-- of called. We also have to emit a label to which we will be jumping. Both
+-- things are done in closureCodyBody in StgCmmBind.
+--
+-- * When we began compilation of another closure we remove the additional
+-- information from the environment. This is done by forkClosureBody
+-- in StgCmmMonad. Other functions that duplicate the environment -
+-- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
+-- words, we only need to clean the environment of the self-loop information
+-- when compiling right hand side of a closure (binding).
+--
+-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
+-- of call will be generated. getCallMethod decides to generate a self
+-- recursive tail call when (a) environment stores information about
+-- possible self tail-call; (b) that tail call is to a function currently
+-- being compiled; (c) number of passed arguments is equal to function's
+-- arity.
+
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index c3dc50ef98..27d4fd6386 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -26,7 +26,7 @@ module StgCmmMonad (
mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto,
mkCall, mkCmmCall,
- forkClosureBody, forkAlts, forkProc, codeOnly,
+ forkClosureBody, forkLneBody, forkAlts, codeOnly,
ConTagZ,
@@ -44,10 +44,10 @@ module StgCmmMonad (
getModuleName,
-- ideally we wouldn't export these, but some other modules access internal state
- getState, setState, getInfoDown, getDynFlags, getThisPackage,
+ getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, getThisPackage,
-- more localised access to monad state
- CgIdInfo(..), CgLoc(..),
+ CgIdInfo(..),
getBinds, setBinds,
-- out of general friendliness, we also export ...
@@ -60,6 +60,7 @@ import Cmm
import StgCmmClosure
import DynFlags
import Hoopl
+import Maybes
import MkGraph
import BlockId
import CLabel
@@ -100,11 +101,10 @@ infixr 9 `thenFC`
-- - A reader monad, for CgInfoDownwards, containing
-- - DynFlags,
-- - the current Module
--- - the static top-level environmnet
-- - the update-frame offset
-- - the ticky counter label
-- - the Sequel (the continuation to return to)
-
+-- - the self-recursive tail call information
--------------------------------------------------------
@@ -169,51 +169,48 @@ fixC fcode = FCode (
data CgInfoDownwards -- information only passed *downwards* by the monad
= MkCgInfoDown {
- cgd_dflags :: DynFlags,
- cgd_mod :: Module, -- Module being compiled
- cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame
- cgd_ticky :: CLabel, -- Current destination for ticky counts
- cgd_sequel :: Sequel -- What to do at end of basic block
+ cgd_dflags :: DynFlags,
+ cgd_mod :: Module, -- Module being compiled
+ cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame
+ cgd_ticky :: CLabel, -- Current destination for ticky counts
+ cgd_sequel :: Sequel, -- What to do at end of basic block
+ cgd_self_loop :: Maybe SelfLoopInfo -- Which tail calls can be compiled
+ -- as local jumps? See Note
+ -- [Self-recursive tail calls] in
+ -- StgCmmExpr
}
type CgBindings = IdEnv CgIdInfo
data CgIdInfo
= CgIdInfo
- { cg_id :: Id -- Id that this is the info for
+ { cg_id :: Id -- Id that this is the info for
-- Can differ from the Id at occurrence sites by
-- virtue of being externalised, for splittable C
+ -- See Note [Externalise when splitting]
, cg_lf :: LambdaFormInfo
, cg_loc :: CgLoc -- CmmExpr for the *tagged* value
}
-data CgLoc
- = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning
- -- Hp, so that it remains valid across calls
-
- | LneLoc BlockId [LocalReg] -- A join point
- -- A join point (= let-no-escape) should only
- -- be tail-called, and in a saturated way.
- -- To tail-call it, assign to these locals,
- -- and branch to the block id
+-- Note [Externalise when splitting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- If we're splitting the object with -fsplit-objs, we need to
+-- externalise *all* the top-level names, and then make sure we only
+-- use the externalised one in any C label we use which refers to this
+-- name.
instance Outputable CgIdInfo where
ppr (CgIdInfo { cg_id = id, cg_loc = loc })
= ppr id <+> ptext (sLit "-->") <+> ppr loc
-instance Outputable CgLoc where
- ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e
- ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs
-
-
-- Sequel tells what to do with the result of this expression
data Sequel
- = Return Bool -- Return result(s) to continuation found on the stack
- -- True <=> the continuation is update code (???)
+ = Return Bool -- Return result(s) to continuation found on the stack.
+ -- True <=> the continuation is update code (???)
| AssignTo
- [LocalReg] -- Put result(s) in these regs and fall through
- -- NB: no void arguments here
+ [LocalReg] -- Put result(s) in these regs and fall through
+ -- NB: no void arguments here
--
Bool -- Should we adjust the heap pointer back to
-- recover space that's unused on this path?
@@ -300,7 +297,8 @@ initCgInfoDown dflags mod
, cgd_mod = mod
, cgd_updfr_off = initUpdFrameOff dflags
, cgd_ticky = mkTopTickyCtrLabel
- , cgd_sequel = initSequel }
+ , cgd_sequel = initSequel
+ , cgd_self_loop = Nothing }
initSequel :: Sequel
initSequel = Return False
@@ -322,9 +320,7 @@ data CgState
-- Both are ordered only so that we can
-- reduce forward references, when it's easy to do so
- cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment
- -- Bindings for top-level things are given in
- -- the info-down part
+ cgs_binds :: CgBindings,
cgs_hp_usg :: HeapUsage,
@@ -332,10 +328,10 @@ data CgState
data HeapUsage =
HeapUsage {
- virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word
- -- Incremented whenever we allocate
+ virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word
+ -- Incremented whenever we allocate
realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr
- -- Used in instruction addressing modes
+ -- Used in instruction addressing modes
}
type VirtualHpOffset = WordOff
@@ -449,6 +445,16 @@ newUnique = do
getInfoDown :: FCode CgInfoDownwards
getInfoDown = FCode $ \info_down state -> (# info_down,state #)
+getSelfLoop :: FCode (Maybe SelfLoopInfo)
+getSelfLoop = do
+ info_down <- getInfoDown
+ return $ cgd_self_loop info_down
+
+withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a
+withSelfLoop self_loop code = do
+ info_down <- getInfoDown
+ withInfoDown code (info_down {cgd_self_loop = Just self_loop})
+
instance HasDynFlags FCode where
getDynFlags = liftM cgd_dflags getInfoDown
@@ -475,7 +481,7 @@ getModuleName = do { info <- getInfoDown; return (cgd_mod info) }
withSequel :: Sequel -> FCode a -> FCode a
withSequel sequel code
= do { info <- getInfoDown
- ; withInfoDown code (info {cgd_sequel = sequel }) }
+ ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) }
getSequel :: FCode Sequel
getSequel = do { info <- getInfoDown
@@ -520,15 +526,12 @@ setTickyCtrLabel ticky code = do
--------------------------------------------------------
forkClosureBody :: FCode () -> FCode ()
--- forkClosureBody takes a code, $c$, and compiles it in a
--- fresh environment, except that:
--- - compilation info and statics are passed in unchanged.
--- - local bindings are passed in unchanged
--- (it's up to the enclosed code to re-bind the
--- free variables to a field of the closure)
---
--- The current state is passed on completely unaltered, except that
--- C-- from the fork is incorporated.
+-- forkClosureBody compiles body_code in environment where:
+-- - sequel, update stack frame and self loop info are
+-- set to fresh values
+-- - state is set to a fresh value, except for local bindings
+-- that are passed in unchanged. It's up to the enclosed code to
+-- re-bind the free variables to a field of the closure.
forkClosureBody body_code
= do { dflags <- getDynFlags
@@ -536,26 +539,25 @@ forkClosureBody body_code
; us <- newUniqSupply
; state <- getState
; let body_info_down = info { cgd_sequel = initSequel
- , cgd_updfr_off = initUpdFrameOff dflags }
+ , cgd_updfr_off = initUpdFrameOff dflags
+ , cgd_self_loop = Nothing }
fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
((),fork_state_out) = doFCode body_code body_info_down fork_state_in
; setState $ state `addCodeBlocksFrom` fork_state_out }
-forkProc :: FCode a -> FCode a
--- 'forkProc' takes a code and compiles it in the *current* environment,
--- returning the graph thus constructed.
+forkLneBody :: FCode a -> FCode a
+-- 'forkLneBody' takes a body of let-no-escape binding and compiles
+-- it in the *current* environment, returning the graph thus constructed.
--
-- The current environment is passed on completely unchanged to
-- the successor. In particular, any heap usage from the enclosed
-- code is discarded; it should deal with its own heap consumption.
--- forkProc is used to compile let-no-escape bindings.
-forkProc body_code
+forkLneBody body_code
= do { info_down <- getInfoDown
; us <- newUniqSupply
; state <- getState
- ; let info_down' = info_down -- { cgd_sequel = initSequel }
- fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
- (result, fork_state_out) = doFCode body_code info_down' fork_state_in
+ ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
+ (result, fork_state_out) = doFCode body_code info_down fork_state_in
; setState $ state `addCodeBlocksFrom` fork_state_out
; return result }
@@ -565,10 +567,10 @@ codeOnly :: FCode () -> FCode ()
-- Used in almost-circular code to prevent false loop dependencies
codeOnly body_code
= do { info_down <- getInfoDown
- ; us <- newUniqSupply
- ; state <- getState
- ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state,
- cgs_hp_usg = cgs_hp_usg state }
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state
+ , cgs_hp_usg = cgs_hp_usg state }
((), fork_state_out) = doFCode body_code info_down fork_state_in
; setState $ state `addCodeBlocksFrom` fork_state_out }
@@ -587,9 +589,8 @@ forkAlts branch_fcodes
where
(us1,us2) = splitUniqSupply us
branch_state = (initCgState us1) {
- cgs_binds = cgs_binds state,
- cgs_hp_usg = cgs_hp_usg state }
-
+ cgs_binds = cgs_binds state
+ , cgs_hp_usg = cgs_hp_usg state }
(_us, results) = mapAccumL compile us branch_fcodes
(branch_results, branch_out_states) = unzip results
; setState $ foldl stateIncUsage state branch_out_states
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index b1eaa1c27b..5044d763a4 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -6,28 +6,21 @@
--
-----------------------------------------------------------------------------
-{-# OPTIONS -fno-warn-tabs #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and
--- detab the module (please do the detabbing in a separate patch). See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
--- for details
-
module StgCmmProf (
- initCostCentres, ccType, ccsType,
- mkCCostCentre, mkCCostCentreStack,
+ initCostCentres, ccType, ccsType,
+ mkCCostCentre, mkCCostCentreStack,
- -- Cost-centre Profiling
- dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
+ -- Cost-centre Profiling
+ dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
enterCostCentreThunk, enterCostCentreFun,
costCentreFrom,
curCCS, storeCurCCS,
emitSetCCC,
- saveCurrentCostCentre, restoreCurrentCostCentre,
+ saveCurrentCostCentre, restoreCurrentCostCentre,
- -- Lag/drag/void stuff
- ldvEnter, ldvEnterClosure, ldvRecordCreate
+ -- Lag/drag/void stuff
+ ldvEnter, ldvEnterClosure, ldvRecordCreate
) where
#include "HsVersions.h"
@@ -78,8 +71,8 @@ mkCCostCentreStack :: CostCentreStack -> CmmLit
mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
costCentreFrom :: DynFlags
- -> CmmExpr -- A closure pointer
- -> CmmExpr -- The cost centre from that closure
+ -> CmmExpr -- A closure pointer
+ -> CmmExpr -- The cost centre from that closure
costCentreFrom dflags cl = CmmLoad (cmmOffsetB dflags cl (oFFSET_StgHeader_ccs dflags)) (ccsType dflags)
-- | The profiling header words in a static closure
@@ -94,43 +87,43 @@ dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit dflags]
-- | Initialise the profiling field of an update frame
initUpdFrameProf :: CmmExpr -> FCode ()
initUpdFrameProf frame
- = ifProfiling $ -- frame->header.prof.ccs = CCCS
+ = ifProfiling $ -- frame->header.prof.ccs = CCCS
do dflags <- getDynFlags
emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) curCCS
-- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
- -- is unnecessary because it is not used anyhow.
+ -- is unnecessary because it is not used anyhow.
---------------------------------------------------------------------------
--- Saving and restoring the current cost centre
+-- Saving and restoring the current cost centre
---------------------------------------------------------------------------
-{- Note [Saving the current cost centre]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [Saving the current cost centre]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The current cost centre is like a global register. Like other
global registers, it's a caller-saves one. But consider
- case (f x) of (p,q) -> rhs
+ case (f x) of (p,q) -> rhs
Since 'f' may set the cost centre, we must restore it
before resuming rhs. So we want code like this:
- local_cc = CCC -- save
- r = f( x )
- CCC = local_cc -- restore
+ local_cc = CCC -- save
+ r = f( x )
+ CCC = local_cc -- restore
That is, we explicitly "save" the current cost centre in
a LocalReg, local_cc; and restore it after the call. The
C-- infrastructure will arrange to save local_cc across the
call.
The same goes for join points;
- let j x = join-stuff
- in blah-blah
+ let j x = join-stuff
+ in blah-blah
We want this kind of code:
- local_cc = CCC -- save
- blah-blah
+ local_cc = CCC -- save
+ blah-blah
J:
CCC = local_cc -- restore
-}
saveCurrentCostCentre :: FCode (Maybe LocalReg)
- -- Returns Nothing if profiling is off
+ -- Returns Nothing if profiling is off
saveCurrentCostCentre
= do dflags <- getDynFlags
if not (gopt Opt_SccProfilingOn dflags)
@@ -207,7 +200,7 @@ ifProfilingL dflags xs
---------------------------------------------------------------
--- Initialising Cost Centres & CCSs
+-- Initialising Cost Centres & CCSs
---------------------------------------------------------------
initCostCentres :: CollectedCCs -> FCode ()
@@ -233,15 +226,15 @@ emitCostCentreDecl cc = do
showPpr dflags (costCentreSrcSpan cc)
-- XXX going via FastString to get UTF-8 encoding is silly
; let
- lits = [ zero dflags, -- StgInt ccID,
- label, -- char *label,
- modl, -- char *module,
+ lits = [ zero dflags, -- StgInt ccID,
+ label, -- char *label,
+ modl, -- char *module,
loc, -- char *srcloc,
zero64, -- StgWord64 mem_alloc
zero dflags, -- StgWord time_ticks
is_caf, -- StgInt is_caf
zero dflags -- struct _CostCentre *link
- ]
+ ]
; emitDataLits (mkCCLabel cc) lits
}
@@ -290,19 +283,19 @@ emitSetCCC cc tick push
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
pushCostCentre result ccs cc
= emitRtsCallWithResult result AddrHint
- rtsPackageId
+ rtsPackageId
(fsLit "pushCostCentre") [(ccs,AddrHint),
- (CmmLit (mkCCostCentre cc), AddrHint)]
+ (CmmLit (mkCCostCentre cc), AddrHint)]
False
bumpSccCount :: DynFlags -> CmmExpr -> CmmAGraph
bumpSccCount dflags ccs
= addToMem (rEP_CostCentreStack_scc_count dflags)
- (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_scc_count dflags)) 1
+ (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_scc_count dflags)) 1
-----------------------------------------------------------------------------
--
--- Lag/drag/void stuff
+-- Lag/drag/void stuff
--
-----------------------------------------------------------------------------