diff options
author | Ryan Newton <rrnewton@gmail.com> | 2013-08-31 15:28:02 -0400 |
---|---|---|
committer | Ryan Newton <rrnewton@gmail.com> | 2013-08-31 15:28:02 -0400 |
commit | e251a51a990c3a9c95dabab139d42ad69479f61c (patch) | |
tree | 94243fe32bb64d0cab79074ef8de8cb3530d2973 /compiler/codeGen | |
parent | 6fd60b2382efa357fe99fa017fd343db9724d43a (diff) | |
parent | ea87014a7ad4454f18bb15f6f0ee4b6e61b148be (diff) | |
download | haskell-e251a51a990c3a9c95dabab139d42ad69479f61c.tar.gz |
Merge branch 'master' into atomics
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- | compiler/codeGen/StgCmm.hs | 7 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 16 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmClosure.hs | 147 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 111 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmMonad.hs | 125 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmProf.hs | 71 |
6 files changed, 302 insertions, 175 deletions
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index 8b3bac3b4f..9b1bce4b57 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -257,13 +257,10 @@ cgDataCon data_con -- Stuff to support splitting --------------------------------------------------------------- --- If we're splitting the object, we need to externalise all the --- top-level names (and then make sure we only use the externalised --- one in any C label we use which refers to this name). - maybeExternaliseId :: DynFlags -> Id -> FCode Id maybeExternaliseId dflags id - | gopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs + | gopt Opt_SplitObjs dflags, -- See Note [Externalise when splitting] + -- in StgCmmMonad isInternalName name = do { mod <- getModuleName ; returnFC (setIdName id (externalise mod)) } | otherwise = returnFC id diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index ce5491dc10..dccefd0fb0 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -30,6 +30,7 @@ import StgCmmForeign (emitPrimCall) import MkGraph import CoreSyn ( AltCon(..) ) import SMRep +import BlockId import Cmm import CmmInfo import CmmUtils @@ -476,7 +477,17 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details ; let node_points = nodeMustPointToIt dflags lf_info node' = if node_points then Just node else Nothing ; when node_points (ldvEnterClosure cl_info) - + -- Emit new label that might potentially be a header + -- of a self-recursive tail call. See Note + -- [Self-recursive tail calls] in StgCmmExpr + ; u <- newUnique + ; let loop_header_id = mkBlockId u + ; emitLabel loop_header_id + -- Extend reader monad with information that + -- self-recursive tail calls can be optimized into local + -- jumps + ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do + { -- Main payload ; entryHeapCheck cl_info node' arity arg_regs $ do { -- ticky after heap check to avoid double counting @@ -490,7 +501,8 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details -- heap check, to reduce live vars over check ; when node_points $ load_fvs node lf_info fv_bindings ; void $ cgExpr body - }} + }}} + } -- A function closure pointer may be tagged, so we diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 611a570d70..04297b4258 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -27,10 +27,9 @@ module StgCmmClosure ( lfDynTag, maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable, - nodeMustPointToIt, - CallMethod(..), getCallMethod, - - isKnownFun, funTag, tagForArity, + -- * Used by other modules + CgLoc(..), SelfLoopInfo, CallMethod(..), + nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod, -- * ClosureInfo ClosureInfo, @@ -69,11 +68,14 @@ module StgCmmClosure ( import StgSyn import SMRep import Cmm +import PprCmmExpr() +import BlockId import CLabel import Id import IdInfo import DataCon +import FastString import Name import Type import TypeRep @@ -85,6 +87,37 @@ import DynFlags import Util ----------------------------------------------------------------------------- +-- Data types and synonyms +----------------------------------------------------------------------------- + +-- These data types are mostly used by other modules, especially StgCmmMonad, +-- but we define them here because some functions in this module need to +-- have access to them as well + +data CgLoc + = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning + -- Hp, so that it remains valid across calls + + | LneLoc BlockId [LocalReg] -- A join point + -- A join point (= let-no-escape) should only + -- be tail-called, and in a saturated way. + -- To tail-call it, assign to these locals, + -- and branch to the block id + +instance Outputable CgLoc where + ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e + ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs + +type SelfLoopInfo = (Id, BlockId, [LocalReg]) + +-- used by ticky profiling +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun LFLetNoEscape = True +isKnownFun _ = False + + +----------------------------------------------------------------------------- -- Representations ----------------------------------------------------------------------------- @@ -122,23 +155,23 @@ isGcPtrRep _ = False -- tail call or return that identifier. data LambdaFormInfo - = LFReEntrant -- Reentrant closure (a function) - TopLevelFlag -- True if top level - !RepArity -- Arity. Invariant: always > 0 - !Bool -- True <=> no fvs + = LFReEntrant -- Reentrant closure (a function) + TopLevelFlag -- True if top level + !RepArity -- Arity. Invariant: always > 0 + !Bool -- True <=> no fvs ArgDescr -- Argument descriptor (should really be in ClosureInfo) - | LFThunk -- Thunk (zero arity) + | LFThunk -- Thunk (zero arity) TopLevelFlag - !Bool -- True <=> no free vars - !Bool -- True <=> updatable (i.e., *not* single-entry) + !Bool -- True <=> no free vars + !Bool -- True <=> updatable (i.e., *not* single-entry) StandardFormInfo - !Bool -- True <=> *might* be a function type + !Bool -- True <=> *might* be a function type - | LFCon -- A saturated constructor application - DataCon -- The constructor + | LFCon -- A saturated constructor application + DataCon -- The constructor - | LFUnknown -- Used for function arguments and imported things. + | LFUnknown -- Used for function arguments and imported things. -- We know nothing about this closure. -- Treat like updatable "LFThunk"... -- Imported things which we *do* know something about use @@ -149,10 +182,10 @@ data LambdaFormInfo -- because then we know the entry code will do -- For a function, the entry code is the fast entry point - | LFUnLifted -- A value of unboxed type; + | LFUnLifted -- A value of unboxed type; -- always a value, needs evaluation - | LFLetNoEscape -- See LetNoEscape module for precise description + | LFLetNoEscape -- See LetNoEscape module for precise description | LFBlackHole -- Used for the closures allocated to hold the result -- of a CAF. We want the target of the update frame to @@ -175,7 +208,7 @@ data StandardFormInfo -- case x of -- con a1,..,an -> ak -- and the constructor is from a single-constr type. - WordOff -- 0-origin offset of ak within the "goods" of + WordOff -- 0-origin offset of ak within the "goods" of -- constructor (Recall that the a1,...,an may be laid -- out in the heap in a non-obvious order.) @@ -205,9 +238,9 @@ mkLFLetNoEscape :: LambdaFormInfo mkLFLetNoEscape = LFLetNoEscape ------------- -mkLFReEntrant :: TopLevelFlag -- True of top level - -> [Id] -- Free vars - -> [Id] -- Args +mkLFReEntrant :: TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> [Id] -- Args -> ArgDescr -- Argument descriptor -> LambdaFormInfo @@ -256,7 +289,7 @@ mkLFImported :: Id -> LambdaFormInfo mkLFImported id | Just con <- isDataConWorkId_maybe id , isNullaryRepDataCon con - = LFCon con -- An imported nullary constructor + = LFCon con -- An imported nullary constructor -- We assume that the constructor is evaluated so that -- the id really does point directly to the constructor @@ -465,49 +498,65 @@ When black-holing, single-entry closures could also be entered via node (rather than directly) to catch double-entry. -} data CallMethod - = EnterIt -- No args, not a function + = EnterIt -- No args, not a function - | JumpToIt -- A join point + | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop - | ReturnIt -- It's a value (function, unboxed value, + | ReturnIt -- It's a value (function, unboxed value, -- or constructor), so just return it. | SlowCall -- Unknown fun, or known fun with -- too few args. | DirectEntry -- Jump directly, with args in regs - CLabel -- The code label - RepArity -- Its arity + CLabel -- The code label + RepArity -- Its arity getCallMethod :: DynFlags -> Name -- Function being applied - -> CafInfo -- Can it refer to CAF's? - -> LambdaFormInfo -- Its info - -> RepArity -- Number of available arguments + -> Id -- Function Id used to chech if it can refer to + -- CAF's and whether the function is tail-calling + -- itself + -> LambdaFormInfo -- Its info + -> RepArity -- Number of available arguments + -> CgLoc -- Passed in from cgIdApp so that we can + -- handle let-no-escape bindings and self-recursive + -- tail calls using the same data constructor, + -- JumpToIt. This saves us one case branch in + -- cgIdApp + -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call? -> CallMethod -getCallMethod dflags _name _ lf_info _n_args +getCallMethod _ _ id _ n_args _cg_loc (Just (self_loop_id, block_id, args)) + | id == self_loop_id, n_args == length args + -- If these patterns match then we know that: + -- * function is performing a self-recursive call in a tail position + -- * number of parameters of the function matches functions arity. + -- See Note [Self-recursive tail calls] in StgCmmExpr for more details + = JumpToIt block_id args + +getCallMethod dflags _name _ lf_info _n_args _cg_loc _self_loop_info | nodeMustPointToIt dflags lf_info && gopt Opt_Parallel dflags - = -- If we're parallel, then we must always enter via node. + = -- If we're parallel, then we must always enter via node. -- The reason is that the closure may have been -- fetched since we allocated it. EnterIt -getCallMethod dflags name caf (LFReEntrant _ arity _ _) n_args +getCallMethod dflags name id (LFReEntrant _ arity _ _) n_args _cg_loc _self_loop_info | n_args == 0 = ASSERT( arity /= 0 ) ReturnIt -- No args at all | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel dflags name caf) arity + | otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity -getCallMethod _ _name _ LFUnLifted n_args +getCallMethod _ _name _ LFUnLifted n_args _cg_loc _self_loop_info = ASSERT( n_args == 0 ) ReturnIt -getCallMethod _ _name _ (LFCon _) n_args +getCallMethod _ _name _ (LFCon _) n_args _cg_loc _self_loop_info = ASSERT( n_args == 0 ) ReturnIt -getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_args - | is_fun -- it *might* be a function, so we must "call" it (which is always safe) - = SlowCall -- We cannot just enter it [in eval/apply, the entry code +getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun) n_args _cg_loc _self_loop_info + | is_fun -- it *might* be a function, so we must "call" it (which is always safe) + = SlowCall -- We cannot just enter it [in eval/apply, the entry code -- is the fast-entry code] -- Since is_fun is False, we are *definitely* looking at a data value @@ -527,27 +576,24 @@ getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_arg | otherwise -- Jump direct to code for single-entry thunks = ASSERT( n_args == 0 ) - DirectEntry (thunkEntryLabel dflags name caf std_form_info updatable) 0 + DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info updatable) 0 -getCallMethod _ _name _ (LFUnknown True) _n_args +getCallMethod _ _name _ (LFUnknown True) _n_arg _cg_locs _self_loop_info = SlowCall -- might be a function -getCallMethod _ name _ (LFUnknown False) n_args +getCallMethod _ name _ (LFUnknown False) n_args _cg_loc _self_loop_info = ASSERT2( n_args == 0, ppr name <+> ppr n_args ) EnterIt -- Not a function -getCallMethod _ _name _ LFBlackHole _n_args - = SlowCall -- Presumably the black hole has by now +getCallMethod _ _name _ LFBlackHole _n_args _cg_loc _self_loop_info + = SlowCall -- Presumably the black hole has by now -- been updated, but we don't know with -- what, so we slow call it -getCallMethod _ _name _ LFLetNoEscape _n_args - = JumpToIt +getCallMethod _ _name _ LFLetNoEscape _n_args (LneLoc blk_id lne_regs) _self_loop_info + = JumpToIt blk_id lne_regs -isKnownFun :: LambdaFormInfo -> Bool -isKnownFun (LFReEntrant _ _ _ _) = True -isKnownFun LFLetNoEscape = True -isKnownFun _ = False +getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method" ----------------------------------------------------------------------------- -- staticClosureRequired @@ -680,7 +726,6 @@ mkCmmInfo ClosureInfo {..} , cit_prof = closureProf , cit_srt = NoC_SRT } - -------------------------------------- -- Building ClosureInfos -------------------------------------- diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 24b12f7237..331e65819f 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -160,7 +160,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body return ( lneIdInfo dflags bndr args , code ) where - code = forkProc $ do { + code = forkLneBody $ do { ; withNewTickyCounterLNE (idName bndr) args $ do ; restoreCurrentCostCentre cc_slot ; arg_regs <- bindArgsToRegs args @@ -632,14 +632,20 @@ cgConApp con stg_args cgIdApp :: Id -> [StgArg] -> FCode ReturnKind cgIdApp fun_id [] | isVoidId fun_id = emitReturn [] cgIdApp fun_id args = do - dflags <- getDynFlags - fun_info <- getCgIdInfo fun_id - let fun_arg = StgVarArg fun_id - fun_name = idName fun_id - fun = idInfoToAmode fun_info - lf_info = cg_lf fun_info + dflags <- getDynFlags + fun_info <- getCgIdInfo fun_id + self_loop_info <- getSelfLoop + let cg_fun_id = cg_id fun_info + -- NB: use (cg_id fun_info) instead of fun_id, because + -- the former may be externalised for -split-objs. + -- See Note [Externalise when splitting] in StgCmmMonad + + fun_arg = StgVarArg cg_fun_id + fun_name = idName cg_fun_id + fun = idInfoToAmode fun_info + lf_info = cg_lf fun_info node_points dflags = nodeMustPointToIt dflags lf_info - case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of + case (getCallMethod dflags fun_name cg_fun_id lf_info (length args) (cg_loc fun_info) self_loop_info) of -- A value in WHNF, so we can just return it. ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? @@ -659,14 +665,87 @@ cgIdApp fun_id args = do then directCall NativeNodeCall lbl arity (fun_arg:args) else directCall NativeDirectCall lbl arity args } - -- Let-no-escape call - JumpToIt -> let (LneLoc blk_id lne_regs) = cg_loc fun_info - in do - { adjustHpBackwards -- always do this before a tail-call - ; cmm_args <- getNonVoidArgAmodes args - ; emitMultiAssign lne_regs cmm_args - ; emit (mkBranch blk_id) - ; return AssignedDirectly } + -- Let-no-escape call or self-recursive tail-call + JumpToIt blk_id lne_regs -> do + { adjustHpBackwards -- always do this before a tail-call + ; cmm_args <- getNonVoidArgAmodes args + ; emitMultiAssign lne_regs cmm_args + ; emit (mkBranch blk_id) + ; return AssignedDirectly } + +-- Note [Self-recursive tail calls] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Self-recursive tail calls can be optimized into a local jump in the same +-- way as let-no-escape bindings (see Note [What is a non-escaping let] in +-- stgSyn/CoreToStg.lhs). Consider this: +-- +-- foo.info: +-- a = R1 // calling convention +-- b = R2 +-- goto L1 +-- L1: ... +-- ... +-- ... +-- L2: R1 = x +-- R2 = y +-- call foo(R1,R2) +-- +-- Instead of putting x and y into registers (or other locations required by the +-- calling convention) and performing a call we can put them into local +-- variables a and b and perform jump to L1: +-- +-- foo.info: +-- a = R1 +-- b = R2 +-- goto L1 +-- L1: ... +-- ... +-- ... +-- L2: a = x +-- b = y +-- goto L1 +-- +-- This can be done only when function is calling itself in a tail position +-- and only if the call passes number of parameters equal to function's arity. +-- Note that this cannot be performed if a function calls itself with a +-- continuation. +-- +-- This in fact implements optimization known as "loopification". It was +-- described in "Low-level code optimizations in the Glasgow Haskell Compiler" +-- by Krzysztof Woś, though we use different approach. Krzysztof performed his +-- optimization at the Cmm level, whereas we perform ours during code generation +-- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is +-- generated in the first place. +-- +-- Implementation is spread across a couple of places in the code: +-- +-- * FCode monad stores additional information in its reader environment +-- (cgd_self_loop field). This information tells us which function can +-- tail call itself in an optimized way (it is the function currently +-- being compiled), what is the label of a loop header (L1 in example above) +-- and information about local registers in which we should arguments +-- before making a call (this would be a and b in example above). +-- +-- * Whenever we are compiling a function, we set that information to reflect +-- the fact that function currently being compiled can be jumped to, instead +-- of called. We also have to emit a label to which we will be jumping. Both +-- things are done in closureCodyBody in StgCmmBind. +-- +-- * When we began compilation of another closure we remove the additional +-- information from the environment. This is done by forkClosureBody +-- in StgCmmMonad. Other functions that duplicate the environment - +-- forkLneBody, forkAlts, codeOnly - duplicate that information. In other +-- words, we only need to clean the environment of the self-loop information +-- when compiling right hand side of a closure (binding). +-- +-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind +-- of call will be generated. getCallMethod decides to generate a self +-- recursive tail call when (a) environment stores information about +-- possible self tail-call; (b) that tail call is to a function currently +-- being compiled; (c) number of passed arguments is equal to function's +-- arity. + emitEnter :: CmmExpr -> FCode ReturnKind emitEnter fun = do diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index c3dc50ef98..27d4fd6386 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -26,7 +26,7 @@ module StgCmmMonad ( mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto, mkCall, mkCmmCall, - forkClosureBody, forkAlts, forkProc, codeOnly, + forkClosureBody, forkLneBody, forkAlts, codeOnly, ConTagZ, @@ -44,10 +44,10 @@ module StgCmmMonad ( getModuleName, -- ideally we wouldn't export these, but some other modules access internal state - getState, setState, getInfoDown, getDynFlags, getThisPackage, + getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, getThisPackage, -- more localised access to monad state - CgIdInfo(..), CgLoc(..), + CgIdInfo(..), getBinds, setBinds, -- out of general friendliness, we also export ... @@ -60,6 +60,7 @@ import Cmm import StgCmmClosure import DynFlags import Hoopl +import Maybes import MkGraph import BlockId import CLabel @@ -100,11 +101,10 @@ infixr 9 `thenFC` -- - A reader monad, for CgInfoDownwards, containing -- - DynFlags, -- - the current Module --- - the static top-level environmnet -- - the update-frame offset -- - the ticky counter label -- - the Sequel (the continuation to return to) - +-- - the self-recursive tail call information -------------------------------------------------------- @@ -169,51 +169,48 @@ fixC fcode = FCode ( data CgInfoDownwards -- information only passed *downwards* by the monad = MkCgInfoDown { - cgd_dflags :: DynFlags, - cgd_mod :: Module, -- Module being compiled - cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame - cgd_ticky :: CLabel, -- Current destination for ticky counts - cgd_sequel :: Sequel -- What to do at end of basic block + cgd_dflags :: DynFlags, + cgd_mod :: Module, -- Module being compiled + cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame + cgd_ticky :: CLabel, -- Current destination for ticky counts + cgd_sequel :: Sequel, -- What to do at end of basic block + cgd_self_loop :: Maybe SelfLoopInfo -- Which tail calls can be compiled + -- as local jumps? See Note + -- [Self-recursive tail calls] in + -- StgCmmExpr } type CgBindings = IdEnv CgIdInfo data CgIdInfo = CgIdInfo - { cg_id :: Id -- Id that this is the info for + { cg_id :: Id -- Id that this is the info for -- Can differ from the Id at occurrence sites by -- virtue of being externalised, for splittable C + -- See Note [Externalise when splitting] , cg_lf :: LambdaFormInfo , cg_loc :: CgLoc -- CmmExpr for the *tagged* value } -data CgLoc - = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning - -- Hp, so that it remains valid across calls - - | LneLoc BlockId [LocalReg] -- A join point - -- A join point (= let-no-escape) should only - -- be tail-called, and in a saturated way. - -- To tail-call it, assign to these locals, - -- and branch to the block id +-- Note [Externalise when splitting] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- If we're splitting the object with -fsplit-objs, we need to +-- externalise *all* the top-level names, and then make sure we only +-- use the externalised one in any C label we use which refers to this +-- name. instance Outputable CgIdInfo where ppr (CgIdInfo { cg_id = id, cg_loc = loc }) = ppr id <+> ptext (sLit "-->") <+> ppr loc -instance Outputable CgLoc where - ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e - ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs - - -- Sequel tells what to do with the result of this expression data Sequel - = Return Bool -- Return result(s) to continuation found on the stack - -- True <=> the continuation is update code (???) + = Return Bool -- Return result(s) to continuation found on the stack. + -- True <=> the continuation is update code (???) | AssignTo - [LocalReg] -- Put result(s) in these regs and fall through - -- NB: no void arguments here + [LocalReg] -- Put result(s) in these regs and fall through + -- NB: no void arguments here -- Bool -- Should we adjust the heap pointer back to -- recover space that's unused on this path? @@ -300,7 +297,8 @@ initCgInfoDown dflags mod , cgd_mod = mod , cgd_updfr_off = initUpdFrameOff dflags , cgd_ticky = mkTopTickyCtrLabel - , cgd_sequel = initSequel } + , cgd_sequel = initSequel + , cgd_self_loop = Nothing } initSequel :: Sequel initSequel = Return False @@ -322,9 +320,7 @@ data CgState -- Both are ordered only so that we can -- reduce forward references, when it's easy to do so - cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment - -- Bindings for top-level things are given in - -- the info-down part + cgs_binds :: CgBindings, cgs_hp_usg :: HeapUsage, @@ -332,10 +328,10 @@ data CgState data HeapUsage = HeapUsage { - virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word - -- Incremented whenever we allocate + virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word + -- Incremented whenever we allocate realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr - -- Used in instruction addressing modes + -- Used in instruction addressing modes } type VirtualHpOffset = WordOff @@ -449,6 +445,16 @@ newUnique = do getInfoDown :: FCode CgInfoDownwards getInfoDown = FCode $ \info_down state -> (# info_down,state #) +getSelfLoop :: FCode (Maybe SelfLoopInfo) +getSelfLoop = do + info_down <- getInfoDown + return $ cgd_self_loop info_down + +withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a +withSelfLoop self_loop code = do + info_down <- getInfoDown + withInfoDown code (info_down {cgd_self_loop = Just self_loop}) + instance HasDynFlags FCode where getDynFlags = liftM cgd_dflags getInfoDown @@ -475,7 +481,7 @@ getModuleName = do { info <- getInfoDown; return (cgd_mod info) } withSequel :: Sequel -> FCode a -> FCode a withSequel sequel code = do { info <- getInfoDown - ; withInfoDown code (info {cgd_sequel = sequel }) } + ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) } getSequel :: FCode Sequel getSequel = do { info <- getInfoDown @@ -520,15 +526,12 @@ setTickyCtrLabel ticky code = do -------------------------------------------------------- forkClosureBody :: FCode () -> FCode () --- forkClosureBody takes a code, $c$, and compiles it in a --- fresh environment, except that: --- - compilation info and statics are passed in unchanged. --- - local bindings are passed in unchanged --- (it's up to the enclosed code to re-bind the --- free variables to a field of the closure) --- --- The current state is passed on completely unaltered, except that --- C-- from the fork is incorporated. +-- forkClosureBody compiles body_code in environment where: +-- - sequel, update stack frame and self loop info are +-- set to fresh values +-- - state is set to a fresh value, except for local bindings +-- that are passed in unchanged. It's up to the enclosed code to +-- re-bind the free variables to a field of the closure. forkClosureBody body_code = do { dflags <- getDynFlags @@ -536,26 +539,25 @@ forkClosureBody body_code ; us <- newUniqSupply ; state <- getState ; let body_info_down = info { cgd_sequel = initSequel - , cgd_updfr_off = initUpdFrameOff dflags } + , cgd_updfr_off = initUpdFrameOff dflags + , cgd_self_loop = Nothing } fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } ((),fork_state_out) = doFCode body_code body_info_down fork_state_in ; setState $ state `addCodeBlocksFrom` fork_state_out } -forkProc :: FCode a -> FCode a --- 'forkProc' takes a code and compiles it in the *current* environment, --- returning the graph thus constructed. +forkLneBody :: FCode a -> FCode a +-- 'forkLneBody' takes a body of let-no-escape binding and compiles +-- it in the *current* environment, returning the graph thus constructed. -- -- The current environment is passed on completely unchanged to -- the successor. In particular, any heap usage from the enclosed -- code is discarded; it should deal with its own heap consumption. --- forkProc is used to compile let-no-escape bindings. -forkProc body_code +forkLneBody body_code = do { info_down <- getInfoDown ; us <- newUniqSupply ; state <- getState - ; let info_down' = info_down -- { cgd_sequel = initSequel } - fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } - (result, fork_state_out) = doFCode body_code info_down' fork_state_in + ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } + (result, fork_state_out) = doFCode body_code info_down fork_state_in ; setState $ state `addCodeBlocksFrom` fork_state_out ; return result } @@ -565,10 +567,10 @@ codeOnly :: FCode () -> FCode () -- Used in almost-circular code to prevent false loop dependencies codeOnly body_code = do { info_down <- getInfoDown - ; us <- newUniqSupply - ; state <- getState - ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state, - cgs_hp_usg = cgs_hp_usg state } + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state + , cgs_hp_usg = cgs_hp_usg state } ((), fork_state_out) = doFCode body_code info_down fork_state_in ; setState $ state `addCodeBlocksFrom` fork_state_out } @@ -587,9 +589,8 @@ forkAlts branch_fcodes where (us1,us2) = splitUniqSupply us branch_state = (initCgState us1) { - cgs_binds = cgs_binds state, - cgs_hp_usg = cgs_hp_usg state } - + cgs_binds = cgs_binds state + , cgs_hp_usg = cgs_hp_usg state } (_us, results) = mapAccumL compile us branch_fcodes (branch_results, branch_out_states) = unzip results ; setState $ foldl stateIncUsage state branch_out_states diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index b1eaa1c27b..5044d763a4 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -6,28 +6,21 @@ -- ----------------------------------------------------------------------------- -{-# OPTIONS -fno-warn-tabs #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and --- detab the module (please do the detabbing in a separate patch). See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces --- for details - module StgCmmProf ( - initCostCentres, ccType, ccsType, - mkCCostCentre, mkCCostCentreStack, + initCostCentres, ccType, ccsType, + mkCCostCentre, mkCCostCentreStack, - -- Cost-centre Profiling - dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, enterCostCentreThunk, enterCostCentreFun, costCentreFrom, curCCS, storeCurCCS, emitSetCCC, - saveCurrentCostCentre, restoreCurrentCostCentre, + saveCurrentCostCentre, restoreCurrentCostCentre, - -- Lag/drag/void stuff - ldvEnter, ldvEnterClosure, ldvRecordCreate + -- Lag/drag/void stuff + ldvEnter, ldvEnterClosure, ldvRecordCreate ) where #include "HsVersions.h" @@ -78,8 +71,8 @@ mkCCostCentreStack :: CostCentreStack -> CmmLit mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) costCentreFrom :: DynFlags - -> CmmExpr -- A closure pointer - -> CmmExpr -- The cost centre from that closure + -> CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure costCentreFrom dflags cl = CmmLoad (cmmOffsetB dflags cl (oFFSET_StgHeader_ccs dflags)) (ccsType dflags) -- | The profiling header words in a static closure @@ -94,43 +87,43 @@ dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit dflags] -- | Initialise the profiling field of an update frame initUpdFrameProf :: CmmExpr -> FCode () initUpdFrameProf frame - = ifProfiling $ -- frame->header.prof.ccs = CCCS + = ifProfiling $ -- frame->header.prof.ccs = CCCS do dflags <- getDynFlags emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) curCCS -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) - -- is unnecessary because it is not used anyhow. + -- is unnecessary because it is not used anyhow. --------------------------------------------------------------------------- --- Saving and restoring the current cost centre +-- Saving and restoring the current cost centre --------------------------------------------------------------------------- -{- Note [Saving the current cost centre] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{- Note [Saving the current cost centre] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The current cost centre is like a global register. Like other global registers, it's a caller-saves one. But consider - case (f x) of (p,q) -> rhs + case (f x) of (p,q) -> rhs Since 'f' may set the cost centre, we must restore it before resuming rhs. So we want code like this: - local_cc = CCC -- save - r = f( x ) - CCC = local_cc -- restore + local_cc = CCC -- save + r = f( x ) + CCC = local_cc -- restore That is, we explicitly "save" the current cost centre in a LocalReg, local_cc; and restore it after the call. The C-- infrastructure will arrange to save local_cc across the call. The same goes for join points; - let j x = join-stuff - in blah-blah + let j x = join-stuff + in blah-blah We want this kind of code: - local_cc = CCC -- save - blah-blah + local_cc = CCC -- save + blah-blah J: CCC = local_cc -- restore -} saveCurrentCostCentre :: FCode (Maybe LocalReg) - -- Returns Nothing if profiling is off + -- Returns Nothing if profiling is off saveCurrentCostCentre = do dflags <- getDynFlags if not (gopt Opt_SccProfilingOn dflags) @@ -207,7 +200,7 @@ ifProfilingL dflags xs --------------------------------------------------------------- --- Initialising Cost Centres & CCSs +-- Initialising Cost Centres & CCSs --------------------------------------------------------------- initCostCentres :: CollectedCCs -> FCode () @@ -233,15 +226,15 @@ emitCostCentreDecl cc = do showPpr dflags (costCentreSrcSpan cc) -- XXX going via FastString to get UTF-8 encoding is silly ; let - lits = [ zero dflags, -- StgInt ccID, - label, -- char *label, - modl, -- char *module, + lits = [ zero dflags, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, loc, -- char *srcloc, zero64, -- StgWord64 mem_alloc zero dflags, -- StgWord time_ticks is_caf, -- StgInt is_caf zero dflags -- struct _CostCentre *link - ] + ] ; emitDataLits (mkCCLabel cc) lits } @@ -290,19 +283,19 @@ emitSetCCC cc tick push pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () pushCostCentre result ccs cc = emitRtsCallWithResult result AddrHint - rtsPackageId + rtsPackageId (fsLit "pushCostCentre") [(ccs,AddrHint), - (CmmLit (mkCCostCentre cc), AddrHint)] + (CmmLit (mkCCostCentre cc), AddrHint)] False bumpSccCount :: DynFlags -> CmmExpr -> CmmAGraph bumpSccCount dflags ccs = addToMem (rEP_CostCentreStack_scc_count dflags) - (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_scc_count dflags)) 1 + (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_scc_count dflags)) 1 ----------------------------------------------------------------------------- -- --- Lag/drag/void stuff +-- Lag/drag/void stuff -- ----------------------------------------------------------------------------- |