diff options
-rw-r--r-- | compiler/cmm/CmmCallConv.hs | 48 | ||||
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/CmmLayoutStack.hs | 7 | ||||
-rw-r--r-- | compiler/cmm/CmmLint.hs | 6 | ||||
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 6 | ||||
-rw-r--r-- | compiler/cmm/MkGraph.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 71 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmCon.hs | 4 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmEnv.hs | 15 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 4 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmHeap.hs | 85 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmProf.hs | 46 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 14 | ||||
-rw-r--r-- | rts/Schedule.c | 35 | ||||
-rw-r--r-- | rules/build-package-way.mk | 2 |
15 files changed, 174 insertions, 172 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index c92ad0fa08..dd1b6af643 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -22,7 +22,6 @@ import Constants import qualified Data.List as L import DynFlags import Outputable -import Platform -- Calculate the 'GlobalReg' or stack locations for function call -- parameters as used by the Cmm calling convention. @@ -111,34 +110,19 @@ type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs. -- We take these register supplies from the *real* registers, i.e. those -- that are guaranteed to map to machine registers. -vanillaRegNos, floatRegNos, doubleRegNos, longRegNos :: DynFlags -> [Int] -vanillaRegNos dflags - | platformUnregisterised (targetPlatform dflags) = [] - | otherwise = regList mAX_Real_Vanilla_REG -floatRegNos dflags - | platformUnregisterised (targetPlatform dflags) = [] - | otherwise = regList mAX_Real_Float_REG -doubleRegNos dflags - | platformUnregisterised (targetPlatform dflags) = [] - | otherwise = regList mAX_Real_Double_REG -longRegNos dflags - | platformUnregisterised (targetPlatform dflags) = [] - | otherwise = regList mAX_Real_Long_REG - --- getRegsWithoutNode, getRegsWithNode :: DynFlags -> AvailRegs -getRegsWithoutNode dflags = - (filter (\r -> r VGcPtr /= node) intRegs, - map FloatReg (floatRegNos dflags), - map DoubleReg (doubleRegNos dflags), - map LongReg (longRegNos dflags)) - where intRegs = map VanillaReg (vanillaRegNos dflags) -getRegsWithNode dflags = - (intRegs, - map FloatReg (floatRegNos dflags), - map DoubleReg (doubleRegNos dflags), - map LongReg (longRegNos dflags)) - where intRegs = map VanillaReg (vanillaRegNos dflags) +getRegsWithoutNode _dflags = + ( filter (\r -> r VGcPtr /= node) realVanillaRegs + , realFloatRegs + , realDoubleRegs + , realLongRegs ) + +-- getRegsWithNode uses R1/node even if it isn't a register +getRegsWithNode _dflags = + ( if null realVanillaRegs then [VanillaReg 1] else realVanillaRegs + , realFloatRegs + , realDoubleRegs + , realLongRegs ) allFloatRegs, allDoubleRegs, allLongRegs :: [GlobalReg] allVanillaRegs :: [VGcPtr -> GlobalReg] @@ -148,6 +132,14 @@ allFloatRegs = map FloatReg $ regList mAX_Float_REG allDoubleRegs = map DoubleReg $ regList mAX_Double_REG allLongRegs = map LongReg $ regList mAX_Long_REG +realFloatRegs, realDoubleRegs, realLongRegs :: [GlobalReg] +realVanillaRegs :: [VGcPtr -> GlobalReg] + +realVanillaRegs = map VanillaReg $ regList mAX_Real_Vanilla_REG +realFloatRegs = map FloatReg $ regList mAX_Real_Float_REG +realDoubleRegs = map DoubleReg $ regList mAX_Real_Double_REG +realLongRegs = map LongReg $ regList mAX_Real_Long_REG + regList :: Int -> [Int] regList n = [1 .. n] diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 646ecb5c67..a6b9b11e5f 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -345,9 +345,11 @@ instance Eq GlobalReg where SpLim == SpLim = True Hp == Hp = True HpLim == HpLim = True + CCCS == CCCS = True CurrentTSO == CurrentTSO = True CurrentNursery == CurrentNursery = True HpAlloc == HpAlloc = True + EagerBlackholeInfo == EagerBlackholeInfo = True GCEnter1 == GCEnter1 = True GCFun == GCFun = True BaseReg == BaseReg = True diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index d8c76f4d79..98008d5d0d 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -26,8 +26,6 @@ import Util import DynFlags import FastString import Outputable -import Data.Map (Map) -import qualified Data.Map as Map import qualified Data.Set as Set import Control.Monad.Fix import Data.Array as Array @@ -485,12 +483,11 @@ spOffsetForCall current_sp cont_stack args fixupStack :: StackMap -> StackMap -> [CmmNode O O] fixupStack old_stack new_stack = concatMap move new_locs where - old_map :: Map LocalReg ByteOff - old_map = Map.fromList (stackSlotRegs old_stack) + old_map = sm_regs old_stack new_locs = stackSlotRegs new_stack move (r,n) - | Just m <- Map.lookup r old_map, n == m = [] + | Just (_,m) <- lookupUFM old_map r, n == m = [] | otherwise = [CmmStore (CmmStackSlot Old n) (CmmReg (CmmLocal r))] diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 2e24dd7f82..47c30b1a0f 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -13,6 +13,7 @@ module CmmLint ( import Hoopl import Cmm import CmmUtils +import CmmLive import PprCmm () import BlockId import FastString @@ -53,7 +54,10 @@ lintCmmDecl (CmmData {}) lintCmmGraph :: CmmGraph -> CmmLint () -lintCmmGraph g = mapM_ (lintCmmBlock labels) blocks +lintCmmGraph g = cmmLiveness g `seq` mapM_ (lintCmmBlock labels) blocks + -- cmmLiveness throws an error if there are registers + -- live on entry to the graph (i.e. undefined + -- variables) where blocks = toBlockList g labels = setFromList (map entryLabel blocks) diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 5073517be9..f42626f638 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -274,12 +274,6 @@ maybeInvertComparison op MO_S_Gt r -> Just (MO_S_Le r) MO_S_Le r -> Just (MO_S_Gt r) MO_S_Ge r -> Just (MO_S_Lt r) - MO_F_Eq r -> Just (MO_F_Ne r) - MO_F_Ne r -> Just (MO_F_Eq r) - MO_F_Ge r -> Just (MO_F_Le r) - MO_F_Le r -> Just (MO_F_Ge r) - MO_F_Gt r -> Just (MO_F_Lt r) - MO_F_Lt r -> Just (MO_F_Gt r) _other -> Nothing -- ---------------------------------------------------------------------------- diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index 60704b5b32..a405a0befa 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -167,6 +167,7 @@ mkComment _ = nilOL ---------- Assignment and store mkAssign :: CmmReg -> CmmExpr -> CmmAGraph +mkAssign l (CmmReg r) | l == r = mkNop mkAssign l r = mkMiddle $ CmmAssign l r mkStore :: CmmExpr -> CmmExpr -> CmmAGraph diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index cb2b41d852..5aec9e3bbe 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -104,7 +104,8 @@ cgBind :: StgBinding -> FCode () cgBind (StgNonRec name rhs) = do { ((info, init), body) <- getCodeR $ cgRhs name rhs ; addBindC (cg_id info) info - ; emit (init <*> body) } + ; emit (body <*> init) } + -- init cannot be used in body, so slightly better to sink it eagerly cgBind (StgRec pairs) = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits -> @@ -311,11 +312,11 @@ mkRhsClosure _ bndr cc _ fvs upd_flag args body ; emit (mkComment $ mkFastString "calling allocDynClosure") ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off) ; let info_tbl = mkCmmInfo closure_info - ; (tmp, init) <- allocDynClosure info_tbl lf_info use_cc blame_cc + ; hp_plus_n <- allocDynClosure info_tbl lf_info use_cc blame_cc (map toVarArg fv_details) -- RETURN - ; regIdInfo bndr lf_info tmp init } + ; regIdInfo bndr lf_info hp_plus_n } -- Use with care; if used inappropriately, it could break invariants. stripNV :: NonVoid a -> a @@ -349,11 +350,11 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload -- BUILD THE OBJECT ; let info_tbl = mkCmmInfo closure_info - ; (tmp, init) <- allocDynClosure info_tbl lf_info + ; hp_plus_n <- allocDynClosure info_tbl lf_info use_cc blame_cc payload_w_offsets -- RETURN - ; regIdInfo bndr lf_info tmp init } + ; regIdInfo bndr lf_info hp_plus_n } mkClosureLFInfo :: Id -- The binder -> TopLevelFlag -- True of top level @@ -394,16 +395,16 @@ closureCodeBody :: Bool -- whether this is a top-level binding argSatisfactionCheck (by calling fetchAndReschedule). There info if Node points to closure is available. -- HWL -} -closureCodeBody top_lvl bndr cl_info cc args arity body fv_details - | length args == 0 -- No args i.e. thunk +closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details + | arity == 0 -- No args i.e. thunk = emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $ \(_, node, _) -> thunkCode cl_info fv_details cc node arity body where lf_info = closureLFInfo cl_info info_tbl = mkCmmInfo cl_info -closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details - = ASSERT( length args > 0 ) +closureCodeBody top_lvl bndr cl_info cc args arity body fv_details + = -- Note: args may be [], if all args are Void do { -- Allocate the global ticky counter, -- and establish the ticky-counter -- label for this block @@ -417,7 +418,7 @@ closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details -- Emit the main entry code ; emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args $ - \(offset, node, arg_regs) -> do + \(_offset, node, arg_regs) -> do -- Emit slow-entry code (for entering a closure through a PAP) { mkSlowEntryCode cl_info arg_regs @@ -426,11 +427,15 @@ closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details node_points = nodeMustPointToIt dflags lf_info node' = if node_points then Just node else Nothing ; tickyEnterFun cl_info + ; enterCostCentreFun cc + (CmmMachOp mo_wordSub + [ CmmReg nodeReg + , CmmLit (mkIntCLit (funTag cl_info)) ]) ; whenC node_points (ldvEnterClosure cl_info) ; granYield arg_regs node_points -- Main payload - ; entryHeapCheck cl_info offset node' arity arg_regs $ do + ; entryHeapCheck cl_info node' arity arg_regs $ do { fv_bindings <- mapM bind_fv fv_details -- Load free vars out of closure *after* -- heap check, to reduce live vars over check @@ -463,7 +468,6 @@ mkSlowEntryCode :: ClosureInfo -> [LocalReg] -> FCode () -- If this function doesn't have a specialised ArgDescr, we need -- to generate the function's arg bitmap and slow-entry code. -- Here, we emit the slow-entry code. -mkSlowEntryCode _ [] = panic "entering a closure with no arguments?" mkSlowEntryCode cl_info arg_regs -- function closure is already in `Node' | Just (_, ArgGen _) <- closureFunInfo cl_info = do dflags <- getDynFlags @@ -489,7 +493,7 @@ thunkCode cl_info fv_details _cc node arity body ; granThunk node_points -- Heap overflow check - ; entryHeapCheck cl_info 0 node' arity [] $ do + ; entryHeapCheck cl_info node' arity [] $ do { -- Overwrite with black hole if necessary -- but *after* the heap-overflow check ; whenC (blackHoleOnEntry cl_info && node_points) @@ -574,16 +578,15 @@ setupUpdate closure_info node body lbl | bh = mkBHUpdInfoLabel | otherwise = mkUpdInfoLabel - pushUpdateFrame [CmmReg (CmmLocal node), mkLblExpr lbl] body + pushUpdateFrame lbl (CmmReg (CmmLocal node)) body | otherwise -- A static closure = do { tickyUpdateBhCaf closure_info ; if closureUpdReqd closure_info then do -- Blackhole the (updatable) CAF: - { upd_closure <- link_caf True - ; pushUpdateFrame [upd_closure, - mkLblExpr mkBHUpdInfoLabel] body } + { upd_closure <- link_caf node True + ; pushUpdateFrame mkBHUpdInfoLabel upd_closure body } else do {tickyUpdateFrameOmitted; body} } @@ -593,16 +596,21 @@ setupUpdate closure_info node body -- Push the update frame on the stack in the Entry area, -- leaving room for the return address that is already -- at the old end of the area. -pushUpdateFrame :: [CmmExpr] -> FCode () -> FCode () -pushUpdateFrame es body - = do -- [EZY] I'm not sure if we need to special-case for BH too +-- +pushUpdateFrame :: CLabel -> CmmExpr -> FCode () -> FCode () +pushUpdateFrame lbl updatee body + = do updfr <- getUpdFrameOff - offset <- foldM push updfr es - withUpdFrameOff offset body - where push off e = - do emitStore (CmmStackSlot Old base) e - return base - where base = off + widthInBytes (cmmExprWidth e) + dflags <- getDynFlags + let + hdr = fixedHdrSize dflags * wORD_SIZE + frame = updfr + hdr + sIZEOF_StgUpdateFrame_NoHdr + off_updatee = hdr + oFFSET_StgUpdateFrame_updatee + -- + emitStore (CmmStackSlot Old frame) (mkLblExpr lbl) + emitStore (CmmStackSlot Old (frame - off_updatee)) updatee + initUpdFrameProf frame + withUpdFrameOff frame body ----------------------------------------------------------------------------- -- Entering a CAF @@ -637,7 +645,8 @@ pushUpdateFrame es body -- be closer together, and the compiler wouldn't need to know -- about off_indirectee etc. -link_caf :: Bool -- True <=> updatable, False <=> single-entry +link_caf :: LocalReg -- pointer to the closure + -> Bool -- True <=> updatable, False <=> single-entry -> FCode CmmExpr -- Returns amode for closure to be updated -- To update a CAF we must allocate a black hole, link the CAF onto the -- CAF list, then update the CAF to point to the fresh black hole. @@ -645,7 +654,7 @@ link_caf :: Bool -- True <=> updatable, False <=> single-entry -- updated with the new value when available. The reason for all of this -- is that we only want to update dynamic heap objects, not static ones, -- so that generational GC is easier. -link_caf _is_upd = do +link_caf node _is_upd = do { dflags <- getDynFlags -- Alloc black hole specifying CC_HDR(Node) as the cost centre ; let use_cc = costCentreFrom (CmmReg nodeReg) @@ -668,9 +677,9 @@ link_caf _is_upd = do ; ret <- newTemp bWord ; emitRtsCallGen [(ret,NoHint)] rtsPackageId (fsLit "newCAF") [ (CmmReg (CmmGlobal BaseReg), AddrHint), - (CmmReg nodeReg, AddrHint), + (CmmReg (CmmLocal node), AddrHint), (hp_rel, AddrHint) ] - (Just [node]) False + False -- node is live, so save it. -- see Note [atomic CAF entry] in rts/sm/Storage.c @@ -680,7 +689,7 @@ link_caf _is_upd = do -- re-enter R1. Doing this directly is slightly dodgy; we're -- assuming lots of things, like the stack pointer hasn't -- moved since we entered the CAF. - (let target = entryCode dflags (closureInfoPtr (CmmReg nodeReg)) in + (let target = entryCode dflags (closureInfoPtr (CmmReg (CmmLocal node))) in mkJump dflags target [] updfr) ; return hp_rel } diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index 3efa63d770..23226bb45e 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -210,9 +210,9 @@ buildDynCon' dflags _ binder ccs con args -- No void args in args_w_offsets nonptr_wds = tot_wds - ptr_wds info_tbl = mkDataConInfoTable dflags con False ptr_wds nonptr_wds - ; (tmp, init) <- allocDynClosure info_tbl lf_info + ; hp_plus_n <- allocDynClosure info_tbl lf_info use_cc blame_cc args_w_offsets - ; regIdInfo binder lf_info tmp init } + ; regIdInfo binder lf_info hp_plus_n } where lf_info = mkConLFInfo con diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index 67953ce95a..4d91451628 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -44,7 +44,7 @@ import CLabel import BlockId import CmmExpr import CmmUtils -import MkGraph (CmmAGraph, mkAssign, (<*>)) +import MkGraph (CmmAGraph, mkAssign) import FastString import Id import VarEnv @@ -103,13 +103,12 @@ lneIdInfo id regs -- register, and store a plain register in the CgIdInfo. We allocate -- a new register in order to keep single-assignment and help out the -- inliner. -- EZY -regIdInfo :: Id -> LambdaFormInfo -> LocalReg -> CmmAGraph -> FCode (CgIdInfo, CmmAGraph) -regIdInfo id lf_info reg init - = do { reg' <- newTemp (localRegType reg) - ; let init' = init <*> mkAssign (CmmLocal reg') - (addDynTag (CmmReg (CmmLocal reg)) - (lfDynTag lf_info)) - ; return (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg')), init') } +regIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> FCode (CgIdInfo, CmmAGraph) +regIdInfo id lf_info expr + = do { reg <- newTemp (cmmExprType expr) + ; let init = mkAssign (CmmLocal reg) + (addDynTag expr (lfDynTag lf_info)) + ; return (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)), init) } idInfoToAmode :: CgIdInfo -> CmmExpr -- Returns a CmmExpr for the *tagged* pointer diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 1d016d6b3d..cf3dc67dfc 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -432,8 +432,8 @@ cgCase scrut bndr alt_type alts ----------------- maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) maybeSaveCostCentre simple_scrut - | simple_scrut = saveCurrentCostCentre - | otherwise = return Nothing + | simple_scrut = return Nothing + | otherwise = saveCurrentCostCentre ----------------- diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index d3bf17f7d7..12f3b1347e 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -15,7 +15,7 @@ module StgCmmHeap ( mkVirtHeapOffsets, mkVirtConstrOffsets, mkStaticClosureFields, mkStaticClosure, - allocDynClosure, allocDynClosureReg, allocDynClosureCmm, + allocDynClosure, allocDynClosureCmm, emitSetDynHdr ) where @@ -63,12 +63,7 @@ allocDynClosure -> [(NonVoid StgArg, VirtualHpOffset)] -- Offsets from start of object -- ie Info ptr has offset zero. -- No void args in here - -> FCode (LocalReg, CmmAGraph) - -allocDynClosureReg - :: CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr - -> [(CmmExpr, VirtualHpOffset)] - -> FCode (LocalReg, CmmAGraph) + -> FCode CmmExpr -- returns Hp+n allocDynClosureCmm :: CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr @@ -81,32 +76,25 @@ allocDynClosureCmm -- returned LocalReg, which should point to the closure after executing -- the graph. --- Note [Return a LocalReg] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr. --- Reason: --- ...allocate object... --- obj = Hp + 8 --- y = f(z) --- ...here obj is still valid, --- but Hp+8 means something quite different... +-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is +-- only valid until Hp is changed. The caller should assign the +-- result to a LocalReg if it is required to remain live. +-- +-- The reason we don't assign it to a LocalReg here is that the caller +-- is often about to call regIdInfo, which immediately assigns the +-- result of allocDynClosure to a new temp in order to add the tag. +-- So by not generating a LocalReg here we avoid a common source of +-- new temporaries and save some compile time. This can be quite +-- significant - see test T4801. allocDynClosure info_tbl lf_info use_cc _blame_cc args_w_offsets = do { let (args, offsets) = unzip args_w_offsets ; cmm_args <- mapM getArgAmode args -- No void args - ; allocDynClosureReg info_tbl lf_info + ; allocDynClosureCmm info_tbl lf_info use_cc _blame_cc (zip cmm_args offsets) } -allocDynClosureReg info_tbl lf_info use_cc _blame_cc amodes_w_offsets - = do { hp_rel <- allocDynClosureCmm info_tbl lf_info - use_cc _blame_cc amodes_w_offsets - - -- Note [Return a LocalReg] - ; getCodeR $ assignTemp hp_rel - } - allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do { virt_hp <- getVirtHp @@ -340,14 +328,13 @@ These are used in the following circumstances -- A heap/stack check at a function or thunk entry point. entryHeapCheck :: ClosureInfo - -> Int -- Arg Offset -> Maybe LocalReg -- Function (closure environment) -> Int -- Arity -- not same as len args b/c of voids -> [LocalReg] -- Non-void args (empty for thunk) -> FCode () -> FCode () -entryHeapCheck cl_info offset nodeSet arity args code +entryHeapCheck cl_info nodeSet arity args code = do dflags <- getDynFlags let is_thunk = arity == 0 is_fastf = case closureFunInfo cl_info of @@ -355,25 +342,31 @@ entryHeapCheck cl_info offset nodeSet arity args code _otherwise -> True args' = map (CmmReg . CmmLocal) args - setN = case nodeSet of - Just _ -> mkNop -- No need to assign R1, it already - -- points to the closure - Nothing -> mkAssign nodeReg $ - CmmLit (CmmLabel $ staticClosureLabel cl_info) - - {- Thunks: jump GCEnter1 - Function (fast): Set R1 = node, jump GCFun - Function (slow): Set R1 = node, call generic_gc -} - gc_call upd = setN <*> gc_lbl upd - gc_lbl upd - | is_thunk = mkDirectJump dflags (CmmReg $ CmmGlobal GCEnter1) [] sp - | is_fastf = mkDirectJump dflags (CmmReg $ CmmGlobal GCFun) [] sp - | otherwise = mkForeignJump dflags Slow (CmmReg $ CmmGlobal GCFun) args' upd - where sp = max offset upd - {- DT (12/08/10) This is a little fishy, mainly the sp fix up amount. - - This is since the ncg inserts spills before the stack/heap check. - - This should be fixed up and then we won't need to fix up the Sp on - - GC calls, but until then this fishy code works -} + node = case nodeSet of + Just r -> CmmReg (CmmLocal r) + Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info) + stg_gc_fun = CmmReg (CmmGlobal GCFun) + stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1) + + {- Thunks: jump stg_gc_enter_1 + + Function (fast): call (NativeNode) stg_gc_fun(fun, args) + + Function (slow): R1 = fun + call (slow) stg_gc_fun(args) + XXX: this is a bit naughty, we should really pass R1 as an + argument and use a special calling convention. + -} + gc_call upd + | is_thunk + = mkJump dflags stg_gc_enter1 [node] upd + + | is_fastf + = mkJump dflags stg_gc_fun (node : args') upd + + | otherwise + = mkAssign nodeReg node <*> + mkForeignJump dflags Slow stg_gc_fun args' upd updfr_sz <- getUpdFrameOff diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index 5031693cc5..56c02d040f 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -19,7 +19,7 @@ module StgCmmProf ( -- Cost-centre Profiling dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, - enterCostCentreThunk, + enterCostCentreThunk, enterCostCentreFun, costCentreFrom, curCCS, storeCurCCS, emitSetCCC, @@ -99,11 +99,11 @@ dynProfHdr :: DynFlags -> CmmExpr -> [CmmExpr] -- Profiling header words in a dynamic closure dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit] -initUpdFrameProf :: CmmExpr -> FCode () +initUpdFrameProf :: ByteOff -> FCode () -- Initialise the profiling field of an update frame -initUpdFrameProf frame_amode +initUpdFrameProf frame_off = ifProfiling $ -- frame->header.prof.ccs = CCCS - emitStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS + emitStore (CmmStackSlot Old (frame_off - oFFSET_StgHeader_ccs)) curCCS -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) -- is unnecessary because it is not used anyhow. @@ -190,6 +190,15 @@ enterCostCentreThunk closure = ifProfiling $ do emit $ storeCurCCS (costCentreFrom closure) +enterCostCentreFun :: CostCentreStack -> CmmExpr -> FCode () +enterCostCentreFun ccs closure = + ifProfiling $ do + if isCurrentCCS ccs + then emitRtsCall rtsPackageId (fsLit "enterFunCCS") + [(CmmReg (CmmGlobal BaseReg), AddrHint), + (costCentreFrom closure, AddrHint)] False + else return () -- top-level function, nothing to do + ifProfiling :: FCode () -> FCode () ifProfiling code = do dflags <- getDynFlags @@ -224,20 +233,19 @@ emitCostCentreDecl cc = do $ Module.moduleName $ cc_mod cc) ; dflags <- getDynFlags - ; loc <- newStringCLit (showPpr dflags (costCentreSrcSpan cc)) - -- XXX should UTF-8 encode - -- All cost centres will be in the main package, since we - -- don't normally use -auto-all or add SCCs to other packages. - -- Hence don't emit the package name in the module here. - ; let lits = [ zero, -- StgInt ccID, - label, -- char *label, - modl, -- char *module, - loc, -- char *srcloc, - zero64, -- StgWord64 mem_alloc - zero, -- StgWord time_ticks - is_caf, -- StgInt is_caf - zero -- struct _CostCentre *link - ] + ; loc <- newByteStringCLit $ bytesFS $ mkFastString $ + showPpr dflags (costCentreSrcSpan cc) + -- XXX going via FastString to get UTF-8 encoding is silly + ; let + lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + loc, -- char *srcloc, + zero64, -- StgWord64 mem_alloc + zero, -- StgWord time_ticks + is_caf, -- StgInt is_caf + zero -- struct _CostCentre *link + ] ; emitDataLits (mkCCLabel cc) lits } where @@ -289,7 +297,7 @@ pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () pushCostCentre result ccs cc = emitRtsCallWithResult result AddrHint rtsPackageId - (fsLit "PushCostCentre") [(ccs,AddrHint), + (fsLit "pushCostCentre") [(ccs,AddrHint), (CmmLit (mkCCostCentre cc), AddrHint)] False diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index af2b0203ec..13c8eccb9a 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -17,7 +17,7 @@ module StgCmmUtils ( cgLit, mkSimpleLit, emitDataLits, mkDataLits, emitRODataLits, mkRODataLits, - emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, emitRtsCallGen, + emitRtsCall, emitRtsCallWithResult, emitRtsCallGen, assignTemp, newTemp, newUnboxedTupleRegs, @@ -179,17 +179,12 @@ tagToClosure tycon tag ------------------------------------------------------------------------- emitRtsCall :: PackageId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () -emitRtsCall pkg fun args safe = emitRtsCallGen [] pkg fun args Nothing safe - -- The 'Nothing' says "save all global registers" - -emitRtsCallWithVols :: PackageId -> FastString -> [(CmmExpr,ForeignHint)] -> [GlobalReg] -> Bool -> FCode () -emitRtsCallWithVols pkg fun args vols safe - = emitRtsCallGen [] pkg fun args (Just vols) safe +emitRtsCall pkg fun args safe = emitRtsCallGen [] pkg fun args safe emitRtsCallWithResult :: LocalReg -> ForeignHint -> PackageId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () emitRtsCallWithResult res hint pkg fun args safe - = emitRtsCallGen [(res,hint)] pkg fun args Nothing safe + = emitRtsCallGen [(res,hint)] pkg fun args safe -- Make a call to an RTS C procedure emitRtsCallGen @@ -197,10 +192,9 @@ emitRtsCallGen -> PackageId -> FastString -> [(CmmExpr,ForeignHint)] - -> Maybe [GlobalReg] -> Bool -- True <=> CmmSafe call -> FCode () -emitRtsCallGen res pkg fun args _vols safe +emitRtsCallGen res pkg fun args safe = do { dflags <- getDynFlags ; updfr_off <- getUpdFrameOff ; let (caller_save, caller_load) = callerSaveVolatileRegs dflags diff --git a/rts/Schedule.c b/rts/Schedule.c index dee71c4676..310e68e629 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -1658,21 +1658,30 @@ delete_threads_and_gc: traceSparkCounters(cap); - if (recent_activity == ACTIVITY_INACTIVE && force_major) - { - // We are doing a GC because the system has been idle for a - // timeslice and we need to check for deadlock. Record the - // fact that we've done a GC and turn off the timer signal; - // it will get re-enabled if we run any threads after the GC. - recent_activity = ACTIVITY_DONE_GC; - stopTimer(); - } - else - { + switch (recent_activity) { + case ACTIVITY_INACTIVE: + if (force_major) { + // We are doing a GC because the system has been idle for a + // timeslice and we need to check for deadlock. Record the + // fact that we've done a GC and turn off the timer signal; + // it will get re-enabled if we run any threads after the GC. + recent_activity = ACTIVITY_DONE_GC; + stopTimer(); + break; + } + // fall through... + + case ACTIVITY_MAYBE_NO: // the GC might have taken long enough for the timer to set - // recent_activity = ACTIVITY_INACTIVE, but we aren't - // necessarily deadlocked: + // recent_activity = ACTIVITY_MAYBE_NO or ACTIVITY_INACTIVE, + // but we aren't necessarily deadlocked: recent_activity = ACTIVITY_YES; + break; + + case ACTIVITY_DONE_GC: + // If we are actually active, the scheduler will reset the + // recent_activity flag and re-enable the timer. + break; } #if defined(THREADED_RTS) diff --git a/rules/build-package-way.mk b/rules/build-package-way.mk index cf89e1e823..18ac917099 100644 --- a/rules/build-package-way.mk +++ b/rules/build-package-way.mk @@ -76,7 +76,7 @@ else $$($1_$2_$3_LIB) : $$($1_$2_$3_ALL_OBJS) $$(ALL_RTS_LIBS) $$($1_$2_$3_DEPS_LIBS) $$(call cmd,$1_$2_HC) $$($1_$2_$3_ALL_HC_OPTS) $$($1_$2_$3_ALL_OBJS) \ -shared -dynamic -dynload deploy \ - $$(addprefix -l,$$($1_$2_EXTRA_LIBRARIES)) \ + $$(addprefix -l,$$($1_$2_EXTRA_LIBRARIES)) $$(addprefix -L,$$($1_$2_EXTRA_LIBDIRS)) \ -dylib-install-name $(ghclibdir)/`basename "$$@" | sed 's/^libHS//;s/[-]ghc.*//'`/`basename "$$@"` \ -no-auto-link-packages \ -o $$@ |