diff options
author | dias@eecs.harvard.edu <unknown> | 2008-10-17 17:07:07 +0000 |
---|---|---|
committer | dias@eecs.harvard.edu <unknown> | 2008-10-17 17:07:07 +0000 |
commit | 6bc92166180824bf046d31e378359e3c386150f9 (patch) | |
tree | 20ed1d073150c1ef7ad5deb31dbfec27253b5eae /compiler/codeGen | |
parent | c62b824e9e8808eb3845ddb1614494b0575eaafd (diff) | |
download | haskell-6bc92166180824bf046d31e378359e3c386150f9.tar.gz |
Removed warnings, made Haddock happy, added examples in documentation
The interesting examples talk about our story with heap checks in
case alternatives and our story with the case scrutinee as a Boolean.
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 19 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 103 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmForeign.hs | 2 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmHeap.hs | 7 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmLayout.hs | 3 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 4 |
6 files changed, 103 insertions, 35 deletions
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 04676787fe..a78abc751a 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -84,7 +84,7 @@ cgTopRhsClosure id ccs _ upd_flag srt args body = do (_, _, fv_details) = mkVirtHeapOffsets (isLFThunk lf_info) (addIdReps []) -- Don't drop the non-void args until the closure info has been made - ; forkClosureBody (closureCodeBody True id closure_info ccs srt_info + ; forkClosureBody (closureCodeBody True id closure_info ccs (nonVoidIds args) (length args) body fv_details) ; pprTrace "arity for" (ppr id <+> ppr (length args) <+> ppr args) $ @@ -293,7 +293,7 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere -- (b) ignore Sequel from context; use empty Sequel -- And compile the body - closureCodeBody False bndr closure_info cc c_srt (nonVoidIds args) + closureCodeBody False bndr closure_info cc (nonVoidIds args) (length args) body fv_details -- BUILD THE OBJECT @@ -361,7 +361,6 @@ closureCodeBody :: Bool -- whether this is a top-level binding -> Id -- the closure's name -> ClosureInfo -- Lots of information about this closure -> CostCentreStack -- Optional cost centre attached to closure - -> C_SRT -> [NonVoid Id] -- incoming args to the closure -> Int -- arity, including void args -> StgExpr @@ -381,12 +380,12 @@ closureCodeBody :: Bool -- whether this is a top-level binding argSatisfactionCheck (by calling fetchAndReschedule). There info if Node points to closure is available. -- HWL -} -closureCodeBody top_lvl bndr cl_info cc srt args arity body fv_details +closureCodeBody top_lvl bndr cl_info cc args arity body fv_details | length args == 0 -- No args i.e. thunk = emitClosureProcAndInfoTable top_lvl bndr cl_info [] $ - (\ (node, _) -> thunkCode cl_info fv_details cc srt node arity body) + (\ (node, _) -> thunkCode cl_info fv_details cc node arity body) -closureCodeBody top_lvl bndr cl_info cc srt args arity body fv_details +closureCodeBody top_lvl bndr cl_info cc args arity body fv_details = ASSERT( length args > 0 ) do { -- Allocate the global ticky counter, -- and establish the ticky-counter @@ -407,7 +406,7 @@ closureCodeBody top_lvl bndr cl_info cc srt args arity body fv_details ; granYield arg_regs node_points -- Main payload - ; entryHeapCheck node arity arg_regs srt $ do + ; entryHeapCheck node arity arg_regs $ do { enterCostCentre cl_info cc body ; fv_bindings <- mapM bind_fv fv_details ; load_fvs node lf_info fv_bindings -- Load free vars out of closure *after* @@ -454,15 +453,15 @@ mkSlowEntryCode _ [] = panic "entering a closure with no arguments?" ----------------------------------------- thunkCode :: ClosureInfo -> [(NonVoid Id, VirtualHpOffset)] -> CostCentreStack -> - C_SRT -> LocalReg -> Int -> StgExpr -> FCode () -thunkCode cl_info fv_details cc srt node arity body + LocalReg -> Int -> StgExpr -> FCode () +thunkCode cl_info fv_details cc node arity body = do { let node_points = nodeMustPointToIt (closureLFInfo cl_info) ; tickyEnterThunk cl_info ; ldvEnterClosure cl_info -- NB: Node always points when profiling ; granThunk node_points -- Heap overflow check - ; entryHeapCheck node arity [] srt $ do + ; entryHeapCheck node arity [] $ do { -- Overwrite with black hole if necessary -- but *after* the heap-overflow check whenC (blackHoleOnEntry cl_info && node_points) diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index dac7d67c09..3b6aac9790 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -115,10 +115,10 @@ cgLetNoEscapeRhs local_cc bndr rhs = ; return info } -cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd srt args body) - = cgLetNoEscapeClosure bndr local_cc cc srt (nonVoidIds args) body +cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body) + = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args) - = cgLetNoEscapeClosure bndr local_cc cc NoSRT [] (StgConApp con args) + = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args) -- For a constructor RHS we want to generate a single chunk of -- code which can be jumped to from many places, which will -- return the constructor. It's easy; just behave as if it @@ -129,17 +129,15 @@ cgLetNoEscapeClosure :: Id -- binder -> Maybe LocalReg -- Slot for saved current cost centre -> CostCentreStack -- XXX: *** NOT USED *** why not? - -> SRT -> [NonVoid Id] -- Args (as in \ args -> body) -> StgExpr -- Body (as in above) -> FCode CgIdInfo -cgLetNoEscapeClosure bndr cc_slot _unused_cc srt args body +cgLetNoEscapeClosure bndr cc_slot _unused_cc args body = do { arg_regs <- forkProc $ do { restoreCurrentCostCentre cc_slot ; arg_regs <- bindArgsToRegs args - ; c_srt <- getSRTInfo srt - ; altHeapCheck arg_regs c_srt (cgExpr body) + ; altHeapCheck arg_regs (cgExpr body) -- Using altHeapCheck just reduces -- instructions to save on stack ; return arg_regs } @@ -262,11 +260,14 @@ data GcPlan -- of the case alternative(s) into the upstream check ------------------------------------- +-- See Note [case on Bool] cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () --- cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] - -- | isBoolTy (idType bndr) - -- , isDeadBndr bndr - -- = +{- +cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] + | isBoolTy (idType bndr) + , isDeadBndr bndr + = +-} cgCase scrut bndr srt alt_type alts = do { up_hp_usg <- getVirtHp -- Upstream heap usage @@ -280,10 +281,10 @@ cgCase scrut bndr srt alt_type alts gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts ; mb_cc <- maybeSaveCostCentre simple_scrut - ; c_srt <- getSRTInfo srt ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) ; restoreCurrentCostCentre mb_cc + -- JD: We need Note: [Better Alt Heap Checks] ; bindArgsToRegs ret_bndrs ; cgAlts gc_plan (NonVoid bndr) alt_type alts } @@ -402,9 +403,8 @@ cgAltRhss gc_plan bndr alts maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a maybeAltHeapCheck NoGcInAlts code = code -maybeAltHeapCheck (GcInAlts regs srt) code - = do { c_srt <- getSRTInfo srt - ; altHeapCheck regs c_srt code } +maybeAltHeapCheck (GcInAlts regs _) code + = altHeapCheck regs code ----------------------------------------------------------------------------- -- Tail calls @@ -482,4 +482,77 @@ cgTailCall fun_id fun_info args node_points = nodeMustPointToIt lf_info +{- Note [case on Bool] + ~~~~~~~~~~~~~~~~~~~ +A case on a Boolean value does two things: + 1. It looks up the Boolean in a closure table and assigns the + result to the binder. + 2. It branches to the True or False case through analysis + of the closure assigned to the binder. +But the indirection through the closure table is unnecessary +if the assignment to the binder will be dead code (use isDeadBndr). + +The following example illustrates how badly the code turns out: + STG: + case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 { + GHC.Bool.False -> <true code> // sbH8 dead + GHC.Bool.True -> <false code> // sbH8 dead + }; + Cmm: + _s7HD::F64 = F64[_sbH7::I64 + 7]; // MidAssign + _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64); // MidAssign + // emitReturn // MidComment + _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)]; // MidAssign + _ccsX::I64 = _sbH8::I64 & 7; // MidAssign + if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI; // LastCondBranch + +The assignments to _sbH8 and _ccsX are completely unnecessary. +Instead, we should branch based on the value of _ccsW. +-} +{- Note [Better Alt Heap Checks] +If two function calls can share a return point, then they will also +get the same info table. Therefore, it's worth our effort to make +those opportunities appear as frequently as possible. + +Here are a few examples of how it should work: + + STG: + case f x of + True -> <True code -- including allocation> + False -> <False code> + Cmm: + r = call f(x) returns to L; + L: + if r & 7 >= 2 goto L1 else goto L2; + L1: + if Hp > HpLim then + r = gc(r); + goto L; + <True code -- including allocation> + L2: + <False code> +Note that the code following both the call to f(x) and the code to gc(r) +should be the same, which will allow the common blockifier to discover +that they are the same. Therefore, both function calls will return to the same +block, and they will use the same info table. + +Here's an example of the Cmm code we want from a primOp. +The primOp doesn't produce an info table for us to reuse, but that's okay: +we should still generate the same code: + STG: + case f x of + 0 -> <0-case code -- including allocation> + _ -> <default-case code> + Cmm: + r = a +# b; + L: + if r == 0 then goto L1 else goto L2; + L1: + if Hp > HpLim then + r = gc(r); + goto L; + <0-case code -- including allocation> + L2: + <default-case code> +-} diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 2a6b794e2d..2735b69424 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -117,7 +117,7 @@ emitForeignCall -- only RTS procedures do this -> FCode () emitForeignCall safety results target args _srt ret - | not (playSafe safety) = do -- trace "emitForeignCall; ret is undone" $ do + | not (playSafe safety) = do let (caller_save, caller_load) = callerSaveVolatileRegs updfr_off <- getUpdFrameOff emit caller_save diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 3f803d1d65..713857929a 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -337,11 +337,10 @@ These are used in the following circumstances entryHeapCheck :: LocalReg -- Function (closure environment) -> Int -- Arity -- not same as length args b/c of voids -> [LocalReg] -- Non-void args (empty for thunk) - -> C_SRT -> FCode () -> FCode () -entryHeapCheck fun arity args srt code +entryHeapCheck fun arity args code = do updfr_sz <- getUpdFrameOff heapCheck True (gc_call updfr_sz) code -- The 'fun' keeps relevant CAFs alive where @@ -381,8 +380,8 @@ entryHeapCheck fun arity args srt code gc_lbl_ptrs _ = Nothing -altHeapCheck :: [LocalReg] -> C_SRT -> FCode a -> FCode a -altHeapCheck regs srt code +altHeapCheck :: [LocalReg] -> FCode a -> FCode a +altHeapCheck regs code = do updfr_sz <- getUpdFrameOff heapCheck False (gc_call updfr_sz) code where diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 33fd3e8d5a..74bac43108 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -166,9 +166,6 @@ direct_call caller lbl arity args reps | otherwise -- Over-saturated call = ASSERT( arity == length initial_reps ) do { pap_id <- newTemp gcWord - ; let srt = pprTrace "Urk! SRT for over-sat call" - (ppr lbl) NoC_SRT - -- XXX: what if rest_args contains static refs? ; withSequel (AssignTo [pap_id] True) (emitCall Native target fast_args) ; slow_call (CmmReg (CmmLocal pap_id)) diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 057e5597e8..4803f5fba7 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -883,10 +883,10 @@ getSRTInfo (SRTEntries {}) = panic "getSRTInfo" getSRTInfo (SRT off len bmp) | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] = do { id <- newUnique - ; top_srt <- getSRTLabel + -- ; top_srt <- getSRTLabel ; let srt_desc_lbl = mkLargeSRTLabel id -- JD: We're not constructing and emitting SRTs in the back end, - -- which renders this code wrong (and it now names a now-non-existent label). + -- which renders this code wrong (it now names a now-non-existent label). -- ; emitRODataLits srt_desc_lbl -- ( cmmLabelOffW top_srt off -- : mkWordCLit (fromIntegral len) |