summaryrefslogtreecommitdiff
path: root/compiler/codeGen
diff options
context:
space:
mode:
authordias@eecs.harvard.edu <unknown>2008-10-17 17:07:07 +0000
committerdias@eecs.harvard.edu <unknown>2008-10-17 17:07:07 +0000
commit6bc92166180824bf046d31e378359e3c386150f9 (patch)
tree20ed1d073150c1ef7ad5deb31dbfec27253b5eae /compiler/codeGen
parentc62b824e9e8808eb3845ddb1614494b0575eaafd (diff)
downloadhaskell-6bc92166180824bf046d31e378359e3c386150f9.tar.gz
Removed warnings, made Haddock happy, added examples in documentation
The interesting examples talk about our story with heap checks in case alternatives and our story with the case scrutinee as a Boolean.
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--compiler/codeGen/StgCmmBind.hs19
-rw-r--r--compiler/codeGen/StgCmmExpr.hs103
-rw-r--r--compiler/codeGen/StgCmmForeign.hs2
-rw-r--r--compiler/codeGen/StgCmmHeap.hs7
-rw-r--r--compiler/codeGen/StgCmmLayout.hs3
-rw-r--r--compiler/codeGen/StgCmmUtils.hs4
6 files changed, 103 insertions, 35 deletions
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 04676787fe..a78abc751a 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -84,7 +84,7 @@ cgTopRhsClosure id ccs _ upd_flag srt args body = do
(_, _, fv_details) = mkVirtHeapOffsets (isLFThunk lf_info)
(addIdReps [])
-- Don't drop the non-void args until the closure info has been made
- ; forkClosureBody (closureCodeBody True id closure_info ccs srt_info
+ ; forkClosureBody (closureCodeBody True id closure_info ccs
(nonVoidIds args) (length args) body fv_details)
; pprTrace "arity for" (ppr id <+> ppr (length args) <+> ppr args) $
@@ -293,7 +293,7 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body
-- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere
-- (b) ignore Sequel from context; use empty Sequel
-- And compile the body
- closureCodeBody False bndr closure_info cc c_srt (nonVoidIds args)
+ closureCodeBody False bndr closure_info cc (nonVoidIds args)
(length args) body fv_details
-- BUILD THE OBJECT
@@ -361,7 +361,6 @@ closureCodeBody :: Bool -- whether this is a top-level binding
-> Id -- the closure's name
-> ClosureInfo -- Lots of information about this closure
-> CostCentreStack -- Optional cost centre attached to closure
- -> C_SRT
-> [NonVoid Id] -- incoming args to the closure
-> Int -- arity, including void args
-> StgExpr
@@ -381,12 +380,12 @@ closureCodeBody :: Bool -- whether this is a top-level binding
argSatisfactionCheck (by calling fetchAndReschedule).
There info if Node points to closure is available. -- HWL -}
-closureCodeBody top_lvl bndr cl_info cc srt args arity body fv_details
+closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
| length args == 0 -- No args i.e. thunk
= emitClosureProcAndInfoTable top_lvl bndr cl_info [] $
- (\ (node, _) -> thunkCode cl_info fv_details cc srt node arity body)
+ (\ (node, _) -> thunkCode cl_info fv_details cc node arity body)
-closureCodeBody top_lvl bndr cl_info cc srt args arity body fv_details
+closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
= ASSERT( length args > 0 )
do { -- Allocate the global ticky counter,
-- and establish the ticky-counter
@@ -407,7 +406,7 @@ closureCodeBody top_lvl bndr cl_info cc srt args arity body fv_details
; granYield arg_regs node_points
-- Main payload
- ; entryHeapCheck node arity arg_regs srt $ do
+ ; entryHeapCheck node arity arg_regs $ do
{ enterCostCentre cl_info cc body
; fv_bindings <- mapM bind_fv fv_details
; load_fvs node lf_info fv_bindings -- Load free vars out of closure *after*
@@ -454,15 +453,15 @@ mkSlowEntryCode _ [] = panic "entering a closure with no arguments?"
-----------------------------------------
thunkCode :: ClosureInfo -> [(NonVoid Id, VirtualHpOffset)] -> CostCentreStack ->
- C_SRT -> LocalReg -> Int -> StgExpr -> FCode ()
-thunkCode cl_info fv_details cc srt node arity body
+ LocalReg -> Int -> StgExpr -> FCode ()
+thunkCode cl_info fv_details cc node arity body
= do { let node_points = nodeMustPointToIt (closureLFInfo cl_info)
; tickyEnterThunk cl_info
; ldvEnterClosure cl_info -- NB: Node always points when profiling
; granThunk node_points
-- Heap overflow check
- ; entryHeapCheck node arity [] srt $ do
+ ; entryHeapCheck node arity [] $ do
{ -- Overwrite with black hole if necessary
-- but *after* the heap-overflow check
whenC (blackHoleOnEntry cl_info && node_points)
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index dac7d67c09..3b6aac9790 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -115,10 +115,10 @@ cgLetNoEscapeRhs local_cc bndr rhs =
; return info
}
-cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd srt args body)
- = cgLetNoEscapeClosure bndr local_cc cc srt (nonVoidIds args) body
+cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
+ = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
- = cgLetNoEscapeClosure bndr local_cc cc NoSRT [] (StgConApp con args)
+ = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
-- For a constructor RHS we want to generate a single chunk of
-- code which can be jumped to from many places, which will
-- return the constructor. It's easy; just behave as if it
@@ -129,17 +129,15 @@ cgLetNoEscapeClosure
:: Id -- binder
-> Maybe LocalReg -- Slot for saved current cost centre
-> CostCentreStack -- XXX: *** NOT USED *** why not?
- -> SRT
-> [NonVoid Id] -- Args (as in \ args -> body)
-> StgExpr -- Body (as in above)
-> FCode CgIdInfo
-cgLetNoEscapeClosure bndr cc_slot _unused_cc srt args body
+cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
= do { arg_regs <- forkProc $ do
{ restoreCurrentCostCentre cc_slot
; arg_regs <- bindArgsToRegs args
- ; c_srt <- getSRTInfo srt
- ; altHeapCheck arg_regs c_srt (cgExpr body)
+ ; altHeapCheck arg_regs (cgExpr body)
-- Using altHeapCheck just reduces
-- instructions to save on stack
; return arg_regs }
@@ -262,11 +260,14 @@ data GcPlan
-- of the case alternative(s) into the upstream check
-------------------------------------
+-- See Note [case on Bool]
cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode ()
--- cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2]
- -- | isBoolTy (idType bndr)
- -- , isDeadBndr bndr
- -- =
+{-
+cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2]
+ | isBoolTy (idType bndr)
+ , isDeadBndr bndr
+ =
+-}
cgCase scrut bndr srt alt_type alts
= do { up_hp_usg <- getVirtHp -- Upstream heap usage
@@ -280,10 +281,10 @@ cgCase scrut bndr srt alt_type alts
gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts
; mb_cc <- maybeSaveCostCentre simple_scrut
- ; c_srt <- getSRTInfo srt
; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut)
; restoreCurrentCostCentre mb_cc
+ -- JD: We need Note: [Better Alt Heap Checks]
; bindArgsToRegs ret_bndrs
; cgAlts gc_plan (NonVoid bndr) alt_type alts }
@@ -402,9 +403,8 @@ cgAltRhss gc_plan bndr alts
maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a
maybeAltHeapCheck NoGcInAlts code
= code
-maybeAltHeapCheck (GcInAlts regs srt) code
- = do { c_srt <- getSRTInfo srt
- ; altHeapCheck regs c_srt code }
+maybeAltHeapCheck (GcInAlts regs _) code
+ = altHeapCheck regs code
-----------------------------------------------------------------------------
-- Tail calls
@@ -482,4 +482,77 @@ cgTailCall fun_id fun_info args
node_points = nodeMustPointToIt lf_info
+{- Note [case on Bool]
+ ~~~~~~~~~~~~~~~~~~~
+A case on a Boolean value does two things:
+ 1. It looks up the Boolean in a closure table and assigns the
+ result to the binder.
+ 2. It branches to the True or False case through analysis
+ of the closure assigned to the binder.
+But the indirection through the closure table is unnecessary
+if the assignment to the binder will be dead code (use isDeadBndr).
+
+The following example illustrates how badly the code turns out:
+ STG:
+ case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 {
+ GHC.Bool.False -> <true code> // sbH8 dead
+ GHC.Bool.True -> <false code> // sbH8 dead
+ };
+ Cmm:
+ _s7HD::F64 = F64[_sbH7::I64 + 7]; // MidAssign
+ _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64); // MidAssign
+ // emitReturn // MidComment
+ _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)]; // MidAssign
+ _ccsX::I64 = _sbH8::I64 & 7; // MidAssign
+ if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI; // LastCondBranch
+
+The assignments to _sbH8 and _ccsX are completely unnecessary.
+Instead, we should branch based on the value of _ccsW.
+-}
+{- Note [Better Alt Heap Checks]
+If two function calls can share a return point, then they will also
+get the same info table. Therefore, it's worth our effort to make
+those opportunities appear as frequently as possible.
+
+Here are a few examples of how it should work:
+
+ STG:
+ case f x of
+ True -> <True code -- including allocation>
+ False -> <False code>
+ Cmm:
+ r = call f(x) returns to L;
+ L:
+ if r & 7 >= 2 goto L1 else goto L2;
+ L1:
+ if Hp > HpLim then
+ r = gc(r);
+ goto L;
+ <True code -- including allocation>
+ L2:
+ <False code>
+Note that the code following both the call to f(x) and the code to gc(r)
+should be the same, which will allow the common blockifier to discover
+that they are the same. Therefore, both function calls will return to the same
+block, and they will use the same info table.
+
+Here's an example of the Cmm code we want from a primOp.
+The primOp doesn't produce an info table for us to reuse, but that's okay:
+we should still generate the same code:
+ STG:
+ case f x of
+ 0 -> <0-case code -- including allocation>
+ _ -> <default-case code>
+ Cmm:
+ r = a +# b;
+ L:
+ if r == 0 then goto L1 else goto L2;
+ L1:
+ if Hp > HpLim then
+ r = gc(r);
+ goto L;
+ <0-case code -- including allocation>
+ L2:
+ <default-case code>
+-}
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index 2a6b794e2d..2735b69424 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -117,7 +117,7 @@ emitForeignCall
-- only RTS procedures do this
-> FCode ()
emitForeignCall safety results target args _srt ret
- | not (playSafe safety) = do -- trace "emitForeignCall; ret is undone" $ do
+ | not (playSafe safety) = do
let (caller_save, caller_load) = callerSaveVolatileRegs
updfr_off <- getUpdFrameOff
emit caller_save
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 3f803d1d65..713857929a 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -337,11 +337,10 @@ These are used in the following circumstances
entryHeapCheck :: LocalReg -- Function (closure environment)
-> Int -- Arity -- not same as length args b/c of voids
-> [LocalReg] -- Non-void args (empty for thunk)
- -> C_SRT
-> FCode ()
-> FCode ()
-entryHeapCheck fun arity args srt code
+entryHeapCheck fun arity args code
= do updfr_sz <- getUpdFrameOff
heapCheck True (gc_call updfr_sz) code -- The 'fun' keeps relevant CAFs alive
where
@@ -381,8 +380,8 @@ entryHeapCheck fun arity args srt code
gc_lbl_ptrs _ = Nothing
-altHeapCheck :: [LocalReg] -> C_SRT -> FCode a -> FCode a
-altHeapCheck regs srt code
+altHeapCheck :: [LocalReg] -> FCode a -> FCode a
+altHeapCheck regs code
= do updfr_sz <- getUpdFrameOff
heapCheck False (gc_call updfr_sz) code
where
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 33fd3e8d5a..74bac43108 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -166,9 +166,6 @@ direct_call caller lbl arity args reps
| otherwise -- Over-saturated call
= ASSERT( arity == length initial_reps )
do { pap_id <- newTemp gcWord
- ; let srt = pprTrace "Urk! SRT for over-sat call"
- (ppr lbl) NoC_SRT
- -- XXX: what if rest_args contains static refs?
; withSequel (AssignTo [pap_id] True)
(emitCall Native target fast_args)
; slow_call (CmmReg (CmmLocal pap_id))
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index 057e5597e8..4803f5fba7 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -883,10 +883,10 @@ getSRTInfo (SRTEntries {}) = panic "getSRTInfo"
getSRTInfo (SRT off len bmp)
| len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
= do { id <- newUnique
- ; top_srt <- getSRTLabel
+ -- ; top_srt <- getSRTLabel
; let srt_desc_lbl = mkLargeSRTLabel id
-- JD: We're not constructing and emitting SRTs in the back end,
- -- which renders this code wrong (and it now names a now-non-existent label).
+ -- which renders this code wrong (it now names a now-non-existent label).
-- ; emitRODataLits srt_desc_lbl
-- ( cmmLabelOffW top_srt off
-- : mkWordCLit (fromIntegral len)