summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEdward Z. Yang <ezyang@mit.edu>2012-09-17 18:28:49 +0200
committerEdward Z. Yang <ezyang@mit.edu>2012-09-26 13:46:57 -0700
commitd3128bfc286002862e916296629a22f1ce987e4e (patch)
treeb819a5ba9bb6079775186726a603e96cbf26c9fb
parent2145ffc452a64bae457a38276c81b60f22ddf161 (diff)
downloadhaskell-d3128bfc286002862e916296629a22f1ce987e4e.tar.gz
Partially fix #367 by adding HpLim checks to entry with -fno-omit-yields.
The current fix is relatively dumb as far as where to add HpLim checks: it will always perform a check unless we know that we're returning from a closure or we are doing a non let-no-escape case analysis. The performance impact on the nofib suite looks like this: Min +5.7% -0.0% -6.5% -6.4% -50.0% Max +6.3% +5.8% +5.0% +5.5% +0.8% Geometric Mean +6.2% +0.1% +0.5% +0.5% -0.8% Overall, the executable bloat is the biggest problem, so we keep the old omit-yields optimization on by default. Remember that if you need an interruptibility guarantee, you need to recompile all of your libraries with -fno-omit-yields. A better fix would involve only inserting the yields necessary to break loops; this is left as future work. Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
-rw-r--r--compiler/codeGen/StgCmmExpr.hs4
-rw-r--r--compiler/codeGen/StgCmmHeap.hs57
-rw-r--r--compiler/main/DynFlags.hs4
-rw-r--r--docs/users_guide/using.xml18
4 files changed, 60 insertions, 23 deletions
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 307d3715b3..a8ffc12bb0 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -163,9 +163,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
code = forkProc $ do
{ restoreCurrentCostCentre cc_slot
; arg_regs <- bindArgsToRegs args
- ; void $ altHeapCheck arg_regs (cgExpr body) }
- -- Using altHeapCheck just reduces
- -- instructions to save on stack
+ ; void $ noEscapeHeapCheck arg_regs (cgExpr body) }
------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index fb3739177c..b7cca48f5a 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -10,7 +10,7 @@ module StgCmmHeap (
getVirtHp, setVirtHp, setRealHp,
getHpRelOffset, hpRel,
- entryHeapCheck, altHeapCheck, altHeapCheckReturnsTo,
+ entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
mkVirtHeapOffsets, mkVirtConstrOffsets,
mkStaticClosureFields, mkStaticClosure,
@@ -371,7 +371,7 @@ entryHeapCheck cl_info nodeSet arity args code
loop_id <- newLabelC
emitLabel loop_id
- heapCheck True (gc_call updfr_sz <*> mkBranch loop_id) code
+ heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code
{-
-- This code is slightly outdated now and we could easily keep the above
@@ -436,32 +436,41 @@ entryHeapCheck cl_info nodeSet arity args code
-- else we do a normal call to stg_gc_noregs
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
-altHeapCheck regs code = do
+altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code
+
+altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
+altOrNoEscapeHeapCheck checkYield regs code = do
dflags <- getDynFlags
case cannedGCEntryPoint dflags regs of
- Nothing -> genericGC code
+ Nothing -> genericGC checkYield code
Just gc -> do
lret <- newLabelC
let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) regs
lcont <- newLabelC
emitOutOfLine lret (copyin <*> mkBranch lcont)
emitLabel lcont
- cannedGCReturnsTo False gc regs lret off code
+ cannedGCReturnsTo checkYield False gc regs lret off code
altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
= do dflags <- getDynFlags
case cannedGCEntryPoint dflags regs of
- Nothing -> genericGC code
- Just gc -> cannedGCReturnsTo True gc regs lret off code
+ Nothing -> genericGC False code
+ Just gc -> cannedGCReturnsTo False True gc regs lret off code
+
+-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
+-- is more efficient), but cannot be optimized away in the non-allocating
+-- case because it may occur in a loop
+noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
+noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code
-cannedGCReturnsTo :: Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
+cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
-> FCode a
-> FCode a
-cannedGCReturnsTo cont_on_stack gc regs lret off code
+cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
= do dflags <- getDynFlags
updfr_sz <- getUpdFrameOff
- heapCheck False (gc_call dflags gc updfr_sz) code
+ heapCheck False checkYield (gc_call dflags gc updfr_sz) code
where
reg_exprs = map (CmmReg . CmmLocal) regs
-- Note [stg_gc arguments]
@@ -470,13 +479,13 @@ cannedGCReturnsTo cont_on_stack gc regs lret off code
| cont_on_stack = mkJumpReturnsTo dflags label GC reg_exprs lret off sp
| otherwise = mkCallReturnsTo dflags label GC reg_exprs lret off sp (0,[])
-genericGC :: FCode a -> FCode a
-genericGC code
+genericGC :: Bool -> FCode a -> FCode a
+genericGC checkYield code
= do updfr_sz <- getUpdFrameOff
lretry <- newLabelC
emitLabel lretry
call <- mkCall generic_gc (GC, GC) [] [] updfr_sz (0,[])
- heapCheck False (call <*> mkBranch lretry) code
+ heapCheck False checkYield (call <*> mkBranch lretry) code
cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
@@ -524,22 +533,23 @@ mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit s)))
-------------------------------
-heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a
-heapCheck checkStack do_gc code
+heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
+heapCheck checkStack checkYield do_gc code
= getHeapUsage $ \ hpHw ->
-- Emit heap checks, but be sure to do it lazily so
-- that the conditionals on hpHw don't cause a black hole
- do { codeOnly $ do_checks checkStack hpHw do_gc
+ do { codeOnly $ do_checks checkStack checkYield hpHw do_gc
; tickyAllocHeap hpHw
; doGranAllocate hpHw
; setRealHp hpHw
; code }
do_checks :: Bool -- Should we check the stack?
+ -> Bool -- Should we check for preemption?
-> WordOff -- Heap headroom
-> CmmAGraph -- What to do on failure
-> FCode ()
-do_checks checkStack alloc do_gc = do
+do_checks checkStack checkYield alloc do_gc = do
dflags <- getDynFlags
let
alloc_lit = mkIntExpr dflags (alloc * wORD_SIZE dflags) -- Bytes
@@ -557,15 +567,22 @@ do_checks checkStack alloc do_gc = do
hp_oflo = CmmMachOp (mo_wordUGt dflags)
[CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
+ -- Yielding if HpLim == 0
+ yielding = CmmMachOp (mo_wordEq dflags)
+ [CmmReg (CmmGlobal HpLim), CmmLit (zeroCLit dflags)]
+
alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit
gc_id <- newLabelC
when checkStack $ do
emit =<< mkCmmIfGoto sp_oflo gc_id
- when (alloc /= 0) $ do
- emitAssign hpReg bump_hp
- emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
+ if (alloc /= 0)
+ then do
+ emitAssign hpReg bump_hp
+ emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
+ else do
+ when (not (dopt Opt_OmitYields dflags) && checkYield) (emit =<< mkCmmIfGoto yielding gc_id)
emitOutOfLine gc_id $
do_gc -- this is expected to jump back somewhere
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index ed273d90e5..b412fc1166 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -293,6 +293,7 @@ data DynFlag
| Opt_IrrefutableTuples
| Opt_CmmSink
| Opt_CmmElimCommonBlocks
+ | Opt_OmitYields
-- Interface files
| Opt_IgnoreInterfacePragmas
@@ -2275,6 +2276,7 @@ fFlags = [
( "irrefutable-tuples", Opt_IrrefutableTuples, nop ),
( "cmm-sink", Opt_CmmSink, nop ),
( "cmm-elim-common-blocks", Opt_CmmElimCommonBlocks, nop ),
+ ( "omit-yields", Opt_OmitYields, nop ),
( "gen-manifest", Opt_GenManifest, nop ),
( "embed-manifest", Opt_EmbedManifest, nop ),
( "ext-core", Opt_EmitExternalCore, nop ),
@@ -2459,6 +2461,8 @@ defaultFlags platform
Opt_SharedImplib,
+ Opt_OmitYields,
+
Opt_GenManifest,
Opt_EmbedManifest,
Opt_PrintBindContents,
diff --git a/docs/users_guide/using.xml b/docs/users_guide/using.xml
index 2c5217b40d..c3a1366f43 100644
--- a/docs/users_guide/using.xml
+++ b/docs/users_guide/using.xml
@@ -2332,6 +2332,24 @@ last (x : xs) = last' x xs
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <option>-fomit-yields</option>
+ <indexterm><primary><option>-fomit-yields</option></primary></indexterm>
+ </term>
+ <listitem>
+ <para><emphasis>On by default.</emphasis> Tells GHC to omit
+ heap checks when no allocation is being performed. While this improves
+ binary sizes by about 5%, it also means that threads run in
+ tight non-allocating loops will not get preempted in a timely
+ fashion. If it is important to always be able to interrupt such
+ threads, you should turn this optimization off. Consider also
+ recompiling all libraries with this optimization turned off, if you
+ need to guarantee interruptibility.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>