diff options
| author | Ian Lynagh <ian@well-typed.com> | 2012-09-27 14:06:32 +0100 |
|---|---|---|
| committer | Ian Lynagh <ian@well-typed.com> | 2012-09-27 14:06:32 +0100 |
| commit | 42cb30bd2c00705da598cc8d4170b41fb5693166 (patch) | |
| tree | 9dda2d7da294ce2825085c5518150899b0490325 | |
| parent | 155e9e1369e1a063452f82a35d9edc58c1da2ef7 (diff) | |
| parent | d3128bfc286002862e916296629a22f1ce987e4e (diff) | |
| download | haskell-42cb30bd2c00705da598cc8d4170b41fb5693166.tar.gz | |
Merge branch 'master' of darcs.haskell.org:/srv/darcs//ghc
| -rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 4 | ||||
| -rw-r--r-- | compiler/codeGen/StgCmmHeap.hs | 57 | ||||
| -rw-r--r-- | compiler/main/DynFlags.hs | 4 | ||||
| -rw-r--r-- | docs/users_guide/using.xml | 18 |
4 files changed, 60 insertions, 23 deletions
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 307d3715b3..a8ffc12bb0 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -163,9 +163,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body code = forkProc $ do { restoreCurrentCostCentre cc_slot ; arg_regs <- bindArgsToRegs args - ; void $ altHeapCheck arg_regs (cgExpr body) } - -- Using altHeapCheck just reduces - -- instructions to save on stack + ; void $ noEscapeHeapCheck arg_regs (cgExpr body) } ------------------------------------------------------------------------ diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index fb3739177c..b7cca48f5a 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -10,7 +10,7 @@ module StgCmmHeap ( getVirtHp, setVirtHp, setRealHp, getHpRelOffset, hpRel, - entryHeapCheck, altHeapCheck, altHeapCheckReturnsTo, + entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo, mkVirtHeapOffsets, mkVirtConstrOffsets, mkStaticClosureFields, mkStaticClosure, @@ -371,7 +371,7 @@ entryHeapCheck cl_info nodeSet arity args code loop_id <- newLabelC emitLabel loop_id - heapCheck True (gc_call updfr_sz <*> mkBranch loop_id) code + heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code {- -- This code is slightly outdated now and we could easily keep the above @@ -436,32 +436,41 @@ entryHeapCheck cl_info nodeSet arity args code -- else we do a normal call to stg_gc_noregs altHeapCheck :: [LocalReg] -> FCode a -> FCode a -altHeapCheck regs code = do +altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code + +altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a +altOrNoEscapeHeapCheck checkYield regs code = do dflags <- getDynFlags case cannedGCEntryPoint dflags regs of - Nothing -> genericGC code + Nothing -> genericGC checkYield code Just gc -> do lret <- newLabelC let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) regs lcont <- newLabelC emitOutOfLine lret (copyin <*> mkBranch lcont) emitLabel lcont - cannedGCReturnsTo False gc regs lret off code + cannedGCReturnsTo checkYield False gc regs lret off code altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a altHeapCheckReturnsTo regs lret off code = do dflags <- getDynFlags case cannedGCEntryPoint dflags regs of - Nothing -> genericGC code - Just gc -> cannedGCReturnsTo True gc regs lret off code + Nothing -> genericGC False code + Just gc -> cannedGCReturnsTo False True gc regs lret off code + +-- noEscapeHeapCheck is implemented identically to altHeapCheck (which +-- is more efficient), but cannot be optimized away in the non-allocating +-- case because it may occur in a loop +noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a +noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code -cannedGCReturnsTo :: Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff +cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a -cannedGCReturnsTo cont_on_stack gc regs lret off code +cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code = do dflags <- getDynFlags updfr_sz <- getUpdFrameOff - heapCheck False (gc_call dflags gc updfr_sz) code + heapCheck False checkYield (gc_call dflags gc updfr_sz) code where reg_exprs = map (CmmReg . CmmLocal) regs -- Note [stg_gc arguments] @@ -470,13 +479,13 @@ cannedGCReturnsTo cont_on_stack gc regs lret off code | cont_on_stack = mkJumpReturnsTo dflags label GC reg_exprs lret off sp | otherwise = mkCallReturnsTo dflags label GC reg_exprs lret off sp (0,[]) -genericGC :: FCode a -> FCode a -genericGC code +genericGC :: Bool -> FCode a -> FCode a +genericGC checkYield code = do updfr_sz <- getUpdFrameOff lretry <- newLabelC emitLabel lretry call <- mkCall generic_gc (GC, GC) [] [] updfr_sz (0,[]) - heapCheck False (call <*> mkBranch lretry) code + heapCheck False checkYield (call <*> mkBranch lretry) code cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr cannedGCEntryPoint dflags regs @@ -524,22 +533,23 @@ mkGcLabel :: String -> CmmExpr mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit s))) ------------------------------- -heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a -heapCheck checkStack do_gc code +heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a +heapCheck checkStack checkYield do_gc code = getHeapUsage $ \ hpHw -> -- Emit heap checks, but be sure to do it lazily so -- that the conditionals on hpHw don't cause a black hole - do { codeOnly $ do_checks checkStack hpHw do_gc + do { codeOnly $ do_checks checkStack checkYield hpHw do_gc ; tickyAllocHeap hpHw ; doGranAllocate hpHw ; setRealHp hpHw ; code } do_checks :: Bool -- Should we check the stack? + -> Bool -- Should we check for preemption? -> WordOff -- Heap headroom -> CmmAGraph -- What to do on failure -> FCode () -do_checks checkStack alloc do_gc = do +do_checks checkStack checkYield alloc do_gc = do dflags <- getDynFlags let alloc_lit = mkIntExpr dflags (alloc * wORD_SIZE dflags) -- Bytes @@ -557,15 +567,22 @@ do_checks checkStack alloc do_gc = do hp_oflo = CmmMachOp (mo_wordUGt dflags) [CmmReg hpReg, CmmReg (CmmGlobal HpLim)] + -- Yielding if HpLim == 0 + yielding = CmmMachOp (mo_wordEq dflags) + [CmmReg (CmmGlobal HpLim), CmmLit (zeroCLit dflags)] + alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit gc_id <- newLabelC when checkStack $ do emit =<< mkCmmIfGoto sp_oflo gc_id - when (alloc /= 0) $ do - emitAssign hpReg bump_hp - emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) + if (alloc /= 0) + then do + emitAssign hpReg bump_hp + emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) + else do + when (not (dopt Opt_OmitYields dflags) && checkYield) (emit =<< mkCmmIfGoto yielding gc_id) emitOutOfLine gc_id $ do_gc -- this is expected to jump back somewhere diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index ed273d90e5..b412fc1166 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -293,6 +293,7 @@ data DynFlag | Opt_IrrefutableTuples | Opt_CmmSink | Opt_CmmElimCommonBlocks + | Opt_OmitYields -- Interface files | Opt_IgnoreInterfacePragmas @@ -2275,6 +2276,7 @@ fFlags = [ ( "irrefutable-tuples", Opt_IrrefutableTuples, nop ), ( "cmm-sink", Opt_CmmSink, nop ), ( "cmm-elim-common-blocks", Opt_CmmElimCommonBlocks, nop ), + ( "omit-yields", Opt_OmitYields, nop ), ( "gen-manifest", Opt_GenManifest, nop ), ( "embed-manifest", Opt_EmbedManifest, nop ), ( "ext-core", Opt_EmitExternalCore, nop ), @@ -2459,6 +2461,8 @@ defaultFlags platform Opt_SharedImplib, + Opt_OmitYields, + Opt_GenManifest, Opt_EmbedManifest, Opt_PrintBindContents, diff --git a/docs/users_guide/using.xml b/docs/users_guide/using.xml index 2c5217b40d..c3a1366f43 100644 --- a/docs/users_guide/using.xml +++ b/docs/users_guide/using.xml @@ -2332,6 +2332,24 @@ last (x : xs) = last' x xs </listitem> </varlistentry> + <varlistentry> + <term> + <option>-fomit-yields</option> + <indexterm><primary><option>-fomit-yields</option></primary></indexterm> + </term> + <listitem> + <para><emphasis>On by default.</emphasis> Tells GHC to omit + heap checks when no allocation is being performed. While this improves + binary sizes by about 5%, it also means that threads run in + tight non-allocating loops will not get preempted in a timely + fashion. If it is important to always be able to interrupt such + threads, you should turn this optimization off. Consider also + recompiling all libraries with this optimization turned off, if you + need to guarantee interruptibility. + </para> + </listitem> + </varlistentry> + </variablelist> </sect2> |
