diff options
author | Simon Marlow <marlowsd@gmail.com> | 2018-01-05 16:12:49 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2018-01-08 08:41:35 +0000 |
commit | a1a689dda48113f3735834350fb562bb1927a633 (patch) | |
tree | 0931516152cc40f0481bb2fad35274f225bcd76c | |
parent | 303106d55d75a9c796e58867cb541ad136bb217f (diff) | |
download | haskell-a1a689dda48113f3735834350fb562bb1927a633.tar.gz |
Improve accuracy of get/setAllocationCounter
Summary:
get/setAllocationCounter didn't take into account allocations in the
current block. This was known at the time, but it turns out to be
important to have more accuracy when using these in a fine-grained
way.
Test Plan:
New unit test to test incrementally larger allocaitons. Before I got
results like this:
```
+0
+0
+0
+0
+0
+4096
+0
+0
+0
+0
+0
+4064
+0
+0
+4088
+4056
+0
+0
+0
+4088
+4096
+4056
+4096
```
Notice how the results aren't always monotonically increasing. After
this patch:
```
+344
+416
+488
+560
+632
+704
+776
+848
+920
+992
+1064
+1136
+1208
+1280
+1352
+1424
+1496
+1568
+1640
+1712
+1784
+1856
+1928
+2000
+2072
+2144
```
Reviewers: niteria, bgamari, hvr, erikd
Subscribers: rwbarton, thomie, carter
Differential Revision: https://phabricator.haskell.org/D4288
-rw-r--r-- | compiler/codeGen/StgCmmForeign.hs | 4 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 14 | ||||
-rw-r--r-- | includes/rts/Threads.h | 2 | ||||
-rw-r--r-- | includes/stg/MiscClosures.h | 3 | ||||
-rw-r--r-- | libraries/base/GHC/Conc/Sync.hs | 21 | ||||
-rw-r--r-- | rts/PrimOps.cmm | 20 | ||||
-rw-r--r-- | rts/RtsSymbols.c | 4 | ||||
-rw-r--r-- | rts/Threads.c | 13 | ||||
-rw-r--r-- | testsuite/tests/rts/all.T | 7 | ||||
-rw-r--r-- | testsuite/tests/rts/alloccounter1.hs | 19 | ||||
-rw-r--r-- | testsuite/tests/rts/alloccounter1.stdout | 1 |
11 files changed, 74 insertions, 34 deletions
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index fc3d42aa8b..3473307423 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -408,8 +408,8 @@ Opening the nursery corresponds to the following code: @ tso = CurrentTSO; cn = CurrentNursery; - bdfree = CurrentNuresry->free; - bdstart = CurrentNuresry->start; + bdfree = CurrentNursery->free; + bdstart = CurrentNursery->start; // We *add* the currently occupied portion of the nursery block to // the allocation limit, because we will subtract it again in diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 333694d0d2..e958baf0e4 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -2921,6 +2921,20 @@ primop TraceMarkerOp "traceMarker#" GenPrimOp has_side_effects = True out_of_line = True +primop GetThreadAllocationCounter "getThreadAllocationCounter#" GenPrimOp + State# RealWorld -> (# State# RealWorld, INT64 #) + { Retrieves the allocation counter for the current thread. } + with + has_side_effects = True + out_of_line = True + +primop SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp + INT64 -> State# RealWorld -> State# RealWorld + { Sets the allocation counter for the current thread to the given value. } + with + has_side_effects = True + out_of_line = True + ------------------------------------------------------------------------ section "Safe coercions" ------------------------------------------------------------------------ diff --git a/includes/rts/Threads.h b/includes/rts/Threads.h index fceacdc75d..f72f5ed121 100644 --- a/includes/rts/Threads.h +++ b/includes/rts/Threads.h @@ -43,8 +43,6 @@ StgRegTable * resumeThread (void *); // int cmp_thread (StgPtr tso1, StgPtr tso2); int rts_getThreadId (StgPtr tso); -HsInt64 rts_getThreadAllocationCounter (StgPtr tso); -void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i); void rts_enableThreadAllocationLimit (StgPtr tso); void rts_disableThreadAllocationLimit (StgPtr tso); diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index 76cfbd6c8c..1fbfab9fbe 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -468,6 +468,9 @@ RTS_FUN_DECL(stg_traceCcszh); RTS_FUN_DECL(stg_clearCCSzh); RTS_FUN_DECL(stg_traceEventzh); RTS_FUN_DECL(stg_traceMarkerzh); +RTS_FUN_DECL(stg_getThreadAllocationCounterzh); +RTS_FUN_DECL(stg_setThreadAllocationCounterzh); + /* Other misc stuff */ // See wiki:Commentary/Compiler/Backends/PprC#Prototypes diff --git a/libraries/base/GHC/Conc/Sync.hs b/libraries/base/GHC/Conc/Sync.hs index de7779291f..e15bcbcaa0 100644 --- a/libraries/base/GHC/Conc/Sync.hs +++ b/libraries/base/GHC/Conc/Sync.hs @@ -105,6 +105,7 @@ import Data.Maybe import GHC.Base import {-# SOURCE #-} GHC.IO.Handle ( hFlush ) import {-# SOURCE #-} GHC.IO.Handle.FD ( stdout ) +import GHC.Int import GHC.IO import GHC.IO.Encoding.UTF8 import GHC.IO.Exception @@ -194,18 +195,16 @@ instance Ord ThreadId where -- -- @since 4.8.0.0 setAllocationCounter :: Int64 -> IO () -setAllocationCounter i = do - ThreadId t <- myThreadId - rts_setThreadAllocationCounter t i +setAllocationCounter (I64# i) = IO $ \s -> + case setThreadAllocationCounter# i s of s' -> (# s', () #) -- | Return the current value of the allocation counter for the -- current thread. -- -- @since 4.8.0.0 getAllocationCounter :: IO Int64 -getAllocationCounter = do - ThreadId t <- myThreadId - rts_getThreadAllocationCounter t +getAllocationCounter = IO $ \s -> + case getThreadAllocationCounter# s of (# s', ctr #) -> (# s', I64# ctr #) -- | Enables the allocation counter to be treated as a limit for the -- current thread. When the allocation limit is enabled, if the @@ -242,16 +241,6 @@ disableAllocationLimit = do ThreadId t <- myThreadId rts_disableThreadAllocationLimit t --- We cannot do these operations safely on another thread, because on --- a 32-bit machine we cannot do atomic operations on a 64-bit value. --- Therefore, we only expose APIs that allow getting and setting the --- limit of the current thread. -foreign import ccall unsafe "rts_setThreadAllocationCounter" - rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO () - -foreign import ccall unsafe "rts_getThreadAllocationCounter" - rts_getThreadAllocationCounter :: ThreadId# -> IO Int64 - foreign import ccall unsafe "rts_enableThreadAllocationLimit" rts_enableThreadAllocationLimit :: ThreadId# -> IO () diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 2b3a304d06..1caa0c3343 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -2495,3 +2495,23 @@ stg_traceMarkerzh ( W_ msg ) return (); } + +stg_getThreadAllocationCounterzh () +{ + // Account for the allocation in the current block + W_ offset; + offset = Hp - bdescr_start(CurrentNursery); + return (StgTSO_alloc_limit(CurrentTSO) - offset); +} + +stg_setThreadAllocationCounterzh ( I64 counter ) +{ + // Allocation in the current block will be subtracted by + // getThreadAllocationCounter#, so we have to offset any existing + // allocation here. See also openNursery/closeNursery in + // compiler/codeGen/StgCmmForeign.hs. + W_ offset; + offset = Hp - bdescr_start(CurrentNursery); + StgTSO_alloc_limit(CurrentTSO) = counter + offset; + return (); +} diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index 2ea6713eee..0fc98663ec 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -744,8 +744,6 @@ SymI_HasProto(rts_isProfiled) \ SymI_HasProto(rts_isDynamic) \ SymI_HasProto(rts_setInCallCapability) \ - SymI_HasProto(rts_getThreadAllocationCounter) \ - SymI_HasProto(rts_setThreadAllocationCounter) \ SymI_HasProto(rts_enableThreadAllocationLimit) \ SymI_HasProto(rts_disableThreadAllocationLimit) \ SymI_HasProto(rts_setMainThread) \ @@ -896,6 +894,8 @@ SymI_HasProto(stg_traceCcszh) \ SymI_HasProto(stg_traceEventzh) \ SymI_HasProto(stg_traceMarkerzh) \ + SymI_HasProto(stg_getThreadAllocationCounterzh) \ + SymI_HasProto(stg_setThreadAllocationCounterzh) \ SymI_HasProto(getMonotonicNSec) \ SymI_HasProto(lockFile) \ SymI_HasProto(unlockFile) \ diff --git a/rts/Threads.c b/rts/Threads.c index b09dfa8ccc..c54156f383 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso) } /* --------------------------------------------------------------------------- - * Getting & setting the thread allocation limit + * Enabling and disabling the thread allocation limit * ------------------------------------------------------------------------ */ -HsInt64 rts_getThreadAllocationCounter(StgPtr tso) -{ - // NB. doesn't take into account allocation in the current nursery - // block, so it might be off by up to 4k. - return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit)); -} - -void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i) -{ - ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i); -} void rts_enableThreadAllocationLimit(StgPtr tso) { diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T index 6377bde04f..7086d9113f 100644 --- a/testsuite/tests/rts/all.T +++ b/testsuite/tests/rts/all.T @@ -382,3 +382,10 @@ test('T12903', [when(opsys('mingw32'), skip)], compile_and_run, ['']) test('T13832', exit_code(1), compile_and_run, ['-threaded']) test('T13894', normal, compile_and_run, ['']) test('T14497', normal, compile_and_run, ['-O']) + +test('alloccounter1', normal, compile_and_run, + [ + # avoid allocating stack chunks, which counts as + # allocation and messes up the results: + '-with-rtsopts=-k1m' + ]) diff --git a/testsuite/tests/rts/alloccounter1.hs b/testsuite/tests/rts/alloccounter1.hs new file mode 100644 index 0000000000..4b81896d2c --- /dev/null +++ b/testsuite/tests/rts/alloccounter1.hs @@ -0,0 +1,19 @@ +module Main where + +import Control.Exception +import Control.Monad +import Data.List +import System.Mem + +main = do + let + testAlloc n = do + let start = 999999 + setAllocationCounter start + evaluate (last [1..n]) + c <- getAllocationCounter + -- print (start - c) + return (start - c) + results <- forM [1..1000] testAlloc + print (sort results == results) + -- results better be in ascending order diff --git a/testsuite/tests/rts/alloccounter1.stdout b/testsuite/tests/rts/alloccounter1.stdout new file mode 100644 index 0000000000..0ca95142bb --- /dev/null +++ b/testsuite/tests/rts/alloccounter1.stdout @@ -0,0 +1 @@ +True |