summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2018-01-05 16:12:49 +0000
committerSimon Marlow <marlowsd@gmail.com>2018-01-08 08:41:35 +0000
commita1a689dda48113f3735834350fb562bb1927a633 (patch)
tree0931516152cc40f0481bb2fad35274f225bcd76c
parent303106d55d75a9c796e58867cb541ad136bb217f (diff)
downloadhaskell-a1a689dda48113f3735834350fb562bb1927a633.tar.gz
Improve accuracy of get/setAllocationCounter
Summary: get/setAllocationCounter didn't take into account allocations in the current block. This was known at the time, but it turns out to be important to have more accuracy when using these in a fine-grained way. Test Plan: New unit test to test incrementally larger allocaitons. Before I got results like this: ``` +0 +0 +0 +0 +0 +4096 +0 +0 +0 +0 +0 +4064 +0 +0 +4088 +4056 +0 +0 +0 +4088 +4096 +4056 +4096 ``` Notice how the results aren't always monotonically increasing. After this patch: ``` +344 +416 +488 +560 +632 +704 +776 +848 +920 +992 +1064 +1136 +1208 +1280 +1352 +1424 +1496 +1568 +1640 +1712 +1784 +1856 +1928 +2000 +2072 +2144 ``` Reviewers: niteria, bgamari, hvr, erikd Subscribers: rwbarton, thomie, carter Differential Revision: https://phabricator.haskell.org/D4288
-rw-r--r--compiler/codeGen/StgCmmForeign.hs4
-rw-r--r--compiler/prelude/primops.txt.pp14
-rw-r--r--includes/rts/Threads.h2
-rw-r--r--includes/stg/MiscClosures.h3
-rw-r--r--libraries/base/GHC/Conc/Sync.hs21
-rw-r--r--rts/PrimOps.cmm20
-rw-r--r--rts/RtsSymbols.c4
-rw-r--r--rts/Threads.c13
-rw-r--r--testsuite/tests/rts/all.T7
-rw-r--r--testsuite/tests/rts/alloccounter1.hs19
-rw-r--r--testsuite/tests/rts/alloccounter1.stdout1
11 files changed, 74 insertions, 34 deletions
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index fc3d42aa8b..3473307423 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -408,8 +408,8 @@ Opening the nursery corresponds to the following code:
@
tso = CurrentTSO;
cn = CurrentNursery;
- bdfree = CurrentNuresry->free;
- bdstart = CurrentNuresry->start;
+ bdfree = CurrentNursery->free;
+ bdstart = CurrentNursery->start;
// We *add* the currently occupied portion of the nursery block to
// the allocation limit, because we will subtract it again in
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 333694d0d2..e958baf0e4 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -2921,6 +2921,20 @@ primop TraceMarkerOp "traceMarker#" GenPrimOp
has_side_effects = True
out_of_line = True
+primop GetThreadAllocationCounter "getThreadAllocationCounter#" GenPrimOp
+ State# RealWorld -> (# State# RealWorld, INT64 #)
+ { Retrieves the allocation counter for the current thread. }
+ with
+ has_side_effects = True
+ out_of_line = True
+
+primop SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp
+ INT64 -> State# RealWorld -> State# RealWorld
+ { Sets the allocation counter for the current thread to the given value. }
+ with
+ has_side_effects = True
+ out_of_line = True
+
------------------------------------------------------------------------
section "Safe coercions"
------------------------------------------------------------------------
diff --git a/includes/rts/Threads.h b/includes/rts/Threads.h
index fceacdc75d..f72f5ed121 100644
--- a/includes/rts/Threads.h
+++ b/includes/rts/Threads.h
@@ -43,8 +43,6 @@ StgRegTable * resumeThread (void *);
//
int cmp_thread (StgPtr tso1, StgPtr tso2);
int rts_getThreadId (StgPtr tso);
-HsInt64 rts_getThreadAllocationCounter (StgPtr tso);
-void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i);
void rts_enableThreadAllocationLimit (StgPtr tso);
void rts_disableThreadAllocationLimit (StgPtr tso);
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 76cfbd6c8c..1fbfab9fbe 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -468,6 +468,9 @@ RTS_FUN_DECL(stg_traceCcszh);
RTS_FUN_DECL(stg_clearCCSzh);
RTS_FUN_DECL(stg_traceEventzh);
RTS_FUN_DECL(stg_traceMarkerzh);
+RTS_FUN_DECL(stg_getThreadAllocationCounterzh);
+RTS_FUN_DECL(stg_setThreadAllocationCounterzh);
+
/* Other misc stuff */
// See wiki:Commentary/Compiler/Backends/PprC#Prototypes
diff --git a/libraries/base/GHC/Conc/Sync.hs b/libraries/base/GHC/Conc/Sync.hs
index de7779291f..e15bcbcaa0 100644
--- a/libraries/base/GHC/Conc/Sync.hs
+++ b/libraries/base/GHC/Conc/Sync.hs
@@ -105,6 +105,7 @@ import Data.Maybe
import GHC.Base
import {-# SOURCE #-} GHC.IO.Handle ( hFlush )
import {-# SOURCE #-} GHC.IO.Handle.FD ( stdout )
+import GHC.Int
import GHC.IO
import GHC.IO.Encoding.UTF8
import GHC.IO.Exception
@@ -194,18 +195,16 @@ instance Ord ThreadId where
--
-- @since 4.8.0.0
setAllocationCounter :: Int64 -> IO ()
-setAllocationCounter i = do
- ThreadId t <- myThreadId
- rts_setThreadAllocationCounter t i
+setAllocationCounter (I64# i) = IO $ \s ->
+ case setThreadAllocationCounter# i s of s' -> (# s', () #)
-- | Return the current value of the allocation counter for the
-- current thread.
--
-- @since 4.8.0.0
getAllocationCounter :: IO Int64
-getAllocationCounter = do
- ThreadId t <- myThreadId
- rts_getThreadAllocationCounter t
+getAllocationCounter = IO $ \s ->
+ case getThreadAllocationCounter# s of (# s', ctr #) -> (# s', I64# ctr #)
-- | Enables the allocation counter to be treated as a limit for the
-- current thread. When the allocation limit is enabled, if the
@@ -242,16 +241,6 @@ disableAllocationLimit = do
ThreadId t <- myThreadId
rts_disableThreadAllocationLimit t
--- We cannot do these operations safely on another thread, because on
--- a 32-bit machine we cannot do atomic operations on a 64-bit value.
--- Therefore, we only expose APIs that allow getting and setting the
--- limit of the current thread.
-foreign import ccall unsafe "rts_setThreadAllocationCounter"
- rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO ()
-
-foreign import ccall unsafe "rts_getThreadAllocationCounter"
- rts_getThreadAllocationCounter :: ThreadId# -> IO Int64
-
foreign import ccall unsafe "rts_enableThreadAllocationLimit"
rts_enableThreadAllocationLimit :: ThreadId# -> IO ()
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 2b3a304d06..1caa0c3343 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -2495,3 +2495,23 @@ stg_traceMarkerzh ( W_ msg )
return ();
}
+
+stg_getThreadAllocationCounterzh ()
+{
+ // Account for the allocation in the current block
+ W_ offset;
+ offset = Hp - bdescr_start(CurrentNursery);
+ return (StgTSO_alloc_limit(CurrentTSO) - offset);
+}
+
+stg_setThreadAllocationCounterzh ( I64 counter )
+{
+ // Allocation in the current block will be subtracted by
+ // getThreadAllocationCounter#, so we have to offset any existing
+ // allocation here. See also openNursery/closeNursery in
+ // compiler/codeGen/StgCmmForeign.hs.
+ W_ offset;
+ offset = Hp - bdescr_start(CurrentNursery);
+ StgTSO_alloc_limit(CurrentTSO) = counter + offset;
+ return ();
+}
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 2ea6713eee..0fc98663ec 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -744,8 +744,6 @@
SymI_HasProto(rts_isProfiled) \
SymI_HasProto(rts_isDynamic) \
SymI_HasProto(rts_setInCallCapability) \
- SymI_HasProto(rts_getThreadAllocationCounter) \
- SymI_HasProto(rts_setThreadAllocationCounter) \
SymI_HasProto(rts_enableThreadAllocationLimit) \
SymI_HasProto(rts_disableThreadAllocationLimit) \
SymI_HasProto(rts_setMainThread) \
@@ -896,6 +894,8 @@
SymI_HasProto(stg_traceCcszh) \
SymI_HasProto(stg_traceEventzh) \
SymI_HasProto(stg_traceMarkerzh) \
+ SymI_HasProto(stg_getThreadAllocationCounterzh) \
+ SymI_HasProto(stg_setThreadAllocationCounterzh) \
SymI_HasProto(getMonotonicNSec) \
SymI_HasProto(lockFile) \
SymI_HasProto(unlockFile) \
diff --git a/rts/Threads.c b/rts/Threads.c
index b09dfa8ccc..c54156f383 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso)
}
/* ---------------------------------------------------------------------------
- * Getting & setting the thread allocation limit
+ * Enabling and disabling the thread allocation limit
* ------------------------------------------------------------------------ */
-HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
-{
- // NB. doesn't take into account allocation in the current nursery
- // block, so it might be off by up to 4k.
- return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit));
-}
-
-void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
-{
- ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i);
-}
void rts_enableThreadAllocationLimit(StgPtr tso)
{
diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T
index 6377bde04f..7086d9113f 100644
--- a/testsuite/tests/rts/all.T
+++ b/testsuite/tests/rts/all.T
@@ -382,3 +382,10 @@ test('T12903', [when(opsys('mingw32'), skip)], compile_and_run, [''])
test('T13832', exit_code(1), compile_and_run, ['-threaded'])
test('T13894', normal, compile_and_run, [''])
test('T14497', normal, compile_and_run, ['-O'])
+
+test('alloccounter1', normal, compile_and_run,
+ [
+ # avoid allocating stack chunks, which counts as
+ # allocation and messes up the results:
+ '-with-rtsopts=-k1m'
+ ])
diff --git a/testsuite/tests/rts/alloccounter1.hs b/testsuite/tests/rts/alloccounter1.hs
new file mode 100644
index 0000000000..4b81896d2c
--- /dev/null
+++ b/testsuite/tests/rts/alloccounter1.hs
@@ -0,0 +1,19 @@
+module Main where
+
+import Control.Exception
+import Control.Monad
+import Data.List
+import System.Mem
+
+main = do
+ let
+ testAlloc n = do
+ let start = 999999
+ setAllocationCounter start
+ evaluate (last [1..n])
+ c <- getAllocationCounter
+ -- print (start - c)
+ return (start - c)
+ results <- forM [1..1000] testAlloc
+ print (sort results == results)
+ -- results better be in ascending order
diff --git a/testsuite/tests/rts/alloccounter1.stdout b/testsuite/tests/rts/alloccounter1.stdout
new file mode 100644
index 0000000000..0ca95142bb
--- /dev/null
+++ b/testsuite/tests/rts/alloccounter1.stdout
@@ -0,0 +1 @@
+True