summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michal Terepeta <michal.terepeta@gmail.com> 2019-03-24 13:58:53 +0100
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2019-04-01 20:07:49 -0400
commit: 7cf5ba3dba88356571197b470556e888581212d7 (patch)
tree: 6e89f905fe3f630ffa6684c661c374be2bfd5ff6
parent: 39282422afe58a0855c2fe5315163236c116c2f4 (diff)
download: haskell-7cf5ba3dba88356571197b470556e888581212d7.tar.gz
Improve performance of newSmallArray#

This:
- Hoists part of the condition outside of the initialization loop in
  `stg_newSmallArrayzh`.
- Annotates one of the unlikely branches as unlikely, also in
  `stg_newSmallArrayzh`.
- Adds a couple of annotations to `allocateMightFail` indicating which
  branches are likely to be taken.

Together this gives about 5% improvement.

Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com>
-rw-r--r-- includes/Rts.h | 8
-rw-r--r-- rts/PrimOps.cmm | 7
-rw-r--r-- rts/sm/Storage.c | 4
3 files changed, 14 insertions, 5 deletions
diff --git a/includes/Rts.h b/includes/Rts.h
index a1a83397f3..f1f8351298 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -58,7 +58,13 @@ extern "C" {
#if __GNUC__ >= 4
#define RTS_UNLIKELY(p) __builtin_expect((p),0)
#else
-#define RTS_UNLIKELY(p) p
+#define RTS_UNLIKELY(p) (p)
+#endif
+
+#if __GNUC__ >= 4
+#define RTS_LIKELY(p) __builtin_expect(!!(p), 1)
+#else
+#define RTS_LIKELY(p) (p)
#endif
/* __builtin_unreachable is supported since GNU C 4.5 */
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index bc89839aec..47b9bca04b 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -403,7 +403,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
- if (arr == NULL) {
+ if (arr == NULL) (likely: False) {
jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
@@ -413,8 +413,11 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
// Initialise all elements of the array with the value in R2
p = arr + SIZEOF_StgSmallMutArrPtrs;
+ // Avoid the shift for `WDS(n)` in the inner loop
+ W_ limit;
+ limit = arr + SIZEOF_StgSmallMutArrPtrs + WDS(n);
for:
- if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) (likely: True) {
+ if (p < limit) (likely: True) {
W_[p] = init;
p = p + WDS(1);
goto for;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 71864585f6..f889e2262b 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -856,7 +856,7 @@ allocateMightFail (Capability *cap, W_ n)
bdescr *bd;
StgPtr p;
- if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+ if (RTS_UNLIKELY(n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))) {
// The largest number of words such that
// the computation of req_blocks will not overflow.
W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
@@ -897,7 +897,7 @@ allocateMightFail (Capability *cap, W_ n)
accountAllocation(cap, n);
bd = cap->r.rCurrentAlloc;
- if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+ if (RTS_UNLIKELY(bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W)) {
if (bd) finishedNurseryBlock(cap,bd);