diff options
author | Michal Terepeta <michal.terepeta@gmail.com> | 2019-03-24 13:58:53 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2019-04-01 20:07:49 -0400 |
commit | 7cf5ba3dba88356571197b470556e888581212d7 (patch) | |
tree | 6e89f905fe3f630ffa6684c661c374be2bfd5ff6 | |
parent | 39282422afe58a0855c2fe5315163236c116c2f4 (diff) | |
download | haskell-7cf5ba3dba88356571197b470556e888581212d7.tar.gz |
Improve performance of newSmallArray#
This change:
- Hoists part of the condition outside of the initialization loop in
  `stg_newSmallArrayzh`.
- Annotates one of the unlikely branches as unlikely, also in
  `stg_newSmallArrayzh`.
- Adds a couple of annotations to `allocateMightFail` indicating which
  branches are likely to be taken.

Together these give about a 5% improvement.
Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com>
-rw-r--r-- | includes/Rts.h | 8 |
-rw-r--r-- | rts/PrimOps.cmm | 7 |
-rw-r--r-- | rts/sm/Storage.c | 4 |
3 files changed, 14 insertions, 5 deletions
diff --git a/includes/Rts.h b/includes/Rts.h index a1a83397f3..f1f8351298 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -58,7 +58,13 @@ extern "C" { #if __GNUC__ >= 4 #define RTS_UNLIKELY(p) __builtin_expect((p),0) #else -#define RTS_UNLIKELY(p) p +#define RTS_UNLIKELY(p) (p) +#endif + +#if __GNUC__ >= 4 +#define RTS_LIKELY(p) __builtin_expect(!!(p), 1) +#else +#define RTS_LIKELY(p) (p) #endif /* __builtin_unreachable is supported since GNU C 4.5 */ diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index bc89839aec..47b9bca04b 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -403,7 +403,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init ) words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words); - if (arr == NULL) { + if (arr == NULL) (likely: False) { jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure); } TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); @@ -413,8 +413,11 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init ) // Initialise all elements of the array with the value in R2 p = arr + SIZEOF_StgSmallMutArrPtrs; + // Avoid the shift for `WDS(n)` in the inner loop + W_ limit; + limit = arr + SIZEOF_StgSmallMutArrPtrs + WDS(n); for: - if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) (likely: True) { + if (p < limit) (likely: True) { W_[p] = init; p = p + WDS(1); goto for; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 71864585f6..f889e2262b 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -856,7 +856,7 @@ allocateMightFail (Capability *cap, W_ n) bdescr *bd; StgPtr p; - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + if (RTS_UNLIKELY(n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))) { // The largest number of words such that // the computation of req_blocks will not overflow. 
W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_); @@ -897,7 +897,7 @@ allocateMightFail (Capability *cap, W_ n) accountAllocation(cap, n); bd = cap->r.rCurrentAlloc; - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + if (RTS_UNLIKELY(bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W)) { if (bd) finishedNurseryBlock(cap,bd); |