 include/jemalloc/internal/edata.h | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)
diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h
index e77a55e6..d2d16c46 100644
--- a/include/jemalloc/internal/edata.h
+++ b/include/jemalloc/internal/edata.h
@@ -664,13 +664,20 @@ edata_cmp_summary_get(const edata_t *edata) {
static inline int
edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) {
- int ret;
- ret = (a.sn > b.sn) - (a.sn < b.sn);
- if (ret != 0) {
- return ret;
- }
- ret = (a.addr > b.addr) - (a.addr < b.addr);
- return ret;
+ /*
+ * Logically, what we're doing here is comparing based on `.sn`, and
+ * falling back to comparing on `.addr` in the case that `a.sn == b.sn`.
+ * We accomplish this by multiplying the result of the `.sn` comparison
+ * by 2, so that whenever it is nonzero it dominates the `.addr`
+ * comparison in determining the sign of the returned result value.
+ * The justification for doing things this way is that this is
+ * branchless - the branches that would be present in a straightforward
+ * implementation are taken in both directions often enough that they
+ * predict poorly. As a result, this implementation
+ * is measurably faster (by around 30%).
+ */
+ return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) +
+ ((a.addr > b.addr) - (a.addr < b.addr));
}
static inline int
@@ -683,15 +690,11 @@ edata_snad_comp(const edata_t *a, const edata_t *b) {
static inline int
edata_esnead_comp(const edata_t *a, const edata_t *b) {
- int ret;
-
- ret = edata_esn_comp(a, b);
- if (ret != 0) {
- return ret;
- }
-
- ret = edata_ead_comp(a, b);
- return ret;
+ /*
+ * Similar to `edata_cmp_summary_comp`, we've opted for a
+ * branchless implementation for the sake of performance.
+ */
+ return (2 * edata_esn_comp(a, b)) + edata_ead_comp(a, b);
}
ph_proto(, edata_avail, edata_t)
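
For readers unfamiliar with the idiom, the sketch below restates the trick from the patch outside of jemalloc: each three-way comparison yields -1, 0, or 1, and doubling the primary result maps it to -2, 0, or 2, so its sign dominates the sum whenever it is nonzero. Everything here (item_t, primary, secondary, key_cmp_*) is hypothetical scaffolding for illustration; only the comparison expression mirrors the diff.

#include <assert.h>
#include <stdint.h>

typedef struct {
	uint64_t primary;   /* compared first, like `.sn`  */
	uint64_t secondary; /* tie-breaker, like `.addr`   */
} item_t;

/* Branchy reference version: compare primary, fall back to secondary. */
static int
key_cmp_branchy(item_t a, item_t b) {
	int ret = (a.primary > b.primary) - (a.primary < b.primary);
	if (ret != 0) {
		return ret;
	}
	return (a.secondary > b.secondary) - (a.secondary < b.secondary);
}

/*
 * Branchless version: 2 * {-1, 0, 1} is {-2, 0, 2}, so a nonzero primary
 * comparison always outweighs the {-1, 0, 1} secondary comparison.
 */
static int
key_cmp_branchless(item_t a, item_t b) {
	return (2 * ((a.primary > b.primary) - (a.primary < b.primary))) +
	    ((a.secondary > b.secondary) - (a.secondary < b.secondary));
}

int
main(void) {
	item_t x = {1, 9};
	item_t y = {2, 0};
	item_t z = {1, 3};
	/* The two versions must agree in sign for every pair. */
	assert(key_cmp_branchy(x, y) < 0 && key_cmp_branchless(x, y) < 0);
	assert(key_cmp_branchy(y, x) > 0 && key_cmp_branchless(y, x) > 0);
	assert(key_cmp_branchy(x, z) > 0 && key_cmp_branchless(x, z) > 0);
	assert(key_cmp_branchy(x, x) == 0 && key_cmp_branchless(x, x) == 0);
	return 0;
}

One caveat worth noting: the sum only orders correctly when each sub-comparison is confined to {-1, 0, 1}. That appears to be the assumption behind reusing edata_esn_comp and edata_ead_comp directly in edata_esnead_comp, and it also means the combined result can have magnitude up to 3, which is harmless for callers that only inspect the sign.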