author    Guangli Dai <gdai@fb.com>      2022-09-19 17:05:55 -0700
committer Qi Wang <interwq@gmail.com>    2022-11-21 11:14:05 -0800
commit    e8f9f13811c16acb1ab8771fd2ffe4437e1b8620 (patch)
tree      283717b1a17e817a1a044b6ed82f5035e1e14b23
parent    06374d2a6ad525be86e4381b4bb5010fedff3268 (diff)
download  jemalloc-e8f9f13811c16acb1ab8771fd2ffe4437e1b8620.tar.gz
Inline free and sdallocx into operator delete
-rw-r--r--  include/jemalloc/internal/jemalloc_internal_externs.h      3
-rw-r--r--  include/jemalloc/internal/jemalloc_internal_inlines_c.h  224
-rw-r--r--  include/jemalloc/internal/prof_inlines.h                    6
-rw-r--r--  src/jemalloc.c                                            211
-rw-r--r--  src/jemalloc_cpp.cpp                                       18
-rw-r--r--  test/stress/cpp/microbench.cpp                              7
6 files changed, 241 insertions(+), 228 deletions(-)
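
The change moves the deallocation fast path (free_fastpath() and its helpers) from src/jemalloc.c into the internal inline header so that operator delete can reach the tcache fast path without going through the exported je_free/je_sdallocx symbols. The toy program below is a minimal, self-contained sketch of that dispatch shape only; the ToyCache type, the 64-slot bound, and the *_toy names are invented for illustration and are not jemalloc code.

// Self-contained model of the pattern the patch inlines into operator delete:
// try a per-thread cache first, fall back to the general path on a miss.
#include <cstddef>
#include <cstdio>
#include <cstdlib>

struct ToyCache {
    void  *slots[64];
    size_t count = 0;
};
thread_local ToyCache toy_cache;

// Fast path: succeeds only while the per-thread cache has room, mirroring
// free_fastpath() returning false to request the slow path.
inline bool free_fastpath_toy(void *ptr) {
    if (ptr == nullptr ||
        toy_cache.count == sizeof(toy_cache.slots) / sizeof(void *)) {
        return false;
    }
    toy_cache.slots[toy_cache.count++] = ptr;
    return true;
}

// Stand-in for the slow path, free_default().
void free_default_toy(void *ptr) {
    std::free(ptr);
}

// Shape of the new je_free_impl(): an always-inline wrapper that
// operator delete can call directly instead of the exported je_free symbol.
inline void je_free_impl_toy(void *ptr) {
    if (!free_fastpath_toy(ptr)) {
        free_default_toy(ptr);
    }
}

int main() {
    for (int i = 0; i < 100; i++) {
        je_free_impl_toy(std::malloc(16));
    }
    std::printf("cached %zu pointers; the rest took the slow path\n",
        toy_cache.count);
    // Drain the toy cache so the example does not leak.
    while (toy_cache.count > 0) {
        std::free(toy_cache.slots[--toy_cache.count]);
    }
    return 0;
}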
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h
index 63b9bd2c..d90f6ddb 100644
--- a/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -70,7 +70,8 @@ size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags);
void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
-void je_sdallocx_noflags(void *ptr, size_t size);
+void sdallocx_default(void *ptr, size_t size, int flags);
+void free_default(void *ptr);
void *malloc_default(size_t size);
#endif /* JEMALLOC_INTERNAL_EXTERNS_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index b0868b7d..719b8eea 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -7,6 +7,17 @@
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/thread_event.h"
#include "jemalloc/internal/witness.h"
+#include "jemalloc/internal/arena_externs.h"
+#include "jemalloc/internal/emap.h"
+
+/*
+ * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we
+ * should have one constant here per magic value there. Note however that the
+ * representations need not be related.
+ */
+#define TCACHE_IND_NONE ((unsigned)-1)
+#define TCACHE_IND_AUTOMATIC ((unsigned)-2)
+#define ARENA_IND_AUTOMATIC ((unsigned)-1)
/*
* Translating the names of the 'i' functions:
@@ -337,4 +348,217 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
return fallback_alloc(size);
}
+JEMALLOC_ALWAYS_INLINE tcache_t *
+tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) {
+ tcache_t *tcache;
+ if (tcache_ind == TCACHE_IND_AUTOMATIC) {
+ if (likely(!slow)) {
+ /* Getting tcache ptr unconditionally. */
+ tcache = tsd_tcachep_get(tsd);
+ assert(tcache == tcache_get(tsd));
+ } else if (is_alloc ||
+ likely(tsd_reentrancy_level_get(tsd) == 0)) {
+ tcache = tcache_get(tsd);
+ } else {
+ tcache = NULL;
+ }
+ } else {
+ /*
+ * Should not specify tcache on deallocation path when being
+ * reentrant.
+ */
+ assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 ||
+ tsd_state_nocleanup(tsd));
+ if (tcache_ind == TCACHE_IND_NONE) {
+ tcache = NULL;
+ } else {
+ tcache = tcaches_get(tsd, tcache_ind);
+ }
+ }
+ return tcache;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
+ if (config_opt_size_checks) {
+ emap_alloc_ctx_t dbg_ctx;
+ emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
+ &dbg_ctx);
+ if (alloc_ctx->szind != dbg_ctx.szind) {
+ safety_check_fail_sized_dealloc(
+ /* current_dealloc */ true, ptr,
+ /* true_size */ sz_size2index(dbg_ctx.szind),
+ /* input_size */ sz_size2index(alloc_ctx->szind));
+ return true;
+ }
+ if (alloc_ctx->slab != dbg_ctx.slab) {
+ safety_check_fail(
+ "Internal heap corruption detected: "
+ "mismatch in slab bit");
+ return true;
+ }
+ }
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_aligned(const void *ptr) {
+ return ((uintptr_t)ptr & PAGE_MASK) == 0;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+free_fastpath_nonfast_aligned(void *ptr, bool check_prof) {
+ /*
+ * free_fastpath does not handle two uncommon cases: 1) sampled profiled
+ * objects and 2) sampled junk & stash for use-after-free detection.
+ * Both have special alignments which are used to escape the fastpath.
+ *
+ * prof_sample is page-aligned, which covers the UAF check when both
+ * are enabled (the assertion below). Avoiding redundant checks since
+ * this is on the fastpath -- at most one runtime branch from this.
+ */
+ if (config_debug && cache_bin_nonfast_aligned(ptr)) {
+ assert(prof_sample_aligned(ptr));
+ }
+
+ if (config_prof && check_prof) {
+ /* When prof is enabled, the prof_sample alignment is enough. */
+ if (prof_sample_aligned(ptr)) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ if (config_uaf_detection) {
+ if (cache_bin_nonfast_aligned(ptr)) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/* Returns whether or not the free attempt was successful. */
+JEMALLOC_ALWAYS_INLINE
+bool free_fastpath(void *ptr, size_t size, bool size_hint) {
+ tsd_t *tsd = tsd_get(false);
+ /* The branch gets optimized away unless tsd_get_allocates(). */
+ if (unlikely(tsd == NULL)) {
+ return false;
+ }
+ /*
+ * The tsd_fast() / initialized checks are folded into the branch
+ * testing (deallocated_after >= threshold) later in this function.
+ * The threshold will be set to 0 when !tsd_fast.
+ */
+ assert(tsd_fast(tsd) ||
+ *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
+
+ emap_alloc_ctx_t alloc_ctx;
+ if (!size_hint) {
+ bool err = emap_alloc_ctx_try_lookup_fast(tsd,
+ &arena_emap_global, ptr, &alloc_ctx);
+
+ /* Note: profiled objects will have alloc_ctx.slab set */
+ if (unlikely(err || !alloc_ctx.slab ||
+ free_fastpath_nonfast_aligned(ptr,
+ /* check_prof */ false))) {
+ return false;
+ }
+ assert(alloc_ctx.szind != SC_NSIZES);
+ } else {
+ /*
+ * Check for both sizes that are too large, and for sampled /
+ * special aligned objects. The alignment check will also check
+ * for null ptr.
+ */
+ if (unlikely(size > SC_LOOKUP_MAXCLASS ||
+ free_fastpath_nonfast_aligned(ptr,
+ /* check_prof */ true))) {
+ return false;
+ }
+ alloc_ctx.szind = sz_size2index_lookup(size);
+ /* Max lookup class must be small. */
+ assert(alloc_ctx.szind < SC_NBINS);
+ /* This is a dead store, except when opt size checking is on. */
+ alloc_ctx.slab = true;
+ }
+ /*
+ * Currently the fastpath only handles small sizes. The branch on
+ * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking
+ * tcache szind upper limit (i.e. tcache_maxclass) as well.
+ */
+ assert(alloc_ctx.slab);
+
+ uint64_t deallocated, threshold;
+ te_free_fastpath_ctx(tsd, &deallocated, &threshold);
+
+ size_t usize = sz_index2size(alloc_ctx.szind);
+ uint64_t deallocated_after = deallocated + usize;
+ /*
+ * Check for events and tsd non-nominal (fast_threshold will be set to
+ * 0) in a single branch. Note that this handles the uninitialized case
+ * as well (TSD init will be triggered on the non-fastpath). Therefore
+ * anything that depends on a functional TSD (e.g. the alloc_ctx sanity check
+ * below) needs to be after this branch.
+ */
+ if (unlikely(deallocated_after >= threshold)) {
+ return false;
+ }
+ assert(tsd_fast(tsd));
+ bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
+ if (fail) {
+ /* See the comment in isfree. */
+ return true;
+ }
+
+ tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC,
+ /* slow */ false, /* is_alloc */ false);
+ cache_bin_t *bin = &tcache->bins[alloc_ctx.szind];
+
+ /*
+ * If junking were enabled, this is where we would do it. It's not
+ * though, since we ensured above that we're on the fast path. Assert
+ * that to double-check.
+ */
+ assert(!opt_junk_free);
+
+ if (!cache_bin_dalloc_easy(bin, ptr)) {
+ return false;
+ }
+
+ *tsd_thread_deallocatedp_get(tsd) = deallocated_after;
+
+ return true;
+}
+
+JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
+je_sdallocx_noflags(void *ptr, size_t size) {
+ LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr,
+ size);
+
+ if (!free_fastpath(ptr, size, true)) {
+ sdallocx_default(ptr, size, 0);
+ }
+
+ LOG("core.sdallocx.exit", "");
+}
+
+JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
+je_sdallocx_impl(void *ptr, size_t size, int flags) {
+ if (flags != 0 || !free_fastpath(ptr, size, true)) {
+ sdallocx_default(ptr, size, flags);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
+je_free_impl(void *ptr) {
+ if (!free_fastpath(ptr, 0, false)) {
+ free_default(ptr);
+ }
+}
+
#endif /* JEMALLOC_INTERNAL_INLINES_C_H */
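
free_fastpath_nonfast_aligned() above bails out of the fast path based purely on pointer alignment: prof-sampled allocations are page-aligned (prof_sample_aligned()), and the use-after-free stash has its own special alignment (cache_bin_nonfast_aligned()). Below is a small sketch of the page-alignment half only, assuming a 4096-byte page; jemalloc derives PAGE_MASK from its build configuration, so the constant here is an assumption for illustration.

// Illustrates the alignment test that routes sampled objects off the fast path.
#include <cstdint>
#include <cstdio>

constexpr uintptr_t kPageSize = 4096;  // assumed page size for this example
constexpr uintptr_t kPageMask = kPageSize - 1;

// Same shape as prof_sample_aligned(): page-aligned pointers are treated as
// potentially prof-sampled and are handed to the slow path instead.
inline bool prof_sample_aligned_toy(const void *ptr) {
    return (reinterpret_cast<uintptr_t>(ptr) & kPageMask) == 0;
}

int main() {
    alignas(4096) static unsigned char page[2 * 4096];
    std::printf("page start -> %d (escapes the fast path)\n",
        prof_sample_aligned_toy(page));
    std::printf("page + 16  -> %d (stays on the fast path)\n",
        prof_sample_aligned_toy(page + 16));
    // As the comment in free_fastpath() notes, the alignment check also
    // catches a null pointer.
    std::printf("nullptr    -> %d\n", prof_sample_aligned_toy(nullptr));
    return 0;
}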
diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h
index a8e7e7fb..7d9608b5 100644
--- a/include/jemalloc/internal/prof_inlines.h
+++ b/include/jemalloc/internal/prof_inlines.h
@@ -4,6 +4,7 @@
#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/thread_event.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_c.h"
JEMALLOC_ALWAYS_INLINE void
prof_active_assert() {
@@ -228,11 +229,6 @@ prof_sample_align(size_t orig_align) {
}
JEMALLOC_ALWAYS_INLINE bool
-prof_sample_aligned(const void *ptr) {
- return ((uintptr_t)ptr & PAGE_MASK) == 0;
-}
-
-JEMALLOC_ALWAYS_INLINE bool
prof_sampled(tsd_t *tsd, const void *ptr) {
prof_info_t prof_info;
prof_info_get(tsd, ptr, NULL, &prof_info);
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 039be40f..7407022f 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -2273,15 +2273,6 @@ static_opts_init(static_opts_t *static_opts) {
static_opts->usize = false;
}
-/*
- * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we
- * should have one constant here per magic value there. Note however that the
- * representations need not be related.
- */
-#define TCACHE_IND_NONE ((unsigned)-1)
-#define TCACHE_IND_AUTOMATIC ((unsigned)-2)
-#define ARENA_IND_AUTOMATIC ((unsigned)-1)
-
typedef struct dynamic_opts_s dynamic_opts_t;
struct dynamic_opts_s {
void **result;
@@ -2346,36 +2337,6 @@ zero_get(bool guarantee, bool slow) {
}
}
-JEMALLOC_ALWAYS_INLINE tcache_t *
-tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) {
- tcache_t *tcache;
- if (tcache_ind == TCACHE_IND_AUTOMATIC) {
- if (likely(!slow)) {
- /* Getting tcache ptr unconditionally. */
- tcache = tsd_tcachep_get(tsd);
- assert(tcache == tcache_get(tsd));
- } else if (is_alloc ||
- likely(tsd_reentrancy_level_get(tsd) == 0)) {
- tcache = tcache_get(tsd);
- } else {
- tcache = NULL;
- }
- } else {
- /*
- * Should not specify tcache on deallocation path when being
- * reentrant.
- */
- assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 ||
- tsd_state_nocleanup(tsd));
- if (tcache_ind == TCACHE_IND_NONE) {
- tcache = NULL;
- } else {
- tcache = tcaches_get(tsd, tcache_ind);
- }
- }
- return tcache;
-}
-
/* Return true if a manual arena is specified and arena_get() OOMs. */
JEMALLOC_ALWAYS_INLINE bool
arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) {
@@ -2915,29 +2876,6 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) {
thread_dalloc_event(tsd, usize);
}
-JEMALLOC_ALWAYS_INLINE bool
-maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
- if (config_opt_size_checks) {
- emap_alloc_ctx_t dbg_ctx;
- emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
- &dbg_ctx);
- if (alloc_ctx->szind != dbg_ctx.szind) {
- safety_check_fail_sized_dealloc(
- /* current_dealloc */ true, ptr,
- /* true_size */ sz_size2index(dbg_ctx.szind),
- /* input_size */ sz_size2index(alloc_ctx->szind));
- return true;
- }
- if (alloc_ctx->slab != dbg_ctx.slab) {
- safety_check_fail(
- "Internal heap corruption detected: "
- "mismatch in slab bit");
- return true;
- }
- }
- return false;
-}
-
JEMALLOC_ALWAYS_INLINE void
isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
if (!slow_path) {
@@ -3044,142 +2982,11 @@ free_default(void *ptr) {
}
}
-JEMALLOC_ALWAYS_INLINE bool
-free_fastpath_nonfast_aligned(void *ptr, bool check_prof) {
- /*
- * free_fastpath do not handle two uncommon cases: 1) sampled profiled
- * objects and 2) sampled junk & stash for use-after-free detection.
- * Both have special alignments which are used to escape the fastpath.
- *
- * prof_sample is page-aligned, which covers the UAF check when both
- * are enabled (the assertion below). Avoiding redundant checks since
- * this is on the fastpath -- at most one runtime branch from this.
- */
- if (config_debug && cache_bin_nonfast_aligned(ptr)) {
- assert(prof_sample_aligned(ptr));
- }
-
- if (config_prof && check_prof) {
- /* When prof is enabled, the prof_sample alignment is enough. */
- if (prof_sample_aligned(ptr)) {
- return true;
- } else {
- return false;
- }
- }
-
- if (config_uaf_detection) {
- if (cache_bin_nonfast_aligned(ptr)) {
- return true;
- } else {
- return false;
- }
- }
-
- return false;
-}
-
-/* Returns whether or not the free attempt was successful. */
-JEMALLOC_ALWAYS_INLINE
-bool free_fastpath(void *ptr, size_t size, bool size_hint) {
- tsd_t *tsd = tsd_get(false);
- /* The branch gets optimized away unless tsd_get_allocates(). */
- if (unlikely(tsd == NULL)) {
- return false;
- }
- /*
- * The tsd_fast() / initialized checks are folded into the branch
- * testing (deallocated_after >= threshold) later in this function.
- * The threshold will be set to 0 when !tsd_fast.
- */
- assert(tsd_fast(tsd) ||
- *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
-
- emap_alloc_ctx_t alloc_ctx;
- if (!size_hint) {
- bool err = emap_alloc_ctx_try_lookup_fast(tsd,
- &arena_emap_global, ptr, &alloc_ctx);
-
- /* Note: profiled objects will have alloc_ctx.slab set */
- if (unlikely(err || !alloc_ctx.slab ||
- free_fastpath_nonfast_aligned(ptr,
- /* check_prof */ false))) {
- return false;
- }
- assert(alloc_ctx.szind != SC_NSIZES);
- } else {
- /*
- * Check for both sizes that are too large, and for sampled /
- * special aligned objects. The alignment check will also check
- * for null ptr.
- */
- if (unlikely(size > SC_LOOKUP_MAXCLASS ||
- free_fastpath_nonfast_aligned(ptr,
- /* check_prof */ true))) {
- return false;
- }
- alloc_ctx.szind = sz_size2index_lookup(size);
- /* Max lookup class must be small. */
- assert(alloc_ctx.szind < SC_NBINS);
- /* This is a dead store, except when opt size checking is on. */
- alloc_ctx.slab = true;
- }
- /*
- * Currently the fastpath only handles small sizes. The branch on
- * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking
- * tcache szind upper limit (i.e. tcache_maxclass) as well.
- */
- assert(alloc_ctx.slab);
-
- uint64_t deallocated, threshold;
- te_free_fastpath_ctx(tsd, &deallocated, &threshold);
-
- size_t usize = sz_index2size(alloc_ctx.szind);
- uint64_t deallocated_after = deallocated + usize;
- /*
- * Check for events and tsd non-nominal (fast_threshold will be set to
- * 0) in a single branch. Note that this handles the uninitialized case
- * as well (TSD init will be triggered on the non-fastpath). Therefore
- * anything depends on a functional TSD (e.g. the alloc_ctx sanity check
- * below) needs to be after this branch.
- */
- if (unlikely(deallocated_after >= threshold)) {
- return false;
- }
- assert(tsd_fast(tsd));
- bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
- if (fail) {
- /* See the comment in isfree. */
- return true;
- }
-
- tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC,
- /* slow */ false, /* is_alloc */ false);
- cache_bin_t *bin = &tcache->bins[alloc_ctx.szind];
-
- /*
- * If junking were enabled, this is where we would do it. It's not
- * though, since we ensured above that we're on the fast path. Assert
- * that to double-check.
- */
- assert(!opt_junk_free);
-
- if (!cache_bin_dalloc_easy(bin, ptr)) {
- return false;
- }
-
- *tsd_thread_deallocatedp_get(tsd) = deallocated_after;
-
- return true;
-}
-
JEMALLOC_EXPORT void JEMALLOC_NOTHROW
je_free(void *ptr) {
LOG("core.free.entry", "ptr: %p", ptr);
- if (!free_fastpath(ptr, 0, false)) {
- free_default(ptr);
- }
+ je_free_impl(ptr);
LOG("core.free.exit", "");
}
@@ -4000,21 +3807,7 @@ je_sdallocx(void *ptr, size_t size, int flags) {
LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr,
size, flags);
- if (flags != 0 || !free_fastpath(ptr, size, true)) {
- sdallocx_default(ptr, size, flags);
- }
-
- LOG("core.sdallocx.exit", "");
-}
-
-void JEMALLOC_NOTHROW
-je_sdallocx_noflags(void *ptr, size_t size) {
- LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr,
- size);
-
- if (!free_fastpath(ptr, size, true)) {
- sdallocx_default(ptr, size, 0);
- }
+ je_sdallocx_impl(ptr, size, flags);
LOG("core.sdallocx.exit", "");
}
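
From the caller's side, je_sdallocx() now takes the inlined fast path only when flags == 0; any non-zero flag is routed to sdallocx_default(). A short usage sketch against the public API, assuming a jemalloc build whose API is not prefixed (the default on most platforms; with --with-jemalloc-prefix the names differ):

#include <jemalloc/jemalloc.h>

int main() {
    void *p = mallocx(64, 0);
    if (p != nullptr) {
        sdallocx(p, 64, 0);                    // flags == 0: eligible for the inlined fast path
    }
    void *q = mallocx(64, 0);
    if (q != nullptr) {
        sdallocx(q, 64, MALLOCX_TCACHE_NONE);  // flags != 0: dispatched to sdallocx_default()
    }
    return 0;
}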
diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp
index 8b53a392..e39615bc 100644
--- a/src/jemalloc_cpp.cpp
+++ b/src/jemalloc_cpp.cpp
@@ -173,21 +173,21 @@ operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_
void
operator delete(void *ptr) noexcept {
- je_free(ptr);
+ je_free_impl(ptr);
}
void
operator delete[](void *ptr) noexcept {
- je_free(ptr);
+ je_free_impl(ptr);
}
void
operator delete(void *ptr, const std::nothrow_t &) noexcept {
- je_free(ptr);
+ je_free_impl(ptr);
}
void operator delete[](void *ptr, const std::nothrow_t &) noexcept {
- je_free(ptr);
+ je_free_impl(ptr);
}
#if __cpp_sized_deallocation >= 201309
@@ -224,27 +224,27 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment)
if (unlikely(ptr == nullptr)) {
return;
}
- je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment));
+ je_sdallocx_impl(ptr, size, MALLOCX_ALIGN(alignment));
}
void
operator delete(void* ptr, std::align_val_t) noexcept {
- je_free(ptr);
+ je_free_impl(ptr);
}
void
operator delete[](void* ptr, std::align_val_t) noexcept {
- je_free(ptr);
+ je_free_impl(ptr);
}
void
operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept {
- je_free(ptr);
+ je_free_impl(ptr);
}
void
operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept {
- je_free(ptr);
+ je_free_impl(ptr);
}
void
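
With these replacements linked in, ordinary C++ deallocation reaches the new inline wrappers directly: plain delete lands in je_free_impl(), while the sized and aligned forms go through je_sdallocx_noflags() and je_sdallocx_impl(). The snippet below is standard C++17 and only illustrates which overloads fire; whether the sized overload is chosen for the plain delete depends on the compiler and flags (e.g. -fsized-deallocation), so treat the comments as the expected routing, not a guarantee.

#include <new>

struct alignas(64) Padded {  // over-aligned type: uses the align_val_t overloads
    double d[8];
};

int main() {
    int *i = new int(42);
    delete i;            // plain (or sized) delete -> je_free_impl / je_sdallocx_noflags

    Padded *p = new Padded();
    delete p;            // aligned delete -> je_sdallocx_impl with MALLOCX_ALIGN
    return 0;
}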
diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp
index 65f41dea..3d23403b 100644
--- a/test/stress/cpp/microbench.cpp
+++ b/test/stress/cpp/microbench.cpp
@@ -4,7 +4,7 @@
static void
malloc_free(void) {
void *p = malloc(1);
- expect_ptr_not_null(p, "Unexpected new failure");
+ expect_ptr_not_null(p, "Unexpected malloc failure");
free(p);
}
@@ -18,7 +18,7 @@ new_delete(void) {
static void
malloc_free_array(void) {
void *p = malloc(sizeof(int)*8);
- expect_ptr_not_null(p, "Unexpected new[] failure");
+ expect_ptr_not_null(p, "Unexpected malloc failure");
free(p);
}
@@ -40,7 +40,7 @@ new_sized_delete(void) {
static void
malloc_sdallocx(void) {
void *p = malloc(1);
- expect_ptr_not_null(p, "Unexpected new failure");
+ expect_ptr_not_null(p, "Unexpected malloc failure");
sdallocx(p, 1, 0);
}
#endif
@@ -79,5 +79,4 @@ main() {
test_free_vs_delete,
test_free_array_vs_delete_array,
test_sized_delete_vs_sdallocx);
-
}
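
The microbenchmark above times the library's own pairs (malloc/free, new/delete, sized delete/sdallocx). For a quick external sanity check, a rough standalone harness along the same lines might look like the following; this is a hypothetical sketch, not the repository's benchmark, and the bundled test/stress/cpp/microbench remains the authoritative measurement.

#include <chrono>
#include <cstdio>
#include <cstdlib>

// Times f() over `iters` iterations and returns nanoseconds per operation.
template <typename F>
static double ns_per_op(F f, int iters) {
    auto t0 = std::chrono::steady_clock::now();
    for (int i = 0; i < iters; i++) {
        f();
    }
    auto t1 = std::chrono::steady_clock::now();
    return std::chrono::duration<double, std::nano>(t1 - t0).count() / iters;
}

int main() {
    const int iters = 1000000;
    double m = ns_per_op([] {
        void *p = std::malloc(1);
        // The volatile write keeps the pair from being optimized away.
        *static_cast<volatile char *>(p) = 0;
        std::free(p);
    }, iters);
    double d = ns_per_op([] {
        char *p = new char;
        *static_cast<volatile char *>(p) = 0;
        delete p;
    }, iters);
    std::printf("malloc/free: %.1f ns/op, new/delete: %.1f ns/op\n", m, d);
    return 0;
}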