From e8f9f13811c16acb1ab8771fd2ffe4437e1b8620 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 19 Sep 2022 17:05:55 -0700 Subject: Inline free and sdallocx into operator delete --- .../jemalloc/internal/jemalloc_internal_externs.h | 3 +- .../internal/jemalloc_internal_inlines_c.h | 224 +++++++++++++++++++++ include/jemalloc/internal/prof_inlines.h | 6 +- src/jemalloc.c | 211 +------------------ src/jemalloc_cpp.cpp | 18 +- test/stress/cpp/microbench.cpp | 7 +- 6 files changed, 241 insertions(+), 228 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 63b9bd2c..d90f6ddb 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -70,7 +70,8 @@ size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -void je_sdallocx_noflags(void *ptr, size_t size); +void sdallocx_default(void *ptr, size_t size, int flags); +void free_default(void *ptr); void *malloc_default(size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index b0868b7d..719b8eea 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -7,6 +7,17 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/emap.h" + +/* + * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we + * should have one constant here per magic value there. Note however that the + * representations need not be related. + */ +#define TCACHE_IND_NONE ((unsigned)-1) +#define TCACHE_IND_AUTOMATIC ((unsigned)-2) +#define ARENA_IND_AUTOMATIC ((unsigned)-1) /* * Translating the names of the 'i' functions: @@ -337,4 +348,217 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { return fallback_alloc(size); } +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { + tcache_t *tcache; + if (tcache_ind == TCACHE_IND_AUTOMATIC) { + if (likely(!slow)) { + /* Getting tcache ptr unconditionally. */ + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else if (is_alloc || + likely(tsd_reentrancy_level_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } + } else { + /* + * Should not specify tcache on deallocation path when being + * reentrant. 
+ */ + assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); + if (tcache_ind == TCACHE_IND_NONE) { + tcache = NULL; + } else { + tcache = tcaches_get(tsd, tcache_ind); + } + } + return tcache; +} + +JEMALLOC_ALWAYS_INLINE bool +maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { + if (config_opt_size_checks) { + emap_alloc_ctx_t dbg_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &dbg_ctx); + if (alloc_ctx->szind != dbg_ctx.szind) { + safety_check_fail_sized_dealloc( + /* current_dealloc */ true, ptr, + /* true_size */ sz_size2index(dbg_ctx.szind), + /* input_size */ sz_size2index(alloc_ctx->szind)); + return true; + } + if (alloc_ctx->slab != dbg_ctx.slab) { + safety_check_fail( + "Internal heap corruption detected: " + "mismatch in slab bit"); + return true; + } + } + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_aligned(const void *ptr) { + return ((uintptr_t)ptr & PAGE_MASK) == 0; +} + +JEMALLOC_ALWAYS_INLINE bool +free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { + /* + * free_fastpath do not handle two uncommon cases: 1) sampled profiled + * objects and 2) sampled junk & stash for use-after-free detection. + * Both have special alignments which are used to escape the fastpath. + * + * prof_sample is page-aligned, which covers the UAF check when both + * are enabled (the assertion below). Avoiding redundant checks since + * this is on the fastpath -- at most one runtime branch from this. + */ + if (config_debug && cache_bin_nonfast_aligned(ptr)) { + assert(prof_sample_aligned(ptr)); + } + + if (config_prof && check_prof) { + /* When prof is enabled, the prof_sample alignment is enough. */ + if (prof_sample_aligned(ptr)) { + return true; + } else { + return false; + } + } + + if (config_uaf_detection) { + if (cache_bin_nonfast_aligned(ptr)) { + return true; + } else { + return false; + } + } + + return false; +} + +/* Returns whether or not the free attempt was successful. */ +JEMALLOC_ALWAYS_INLINE +bool free_fastpath(void *ptr, size_t size, bool size_hint) { + tsd_t *tsd = tsd_get(false); + /* The branch gets optimized away unless tsd_get_allocates(). */ + if (unlikely(tsd == NULL)) { + return false; + } + /* + * The tsd_fast() / initialized checks are folded into the branch + * testing (deallocated_after >= threshold) later in this function. + * The threshold will be set to 0 when !tsd_fast. + */ + assert(tsd_fast(tsd) || + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); + + emap_alloc_ctx_t alloc_ctx; + if (!size_hint) { + bool err = emap_alloc_ctx_try_lookup_fast(tsd, + &arena_emap_global, ptr, &alloc_ctx); + + /* Note: profiled objects will have alloc_ctx.slab set */ + if (unlikely(err || !alloc_ctx.slab || + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ false))) { + return false; + } + assert(alloc_ctx.szind != SC_NSIZES); + } else { + /* + * Check for both sizes that are too large, and for sampled / + * special aligned objects. The alignment check will also check + * for null ptr. + */ + if (unlikely(size > SC_LOOKUP_MAXCLASS || + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ true))) { + return false; + } + alloc_ctx.szind = sz_size2index_lookup(size); + /* Max lookup class must be small. */ + assert(alloc_ctx.szind < SC_NBINS); + /* This is a dead store, except when opt size checking is on. */ + alloc_ctx.slab = true; + } + /* + * Currently the fastpath only handles small sizes. The branch on + * SC_LOOKUP_MAXCLASS makes sure of it. 
This lets us avoid checking + * tcache szind upper limit (i.e. tcache_maxclass) as well. + */ + assert(alloc_ctx.slab); + + uint64_t deallocated, threshold; + te_free_fastpath_ctx(tsd, &deallocated, &threshold); + + size_t usize = sz_index2size(alloc_ctx.szind); + uint64_t deallocated_after = deallocated + usize; + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. Note that this handles the uninitialized case + * as well (TSD init will be triggered on the non-fastpath). Therefore + * anything depends on a functional TSD (e.g. the alloc_ctx sanity check + * below) needs to be after this branch. + */ + if (unlikely(deallocated_after >= threshold)) { + return false; + } + assert(tsd_fast(tsd)); + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. */ + return true; + } + + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ false); + cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; + + /* + * If junking were enabled, this is where we would do it. It's not + * though, since we ensured above that we're on the fast path. Assert + * that to double-check. + */ + assert(!opt_junk_free); + + if (!cache_bin_dalloc_easy(bin, ptr)) { + return false; + } + + *tsd_thread_deallocatedp_get(tsd) = deallocated_after; + + return true; +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_sdallocx_noflags(void *ptr, size_t size) { + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, + size); + + if (!free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, 0); + } + + LOG("core.sdallocx.exit", ""); +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_sdallocx_impl(void *ptr, size_t size, int flags) { + if (flags != 0 || !free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, flags); + } +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_free_impl(void *ptr) { + if (!free_fastpath(ptr, 0, false)) { + free_default(ptr); + } +} + #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index a8e7e7fb..7d9608b5 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" JEMALLOC_ALWAYS_INLINE void prof_active_assert() { @@ -227,11 +228,6 @@ prof_sample_align(size_t orig_align) { orig_align; } -JEMALLOC_ALWAYS_INLINE bool -prof_sample_aligned(const void *ptr) { - return ((uintptr_t)ptr & PAGE_MASK) == 0; -} - JEMALLOC_ALWAYS_INLINE bool prof_sampled(tsd_t *tsd, const void *ptr) { prof_info_t prof_info; diff --git a/src/jemalloc.c b/src/jemalloc.c index 039be40f..7407022f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2273,15 +2273,6 @@ static_opts_init(static_opts_t *static_opts) { static_opts->usize = false; } -/* - * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we - * should have one constant here per magic value there. Note however that the - * representations need not be related. 
- */ -#define TCACHE_IND_NONE ((unsigned)-1) -#define TCACHE_IND_AUTOMATIC ((unsigned)-2) -#define ARENA_IND_AUTOMATIC ((unsigned)-1) - typedef struct dynamic_opts_s dynamic_opts_t; struct dynamic_opts_s { void **result; @@ -2346,36 +2337,6 @@ zero_get(bool guarantee, bool slow) { } } -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { - tcache_t *tcache; - if (tcache_ind == TCACHE_IND_AUTOMATIC) { - if (likely(!slow)) { - /* Getting tcache ptr unconditionally. */ - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else if (is_alloc || - likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } else { - /* - * Should not specify tcache on deallocation path when being - * reentrant. - */ - assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if (tcache_ind == TCACHE_IND_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, tcache_ind); - } - } - return tcache; -} - /* Return true if a manual arena is specified and arena_get() OOMs. */ JEMALLOC_ALWAYS_INLINE bool arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) { @@ -2915,29 +2876,6 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { thread_dalloc_event(tsd, usize); } -JEMALLOC_ALWAYS_INLINE bool -maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { - if (config_opt_size_checks) { - emap_alloc_ctx_t dbg_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &dbg_ctx); - if (alloc_ctx->szind != dbg_ctx.szind) { - safety_check_fail_sized_dealloc( - /* current_dealloc */ true, ptr, - /* true_size */ sz_size2index(dbg_ctx.szind), - /* input_size */ sz_size2index(alloc_ctx->szind)); - return true; - } - if (alloc_ctx->slab != dbg_ctx.slab) { - safety_check_fail( - "Internal heap corruption detected: " - "mismatch in slab bit"); - return true; - } - } - return false; -} - JEMALLOC_ALWAYS_INLINE void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (!slow_path) { @@ -3044,142 +2982,11 @@ free_default(void *ptr) { } } -JEMALLOC_ALWAYS_INLINE bool -free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { - /* - * free_fastpath do not handle two uncommon cases: 1) sampled profiled - * objects and 2) sampled junk & stash for use-after-free detection. - * Both have special alignments which are used to escape the fastpath. - * - * prof_sample is page-aligned, which covers the UAF check when both - * are enabled (the assertion below). Avoiding redundant checks since - * this is on the fastpath -- at most one runtime branch from this. - */ - if (config_debug && cache_bin_nonfast_aligned(ptr)) { - assert(prof_sample_aligned(ptr)); - } - - if (config_prof && check_prof) { - /* When prof is enabled, the prof_sample alignment is enough. */ - if (prof_sample_aligned(ptr)) { - return true; - } else { - return false; - } - } - - if (config_uaf_detection) { - if (cache_bin_nonfast_aligned(ptr)) { - return true; - } else { - return false; - } - } - - return false; -} - -/* Returns whether or not the free attempt was successful. */ -JEMALLOC_ALWAYS_INLINE -bool free_fastpath(void *ptr, size_t size, bool size_hint) { - tsd_t *tsd = tsd_get(false); - /* The branch gets optimized away unless tsd_get_allocates(). 
*/ - if (unlikely(tsd == NULL)) { - return false; - } - /* - * The tsd_fast() / initialized checks are folded into the branch - * testing (deallocated_after >= threshold) later in this function. - * The threshold will be set to 0 when !tsd_fast. - */ - assert(tsd_fast(tsd) || - *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); - - emap_alloc_ctx_t alloc_ctx; - if (!size_hint) { - bool err = emap_alloc_ctx_try_lookup_fast(tsd, - &arena_emap_global, ptr, &alloc_ctx); - - /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(err || !alloc_ctx.slab || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ false))) { - return false; - } - assert(alloc_ctx.szind != SC_NSIZES); - } else { - /* - * Check for both sizes that are too large, and for sampled / - * special aligned objects. The alignment check will also check - * for null ptr. - */ - if (unlikely(size > SC_LOOKUP_MAXCLASS || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ true))) { - return false; - } - alloc_ctx.szind = sz_size2index_lookup(size); - /* Max lookup class must be small. */ - assert(alloc_ctx.szind < SC_NBINS); - /* This is a dead store, except when opt size checking is on. */ - alloc_ctx.slab = true; - } - /* - * Currently the fastpath only handles small sizes. The branch on - * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking - * tcache szind upper limit (i.e. tcache_maxclass) as well. - */ - assert(alloc_ctx.slab); - - uint64_t deallocated, threshold; - te_free_fastpath_ctx(tsd, &deallocated, &threshold); - - size_t usize = sz_index2size(alloc_ctx.szind); - uint64_t deallocated_after = deallocated + usize; - /* - * Check for events and tsd non-nominal (fast_threshold will be set to - * 0) in a single branch. Note that this handles the uninitialized case - * as well (TSD init will be triggered on the non-fastpath). Therefore - * anything depends on a functional TSD (e.g. the alloc_ctx sanity check - * below) needs to be after this branch. - */ - if (unlikely(deallocated_after >= threshold)) { - return false; - } - assert(tsd_fast(tsd)); - bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); - if (fail) { - /* See the comment in isfree. */ - return true; - } - - tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, - /* slow */ false, /* is_alloc */ false); - cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; - - /* - * If junking were enabled, this is where we would do it. It's not - * though, since we ensured above that we're on the fast path. Assert - * that to double-check. 
- */ - assert(!opt_junk_free); - - if (!cache_bin_dalloc_easy(bin, ptr)) { - return false; - } - - *tsd_thread_deallocatedp_get(tsd) = deallocated_after; - - return true; -} - JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { LOG("core.free.entry", "ptr: %p", ptr); - if (!free_fastpath(ptr, 0, false)) { - free_default(ptr); - } + je_free_impl(ptr); LOG("core.free.exit", ""); } @@ -4000,21 +3807,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); - if (flags != 0 || !free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, flags); - } - - LOG("core.sdallocx.exit", ""); -} - -void JEMALLOC_NOTHROW -je_sdallocx_noflags(void *ptr, size_t size) { - LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, - size); - - if (!free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, 0); - } + je_sdallocx_impl(ptr, size, flags); LOG("core.sdallocx.exit", ""); } diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 8b53a392..e39615bc 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -173,21 +173,21 @@ operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_ void operator delete(void *ptr) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void *ptr) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete(void *ptr, const std::nothrow_t &) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void *ptr, const std::nothrow_t &) noexcept { - je_free(ptr); + je_free_impl(ptr); } #if __cpp_sized_deallocation >= 201309 @@ -224,27 +224,27 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); + je_sdallocx_impl(ptr, size, MALLOCX_ALIGN(alignment)); } void operator delete(void* ptr, std::align_val_t) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void* ptr, std::align_val_t) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { - je_free(ptr); + je_free_impl(ptr); } void diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 65f41dea..3d23403b 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -4,7 +4,7 @@ static void malloc_free(void) { void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected new failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); free(p); } @@ -18,7 +18,7 @@ new_delete(void) { static void malloc_free_array(void) { void *p = malloc(sizeof(int)*8); - expect_ptr_not_null(p, "Unexpected new[] failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); free(p); } @@ -40,7 +40,7 @@ new_sized_delete(void) { static void malloc_sdallocx(void) { void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected new failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); sdallocx(p, 1, 0); } #endif @@ -79,5 +79,4 @@ main() { test_free_vs_delete, test_free_array_vs_delete_array, test_sized_delete_vs_sdallocx); - } -- cgit v1.2.1