author     Aliaksey Kandratsenka <alkondratenko@gmail.com>    2017-04-02 19:27:00 -0700
committer  Aliaksey Kandratsenka <alkondratenko@gmail.com>    2017-05-14 19:04:56 -0700
commit     b1d88662cb4a38ee47aa48076745898033526f9d (patch)
tree       2ab6cf652c29b3b23982d67791eb7b6a411e3d90
parent     991f47a159f0e169883f06686f13c31688fa2bf0 (diff)
download   gperftools-b1d88662cb4a38ee47aa48076745898033526f9d.tar.gz

change size class to be represented by 32 bit int
This moves the code closer to the Google-internal version and provides for slightly tighter code encoding on amd64.
-rw-r--r--  src/common.h                     38
-rw-r--r--  src/packed-cache-inl.h            2
-rw-r--r--  src/page_heap.cc                  2
-rw-r--r--  src/page_heap.h                  10
-rw-r--r--  src/tcmalloc.cc                  30
-rw-r--r--  src/tests/packed-cache_test.cc    4
-rw-r--r--  src/thread_cache.cc              10
-rw-r--r--  src/thread_cache.h               18
8 files changed, 64 insertions(+), 50 deletions(-)
diff --git a/src/common.h b/src/common.h
index 14d7c29..a5801b7 100644
--- a/src/common.h
+++ b/src/common.h
@@ -196,6 +196,21 @@ class SizeMap {
return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
}
+ // If size is no more than kMaxSize, compute index of the
+ // class_array[] entry for it, putting the class index in output
+ // parameter idx and returning true. Otherwise return false.
+ static inline bool ATTRIBUTE_ALWAYS_INLINE ClassIndexMaybe(size_t s,
+ uint32* idx) {
+ if (PREDICT_TRUE(s <= kMaxSmallSize)) {
+ *idx = (static_cast<uint32>(s) + 7) >> 3;
+ return true;
+ } else if (s <= kMaxSize) {
+ *idx = (static_cast<uint32>(s) + 127 + (120 << 7)) >> 7;
+ return true;
+ }
+ return false;
+ }
+
// Compute index of the class_array[] entry for a given size
static inline size_t ClassIndex(size_t s) {
// Use unsigned arithmetic to avoid unnecessary sign extensions.
@@ -237,31 +252,30 @@ class SizeMap {
return class_array_[ClassIndex(size)];
}
- inline bool MaybeSizeClass(size_t size, size_t *size_class) {
- size_t class_idx;
- if (PREDICT_TRUE(size <= kMaxSmallSize)) {
- class_idx = SmallSizeClass(size);
- } else if (size <= kMaxSize) {
- class_idx = LargeSizeClass(size);
- } else {
+ // Check if size is small enough to be representable by a size
+ // class, and if it is, put matching size class into *cl. Returns
+ // true iff matching size class was found.
+ inline bool ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, uint32* cl) {
+ uint32 idx;
+ if (!ClassIndexMaybe(size, &idx)) {
return false;
}
- *size_class = class_array_[class_idx];
+ *cl = class_array_[idx];
return true;
}
// Get the byte-size for a specified class
- inline int32 ByteSizeForClass(size_t cl) {
+ inline int32 ATTRIBUTE_ALWAYS_INLINE ByteSizeForClass(uint32 cl) {
return class_to_size_[cl];
}
// Mapping from size class to max size storable in that class
- inline int32 class_to_size(size_t cl) {
+ inline int32 class_to_size(uint32 cl) {
return class_to_size_[cl];
}
// Mapping from size class to number of pages to allocate at a time
- inline size_t class_to_pages(size_t cl) {
+ inline size_t class_to_pages(uint32 cl) {
return class_to_pages_[cl];
}
@@ -270,7 +284,7 @@ class SizeMap {
// amortize the lock overhead for accessing the central list. Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
- inline int num_objects_to_move(size_t cl) {
+ inline int num_objects_to_move(uint32 cl) {
return num_objects_to_move_[cl];
}
};
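The new SizeMap::ClassIndexMaybe() above maps a request size to an index into class_array_: sizes up to kMaxSmallSize are bucketed in 8-byte steps, larger sizes up to kMaxSize in 128-byte steps, and anything bigger gets no size class. Below is a standalone sketch of that arithmetic, compilable on its own; the kMaxSmallSize/kMaxSize values are assumed (typical tcmalloc defaults), and uint32_t from <cstdint> stands in for the project's uint32 typedef.

// Standalone sketch of the class-index arithmetic introduced above.
// kMaxSmallSize / kMaxSize values are assumptions (typical tcmalloc
// defaults); the real constants live in src/common.h.
#include <cstddef>
#include <cstdint>
#include <cstdio>

namespace {
const size_t kMaxSmallSize = 1024;       // assumed default
const size_t kMaxSize = 256 * 1024;      // assumed default

// Mirrors SizeMap::ClassIndexMaybe() from the hunk above.
bool ClassIndexMaybe(size_t s, uint32_t* idx) {
  if (s <= kMaxSmallSize) {
    *idx = (static_cast<uint32_t>(s) + 7) >> 3;                  // 8-byte granularity
    return true;
  } else if (s <= kMaxSize) {
    *idx = (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;   // 128-byte granularity
    return true;
  }
  return false;  // too large for any size class
}
}  // namespace

int main() {
  const size_t sizes[] = {1, 8, 9, 1024, 1025, 256 * 1024, 256 * 1024 + 1};
  for (size_t s : sizes) {
    uint32_t idx;
    if (ClassIndexMaybe(s, &idx)) {
      std::printf("size %7zu -> class_array_ index %u\n", s, idx);
    } else {
      std::printf("size %7zu -> no size class (large allocation)\n", s);
    }
  }
  return 0;
}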
diff --git a/src/packed-cache-inl.h b/src/packed-cache-inl.h
index 03a871c..7c216e5 100644
--- a/src/packed-cache-inl.h
+++ b/src/packed-cache-inl.h
@@ -138,7 +138,7 @@ class PackedCache {
public:
typedef uintptr_t T;
typedef uintptr_t K;
- typedef size_t V;
+ typedef uint32 V;
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Decrease the size map cache if running in the small memory mode.
static const int kHashbits = 12;
diff --git a/src/page_heap.cc b/src/page_heap.cc
index 50b2752..b92d9ed 100644
--- a/src/page_heap.cc
+++ b/src/page_heap.cc
@@ -513,7 +513,7 @@ bool PageHeap::EnsureLimit(Length n, bool withRelease)
return takenPages + n <= limit;
}
-void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
+void PageHeap::RegisterSizeClass(Span* span, uint32 sc) {
// Associate span object with all interior pages as well
ASSERT(span->location == Span::IN_USE);
ASSERT(GetDescriptor(span->start) == span);
diff --git a/src/page_heap.h b/src/page_heap.h
index 193bc97..eeb7cd6 100644
--- a/src/page_heap.h
+++ b/src/page_heap.h
@@ -129,7 +129,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
// specified size-class.
// REQUIRES: span was returned by an earlier call to New()
// and has not yet been deleted.
- void RegisterSizeClass(Span* span, size_t sc);
+ void RegisterSizeClass(Span* span, uint32 sc);
// Split an allocated span into two spans: one of length "n" pages
// followed by another span of length "span->length - n" pages.
@@ -194,16 +194,16 @@ class PERFTOOLS_DLL_DECL PageHeap {
Length ReleaseAtLeastNPages(Length num_pages);
// Reads and writes to pagemap_cache_ do not require locking.
- bool TryGetSizeClass(PageID p, size_t* out) const {
+ bool TryGetSizeClass(PageID p, uint32* out) const {
return pagemap_cache_.TryGet(p, out);
}
- void SetCachedSizeClass(PageID p, size_t cl) {
+ void SetCachedSizeClass(PageID p, uint32 cl) {
ASSERT(cl != 0);
pagemap_cache_.Put(p, cl);
}
void InvalidateCachedSizeClass(PageID p) { pagemap_cache_.Invalidate(p); }
- size_t GetSizeClassOrZero(PageID p) const {
- size_t cached_value;
+ uint32 GetSizeClassOrZero(PageID p) const {
+ uint32 cached_value;
if (!TryGetSizeClass(p, &cached_value)) {
cached_value = 0;
}
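For reference, here is a standalone sketch of the size-class cache contract PageHeap exposes above (TryGetSizeClass / SetCachedSizeClass / InvalidateCachedSizeClass / GetSizeClassOrZero), now carrying uint32 values. An unordered_map stands in for the real PackedCache, which is a fixed-size lossy hash cache and may miss even after a Put; ToyPageMapCache and its internals are illustrative only.

// Standalone sketch of the PageHeap size-class cache contract shown
// above. The map stand-in never evicts entries, unlike PackedCache.
#include <cstdint>
#include <cstdio>
#include <unordered_map>

typedef uintptr_t PageID;

class ToyPageMapCache {
 public:
  bool TryGetSizeClass(PageID p, uint32_t* out) const {
    auto it = cache_.find(p);
    if (it == cache_.end()) return false;
    *out = it->second;
    return true;
  }
  void SetCachedSizeClass(PageID p, uint32_t cl) { cache_[p] = cl; }
  void InvalidateCachedSizeClass(PageID p) { cache_.erase(p); }
  // 0 is not a valid size class, so it doubles as "unknown/large".
  uint32_t GetSizeClassOrZero(PageID p) const {
    uint32_t cached_value;
    if (!TryGetSizeClass(p, &cached_value)) cached_value = 0;
    return cached_value;
  }

 private:
  std::unordered_map<PageID, uint32_t> cache_;
};

int main() {
  ToyPageMapCache cache;
  cache.SetCachedSizeClass(42, 7);       // page 42 holds class-7 objects
  std::printf("page 42 -> class %u\n", cache.GetSizeClassOrZero(42));
  cache.InvalidateCachedSizeClass(42);   // e.g. span returned to the page heap
  std::printf("page 42 -> class %u\n", cache.GetSizeClassOrZero(42));
  return 0;
}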
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 21b1e5d..9d718f1 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -421,7 +421,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
out->printf("transfer cache, and central cache, by size class\n");
out->printf("------------------------------------------------\n");
uint64_t cumulative = 0;
- for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
+ for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) {
if (class_count[cl] > 0) {
size_t cl_size = Static::sizemap()->ByteSizeForClass(cl);
uint64_t class_bytes = class_count[cl] * cl_size;
@@ -810,7 +810,7 @@ class TCMallocImplementation : public MallocExtension {
if ((p >> (kAddressBits - kPageShift)) > 0) {
return kNotOwned;
}
- size_t cl;
+ uint32 cl;
if (Static::pageheap()->TryGetSizeClass(p, &cl)) {
return kOwned;
}
@@ -915,8 +915,8 @@ static uint32_t size_class_with_alignment(size_t size, size_t align) {
if (align >= kPageSize) {
return 0;
}
- size_t cl;
- if (!Static::sizemap()->MaybeSizeClass(size, &cl)) {
+ uint32 cl;
+ if (!Static::sizemap()->GetSizeClass(size, &cl)) {
return 0;
}
// Search through acceptable size classes looking for one with
@@ -942,7 +942,7 @@ static ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) {
if (PREDICT_FALSE(!Static::IsInited())) ThreadCache::InitModule();
size_t align = static_cast<size_t>(1ull << (flags & 0x3f));
- size_t cl = size_class_with_alignment(size, align);
+ uint32 cl = size_class_with_alignment(size, align);
if (cl) {
return Static::sizemap()->ByteSizeForClass(cl);
} else {
@@ -960,9 +960,9 @@ size_t tc_nallocx(size_t size, int flags) {
if (PREDICT_FALSE(flags != 0)) {
return nallocx_slow(size, flags);
}
- size_t cl;
+ uint32 cl;
// size class 0 is only possible if malloc is not yet initialized
- if (Static::sizemap()->MaybeSizeClass(size, &cl) && cl != 0) {
+ if (Static::sizemap()->GetSizeClass(size, &cl) && cl != 0) {
return Static::sizemap()->ByteSizeForClass(cl);
} else {
return nallocx_slow(size, 0);
@@ -1043,7 +1043,7 @@ static TCMallocGuard module_enter_exit_hook;
static inline bool CheckCachedSizeClass(void *ptr) {
PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
- size_t cached_value;
+ uint32 cached_value;
if (!Static::pageheap()->TryGetSizeClass(p, &cached_value)) {
return true;
}
@@ -1270,12 +1270,12 @@ ATTRIBUTE_ALWAYS_INLINE inline void* do_malloc(size_t size) {
// note: it will force initialization of malloc if necessary
ThreadCache* cache = ThreadCache::GetCache();
- size_t cl;
+ uint32 cl;
ASSERT(Static::IsInited());
ASSERT(cache != NULL);
- if (PREDICT_FALSE(!Static::sizemap()->MaybeSizeClass(size, &cl))) {
+ if (PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size, &cl))) {
return do_malloc_pages(cache, size);
}
@@ -1345,7 +1345,7 @@ void do_free_with_callback(void* ptr,
ThreadCache* heap = ThreadCache::GetCacheIfPresent();
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
- size_t cl;
+ uint32 cl;
#ifndef NO_TCMALLOC_SAMPLES
// we only pass size hint when ptr is not page aligned. Which
@@ -1353,7 +1353,7 @@ void do_free_with_callback(void* ptr,
ASSERT(!use_hint || size_hint < kPageSize);
#endif
- if (!use_hint || PREDICT_FALSE(!Static::sizemap()->MaybeSizeClass(size_hint, &cl))) {
+ if (!use_hint || PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size_hint, &cl))) {
// if we're in sized delete, but size is too large, no need to
// probe size cache
bool cache_hit = !use_hint && Static::pageheap()->TryGetSizeClass(p, &cl);
@@ -1407,7 +1407,7 @@ inline size_t GetSizeWithCallback(const void* ptr,
if (ptr == NULL)
return 0;
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
- size_t cl;
+ uint32 cl;
if (Static::pageheap()->TryGetSizeClass(p, &cl)) {
return Static::sizemap()->ByteSizeForClass(cl);
}
@@ -1726,8 +1726,8 @@ static void * malloc_fast_path(size_t size) {
return AllocateFull(size);
}
- size_t cl;
- if (PREDICT_FALSE(!Static::sizemap()->MaybeSizeClass(size, &cl))) {
+ uint32 cl;
+ if (PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size, &cl))) {
return AllocateFull(size);
}
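The tc_nallocx() hunks above show the pattern the whole file now follows: ask SizeMap for a uint32 size class and, when one exists, take that class's byte size; otherwise fall through to the slow path. Below is a standalone sketch of that round-trip; ToySizeMap, its size table, and toy_nallocx are made up for illustration and do not reflect the real size classes, which are built at run time.

// Standalone sketch of the tc_nallocx() fast path shown above: map the
// requested size to a size class and report that class's byte size as
// the usable allocation size. The size-class table is made up.
#include <cstddef>
#include <cstdint>
#include <cstdio>

class ToySizeMap {
 public:
  // Mirrors the new GetSizeClass(size_t, uint32*) signature: returns
  // false when the size is too large for any size class.
  bool GetSizeClass(size_t size, uint32_t* cl) const {
    for (int i = 1; i <= kNumClasses; ++i) {
      if (size <= static_cast<size_t>(class_to_size_[i])) {
        *cl = static_cast<uint32_t>(i);
        return true;
      }
    }
    return false;
  }
  int32_t ByteSizeForClass(uint32_t cl) const { return class_to_size_[cl]; }

 private:
  enum { kNumClasses = 5 };
  // Index 0 is reserved; it means "no size class" (large allocation).
  static const int32_t class_to_size_[kNumClasses + 1];
};

const int32_t ToySizeMap::class_to_size_[] = {0, 8, 16, 32, 64, 128};

// Rough analogue of tc_nallocx(size, 0): the rounded-up allocation size.
size_t toy_nallocx(const ToySizeMap& sizemap, size_t size) {
  uint32_t cl;
  if (sizemap.GetSizeClass(size, &cl) && cl != 0) {
    return sizemap.ByteSizeForClass(cl);
  }
  return size;  // stand-in for the page-granular slow path
}

int main() {
  ToySizeMap sizemap;
  std::printf("nallocx(13)  -> %zu\n", toy_nallocx(sizemap, 13));
  std::printf("nallocx(100) -> %zu\n", toy_nallocx(sizemap, 100));
  std::printf("nallocx(999) -> %zu\n", toy_nallocx(sizemap, 999));
  return 0;
}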
diff --git a/src/tests/packed-cache_test.cc b/src/tests/packed-cache_test.cc
index 4af5178..3984594 100644
--- a/src/tests/packed-cache_test.cc
+++ b/src/tests/packed-cache_test.cc
@@ -39,14 +39,14 @@ static const int kHashbits = PackedCache<20>::kHashbits;
template <int kKeybits>
static size_t MustGet(const PackedCache<kKeybits>& cache, uintptr_t key) {
- size_t rv;
+ uint32 rv;
CHECK(cache.TryGet(key, &rv));
return rv;
}
template <int kKeybits>
static size_t Has(const PackedCache<kKeybits>& cache, uintptr_t key) {
- size_t dummy;
+ uint32 dummy;
return cache.TryGet(key, &dummy);
}
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index 80a7776..7208d35 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -94,7 +94,7 @@ void ThreadCache::Init(pthread_t tid) {
prev_ = NULL;
tid_ = tid;
in_setspecific_ = false;
- for (size_t cl = 0; cl < Static::num_size_classes(); ++cl) {
+ for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) {
list_[cl].Init(Static::sizemap()->class_to_size(cl));
}
@@ -105,7 +105,7 @@ void ThreadCache::Init(pthread_t tid) {
void ThreadCache::Cleanup() {
// Put unused memory back into central cache
- for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
+ for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) {
if (list_[cl].length() > 0) {
ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
}
@@ -114,7 +114,7 @@ void ThreadCache::Cleanup() {
// Remove some objects of class "cl" from central cache and add to thread heap.
// On success, return the first object for immediate use; otherwise return NULL.
-void* ThreadCache::FetchFromCentralCache(size_t cl, int32_t byte_size) {
+void* ThreadCache::FetchFromCentralCache(uint32 cl, int32_t byte_size) {
FreeList* list = &list_[cl];
ASSERT(list->empty());
const int batch_size = Static::sizemap()->num_objects_to_move(cl);
@@ -151,7 +151,7 @@ void* ThreadCache::FetchFromCentralCache(size_t cl, int32_t byte_size) {
return start;
}
-void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
+void ThreadCache::ListTooLong(FreeList* list, uint32 cl) {
size_left_ -= list->object_size();
const int batch_size = Static::sizemap()->num_objects_to_move(cl);
@@ -182,7 +182,7 @@ void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
}
// Remove some objects of class "cl" from thread heap and add to central cache
-void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
+void ThreadCache::ReleaseToCentralCache(FreeList* src, uint32 cl, int N) {
ASSERT(src == &list_[cl]);
if (N > src->length()) N = src->length();
size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);
diff --git a/src/thread_cache.h b/src/thread_cache.h
index f7e9e17..f2f4ecc 100644
--- a/src/thread_cache.h
+++ b/src/thread_cache.h
@@ -78,15 +78,15 @@ class ThreadCache {
void Cleanup();
// Accessors (mostly just for printing stats)
- int freelist_length(size_t cl) const { return list_[cl].length(); }
+ int freelist_length(uint32 cl) const { return list_[cl].length(); }
// Total byte size in cache
size_t Size() const { return max_size_ - size_left_; }
// Allocate an object of the given size and class. The size given
// must be the same as the size of the class in the size map.
- void* Allocate(size_t size, size_t cl);
- void Deallocate(void* ptr, size_t size_class);
+ void* Allocate(size_t size, uint32 cl);
+ void Deallocate(void* ptr, uint32 size_class);
void Scavenge();
@@ -244,16 +244,16 @@ class ThreadCache {
// Gets and returns an object from the central cache, and, if possible,
// also adds some objects of that size class to this thread cache.
- void* FetchFromCentralCache(size_t cl, int32_t byte_size);
+ void* FetchFromCentralCache(uint32 cl, int32_t byte_size);
- void ListTooLong(void* ptr, size_t cl);
+ void ListTooLong(void* ptr, uint32 cl);
// Releases some number of items from src. Adjusts the list's max_length
// to eventually converge on num_objects_to_move(cl).
- void ListTooLong(FreeList* src, size_t cl);
+ void ListTooLong(FreeList* src, uint32 cl);
// Releases N items from this thread cache.
- void ReleaseToCentralCache(FreeList* src, size_t cl, int N);
+ void ReleaseToCentralCache(FreeList* src, uint32 cl, int N);
void SetMaxSize(int32 new_max_size);
@@ -366,7 +366,7 @@ inline int ThreadCache::HeapsInUse() {
return threadcache_allocator.inuse();
}
-inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(size_t size, size_t cl) {
+inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(size_t size, uint32 cl) {
FreeList* list = &list_[cl];
#ifdef NO_TCMALLOC_SAMPLES
@@ -385,7 +385,7 @@ inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(size_t size, size_t c
return rv;
}
-inline ATTRIBUTE_ALWAYS_INLINE void ThreadCache::Deallocate(void* ptr, size_t cl) {
+inline ATTRIBUTE_ALWAYS_INLINE void ThreadCache::Deallocate(void* ptr, uint32 cl) {
ASSERT(list_[cl].max_length() > 0);
FreeList* list = &list_[cl];
// This catches back-to-back frees of allocs in the same size
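Finally, the ThreadCache hunks: Allocate() and Deallocate() pop from and push onto the per-class free list list_[cl], refilling from the central cache when the list is empty and shedding to it when the list grows too long. Below is a standalone sketch of that pattern, with a std::vector standing in for the intrusive FreeList and malloc() standing in for FetchFromCentralCache(); ToyThreadCache and its class count are made up for illustration.

// Standalone sketch of the per-size-class free-list pattern behind
// ThreadCache::Allocate()/Deallocate() above.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>

class ToyThreadCache {
 public:
  // size must match the byte size of class cl (as in the real API).
  void* Allocate(size_t size, uint32_t cl) {
    std::vector<void*>& list = list_[cl];
    if (list.empty()) {
      // Real code refills the list from the central cache here.
      return std::malloc(size);
    }
    void* rv = list.back();
    list.pop_back();
    return rv;
  }
  void Deallocate(void* ptr, uint32_t cl) {
    // Real code sheds to the central cache when the list grows too
    // long (ListTooLong); the toy just keeps everything.
    list_[cl].push_back(ptr);
  }

 private:
  static const uint32_t kNumClasses = 8;  // made up; real count is built at run time
  std::vector<void*> list_[kNumClasses];
};

int main() {
  ToyThreadCache cache;
  void* p = cache.Allocate(16, 2);   // class 2, 16 bytes (made-up mapping)
  cache.Deallocate(p, 2);            // goes onto list_[2]
  void* q = cache.Allocate(16, 2);   // served from list_[2] without malloc
  std::printf("reused: %s\n", p == q ? "yes" : "no");
  std::free(q);                      // toy only: real objects return to their spans
  return 0;
}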