author     Aliaksey Kandratsenka <alkondratenko@gmail.com>   2017-02-02 15:38:41 -0800
committer  Aliaksey Kandratsenka <alkondratenko@gmail.com>   2017-05-14 19:04:56 -0700
commit     7bc34ad1f60be2df6ca38f4bffdba4daa9aa9a7d (patch)
tree       869656f1155809cd268c64a8621dff9c9d4e432e
parent     4585b78c8dae9183dbf5f124c0343a9f1244ed67 (diff)
download   gperftools-7bc34ad1f60be2df6ca38f4bffdba4daa9aa9a7d.tar.gz
support different number of size classes at runtime
The TCMALLOC_TRANSFER_NUM_OBJ environment variable lets us change the transfer batch size, and with that comes a slightly different number of size classes depending on the chosen batch size. The number of size classes used to be hardcoded, so we could not really support arbitrary batch size settings. This commit adds support for a dynamic number of size classes (a runtime value returned by Static::num_size_classes()).
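
The pattern the patch adopts can be summarized outside of tcmalloc: arrays stay sized by a compile-time maximum, while every loop is bounded by a count fixed at Init() time. Below is a minimal, self-contained C++ sketch of that idea, not the actual tcmalloc code; the names kClassSizesMax and num_size_classes mirror the patch, but the class-list generation inside Init() is fabricated purely for illustration.

    #include <stddef.h>
    #include <stdio.h>

    // Compile-time upper bound on the number of size classes; arrays are
    // sized by this constant, as in the patched common.h.
    static const size_t kClassSizesMax = 96;

    struct SizeMap {
      int class_to_size_[kClassSizesMax];
      size_t num_size_classes;  // runtime count, always <= kClassSizesMax

      // Hypothetical Init(): the real SizeMap::Init() derives the class list
      // from alignment rules and the transfer batch size; here we only
      // generate a plausible list to show where the runtime count comes from.
      void Init() {
        size_t sc = 0;
        for (int size = 8; size <= 2048 && sc < kClassSizesMax; size += 8) {
          class_to_size_[sc++] = size;
        }
        num_size_classes = sc;
      }
    };

    int main() {
      SizeMap m;
      m.Init();
      // Iteration is bounded by the runtime count, never the array capacity.
      size_t total = 0;
      for (size_t cl = 0; cl < m.num_size_classes; ++cl) {
        total += m.class_to_size_[cl];
      }
      printf("size classes: %zu, sum of class max sizes: %zu\n",
             m.num_size_classes, total);
      return 0;
    }

Keeping the arrays at a compile-time maximum avoids any dynamic allocation inside the allocator itself while still letting the effective class count vary with the configured transfer batch size; that is why the diff below replaces kNumClasses with kClassSizesMax in declarations but with Static::num_size_classes() in loops.
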
-rw-r--r--  src/central_freelist.cc   8
-rw-r--r--  src/common.cc            11
-rw-r--r--  src/common.h             16
-rw-r--r--  src/page_heap.cc          2
-rw-r--r--  src/static_vars.cc        8
-rw-r--r--  src/static_vars.h         4
-rw-r--r--  src/tcmalloc.cc          16
-rw-r--r--  src/thread_cache.cc       8
-rw-r--r--  src/thread_cache.h        2
9 files changed, 38 insertions, 37 deletions
diff --git a/src/central_freelist.cc b/src/central_freelist.cc
index ea4bfcc..01a7310 100644
--- a/src/central_freelist.cc
+++ b/src/central_freelist.cc
@@ -152,14 +152,14 @@ bool CentralFreeList::EvictRandomSizeClass(
int locked_size_class, bool force) {
static int race_counter = 0;
int t = race_counter++; // Updated without a lock, but who cares.
- if (t >= kNumClasses) {
- while (t >= kNumClasses) {
- t -= kNumClasses;
+ if (t >= Static::num_size_classes()) {
+ while (t >= Static::num_size_classes()) {
+ t -= Static::num_size_classes();
}
race_counter = t;
}
ASSERT(t >= 0);
- ASSERT(t < kNumClasses);
+ ASSERT(t < Static::num_size_classes());
if (t == locked_size_class) return false;
return Static::central_cache()[t].ShrinkCache(locked_size_class, force);
}
diff --git a/src/common.cc b/src/common.cc
index 9d4993d..55b282a 100644
--- a/src/common.cc
+++ b/src/common.cc
@@ -173,14 +173,15 @@ void SizeMap::Init() {
class_to_size_[sc] = size;
sc++;
}
- if (sc != kNumClasses) {
+ num_size_classes = sc;
+ if (sc > kClassSizesMax) {
Log(kCrash, __FILE__, __LINE__,
- "wrong number of size classes: (found vs. expected )", sc, kNumClasses);
+ "too many size classes: (found vs. max)", sc, kClassSizesMax);
}
// Initialize the mapping arrays
int next_size = 0;
- for (int c = 1; c < kNumClasses; c++) {
+ for (int c = 1; c < num_size_classes; c++) {
const int max_size_in_class = class_to_size_[c];
for (int s = next_size; s <= max_size_in_class; s += kAlignment) {
class_array_[ClassIndex(s)] = c;
@@ -191,7 +192,7 @@ void SizeMap::Init() {
// Double-check sizes just to be safe
for (size_t size = 0; size <= kMaxSize;) {
const int sc = SizeClass(size);
- if (sc <= 0 || sc >= kNumClasses) {
+ if (sc <= 0 || sc >= num_size_classes) {
Log(kCrash, __FILE__, __LINE__,
"Bad size class (class, size)", sc, size);
}
@@ -212,7 +213,7 @@ void SizeMap::Init() {
}
// Initialize the num_objects_to_move array.
- for (size_t cl = 1; cl < kNumClasses; ++cl) {
+ for (size_t cl = 1; cl < num_size_classes; ++cl) {
num_objects_to_move_[cl] = NumMoveSize(ByteSizeForClass(cl));
}
}
diff --git a/src/common.h b/src/common.h
index d137084..14d7c29 100644
--- a/src/common.h
+++ b/src/common.h
@@ -60,11 +60,8 @@ typedef uintptr_t Length;
// Keep in mind when using the 16 bytes alignment you can have a space
// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
static const size_t kMinAlign = 8;
-// Number of classes created until reach page size 128.
-static const size_t kBaseClasses = 16;
#else
static const size_t kMinAlign = 16;
-static const size_t kBaseClasses = 9;
#endif
// Using large pages speeds up the execution at a cost of larger memory use.
@@ -77,15 +74,14 @@ static const size_t kBaseClasses = 9;
// These two factors cause a bounded increase in memory use.
#if defined(TCMALLOC_32K_PAGES)
static const size_t kPageShift = 15;
-static const size_t kNumClasses = kBaseClasses + 69;
#elif defined(TCMALLOC_64K_PAGES)
static const size_t kPageShift = 16;
-static const size_t kNumClasses = kBaseClasses + 73;
#else
static const size_t kPageShift = 13;
-static const size_t kNumClasses = kBaseClasses + 79;
#endif
+static const size_t kClassSizesMax = 96;
+
static const size_t kMaxThreadCacheSize = 4 << 20;
static const size_t kPageSize = 1 << kPageShift;
@@ -217,17 +213,19 @@ class SizeMap {
// amortize the lock overhead for accessing the central list. Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
- int num_objects_to_move_[kNumClasses];
+ int num_objects_to_move_[kClassSizesMax];
int NumMoveSize(size_t size);
// Mapping from size class to max size storable in that class
- int32 class_to_size_[kNumClasses];
+ int32 class_to_size_[kClassSizesMax];
// Mapping from size class to number of pages to allocate at a time
- size_t class_to_pages_[kNumClasses];
+ size_t class_to_pages_[kClassSizesMax];
public:
+ size_t num_size_classes;
+
// Constructor should do nothing since we rely on explicit Init()
// call, which may or may not be called before the constructor runs.
SizeMap() { }
diff --git a/src/page_heap.cc b/src/page_heap.cc
index c5c7871..50b2752 100644
--- a/src/page_heap.cc
+++ b/src/page_heap.cc
@@ -68,7 +68,7 @@ PageHeap::PageHeap()
// Start scavenging at kMaxPages list
release_index_(kMaxPages),
aggressive_decommit_(false) {
- COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
+ COMPILE_ASSERT(kClassSizesMax <= (1 << PageMapCache::kValuebits), valuebits);
DLL_Init(&large_.normal);
DLL_Init(&large_.returned);
for (int i = 0; i < kMaxPages; i++) {
diff --git a/src/static_vars.cc b/src/static_vars.cc
index be8d1d2..5dfacf1 100644
--- a/src/static_vars.cc
+++ b/src/static_vars.cc
@@ -55,13 +55,13 @@ namespace tcmalloc {
void CentralCacheLockAll()
{
Static::pageheap_lock()->Lock();
- for (int i = 0; i < kNumClasses; ++i)
+ for (int i = 0; i < Static::num_size_classes(); ++i)
Static::central_cache()[i].Lock();
}
void CentralCacheUnlockAll()
{
- for (int i = 0; i < kNumClasses; ++i)
+ for (int i = 0; i < Static::num_size_classes(); ++i)
Static::central_cache()[i].Unlock();
Static::pageheap_lock()->Unlock();
}
@@ -70,7 +70,7 @@ void CentralCacheUnlockAll()
bool Static::inited_;
SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED);
SizeMap Static::sizemap_;
-CentralFreeListPadded Static::central_cache_[kNumClasses];
+CentralFreeListPadded Static::central_cache_[kClassSizesMax];
PageHeapAllocator<Span> Static::span_allocator_;
PageHeapAllocator<StackTrace> Static::stacktrace_allocator_;
Span Static::sampled_objects_;
@@ -87,7 +87,7 @@ void Static::InitStaticVars() {
bucket_allocator_.Init();
// Do a bit of sanitizing: make sure central_cache is aligned properly
CHECK_CONDITION((sizeof(central_cache_[0]) % 64) == 0);
- for (int i = 0; i < kNumClasses; ++i) {
+ for (int i = 0; i < num_size_classes(); ++i) {
central_cache_[i].Init(i);
}
diff --git a/src/static_vars.h b/src/static_vars.h
index b72e2f7..1728330 100644
--- a/src/static_vars.h
+++ b/src/static_vars.h
@@ -62,6 +62,8 @@ class Static {
static SizeMap* sizemap() { return &sizemap_; }
+ static unsigned num_size_classes() { return sizemap_.num_size_classes; }
+
//////////////////////////////////////////////////////////////////////
// In addition to the explicit initialization comment, the variables below
// must be protected by pageheap_lock.
@@ -100,7 +102,7 @@ class Static {
// can run their constructors.
ATTRIBUTE_HIDDEN static SizeMap sizemap_;
- ATTRIBUTE_HIDDEN static CentralFreeListPadded central_cache_[kNumClasses];
+ ATTRIBUTE_HIDDEN static CentralFreeListPadded central_cache_[kClassSizesMax];
ATTRIBUTE_HIDDEN static PageHeapAllocator<Span> span_allocator_;
ATTRIBUTE_HIDDEN static PageHeapAllocator<StackTrace> stacktrace_allocator_;
ATTRIBUTE_HIDDEN static Span sampled_objects_;
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 5ddc76b..21b1e5d 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -315,7 +315,7 @@ static void ExtractStats(TCMallocStats* r, uint64_t* class_count,
PageHeap::LargeSpanStats* large_spans) {
r->central_bytes = 0;
r->transfer_bytes = 0;
- for (int cl = 0; cl < kNumClasses; ++cl) {
+ for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
const int length = Static::central_cache()[cl].length();
const int tc_length = Static::central_cache()[cl].tc_length();
const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes();
@@ -354,7 +354,7 @@ static double PagesToMiB(uint64_t pages) {
// WRITE stats to "out"
static void DumpStats(TCMalloc_Printer* out, int level) {
TCMallocStats stats;
- uint64_t class_count[kNumClasses];
+ uint64_t class_count[kClassSizesMax];
PageHeap::SmallSpanStats small;
PageHeap::LargeSpanStats large;
if (level >= 2) {
@@ -421,7 +421,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
out->printf("transfer cache, and central cache, by size class\n");
out->printf("------------------------------------------------\n");
uint64_t cumulative = 0;
- for (int cl = 0; cl < kNumClasses; ++cl) {
+ for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
if (class_count[cl] > 0) {
size_t cl_size = Static::sizemap()->ByteSizeForClass(cl);
uint64_t class_bytes = class_count[cl] * cl_size;
@@ -831,7 +831,7 @@ class TCMallocImplementation : public MallocExtension {
// central class information
int64 prev_class_size = 0;
- for (int cl = 1; cl < kNumClasses; ++cl) {
+ for (int cl = 1; cl < Static::num_size_classes(); ++cl) {
size_t class_size = Static::sizemap()->ByteSizeForClass(cl);
MallocExtension::FreeListInfo i;
i.min_object_size = prev_class_size + 1;
@@ -851,7 +851,7 @@ class TCMallocImplementation : public MallocExtension {
}
// Add stats from per-thread heaps
- uint64_t class_count[kNumClasses];
+ uint64_t class_count[kClassSizesMax];
memset(class_count, 0, sizeof(class_count));
{
SpinLockHolder h(Static::pageheap_lock());
@@ -860,7 +860,7 @@ class TCMallocImplementation : public MallocExtension {
}
prev_class_size = 0;
- for (int cl = 1; cl < kNumClasses; ++cl) {
+ for (int cl = 1; cl < Static::num_size_classes(); ++cl) {
MallocExtension::FreeListInfo i;
i.min_object_size = prev_class_size + 1;
i.max_object_size = Static::sizemap()->ByteSizeForClass(cl);
@@ -925,11 +925,11 @@ static uint32_t size_class_with_alignment(size_t size, size_t align) {
// are aligned at powers of two. We will waste time and space if
// we miss in the size class array, but that is deemed acceptable
// since memalign() should be used rarely.
- while (cl < kNumClasses &&
+ while (cl < Static::num_size_classes() &&
((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) {
cl++;
}
- if (cl == kNumClasses) {
+ if (cl == Static::num_size_classes()) {
return 0;
}
return cl;
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index 22289b0..80a7776 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -94,7 +94,7 @@ void ThreadCache::Init(pthread_t tid) {
prev_ = NULL;
tid_ = tid;
in_setspecific_ = false;
- for (size_t cl = 0; cl < kNumClasses; ++cl) {
+ for (size_t cl = 0; cl < Static::num_size_classes(); ++cl) {
list_[cl].Init(Static::sizemap()->class_to_size(cl));
}
@@ -105,7 +105,7 @@ void ThreadCache::Init(pthread_t tid) {
void ThreadCache::Cleanup() {
// Put unused memory back into central cache
- for (int cl = 0; cl < kNumClasses; ++cl) {
+ for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
if (list_[cl].length() > 0) {
ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
}
@@ -210,7 +210,7 @@ void ThreadCache::Scavenge() {
// that situation by dropping L/2 nodes from the free list. This
// may not release much memory, but if so we will call scavenge again
// pretty soon and the low-water marks will be high on that call.
- for (int cl = 0; cl < kNumClasses; cl++) {
+ for (int cl = 0; cl < Static::num_size_classes(); cl++) {
FreeList* list = &list_[cl];
const int lowmark = list->lowwatermark();
if (lowmark > 0) {
@@ -492,7 +492,7 @@ void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
*total_bytes += h->Size();
if (class_count) {
- for (int cl = 0; cl < kNumClasses; ++cl) {
+ for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
class_count[cl] += h->freelist_length(cl);
}
}
diff --git a/src/thread_cache.h b/src/thread_cache.h
index f245997..f7e9e17 100644
--- a/src/thread_cache.h
+++ b/src/thread_cache.h
@@ -321,7 +321,7 @@ class ThreadCache {
// This class is laid out with the most frequently used fields
// first so that hot elements are placed on the same cache line.
- FreeList list_[kNumClasses]; // Array indexed by size-class
+ FreeList list_[kClassSizesMax]; // Array indexed by size-class
// Thread cache size is max_size_ - size_left_. We use such indirect
// representation to speed up some key operations.