summaryrefslogtreecommitdiff
path: root/src/common.h
diff options
context:
space:
mode:
author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> 2010-06-21 15:59:56 +0000
committer: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> 2010-06-21 15:59:56 +0000
commit: cb7393cbe2d737009001fd9d30dad568bac7a3d8 (patch)
tree: 239f4ca1c465d8389bf1c767189f2bb4e7b0c609 /src/common.h
parent: d8c02761689ba909f474b85618f99ac6dfc9a168 (diff)
download: gperftools-cb7393cbe2d737009001fd9d30dad568bac7a3d8.tar.gz
* Default to not sampling in tcmalloc (csilvers)
* Add -DTCMALLOC_LARGE_PAGES: better perf for some workloads (rus)
* Extend pprof --tools to allow per-tool configs (csilvers)
* Have STL_Allocator pass on # bytes to free (richardfang)
* Add a header guard to config.h (csilvers)
* DOC: Clean up documentation around tcmalloc.slack_bytes (fikes)
* DOC: Document ProfilerFlush, ProfilerStartWithOptions (csilvers)
* PORTING: Work around a gcc 4.5.0 optimization bug (csilvers)
* PORTING: Use -fno-builtin-malloc and friends when compiling tcmalloc
* PORTING: Define _WIN32_WINNT high enough for mingw (csilvers)
* PORTING: Work around libtool bug getting deps wrong in some cases
* Update README.windows to emphasize $IncludeDir more (csilvers)
* Rename README.windows to README_windows.txt (csilvers)

git-svn-id: http://gperftools.googlecode.com/svn/trunk@95 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src/common.h')
-rw-r--r-- src/common.h 43
1 files changed, 37 insertions, 6 deletions
diff --git a/src/common.h b/src/common.h
index b0278eb..5226998 100644
--- a/src/common.h
+++ b/src/common.h
@@ -54,16 +54,45 @@ typedef uintptr_t Length;
// Configuration
//-------------------------------------------------------------------
-// Not all possible combinations of the following parameters make
-// sense. In particular, if kMaxSize increases, you may have to
-// increase kNumClasses as well.
+// Using large pages speeds up the execution at a cost of larger memory use.
+// Deallocation may speed up substantially as the page map gets 8x smaller, so
+// lookups in the page map cause fewer L2 cache misses, which translates to a
+// speedup for application/platform combinations with high L2 cache pressure.
+// As the number of size classes increases with large pages, we increase
+// the thread cache allowance to avoid passing more free ranges to and from
+// central lists. Also, larger pages are less likely to get freed.
+// These two factors cause a bounded increase in memory use.
+
+#if defined(TCMALLOC_LARGE_PAGES)
+static const size_t kPageShift = 15;
+static const size_t kNumClasses = 95;
+static const size_t kMaxThreadCacheSize = 4 << 20;
+#else
static const size_t kPageShift = 12;
+static const size_t kNumClasses = 61;
+static const size_t kMaxThreadCacheSize = 2 << 20;
+#endif
+
static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 8u * kPageSize;
static const size_t kAlignment = 8;
-static const size_t kNumClasses = 61;
static const size_t kLargeSizeClass = 0;
+// Default bound on the combined size of all thread caches.
+static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
+
+// Lower bound on the per-thread cache sizes
+static const size_t kMinThreadCacheSize = kMaxSize * 2;
+
+// The number of bytes one ThreadCache will steal from another when
+// the first ThreadCache is forced to Scavenge(), delaying the
+// next call to Scavenge for this thread.
+static const size_t kStealAmount = 1 << 16;
+
+// The number of times that a deallocation can cause a freelist to
+// go over its max_length() before shrinking max_length().
+static const int kMaxOverages = 3;
+
// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list. We
// want this big to avoid locking the central free-list too often. It
@@ -115,8 +144,10 @@ class SizeMap {
// ...
// 32768 (32768 + 127 + (120<<7)) / 128 376
static const int kMaxSmallSize = 1024;
- unsigned char class_array_[377];
-
+ static const size_t kClassArraySize =
+ (((1 << kPageShift) * 8u + 127 + (120 << 7)) >> 7) + 1;
+ unsigned char class_array_[kClassArraySize];
+
// Compute index of the class_array[] entry for a given size
static inline int ClassIndex(int s) {
ASSERT(0 <= s);