diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2010-06-21 15:59:56 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2010-06-21 15:59:56 +0000 |
commit | cb7393cbe2d737009001fd9d30dad568bac7a3d8 (patch) | |
tree | 239f4ca1c465d8389bf1c767189f2bb4e7b0c609 /src/common.h | |
parent | d8c02761689ba909f474b85618f99ac6dfc9a168 (diff) | |
download | gperftools-cb7393cbe2d737009001fd9d30dad568bac7a3d8.tar.gz |
* Default to not sampling in tcmalloc (csilvers)
* Add -DTCMALLOC_LARGE_PAGES: better perf for some workloads (rus)
* Extend pprof --tools to allow per-tool configs (csilvers)
* Have STL_Allocator pass on # bytes to free (richardfang)
* Add a header guard to config.h (csilvers)
* DOC: Clean up documentation around tcmalloc.slack_bytes (fikes)
* DOC: Document ProfilerFlush, ProfilerStartWithOptions (csilvers)
* PORTING: Work around a gcc 4.5.0 optimization bug (csilvers)
* PORTING: Use -fno-builtin-malloc and friends when compiling tcmalloc
* PORTING: Define _WIN32_WINNT high enough for mingw (csilvers)
* PORTING: Work around libtool bug getting deps wrong in some cases
* Update README.windows to emphasize $IncludeDir more (csilvers)
* Rename README.windows to README_windows.txt (csilvers)
git-svn-id: http://gperftools.googlecode.com/svn/trunk@95 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src/common.h')
-rw-r--r-- | src/common.h | 43 |
1 files changed, 37 insertions, 6 deletions
diff --git a/src/common.h b/src/common.h index b0278eb..5226998 100644 --- a/src/common.h +++ b/src/common.h @@ -54,16 +54,45 @@ typedef uintptr_t Length; // Configuration //------------------------------------------------------------------- -// Not all possible combinations of the following parameters make -// sense. In particular, if kMaxSize increases, you may have to -// increase kNumClasses as well. +// Using large pages speeds up the execution at a cost of larger memory use. +// Deallocation may speed up by a factor as the page map gets 8x smaller, so +// lookups in the page map result in fewer L2 cache misses, which translates to +// speedup for application/platform combinations with high L2 cache pressure. +// As the number of size classes increases with large pages, we increase +// the thread cache allowance to avoid passing more free ranges to and from +// central lists. Also, larger pages are less likely to get freed. +// These two factors cause a bounded increase in memory use. + +#if defined(TCMALLOC_LARGE_PAGES) +static const size_t kPageShift = 15; +static const size_t kNumClasses = 95; +static const size_t kMaxThreadCacheSize = 4 << 20; +#else static const size_t kPageShift = 12; +static const size_t kNumClasses = 61; +static const size_t kMaxThreadCacheSize = 2 << 20; +#endif + static const size_t kPageSize = 1 << kPageShift; static const size_t kMaxSize = 8u * kPageSize; static const size_t kAlignment = 8; -static const size_t kNumClasses = 61; static const size_t kLargeSizeClass = 0; +// Default bound on the total amount of thread caches. +static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; + +// Lower bound on the per-thread cache sizes +static const size_t kMinThreadCacheSize = kMaxSize * 2; + +// The number of bytes one ThreadCache will steal from another when +// the first ThreadCache is forced to Scavenge(), delaying the +// next call to Scavenge for this thread. +static const size_t kStealAmount = 1 << 16; + +// The number of times that a deallocation can cause a freelist to +// go over its max_length() before shrinking max_length(). +static const int kMaxOverages = 3; + // Maximum length we allow a per-thread free-list to have before we // move objects from it into the corresponding central free-list. We // want this big to avoid locking the central free-list too often. It @@ -115,8 +144,10 @@ class SizeMap { // ... // 32768 (32768 + 127 + (120<<7)) / 128 376 static const int kMaxSmallSize = 1024; - unsigned char class_array_[377]; - + static const size_t kClassArraySize = + (((1 << kPageShift) * 8u + 127 + (120 << 7)) >> 7) + 1; + unsigned char class_array_[kClassArraySize]; + // Compute index of the class_array[] entry for a given size static inline int ClassIndex(int s) { ASSERT(0 <= s); |