Diffstat (limited to 'src/tcmalloc.cc')
 src/tcmalloc.cc | 1050
 1 file changed, 742 insertions(+), 308 deletions(-)
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index bf45dfb..a23449b 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -55,7 +55,6 @@
// TODO: Bias reclamation to larger addresses
// TODO: implement mallinfo/mallopt
// TODO: Better testing
-// TODO: Return memory to system
//
// 9/28/2003 (new page-level allocator replaces ptmalloc2):
// * malloc/free of small objects goes from ~300 ns to ~50 ns.
@@ -73,28 +72,68 @@
#else
#include <sys/types.h>
#endif
-#include <malloc.h>
+#ifdef HAVE_STRUCT_MALLINFO
+#include <malloc.h> // for struct mallinfo
+#endif
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <errno.h>
#include <stdarg.h>
#include "base/commandlineflags.h"
-#include "google/malloc_hook.h"
-#include "google/malloc_extension.h"
-#include "google/stacktrace.h"
+#include "base/basictypes.h" // gets us PRIu64
+#include "base/sysinfo.h"
+#include "base/spinlock.h"
+#include <google/malloc_hook.h>
+#include <google/malloc_extension.h>
+#include <google/stacktrace.h>
#include "internal_logging.h"
-#include "internal_spinlock.h"
#include "pagemap.h"
#include "system-alloc.h"
#include "maybe_threads.h"
-#if defined HAVE_INTTYPES_H
-#define __STDC_FORMAT_MACROS
-#include <inttypes.h>
-#define LLU PRIu64
-#else
-#define LLU "llu" // hope for the best
+// Even if we have support for thread-local storage in the compiler
+// and linker, the OS may not support it. We need to check that at
+// runtime. Right now, we have to keep a manual set of "bad" OSes.
+#if defined(HAVE_TLS)
+ static bool kernel_supports_tls = false; // be conservative
+ static inline bool KernelSupportsTLS() {
+ return kernel_supports_tls;
+ }
+# if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS
+ static void CheckIfKernelSupportsTLS() {
+ kernel_supports_tls = false;
+ }
+# else
+# include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
+ static void CheckIfKernelSupportsTLS() {
+ struct utsname buf;
+ if (uname(&buf) != 0) { // should be impossible
+ MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
+ kernel_supports_tls = false;
+ } else if (strcasecmp(buf.sysname, "linux") == 0) {
+ // The linux case: the first kernel to support TLS was 2.6.0
+ if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
+ kernel_supports_tls = false;
+ else if (buf.release[0] == '2' && buf.release[1] == '.' &&
+ buf.release[2] >= '0' && buf.release[2] < '6' &&
+ buf.release[3] == '.') // 2.0 - 2.5
+ kernel_supports_tls = false;
+ else
+ kernel_supports_tls = true;
+ } else { // some other kernel, we'll be optimistic
+ kernel_supports_tls = true;
+ }
+ // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
+ }
+# endif // HAVE_DECL_UNAME
+#endif // HAVE_TLS
+
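As an aside, the version test above reduces to a small pure function; the following is only an illustrative sketch (LinuxReleaseHasTLS is a hypothetical name, using the same digit checks as CheckIfKernelSupportsTLS):

// Sketch: classify a Linux utsname.release string such as "2.6.9-42.EL".
// Returns true for 2.6 and later, false for 0.x, 1.x and 2.0-2.5, mirroring
// the branches above.  Assumes the usual "major.minor." prefix layout.
static bool LinuxReleaseHasTLS(const char* r) {
  if (r[0] < '2' && r[1] == '.') return false;                  // 0.x or 1.x
  if (r[0] == '2' && r[1] == '.' &&
      r[2] >= '0' && r[2] < '6' && r[3] == '.') return false;   // 2.0 - 2.5
  return true;                                                  // 2.6 or later
}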
+// __THROW is defined in glibc systems. It means, counter-intuitively,
+// "This function will never throw an exception." It's an optional
+// optimization tool, but we may need to use it to match glibc prototypes.
+#ifndef __THROW // I guess we're not on a glibc system
+# define __THROW // __THROW is just an optimization, so ok to make it ""
#endif
//-------------------------------------------------------------------
@@ -109,7 +148,7 @@ static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 8u * kPageSize;
static const size_t kAlignShift = 3;
static const size_t kAlignment = 1 << kAlignShift;
-static const size_t kNumClasses = 170;
+static const size_t kNumClasses = 68;
// Allocates a big block of memory for the pagemap once we reach more than
// 128MB
@@ -168,26 +207,63 @@ DEFINE_int64(tcmalloc_sample_parameter, 262147,
" larger prime number");
static size_t sample_period = 262147;
// Protects sample_period above
-static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
+static SpinLock sample_period_lock(SpinLock::LINKER_INITIALIZED);
+
+// Parameters for controlling how fast memory is returned to the OS.
+
+DEFINE_double(tcmalloc_release_rate, 1,
+ "Rate at which we release unused memory to the system. "
+ "Zero means we never release memory back to the system. "
+ "Increase this flag to return memory faster; decrease it "
+ "to return memory slower. Reasonable rates are in the "
+ "range [0,10]");
//-------------------------------------------------------------------
// Mapping from size to size_class and vice versa
//-------------------------------------------------------------------
-// A pair of arrays we use for implementing the mapping from a size to
-// its size class. Indexed by "floor(lg(size))".
-static const int kSizeBits = 8 * sizeof(size_t);
-static unsigned char size_base[kSizeBits];
-static unsigned char size_shift[kSizeBits];
-
-// Mapping from size class to size
+// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
+// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
+// So for these larger sizes we have an array indexed by ceil(size/128).
+//
+// We flatten both logical arrays into one physical array and use
+// arithmetic to compute an appropriate index. The "base_index[]"
+// array contains the bases of the two logical arrays.
+//
+// base_index[] contains non-obvious values. We always add 127 to the
+// size before dividing it by either 8 or 128 to implement ceil()
+// efficiently. Therefore base_index[0] is -15 to compensate for the
+// extra 127/8 we added to small sizes. Similarly base_index[1] is
+// 120, so that the first index used by the second logical array is
+// just past the last index used by the first logical array.
+//
+// Examples:
+// Size Expression Index
+// -------------------------------------------------------
+// 0 -15 + ((0+127) / 8) 0
+// 1 -15 + ((1+127) / 8) 1
+// ...
+// 1024 -15 + ((1024+127) / 8) 128
+// 1025 120 + ((1025+127) / 128) 129
+// ...
+// 32768 120 + ((32768+127) / 128) 376
+static const int kMaxSmallSize = 1024;
+static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128
+static const int base_index[2] = { -15, 120 }; // For finding array bases
+static unsigned char class_array[377];
+
+// Compute index of the class_array[] entry for a given size
+static inline int ClassIndex(size_t s) {
+ const int i = (s > kMaxSmallSize);
+ return base_index[i] + ((s+127) >> shift_amount[i]);
+}
+
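The example table above can be spot-checked with a standalone copy of the computation; a minimal sketch (ClassIndexSketch is a hypothetical name, same constants, kMaxSize assumed to be 32768 with 4K pages):

#include <assert.h>
#include <stddef.h>

// Same constants as shift_amount[] and base_index[] above.
static int ClassIndexSketch(size_t s) {
  static const int shift[2] = { 3, 7 };
  static const int base[2]  = { -15, 120 };
  const int i = (s > 1024);
  return base[i] + static_cast<int>((s + 127) >> shift[i]);
}

int main() {
  assert(ClassIndexSketch(0)     == 0);
  assert(ClassIndexSketch(1)     == 1);
  assert(ClassIndexSketch(1024)  == 128);
  assert(ClassIndexSketch(1025)  == 129);
  assert(ClassIndexSketch(32768) == 376);   // last entry of class_array[377]
  return 0;
}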
+// Mapping from size class to max size storable in that class
static size_t class_to_size[kNumClasses];
// Mapping from size class to number of pages to allocate at a time
static size_t class_to_pages[kNumClasses];
-
-
// TransferCache is used to cache transfers of num_objects_to_move[size_class]
// back and forth between thread caches and the central cache for a given size
// class.
@@ -202,20 +278,6 @@ struct TCEntry {
// one class can have is kNumClasses.
static const int kNumTransferEntries = kNumClasses;
-// Return floor(log2(n)) for n > 0.
-#if (defined __i386__ || defined __x86_64__) && defined __GNUC__
-static inline int LgFloor(size_t n) {
- // "ro" for the input spec means the input can come from either a
- // register ("r") or offsetable memory ("o").
- size_t result;
- __asm__("bsr %1, %0"
- : "=r" (result) // Output spec
- : "ro" (n) // Input spec
- : "cc" // Clobbers condition-codes
- );
- return result;
-}
-#else
// Note: the following only works for "n"s that fit in 32-bits, but
// that is fine since we only use it for small sizes.
static inline int LgFloor(size_t n) {
@@ -231,8 +293,6 @@ static inline int LgFloor(size_t n) {
ASSERT(n == 1);
return log;
}
-#endif
-
// Some very basic linked list functions for dealing with using void * as
// storage.
@@ -298,10 +358,7 @@ static inline size_t SLL_Size(void *head) {
// Setup helper functions.
static inline int SizeClass(size_t size) {
- if (size == 0) size = 1;
- const int lg = LgFloor(size);
- const int align = size_shift[lg];
- return static_cast<int>(size_base[lg]) + ((size-1) >> align);
+ return class_array[ClassIndex(size)];
}
// Get the byte-size for a specified class
@@ -335,13 +392,18 @@ static int NumMoveSize(size_t size) {
// Initialize the mapping arrays
static void InitSizeClasses() {
- // Special initialization for small sizes
- for (int lg = 0; lg < kAlignShift; lg++) {
- size_base[lg] = 1;
- size_shift[lg] = kAlignShift;
+ // Do some sanity checking on base_index[]/shift_amount[]/class_array[]
+ if (ClassIndex(0) < 0) {
+ MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
+ abort();
+ }
+ if (ClassIndex(kMaxSize) >= sizeof(class_array)) {
+ MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
+ abort();
}
- int next_class = 1;
+ // Compute the size classes we want to use
+ int sc = 1; // Next size class to assign
int alignshift = kAlignShift;
int last_lg = -1;
for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
@@ -357,31 +419,49 @@ static void InitSizeClasses() {
if ((lg >= 7) && (alignshift < 8)) {
alignshift++;
}
- size_base[lg] = next_class - ((size-1) >> alignshift);
- size_shift[lg] = alignshift;
+ last_lg = lg;
}
- class_to_size[next_class] = size;
- last_lg = lg;
+ // Allocate enough pages so leftover is less than 1/8 of total.
+ // This bounds wasted space to at most 12.5%.
+ size_t psize = kPageSize;
+ while ((psize % size) > (psize >> 3)) {
+ psize += kPageSize;
+ }
+ const size_t my_pages = psize >> kPageShift;
+
+ if (sc > 1 && my_pages == class_to_pages[sc-1]) {
+ // See if we can merge this into the previous class without
+ // increasing the fragmentation of the previous class.
+ const size_t my_objects = (my_pages << kPageShift) / size;
+ const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
+ / class_to_size[sc-1];
+ if (my_objects == prev_objects) {
+ // Adjust last class to include this size
+ class_to_size[sc-1] = size;
+ continue;
+ }
+ }
- next_class++;
+ // Add new class
+ class_to_pages[sc] = my_pages;
+ class_to_size[sc] = size;
+ sc++;
}
- if (next_class >= kNumClasses) {
- MESSAGE("used up too many size classes: %d\n", next_class);
+ if (sc != kNumClasses) {
+ MESSAGE("wrong number of size classes: found %d instead of %d\n",
+ sc, int(kNumClasses));
abort();
}
- // Initialize the number of pages we should allocate to split into
- // small objects for a given class.
- for (size_t cl = 1; cl < next_class; cl++) {
- // Allocate enough pages so leftover is less than 1/8 of total.
- // This bounds wasted space to at most 12.5%.
- size_t psize = kPageSize;
- const size_t s = class_to_size[cl];
- while ((psize % s) > (psize >> 3)) {
- psize += kPageSize;
+ // Initialize the mapping arrays
+ int next_size = 0;
+ for (int c = 1; c < kNumClasses; c++) {
+ const int max_size_in_class = class_to_size[c];
+ for (int s = next_size; s <= max_size_in_class; s += kAlignment) {
+ class_array[ClassIndex(s)] = c;
}
- class_to_pages[cl] = psize >> kPageShift;
+ next_size = max_size_in_class + kAlignment;
}
// Double-check sizes just to be safe
@@ -415,6 +495,23 @@ static void InitSizeClasses() {
for (size_t cl = 1; cl < kNumClasses; ++cl) {
num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
}
+
+ if (false) {
+ // Dump class sizes and maximum external wastage per size class
+ for (size_t cl = 1; cl < kNumClasses; ++cl) {
+ const int alloc_size = class_to_pages[cl] << kPageShift;
+ const int alloc_objs = alloc_size / class_to_size[cl];
+ const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
+ const int max_waste = alloc_size - min_used;
+ MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
+ int(cl),
+ int(class_to_size[cl-1] + 1),
+ int(class_to_size[cl]),
+ int(class_to_pages[cl] << kPageShift),
+ max_waste * 100.0 / alloc_size
+ );
+ }
+ }
}
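The span-size rule used above ("leftover is less than 1/8 of total") is easy to check in isolation; a sketch assuming 4K pages:

// Returns how many 4 KiB pages a span needs so that the space left over
// after packing "size"-byte objects is at most 12.5% of the span.
static int PagesForSize(int size) {
  const int kPageSizeSketch = 4096;
  int psize = kPageSizeSketch;
  while ((psize % size) > (psize >> 3)) {
    psize += kPageSizeSketch;
  }
  return psize / kPageSizeSketch;
}
// Example: PagesForSize(2304) == 3 -- a 12288-byte span holds five 2304-byte
// objects and wastes 768 bytes (6.25%), which is under the 1/8 bound.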
// -------------------------------------------------------------------------
@@ -620,20 +717,6 @@ static void DLL_Prepend(Span* list, Span* span) {
list->next = span;
}
-static void DLL_InsertOrdered(Span* list, Span* span) {
- ASSERT(span->next == NULL);
- ASSERT(span->prev == NULL);
- // Look for appropriate place to insert
- Span* x = list;
- while ((x->next != list) && (x->next->start < span->start)) {
- x = x->next;
- }
- span->next = x->next;
- span->prev = x;
- x->next->prev = span;
- x->next = span;
-}
-
// -------------------------------------------------------------------------
// Stack traces kept for sampled allocations
// The following state is protected by pageheap_lock_.
@@ -729,16 +812,27 @@ class TCMalloc_PageHeap {
bool Check();
bool CheckList(Span* list, Length min_pages, Length max_pages);
+ // Release all pages on the free list for reuse by the OS:
+ void ReleaseFreePages();
+
private:
// Pick the appropriate map type based on pointer size
typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
PageMap pagemap_;
+ // We segregate spans of a given size into two circular linked
+ // lists: one for normal spans, and one for spans whose memory
+ // has been returned to the system.
+ struct SpanList {
+ Span normal;
+ Span returned;
+ };
+
// List of free spans of length >= kMaxPages
- Span large_;
+ SpanList large_;
// Array mapping from span length to a doubly linked list of free spans
- Span free_[kMaxPages];
+ SpanList free_[kMaxPages];
// Number of pages kept in free lists
uintptr_t free_pages_;
@@ -753,7 +847,9 @@ class TCMalloc_PageHeap {
// span into appropriate free lists. Also update "span" to have
// length exactly "n" and mark it as non-free so it can be returned
// to the client.
- void Carve(Span* span, Length n);
+ //
+ // "released" is true iff "span" was found on a "returned" list.
+ void Carve(Span* span, Length n, bool released);
void RecordSpan(Span* span) {
pagemap_.set(span->start, span);
@@ -761,14 +857,34 @@ class TCMalloc_PageHeap {
pagemap_.set(span->start + span->length - 1, span);
}
}
+
+ // Allocate a large span of length == n. If successful, returns a
+ // span of exactly the specified length. Else, returns NULL.
+ Span* AllocLarge(Length n);
+
+ // Incrementally release some memory to the system.
+ // IncrementalScavenge(n) is called whenever n pages are freed.
+ void IncrementalScavenge(Length n);
+
+ // Number of pages to deallocate before doing more scavenging
+ int64_t scavenge_counter_;
+
+ // Index of last free list we scavenged
+ int scavenge_index_;
};
-TCMalloc_PageHeap::TCMalloc_PageHeap() : pagemap_(MetaDataAlloc),
- free_pages_(0),
- system_bytes_(0) {
- DLL_Init(&large_);
+TCMalloc_PageHeap::TCMalloc_PageHeap()
+ : pagemap_(MetaDataAlloc),
+ free_pages_(0),
+ system_bytes_(0),
+ scavenge_counter_(0),
+ // Start scavenging at kMaxPages list
+ scavenge_index_(kMaxPages-1) {
+ DLL_Init(&large_.normal);
+ DLL_Init(&large_.returned);
for (int i = 0; i < kMaxPages; i++) {
- DLL_Init(&free_[i]);
+ DLL_Init(&free_[i].normal);
+ DLL_Init(&free_[i].returned);
}
}
@@ -780,40 +896,79 @@ Span* TCMalloc_PageHeap::New(Length n) {
// Find first size >= n that has a non-empty list
for (Length s = n; s < kMaxPages; s++) {
- if (!DLL_IsEmpty(&free_[s])) {
- Span* result = free_[s].next;
- Carve(result, n);
- ASSERT(Check());
- free_pages_ -= n;
- return result;
+ Span* ll = NULL;
+ bool released = false;
+ if (!DLL_IsEmpty(&free_[s].normal)) {
+ // Found normal span
+ ll = &free_[s].normal;
+ } else if (!DLL_IsEmpty(&free_[s].returned)) {
+ // Found returned span; reallocate it
+ ll = &free_[s].returned;
+ released = true;
+ } else {
+ // Keep looking in larger classes
+ continue;
}
+
+ Span* result = ll->next;
+ Carve(result, n, released);
+ ASSERT(Check());
+ free_pages_ -= n;
+ return result;
}
- // Look in large list. If we first do not find something, we try to
- // grow the heap and try again.
- for (int i = 0; i < 2; i++) {
- // find the best span (closest to n in size)
- Span *best = NULL;
- for (Span* span = large_.next; span != &large_; span = span->next) {
- if (span->length >= n &&
- (best == NULL || span->length < best->length)) {
+ Span* result = AllocLarge(n);
+ if (result != NULL) return result;
+
+ // Grow the heap and try again
+ if (!GrowHeap(n)) {
+ ASSERT(Check());
+ return NULL;
+ }
+
+ return AllocLarge(n);
+}
+
+Span* TCMalloc_PageHeap::AllocLarge(Length n) {
+ // find the best span (closest to n in size).
+ // The following loops implement address-ordered best-fit.
+ bool from_released = false;
+ Span *best = NULL;
+
+ // Search through normal list
+ for (Span* span = large_.normal.next;
+ span != &large_.normal;
+ span = span->next) {
+ if (span->length >= n) {
+ if ((best == NULL)
+ || (span->length < best->length)
+ || ((span->length == best->length) && (span->start < best->start))) {
best = span;
+ from_released = false;
}
}
- if (best != NULL) {
- Carve(best, n);
- ASSERT(Check());
- free_pages_ -= n;
- return best;
- }
- if (i == 0) {
- // Nothing suitable in large list. Grow the heap and look again.
- if (!GrowHeap(n)) {
- ASSERT(Check());
- return NULL;
+ }
+
+ // Search through released list in case it has a better fit
+ for (Span* span = large_.returned.next;
+ span != &large_.returned;
+ span = span->next) {
+ if (span->length >= n) {
+ if ((best == NULL)
+ || (span->length < best->length)
+ || ((span->length == best->length) && (span->start < best->start))) {
+ best = span;
+ from_released = true;
}
}
}
+
+ if (best != NULL) {
+ Carve(best, n, from_released);
+ ASSERT(Check());
+ free_pages_ -= n;
+ return best;
+ }
return NULL;
}
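The selection rule in AllocLarge can be read as a single ordering predicate; a minimal sketch with a hypothetical SpanLite standing in for Span:

#include <stddef.h>
#include <stdint.h>

struct SpanLite {      // stand-in for Span: just the fields the search uses
  uintptr_t start;     // starting page number
  size_t    length;    // length in pages
};

// True if "candidate" (already known to satisfy length >= n) should replace
// "best": prefer shorter spans, and among equal lengths, lower addresses.
static bool BetterFit(const SpanLite* candidate, const SpanLite* best) {
  if (best == NULL) return true;
  if (candidate->length != best->length) return candidate->length < best->length;
  return candidate->start < best->start;
}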
@@ -834,7 +989,7 @@ Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
return leftover;
}
-void TCMalloc_PageHeap::Carve(Span* span, Length n) {
+void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
ASSERT(n > 0);
DLL_Remove(span);
span->free = 0;
@@ -847,11 +1002,12 @@ void TCMalloc_PageHeap::Carve(Span* span, Length n) {
leftover->free = 1;
Event(leftover, 'S', extra);
RecordSpan(leftover);
- if (extra < kMaxPages) {
- DLL_Prepend(&free_[extra], leftover);
- } else {
- DLL_InsertOrdered(&large_, leftover);
- }
+
+ // Place leftover span on appropriate free list
+ SpanList* listpair = (extra < kMaxPages) ? &free_[extra] : &large_;
+ Span* dst = released ? &listpair->returned : &listpair->normal;
+ DLL_Prepend(dst, leftover);
+
span->length = n;
pagemap_.set(span->start + n - 1, span);
}
@@ -870,6 +1026,10 @@ void TCMalloc_PageHeap::Delete(Span* span) {
// necessary. We do not bother resetting the stale pagemap
// entries for the pieces we are merging together because we only
// care about the pagemap entries for the boundaries.
+ //
+ // Note that the spans we merge into "span" may come out of
+ // a "returned" list. For simplicity, we move these into the
+ // "normal" list of the appropriate size class.
const PageID p = span->start;
const Length n = span->length;
Span* prev = GetDescriptor(p-1);
@@ -899,15 +1059,71 @@ void TCMalloc_PageHeap::Delete(Span* span) {
Event(span, 'D', span->length);
span->free = 1;
if (span->length < kMaxPages) {
- DLL_Prepend(&free_[span->length], span);
+ DLL_Prepend(&free_[span->length].normal, span);
} else {
- DLL_InsertOrdered(&large_, span);
+ DLL_Prepend(&large_.normal, span);
}
free_pages_ += n;
+ IncrementalScavenge(n);
ASSERT(Check());
}
+void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
+ // Fast path; not yet time to release memory
+ scavenge_counter_ -= n;
+ if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
+
+ // Never delay scavenging for more than the following number of
+ // deallocated pages. With 4K pages, this comes to 4GB of
+ // deallocation.
+ static const int kMaxReleaseDelay = 1 << 20;
+
+ // If there is nothing to release, wait for so many pages before
+ // scavenging again. With 4K pages, this comes to 1GB of memory.
+ static const int kDefaultReleaseDelay = 1 << 18;
+
+ const double rate = FLAGS_tcmalloc_release_rate;
+ if (rate <= 1e-6) {
+ // Tiny release rate means that releasing is disabled.
+ scavenge_counter_ = kDefaultReleaseDelay;
+ return;
+ }
+
+ // Find index of free list to scavenge
+ int index = scavenge_index_ + 1;
+ for (int i = 0; i < kMaxPages+1; i++) {
+ if (index > kMaxPages) index = 0;
+ SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
+ if (!DLL_IsEmpty(&slist->normal)) {
+ // Release the last span on the normal portion of this list
+ Span* s = slist->normal.prev;
+ DLL_Remove(s);
+ TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
+ static_cast<size_t>(s->length << kPageShift));
+ DLL_Prepend(&slist->returned, s);
+
+ // Compute how long to wait until we return memory.
+ // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages
+ // after releasing one page.
+ const double mult = 1000.0 / rate;
+ double wait = mult * static_cast<double>(s->length);
+ if (wait > kMaxReleaseDelay) {
+ // Avoid overflow and bound to reasonable range
+ wait = kMaxReleaseDelay;
+ }
+ scavenge_counter_ = static_cast<int64_t>(wait);
+
+ scavenge_index_ = index; // Scavenge at index+1 next time
+ return;
+ }
+ index++;
+ }
+
+ // Nothing to scavenge, delay for a while
+ scavenge_counter_ = kDefaultReleaseDelay;
+}
+
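For reference, the pacing arithmetic above works out as follows (a sketch only; assumes 4K pages and the default flag value of 1):

#include <stdint.h>

// Pages to wait before the next release, after releasing a span of
// "span_pages" pages with the given tcmalloc_release_rate.
static int64_t ScavengeDelayPages(double release_rate, uint64_t span_pages) {
  static const int kMaxReleaseDelaySketch = 1 << 20;
  double wait = (1000.0 / release_rate) * static_cast<double>(span_pages);
  if (wait > kMaxReleaseDelaySketch) wait = kMaxReleaseDelaySketch;
  return static_cast<int64_t>(wait);
}
// ScavengeDelayPages(1.0, 1)  == 1000  -> wait for ~4 MB of frees (4K pages)
// ScavengeDelayPages(10.0, 1) == 100   -> release ten times as eagerly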
void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
// Associate span object with all interior pages as well
ASSERT(!span->free);
@@ -920,40 +1136,69 @@ void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
}
}
+static double PagesToMB(uint64_t pages) {
+ return (pages << kPageShift) / 1048576.0;
+}
+
void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
int nonempty_sizes = 0;
for (int s = 0; s < kMaxPages; s++) {
- if (!DLL_IsEmpty(&free_[s])) nonempty_sizes++;
+ if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
+ nonempty_sizes++;
+ }
}
out->printf("------------------------------------------------\n");
- out->printf("PageHeap: %d sizes; %6.1f MB free\n", nonempty_sizes,
- (static_cast<double>(free_pages_) * kPageSize) / 1048576.0);
+ out->printf("PageHeap: %d sizes; %6.1f MB free\n",
+ nonempty_sizes, PagesToMB(free_pages_));
out->printf("------------------------------------------------\n");
- uint64_t cumulative = 0;
+ uint64_t total_normal = 0;
+ uint64_t total_returned = 0;
for (int s = 0; s < kMaxPages; s++) {
- if (!DLL_IsEmpty(&free_[s])) {
- const int list_length = DLL_Length(&free_[s]);
- uint64_t s_pages = s * list_length;
- cumulative += s_pages;
- out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum\n",
- s, list_length,
- (s_pages << kPageShift) / 1048576.0,
- (cumulative << kPageShift) / 1048576.0);
+ const int n_length = DLL_Length(&free_[s].normal);
+ const int r_length = DLL_Length(&free_[s].returned);
+ if (n_length + r_length > 0) {
+ uint64_t n_pages = s * n_length;
+ uint64_t r_pages = s * r_length;
+ total_normal += n_pages;
+ total_returned += r_pages;
+ out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
+ "; unmapped: %6.1f MB; %6.1f MB cum\n",
+ s,
+ (n_length + r_length),
+ PagesToMB(n_pages + r_pages),
+ PagesToMB(total_normal + total_returned),
+ PagesToMB(r_pages),
+ PagesToMB(total_returned));
}
}
- uint64_t large_pages = 0;
- int large_spans = 0;
- for (Span* s = large_.next; s != &large_; s = s->next) {
- out->printf(" [ %6" PRIuS " pages ]\n", s->length);
- large_pages += s->length;
- large_spans++;
- }
- cumulative += large_pages;
- out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum\n",
- large_spans,
- (large_pages << kPageShift) / 1048576.0,
- (cumulative << kPageShift) / 1048576.0);
+ uint64_t n_pages = 0;
+ uint64_t r_pages = 0;
+ int n_spans = 0;
+ int r_spans = 0;
+ out->printf("Normal large spans:\n");
+ for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
+ out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
+ s->length, PagesToMB(s->length));
+ n_pages += s->length;
+ n_spans++;
+ }
+ out->printf("Unmapped large spans:\n");
+ for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
+ out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
+ s->length, PagesToMB(s->length));
+ r_pages += s->length;
+ r_spans++;
+ }
+ total_normal += n_pages;
+ total_returned += r_pages;
+ out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
+ "; unmapped: %6.1f MB; %6.1f MB cum\n",
+ (n_spans + r_spans),
+ PagesToMB(n_pages + r_pages),
+ PagesToMB(total_normal + total_returned),
+ PagesToMB(r_pages),
+ PagesToMB(total_returned));
}
static void RecordGrowth(size_t growth) {
@@ -1013,10 +1258,13 @@ bool TCMalloc_PageHeap::GrowHeap(Length n) {
}
bool TCMalloc_PageHeap::Check() {
- ASSERT(free_[0].next == &free_[0]);
- CheckList(&large_, kMaxPages, 1000000000);
+ ASSERT(free_[0].normal.next == &free_[0].normal);
+ ASSERT(free_[0].returned.next == &free_[0].returned);
+ CheckList(&large_.normal, kMaxPages, 1000000000);
+ CheckList(&large_.returned, kMaxPages, 1000000000);
for (Length s = 1; s < kMaxPages; s++) {
- CheckList(&free_[s], s, s);
+ CheckList(&free_[s].normal, s, s);
+ CheckList(&free_[s].returned, s, s);
}
return true;
}
@@ -1032,6 +1280,26 @@ bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages
return true;
}
+static void ReleaseFreeList(Span* list, Span* returned) {
+ // Walk backwards through list so that when we push these
+ // spans on the "returned" list, we preserve the order.
+ while (!DLL_IsEmpty(list)) {
+ Span* s = list->prev;
+ DLL_Remove(s);
+ DLL_Prepend(returned, s);
+ TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
+ static_cast<size_t>(s->length << kPageShift));
+ }
+}
+
+void TCMalloc_PageHeap::ReleaseFreePages() {
+ for (Length s = 0; s < kMaxPages; s++) {
+ ReleaseFreeList(&free_[s].normal, &free_[s].returned);
+ }
+ ReleaseFreeList(&large_.normal, &large_.returned);
+ ASSERT(Check());
+}
+
//-------------------------------------------------------------------
// Free list
//-------------------------------------------------------------------
@@ -1105,6 +1373,11 @@ class TCMalloc_ThreadCache {
uint32_t rnd_; // Cheap random number generator
size_t bytes_until_sample_; // Bytes until we sample next
+ // Allocate a new heap. REQUIRES: pageheap_lock is held.
+ static inline TCMalloc_ThreadCache* NewHeap(pthread_t tid);
+
+ // Use only as pthread thread-specific destructor function.
+ static void DestroyThreadCache(void* ptr);
public:
// All ThreadCache objects are kept in a linked list (for stats collection)
TCMalloc_ThreadCache* next_;
@@ -1132,14 +1405,16 @@ class TCMalloc_ThreadCache {
bool SampleAllocation(size_t k);
// Pick next sampling point
- void PickNextSample();
+ void PickNextSample(size_t k);
static void InitModule();
static void InitTSD();
+ static TCMalloc_ThreadCache* GetThreadHeap();
static TCMalloc_ThreadCache* GetCache();
static TCMalloc_ThreadCache* GetCacheIfPresent();
- static void* CreateCacheIfNecessary();
- static void DeleteCache(void* ptr);
+ static TCMalloc_ThreadCache* CreateCacheIfNecessary();
+ static void DeleteCache(TCMalloc_ThreadCache* heap);
+ static void BecomeIdle();
static void RecomputeThreadCacheSize();
};
@@ -1260,7 +1535,7 @@ class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
// Page-level allocator
-static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
+static SpinLock pageheap_lock(SpinLock::LINKER_INITIALIZED);
static char pageheap_memory[sizeof(TCMalloc_PageHeap)];
static bool phinited = false;
@@ -1268,6 +1543,16 @@ static bool phinited = false;
// of pageheap_memory.
#define pageheap ((TCMalloc_PageHeap*) pageheap_memory)
+// If TLS is available, we also store a copy
+// of the per-thread object in a __thread variable
+// since __thread variables are faster to read
+// than pthread_getspecific(). We still need
+// pthread_setspecific() because __thread
+// variables provide no way to run cleanup
+// code when a thread is destroyed.
+#ifdef HAVE_TLS
+static __thread TCMalloc_ThreadCache *threadlocal_heap;
+#endif
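The pattern described in the comment above (fast __thread reads, with the pthread key kept only for its destructor) looks roughly like this in isolation; PerThread, Get and Destroy are hypothetical names, not tcmalloc's:

#include <pthread.h>
#include <stddef.h>

struct PerThread { int dummy; };              // stand-in per-thread object

static pthread_key_t tsd_key;                 // created once at startup
static __thread PerThread* fast_ptr = NULL;   // fast-path copy

static void Destroy(void* p) {                // runs at thread exit
  fast_ptr = NULL;
  delete static_cast<PerThread*>(p);
}

static PerThread* Get() {
  if (fast_ptr != NULL) return fast_ptr;      // plain __thread read, no call
  PerThread* p = new PerThread;
  pthread_setspecific(tsd_key, p);            // register p for cleanup
  fast_ptr = p;
  return p;
}
// During single-threaded startup: pthread_key_create(&tsd_key, Destroy);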
// Thread-specific key. Initialization here is somewhat tricky
// because some Linux startup code invokes malloc() before it
// is in a good enough state to handle pthread_keycreate().
@@ -1297,7 +1582,6 @@ static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
//-------------------------------------------------------------------
void TCMalloc_Central_FreeList::Init(size_t cl) {
- lock_.Init();
size_class_ = cl;
DLL_Init(&empty_);
DLL_Init(&nonempty_);
@@ -1396,9 +1680,9 @@ bool TCMalloc_Central_FreeList::MakeCacheSpace() {
namespace {
class LockInverter {
private:
- TCMalloc_SpinLock *held_, *temp_;
+ SpinLock *held_, *temp_;
public:
- inline explicit LockInverter(TCMalloc_SpinLock* held, TCMalloc_SpinLock *temp)
+ inline explicit LockInverter(SpinLock* held, SpinLock *temp)
: held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
};
@@ -1558,7 +1842,7 @@ void TCMalloc_Central_FreeList::Populate() {
inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
if (bytes_until_sample_ < k) {
- PickNextSample();
+ PickNextSample(k);
return true;
} else {
bytes_until_sample_ -= k;
@@ -1577,9 +1861,10 @@ void TCMalloc_ThreadCache::Init(pthread_t tid) {
}
// Initialize RNG -- run it for a bit to get to good values
+ bytes_until_sample_ = 0;
rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
for (int i = 0; i < 100; i++) {
- PickNextSample();
+ PickNextSample(FLAGS_tcmalloc_sample_parameter * 2);
}
}
@@ -1670,27 +1955,7 @@ void TCMalloc_ThreadCache::Scavenge() {
//MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
}
-inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
- void* ptr = NULL;
- if (!tsd_inited) {
- InitModule();
- } else {
- ptr = perftools_pthread_getspecific(heap_key);
- }
- if (ptr == NULL) ptr = CreateCacheIfNecessary();
- return reinterpret_cast<TCMalloc_ThreadCache*>(ptr);
-}
-
-// In deletion paths, we do not try to create a thread-cache. This is
-// because we may be in the thread destruction code and may have
-// already cleaned up the cache for this thread.
-inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
- if (!tsd_inited) return NULL;
- return reinterpret_cast<TCMalloc_ThreadCache*>
- (perftools_pthread_getspecific(heap_key));
-}
-
-void TCMalloc_ThreadCache::PickNextSample() {
+void TCMalloc_ThreadCache::PickNextSample(size_t k) {
// Make next "random" number
// x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
@@ -1713,7 +1978,27 @@ void TCMalloc_ThreadCache::PickNextSample() {
sample_period = primes_list[i];
last_flag_value = flag_value;
}
- bytes_until_sample_ = rnd_ % sample_period;
+
+ bytes_until_sample_ += rnd_ % sample_period;
+
+ if (k > (static_cast<size_t>(-1) >> 2)) {
+ // If the user has asked for a huge allocation then it is possible
+ // for the code below to loop infinitely. Just return (note that
+ // this throws off the sampling accuracy somewhat, but a user who
+ // is allocating more than 1G of memory at a time can live with a
+ // minor inaccuracy in profiling of small allocations, and also
+ // would rather not wait for the loop below to terminate).
+ return;
+ }
+
+ while (bytes_until_sample_ < k) {
+ // Increase bytes_until_sample_ by enough average sampling periods
+ // (sample_period >> 1) to allow us to sample past the current
+ // allocation.
+ bytes_until_sample_ += (sample_period >> 1);
+ }
+
+ bytes_until_sample_ -= k;
}
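Stripped of the flag handling and the huge-allocation guard, the sampling bookkeeping above amounts to a byte countdown; a sketch (Sampler is a hypothetical type):

#include <stddef.h>
#include <stdlib.h>

// Countdown sampler: picking the next threshold uniformly in [0, period)
// makes the average gap between sampled allocations about period/2 bytes.
struct Sampler {
  size_t bytes_until_sample;

  bool RecordAllocation(size_t k, size_t period) {
    if (bytes_until_sample >= k) {
      bytes_until_sample -= k;                 // common case: just count down
      return false;
    }
    bytes_until_sample += static_cast<size_t>(rand()) % period;
    while (bytes_until_sample < k) {
      bytes_until_sample += period >> 1;       // push past this allocation
    }
    bytes_until_sample -= k;
    return true;                               // sample this allocation
  }
};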
void TCMalloc_ThreadCache::InitModule() {
@@ -1740,9 +2025,52 @@ void TCMalloc_ThreadCache::InitModule() {
}
}
+inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(pthread_t tid) {
+ // Create the heap and add it to the linked list
+ TCMalloc_ThreadCache *heap = threadheap_allocator.New();
+ heap->Init(tid);
+ heap->next_ = thread_heaps;
+ heap->prev_ = NULL;
+ if (thread_heaps != NULL) thread_heaps->prev_ = heap;
+ thread_heaps = heap;
+ thread_heap_count++;
+ RecomputeThreadCacheSize();
+ return heap;
+}
+
+inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
+#ifdef HAVE_TLS
+ // __thread is faster, but only when the kernel supports it
+ if (KernelSupportsTLS())
+ return threadlocal_heap;
+#endif
+ return
+ reinterpret_cast<TCMalloc_ThreadCache *>(perftools_pthread_getspecific(heap_key));
+}
+
+inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
+ TCMalloc_ThreadCache* ptr = NULL;
+ if (!tsd_inited) {
+ InitModule();
+ } else {
+ ptr = GetThreadHeap();
+ }
+ if (ptr == NULL) ptr = CreateCacheIfNecessary();
+ return ptr;
+}
+
+// In deletion paths, we do not try to create a thread-cache. This is
+// because we may be in the thread destruction code and may have
+// already cleaned up the cache for this thread.
+inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
+ if (!tsd_inited) return NULL;
+ void* const p = GetThreadHeap();
+ return reinterpret_cast<TCMalloc_ThreadCache*>(p);
+}
+
void TCMalloc_ThreadCache::InitTSD() {
ASSERT(!tsd_inited);
- perftools_pthread_key_create(&heap_key, DeleteCache);
+ perftools_pthread_key_create(&heap_key, DestroyThreadCache);
tsd_inited = true;
// We may have used a fake pthread_t for the main thread. Fix it.
@@ -1756,7 +2084,7 @@ void TCMalloc_ThreadCache::InitTSD() {
}
}
-void* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
+TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
// Initialize per-thread data if necessary
TCMalloc_ThreadCache* heap = NULL;
{
@@ -1780,17 +2108,7 @@ void* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
}
}
- if (heap == NULL) {
- // Create the heap and add it to the linked list
- heap = threadheap_allocator.New();
- heap->Init(me);
- heap->next_ = thread_heaps;
- heap->prev_ = NULL;
- if (thread_heaps != NULL) thread_heaps->prev_ = heap;
- thread_heaps = heap;
- thread_heap_count++;
- RecomputeThreadCacheSize();
- }
+ if (heap == NULL) heap = NewHeap(me);
}
// We call pthread_setspecific() outside the lock because it may
@@ -1800,15 +2118,52 @@ void* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
if (!heap->in_setspecific_ && tsd_inited) {
heap->in_setspecific_ = true;
perftools_pthread_setspecific(heap_key, heap);
+#ifdef HAVE_TLS
+ // Also keep a copy in __thread for faster retrieval
+ threadlocal_heap = heap;
+#endif
heap->in_setspecific_ = false;
}
return heap;
}
-void TCMalloc_ThreadCache::DeleteCache(void* ptr) {
+void TCMalloc_ThreadCache::BecomeIdle() {
+ if (!tsd_inited) return; // No caches yet
+ TCMalloc_ThreadCache* heap = GetThreadHeap();
+ if (heap == NULL) return; // No thread cache to remove
+ if (heap->in_setspecific_) return; // Do not disturb the active caller
+
+ heap->in_setspecific_ = true;
+ perftools_pthread_setspecific(heap_key, NULL);
+#ifdef HAVE_TLS
+ // Also update the copy in __thread
+ threadlocal_heap = NULL;
+#endif
+ heap->in_setspecific_ = false;
+ if (GetThreadHeap() == heap) {
+ // Somehow heap got reinstated by a recursive call to malloc
+ // from pthread_setspecific. We give up in this case.
+ return;
+ }
+
+ // We can now get rid of the heap
+ DeleteCache(heap);
+}
+
+void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
+ // Note that "ptr" cannot be NULL since pthread promises not
+ // to invoke the destructor on NULL values, but for safety,
+ // we check anyway.
+ if (ptr == NULL) return;
+#ifdef HAVE_TLS
+ // Prevent fast path of GetThreadHeap() from returning heap.
+ threadlocal_heap = NULL;
+#endif
+ DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
+}
+
+void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
// Remove all memory from heap
- TCMalloc_ThreadCache* heap;
- heap = reinterpret_cast<TCMalloc_ThreadCache*>(ptr);
heap->Cleanup();
// Remove from linked list
@@ -1832,6 +2187,7 @@ void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
per_thread_cache_size = space;
+ //MESSAGE("Threads %d => cache size %8d\n", n, int(space));
}
void TCMalloc_ThreadCache::Print() const {
@@ -1902,7 +2258,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
cumulative += class_bytes;
out->printf("class %3d [ %8" PRIuS " bytes ] : "
- "%8" LLU " objs; %5.1f MB; %5.1f cum MB\n",
+ "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
cl, ByteSizeForClass(cl),
class_count[cl],
class_bytes / 1048576.0,
@@ -1921,15 +2277,15 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
- stats.thread_bytes;
out->printf("------------------------------------------------\n"
- "MALLOC: %12" LLU " Heap size\n"
- "MALLOC: %12" LLU " Bytes in use by application\n"
- "MALLOC: %12" LLU " Bytes free in page heap\n"
- "MALLOC: %12" LLU " Bytes free in central cache\n"
- "MALLOC: %12" LLU " Bytes free in transfer cache\n"
- "MALLOC: %12" LLU " Bytes free in thread caches\n"
- "MALLOC: %12" LLU " Spans in use\n"
- "MALLOC: %12" LLU " Thread heaps in use\n"
- "MALLOC: %12" LLU " Metadata allocated\n"
+ "MALLOC: %12" PRIu64 " Heap size\n"
+ "MALLOC: %12" PRIu64 " Bytes in use by application\n"
+ "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
+ "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
+ "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
+ "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
+ "MALLOC: %12" PRIu64 " Spans in use\n"
+ "MALLOC: %12" PRIu64 " Thread heaps in use\n"
+ "MALLOC: %12" PRIu64 " Metadata allocated\n"
"------------------------------------------------\n",
stats.system_bytes,
bytes_in_use,
@@ -2120,31 +2476,80 @@ class TCMallocImplementation : public MallocExtension {
return false;
}
+
+ virtual void MarkThreadIdle() {
+ TCMalloc_ThreadCache::BecomeIdle();
+ }
+
+ virtual void ReleaseFreeMemory() {
+ SpinLockHolder h(&pageheap_lock);
+ pageheap->ReleaseFreePages();
+ }
+};
+
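For callers, the two new overrides are reached through the MallocExtension singleton declared in <google/malloc_extension.h>; a usage sketch (the wrapper function names are hypothetical):

#include <google/malloc_extension.h>

void OnWorkerIdle() {
  // Return this thread's cache to the central free lists while it sleeps.
  MallocExtension::instance()->MarkThreadIdle();
}

void OnMemoryPressure() {
  // Hand every free page in the page heap back to the OS immediately.
  MallocExtension::instance()->ReleaseFreeMemory();
}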
+// The constructor allocates an object to ensure that initialization
+// runs before main(), and therefore we do not have a chance to become
+// multi-threaded before initialization. We also create the TSD key
+// here. Presumably by the time this constructor runs, glibc is in
+// good enough shape to handle pthread_key_create().
+//
+// The constructor also takes the opportunity to tell STL to use
+// tcmalloc. We want to do this early, before constructors run, so
+// all user STL allocations go through tcmalloc (which works really
+// well for STL).
+//
+// The destructor prints stats when the program exits.
+class TCMallocGuard {
+ public:
+
+ TCMallocGuard() {
+#ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS
+ // Check whether the kernel also supports TLS (needs to happen at runtime)
+ CheckIfKernelSupportsTLS();
+#endif
+ free(malloc(1));
+ TCMalloc_ThreadCache::InitTSD();
+ free(malloc(1));
+ MallocExtension::Register(new TCMallocImplementation);
+ }
+
+ ~TCMallocGuard() {
+ const char* env = getenv("MALLOCSTATS");
+ if (env != NULL) {
+ int level = atoi(env);
+ if (level < 1) level = 1;
+ PrintStats(level);
+ }
+ }
};
+static TCMallocGuard module_enter_exit_hook;
//-------------------------------------------------------------------
// Helpers for the exported routines below
//-------------------------------------------------------------------
static Span* DoSampledAllocation(size_t size) {
- SpinLockHolder h(&pageheap_lock);
+ // Grab the stack trace outside the heap lock
+ StackTrace tmp;
+ tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
+ tmp.size = size;
+
+ SpinLockHolder h(&pageheap_lock);
// Allocate span
- Span* span = pageheap->New(pages(size == 0 ? 1 : size));
+ Span *span = pageheap->New(pages(size == 0 ? 1 : size));
if (span == NULL) {
return NULL;
}
-
+
// Allocate stack trace
- StackTrace* stack = stacktrace_allocator.New();
+ StackTrace *stack = stacktrace_allocator.New();
if (stack == NULL) {
// Sampling failed because of lack of memory
return span;
}
- // Fill stack trace and record properly
- stack->depth = GetStackTrace(stack->stack, kMaxStackDepth, 1);
- stack->size = size;
+ *stack = tmp;
span->sample = 1;
span->objects = stack;
DLL_Prepend(&sampled_objects, span);
@@ -2155,9 +2560,6 @@ static Span* DoSampledAllocation(size_t size) {
static inline void* do_malloc(size_t size) {
void* ret = NULL;
- if (TCMallocDebug::level >= TCMallocDebug::kVerbose) {
- MESSAGE("In tcmalloc do_malloc(%" PRIuS")\n", size);
- }
// The following call forces module initialization
TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
@@ -2180,8 +2582,6 @@ static inline void* do_malloc(size_t size) {
}
static inline void do_free(void* ptr) {
- if (TCMallocDebug::level >= TCMallocDebug::kVerbose)
- MESSAGE("In tcmalloc do_free(%p)\n", ptr);
if (ptr == NULL) return;
ASSERT(pageheap != NULL); // Should not call free() before malloc()
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
@@ -2286,47 +2686,41 @@ static void* do_memalign(size_t align, size_t size) {
return reinterpret_cast<void*>(span->start << kPageShift);
}
+// Helpers for use by exported routines below:
+static inline void do_malloc_stats() {
+ PrintStats(1);
+}
-// The constructor allocates an object to ensure that initialization
-// runs before main(), and therefore we do not have a chance to become
-// multi-threaded before initialization. We also create the TSD key
-// here. Presumably by the time this constructor runs, glibc is in
-// good enough shape to handle pthread_key_create().
-//
-// The constructor also takes the opportunity to tell STL to use
-// tcmalloc. We want to do this early, before construct time, so
-// all user STL allocations go through tcmalloc (which works really
-// well for STL).
-//
-// The destructor prints stats when the program exits.
+static inline int do_mallopt(int cmd, int value) {
+ return 1; // Indicates error
+}
-class TCMallocGuard {
- public:
- TCMallocGuard() {
- char *envval;
- if ((envval = getenv("TCMALLOC_DEBUG"))) {
- TCMallocDebug::level = atoi(envval);
- MESSAGE("Set tcmalloc debugging level to %d\n", TCMallocDebug::level);
- }
- do_free(do_malloc(1));
- TCMalloc_ThreadCache::InitTSD();
- do_free(do_malloc(1));
- MallocExtension::Register(new TCMallocImplementation);
- }
+#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
+static inline struct mallinfo do_mallinfo() {
+ TCMallocStats stats;
+ ExtractStats(&stats, NULL);
- ~TCMallocGuard() {
- const char* env = getenv("MALLOCSTATS");
- if (env != NULL) {
- int level = atoi(env);
- if (level < 1) level = 1;
- PrintStats(level);
- }
- }
-};
+ // Just some of the fields are filled in.
+ struct mallinfo info;
+ memset(&info, 0, sizeof(info));
-static TCMallocGuard module_enter_exit_hook;
+ // Unfortunately, the struct contains "int" fields, so some of the
+ // size values will be truncated.
+ info.arena = static_cast<int>(stats.system_bytes);
+ info.fsmblks = static_cast<int>(stats.thread_bytes
+ + stats.central_bytes
+ + stats.transfer_bytes);
+ info.fordblks = static_cast<int>(stats.pageheap_bytes);
+ info.uordblks = static_cast<int>(stats.system_bytes
+ - stats.thread_bytes
+ - stats.central_bytes
+ - stats.transfer_bytes
+ - stats.pageheap_bytes);
+ return info;
+}
+#endif
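On systems where HAVE_STRUCT_MALLINFO is defined, the fields filled in above can be read back with the standard call; a usage sketch:

#include <malloc.h>
#include <stdio.h>

void PrintHeapSummary() {
  struct mallinfo mi = mallinfo();
  printf("arena (bytes obtained from system): %d\n", mi.arena);
  printf("in use by application:              %d\n", mi.uordblks);
  printf("free in page heap:                  %d\n", mi.fordblks);
  printf("free in thread/central caches:      %d\n", mi.fsmblks);
}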
//-------------------------------------------------------------------
// Exported routines
@@ -2337,18 +2731,67 @@ static TCMallocGuard module_enter_exit_hook;
// heap-checker.cc depends on this to start a stack trace from
// the call to the (de)allocation function.
-extern "C" void* malloc(size_t size) {
+// Put all callers of MallocHook::Invoke* in this module into
+// ATTRIBUTE_SECTION(google_malloc_allocators) section,
+// so that MallocHook::GetCallerStackTrace can function accurately:
+
+// NOTE: __THROW expands to 'throw()', which means 'never throws.' Urgh.
+extern "C" {
+ void* malloc(size_t size)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+ void free(void* ptr)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+ void* realloc(void* ptr, size_t size)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+ void* calloc(size_t nmemb, size_t size)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+ void cfree(void* ptr)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+
+ void* memalign(size_t __alignment, size_t __size)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+ int posix_memalign(void** ptr, size_t align, size_t size)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+ void* valloc(size_t __size)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+ void* pvalloc(size_t __size)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+}
+
+static void *MemalignOverride(size_t align, size_t size, const void *caller)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+
+void* operator new(size_t size)
+ ATTRIBUTE_SECTION(google_malloc_allocators);
+void operator delete(void* p)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+void* operator new[](size_t size)
+ ATTRIBUTE_SECTION(google_malloc_allocators);
+void operator delete[](void* p)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+
+// And the nothrow variants of these:
+void* operator new(size_t size, const std::nothrow_t&)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+void operator delete(void* p, const std::nothrow_t&)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+void* operator new[](size_t size, const std::nothrow_t&)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+void operator delete[](void* p, const std::nothrow_t&)
+ __THROW ATTRIBUTE_SECTION(google_malloc_allocators);
+
+extern "C" void* malloc(size_t size) __THROW {
void* result = do_malloc(size);
MallocHook::InvokeNewHook(result, size);
return result;
}
-extern "C" void free(void* ptr) {
+extern "C" void free(void* ptr) __THROW {
MallocHook::InvokeDeleteHook(ptr);
do_free(ptr);
}
-extern "C" void* calloc(size_t n, size_t elem_size) {
+extern "C" void* calloc(size_t n, size_t elem_size) __THROW {
// Overflow check
const size_t size = n * elem_size;
if (elem_size != 0 && size / elem_size != n) return NULL;
@@ -2361,12 +2804,12 @@ extern "C" void* calloc(size_t n, size_t elem_size) {
return result;
}
-extern "C" void cfree(void* ptr) {
+extern "C" void cfree(void* ptr) __THROW {
MallocHook::InvokeDeleteHook(ptr);
do_free(ptr);
}
-extern "C" void* realloc(void* old_ptr, size_t new_size) {
+extern "C" void* realloc(void* old_ptr, size_t new_size) __THROW {
if (old_ptr == NULL) {
void* result = do_malloc(new_size);
MallocHook::InvokeNewHook(result, new_size);
@@ -2406,21 +2849,12 @@ extern "C" void* realloc(void* old_ptr, size_t new_size) {
}
}
-#ifndef COMPILER_INTEL
-#define OP_THROWNOTHING
-#define OP_THROWBADALLOC
-#else
-#define OP_THROWNOTHING throw()
-#define OP_THROWBADALLOC throw(std::bad_alloc)
-#endif
-
-static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
+static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED);
static inline void* cpp_alloc(size_t size, bool nothrow) {
for (;;) {
void* p = do_malloc(size);
#ifdef PREANSINEW
- MallocHook::InvokeNewHook(p, size);
return p;
#else
if (p == NULL) { // allocation failed
@@ -2446,60 +2880,77 @@ static inline void* cpp_alloc(size_t size, bool nothrow) {
(*nh)();
} catch (const std::bad_alloc&) {
if (!nothrow) throw;
- MallocHook::InvokeNewHook(p, size);
return p;
}
} else { // allocation success
- MallocHook::InvokeNewHook(p, size);
return p;
}
#endif
}
}
-void* operator new(size_t size) OP_THROWBADALLOC {
- return cpp_alloc(size, false);
+void* operator new(size_t size) {
+ void* p = cpp_alloc(size, false);
+ // We keep this next instruction out of cpp_alloc for a reason: when
+ // it's in, and new just calls cpp_alloc, the optimizer may fold the
+ // new call into cpp_alloc, which messes up our whole section-based
+ // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
+ // isn't the last thing this fn calls, and prevents the folding.
+ MallocHook::InvokeNewHook(p, size);
+ return p;
}
-void* operator new(size_t size, const std::nothrow_t&) OP_THROWNOTHING {
- return cpp_alloc(size, true);
+void* operator new(size_t size, const std::nothrow_t&) __THROW {
+ void* p = cpp_alloc(size, true);
+ MallocHook::InvokeNewHook(p, size);
+ return p;
}
-void operator delete(void* p) OP_THROWNOTHING {
+void operator delete(void* p) __THROW {
MallocHook::InvokeDeleteHook(p);
do_free(p);
}
-void operator delete(void* p, const std::nothrow_t&) OP_THROWNOTHING {
+void operator delete(void* p, const std::nothrow_t&) __THROW {
MallocHook::InvokeDeleteHook(p);
do_free(p);
}
-void* operator new[](size_t size) OP_THROWBADALLOC {
- return cpp_alloc(size, false);
+void* operator new[](size_t size) {
+ void* p = cpp_alloc(size, false);
+ // We keep this next instruction out of cpp_alloc for a reason: when
+ // it's in, and new just calls cpp_alloc, the optimizer may fold the
+ // new call into cpp_alloc, which messes up our whole section-based
+ // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
+ // isn't the last thing this fn calls, and prevents the folding.
+ MallocHook::InvokeNewHook(p, size);
+ return p;
}
-void* operator new[](size_t size, const std::nothrow_t&) OP_THROWNOTHING {
- return cpp_alloc(size, true);
+void* operator new[](size_t size, const std::nothrow_t&) __THROW {
+ void* p = cpp_alloc(size, true);
+ MallocHook::InvokeNewHook(p, size);
+ return p;
}
-void operator delete[](void* p) OP_THROWNOTHING {
+void operator delete[](void* p) __THROW {
MallocHook::InvokeDeleteHook(p);
do_free(p);
}
-void operator delete[](void* p, const std::nothrow_t&) OP_THROWNOTHING {
+void operator delete[](void* p, const std::nothrow_t&) __THROW {
MallocHook::InvokeDeleteHook(p);
do_free(p);
}
-extern "C" void* memalign(size_t align, size_t size) {
+extern "C" void* memalign(size_t align, size_t size) __THROW {
void* result = do_memalign(align, size);
MallocHook::InvokeNewHook(result, size);
return result;
}
-extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size) {
+extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
+ __THROW {
if (((align % sizeof(void*)) != 0) ||
((align & (align - 1)) != 0) ||
(align == 0)) {
@@ -2518,7 +2969,7 @@ extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size) {
static size_t pagesize = 0;
-extern "C" void* valloc(size_t size) {
+extern "C" void* valloc(size_t size) __THROW {
// Allocate page-aligned object of length >= size bytes
if (pagesize == 0) pagesize = getpagesize();
void* result = do_memalign(pagesize, size);
@@ -2526,7 +2977,7 @@ extern "C" void* valloc(size_t size) {
return result;
}
-extern "C" void* pvalloc(size_t size) {
+extern "C" void* pvalloc(size_t size) __THROW {
// Round up size to a multiple of pagesize
if (pagesize == 0) pagesize = getpagesize();
size = (size + pagesize - 1) & ~(pagesize - 1);
@@ -2536,36 +2987,18 @@ extern "C" void* pvalloc(size_t size) {
}
extern "C" void malloc_stats(void) {
- PrintStats(1);
+ do_malloc_stats();
}
extern "C" int mallopt(int cmd, int value) {
- return 1; // Indicates error
+ return do_mallopt(cmd, value);
}
+#ifdef HAVE_STRUCT_MALLINFO
extern "C" struct mallinfo mallinfo(void) {
- TCMallocStats stats;
- ExtractStats(&stats, NULL);
-
- // Just some of the fields are filled in.
- struct mallinfo info;
- memset(&info, 0, sizeof(info));
-
- // Unfortunately, the struct contains "int" field, so some of the
- // size values will be truncated.
- info.arena = static_cast<int>(stats.system_bytes);
- info.fsmblks = static_cast<int>(stats.thread_bytes
- + stats.central_bytes
- + stats.transfer_bytes);
- info.fordblks = static_cast<int>(stats.pageheap_bytes);
- info.uordblks = static_cast<int>(stats.system_bytes
- - stats.thread_bytes
- - stats.central_bytes
- - stats.transfer_bytes
- - stats.pageheap_bytes);
-
- return info;
+ return do_mallinfo();
}
+#endif
//-------------------------------------------------------------------
// Some library routines on RedHat 9 allocate memory using malloc()
@@ -2611,7 +3044,8 @@ extern "C" {
// This function is an exception to the rule of calling MallocHook method
// from the stack frame of the allocation function;
// heap-checker handles this special case explicitly.
-static void *MemalignOverride(size_t align, size_t size, const void *caller) {
+static void *MemalignOverride(size_t align, size_t size, const void *caller)
+ __THROW {
void* result = do_memalign(align, size);
MallocHook::InvokeNewHook(result, size);
return result;