author     csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>  2010-06-21 15:59:56 +0000
committer  csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>  2010-06-21 15:59:56 +0000
commit     cb7393cbe2d737009001fd9d30dad568bac7a3d8 (patch)
tree       239f4ca1c465d8389bf1c767189f2bb4e7b0c609 /src
parent     d8c02761689ba909f474b85618f99ac6dfc9a168 (diff)
* Default to not sampling in tcmalloc (csilvers)
* Add -DTCMALLOC_LARGE_PAGES: better perf for some workloads (rus)
* Extend pprof --tools to allow per-tool configs (csilvers)
* Have STL_Allocator pass on # bytes to free (richardfang)
* Add a header guard to config.h (csilvers)
* DOC: Clean up documentation around tcmalloc.slack_bytes (fikes)
* DOC: Document ProfilerFlush, ProfilerStartWithOptions (csilvers)
* PORTING: Work around a gcc 4.5.0 optimization bug (csilvers)
* PORTING: Use -fno-builtin-malloc and friends when compiling tcmalloc
* PORTING: Define _WIN32_WINNT high enough for mingw (csilvers)
* PORTING: Work around libtool bug getting deps wrong in some cases
* Update README.windows to emphasize $IncludeDir more (csilvers)
* Rename README.windows to README_windows.txt (csilvers)

git-svn-id: http://gperftools.googlecode.com/svn/trunk@95 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src')
-rw-r--r--  src/base/dynamic_annotations.c    17
-rw-r--r--  src/base/dynamic_annotations.h    13
-rw-r--r--  src/base/stl_allocator.h           4
-rw-r--r--  src/central_freelist.cc           24
-rw-r--r--  src/common.h                      43
-rw-r--r--  src/config.h.in                    7
-rw-r--r--  src/google/malloc_extension.h     25
-rw-r--r--  src/heap-checker.cc                3
-rw-r--r--  src/internal_logging.h             4
-rw-r--r--  src/linked_list.h                  2
-rw-r--r--  src/memory_region_map.h            2
-rw-r--r--  src/page_heap.cc                  97
-rw-r--r--  src/page_heap.h                   71
-rwxr-xr-x  src/pprof                         35
-rwxr-xr-x  src/sampler.cc                     7
-rw-r--r--  src/span.h                         4
-rw-r--r--  src/tcmalloc.cc                  136
-rw-r--r--  src/tests/frag_unittest.cc         7
-rw-r--r--  src/tests/page_heap_test.cc        2
-rw-r--r--  src/tests/testutil.cc              2
-rw-r--r--  src/thread_cache.cc                3
-rw-r--r--  src/thread_cache.h                16
-rw-r--r--  src/windows/config.h               5
-rw-r--r--  src/windows/mingw.h               13
24 files changed, 307 insertions, 235 deletions
diff --git a/src/base/dynamic_annotations.c b/src/base/dynamic_annotations.c
index cdefaa7..bddd693 100644
--- a/src/base/dynamic_annotations.c
+++ b/src/base/dynamic_annotations.c
@@ -141,8 +141,25 @@ int RunningOnValgrind(void) {
static volatile int running_on_valgrind = -1;
/* C doesn't have thread-safe initialization of statics, and we
don't want to depend on pthread_once here, so hack it. */
+ ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack");
int local_running_on_valgrind = running_on_valgrind;
if (local_running_on_valgrind == -1)
running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind();
return local_running_on_valgrind;
}
+
+/* See the comments in dynamic_annotations.h */
+double ValgrindSlowdown() {
+ if (RunningOnValgrind() == 0) {
+ return 1.0;
+ }
+ /* Same initialization hack as in RunningOnValgrind(). */
+ static volatile double slowdown = 0.0;
+ ANNOTATE_BENIGN_RACE(&slowdown, "safe hack");
+ double local_slowdown = slowdown;
+ if (local_slowdown == 0.0) {
+ char *env = getenv("VALGRIND_SLOWDOWN");
+ slowdown = local_slowdown = env ? atof(env) : 50.0;
+ }
+ return local_slowdown;
+}
diff --git a/src/base/dynamic_annotations.h b/src/base/dynamic_annotations.h
index dae1a14..ceb9809 100644
--- a/src/base/dynamic_annotations.h
+++ b/src/base/dynamic_annotations.h
@@ -457,6 +457,19 @@ void AnnotateFlushState(const char *file, int line);
*/
int RunningOnValgrind(void);
+/* ValgrindSlowdown returns:
+ * 1.0, if (RunningOnValgrind() == 0)
+ * 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL)
+ * atof(getenv("VALGRIND_SLOWDOWN")) otherwise
+ This function can be used to scale timeout values:
+ EXAMPLE:
+ for (;;) {
+ DoExpensiveBackgroundTask();
+ SleepForSeconds(5 * ValgrindSlowdown());
+ }
+ */
+double ValgrindSlowdown();
+
#ifdef __cplusplus
}
#endif
diff --git a/src/base/stl_allocator.h b/src/base/stl_allocator.h
index b0ddc68..7b0b8ca 100644
--- a/src/base/stl_allocator.h
+++ b/src/base/stl_allocator.h
@@ -45,7 +45,7 @@
// Generic allocator class for STL objects
// that uses a given type-less allocator Alloc, which must provide:
// static void* Alloc::Allocate(size_t size);
-// static void Alloc::Free(void* ptr);
+// static void Alloc::Free(void* ptr, size_t size);
//
// STL_Allocator<T, MyAlloc> provides the same thread-safety
// guarantees as MyAlloc.
@@ -82,7 +82,7 @@ class STL_Allocator {
RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate");
return static_cast<T*>(Alloc::Allocate(n * sizeof(T)));
}
- void deallocate(pointer p, size_type /*n*/) { Alloc::Free(p); }
+ void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); }
size_type max_size() const { return size_t(-1) / sizeof(T); }
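
For reference, a minimal sketch (not part of this commit) of an allocator policy that satisfies the updated STL_Allocator contract; the extra size argument to Free() is what this change threads through, and a simple malloc-backed policy can just ignore it:

    // Hypothetical policy class for illustration only.
    #include <cstddef>   // size_t
    #include <cstdlib>   // malloc/free
    // #include "base/stl_allocator.h"  // defines STL_Allocator<T, Alloc>

    struct MallocBackedAlloc {
      static void* Allocate(size_t size) { return malloc(size); }
      // New in this commit: the byte count is passed along, so arena- or
      // accounting-style allocators can use it; plain free() does not need it.
      static void Free(void* ptr, size_t /* size */) { free(ptr); }
    };

    // Usage sketch: std::vector<int, STL_Allocator<int, MallocBackedAlloc> > v;
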
diff --git a/src/central_freelist.cc b/src/central_freelist.cc
index 5b7dfbb..da498e6 100644
--- a/src/central_freelist.cc
+++ b/src/central_freelist.cc
@@ -57,9 +57,22 @@ void CentralFreeList::ReleaseListToSpans(void* start) {
}
}
-void CentralFreeList::ReleaseToSpans(void* object) {
+// MapObjectToSpan should logically be part of ReleaseToSpans. But
+// this triggers an optimization bug in gcc 4.5.0. Moving to a
+// separate function, and making sure that function isn't inlined,
+// seems to fix the problem. It also should be fixed for gcc 4.5.1.
+static
+#if __GNUC__ == 4 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0
+__attribute__ ((noinline))
+#endif
+Span* MapObjectToSpan(void* object) {
const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
Span* span = Static::pageheap()->GetDescriptor(p);
+ return span;
+}
+
+void CentralFreeList::ReleaseToSpans(void* object) {
+ Span* span = MapObjectToSpan(object);
ASSERT(span != NULL);
ASSERT(span->refcount > 0);
@@ -266,7 +279,8 @@ void CentralFreeList::Populate() {
Span* span;
{
SpinLockHolder h(Static::pageheap_lock());
- span = Static::pageheap()->New(npages, size_class_, kPageSize);
+ span = Static::pageheap()->New(npages);
+ if (span) Static::pageheap()->RegisterSizeClass(span, size_class_);
}
if (span == NULL) {
MESSAGE("tcmalloc: allocation failed", npages << kPageShift);
@@ -274,6 +288,12 @@ void CentralFreeList::Populate() {
return;
}
ASSERT(span->length == npages);
+ // Cache sizeclass info eagerly. Locking is not necessary.
+ // (Instead of being eager, we could just replace any stale info
+ // about this span, but that seems to be no better in practice.)
+ for (int i = 0; i < npages; i++) {
+ Static::pageheap()->CacheSizeClass(span->start + i, size_class_);
+ }
// Split the block into pieces and add to the free-list
// TODO: coloring of objects to avoid cache conflicts?
diff --git a/src/common.h b/src/common.h
index b0278eb..5226998 100644
--- a/src/common.h
+++ b/src/common.h
@@ -54,16 +54,45 @@ typedef uintptr_t Length;
// Configuration
//-------------------------------------------------------------------
-// Not all possible combinations of the following parameters make
-// sense. In particular, if kMaxSize increases, you may have to
-// increase kNumClasses as well.
+// Using large pages speeds up the execution at a cost of larger memory use.
+// Deallocation may speed up by a factor as the page map gets 8x smaller, so
+// lookups in the page map result in fewer L2 cache misses, which translates to
+// speedup for application/platform combinations with high L2 cache pressure.
+// As the number of size classes increases with large pages, we increase
+// the thread cache allowance to avoid passing more free ranges to and from
+// central lists. Also, larger pages are less likely to get freed.
+// These two factors cause a bounded increase in memory use.
+
+#if defined(TCMALLOC_LARGE_PAGES)
+static const size_t kPageShift = 15;
+static const size_t kNumClasses = 95;
+static const size_t kMaxThreadCacheSize = 4 << 20;
+#else
static const size_t kPageShift = 12;
+static const size_t kNumClasses = 61;
+static const size_t kMaxThreadCacheSize = 2 << 20;
+#endif
+
static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 8u * kPageSize;
static const size_t kAlignment = 8;
-static const size_t kNumClasses = 61;
static const size_t kLargeSizeClass = 0;
+// Default bound on the total amount of thread caches.
+static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
+
+// Lower bound on the per-thread cache sizes
+static const size_t kMinThreadCacheSize = kMaxSize * 2;
+
+// The number of bytes one ThreadCache will steal from another when
+// the first ThreadCache is forced to Scavenge(), delaying the
+// next call to Scavenge for this thread.
+static const size_t kStealAmount = 1 << 16;
+
+// The number of times that a deallocation can cause a freelist to
+// go over its max_length() before shrinking max_length().
+static const int kMaxOverages = 3;
+
// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list. We
// want this big to avoid locking the central free-list too often. It
@@ -115,8 +144,10 @@ class SizeMap {
// ...
// 32768 (32768 + 127 + (120<<7)) / 128 376
static const int kMaxSmallSize = 1024;
- unsigned char class_array_[377];
-
+ static const size_t kClassArraySize =
+ (((1 << kPageShift) * 8u + 127 + (120 << 7)) >> 7) + 1;
+ unsigned char class_array_[kClassArraySize];
+
// Compute index of the class_array[] entry for a given size
static inline int ClassIndex(int s) {
ASSERT(0 <= s);
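
Two quick checks on the new common.h constants (a sketch, not from the commit). Large pages are opted into at build time, e.g. ./configure CPPFLAGS=-DTCMALLOC_LARGE_PAGES with the usual autoconf flag passing, and the kClassArraySize formula reproduces the old hard-coded 377-entry array for the default 4KB pages while scaling up for 32KB pages:

    #include <cstdio>

    // Reproduces the class_array_ sizing arithmetic from src/common.h.
    // For kPageShift = 12: kMaxSize = 8 * 4096 = 32768, and the last
    // ClassIndex entry is (32768 + 127 + (120 << 7)) / 128 = 376, so 377 slots.
    int main() {
      for (int shift = 12; shift <= 15; shift += 3) {
        unsigned max_size = (1u << shift) * 8u;                 // kMaxSize
        unsigned last = (max_size + 127u + (120u << 7)) >> 7;   // ClassIndex(kMaxSize)
        std::printf("kPageShift=%d -> class_array_[%u]\n", shift, last + 1);
      }
      return 0;
    }
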
diff --git a/src/config.h.in b/src/config.h.in
index 49bbf0d..a1d5c68 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -1,5 +1,10 @@
/* src/config.h.in. Generated from configure.ac by autoheader. */
+
+#ifndef GOOGLE_PERFTOOLS_CONFIG_H_
+#define GOOGLE_PERFTOOLS_CONFIG_H_
+
+
/* Define to 1 if compiler supports __builtin_stack_pointer */
#undef HAVE_BUILTIN_STACK_POINTER
@@ -240,3 +245,5 @@
#include "windows/mingw.h"
#endif
+#endif /* #ifndef GOOGLE_PERFTOOLS_CONFIG_H_ */
+
diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h
index fc272c9..9c05897 100644
--- a/src/google/malloc_extension.h
+++ b/src/google/malloc_extension.h
@@ -145,21 +145,22 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// Number of bytes used across all thread caches.
// This property is not writable.
//
- // "tcmalloc.slack_bytes"
- // Number of bytes allocated from system, but not currently in
- // use by malloced objects. I.e., bytes available for
- // allocation without needing more bytes from system. It is
- // the sum of pageheap_free_bytes and pageheap_unmapped_bytes.
- // This property is not writable.
- //
// "tcmalloc.pageheap_free_bytes"
- // Number of bytes in free, mapped pages in pageheap
- // This property is not writable.
+ // Number of bytes in free, mapped pages in page heap. These
+ // bytes can be used to fulfill allocation requests. They
+ // always count towards virtual memory usage, and unless the
+ // underlying memory is swapped out by the OS, they also count
+ // towards physical memory usage. This property is not writable.
//
// "tcmalloc.pageheap_unmapped_bytes"
- // Number of bytes in free, unmapped pages in pageheap
- // This property is not writable.
- //
+ // Number of bytes in free, unmapped pages in page heap.
+ // These are bytes that have been released back to the OS,
+ // possibly by one of the MallocExtension "Release" calls.
+ // They can be used to fulfill allocation requests, but
+ // typically incur a page fault. They always count towards
+ // virtual memory usage, and depending on the OS, typically
+ // do not count towards physical memory usage. This property
+ // is not writable.
// -------------------------------------------------------------------
// Get the named "property"'s value. Returns true if the property
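
A minimal usage sketch for the properties documented above (the call pattern is the standard MallocExtension one; error handling is omitted):

    #include <cstdio>
    #include <google/malloc_extension.h>

    // Reads the two page-heap properties plus the legacy slack_bytes value,
    // which this release defines as their sum.
    void PrintPageHeapFreeSpace() {
      size_t free_bytes = 0, unmapped_bytes = 0, slack_bytes = 0;
      MallocExtension* ext = MallocExtension::instance();
      ext->GetNumericProperty("tcmalloc.pageheap_free_bytes", &free_bytes);
      ext->GetNumericProperty("tcmalloc.pageheap_unmapped_bytes", &unmapped_bytes);
      ext->GetNumericProperty("tcmalloc.slack_bytes", &slack_bytes);
      std::printf("pageheap free: %zu  unmapped: %zu  slack: %zu\n",
                  free_bytes, unmapped_bytes, slack_bytes);
    }
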
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index 2779c97..2b0b854 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -304,6 +304,9 @@ class HeapLeakChecker::Allocator {
if (p) alloc_count_ -= 1;
LowLevelAlloc::Free(p);
}
+ static void Free(void* p, size_t /* n */) {
+ Free(p);
+ }
// destruct, free, and make *p to be NULL
template<typename T> static void DeleteAndNull(T** p) {
(*p)->~T();
diff --git a/src/internal_logging.h b/src/internal_logging.h
index 731b2d9..0cb9ba2 100644
--- a/src/internal_logging.h
+++ b/src/internal_logging.h
@@ -119,9 +119,7 @@ do { \
#ifndef NDEBUG
#define ASSERT(cond) CHECK_CONDITION(cond)
#else
-#define ASSERT(cond) \
- do { \
- } while (0 && (cond))
+#define ASSERT(cond) ((void) 0)
#endif
// Print into buffer
diff --git a/src/linked_list.h b/src/linked_list.h
index 638174b..4b0af1b 100644
--- a/src/linked_list.h
+++ b/src/linked_list.h
@@ -36,6 +36,8 @@
#ifndef TCMALLOC_LINKED_LIST_H_
#define TCMALLOC_LINKED_LIST_H_
+#include <stddef.h>
+
namespace tcmalloc {
inline void *SLL_Next(void *t) {
diff --git a/src/memory_region_map.h b/src/memory_region_map.h
index f88c7b9..776abb3 100644
--- a/src/memory_region_map.h
+++ b/src/memory_region_map.h
@@ -231,7 +231,7 @@ class MemoryRegionMap {
static void *Allocate(size_t n) {
return LowLevelAlloc::AllocWithArena(n, arena_);
}
- static void Free(const void *p) {
+ static void Free(const void *p, size_t /* n */) {
LowLevelAlloc::Free(const_cast<void*>(p));
}
};
diff --git a/src/page_heap.cc b/src/page_heap.cc
index 7bfeea4..1e63cb9 100644
--- a/src/page_heap.cc
+++ b/src/page_heap.cc
@@ -61,64 +61,49 @@ PageHeap::PageHeap()
}
}
-// Returns the minimum number of pages necessary to ensure that an
-// allocation of size n can be aligned to the given alignment.
-static Length AlignedAllocationSize(Length n, size_t alignment) {
- ASSERT(alignment >= kPageSize);
- return n + tcmalloc::pages(alignment - kPageSize);
-}
-
-Span* PageHeap::New(Length n, size_t sc, size_t align) {
+Span* PageHeap::New(Length n) {
ASSERT(Check());
ASSERT(n > 0);
- if (align < kPageSize) {
- align = kPageSize;
- }
-
- Length aligned_size = AlignedAllocationSize(n, align);
-
// Find first size >= n that has a non-empty list
- for (Length s = aligned_size; s < kMaxPages; s++) {
+ for (Length s = n; s < kMaxPages; s++) {
Span* ll = &free_[s].normal;
// If we're lucky, ll is non-empty, meaning it has a suitable span.
if (!DLL_IsEmpty(ll)) {
ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
- return Carve(ll->next, n, sc, align);
+ return Carve(ll->next, n);
}
// Alternatively, maybe there's a usable returned span.
ll = &free_[s].returned;
if (!DLL_IsEmpty(ll)) {
ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
- return Carve(ll->next, n, sc, align);
+ return Carve(ll->next, n);
}
// Still no luck, so keep looking in larger classes.
}
- Span* result = AllocLarge(n, sc, align);
+ Span* result = AllocLarge(n);
if (result != NULL) return result;
// Grow the heap and try again
- if (!GrowHeap(aligned_size)) {
+ if (!GrowHeap(n)) {
ASSERT(Check());
return NULL;
}
- return AllocLarge(n, sc, align);
+ return AllocLarge(n);
}
-Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
- // Find the best span (closest to n in size).
+Span* PageHeap::AllocLarge(Length n) {
+ // find the best span (closest to n in size).
// The following loops implements address-ordered best-fit.
Span *best = NULL;
- Length aligned_size = AlignedAllocationSize(n, align);
-
// Search through normal list
for (Span* span = large_.normal.next;
span != &large_.normal;
span = span->next) {
- if (span->length >= aligned_size) {
+ if (span->length >= n) {
if ((best == NULL)
|| (span->length < best->length)
|| ((span->length == best->length) && (span->start < best->start))) {
@@ -132,7 +117,7 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
for (Span* span = large_.returned.next;
span != &large_.returned;
span = span->next) {
- if (span->length >= aligned_size) {
+ if (span->length >= n) {
if ((best == NULL)
|| (span->length < best->length)
|| ((span->length == best->length) && (span->start < best->start))) {
@@ -142,18 +127,19 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
}
}
- return best == NULL ? NULL : Carve(best, n, sc, align);
+ return best == NULL ? NULL : Carve(best, n);
}
Span* PageHeap::Split(Span* span, Length n) {
ASSERT(0 < n);
ASSERT(n < span->length);
- ASSERT((span->location != Span::IN_USE) || span->sizeclass == 0);
+ ASSERT(span->location == Span::IN_USE);
+ ASSERT(span->sizeclass == 0);
Event(span, 'T', n);
const int extra = span->length - n;
Span* leftover = NewSpan(span->start + n, extra);
- leftover->location = span->location;
+ ASSERT(leftover->location == Span::IN_USE);
Event(leftover, 'U', extra);
RecordSpan(leftover);
pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
@@ -162,44 +148,25 @@ Span* PageHeap::Split(Span* span, Length n) {
return leftover;
}
-Span* PageHeap::Carve(Span* span, Length n, size_t sc, size_t align) {
+Span* PageHeap::Carve(Span* span, Length n) {
ASSERT(n > 0);
ASSERT(span->location != Span::IN_USE);
- ASSERT(align >= kPageSize);
-
- Length align_pages = align >> kPageShift;
+ const int old_location = span->location;
RemoveFromFreeList(span);
-
- if (span->start & (align_pages - 1)) {
- Length skip_for_alignment = align_pages - (span->start & (align_pages - 1));
- Span* aligned = Split(span, skip_for_alignment);
- PrependToFreeList(span); // Skip coalescing - no candidates possible
- span = aligned;
- }
+ span->location = Span::IN_USE;
+ Event(span, 'A', n);
const int extra = span->length - n;
ASSERT(extra >= 0);
if (extra > 0) {
- Span* leftover = Split(span, n);
- PrependToFreeList(leftover);
+ Span* leftover = NewSpan(span->start + n, extra);
+ leftover->location = old_location;
+ Event(leftover, 'S', extra);
+ RecordSpan(leftover);
+ PrependToFreeList(leftover); // Skip coalescing - no candidates possible
+ span->length = n;
+ pagemap_.set(span->start + n - 1, span);
}
-
- span->location = Span::IN_USE;
- span->sizeclass = sc;
- Event(span, 'A', n);
-
- // Cache sizeclass info eagerly. Locking is not necessary.
- // (Instead of being eager, we could just replace any stale info
- // about this span, but that seems to be no better in practice.)
- CacheSizeClass(span->start, sc);
-
- if (sc != kLargeSizeClass) {
- for (Length i = 1; i < n; i++) {
- pagemap_.set(span->start + i, span);
- CacheSizeClass(span->start + i, sc);
- }
- }
-
ASSERT(Check());
return span;
}
@@ -351,6 +318,18 @@ Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
return released_pages;
}
+void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
+ // Associate span object with all interior pages as well
+ ASSERT(span->location == Span::IN_USE);
+ ASSERT(GetDescriptor(span->start) == span);
+ ASSERT(GetDescriptor(span->start+span->length-1) == span);
+ Event(span, 'C', sc);
+ span->sizeclass = sc;
+ for (Length i = 1; i < span->length-1; i++) {
+ pagemap_.set(span->start+i, span);
+ }
+}
+
static double MB(uint64_t bytes) {
return bytes / 1048576.0;
}
diff --git a/src/page_heap.h b/src/page_heap.h
index de36266..74030d2 100644
--- a/src/page_heap.h
+++ b/src/page_heap.h
@@ -93,49 +93,21 @@ class PERFTOOLS_DLL_DECL PageHeap {
public:
PageHeap();
- // Allocate a run of "n" pages. Returns NULL if out of memory.
- // Caller should not pass "n == 0" -- instead, n should have been
- // rounded up already. The span will be used for allocating objects
- // with the specifled sizeclass sc (sc must be zero for large
- // objects). The first page of the span will be aligned to the value
- // specified by align, which must be a power of two.
- Span* New(Length n, size_t sc, size_t align);
+ // Allocate a run of "n" pages. Returns zero if out of memory.
+ // Caller should not pass "n == 0" -- instead, n should have
+ // been rounded up already.
+ Span* New(Length n);
// Delete the span "[p, p+n-1]".
// REQUIRES: span was returned by earlier call to New() and
// has not yet been deleted.
void Delete(Span* span);
- // Gets either the size class of addr, if it is a small object, or it's span.
- // Return:
- // if addr is invalid:
- // leave *out_sc and *out_span unchanged and return false;
- // if addr is valid and has a small size class:
- // *out_sc = the size class
- // *out_span = <undefined>
- // return true
- // if addr is valid and has a large size class:
- // *out_sc = kLargeSizeClass
- // *out_span = the span pointer
- // return true
- bool GetSizeClassOrSpan(void* addr, size_t* out_sc, Span** out_span) {
- const PageID p = reinterpret_cast<uintptr_t>(addr) >> kPageShift;
- size_t cl = GetSizeClassIfCached(p);
- Span* span = NULL;
-
- if (cl != kLargeSizeClass) {
- ASSERT(cl == GetDescriptor(p)->sizeclass);
- } else {
- span = GetDescriptor(p);
- if (!span) {
- return false;
- }
- cl = span->sizeclass;
- }
- *out_span = span;
- *out_sc = cl;
- return true;
- }
+ // Mark an allocated span as being used for small objects of the
+ // specified size-class.
+ // REQUIRES: span was returned by an earlier call to New()
+ // and has not yet been deleted.
+ void RegisterSizeClass(Span* span, size_t sc);
// Split an allocated span into two spans: one of length "n" pages
// followed by another span of length "span->length - n" pages.
@@ -143,29 +115,14 @@ class PERFTOOLS_DLL_DECL PageHeap {
// Returns a pointer to the second span.
//
// REQUIRES: "0 < n < span->length"
- // REQUIRES: a) the span is free or b) sizeclass == 0
+ // REQUIRES: span->location == IN_USE
+ // REQUIRES: span->sizeclass == 0
Span* Split(Span* span, Length n);
// Return the descriptor for the specified page. Returns NULL if
// this PageID was not allocated previously.
inline Span* GetDescriptor(PageID p) const {
- Span* ret = reinterpret_cast<Span*>(pagemap_.get(p));
-#ifndef NDEBUG
- if (ret != NULL && ret->location == Span::IN_USE) {
- size_t cl = GetSizeClassIfCached(p);
- // Three cases:
- // - The object is not cached
- // - The object is cached correctly
- // - It is a large object and we're not looking at the first
- // page. This happens in coalescing.
- ASSERT(cl == kLargeSizeClass || cl == ret->sizeclass ||
- (ret->start != p && ret->sizeclass == kLargeSizeClass));
- // If the object is sampled, it must have be kLargeSizeClass
- ASSERT(ret->sizeclass == kLargeSizeClass || !ret->sample);
- }
-#endif
-
- return ret;
+ return reinterpret_cast<Span*>(pagemap_.get(p));
}
// Dump state to stderr
@@ -266,7 +223,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
// length exactly "n" and mark it as non-free so it can be returned
// to the client. After all that, decrease free_pages_ by n and
// return span.
- Span* Carve(Span* span, Length n, size_t sc, size_t align);
+ Span* Carve(Span* span, Length n);
void RecordSpan(Span* span) {
pagemap_.set(span->start, span);
@@ -277,7 +234,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
// Allocate a large span of length == n. If successful, returns a
// span of exactly the specified length. Else, returns NULL.
- Span* AllocLarge(Length n, size_t sc, size_t align);
+ Span* AllocLarge(Length n);
// Coalesce span with neighboring spans if possible, prepend to
// appropriate free list, and adjust stats.
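
To make the interface change concrete, a sketch of the new allocation protocol for small-object spans (it mirrors what CentralFreeList::Populate does earlier in this diff; it is not standalone code, since Span, Static, SpinLockHolder and Length are tcmalloc-internal types):

    // Before: Static::pageheap()->New(npages, size_class, kPageSize) allocated,
    // aligned and registered the size class in one call.
    // After: New() only hands out pages; the caller registers the size class.
    Span* NewSmallObjectSpan(Length npages, size_t size_class) {
      SpinLockHolder h(Static::pageheap_lock());
      Span* span = Static::pageheap()->New(npages);
      if (span != NULL) {
        // Maps the interior pages of the span and records the size class.
        Static::pageheap()->RegisterSizeClass(span, size_class);
      }
      return span;
    }
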
diff --git a/src/pprof b/src/pprof
index 8aff380..8d4ddcf 100755
--- a/src/pprof
+++ b/src/pprof
@@ -215,7 +215,7 @@ Call-graph Options:
(i.e. direct leak generators) more visible
Miscellaneous:
- --tools=<prefix> Prefix for object tool pathnames
+ --tools=<prefix or binary:fullpath>[,...] \$PATH for object tool pathnames
--test Run unit tests
--help This message
--version Version information
@@ -4331,18 +4331,27 @@ sub ConfigureTool {
my $tool = shift;
my $path;
- if ($main::opt_tools ne "") {
- # Use a prefix specified by the --tools option...
- $path = $main::opt_tools . $tool;
- if (!-x $path) {
- error("No '$tool' found with prefix specified by --tools $main::opt_tools\n");
- }
- } elsif (exists $ENV{"PPROF_TOOLS"} &&
- $ENV{"PPROF_TOOLS"} ne "") {
- #... or specified with the PPROF_TOOLS environment variable...
- $path = $ENV{"PPROF_TOOLS"} . $tool;
- if (!-x $path) {
- error("No '$tool' found with prefix specified by PPROF_TOOLS=$ENV{PPROF_TOOLS}\n");
+ # --tools (or $PPROF_TOOLS) is a comma separated list, where each
+ # item is either a) a pathname prefix, or b) a map of the form
+ # <tool>:<path>. First we look for an entry of type (b) for our
+ # tool. If one is found, we use it. Otherwise, we consider all the
+ # pathname prefixes in turn, until one yields an existing file. If
+ # none does, we use a default path.
+ my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
+ if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
+ $path = $2;
+ # TODO(csilvers): sanity-check that $path exists? Hard if it's relative.
+ } elsif ($tools) {
+ foreach my $prefix (split(',', $tools)) {
+ next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list
+ if (-x $prefix . $tool) {
+ $path = $prefix . $tool;
+ last;
+ }
+ }
+ if (!$path) {
+ error("No '$tool' found with prefix specified by " .
+ "--tools (or \$PPROF_TOOLS) '$tools'\n");
}
} else {
# ... otherwise use the version that exists in the same directory as
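
Illustrative invocations of the extended flag (binary and paths here are hypothetical): a plain prefix still behaves as before, and per-tool entries of the form tool:fullpath can be mixed with prefixes in one comma-separated list, via either --tools or $PPROF_TOOLS:

    pprof --tools=/opt/cross/bin/arm-linux-  ./a.out ./a.out.prof
    pprof --tools=nm:/usr/local/bin/gnm,/opt/cross/bin/  ./a.out ./a.out.prof
    PPROF_TOOLS=addr2line:/usr/bin/addr2line pprof ./a.out ./a.out.prof
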
diff --git a/src/sampler.cc b/src/sampler.cc
index cbc6ab4..a13544a 100755
--- a/src/sampler.cc
+++ b/src/sampler.cc
@@ -42,16 +42,15 @@ using std::min;
// The approximate gap in bytes between sampling actions.
// I.e., we take one sample approximately once every
// tcmalloc_sample_parameter bytes of allocation
-// i.e. about once every 512KB.
+// i.e. about once every 512KB if value is 1<<19.
#ifdef NO_TCMALLOC_SAMPLES
DEFINE_int64(tcmalloc_sample_parameter, 0,
"Unused: code is compiled with NO_TCMALLOC_SAMPLES");
#else
DEFINE_int64(tcmalloc_sample_parameter,
- EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 1<<19),
+ EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 0),
"The approximate gap in bytes between sampling actions. "
- "This must be between 1 and 1<<58.");
-// Note: there are other places in this file where the number 19 occurs.
+ "This must be between 1 and 2^58.");
#endif
namespace tcmalloc {
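
Since the default is now 0, allocation sampling stays off unless re-enabled at run time; for instance, the previous behaviour can be restored through the environment variable read above, using the old default of 1<<19 bytes (the program name is a placeholder):

    TCMALLOC_SAMPLE_PARAMETER=524288 ./my_program
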
diff --git a/src/span.h b/src/span.h
index b3483ca..ab9a796 100644
--- a/src/span.h
+++ b/src/span.h
@@ -60,10 +60,6 @@ struct Span {
int value[64];
#endif
- void* start_ptr() {
- return reinterpret_cast<void*>(start << kPageShift);
- }
-
// What freelist the span is on: IN_USE if on none, or normal or returned
enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST };
};
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 011fc91..13d2c23 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -469,6 +469,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
"MALLOC: %12" PRIu64 " Spans in use\n"
"MALLOC: %12" PRIu64 " Thread heaps in use\n"
"MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n"
+ "MALLOC: %12" PRIu64 " Tcmalloc page size\n"
"------------------------------------------------\n",
stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB,
bytes_in_use, bytes_in_use / MB,
@@ -479,7 +480,8 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
stats.thread_bytes, stats.thread_bytes / MB,
uint64_t(Static::span_allocator()->inuse()),
uint64_t(ThreadCache::HeapsInUse()),
- stats.metadata_bytes, stats.metadata_bytes / MB);
+ stats.metadata_bytes, stats.metadata_bytes / MB,
+ uint64_t(kPageSize));
}
static void PrintStats(int level) {
@@ -637,9 +639,8 @@ class TCMallocImplementation : public MallocExtension {
}
if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
- // We assume that bytes in the page heap are not fragmented too
- // badly, and are therefore available for allocation without
- // growing the pageheap system byte count.
+ // Kept for backwards compatibility. Now defined externally as:
+ // pageheap_free_bytes + pageheap_unmapped_bytes.
SpinLockHolder l(Static::pageheap_lock());
PageHeap::Stats stats = Static::pageheap()->stats();
*value = stats.free_bytes + stats.unmapped_bytes;
@@ -798,25 +799,22 @@ static TCMallocGuard module_enter_exit_hook;
// Helpers for the exported routines below
//-------------------------------------------------------------------
-static inline void* CheckedMallocResult(void *result) {
- Span* fetched_span;
- size_t cl;
-
- if (result != NULL) {
- ASSERT(Static::pageheap()->GetSizeClassOrSpan(result, &cl, &fetched_span));
- }
+static inline bool CheckCachedSizeClass(void *ptr) {
+ PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+ size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
+ return cached_value == 0 ||
+ cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
+}
+static inline void* CheckedMallocResult(void *result) {
+ ASSERT(result == NULL || CheckCachedSizeClass(result));
return result;
}
static inline void* SpanToMallocResult(Span *span) {
- Span* fetched_span = NULL;
- size_t cl = 0;
- ASSERT(Static::pageheap()->GetSizeClassOrSpan(span->start_ptr(),
- &cl, &fetched_span));
- ASSERT(cl == kLargeSizeClass);
- ASSERT(span == fetched_span);
- return span->start_ptr();
+ Static::pageheap()->CacheSizeClass(span->start, 0);
+ return
+ CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
}
static void* DoSampledAllocation(size_t size) {
@@ -827,8 +825,7 @@ static void* DoSampledAllocation(size_t size) {
SpinLockHolder h(Static::pageheap_lock());
// Allocate span
- Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size),
- kLargeSizeClass, kPageSize);
+ Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
if (span == NULL) {
return NULL;
}
@@ -919,7 +916,7 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
report_large = should_report_large(num_pages);
} else {
SpinLockHolder h(Static::pageheap_lock());
- Span* span = Static::pageheap()->New(num_pages, kLargeSizeClass, kPageSize);
+ Span* span = Static::pageheap()->New(num_pages);
result = (span == NULL ? NULL : SpanToMallocResult(span));
report_large = should_report_large(num_pages);
}
@@ -975,22 +972,28 @@ static inline ThreadCache* GetCacheIfPresent() {
inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
if (ptr == NULL) return;
ASSERT(Static::pageheap() != NULL); // Should not call free() before malloc()
- Span* span;
- size_t cl;
-
- if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
- // result can be false because the pointer passed in is invalid
- // (not something returned by malloc or friends), or because the
- // pointer was allocated with some other allocator besides
- // tcmalloc. The latter can happen if tcmalloc is linked in via
- // a dynamic library, but is not listed last on the link line.
- // In that case, libraries after it on the link line will
- // allocate with libc malloc, but free with tcmalloc's free.
- (*invalid_free_fn)(ptr); // Decide how to handle the bad free request
- return;
+ const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+ Span* span = NULL;
+ size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
+
+ if (cl == 0) {
+ span = Static::pageheap()->GetDescriptor(p);
+ if (!span) {
+ // span can be NULL because the pointer passed in is invalid
+ // (not something returned by malloc or friends), or because the
+ // pointer was allocated with some other allocator besides
+ // tcmalloc. The latter can happen if tcmalloc is linked in via
+ // a dynamic library, but is not listed last on the link line.
+ // In that case, libraries after it on the link line will
+ // allocate with libc malloc, but free with tcmalloc's free.
+ (*invalid_free_fn)(ptr); // Decide how to handle the bad free request
+ return;
+ }
+ cl = span->sizeclass;
+ Static::pageheap()->CacheSizeClass(p, cl);
}
-
- if (cl != kLargeSizeClass) {
+ if (cl != 0) {
+ ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
ThreadCache* heap = GetCacheIfPresent();
if (heap != NULL) {
heap->Deallocate(ptr, cl);
@@ -1001,7 +1004,8 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
}
} else {
SpinLockHolder h(Static::pageheap_lock());
- ASSERT(span != NULL && ptr == span->start_ptr());
+ ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
+ ASSERT(span != NULL && span->start == p);
if (span->sample) {
tcmalloc::DLL_Remove(span);
Static::stacktrace_allocator()->Delete(
@@ -1021,17 +1025,20 @@ inline size_t GetSizeWithCallback(void* ptr,
size_t (*invalid_getsize_fn)(void*)) {
if (ptr == NULL)
return 0;
-
- Span* span;
- size_t cl;
- if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
- return (*invalid_getsize_fn)(ptr);
- }
-
- if (cl != kLargeSizeClass) {
+ const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+ size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
+ if (cl != 0) {
return Static::sizemap()->ByteSizeForClass(cl);
} else {
- return span->length << kPageShift;
+ Span *span = Static::pageheap()->GetDescriptor(p);
+ if (span == NULL) { // means we do not own this memory
+ return (*invalid_getsize_fn)(ptr);
+ } else if (span->sizeclass != 0) {
+ Static::pageheap()->CacheSizeClass(p, span->sizeclass);
+ return Static::sizemap()->ByteSizeForClass(span->sizeclass);
+ } else {
+ return span->length << kPageShift;
+ }
}
}
@@ -1126,10 +1133,39 @@ void* do_memalign(size_t align, size_t size) {
// We will allocate directly from the page heap
SpinLockHolder h(Static::pageheap_lock());
- // Any page-level allocation will be fine
- Span* span = Static::pageheap()->New(tcmalloc::pages(size),
- kLargeSizeClass, align);
- return span == NULL ? NULL : SpanToMallocResult(span);
+ if (align <= kPageSize) {
+ // Any page-level allocation will be fine
+ // TODO: We could put the rest of this page in the appropriate
+ // TODO: cache but it does not seem worth it.
+ Span* span = Static::pageheap()->New(tcmalloc::pages(size));
+ return span == NULL ? NULL : SpanToMallocResult(span);
+ }
+
+ // Allocate extra pages and carve off an aligned portion
+ const Length alloc = tcmalloc::pages(size + align);
+ Span* span = Static::pageheap()->New(alloc);
+ if (span == NULL) return NULL;
+
+ // Skip starting portion so that we end up aligned
+ Length skip = 0;
+ while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
+ skip++;
+ }
+ ASSERT(skip < alloc);
+ if (skip > 0) {
+ Span* rest = Static::pageheap()->Split(span, skip);
+ Static::pageheap()->Delete(span);
+ span = rest;
+ }
+
+ // Skip trailing portion that we do not need to return
+ const Length needed = tcmalloc::pages(size);
+ ASSERT(span->length >= needed);
+ if (span->length > needed) {
+ Span* trailer = Static::pageheap()->Split(span, needed);
+ Static::pageheap()->Delete(trailer);
+ }
+ return SpanToMallocResult(span);
}
// Helpers for use by exported routines below:
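
A worked instance of the carve-off path above, assuming the default 4KB pages: memalign(64<<10, 100<<10) requests alloc = pages(100KB + 64KB) = 41 pages, advances skip (at most 15 pages) until (start + skip) << kPageShift is 64KB-aligned, splits off and deletes that prefix, and finally trims the tail beyond needed = pages(100KB) = 25 pages. The skipped prefix and the trailer always total 16 pages in this example, and both go back to the page heap via Delete() rather than being held for the lifetime of the allocation.
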
diff --git a/src/tests/frag_unittest.cc b/src/tests/frag_unittest.cc
index 08494b4..160c41c 100644
--- a/src/tests/frag_unittest.cc
+++ b/src/tests/frag_unittest.cc
@@ -44,13 +44,16 @@
#endif
#include <vector>
#include "base/logging.h"
+#include "common.h"
#include <google/malloc_extension.h>
using std::vector;
int main(int argc, char** argv) {
- static const int kAllocSize = 36<<10; // Bigger than tcmalloc page size
- static const int kTotalAlloc = 400 << 20; // Allocate 400MB in total
+ // Make kAllocSize larger than tcmalloc page size.
+ static const int kAllocSize = 9 << kPageShift;
+ // Allocate 400MB in total.
+ static const int kTotalAlloc = 400 << 20;
static const int kAllocIterations = kTotalAlloc / kAllocSize;
// Allocate lots of objects
diff --git a/src/tests/page_heap_test.cc b/src/tests/page_heap_test.cc
index fd444da..9120b78 100644
--- a/src/tests/page_heap_test.cc
+++ b/src/tests/page_heap_test.cc
@@ -26,7 +26,7 @@ static void TestPageHeap_Stats() {
CheckStats(ph, 0, 0, 0);
// Allocate a span 's1'
- tcmalloc::Span* s1 = ph->New(256, kLargeSizeClass, kPageSize);
+ tcmalloc::Span* s1 = ph->New(256);
CheckStats(ph, 256, 0, 0);
// Split span 's1' into 's1', 's2'. Delete 's2'
diff --git a/src/tests/testutil.cc b/src/tests/testutil.cc
index f2b8592..745de99 100644
--- a/src/tests/testutil.cc
+++ b/src/tests/testutil.cc
@@ -80,7 +80,7 @@ struct FunctionAndId {
int id;
};
-#if defined(NO_THREADS) || !(defined(HAVE_PTHREADS) || defined(_WIN32))
+#if defined(NO_THREADS) || !(defined(HAVE_PTHREAD) || defined(_WIN32))
extern "C" void RunThread(void (*fn)()) {
(*fn)();
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index 64f4deb..8d31117 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -42,7 +42,8 @@ using std::min;
using std::max;
DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
- EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", 16<<20),
+ EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
+ kDefaultOverallThreadCacheSize),
"Bound on the total amount of bytes allocated to "
"thread caches. This bound is not strict, so it is possible "
"for the cache to go over this bound in certain circumstances. ");
diff --git a/src/thread_cache.h b/src/thread_cache.h
index 1165447..352c683 100644
--- a/src/thread_cache.h
+++ b/src/thread_cache.h
@@ -63,9 +63,6 @@ inline bool KernelSupportsTLS() {
class ThreadCache {
public:
- // Default bound on the total amount of thread caches.
- static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
-
// All ThreadCache objects are kept in a linked list (for stats collection)
ThreadCache* next_;
ThreadCache* prev_;
@@ -213,19 +210,6 @@ class ThreadCache {
}
};
- // The number of bytes one ThreadCache will steal from another when
- // the first ThreadCache is forced to Scavenge(), delaying the
- // next call to Scavenge for this thread.
- static const size_t kStealAmount = 1 << 16;
-
- // Lower and upper bounds on the per-thread cache sizes
- static const size_t kMinThreadCacheSize = kMaxSize * 2; //kStealAmount;
- static const size_t kMaxThreadCacheSize = 2 << 20;
-
- // The number of times that a deallocation can cause a freelist to
- // go over its max_length() before shrinking max_length().
- static const int kMaxOverages = 3;
-
// Gets and returns an object from the central cache, and, if possible,
// also adds some objects of that size class to this thread cache.
void* FetchFromCentralCache(size_t cl, size_t byte_size);
diff --git a/src/windows/config.h b/src/windows/config.h
index b5d9bb6..6d6f771 100644
--- a/src/windows/config.h
+++ b/src/windows/config.h
@@ -154,7 +154,7 @@
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
-/* Define to 1 if you have the <sys/ucontext.h> header file. */
+/* <sys/ucontext.h> is broken on redhat 7 */
#undef HAVE_SYS_UCONTEXT_H
/* Define to 1 if you have the <sys/wait.h> header file. */
@@ -172,6 +172,9 @@
/* Define to 1 if you have the <unwind.h> header file. */
#undef HAVE_UNWIND_H
+/* Define to 1 if you have the <valgrind.h> header file. */
+#undef HAVE_VALGRIND_H
+
/* define if your compiler has __attribute__ */
#undef HAVE___ATTRIBUTE__
diff --git a/src/windows/mingw.h b/src/windows/mingw.h
index e69b5da..747b285 100644
--- a/src/windows/mingw.h
+++ b/src/windows/mingw.h
@@ -45,10 +45,23 @@
# define PERFTOOLS_NO_ALIGNED_MALLOC 1
#endif
+// This must be defined before the windows.h is included. We need at
+// least 0x0400 for mutex.h to have access to TryLock, and at least
+// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx.
+// (This latter is an optimization we could take out if need be.)
+#ifndef _WIN32_WINNT
+# define _WIN32_WINNT 0x0501
+#endif
+
#include "windows/port.h"
#define HAVE_SNPRINTF 1
+// Some mingw distributions have a pthreads wrapper, but it doesn't
+// work as well as native windows spinlocks (at least for us). So
+// pretend the pthreads wrapper doesn't exist, even when it does.
+#undef HAVE_PTHREAD
+
#endif /* __MINGW32__ */
#endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */