summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/base/basictypes.h30
-rw-r--r--src/base/dynamic_annotations.h14
-rw-r--r--src/base/sysinfo.cc98
-rw-r--r--src/base/sysinfo.h4
-rw-r--r--src/debugallocation.cc2
-rw-r--r--src/google/heap-checker.h8
-rw-r--r--src/google/malloc_extension.h74
-rw-r--r--src/google/malloc_extension_c.h1
-rw-r--r--src/heap-checker.cc7
-rw-r--r--src/malloc_extension.cc11
-rw-r--r--src/page_heap.cc206
-rw-r--r--src/page_heap.h66
-rw-r--r--src/pagemap.h53
-rwxr-xr-xsrc/pprof409
-rw-r--r--src/stacktrace_config.h13
-rw-r--r--src/symbolize.cc2
-rw-r--r--src/symbolize.h6
-rw-r--r--src/tcmalloc.cc137
-rw-r--r--src/tests/malloc_extension_c_test.c1
-rw-r--r--src/tests/page_heap_test.cc55
-rw-r--r--src/tests/pagemap_unittest.cc51
-rw-r--r--src/tests/profile-handler_unittest.cc17
-rw-r--r--src/tests/tcmalloc_unittest.cc130
23 files changed, 1048 insertions, 347 deletions
diff --git a/src/base/basictypes.h b/src/base/basictypes.h
index e4d4140..9991413 100644
--- a/src/base/basictypes.h
+++ b/src/base/basictypes.h
@@ -240,7 +240,7 @@ struct CompileAssert {
# define HAVE_ATTRIBUTE_SECTION_START 1
#elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__)
-# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__DATA, " #name)))
+# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name)))
#include <mach-o/getsect.h>
#include <mach-o/dyld.h>
@@ -251,18 +251,32 @@ class AssignAttributeStartEnd {
if (_dyld_present()) {
for (int i = _dyld_image_count() - 1; i >= 0; --i) {
const mach_header* hdr = _dyld_get_image_header(i);
- uint32_t len;
- *pstart = getsectdatafromheader(hdr, "__DATA", name, &len);
- if (*pstart) { // NULL if not defined in this dynamic library
- *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc
- *pend = *pstart + len;
- return;
+#ifdef MH_MAGIC_64
+ if (hdr->magic == MH_MAGIC_64) {
+ uint64_t len;
+ *pstart = getsectdatafromheader_64((mach_header_64*)hdr,
+ "__TEXT", name, &len);
+ if (*pstart) { // NULL if not defined in this dynamic library
+ *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc
+ *pend = *pstart + len;
+ return;
+ }
+ }
+#endif
+ if (hdr->magic == MH_MAGIC) {
+ uint32_t len;
+ *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len);
+ if (*pstart) { // NULL if not defined in this dynamic library
+ *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc
+ *pend = *pstart + len;
+ return;
+ }
}
}
}
// If we get here, not defined in a dll at all. See if defined statically.
unsigned long len; // don't ask me why this type isn't uint32_t too...
- *pstart = getsectdata("__DATA", name, &len);
+ *pstart = getsectdata("__TEXT", name, &len);
*pend = *pstart + len;
}
};
diff --git a/src/base/dynamic_annotations.h b/src/base/dynamic_annotations.h
index 5995ac4..a2a268f 100644
--- a/src/base/dynamic_annotations.h
+++ b/src/base/dynamic_annotations.h
@@ -203,9 +203,16 @@
} while (0)
// Instruct the tool to create a happens-before arc between mu->Unlock() and
- // mu->Lock(). This annotation may slow down the race detector; normally it
- // is used only when it would be difficult to annotate each of the mutex's
- // critical sections individually using the annotations above.
+ // mu->Lock(). This annotation may slow down the race detector and hide real
+ // races. Normally it is used only when it would be difficult to annotate each
+ // of the mutex's critical sections individually using the annotations above.
+ // This annotation makes sense only for hybrid race detectors. For pure
+ // happens-before detectors this is a no-op. For more details see
+ // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid .
+ #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \
+ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu)
+
+ // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX.
#define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \
AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu)
@@ -357,6 +364,7 @@
#define ANNOTATE_NEW_MEMORY(address, size) // empty
#define ANNOTATE_EXPECT_RACE(address, description) // empty
#define ANNOTATE_BENIGN_RACE(address, description) // empty
+ #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty
#define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty
#define ANNOTATE_TRACE_MEMORY(arg) // empty
#define ANNOTATE_THREAD_NAME(name) // empty
diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc
index a2bc2a9..3919ba4 100644
--- a/src/base/sysinfo.cc
+++ b/src/base/sysinfo.cc
@@ -441,6 +441,48 @@ static void ConstructFilename(const char* spec, pid_t pid,
}
#endif
+// A templatized helper function instantiated for Mach (OS X) only.
+// It can handle finding info for both 32 bits and 64 bits.
+// Returns true if it successfully handled the hdr, false else.
+#ifdef __MACH__ // Mac OS X, almost certainly
+template<uint32_t kMagic, uint32_t kLCSegment,
+ typename MachHeader, typename SegmentCommand>
+static bool NextExtMachHelper(const mach_header* hdr,
+ int current_image, int current_load_cmd,
+ uint64 *start, uint64 *end, char **flags,
+ uint64 *offset, int64 *inode, char **filename,
+ uint64 *file_mapping, uint64 *file_pages,
+ uint64 *anon_mapping, uint64 *anon_pages,
+ dev_t *dev) {
+ static char kDefaultPerms[5] = "r-xp";
+ if (hdr->magic != kMagic)
+ return false;
+ const char* lc = (const char *)hdr + sizeof(MachHeader);
+  // TODO(csilvers): make this not-quadratic (increment and hold state)
+ for (int j = 0; j < current_load_cmd; j++) // advance to *our* load_cmd
+ lc += ((const load_command *)lc)->cmdsize;
+ if (((const load_command *)lc)->cmd == kLCSegment) {
+ const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image);
+ const SegmentCommand* sc = (const SegmentCommand *)lc;
+ if (start) *start = sc->vmaddr + dlloff;
+ if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
+ if (flags) *flags = kDefaultPerms; // can we do better?
+ if (offset) *offset = sc->fileoff;
+ if (inode) *inode = 0;
+ if (filename)
+ *filename = const_cast<char*>(_dyld_get_image_name(current_image));
+ if (file_mapping) *file_mapping = 0;
+ if (file_pages) *file_pages = 0; // could we use sc->filesize?
+ if (anon_mapping) *anon_mapping = 0;
+ if (anon_pages) *anon_pages = 0;
+ if (dev) *dev = 0;
+ return true;
+ }
+
+ return false;
+}
+#endif
+
ProcMapsIterator::ProcMapsIterator(pid_t pid) {
Init(pid, NULL, false);
}
@@ -456,6 +498,7 @@ ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer,
void ProcMapsIterator::Init(pid_t pid, Buffer *buffer,
bool use_maps_backing) {
+ pid_ = pid;
using_maps_backing_ = use_maps_backing;
dynamic_buffer_ = NULL;
if (!buffer) {
@@ -691,6 +734,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4);
COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2);
COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1);
+ Buffer object_path;
int nread = 0; // fill up buffer with text
NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t)));
if (nread == sizeof(prmap_t)) {
@@ -700,13 +744,27 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
// two middle ints are major and minor device numbers, but I'm not sure.
sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname);
+ if (pid_ == 0) {
+ CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
+ "/proc/self/path/%s", mapinfo->pr_mapname),
+ Buffer::kBufSize);
+ } else {
+ CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
+ "/proc/%d/path/%s", pid_, mapinfo->pr_mapname),
+ Buffer::kBufSize);
+ }
+ ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX);
+ CHECK_LT(len, PATH_MAX);
+ if (len < 0)
+ len = 0;
+ current_filename_[len] = '\0';
+
if (start) *start = mapinfo->pr_vaddr;
if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size;
if (flags) *flags = kPerms[mapinfo->pr_mflags & 7];
if (offset) *offset = mapinfo->pr_offset;
if (inode) *inode = inode_from_mapname;
- // TODO(csilvers): How to map from /proc/map/object to filename?
- if (filename) *filename = mapinfo->pr_mapname; // format is ufs.?.?.inode
+ if (filename) *filename = current_filename_;
if (file_mapping) *file_mapping = 0;
if (file_pages) *file_pages = 0;
if (anon_mapping) *anon_mapping = 0;
@@ -715,7 +773,6 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
return true;
}
#elif defined(__MACH__)
- static char kDefaultPerms[5] = "r-xp";
// We return a separate entry for each segment in the DLL. (TODO(csilvers):
// can we do better?) A DLL ("image") has load-commands, some of which
// talk about segment boundaries.
@@ -728,25 +785,22 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
// We start with the next load command (we've already looked at this one).
for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) {
- const char* lc = ((const char *)hdr + sizeof(struct mach_header));
- // TODO(csilvers): make this not-quadradic (increment and hold state)
- for (int j = 0; j < current_load_cmd_; j++) // advance to *our* load_cmd
- lc += ((const load_command *)lc)->cmdsize;
- if (((const load_command *)lc)->cmd == LC_SEGMENT) {
- const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image_);
- const segment_command* sc = (const segment_command *)lc;
- if (start) *start = sc->vmaddr + dlloff;
- if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
- if (flags) *flags = kDefaultPerms; // can we do better?
- if (offset) *offset = sc->fileoff;
- if (inode) *inode = 0;
- if (filename)
- *filename = const_cast<char*>(_dyld_get_image_name(current_image_));
- if (file_mapping) *file_mapping = 0;
- if (file_pages) *file_pages = 0; // could we use sc->filesize?
- if (anon_mapping) *anon_mapping = 0;
- if (anon_pages) *anon_pages = 0;
- if (dev) *dev = 0;
+#ifdef MH_MAGIC_64
+ if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64,
+ struct mach_header_64, struct segment_command_64>(
+ hdr, current_image_, current_load_cmd_,
+ start, end, flags, offset, inode, filename,
+ file_mapping, file_pages, anon_mapping,
+ anon_pages, dev)) {
+ return true;
+ }
+#endif
+ if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT,
+ struct mach_header, struct segment_command>(
+ hdr, current_image_, current_load_cmd_,
+ start, end, flags, offset, inode, filename,
+ file_mapping, file_pages, anon_mapping,
+ anon_pages, dev)) {
return true;
}
}
diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h
index b4b5c9f..0bcc1f5 100644
--- a/src/base/sysinfo.h
+++ b/src/base/sysinfo.h
@@ -209,9 +209,13 @@ class ProcMapsIterator {
#elif defined(__MACH__)
int current_image_; // dll's are called "images" in macos parlance
int current_load_cmd_; // the segment of this dll we're examining
+#elif defined(__sun__) // Solaris
+ int fd_;
+ char current_filename_[PATH_MAX];
#else
int fd_; // filehandle on /proc/*/maps
#endif
+ pid_t pid_;
char flags_[10];
Buffer* dynamic_buffer_; // dynamically-allocated Buffer
bool using_maps_backing_; // true if we are looking at maps_backing instead of maps.
diff --git a/src/debugallocation.cc b/src/debugallocation.cc
index dcf722d..47fef16 100644
--- a/src/debugallocation.cc
+++ b/src/debugallocation.cc
@@ -674,7 +674,7 @@ class MallocBlock {
uintptr_t pc =
reinterpret_cast<uintptr_t>(queue_entry.deleter_pcs[i]) - 1;
TracePrintf(STDERR_FILENO, " @ %p %s\n",
- pc, symbolization_table[pc]);
+ reinterpret_cast<void*>(pc), symbolization_table[pc]);
}
} else {
RAW_LOG(ERROR,
diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h
index 751eb9f..c0ee8a8 100644
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@@ -51,10 +51,12 @@
#ifndef BASE_HEAP_CHECKER_H_
#define BASE_HEAP_CHECKER_H_
-#include "config.h"
-
#include <sys/types.h> // for size_t
-#ifdef HAVE_STDINT_H
+// I can't #include config.h in this public API file, but I should
+// really use configure (and make heap-checker.h a .in file) to
+// figure out if the system has stdint.h or not. But I'm lazy, so
+// for now I'm assuming it's a problem only with MSVC.
+#ifndef _MSC_VER
#include <stdint.h> // for uintptr_t
#endif
#include <stdarg.h> // for va_list
diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h
index bc53e0f..0342843 100644
--- a/src/google/malloc_extension.h
+++ b/src/google/malloc_extension.h
@@ -42,6 +42,13 @@
#define BASE_MALLOC_EXTENSION_H_
#include <stddef.h>
+// I can't #include config.h in this public API file, but I should
+// really use configure (and make malloc_extension.h a .in file) to
+// figure out if the system has stdint.h or not. But I'm lazy, so
+// for now I'm assuming it's a problem only with MSVC.
+#ifndef _MSC_VER
+#include <stdint.h>
+#endif
#include <string>
// Annoying stuff for windows -- makes sure clients can import these functions
@@ -58,6 +65,10 @@ static const int kMallocHistogramSize = 64;
// One day, we could support other types of writers (perhaps for C?)
typedef std::string MallocExtensionWriter;
+namespace base {
+struct MallocRange;
+}
+
// The default implementations of the following routines do nothing.
// All implementations should be thread-safe; the current one
// (TCMallocImplementation) is.
@@ -99,6 +110,14 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// be passed to "pprof".
virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer);
+ // Invokes func(arg, range) for every controlled memory
+ // range. *range is filled in with information about the range.
+ //
+ // This is a best-effort interface useful only for performance
+ // analysis. The implementation may not call func at all.
+ typedef void (RangeFunction)(void*, const base::MallocRange*);
+ virtual void Ranges(void* arg, RangeFunction func);
+
// -------------------------------------------------------------------
// Control operations for getting and setting malloc implementation
// specific parameters. Some currently useful properties:
@@ -127,12 +146,20 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// This property is not writable.
//
// "tcmalloc.slack_bytes"
- // Number of bytes allocated from system, but not currently
- // in use by malloced objects. I.e., bytes available for
- // allocation without needing more bytes from system.
+ // Number of bytes allocated from system, but not currently in
+ // use by malloced objects. I.e., bytes available for
+ // allocation without needing more bytes from system. It is
+ // the sum of pageheap_free_bytes and pageheap_unmapped_bytes.
+ // This property is not writable.
+ //
+ // "tcmalloc.pageheap_free_bytes"
+ // Number of bytes in free, mapped pages in pageheap
+ // This property is not writable.
+ //
+ // "tcmalloc.pageheap_unmapped_bytes"
+ // Number of bytes in free, unmapped pages in pageheap
// This property is not writable.
//
- // TODO: Add more properties as necessary
// -------------------------------------------------------------------
// Get the named "property"'s value. Returns true if the property
@@ -167,12 +194,14 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// Most malloc implementations ignore this routine.
virtual void MarkThreadBusy();
- // Try to free memory back to the operating system for reuse. Only
- // use this extension if the application has recently freed a lot of
- // memory, and does not anticipate using it again for a long time --
- // to get this memory back may require faulting pages back in by the
- // OS, and that may be slow. (Currently only implemented in
- // tcmalloc.)
+ // Try to release num_bytes of free memory back to the operating
+ // system for reuse. Use this extension with caution -- to get this
+ // memory back may require faulting pages back in by the OS, and
+ // that may be slow. (Currently only implemented in tcmalloc.)
+  // A negative value for num_bytes results in a noop.
+ virtual void ReleaseToSystem(ssize_t num_bytes);
+
+ // Same as ReleaseToSystem() but release as much memory as possible.
virtual void ReleaseFreeMemory();
// Sets the rate at which we release unused memory to the system.
@@ -239,4 +268,29 @@ class PERFTOOLS_DLL_DECL MallocExtension {
virtual void** ReadHeapGrowthStackTraces();
};
+namespace base {
+
+// Information passed per range. More fields may be added later.
+struct MallocRange {
+ enum Type {
+ INUSE, // Application is using this range
+ FREE, // Range is currently free
+ UNMAPPED, // Backing physical memory has been returned to the OS
+ UNKNOWN,
+ // More enum values may be added in the future
+ };
+
+ uintptr_t address; // Address of range
+ size_t length; // Byte length of range
+ Type type; // Type of this range
+ double fraction; // Fraction of range that is being used (0 if !INUSE)
+
+ // Perhaps add the following:
+ // - stack trace if this range was sampled
+ // - heap growth stack trace if applicable to this range
+ // - age when allocated (for inuse) or freed (if not in use)
+};
+
+} // namespace base
+
#endif // BASE_MALLOC_EXTENSION_H_
diff --git a/src/google/malloc_extension_c.h b/src/google/malloc_extension_c.h
index 514305e..95f7f4c 100644
--- a/src/google/malloc_extension_c.h
+++ b/src/google/malloc_extension_c.h
@@ -75,6 +75,7 @@ PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property,
PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value);
PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void);
PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void);
+PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(ssize_t num_bytes);
PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void);
PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size);
PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(void* p);
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index fc8973a..59288e6 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -47,11 +47,9 @@
#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif
-#ifdef HAVE_POLL_H
-#include <poll.h>
-#endif
#include <sys/stat.h>
#include <sys/types.h>
+#include <time.h>
#include <assert.h>
#ifdef HAVE_LINUX_PTRACE_H
@@ -2298,7 +2296,8 @@ void HeapLeakChecker_AfterDestructors() {
}
if (FLAGS_heap_check_after_destructors) {
if (HeapLeakChecker::DoMainHeapCheck()) {
- poll(0, 0, 500);
+ const struct timespec sleep_time = { 0, 500000000 }; // 500 ms
+ nanosleep(&sleep_time, NULL);
// Need this hack to wait for other pthreads to exit.
// Otherwise tcmalloc find errors
// on a free() call from pthreads.
diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc
index 068a693..95fd1c1 100644
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@@ -143,10 +143,14 @@ void MallocExtension::MarkThreadBusy() {
// Default implementation does nothing
}
-void MallocExtension::ReleaseFreeMemory() {
+void MallocExtension::ReleaseToSystem(ssize_t num_bytes) {
// Default implementation does nothing
}
+void MallocExtension::ReleaseFreeMemory() {
+ ReleaseToSystem(LONG_MAX);
+}
+
void MallocExtension::SetMemoryReleaseRate(double rate) {
// Default implementation does nothing
}
@@ -300,6 +304,10 @@ void MallocExtension::GetHeapGrowthStacks(MallocExtensionWriter* writer) {
DumpAddressMap(writer);
}
+void MallocExtension::Ranges(void* arg, RangeFunction func) {
+ // No callbacks by default
+}
+
// These are C shims that work on the current instance.
#define C_SHIM(fn, retval, paramlist, arglist) \
@@ -325,5 +333,6 @@ C_SHIM(SetNumericProperty, int,
C_SHIM(MarkThreadIdle, void, (void), ());
C_SHIM(MarkThreadBusy, void, (void), ());
C_SHIM(ReleaseFreeMemory, void, (void), ());
+C_SHIM(ReleaseToSystem, void, (ssize_t num_bytes), (num_bytes));
C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size));
C_SHIM(GetAllocatedSize, size_t, (void* p), (p));
diff --git a/src/page_heap.cc b/src/page_heap.cc
index 9cbc70e..1e63cb9 100644
--- a/src/page_heap.cc
+++ b/src/page_heap.cc
@@ -49,11 +49,9 @@ namespace tcmalloc {
PageHeap::PageHeap()
: pagemap_(MetaDataAlloc),
pagemap_cache_(0),
- free_pages_(0),
- system_bytes_(0),
scavenge_counter_(0),
// Start scavenging at kMaxPages list
- scavenge_index_(kMaxPages-1) {
+ release_index_(kMaxPages) {
COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
DLL_Init(&large_.normal);
DLL_Init(&large_.returned);
@@ -154,7 +152,7 @@ Span* PageHeap::Carve(Span* span, Length n) {
ASSERT(n > 0);
ASSERT(span->location != Span::IN_USE);
const int old_location = span->location;
- DLL_Remove(span);
+ RemoveFromFreeList(span);
span->location = Span::IN_USE;
Event(span, 'A', n);
@@ -165,18 +163,11 @@ Span* PageHeap::Carve(Span* span, Length n) {
leftover->location = old_location;
Event(leftover, 'S', extra);
RecordSpan(leftover);
-
- // Place leftover span on appropriate free list
- SpanList* listpair = (extra < kMaxPages) ? &free_[extra] : &large_;
- Span* dst = (leftover->location == Span::ON_RETURNED_FREELIST
- ? &listpair->returned : &listpair->normal);
- DLL_Prepend(dst, leftover);
-
+ PrependToFreeList(leftover); // Skip coalescing - no candidates possible
span->length = n;
pagemap_.set(span->start + n - 1, span);
}
ASSERT(Check());
- free_pages_ -= n;
return span;
}
@@ -191,13 +182,12 @@ void PageHeap::Delete(Span* span) {
span->sample = 0;
span->location = Span::ON_NORMAL_FREELIST;
Event(span, 'D', span->length);
- AddToFreeList(span);
- free_pages_ += n;
+ MergeIntoFreeList(span); // Coalesces if possible
IncrementalScavenge(n);
ASSERT(Check());
}
-void PageHeap::AddToFreeList(Span* span) {
+void PageHeap::MergeIntoFreeList(Span* span) {
ASSERT(span->location != Span::IN_USE);
// Coalesce -- we guarantee that "p" != 0, so no bounds checking
@@ -214,7 +204,7 @@ void PageHeap::AddToFreeList(Span* span) {
// Merge preceding span into this span
ASSERT(prev->start + prev->length == p);
const Length len = prev->length;
- DLL_Remove(prev);
+ RemoveFromFreeList(prev);
DeleteSpan(prev);
span->start -= len;
span->length += len;
@@ -226,35 +216,43 @@ void PageHeap::AddToFreeList(Span* span) {
// Merge next span into this span
ASSERT(next->start == p+n);
const Length len = next->length;
- DLL_Remove(next);
+ RemoveFromFreeList(next);
DeleteSpan(next);
span->length += len;
pagemap_.set(span->start + span->length - 1, span);
Event(span, 'R', len);
}
+ PrependToFreeList(span);
+}
+
+void PageHeap::PrependToFreeList(Span* span) {
+ ASSERT(span->location != Span::IN_USE);
SpanList* list = (span->length < kMaxPages) ? &free_[span->length] : &large_;
if (span->location == Span::ON_NORMAL_FREELIST) {
+ stats_.free_bytes += (span->length << kPageShift);
DLL_Prepend(&list->normal, span);
} else {
+ stats_.unmapped_bytes += (span->length << kPageShift);
DLL_Prepend(&list->returned, span);
}
}
+void PageHeap::RemoveFromFreeList(Span* span) {
+ ASSERT(span->location != Span::IN_USE);
+ if (span->location == Span::ON_NORMAL_FREELIST) {
+ stats_.free_bytes -= (span->length << kPageShift);
+ } else {
+ stats_.unmapped_bytes -= (span->length << kPageShift);
+ }
+ DLL_Remove(span);
+}
+
void PageHeap::IncrementalScavenge(Length n) {
// Fast path; not yet time to release memory
scavenge_counter_ -= n;
if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
- // Never delay scavenging for more than the following number of
- // deallocated pages. With 4K pages, this comes to 4GB of
- // deallocation.
- static const int kMaxReleaseDelay = 1 << 20;
-
- // If there is nothing to release, wait for so many pages before
- // scavenging again. With 4K pages, this comes to 1GB of memory.
- static const int kDefaultReleaseDelay = 1 << 18;
-
const double rate = FLAGS_tcmalloc_release_rate;
if (rate <= 1e-6) {
// Tiny release rate means that releasing is disabled.
@@ -262,41 +260,62 @@ void PageHeap::IncrementalScavenge(Length n) {
return;
}
- // Find index of free list to scavenge
- int index = scavenge_index_ + 1;
- for (int i = 0; i < kMaxPages+1; i++) {
- if (index > kMaxPages) index = 0;
- SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
- if (!DLL_IsEmpty(&slist->normal)) {
- // Release the last span on the normal portion of this list
- Span* s = slist->normal.prev;
- ASSERT(s->location == Span::ON_NORMAL_FREELIST);
- DLL_Remove(s);
- const Length n = s->length;
- TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
- static_cast<size_t>(s->length << kPageShift));
- s->location = Span::ON_RETURNED_FREELIST;
- AddToFreeList(s);
-
- // Compute how long to wait until we return memory.
- // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages
- // after releasing one page.
- const double mult = 1000.0 / rate;
- double wait = mult * static_cast<double>(n);
- if (wait > kMaxReleaseDelay) {
- // Avoid overflow and bound to reasonable range
- wait = kMaxReleaseDelay;
- }
- scavenge_counter_ = static_cast<int64_t>(wait);
+ Length released_pages = ReleaseAtLeastNPages(1);
- scavenge_index_ = index; // Scavenge at index+1 next time
- return;
+ if (released_pages == 0) {
+ // Nothing to scavenge, delay for a while.
+ scavenge_counter_ = kDefaultReleaseDelay;
+ } else {
+ // Compute how long to wait until we return memory.
+ // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages
+ // after releasing one page.
+ const double mult = 1000.0 / rate;
+ double wait = mult * static_cast<double>(released_pages);
+ if (wait > kMaxReleaseDelay) {
+ // Avoid overflow and bound to reasonable range.
+ wait = kMaxReleaseDelay;
}
- index++;
+ scavenge_counter_ = static_cast<int64_t>(wait);
}
+}
+
+Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) {
+ Span* s = slist->normal.prev;
+ ASSERT(s->location == Span::ON_NORMAL_FREELIST);
+ RemoveFromFreeList(s);
+ const Length n = s->length;
+ TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
+ static_cast<size_t>(s->length << kPageShift));
+ s->location = Span::ON_RETURNED_FREELIST;
+ MergeIntoFreeList(s); // Coalesces if possible.
+ return n;
+}
- // Nothing to scavenge, delay for a while
- scavenge_counter_ = kDefaultReleaseDelay;
+Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
+ Length released_pages = 0;
+ Length prev_released_pages = -1;
+
+ // Round robin through the lists of free spans, releasing the last
+ // span in each list. Stop after releasing at least num_pages.
+ while (released_pages < num_pages) {
+ if (released_pages == prev_released_pages) {
+ // Last iteration of while loop made no progress.
+ break;
+ }
+ prev_released_pages = released_pages;
+
+ for (int i = 0; i < kMaxPages+1 && released_pages < num_pages;
+ i++, release_index_++) {
+ if (release_index_ > kMaxPages) release_index_ = 0;
+ SpanList* slist = (release_index_ == kMaxPages) ?
+ &large_ : &free_[release_index_];
+ if (!DLL_IsEmpty(&slist->normal)) {
+ Length released_len = ReleaseLastNormalSpan(slist);
+ released_pages += released_len;
+ }
+ }
+ }
+ return released_pages;
}
void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
@@ -311,6 +330,10 @@ void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
}
}
+static double MB(uint64_t bytes) {
+ return bytes / 1048576.0;
+}
+
static double PagesToMB(uint64_t pages) {
return (pages << kPageShift) / 1048576.0;
}
@@ -323,8 +346,8 @@ void PageHeap::Dump(TCMalloc_Printer* out) {
}
}
out->printf("------------------------------------------------\n");
- out->printf("PageHeap: %d sizes; %6.1f MB free\n",
- nonempty_sizes, PagesToMB(free_pages_));
+ out->printf("PageHeap: %d sizes; %6.1f MB free; %6.1f MB unmapped\n",
+ nonempty_sizes, MB(stats_.free_bytes), MB(stats_.unmapped_bytes));
out->printf("------------------------------------------------\n");
uint64_t total_normal = 0;
uint64_t total_returned = 0;
@@ -376,6 +399,37 @@ void PageHeap::Dump(TCMalloc_Printer* out) {
PagesToMB(total_returned));
}
+bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) {
+ Span* span = reinterpret_cast<Span*>(pagemap_.Next(start));
+ if (span == NULL) {
+ return false;
+ }
+ r->address = span->start << kPageShift;
+ r->length = span->length << kPageShift;
+ r->fraction = 0;
+ switch (span->location) {
+ case Span::IN_USE:
+ r->type = base::MallocRange::INUSE;
+ r->fraction = 1;
+ if (span->sizeclass > 0) {
+ // Only some of the objects in this span may be in use.
+ const size_t osize = Static::sizemap()->class_to_size(span->sizeclass);
+ r->fraction = (1.0 * osize * span->refcount) / r->length;
+ }
+ break;
+ case Span::ON_NORMAL_FREELIST:
+ r->type = base::MallocRange::FREE;
+ break;
+ case Span::ON_RETURNED_FREELIST:
+ r->type = base::MallocRange::UNMAPPED;
+ break;
+ default:
+ r->type = base::MallocRange::UNKNOWN;
+ break;
+ }
+ return true;
+}
+
static void RecordGrowth(size_t growth) {
StackTrace* t = Static::stacktrace_allocator()->New();
t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3);
@@ -401,8 +455,8 @@ bool PageHeap::GrowHeap(Length n) {
ask = actual_size >> kPageShift;
RecordGrowth(ask << kPageShift);
- uint64_t old_system_bytes = system_bytes_;
- system_bytes_ += (ask << kPageShift);
+ uint64_t old_system_bytes = stats_.system_bytes;
+ stats_.system_bytes += (ask << kPageShift);
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
ASSERT(p > 0);
@@ -411,7 +465,7 @@ bool PageHeap::GrowHeap(Length n) {
// when a program keeps allocating and freeing large blocks.
if (old_system_bytes < kPageMapBigAllocationThreshold
- && system_bytes_ >= kPageMapBigAllocationThreshold) {
+ && stats_.system_bytes >= kPageMapBigAllocationThreshold) {
pagemap_.PreallocateMoreMemory();
}
@@ -419,10 +473,8 @@ bool PageHeap::GrowHeap(Length n) {
// Plus ensure one before and one after so coalescing code
// does not need bounds-checking.
if (pagemap_.Ensure(p-1, ask+2)) {
- // Pretend the new area is allocated and then Delete() it to
- // cause any necessary coalescing to occur.
- //
- // We do not adjust free_pages_ here since Delete() will do it for us.
+ // Pretend the new area is allocated and then Delete() it to cause
+ // any necessary coalescing to occur.
Span* span = NewSpan(p, ask);
RecordSpan(span);
Delete(span);
@@ -464,26 +516,4 @@ bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages,
return true;
}
-void PageHeap::ReleaseFreeList(Span* list) {
- // Walk backwards through list so that when we push these
- // spans on the "returned" list, we preserve the order.
- while (!DLL_IsEmpty(list)) {
- Span* s = list->prev;
- DLL_Remove(s);
- ASSERT(s->location == Span::ON_NORMAL_FREELIST);
- s->location = Span::ON_RETURNED_FREELIST;
- TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
- static_cast<size_t>(s->length << kPageShift));
- AddToFreeList(s); // Coalesces if possible
- }
-}
-
-void PageHeap::ReleaseFreePages() {
- for (Length s = 0; s < kMaxPages; s++) {
- ReleaseFreeList(&free_[s].normal);
- }
- ReleaseFreeList(&large_.normal);
- ASSERT(Check());
-}
-
} // namespace tcmalloc
diff --git a/src/page_heap.h b/src/page_heap.h
index bd18931..5ab0d04 100644
--- a/src/page_heap.h
+++ b/src/page_heap.h
@@ -34,6 +34,7 @@
#define TCMALLOC_PAGE_HEAP_H_
#include <config.h>
+#include <google/malloc_extension.h>
#include "common.h"
#include "packed-cache-inl.h"
#include "pagemap.h"
@@ -119,13 +120,18 @@ class PageHeap {
// Dump state to stderr
void Dump(TCMalloc_Printer* out);
- // Return number of bytes allocated from system
- inline uint64_t SystemBytes() const { return system_bytes_; }
+ // If this page heap is managing a range with starting page # >= start,
+ // store info about the range in *r and return true. Else return false.
+ bool GetNextRange(PageID start, base::MallocRange* r);
- // Return number of free bytes in heap
- uint64_t FreeBytes() const {
- return (static_cast<uint64_t>(free_pages_) << kPageShift);
- }
+ // Page heap statistics
+ struct Stats {
+ Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0) {}
+ uint64_t system_bytes; // Total bytes allocated from system
+ uint64_t free_bytes; // Total bytes on normal freelists
+ uint64_t unmapped_bytes; // Total bytes on returned freelists
+ };
+ inline Stats stats() const { return stats_; }
bool Check();
// Like Check() but does some more comprehensive checking.
@@ -133,8 +139,13 @@ class PageHeap {
bool CheckList(Span* list, Length min_pages, Length max_pages,
int freelist); // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST
- // Release all free pages in this heap for reuse by the OS:
- void ReleaseFreePages();
+ // Try to release at least num_pages for reuse by the OS. Returns
+ // the actual number of pages released, which may be less than
+ // num_pages if there weren't enough pages to release. The result
+ // may also be larger than num_pages since page_heap might decide to
+ // release one large range instead of fragmenting it into two
+ // smaller released and unreleased ranges.
+ Length ReleaseAtLeastNPages(Length num_pages);
// Return 0 if we have no information, or else the correct sizeclass for p.
// Reads and writes to pagemap_cache_ do not require locking.
@@ -163,6 +174,15 @@ class PageHeap {
// REQUIRED: kMaxPages >= kMinSystemAlloc;
static const size_t kMaxPages = kMinSystemAlloc;
+ // Never delay scavenging for more than the following number of
+ // deallocated pages. With 4K pages, this comes to 4GB of
+ // deallocation.
+ static const int kMaxReleaseDelay = 1 << 20;
+
+ // If there is nothing to release, wait for so many pages before
+ // scavenging again. With 4K pages, this comes to 1GB of memory.
+ static const int kDefaultReleaseDelay = 1 << 18;
+
// Pick the appropriate map and cache types based on pointer size
typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
@@ -183,11 +203,8 @@ class PageHeap {
// Array mapping from span length to a doubly linked list of free spans
SpanList free_[kMaxPages];
- // Number of pages kept in free lists
- uintptr_t free_pages_;
-
- // Bytes allocated from system
- uint64_t system_bytes_;
+ // Statistics on system, free, and unmapped bytes
+ Stats stats_;
bool GrowHeap(Length n);
@@ -211,23 +228,30 @@ class PageHeap {
// span of exactly the specified length. Else, returns NULL.
Span* AllocLarge(Length n);
- // Coalesce span with neighboring spans if possible. Add the
- // resulting span to the appropriate free list.
- void AddToFreeList(Span* span);
+ // Coalesce span with neighboring spans if possible, prepend to
+ // appropriate free list, and adjust stats.
+ void MergeIntoFreeList(Span* span);
+
+ // Prepends span to appropriate free list, and adjusts stats.
+ void PrependToFreeList(Span* span);
+
+ // Removes span from its free list, and adjust stats.
+ void RemoveFromFreeList(Span* span);
// Incrementally release some memory to the system.
// IncrementalScavenge(n) is called whenever n pages are freed.
void IncrementalScavenge(Length n);
- // Release all pages in the specified free list for reuse by the OS
- // REQURES: list must be a "normal" list (i.e., not "returned")
- void ReleaseFreeList(Span* list);
+ // Release the last span on the normal portion of this list.
+ // Return the length of that span.
+ Length ReleaseLastNormalSpan(SpanList* slist);
+
// Number of pages to deallocate before doing more scavenging
int64_t scavenge_counter_;
- // Index of last free list we scavenged
- int scavenge_index_;
+ // Index of last free list where we released memory to the OS.
+ int release_index_;
};
} // namespace tcmalloc
diff --git a/src/pagemap.h b/src/pagemap.h
index 3559932..1786e68 100644
--- a/src/pagemap.h
+++ b/src/pagemap.h
@@ -95,10 +95,20 @@ class TCMalloc_PageMap1 {
// REQUIRES "k" is in range "[0,2^BITS-1]".
// REQUIRES "k" has been ensured before.
//
- // Sets the value for KEY.
+ // Sets the value 'v' for key 'k'.
void set(Number k, void* v) {
array_[k] = v;
}
+
+ // Return the first non-NULL pointer found in this map for
+ // a page number >= k. Returns NULL if no such number is found.
+ void* Next(Number k) const {
+ while (k < (1 << BITS)) {
+ if (array_[k] != NULL) return array_[k];
+ k++;
+ }
+ return NULL;
+ }
};
// Two-level radix tree
@@ -170,6 +180,24 @@ class TCMalloc_PageMap2 {
// Allocate enough to keep track of all possible pages
Ensure(0, 1 << BITS);
}
+
+ void* Next(Number k) const {
+ while (k < (1 << BITS)) {
+ const Number i1 = k >> LEAF_BITS;
+ Leaf* leaf = root_[i1];
+ if (leaf != NULL) {
+ // Scan forward in leaf
+ for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) {
+ if (leaf->values[i2] != NULL) {
+ return leaf->values[i2];
+ }
+ }
+ }
+ // Skip to next top-level entry
+ k = (i1 + 1) << LEAF_BITS;
+ }
+ return NULL;
+ }
};
// Three-level radix tree
@@ -264,6 +292,29 @@ class TCMalloc_PageMap3 {
void PreallocateMoreMemory() {
}
+
+ void* Next(Number k) const {
+ while (k < (Number(1) << BITS)) {
+ const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS);
+ const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1);
+ if (root_->ptrs[i1] == NULL) {
+ // Advance to next top-level entry
+ k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS);
+ } else {
+ Leaf* leaf = reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2]);
+ if (leaf != NULL) {
+ for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) {
+ if (leaf->values[i3] != NULL) {
+ return leaf->values[i3];
+ }
+ }
+ }
+ // Advance to next interior entry
+ k = ((k >> LEAF_BITS) + 1) << LEAF_BITS;
+ }
+ }
+ return NULL;
+ }
};
#endif // TCMALLOC_PAGEMAP_H_
diff --git a/src/pprof b/src/pprof
index 88a6041..62cce12 100755
--- a/src/pprof
+++ b/src/pprof
@@ -92,6 +92,7 @@ my $GV = "gv";
my $PS2PDF = "ps2pdf";
# These are used for dynamic profiles
my $WGET = "wget";
+my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets
my $CURL = "curl";
# These are the web pages that servers need to support for dynamic profiles
@@ -117,6 +118,11 @@ my $address_length = 16;
# A list of paths to search for shared object files
my @prefix_list = ();
+# Special routine name that should not have any symbols.
+# Used as separator to parse "addr2line -i" output.
+my $sep_symbol = '_fini';
+my $sep_address = undef;
+
##### Argument parsing #####
sub usage_string {
@@ -504,6 +510,20 @@ sub Init() {
ConfigureObjTools($main::prog)
}
+ # Check what flags our commandline utilities support
+ if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) {
+ my @lines = <TFILE>;
+ if (grep(/unrecognized/, @lines) > 0) {
+ # grep found 'unrecognized' token from WGET, clear WGET flags
+ $WGET_FLAGS = "";
+ }
+ close(TFILE);
+ }
+ # TODO(csilvers): check all the other binaries and objtools to see
+ # if they are installed and what flags they support, and store that
+ # in a data structure here, rather than scattering these tests about.
+ # Then, ideally, rewrite code to use wget OR curl OR GET or ...
+
# Break the opt_list_prefix into the prefix_list array
@prefix_list = split (',', $main::opt_lib_prefix);
@@ -952,22 +972,31 @@ sub PrintSymbolizedProfile {
print 'binary=', $prog, "\n";
}
while (my ($pc, $name) = each(%{$symbols})) {
- my $fullname = $name->[2];
- print '0x', $pc, ' ', $fullname, "\n";
+ my $sep = ' ';
+ print '0x', $pc;
+ # We have a list of function names, which include the inlined
+ # calls. They are separated (and terminated) by --, which is
+ # illegal in function names.
+ for (my $j = 2; $j <= $#{$name}; $j += 3) {
+ print $sep, $name->[$j];
+ $sep = '--';
+ }
+ print "\n";
}
print '---', "\n";
+ $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash
+ my $profile_marker = $&;
+ print '--- ', $profile_marker, "\n";
if (defined($main::collected_profile)) {
# if used with remote fetch, simply dump the collected profile to output.
open(SRC, "<$main::collected_profile");
while (<SRC>) {
print $_;
}
+ close(SRC);
} else {
# dump a cpu-format profile to standard out
- $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash
- my $profile_marker = $&;
- print '--- ', $profile_marker, "\n";
PrintProfileData($profile);
}
}
@@ -1069,9 +1098,9 @@ sub PrintDisassembly {
}
# Return reference to array of tuples of the form:
-# [address, filename, linenumber, instruction]
+# [start_address, filename, linenumber, instruction, limit_address]
# E.g.,
-# ["0x806c43d", "/foo/bar.cc", 131, "ret"]
+# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"]
sub Disassemble {
my $prog = shift;
my $offset = shift;
@@ -1086,6 +1115,7 @@ sub Disassemble {
my @result = ();
my $filename = "";
my $linenumber = -1;
+ my $last = ["", "", "", ""];
while (<OBJDUMP>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
chop;
@@ -1098,7 +1128,9 @@ sub Disassemble {
# Disassembly line -- zero-extend address to full length
my $addr = HexExtend($1);
my $k = AddressAdd($addr, $offset);
- push(@result, [$k, $filename, $linenumber, $2]);
+ $last->[4] = $k; # Store ending address for previous instruction
+ $last = [$k, $filename, $linenumber, $2, $end_addr];
+ push(@result, $last);
}
}
close(OBJDUMP);
@@ -1274,8 +1306,13 @@ sub PrintSource {
my $total1 = 0; # Total flat counts
my $total2 = 0; # Total cumulative counts
foreach my $e (@instructions) {
- my $c1 = GetEntry($flat, $e->[0]);
- my $c2 = GetEntry($cumulative, $e->[0]);
+    # Add up counts for all addresses that fall inside this instruction
+ my $c1 = 0;
+ my $c2 = 0;
+ for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
+ $c1 += GetEntry($flat, $a);
+ $c2 += GetEntry($cumulative, $a);
+ }
$running1 += $c1;
$running2 += $c2;
$total1 += $c1;
@@ -1386,8 +1423,13 @@ sub PrintDisassembledFunction {
my $flat_total = 0;
my $cum_total = 0;
foreach my $e (@instructions) {
- my $c1 = GetEntry($flat, $e->[0]);
- my $c2 = GetEntry($cumulative, $e->[0]);
+    # Add up counts for all addresses that fall inside this instruction
+ my $c1 = 0;
+ my $c2 = 0;
+ for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
+ $c1 += GetEntry($flat, $a);
+ $c2 += GetEntry($cumulative, $a);
+ }
push(@flat_count, $c1);
push(@cum_count, $c2);
$flat_total += $c1;
@@ -1615,10 +1657,10 @@ sub PrintDot {
foreach my $k (keys(%{$raw})) {
# TODO: omit low %age edges
$n = $raw->{$k};
- my @addrs = split(/\n/, $k);
- for (my $i = 1; $i <= $#addrs; $i++) {
- my $src = OutputKey($symbols, $addrs[$i]);
- my $dst = OutputKey($symbols, $addrs[$i-1]);
+ my @translated = TranslateStack($symbols, $k);
+ for (my $i = 1; $i <= $#translated; $i++) {
+ my $src = $translated[$i];
+ my $dst = $translated[$i-1];
#next if ($src eq $dst); # Avoid self-edges?
if (exists($node{$src}) && exists($node{$dst})) {
my $edge_label = "$src\001$dst";
@@ -1648,14 +1690,18 @@ sub PrintDot {
if ($edgeweight > 100000) { $edgeweight = 100000; }
$edgeweight = int($edgeweight);
+ my $style = sprintf("setlinewidth(%f)", $w);
+ if ($x[1] =~ m/\(inline\)/) {
+ $style .= ",dashed";
+ }
+
# Use a slightly squashed function of the edge count as the weight
- printf DOT ("N%s -> N%s [label=%s, weight=%d, " .
- "style=\"setlinewidth(%f)\"];\n",
+ printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n",
$node{$x[0]},
$node{$x[1]},
Unparse($n),
$edgeweight,
- $w);
+ $style);
}
}
@@ -1665,42 +1711,74 @@ sub PrintDot {
return 1;
}
-# Generate the key under which a given address should be counted
-# based on the user-specified output granularity.
-sub OutputKey {
+# Translate a stack of addresses into a stack of symbols
+sub TranslateStack {
my $symbols = shift;
- my $a = shift;
-
- # Skip large addresses since they sometimes show up as fake entries on RH9
- if (length($a) > 8) {
- if ($a gt "7fffffffffffffff") { return ''; }
- }
-
- # Extract symbolic info for address
- my $func = $a;
- my $fullfunc = $a;
- my $fileline = "";
- if (exists($symbols->{$a})) {
- $func = $symbols->{$a}->[0];
- $fullfunc = $symbols->{$a}->[2];
- $fileline = $symbols->{$a}->[1];
- }
-
- if ($main::opt_disasm || $main::opt_list) {
- return $a; # We want just the address for the key
- } elsif ($main::opt_addresses) {
- return "$a $func $fileline";
- } elsif ($main::opt_lines) {
- return "$func $fileline";
- } elsif ($main::opt_functions) {
- return $func;
- } elsif ($main::opt_files) {
- my $f = ($fileline eq '') ? $a : $fileline;
- $f =~ s/:\d+$//;
- return $f;
- } else {
- return $a;
+ my $k = shift;
+
+ my @addrs = split(/\n/, $k);
+ my @result = ();
+ for (my $i = 0; $i <= $#addrs; $i++) {
+ my $a = $addrs[$i];
+
+ # Skip large addresses since they sometimes show up as fake entries on RH9
+ if (length($a) > 8 && $a gt "7fffffffffffffff") {
+ next;
+ }
+
+ if ($main::opt_disasm || $main::opt_list) {
+ # We want just the address for the key
+ push(@result, $a);
+ next;
+ }
+
+ my $symlist = $symbols->{$a};
+ if (!defined($symlist)) {
+ $symlist = [$a, "", $a];
+ }
+
+ # We can have a sequence of symbols for a particular entry
+ # (more than one symbol in the case of inlining). Callers
+ # come before callees in symlist, so walk backwards since
+ # the translated stack should contain callees before callers.
+ for (my $j = $#{$symlist}; $j >= 2; $j -= 3) {
+ my $func = $symlist->[$j-2];
+ my $fileline = $symlist->[$j-1];
+ my $fullfunc = $symlist->[$j];
+ if ($j > 2) {
+ $func = "$func (inline)";
+ }
+ if ($main::opt_addresses) {
+ push(@result, "$a $func $fileline");
+ } elsif ($main::opt_lines) {
+ if ($func eq '??' && $fileline eq '??:0') {
+ push(@result, "$a");
+ } else {
+ push(@result, "$func $fileline");
+ }
+ } elsif ($main::opt_functions) {
+ if ($func eq '??') {
+ push(@result, "$a");
+ } else {
+ push(@result, $func);
+ }
+ } elsif ($main::opt_files) {
+ if ($fileline eq '??:0' || $fileline eq '') {
+ push(@result, "$a");
+ } else {
+ my $f = $fileline;
+ $f =~ s/:\d+$//;
+ push(@result, $f);
+ }
+ } else {
+ push(@result, $a);
+ last; # Do not print inlined info
+ }
+ }
}
+
+ # print join(",", @addrs), " => ", join(",", @result), "\n";
+ return @result;
}
# Generate percent string for a number and a total
@@ -1978,17 +2056,16 @@ sub ReduceProfile {
my $result = {};
foreach my $k (keys(%{$profile})) {
my $count = $profile->{$k};
- my @addrs = split(/\n/, $k);
+ my @translated = TranslateStack($symbols, $k);
my @path = ();
my %seen = ();
$seen{''} = 1; # So that empty keys are skipped
- foreach my $a (@addrs) {
+ foreach my $e (@translated) {
# To avoid double-counting due to recursion, skip a stack-trace
# entry if it has already been seen
- my $key = OutputKey($symbols, $a);
- if (!$seen{$key}) {
- $seen{$key} = 1;
- push(@path, $key);
+ if (!$seen{$e}) {
+ $seen{$e} = 1;
+ push(@path, $e);
}
}
my $reduced_path = join("\n", @path);
@@ -1997,6 +2074,20 @@ sub ReduceProfile {
return $result;
}
+# Does the specified symbol array match the regexp?
+sub SymbolMatches {
+ my $sym = shift;
+ my $re = shift;
+ if (defined($sym)) {
+ for (my $i = 0; $i < $#{$sym}; $i += 3) {
+ if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
# Focus only on paths involving specified regexps
sub FocusProfile {
my $symbols = shift;
@@ -2008,10 +2099,7 @@ sub FocusProfile {
my @addrs = split(/\n/, $k);
foreach my $a (@addrs) {
# Reply if it matches either the address/shortname/fileline
- if (($a =~ m/$focus/) ||
- (exists($symbols->{$a}) &&
- (($symbols->{$a}->[0] =~ m/$focus/) ||
- ($symbols->{$a}->[1] =~ m/$focus/)))) {
+ if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) {
AddEntry($result, $k, $count);
last;
}
@@ -2032,10 +2120,7 @@ sub IgnoreProfile {
my $matched = 0;
foreach my $a (@addrs) {
# Reply if it matches either the address/shortname/fileline
- if (($a =~ m/$ignore/) ||
- (exists($symbols->{$a}) &&
- (($symbols->{$a}->[0] =~ m/$ignore/) ||
- ($symbols->{$a}->[1] =~ m/$ignore/)))) {
+ if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) {
$matched = 1;
last;
}
@@ -2195,7 +2280,7 @@ sub IsSymbolizedProfileFile {
sub CheckSymbolPage {
my $url = SymbolPageURL();
- open(SYMBOL, "$WGET -qO- '$url' |");
+ open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |");
my $line = <SYMBOL>;
$line =~ s/\r//g; # turn windows-looking lines into unix-looking lines
close(SYMBOL);
@@ -2240,7 +2325,7 @@ sub SymbolPageURL {
sub FetchProgramName() {
my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]);
my $url = "http://$host:$port$PROGRAM_NAME_PAGE";
- my $command_line = "$WGET -qO- '$url'";
+ my $command_line = "$WGET $WGET_FLAGS -qO- '$url'";
open(CMDLINE, "$command_line |") or error($command_line);
my $cmdline = <CMDLINE>;
$cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -2346,13 +2431,21 @@ sub FetchSymbols {
# /symbol, the symbols match and are retrievable from the map.
my $shortpc = $pc;
$shortpc =~ s/^0*//;
+ # Each line may have a list of names, which includes the function
+ # and also other functions it has inlined. They are separated
+    # (in PrintSymbolizedProfile) by --, which is illegal in function names.
+ my $fullnames;
if (defined($symbol_map->{$shortpc})) {
- $fullname = $symbol_map->{$shortpc};
+ $fullnames = $symbol_map->{$shortpc};
} else {
- $fullname = "0x" . $pc; # Just use addresses
+ $fullnames = "0x" . $pc; # Just use addresses
+ }
+ my $sym = [];
+ $symbols->{$pc} = $sym;
+ foreach my $fullname (split("--", $fullnames)) {
+ my $name = ShortFunctionName($fullname);
+ push(@{$sym}, $name, "?", $fullname);
}
- my $name = ShortFunctionName($fullname);
- $symbols->{$pc} = [$name, "?", $fullname];
}
return $symbols;
}
@@ -2427,7 +2520,7 @@ sub FetchDynamicProfile {
return $real_profile;
}
- my $cmd = "$WGET $wget_timeout -q -O $tmp_profile '$url'";
+ my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'";
if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){
print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n";
if ($encourage_patience) {
@@ -2752,12 +2845,26 @@ sub ReadCPUProfile {
# Make key out of the stack entries
my @k = ();
- for (my $j = $d; $j--; ) {
+ for (my $j = 0; $j < $d; $j++) {
my $pclo = $slots->get($i++);
my $pchi = $slots->get($i++);
if ($pclo == -1 || $pchi == -1) {
error("$fname: Unexpected EOF when reading stack of depth $d\n");
}
+
+ # Subtract one from caller pc so we map back to call instr.
+ # However, don't do this if we're reading a symbolized profile
+ # file, in which case the subtract-one was done when the file
+ # was written.
+ if ($j > 0 && !$main::use_symbolized_profile) {
+ if ($pclo == 0) {
+ $pchi--;
+ $pclo = 0xffffffff;
+ } else {
+ $pclo--;
+ }
+ }
+
my $pc = sprintf("%08x%08x", $pchi, $pclo);
$pcs->{$pc} = 1;
push @k, $pc;
@@ -3516,87 +3623,111 @@ sub MapToSymbols {
my $pclist = shift;
my $symbols = shift;
+ my $debug = 0;
+
# Ignore empty binaries
if ($#{$pclist} < 0) { return; }
- my $got_symbols = MapSymbolsWithNM($image, $offset, $pclist, $symbols);
- if ($main::opt_interactive ||
- $main::opt_addresses ||
- $main::opt_lines ||
- $main::opt_files ||
- $main::opt_list ||
- $main::opt_callgrind ||
- !$got_symbols) {
- GetLineNumbers($image, $offset, $pclist, $symbols);
+ # Figure out the addr2line command to use
+ my $addr2line = $obj_tool_map{"addr2line"};
+ my $cmd = "$addr2line -f -C -e $image";
+ if (exists $obj_tool_map{"addr2line_pdb"}) {
+ $addr2line = $obj_tool_map{"addr2line_pdb"};
+ $cmd = "$addr2line --demangle -f -C -e $image";
}
-}
-# The file $tmpfile_sym must already have been created before calling this.
-sub GetLineNumbersViaAddr2Line {
- my $addr2line_command = shift;
- my $pclist = shift;
- my $symbols = shift;
+ # If "addr2line" isn't installed on the system at all, just use
+ # nm to get what info we can (function names, but not line numbers).
+ if (system("$addr2line --help >/dev/null 2>&1") != 0) {
+ MapSymbolsWithNM($image, $offset, $pclist, $symbols);
+ return;
+ }
+
+ # "addr2line -i" can produce a variable number of lines per input
+ # address, with no separator that allows us to tell when data for
+ # the next address starts. So we find the address for a special
+ # symbol (_fini) and interleave this address between all real
+ # addresses passed to addr2line. The name of this special symbol
+ # can then be used as a separator.
+ $sep_address = undef; # May be filled in by MapSymbolsWithNM()
+ my $nm_symbols = {};
+ MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols);
+ # TODO(csilvers): only add '-i' if addr2line supports it.
+ if (defined($sep_address)) {
+ # Only add " -i" to addr2line if the binary supports it.
+ # addr2line --help returns 0, but not if it sees an unknown flag first.
+ if (system("$cmd -i --help >/dev/null 2>&1") == 0) {
+ $cmd .= " -i";
+ } else {
+ $sep_address = undef; # no need for sep_address if we don't support -i
+ }
+ }
+
+ # Make file with all PC values with intervening 'sep_address' so
+ # that we can reliably detect the end of inlined function list
+ open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n");
+ if ($debug) { print("---- $image ---\n"); }
+ for (my $i = 0; $i <= $#{$pclist}; $i++) {
+ # addr2line always reads hex addresses, and does not need '0x' prefix.
+ if ($debug) { printf("%s\n", $pclist->[$i]); }
+ printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset));
+ if (defined($sep_address)) {
+ printf ADDRESSES ("%s\n", $sep_address);
+ }
+ }
+ close(ADDRESSES);
+ if ($debug) {
+ print("----\n");
+ system("cat $main::tmpfile_sym");
+ print("----\n");
+ system("$cmd <$main::tmpfile_sym");
+ print("----\n");
+ }
- open(SYMBOLS, "$addr2line_command <$main::tmpfile_sym |")
- || error("$addr2line_command: $!\n");
- my $count = 0;
+ open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n");
+ my $count = 0; # Index in pclist
while (<SYMBOLS>) {
+ # Read fullfunction and filelineinfo from next pair of lines
s/\r?\n$//g;
my $fullfunction = $_;
-
$_ = <SYMBOLS>;
s/\r?\n$//g;
my $filelinenum = $_;
- $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths
- if (!$main::opt_list) {
- $filelinenum =~ s|^.*/([^/]+:\d+)$|$1|; # Remove directory name
+
+ if (defined($sep_address) && $fullfunction eq $sep_symbol) {
+ # Terminating marker for data for this address
+ $count++;
+ next;
}
+ $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths
+
my $pcstr = $pclist->[$count];
- if (defined($symbols->{$pcstr})) {
- # Override just the line-number portion. The function name portion
- # is less buggy when computed using nm instead of addr2line. But
- # don't override if addr2line is giving ??'s and nm didn't. (This
- # may be seen mostly/entirely on cygwin's addr2line/nm.)
- if (($filelinenum ne "??:0") || ($symbols->{$pcstr}->[1] eq "?")) {
- $symbols->{$pcstr}->[1] = $filelinenum;
+ my $function = ShortFunctionName($fullfunction);
+ if ($fullfunction eq '??') {
+ # See if nm found a symbol
+ my $nms = $nm_symbols->{$pcstr};
+ if (defined($nms)) {
+ $function = $nms->[0];
+ $fullfunction = $nms->[2];
}
- } else {
- my $function = ShortFunctionName($fullfunction);
- $symbols->{$pcstr} = [$function, $filelinenum, $fullfunction];
}
- $count++;
- }
- close(SYMBOLS);
- return $count;
-}
-sub GetLineNumbers {
- my $image = shift;
- my $offset = shift;
- my $pclist = shift;
- my $symbols = shift;
-
- # Make file with all PC values
- open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n");
- for (my $i = 0; $i <= $#{$pclist}; $i++) {
- # addr2line always reads hex addresses, and does not need '0x' prefix.
- printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset));
- }
- close(ADDRESSES);
-
- # Pass to addr2line
- my $addr2line = $obj_tool_map{"addr2line"};
- my @addr2line_commands = ("$addr2line -f -C -e $image");
- if (exists $obj_tool_map{"addr2line_pdb"}) {
- my $addr2line_pdb = $obj_tool_map{"addr2line_pdb"};
- push(@addr2line_commands, "$addr2line_pdb --demangle -f -C -e $image");
- }
- foreach my $addr2line_command (@addr2line_commands) {
- if (GetLineNumbersViaAddr2Line("$addr2line_command", $pclist, $symbols)) {
- last;
+ # Prepend to accumulated symbols for pcstr
+ # (so that caller comes before callee)
+ my $sym = $symbols->{$pcstr};
+ if (!defined($sym)) {
+ $sym = [];
+ $symbols->{$pcstr} = $sym;
+ }
+ unshift(@{$sym}, $function, $filelinenum, $fullfunction);
+ if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
+ if (!defined($sep_address)) {
+      # Inlining is off, so this entry ends immediately
+ $count++;
}
}
+ close(SYMBOLS);
}
# Use nm to map the list of referenced PCs to symbols. Return true iff we
@@ -3646,7 +3777,7 @@ sub MapSymbolsWithNM {
}
return 1;
}
-
+
sub ShortFunctionName {
my $function = shift;
while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types
@@ -3813,6 +3944,10 @@ sub GetProcedureBoundariesViaNm {
next;
}
+ if ($this_routine eq $sep_symbol) {
+ $sep_address = HexExtend($start_val);
+ }
+
# Tag this routine with the starting address in case the image
# has multiple occurrences of this routine. We use a syntax
# that resembles template paramters that are automatically
diff --git a/src/stacktrace_config.h b/src/stacktrace_config.h
index 3bd0fb3..b58ab1d 100644
--- a/src/stacktrace_config.h
+++ b/src/stacktrace_config.h
@@ -46,17 +46,8 @@
#ifndef BASE_STACKTRACE_CONFIG_H_
#define BASE_STACKTRACE_CONFIG_H_
-// First, the i386 case.
-#if defined(__i386__) && __GNUC__ >= 2
-# if !defined(NO_FRAME_POINTER)
-# define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h"
-# define STACKTRACE_SKIP_CONTEXT_ROUTINES 1
-# else
-# define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h"
-# endif
-
-// Now, the x86_64 case.
-#elif defined(__x86_64__) && __GNUC__ >= 2
+// First, the i386 and x86_64 case.
+#if (defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 2
# if !defined(NO_FRAME_POINTER)
# define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h"
# define STACKTRACE_SKIP_CONTEXT_ROUTINES 1
diff --git a/src/symbolize.cc b/src/symbolize.cc
index b7cdf0e..6fe44b9 100644
--- a/src/symbolize.cc
+++ b/src/symbolize.cc
@@ -166,7 +166,7 @@ extern bool Symbolize(char *out, int out_size,
return false;
// make the symbolization_table values point to the output vector
SymbolMap::iterator fill = symbolization_table->begin();
- char *current_name = out;
+ const char *current_name = out;
for (int i = 0; i < total_bytes_read; i++) {
if (out[i] == '\n') {
fill->second = current_name;
diff --git a/src/symbolize.h b/src/symbolize.h
index 72196f6..8fb0366 100644
--- a/src/symbolize.h
+++ b/src/symbolize.h
@@ -33,6 +33,10 @@
#ifndef TCMALLOC_SYMBOLIZE_H_
#define TCMALLOC_SYMBOLIZE_H_
+#include "config.h"
+#ifdef HAVE_STDINT_H
+#include <stdint.h> // for uintptr_t
+#endif
#include <map>
using std::map;
@@ -42,7 +46,7 @@ static const int kSymbolSize = 1024;
// TODO(glider): it's better to make SymbolMap a class that encapsulates the
// address operations and has the Symbolize() method.
-typedef map<uintptr_t, char*> SymbolMap;
+typedef map<uintptr_t, const char*> SymbolMap;
extern bool Symbolize(char *out, int out_size,
SymbolMap *symbolization_table);
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index daa01d0..450c1ab 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -136,6 +136,7 @@
# define WIN32_DO_PATCHING 1
#endif
+using std::max;
using tcmalloc::PageHeap;
using tcmalloc::PageHeapAllocator;
using tcmalloc::SizeMap;
@@ -382,12 +383,11 @@ size_t InvalidGetAllocatedSize(void* ptr) {
// Extract interesting stats
struct TCMallocStats {
- uint64_t system_bytes; // Bytes alloced from system
- uint64_t thread_bytes; // Bytes in thread caches
- uint64_t central_bytes; // Bytes in central cache
- uint64_t transfer_bytes; // Bytes in central transfer cache
- uint64_t pageheap_bytes; // Bytes in page heap
- uint64_t metadata_bytes; // Bytes alloced for metadata
+ uint64_t thread_bytes; // Bytes in thread caches
+ uint64_t central_bytes; // Bytes in central cache
+ uint64_t transfer_bytes; // Bytes in central transfer cache
+ uint64_t metadata_bytes; // Bytes alloced for metadata
+ PageHeap::Stats pageheap; // Stats from page heap
};
// Get stats into "r". Also get per-size-class counts if class_count != NULL
@@ -409,13 +409,8 @@ static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
{ // scope
SpinLockHolder h(Static::pageheap_lock());
ThreadCache::GetThreadStats(&r->thread_bytes, class_count);
- }
-
- { //scope
- SpinLockHolder h(Static::pageheap_lock());
- r->system_bytes = Static::pageheap()->SystemBytes();
r->metadata_bytes = tcmalloc::metadata_system_bytes();
- r->pageheap_bytes = Static::pageheap()->FreeBytes();
+ r->pageheap = Static::pageheap()->stats();
}
}
@@ -453,8 +448,9 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
DumpSystemAllocatorStats(out);
}
- const uint64_t bytes_in_use = stats.system_bytes
- - stats.pageheap_bytes
+ const uint64_t bytes_in_use = stats.pageheap.system_bytes
+ - stats.pageheap.free_bytes
+ - stats.pageheap.unmapped_bytes
- stats.central_bytes
- stats.transfer_bytes
- stats.thread_bytes;
@@ -463,6 +459,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
"MALLOC: %12" PRIu64 " (%7.1f MB) Heap size\n"
"MALLOC: %12" PRIu64 " (%7.1f MB) Bytes in use by application\n"
"MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in page heap\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes unmapped in page heap\n"
"MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in central cache\n"
"MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in transfer cache\n"
"MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in thread caches\n"
@@ -470,9 +467,10 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
"MALLOC: %12" PRIu64 " Thread heaps in use\n"
"MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n"
"------------------------------------------------\n",
- stats.system_bytes, stats.system_bytes / MB,
+ stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB,
bytes_in_use, bytes_in_use / MB,
- stats.pageheap_bytes, stats.pageheap_bytes / MB,
+ stats.pageheap.free_bytes, stats.pageheap.free_bytes / MB,
+ stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MB,
stats.central_bytes, stats.central_bytes / MB,
stats.transfer_bytes, stats.transfer_bytes / MB,
stats.thread_bytes, stats.thread_bytes / MB,
@@ -536,9 +534,50 @@ static void** DumpHeapGrowthStackTraces() {
return result;
}
+static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) {
+ PageID page = 1; // Some code may assume that page==0 is never used
+ bool done = false;
+ while (!done) {
+ // Accumulate a small number of ranges in a local buffer
+ static const int kNumRanges = 16;
+ static base::MallocRange ranges[kNumRanges];
+ int n = 0;
+ {
+ SpinLockHolder h(Static::pageheap_lock());
+ while (n < kNumRanges) {
+ if (!Static::pageheap()->GetNextRange(page, &ranges[n])) {
+ done = true;
+ break;
+ } else {
+ uintptr_t limit = ranges[n].address + ranges[n].length;
+ page = (limit + kPageSize - 1) >> kPageShift;
+ n++;
+ }
+ }
+ }
+
+ for (int i = 0; i < n; i++) {
+ (*func)(arg, &ranges[i]);
+ }
+ }
+}
+
// TCMalloc's support for extra malloc interfaces
class TCMallocImplementation : public MallocExtension {
+ private:
+ // ReleaseToSystem() might release more than the requested bytes because
+ // the page heap releases at the span granularity, and spans are of wildly
+  // different sizes.  This member keeps track of the extra bytes
+ // released so that the app can periodically call ReleaseToSystem() to
+ // release memory at a constant rate.
+ // NOTE: Protected by Static::pageheap_lock().
+ size_t extra_bytes_released_;
+
public:
+ TCMallocImplementation()
+ : extra_bytes_released_(0) {
+ }
+
virtual void GetStats(char* buffer, int buffer_length) {
ASSERT(buffer_length > 0);
TCMalloc_Printer printer(buffer, buffer_length);
@@ -568,32 +607,51 @@ class TCMallocImplementation : public MallocExtension {
return DumpHeapGrowthStackTraces();
}
+ virtual void Ranges(void* arg, RangeFunction func) {
+ IterateOverRanges(arg, func);
+ }
+
virtual bool GetNumericProperty(const char* name, size_t* value) {
ASSERT(name != NULL);
if (strcmp(name, "generic.current_allocated_bytes") == 0) {
TCMallocStats stats;
ExtractStats(&stats, NULL);
- *value = stats.system_bytes
+ *value = stats.pageheap.system_bytes
- stats.thread_bytes
- stats.central_bytes
- stats.transfer_bytes
- - stats.pageheap_bytes;
+ - stats.pageheap.free_bytes
+ - stats.pageheap.unmapped_bytes;
return true;
}
if (strcmp(name, "generic.heap_size") == 0) {
TCMallocStats stats;
ExtractStats(&stats, NULL);
- *value = stats.system_bytes;
+ *value = stats.pageheap.system_bytes;
return true;
}
if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
// We assume that bytes in the page heap are not fragmented too
- // badly, and are therefore available for allocation.
+ // badly, and are therefore available for allocation without
+ // growing the pageheap system byte count.
+ SpinLockHolder l(Static::pageheap_lock());
+ PageHeap::Stats stats = Static::pageheap()->stats();
+ *value = stats.free_bytes + stats.unmapped_bytes;
+ return true;
+ }
+
+ if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) {
SpinLockHolder l(Static::pageheap_lock());
- *value = Static::pageheap()->FreeBytes();
+ *value = Static::pageheap()->stats().free_bytes;
+ return true;
+ }
+
+ if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) {
+ SpinLockHolder l(Static::pageheap_lock());
+ *value = Static::pageheap()->stats().unmapped_bytes;
return true;
}
@@ -631,9 +689,32 @@ class TCMallocImplementation : public MallocExtension {
virtual void MarkThreadBusy(); // Implemented below
- virtual void ReleaseFreeMemory() {
+ virtual void ReleaseToSystem(ssize_t num_bytes) {
+ if (num_bytes <= 0) {
+ return;
+ }
SpinLockHolder h(Static::pageheap_lock());
- Static::pageheap()->ReleaseFreePages();
+ if (num_bytes <= extra_bytes_released_) {
+ // We released too much on a prior call, so don't release any
+ // more this time.
+ extra_bytes_released_ = extra_bytes_released_ - num_bytes;
+ return;
+ }
+ num_bytes = num_bytes - extra_bytes_released_;
+ // num_bytes might be less than one page. If we pass zero to
+ // ReleaseAtLeastNPages, it won't do anything, so we release a whole
+ // page now and let extra_bytes_released_ smooth it out over time.
+ Length num_pages = max<Length>(num_bytes >> kPageShift, 1);
+ size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages(
+ num_pages) << kPageShift;
+ if (bytes_released > num_bytes) {
+ extra_bytes_released_ = bytes_released - num_bytes;
+ } else {
+ // The PageHeap wasn't able to release num_bytes. Don't try to
+ // compensate with a big release next time. Specifically,
+ // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX).
+ extra_bytes_released_ = 0;
+ }
}
virtual void SetMemoryReleaseRate(double rate) {
@@ -1063,16 +1144,18 @@ inline struct mallinfo do_mallinfo() {
// Unfortunately, the struct contains "int" field, so some of the
// size values will be truncated.
- info.arena = static_cast<int>(stats.system_bytes);
+ info.arena = static_cast<int>(stats.pageheap.system_bytes);
info.fsmblks = static_cast<int>(stats.thread_bytes
+ stats.central_bytes
+ stats.transfer_bytes);
- info.fordblks = static_cast<int>(stats.pageheap_bytes);
- info.uordblks = static_cast<int>(stats.system_bytes
+ info.fordblks = static_cast<int>(stats.pageheap.free_bytes +
+ stats.pageheap.unmapped_bytes);
+ info.uordblks = static_cast<int>(stats.pageheap.system_bytes
- stats.thread_bytes
- stats.central_bytes
- stats.transfer_bytes
- - stats.pageheap_bytes);
+ - stats.pageheap.free_bytes
+ - stats.pageheap.unmapped_bytes);
return info;
}
diff --git a/src/tests/malloc_extension_c_test.c b/src/tests/malloc_extension_c_test.c
index aad2d4b..b6319a1 100644
--- a/src/tests/malloc_extension_c_test.c
+++ b/src/tests/malloc_extension_c_test.c
@@ -108,6 +108,7 @@ void TestMallocExtension(void) {
}
MallocExtension_MarkThreadIdle();
MallocExtension_MarkThreadBusy();
+ MallocExtension_ReleaseToSystem(1);
MallocExtension_ReleaseFreeMemory();
if (MallocExtension_GetEstimatedAllocatedSize(10) < 10) {
FAIL("GetEstimatedAllocatedSize returned a bad value (too small)");
diff --git a/src/tests/page_heap_test.cc b/src/tests/page_heap_test.cc
new file mode 100644
index 0000000..9120b78
--- /dev/null
+++ b/src/tests/page_heap_test.cc
@@ -0,0 +1,55 @@
+// Copyright 2009 Google Inc. All Rights Reserved.
+// Author: fikes@google.com (Andrew Fikes)
+
+#include <stdio.h>
+#include "config_for_unittests.h"
+#include "base/logging.h"
+#include "common.h"
+#include "page_heap.h"
+
+namespace {
+
+static void CheckStats(const tcmalloc::PageHeap* ph,
+ uint64_t system_pages,
+ uint64_t free_pages,
+ uint64_t unmapped_pages) {
+ tcmalloc::PageHeap::Stats stats = ph->stats();
+ EXPECT_EQ(system_pages, stats.system_bytes >> kPageShift);
+ EXPECT_EQ(free_pages, stats.free_bytes >> kPageShift);
+ EXPECT_EQ(unmapped_pages, stats.unmapped_bytes >> kPageShift);
+}
+
+static void TestPageHeap_Stats() {
+ tcmalloc::PageHeap* ph = new tcmalloc::PageHeap();
+
+ // Empty page heap
+ CheckStats(ph, 0, 0, 0);
+
+ // Allocate a span 's1'
+ tcmalloc::Span* s1 = ph->New(256);
+ CheckStats(ph, 256, 0, 0);
+
+ // Split span 's1' into 's1', 's2'. Delete 's2'
+ tcmalloc::Span* s2 = ph->Split(s1, 128);
+ Length s2_len = s2->length;
+ ph->Delete(s2);
+ CheckStats(ph, 256, 128, 0);
+
+ // Unmap deleted span 's2'
+ EXPECT_EQ(s2_len, ph->ReleaseAtLeastNPages(1));
+ CheckStats(ph, 256, 0, 128);
+
+ // Delete span 's1'
+ ph->Delete(s1);
+ CheckStats(ph, 256, 128, 128);
+
+ delete ph;
+}
+
+} // namespace
+
+int main(int argc, char **argv) {
+ TestPageHeap_Stats();
+ printf("PASS\n");
+ return 0;
+}
diff --git a/src/tests/pagemap_unittest.cc b/src/tests/pagemap_unittest.cc
index dcf6c9a..83e76e2 100644
--- a/src/tests/pagemap_unittest.cc
+++ b/src/tests/pagemap_unittest.cc
@@ -113,6 +113,53 @@ void TestMap(int limit, bool limit_is_below_the_overflow_boundary) {
}
}
+// REQUIRES: BITS==10, i.e., valid range is [0,1023].
+// Representations for different types will end up being:
+// PageMap1: array[1024]
+// PageMap2: array[32][32]
+// PageMap3: array[16][16][4]
+template <class Type>
+void TestNext(const char* name) {
+ RAW_LOG(ERROR, "Running NextTest %s\n", name);
+ Type map(malloc);
+ char a, b, c, d, e;
+
+ // When map is empty
+ CHECK(map.Next(0) == NULL);
+ CHECK(map.Next(5) == NULL);
+ CHECK(map.Next(1<<30) == NULL);
+
+ // Add a single value
+ map.Ensure(40, 1);
+ map.set(40, &a);
+ CHECK(map.Next(0) == &a);
+ CHECK(map.Next(39) == &a);
+ CHECK(map.Next(40) == &a);
+ CHECK(map.Next(41) == NULL);
+ CHECK(map.Next(1<<30) == NULL);
+
+ // Add a few values
+ map.Ensure(41, 1);
+ map.Ensure(100, 3);
+ map.set(41, &b);
+ map.set(100, &c);
+ map.set(101, &d);
+ map.set(102, &e);
+ CHECK(map.Next(0) == &a);
+ CHECK(map.Next(39) == &a);
+ CHECK(map.Next(40) == &a);
+ CHECK(map.Next(41) == &b);
+ CHECK(map.Next(42) == &c);
+ CHECK(map.Next(63) == &c);
+ CHECK(map.Next(64) == &c);
+ CHECK(map.Next(65) == &c);
+ CHECK(map.Next(99) == &c);
+ CHECK(map.Next(100) == &c);
+ CHECK(map.Next(101) == &d);
+ CHECK(map.Next(102) == &e);
+ CHECK(map.Next(103) == NULL);
+}
+
int main(int argc, char** argv) {
TestMap< TCMalloc_PageMap1<10> > (100, true);
TestMap< TCMalloc_PageMap1<10> > (1 << 10, false);
@@ -121,6 +168,10 @@ int main(int argc, char** argv) {
TestMap< TCMalloc_PageMap3<20> > (100, true);
TestMap< TCMalloc_PageMap3<20> > (1 << 20, false);
+ TestNext< TCMalloc_PageMap1<10> >("PageMap1");
+ TestNext< TCMalloc_PageMap2<10> >("PageMap2");
+ TestNext< TCMalloc_PageMap3<10> >("PageMap3");
+
printf("PASS\n");
return 0;
}
diff --git a/src/tests/profile-handler_unittest.cc b/src/tests/profile-handler_unittest.cc
index 4b247c7..1e72b2e 100644
--- a/src/tests/profile-handler_unittest.cc
+++ b/src/tests/profile-handler_unittest.cc
@@ -8,8 +8,9 @@
#include "profile-handler.h"
#include <assert.h>
-#include <sys/time.h>
#include <pthread.h>
+#include <sys/time.h>
+#include <time.h>
#include "base/logging.h"
#include "base/simple_mutex.h"
@@ -46,11 +47,11 @@ class Thread {
bool joinable_;
};
-// Sleep interval in usecs. To ensure a SIGPROF timer interrupt under heavy
-// load, this is set to a 20x of ProfileHandler timer interval (i.e 100Hz)
+// timespec of the sleep interval.  To ensure a SIGPROF timer interrupt under
+// heavy load, this is set to 20x the ProfileHandler timer interval (i.e. 100Hz)
// TODO(nabeelmian) Under very heavy loads, the worker thread may not accumulate
// enough cpu usage to get a profile tick.
-int kSleepInterval = 200000;
+const struct timespec sleep_interval = { 0, 200000000 }; // 200 ms
// Whether each thread has separate timers.
static bool timer_separate_ = false;
@@ -213,7 +214,7 @@ class ProfileHandlerTest {
busy_worker_->Start();
// Wait for worker to start up and register with the ProfileHandler.
// TODO(nabeelmian) This may not work under very heavy load.
- usleep(kSleepInterval);
+ nanosleep(&sleep_interval, NULL);
}
// Stops the worker thread.
@@ -257,7 +258,7 @@ class ProfileHandlerTest {
uint64 interrupts_before = GetInterruptCount();
// Sleep for a bit and check that tick counter is making progress.
int old_tick_count = tick_counter;
- usleep(kSleepInterval);
+ nanosleep(&sleep_interval, NULL);
int new_tick_count = tick_counter;
EXPECT_GT(new_tick_count, old_tick_count);
uint64 interrupts_after = GetInterruptCount();
@@ -268,7 +269,7 @@ class ProfileHandlerTest {
void VerifyUnregistration(const int& tick_counter) {
// Sleep for a bit and check that tick counter is not making progress.
int old_tick_count = tick_counter;
- usleep(kSleepInterval);
+ nanosleep(&sleep_interval, NULL);
int new_tick_count = tick_counter;
EXPECT_EQ(new_tick_count, old_tick_count);
// If no callbacks, signal handler and shared timer should be disabled.
@@ -297,7 +298,7 @@ class ProfileHandlerTest {
}
// Verify that the ProfileHandler is not accumulating profile ticks.
uint64 interrupts_before = GetInterruptCount();
- usleep(kSleepInterval);
+ nanosleep(&sleep_interval, NULL);
uint64 interrupts_after = GetInterruptCount();
EXPECT_EQ(interrupts_after, interrupts_before);
}
diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc
index 713fbe1..8eccb18 100644
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@@ -124,6 +124,9 @@
using std::vector;
using std::string;
+DECLARE_double(tcmalloc_release_rate);
+DECLARE_int32(max_free_queue_size); // in debugallocation.cc
+
namespace testing {
static const int FLAGS_numtests = 50000;
@@ -747,6 +750,127 @@ static void TestHugeThreadCache() {
delete[] array;
}
+namespace {
+
+struct RangeCallbackState {
+ uintptr_t ptr;
+ base::MallocRange::Type expected_type;
+ size_t min_size;
+ bool matched;
+};
+
+static void RangeCallback(void* arg, const base::MallocRange* r) {
+ RangeCallbackState* state = reinterpret_cast<RangeCallbackState*>(arg);
+ if (state->ptr >= r->address &&
+ state->ptr < r->address + r->length) {
+ CHECK_EQ(r->type, state->expected_type);
+ CHECK_GE(r->length, state->min_size);
+ state->matched = true;
+ }
+}
+
+// Check that at least one of the callbacks from Ranges() contains
+// the specified address with the specified type, and has size
+// >= min_size.
+static void CheckRangeCallback(void* ptr, base::MallocRange::Type type,
+ size_t min_size) {
+ RangeCallbackState state;
+ state.ptr = reinterpret_cast<uintptr_t>(ptr);
+ state.expected_type = type;
+ state.min_size = min_size;
+ state.matched = false;
+ MallocExtension::instance()->Ranges(&state, RangeCallback);
+ CHECK(state.matched);
+}
+
+}
+
+static void TestRanges() {
+ static const int MB = 1048576;
+ void* a = malloc(MB);
+ void* b = malloc(MB);
+ CheckRangeCallback(a, base::MallocRange::INUSE, MB);
+ CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+ free(a);
+ CheckRangeCallback(a, base::MallocRange::FREE, MB);
+ CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+ MallocExtension::instance()->ReleaseFreeMemory();
+ CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB);
+ CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+ free(b);
+ CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB);
+ CheckRangeCallback(b, base::MallocRange::FREE, MB);
+}
+
+static size_t GetUnmappedBytes() {
+ size_t bytes;
+ CHECK(MallocExtension::instance()->GetNumericProperty(
+ "tcmalloc.pageheap_unmapped_bytes", &bytes));
+ return bytes;
+}
+
+static void TestReleaseToSystem() {
+ // Debug allocation mode adds overhead to each allocation which
+ // messes up all the equality tests here. I just disable the
+  // test in this mode.  TODO(csilvers): get it to work for debugalloc?
+#ifndef DEBUGALLOCATION
+ const double old_tcmalloc_release_rate = FLAGS_tcmalloc_release_rate;
+ FLAGS_tcmalloc_release_rate = 0;
+
+ static const int MB = 1048576;
+ void* a = malloc(MB);
+ void* b = malloc(MB);
+ MallocExtension::instance()->ReleaseFreeMemory();
+ size_t starting_bytes = GetUnmappedBytes();
+
+ // Calling ReleaseFreeMemory() a second time shouldn't do anything.
+ MallocExtension::instance()->ReleaseFreeMemory();
+ EXPECT_EQ(starting_bytes, GetUnmappedBytes());
+
+ // ReleaseToSystem shouldn't do anything either.
+ MallocExtension::instance()->ReleaseToSystem(MB);
+ EXPECT_EQ(starting_bytes, GetUnmappedBytes());
+
+ free(a);
+
+ // Negative numbers should be ignored.
+ MallocExtension::instance()->ReleaseToSystem(-5);
+ EXPECT_EQ(starting_bytes, GetUnmappedBytes());
+
+ // The span to release should be 1MB.
+ MallocExtension::instance()->ReleaseToSystem(MB/2);
+ EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());
+
+ // Should do nothing since the previous call released too much.
+ MallocExtension::instance()->ReleaseToSystem(MB/4);
+ EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());
+
+ free(b);
+
+ // Use up the extra MB/4 bytes from 'a' and also release 'b'.
+ MallocExtension::instance()->ReleaseToSystem(MB/2);
+ EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes());
+
+ // Should do nothing since the previous call released too much.
+ MallocExtension::instance()->ReleaseToSystem(MB/2);
+ EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes());
+
+ // Nothing else to release.
+ MallocExtension::instance()->ReleaseFreeMemory();
+ EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes());
+
+ a = malloc(MB);
+ free(a);
+ EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());
+
+ // Releasing less than a page should still trigger a release.
+ MallocExtension::instance()->ReleaseToSystem(1);
+ EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes());
+
+ FLAGS_tcmalloc_release_rate = old_tcmalloc_release_rate;
+#endif // #ifndef DEBUGALLOCATION
+}
+
static int RunAllTests(int argc, char** argv) {
// Optional argv[1] is the seed
AllocatorState rnd(argc > 1 ? atoi(argv[1]) : 100);
@@ -1023,6 +1147,8 @@ static int RunAllTests(int argc, char** argv) {
#endif
TestHugeThreadCache();
+ TestRanges();
+ TestReleaseToSystem();
return 0;
}
@@ -1032,6 +1158,10 @@ static int RunAllTests(int argc, char** argv) {
using testing::RunAllTests;
int main(int argc, char** argv) {
+#ifdef DEBUGALLOCATION // debug allocation takes forever for huge allocs
+ FLAGS_max_free_queue_size = 0; // return freed blocks to tcmalloc immediately
+#endif
+
RunAllTests(argc, argv);
// Test tc_version()