diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/base/basictypes.h | 30 | ||||
-rw-r--r-- | src/base/dynamic_annotations.h | 14 | ||||
-rw-r--r-- | src/base/sysinfo.cc | 98 | ||||
-rw-r--r-- | src/base/sysinfo.h | 4 | ||||
-rw-r--r-- | src/debugallocation.cc | 2 | ||||
-rw-r--r-- | src/google/heap-checker.h | 8 | ||||
-rw-r--r-- | src/google/malloc_extension.h | 74 | ||||
-rw-r--r-- | src/google/malloc_extension_c.h | 1 | ||||
-rw-r--r-- | src/heap-checker.cc | 7 | ||||
-rw-r--r-- | src/malloc_extension.cc | 11 | ||||
-rw-r--r-- | src/page_heap.cc | 206 | ||||
-rw-r--r-- | src/page_heap.h | 66 | ||||
-rw-r--r-- | src/pagemap.h | 53 | ||||
-rwxr-xr-x | src/pprof | 409 | ||||
-rw-r--r-- | src/stacktrace_config.h | 13 | ||||
-rw-r--r-- | src/symbolize.cc | 2 | ||||
-rw-r--r-- | src/symbolize.h | 6 | ||||
-rw-r--r-- | src/tcmalloc.cc | 137 | ||||
-rw-r--r-- | src/tests/malloc_extension_c_test.c | 1 | ||||
-rw-r--r-- | src/tests/page_heap_test.cc | 55 | ||||
-rw-r--r-- | src/tests/pagemap_unittest.cc | 51 | ||||
-rw-r--r-- | src/tests/profile-handler_unittest.cc | 17 | ||||
-rw-r--r-- | src/tests/tcmalloc_unittest.cc | 130 |
23 files changed, 1048 insertions, 347 deletions
diff --git a/src/base/basictypes.h b/src/base/basictypes.h index e4d4140..9991413 100644 --- a/src/base/basictypes.h +++ b/src/base/basictypes.h @@ -240,7 +240,7 @@ struct CompileAssert { # define HAVE_ATTRIBUTE_SECTION_START 1 #elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__) -# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__DATA, " #name))) +# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name))) #include <mach-o/getsect.h> #include <mach-o/dyld.h> @@ -251,18 +251,32 @@ class AssignAttributeStartEnd { if (_dyld_present()) { for (int i = _dyld_image_count() - 1; i >= 0; --i) { const mach_header* hdr = _dyld_get_image_header(i); - uint32_t len; - *pstart = getsectdatafromheader(hdr, "__DATA", name, &len); - if (*pstart) { // NULL if not defined in this dynamic library - *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc - *pend = *pstart + len; - return; +#ifdef MH_MAGIC_64 + if (hdr->magic == MH_MAGIC_64) { + uint64_t len; + *pstart = getsectdatafromheader_64((mach_header_64*)hdr, + "__TEXT", name, &len); + if (*pstart) { // NULL if not defined in this dynamic library + *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc + *pend = *pstart + len; + return; + } + } +#endif + if (hdr->magic == MH_MAGIC) { + uint32_t len; + *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len); + if (*pstart) { // NULL if not defined in this dynamic library + *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc + *pend = *pstart + len; + return; + } } } } // If we get here, not defined in a dll at all. See if defined statically. unsigned long len; // don't ask me why this type isn't uint32_t too... 
- *pstart = getsectdata("__DATA", name, &len); + *pstart = getsectdata("__TEXT", name, &len); *pend = *pstart + len; } }; diff --git a/src/base/dynamic_annotations.h b/src/base/dynamic_annotations.h index 5995ac4..a2a268f 100644 --- a/src/base/dynamic_annotations.h +++ b/src/base/dynamic_annotations.h @@ -203,9 +203,16 @@ } while (0) // Instruct the tool to create a happens-before arc between mu->Unlock() and - // mu->Lock(). This annotation may slow down the race detector; normally it - // is used only when it would be difficult to annotate each of the mutex's - // critical sections individually using the annotations above. + // mu->Lock(). This annotation may slow down the race detector and hide real + // races. Normally it is used only when it would be difficult to annotate each + // of the mutex's critical sections individually using the annotations above. + // This annotation makes sense only for hybrid race detectors. For pure + // happens-before detectors this is a no-op. For more details see + // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. 
#define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) @@ -357,6 +364,7 @@ #define ANNOTATE_NEW_MEMORY(address, size) // empty #define ANNOTATE_EXPECT_RACE(address, description) // empty #define ANNOTATE_BENIGN_RACE(address, description) // empty + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty #define ANNOTATE_TRACE_MEMORY(arg) // empty #define ANNOTATE_THREAD_NAME(name) // empty diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc index a2bc2a9..3919ba4 100644 --- a/src/base/sysinfo.cc +++ b/src/base/sysinfo.cc @@ -441,6 +441,48 @@ static void ConstructFilename(const char* spec, pid_t pid, } #endif +// A templatized helper function instantiated for Mach (OS X) only. +// It can handle finding info for both 32 bits and 64 bits. +// Returns true if it successfully handled the hdr, false else. +#ifdef __MACH__ // Mac OS X, almost certainly +template<uint32_t kMagic, uint32_t kLCSegment, + typename MachHeader, typename SegmentCommand> +static bool NextExtMachHelper(const mach_header* hdr, + int current_image, int current_load_cmd, + uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev) { + static char kDefaultPerms[5] = "r-xp"; + if (hdr->magic != kMagic) + return false; + const char* lc = (const char *)hdr + sizeof(MachHeader); + // TODO(csilvers): make this not-quadradic (increment and hold state) + for (int j = 0; j < current_load_cmd; j++) // advance to *our* load_cmd + lc += ((const load_command *)lc)->cmdsize; + if (((const load_command *)lc)->cmd == kLCSegment) { + const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image); + const SegmentCommand* sc = (const SegmentCommand *)lc; + if (start) *start = sc->vmaddr + dlloff; + if (end) *end = sc->vmaddr + sc->vmsize + dlloff; + if (flags) *flags = 
kDefaultPerms; // can we do better? + if (offset) *offset = sc->fileoff; + if (inode) *inode = 0; + if (filename) + *filename = const_cast<char*>(_dyld_get_image_name(current_image)); + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; // could we use sc->filesize? + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } + + return false; +} +#endif + ProcMapsIterator::ProcMapsIterator(pid_t pid) { Init(pid, NULL, false); } @@ -456,6 +498,7 @@ ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer, void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, bool use_maps_backing) { + pid_ = pid; using_maps_backing_ = use_maps_backing; dynamic_buffer_ = NULL; if (!buffer) { @@ -691,6 +734,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4); COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2); COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1); + Buffer object_path; int nread = 0; // fill up buffer with text NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t))); if (nread == sizeof(prmap_t)) { @@ -700,13 +744,27 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, // two middle ints are major and minor device numbers, but I'm not sure. 
sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname); + if (pid_ == 0) { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/self/path/%s", mapinfo->pr_mapname), + Buffer::kBufSize); + } else { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/%d/path/%s", pid_, mapinfo->pr_mapname), + Buffer::kBufSize); + } + ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX); + CHECK_LT(len, PATH_MAX); + if (len < 0) + len = 0; + current_filename_[len] = '\0'; + if (start) *start = mapinfo->pr_vaddr; if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size; if (flags) *flags = kPerms[mapinfo->pr_mflags & 7]; if (offset) *offset = mapinfo->pr_offset; if (inode) *inode = inode_from_mapname; - // TODO(csilvers): How to map from /proc/map/object to filename? - if (filename) *filename = mapinfo->pr_mapname; // format is ufs.?.?.inode + if (filename) *filename = current_filename_; if (file_mapping) *file_mapping = 0; if (file_pages) *file_pages = 0; if (anon_mapping) *anon_mapping = 0; @@ -715,7 +773,6 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, return true; } #elif defined(__MACH__) - static char kDefaultPerms[5] = "r-xp"; // We return a separate entry for each segment in the DLL. (TODO(csilvers): // can we do better?) A DLL ("image") has load-commands, some of which // talk about segment boundaries. @@ -728,25 +785,22 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, // We start with the next load command (we've already looked at this one). 
for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) { - const char* lc = ((const char *)hdr + sizeof(struct mach_header)); - // TODO(csilvers): make this not-quadradic (increment and hold state) - for (int j = 0; j < current_load_cmd_; j++) // advance to *our* load_cmd - lc += ((const load_command *)lc)->cmdsize; - if (((const load_command *)lc)->cmd == LC_SEGMENT) { - const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image_); - const segment_command* sc = (const segment_command *)lc; - if (start) *start = sc->vmaddr + dlloff; - if (end) *end = sc->vmaddr + sc->vmsize + dlloff; - if (flags) *flags = kDefaultPerms; // can we do better? - if (offset) *offset = sc->fileoff; - if (inode) *inode = 0; - if (filename) - *filename = const_cast<char*>(_dyld_get_image_name(current_image_)); - if (file_mapping) *file_mapping = 0; - if (file_pages) *file_pages = 0; // could we use sc->filesize? - if (anon_mapping) *anon_mapping = 0; - if (anon_pages) *anon_pages = 0; - if (dev) *dev = 0; +#ifdef MH_MAGIC_64 + if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64, + struct mach_header_64, struct segment_command_64>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { + return true; + } +#endif + if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT, + struct mach_header, struct segment_command>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { return true; } } diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h index b4b5c9f..0bcc1f5 100644 --- a/src/base/sysinfo.h +++ b/src/base/sysinfo.h @@ -209,9 +209,13 @@ class ProcMapsIterator { #elif defined(__MACH__) int current_image_; // dll's are called "images" in macos parlance int current_load_cmd_; // the segment of this dll we're examining +#elif defined(__sun__) // Solaris + int fd_; + char 
current_filename_[PATH_MAX]; #else int fd_; // filehandle on /proc/*/maps #endif + pid_t pid_; char flags_[10]; Buffer* dynamic_buffer_; // dynamically-allocated Buffer bool using_maps_backing_; // true if we are looking at maps_backing instead of maps. diff --git a/src/debugallocation.cc b/src/debugallocation.cc index dcf722d..47fef16 100644 --- a/src/debugallocation.cc +++ b/src/debugallocation.cc @@ -674,7 +674,7 @@ class MallocBlock { uintptr_t pc = reinterpret_cast<uintptr_t>(queue_entry.deleter_pcs[i]) - 1; TracePrintf(STDERR_FILENO, " @ %p %s\n", - pc, symbolization_table[pc]); + reinterpret_cast<void*>(pc), symbolization_table[pc]); } } else { RAW_LOG(ERROR, diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h index 751eb9f..c0ee8a8 100644 --- a/src/google/heap-checker.h +++ b/src/google/heap-checker.h @@ -51,10 +51,12 @@ #ifndef BASE_HEAP_CHECKER_H_ #define BASE_HEAP_CHECKER_H_ -#include "config.h" - #include <sys/types.h> // for size_t -#ifdef HAVE_STDINT_H +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. +#ifndef _MSC_VER #include <stdint.h> // for uintptr_t #endif #include <stdarg.h> // for va_list diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h index bc53e0f..0342843 100644 --- a/src/google/malloc_extension.h +++ b/src/google/malloc_extension.h @@ -42,6 +42,13 @@ #define BASE_MALLOC_EXTENSION_H_ #include <stddef.h> +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. 
+#ifndef _MSC_VER +#include <stdint.h> +#endif #include <string> // Annoying stuff for windows -- makes sure clients can import these functions @@ -58,6 +65,10 @@ static const int kMallocHistogramSize = 64; // One day, we could support other types of writers (perhaps for C?) typedef std::string MallocExtensionWriter; +namespace base { +struct MallocRange; +} + // The default implementations of the following routines do nothing. // All implementations should be thread-safe; the current one // (TCMallocImplementation) is. @@ -99,6 +110,14 @@ class PERFTOOLS_DLL_DECL MallocExtension { // be passed to "pprof". virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer); + // Invokes func(arg, range) for every controlled memory + // range. *range is filled in with information about the range. + // + // This is a best-effort interface useful only for performance + // analysis. The implementation may not call func at all. + typedef void (RangeFunction)(void*, const base::MallocRange*); + virtual void Ranges(void* arg, RangeFunction func); + // ------------------------------------------------------------------- // Control operations for getting and setting malloc implementation // specific parameters. Some currently useful properties: @@ -127,12 +146,20 @@ class PERFTOOLS_DLL_DECL MallocExtension { // This property is not writable. // // "tcmalloc.slack_bytes" - // Number of bytes allocated from system, but not currently - // in use by malloced objects. I.e., bytes available for - // allocation without needing more bytes from system. + // Number of bytes allocated from system, but not currently in + // use by malloced objects. I.e., bytes available for + // allocation without needing more bytes from system. It is + // the sum of pageheap_free_bytes and pageheap_unmapped_bytes. + // This property is not writable. + // + // "tcmalloc.pageheap_free_bytes" + // Number of bytes in free, mapped pages in pageheap + // This property is not writable. 
+ // + // "tcmalloc.pageheap_unmapped_bytes" + // Number of bytes in free, unmapped pages in pageheap // This property is not writable. // - // TODO: Add more properties as necessary // ------------------------------------------------------------------- // Get the named "property"'s value. Returns true if the property @@ -167,12 +194,14 @@ class PERFTOOLS_DLL_DECL MallocExtension { // Most malloc implementations ignore this routine. virtual void MarkThreadBusy(); - // Try to free memory back to the operating system for reuse. Only - // use this extension if the application has recently freed a lot of - // memory, and does not anticipate using it again for a long time -- - // to get this memory back may require faulting pages back in by the - // OS, and that may be slow. (Currently only implemented in - // tcmalloc.) + // Try to release num_bytes of free memory back to the operating + // system for reuse. Use this extension with caution -- to get this + // memory back may require faulting pages back in by the OS, and + // that may be slow. (Currently only implemented in tcmalloc.) + // A negative values for num_bytes results in a noop. + virtual void ReleaseToSystem(ssize_t num_bytes); + + // Same as ReleaseToSystem() but release as much memory as possible. virtual void ReleaseFreeMemory(); // Sets the rate at which we release unused memory to the system. @@ -239,4 +268,29 @@ class PERFTOOLS_DLL_DECL MallocExtension { virtual void** ReadHeapGrowthStackTraces(); }; +namespace base { + +// Information passed per range. More fields may be added later. 
+struct MallocRange { + enum Type { + INUSE, // Application is using this range + FREE, // Range is currently free + UNMAPPED, // Backing physical memory has been returned to the OS + UNKNOWN, + // More enum values may be added in the future + }; + + uintptr_t address; // Address of range + size_t length; // Byte length of range + Type type; // Type of this range + double fraction; // Fraction of range that is being used (0 if !INUSE) + + // Perhaps add the following: + // - stack trace if this range was sampled + // - heap growth stack trace if applicable to this range + // - age when allocated (for inuse) or freed (if not in use) +}; + +} // namespace base + #endif // BASE_MALLOC_EXTENSION_H_ diff --git a/src/google/malloc_extension_c.h b/src/google/malloc_extension_c.h index 514305e..95f7f4c 100644 --- a/src/google/malloc_extension_c.h +++ b/src/google/malloc_extension_c.h @@ -75,6 +75,7 @@ PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value); PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void); PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void); +PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(ssize_t num_bytes); PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void); PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size); PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(void* p); diff --git a/src/heap-checker.cc b/src/heap-checker.cc index fc8973a..59288e6 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -47,11 +47,9 @@ #ifdef HAVE_PTHREAD #include <pthread.h> #endif -#ifdef HAVE_POLL_H -#include <poll.h> -#endif #include <sys/stat.h> #include <sys/types.h> +#include <time.h> #include <assert.h> #ifdef HAVE_LINUX_PTRACE_H @@ -2298,7 +2296,8 @@ void HeapLeakChecker_AfterDestructors() { } if (FLAGS_heap_check_after_destructors) { if 
(HeapLeakChecker::DoMainHeapCheck()) { - poll(0, 0, 500); + const struct timespec sleep_time = { 0, 500000000 }; // 500 ms + nanosleep(&sleep_time, NULL); // Need this hack to wait for other pthreads to exit. // Otherwise tcmalloc find errors // on a free() call from pthreads. diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index 068a693..95fd1c1 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -143,10 +143,14 @@ void MallocExtension::MarkThreadBusy() { // Default implementation does nothing } -void MallocExtension::ReleaseFreeMemory() { +void MallocExtension::ReleaseToSystem(ssize_t num_bytes) { // Default implementation does nothing } +void MallocExtension::ReleaseFreeMemory() { + ReleaseToSystem(LONG_MAX); +} + void MallocExtension::SetMemoryReleaseRate(double rate) { // Default implementation does nothing } @@ -300,6 +304,10 @@ void MallocExtension::GetHeapGrowthStacks(MallocExtensionWriter* writer) { DumpAddressMap(writer); } +void MallocExtension::Ranges(void* arg, RangeFunction func) { + // No callbacks by default +} + // These are C shims that work on the current instance. 
#define C_SHIM(fn, retval, paramlist, arglist) \ @@ -325,5 +333,6 @@ C_SHIM(SetNumericProperty, int, C_SHIM(MarkThreadIdle, void, (void), ()); C_SHIM(MarkThreadBusy, void, (void), ()); C_SHIM(ReleaseFreeMemory, void, (void), ()); +C_SHIM(ReleaseToSystem, void, (ssize_t num_bytes), (num_bytes)); C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size)); C_SHIM(GetAllocatedSize, size_t, (void* p), (p)); diff --git a/src/page_heap.cc b/src/page_heap.cc index 9cbc70e..1e63cb9 100644 --- a/src/page_heap.cc +++ b/src/page_heap.cc @@ -49,11 +49,9 @@ namespace tcmalloc { PageHeap::PageHeap() : pagemap_(MetaDataAlloc), pagemap_cache_(0), - free_pages_(0), - system_bytes_(0), scavenge_counter_(0), // Start scavenging at kMaxPages list - scavenge_index_(kMaxPages-1) { + release_index_(kMaxPages) { COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); DLL_Init(&large_.normal); DLL_Init(&large_.returned); @@ -154,7 +152,7 @@ Span* PageHeap::Carve(Span* span, Length n) { ASSERT(n > 0); ASSERT(span->location != Span::IN_USE); const int old_location = span->location; - DLL_Remove(span); + RemoveFromFreeList(span); span->location = Span::IN_USE; Event(span, 'A', n); @@ -165,18 +163,11 @@ Span* PageHeap::Carve(Span* span, Length n) { leftover->location = old_location; Event(leftover, 'S', extra); RecordSpan(leftover); - - // Place leftover span on appropriate free list - SpanList* listpair = (extra < kMaxPages) ? &free_[extra] : &large_; - Span* dst = (leftover->location == Span::ON_RETURNED_FREELIST - ? 
&listpair->returned : &listpair->normal); - DLL_Prepend(dst, leftover); - + PrependToFreeList(leftover); // Skip coalescing - no candidates possible span->length = n; pagemap_.set(span->start + n - 1, span); } ASSERT(Check()); - free_pages_ -= n; return span; } @@ -191,13 +182,12 @@ void PageHeap::Delete(Span* span) { span->sample = 0; span->location = Span::ON_NORMAL_FREELIST; Event(span, 'D', span->length); - AddToFreeList(span); - free_pages_ += n; + MergeIntoFreeList(span); // Coalesces if possible IncrementalScavenge(n); ASSERT(Check()); } -void PageHeap::AddToFreeList(Span* span) { +void PageHeap::MergeIntoFreeList(Span* span) { ASSERT(span->location != Span::IN_USE); // Coalesce -- we guarantee that "p" != 0, so no bounds checking @@ -214,7 +204,7 @@ void PageHeap::AddToFreeList(Span* span) { // Merge preceding span into this span ASSERT(prev->start + prev->length == p); const Length len = prev->length; - DLL_Remove(prev); + RemoveFromFreeList(prev); DeleteSpan(prev); span->start -= len; span->length += len; @@ -226,35 +216,43 @@ void PageHeap::AddToFreeList(Span* span) { // Merge next span into this span ASSERT(next->start == p+n); const Length len = next->length; - DLL_Remove(next); + RemoveFromFreeList(next); DeleteSpan(next); span->length += len; pagemap_.set(span->start + span->length - 1, span); Event(span, 'R', len); } + PrependToFreeList(span); +} + +void PageHeap::PrependToFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); SpanList* list = (span->length < kMaxPages) ? 
&free_[span->length] : &large_; if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes += (span->length << kPageShift); DLL_Prepend(&list->normal, span); } else { + stats_.unmapped_bytes += (span->length << kPageShift); DLL_Prepend(&list->returned, span); } } +void PageHeap::RemoveFromFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes -= (span->length << kPageShift); + } else { + stats_.unmapped_bytes -= (span->length << kPageShift); + } + DLL_Remove(span); +} + void PageHeap::IncrementalScavenge(Length n) { // Fast path; not yet time to release memory scavenge_counter_ -= n; if (scavenge_counter_ >= 0) return; // Not yet time to scavenge - // Never delay scavenging for more than the following number of - // deallocated pages. With 4K pages, this comes to 4GB of - // deallocation. - static const int kMaxReleaseDelay = 1 << 20; - - // If there is nothing to release, wait for so many pages before - // scavenging again. With 4K pages, this comes to 1GB of memory. - static const int kDefaultReleaseDelay = 1 << 18; - const double rate = FLAGS_tcmalloc_release_rate; if (rate <= 1e-6) { // Tiny release rate means that releasing is disabled. @@ -262,41 +260,62 @@ void PageHeap::IncrementalScavenge(Length n) { return; } - // Find index of free list to scavenge - int index = scavenge_index_ + 1; - for (int i = 0; i < kMaxPages+1; i++) { - if (index > kMaxPages) index = 0; - SpanList* slist = (index == kMaxPages) ? 
&large_ : &free_[index]; - if (!DLL_IsEmpty(&slist->normal)) { - // Release the last span on the normal portion of this list - Span* s = slist->normal.prev; - ASSERT(s->location == Span::ON_NORMAL_FREELIST); - DLL_Remove(s); - const Length n = s->length; - TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), - static_cast<size_t>(s->length << kPageShift)); - s->location = Span::ON_RETURNED_FREELIST; - AddToFreeList(s); - - // Compute how long to wait until we return memory. - // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages - // after releasing one page. - const double mult = 1000.0 / rate; - double wait = mult * static_cast<double>(n); - if (wait > kMaxReleaseDelay) { - // Avoid overflow and bound to reasonable range - wait = kMaxReleaseDelay; - } - scavenge_counter_ = static_cast<int64_t>(wait); + Length released_pages = ReleaseAtLeastNPages(1); - scavenge_index_ = index; // Scavenge at index+1 next time - return; + if (released_pages == 0) { + // Nothing to scavenge, delay for a while. + scavenge_counter_ = kDefaultReleaseDelay; + } else { + // Compute how long to wait until we return memory. + // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages + // after releasing one page. + const double mult = 1000.0 / rate; + double wait = mult * static_cast<double>(released_pages); + if (wait > kMaxReleaseDelay) { + // Avoid overflow and bound to reasonable range. + wait = kMaxReleaseDelay; } - index++; + scavenge_counter_ = static_cast<int64_t>(wait); } +} + +Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) { + Span* s = slist->normal.prev; + ASSERT(s->location == Span::ON_NORMAL_FREELIST); + RemoveFromFreeList(s); + const Length n = s->length; + TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), + static_cast<size_t>(s->length << kPageShift)); + s->location = Span::ON_RETURNED_FREELIST; + MergeIntoFreeList(s); // Coalesces if possible. 
+ return n; +} - // Nothing to scavenge, delay for a while - scavenge_counter_ = kDefaultReleaseDelay; +Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { + Length released_pages = 0; + Length prev_released_pages = -1; + + // Round robin through the lists of free spans, releasing the last + // span in each list. Stop after releasing at least num_pages. + while (released_pages < num_pages) { + if (released_pages == prev_released_pages) { + // Last iteration of while loop made no progress. + break; + } + prev_released_pages = released_pages; + + for (int i = 0; i < kMaxPages+1 && released_pages < num_pages; + i++, release_index_++) { + if (release_index_ > kMaxPages) release_index_ = 0; + SpanList* slist = (release_index_ == kMaxPages) ? + &large_ : &free_[release_index_]; + if (!DLL_IsEmpty(&slist->normal)) { + Length released_len = ReleaseLastNormalSpan(slist); + released_pages += released_len; + } + } + } + return released_pages; } void PageHeap::RegisterSizeClass(Span* span, size_t sc) { @@ -311,6 +330,10 @@ void PageHeap::RegisterSizeClass(Span* span, size_t sc) { } } +static double MB(uint64_t bytes) { + return bytes / 1048576.0; +} + static double PagesToMB(uint64_t pages) { return (pages << kPageShift) / 1048576.0; } @@ -323,8 +346,8 @@ void PageHeap::Dump(TCMalloc_Printer* out) { } } out->printf("------------------------------------------------\n"); - out->printf("PageHeap: %d sizes; %6.1f MB free\n", - nonempty_sizes, PagesToMB(free_pages_)); + out->printf("PageHeap: %d sizes; %6.1f MB free; %6.1f MB unmapped\n", + nonempty_sizes, MB(stats_.free_bytes), MB(stats_.unmapped_bytes)); out->printf("------------------------------------------------\n"); uint64_t total_normal = 0; uint64_t total_returned = 0; @@ -376,6 +399,37 @@ void PageHeap::Dump(TCMalloc_Printer* out) { PagesToMB(total_returned)); } +bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) { + Span* span = reinterpret_cast<Span*>(pagemap_.Next(start)); + if (span == NULL) { + 
return false; + } + r->address = span->start << kPageShift; + r->length = span->length << kPageShift; + r->fraction = 0; + switch (span->location) { + case Span::IN_USE: + r->type = base::MallocRange::INUSE; + r->fraction = 1; + if (span->sizeclass > 0) { + // Only some of the objects in this span may be in use. + const size_t osize = Static::sizemap()->class_to_size(span->sizeclass); + r->fraction = (1.0 * osize * span->refcount) / r->length; + } + break; + case Span::ON_NORMAL_FREELIST: + r->type = base::MallocRange::FREE; + break; + case Span::ON_RETURNED_FREELIST: + r->type = base::MallocRange::UNMAPPED; + break; + default: + r->type = base::MallocRange::UNKNOWN; + break; + } + return true; +} + static void RecordGrowth(size_t growth) { StackTrace* t = Static::stacktrace_allocator()->New(); t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3); @@ -401,8 +455,8 @@ bool PageHeap::GrowHeap(Length n) { ask = actual_size >> kPageShift; RecordGrowth(ask << kPageShift); - uint64_t old_system_bytes = system_bytes_; - system_bytes_ += (ask << kPageShift); + uint64_t old_system_bytes = stats_.system_bytes; + stats_.system_bytes += (ask << kPageShift); const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; ASSERT(p > 0); @@ -411,7 +465,7 @@ bool PageHeap::GrowHeap(Length n) { // when a program keeps allocating and freeing large blocks. if (old_system_bytes < kPageMapBigAllocationThreshold - && system_bytes_ >= kPageMapBigAllocationThreshold) { + && stats_.system_bytes >= kPageMapBigAllocationThreshold) { pagemap_.PreallocateMoreMemory(); } @@ -419,10 +473,8 @@ bool PageHeap::GrowHeap(Length n) { // Plus ensure one before and one after so coalescing code // does not need bounds-checking. if (pagemap_.Ensure(p-1, ask+2)) { - // Pretend the new area is allocated and then Delete() it to - // cause any necessary coalescing to occur. - // - // We do not adjust free_pages_ here since Delete() will do it for us. 
+ // Pretend the new area is allocated and then Delete() it to cause + // any necessary coalescing to occur. Span* span = NewSpan(p, ask); RecordSpan(span); Delete(span); @@ -464,26 +516,4 @@ bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, return true; } -void PageHeap::ReleaseFreeList(Span* list) { - // Walk backwards through list so that when we push these - // spans on the "returned" list, we preserve the order. - while (!DLL_IsEmpty(list)) { - Span* s = list->prev; - DLL_Remove(s); - ASSERT(s->location == Span::ON_NORMAL_FREELIST); - s->location = Span::ON_RETURNED_FREELIST; - TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), - static_cast<size_t>(s->length << kPageShift)); - AddToFreeList(s); // Coalesces if possible - } -} - -void PageHeap::ReleaseFreePages() { - for (Length s = 0; s < kMaxPages; s++) { - ReleaseFreeList(&free_[s].normal); - } - ReleaseFreeList(&large_.normal); - ASSERT(Check()); -} - } // namespace tcmalloc diff --git a/src/page_heap.h b/src/page_heap.h index bd18931..5ab0d04 100644 --- a/src/page_heap.h +++ b/src/page_heap.h @@ -34,6 +34,7 @@ #define TCMALLOC_PAGE_HEAP_H_ #include <config.h> +#include <google/malloc_extension.h> #include "common.h" #include "packed-cache-inl.h" #include "pagemap.h" @@ -119,13 +120,18 @@ class PageHeap { // Dump state to stderr void Dump(TCMalloc_Printer* out); - // Return number of bytes allocated from system - inline uint64_t SystemBytes() const { return system_bytes_; } + // If this page heap is managing a range with starting page # >= start, + // store info about the range in *r and return true. Else return false. 
+ bool GetNextRange(PageID start, base::MallocRange* r); - // Return number of free bytes in heap - uint64_t FreeBytes() const { - return (static_cast<uint64_t>(free_pages_) << kPageShift); - } + // Page heap statistics + struct Stats { + Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0) {} + uint64_t system_bytes; // Total bytes allocated from system + uint64_t free_bytes; // Total bytes on normal freelists + uint64_t unmapped_bytes; // Total bytes on returned freelists + }; + inline Stats stats() const { return stats_; } bool Check(); // Like Check() but does some more comprehensive checking. @@ -133,8 +139,13 @@ class PageHeap { bool CheckList(Span* list, Length min_pages, Length max_pages, int freelist); // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST - // Release all free pages in this heap for reuse by the OS: - void ReleaseFreePages(); + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages); // Return 0 if we have no information, or else the correct sizeclass for p. // Reads and writes to pagemap_cache_ do not require locking. @@ -163,6 +174,15 @@ class PageHeap { // REQUIRED: kMaxPages >= kMinSystemAlloc; static const size_t kMaxPages = kMinSystemAlloc; + // Never delay scavenging for more than the following number of + // deallocated pages. With 4K pages, this comes to 4GB of + // deallocation. + static const int kMaxReleaseDelay = 1 << 20; + + // If there is nothing to release, wait for so many pages before + // scavenging again. With 4K pages, this comes to 1GB of memory. 
+ static const int kDefaultReleaseDelay = 1 << 18; + // Pick the appropriate map and cache types based on pointer size typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; @@ -183,11 +203,8 @@ class PageHeap { // Array mapping from span length to a doubly linked list of free spans SpanList free_[kMaxPages]; - // Number of pages kept in free lists - uintptr_t free_pages_; - - // Bytes allocated from system - uint64_t system_bytes_; + // Statistics on system, free, and unmapped bytes + Stats stats_; bool GrowHeap(Length n); @@ -211,23 +228,30 @@ class PageHeap { // span of exactly the specified length. Else, returns NULL. Span* AllocLarge(Length n); - // Coalesce span with neighboring spans if possible. Add the - // resulting span to the appropriate free list. - void AddToFreeList(Span* span); + // Coalesce span with neighboring spans if possible, prepend to + // appropriate free list, and adjust stats. + void MergeIntoFreeList(Span* span); + + // Prepends span to appropriate free list, and adjusts stats. + void PrependToFreeList(Span* span); + + // Removes span from its free list, and adjust stats. + void RemoveFromFreeList(Span* span); // Incrementally release some memory to the system. // IncrementalScavenge(n) is called whenever n pages are freed. void IncrementalScavenge(Length n); - // Release all pages in the specified free list for reuse by the OS - // REQURES: list must be a "normal" list (i.e., not "returned") - void ReleaseFreeList(Span* list); + // Release the last span on the normal portion of this list. + // Return the length of that span. + Length ReleaseLastNormalSpan(SpanList* slist); + // Number of pages to deallocate before doing more scavenging int64_t scavenge_counter_; - // Index of last free list we scavenged - int scavenge_index_; + // Index of last free list where we released memory to the OS. 
+ int release_index_; }; } // namespace tcmalloc diff --git a/src/pagemap.h b/src/pagemap.h index 3559932..1786e68 100644 --- a/src/pagemap.h +++ b/src/pagemap.h @@ -95,10 +95,20 @@ class TCMalloc_PageMap1 { // REQUIRES "k" is in range "[0,2^BITS-1]". // REQUIRES "k" has been ensured before. // - // Sets the value for KEY. + // Sets the value 'v' for key 'k'. void set(Number k, void* v) { array_[k] = v; } + + // Return the first non-NULL pointer found in this map for + // a page number >= k. Returns NULL if no such number is found. + void* Next(Number k) const { + while (k < (1 << BITS)) { + if (array_[k] != NULL) return array_[k]; + k++; + } + return NULL; + } }; // Two-level radix tree @@ -170,6 +180,24 @@ class TCMalloc_PageMap2 { // Allocate enough to keep track of all possible pages Ensure(0, 1 << BITS); } + + void* Next(Number k) const { + while (k < (1 << BITS)) { + const Number i1 = k >> LEAF_BITS; + Leaf* leaf = root_[i1]; + if (leaf != NULL) { + // Scan forward in leaf + for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) { + if (leaf->values[i2] != NULL) { + return leaf->values[i2]; + } + } + } + // Skip to next top-level entry + k = (i1 + 1) << LEAF_BITS; + } + return NULL; + } }; // Three-level radix tree @@ -264,6 +292,29 @@ class TCMalloc_PageMap3 { void PreallocateMoreMemory() { } + + void* Next(Number k) const { + while (k < (Number(1) << BITS)) { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + if (root_->ptrs[i1] == NULL) { + // Advance to next top-level entry + k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS); + } else { + Leaf* leaf = reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2]); + if (leaf != NULL) { + for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) { + if (leaf->values[i3] != NULL) { + return leaf->values[i3]; + } + } + } + // Advance to next interior entry + k = ((k >> LEAF_BITS) + 1) << LEAF_BITS; + } + } + return NULL; + } }; #endif // 
TCMALLOC_PAGEMAP_H_ @@ -92,6 +92,7 @@ my $GV = "gv"; my $PS2PDF = "ps2pdf"; # These are used for dynamic profiles my $WGET = "wget"; +my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets my $CURL = "curl"; # These are the web pages that servers need to support for dynamic profiles @@ -117,6 +118,11 @@ my $address_length = 16; # A list of paths to search for shared object files my @prefix_list = (); +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. +my $sep_symbol = '_fini'; +my $sep_address = undef; + ##### Argument parsing ##### sub usage_string { @@ -504,6 +510,20 @@ sub Init() { ConfigureObjTools($main::prog) } + # Check what flags our commandline utilities support + if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) { + my @lines = <TFILE>; + if (grep(/unrecognized/, @lines) > 0) { + # grep found 'unrecognized' token from WGET, clear WGET flags + $WGET_FLAGS = ""; + } + close(TFILE); + } + # TODO(csilvers): check all the other binaries and objtools to see + # if they are installed and what flags they support, and store that + # in a data structure here, rather than scattering these tests about. + # Then, ideally, rewrite code to use wget OR curl OR GET or ... + # Break the opt_list_prefix into the prefix_list array @prefix_list = split (',', $main::opt_lib_prefix); @@ -952,22 +972,31 @@ sub PrintSymbolizedProfile { print 'binary=', $prog, "\n"; } while (my ($pc, $name) = each(%{$symbols})) { - my $fullname = $name->[2]; - print '0x', $pc, ' ', $fullname, "\n"; + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. 
+ for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; } print '---', "\n"; + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + print '--- ', $profile_marker, "\n"; if (defined($main::collected_profile)) { # if used with remote fetch, simply dump the collected profile to output. open(SRC, "<$main::collected_profile"); while (<SRC>) { print $_; } + close(SRC); } else { # dump a cpu-format profile to standard out - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; - print '--- ', $profile_marker, "\n"; PrintProfileData($profile); } } @@ -1069,9 +1098,9 @@ sub PrintDisassembly { } # Return reference to array of tuples of the form: -# [address, filename, linenumber, instruction] +# [start_address, filename, linenumber, instruction, limit_address] # E.g., -# ["0x806c43d", "/foo/bar.cc", 131, "ret"] +# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] sub Disassemble { my $prog = shift; my $offset = shift; @@ -1086,6 +1115,7 @@ sub Disassemble { my @result = (); my $filename = ""; my $linenumber = -1; + my $last = ["", "", "", ""]; while (<OBJDUMP>) { s/\r//g; # turn windows-looking lines into unix-looking lines chop; @@ -1098,7 +1128,9 @@ sub Disassemble { # Disassembly line -- zero-extend address to full length my $addr = HexExtend($1); my $k = AddressAdd($addr, $offset); - push(@result, [$k, $filename, $linenumber, $2]); + $last->[4] = $k; # Store ending address for previous instruction + $last = [$k, $filename, $linenumber, $2, $end_addr]; + push(@result, $last); } } close(OBJDUMP); @@ -1274,8 +1306,13 @@ sub PrintSource { my $total1 = 0; # Total flat counts my $total2 = 0; # Total cumulative counts foreach my $e (@instructions) { - my $c1 = GetEntry($flat, $e->[0]); - my $c2 = GetEntry($cumulative, $e->[0]); + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = 
$e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } $running1 += $c1; $running2 += $c2; $total1 += $c1; @@ -1386,8 +1423,13 @@ sub PrintDisassembledFunction { my $flat_total = 0; my $cum_total = 0; foreach my $e (@instructions) { - my $c1 = GetEntry($flat, $e->[0]); - my $c2 = GetEntry($cumulative, $e->[0]); + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } push(@flat_count, $c1); push(@cum_count, $c2); $flat_total += $c1; @@ -1615,10 +1657,10 @@ sub PrintDot { foreach my $k (keys(%{$raw})) { # TODO: omit low %age edges $n = $raw->{$k}; - my @addrs = split(/\n/, $k); - for (my $i = 1; $i <= $#addrs; $i++) { - my $src = OutputKey($symbols, $addrs[$i]); - my $dst = OutputKey($symbols, $addrs[$i-1]); + my @translated = TranslateStack($symbols, $k); + for (my $i = 1; $i <= $#translated; $i++) { + my $src = $translated[$i]; + my $dst = $translated[$i-1]; #next if ($src eq $dst); # Avoid self-edges? if (exists($node{$src}) && exists($node{$dst})) { my $edge_label = "$src\001$dst"; @@ -1648,14 +1690,18 @@ sub PrintDot { if ($edgeweight > 100000) { $edgeweight = 100000; } $edgeweight = int($edgeweight); + my $style = sprintf("setlinewidth(%f)", $w); + if ($x[1] =~ m/\(inline\)/) { + $style .= ",dashed"; + } + # Use a slightly squashed function of the edge count as the weight - printf DOT ("N%s -> N%s [label=%s, weight=%d, " . - "style=\"setlinewidth(%f)\"];\n", + printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", $node{$x[0]}, $node{$x[1]}, Unparse($n), $edgeweight, - $w); + $style); } } @@ -1665,42 +1711,74 @@ sub PrintDot { return 1; } -# Generate the key under which a given address should be counted -# based on the user-specified output granularity. 
-sub OutputKey { +# Translate a stack of addresses into a stack of symbols +sub TranslateStack { my $symbols = shift; - my $a = shift; - - # Skip large addresses since they sometimes show up as fake entries on RH9 - if (length($a) > 8) { - if ($a gt "7fffffffffffffff") { return ''; } - } - - # Extract symbolic info for address - my $func = $a; - my $fullfunc = $a; - my $fileline = ""; - if (exists($symbols->{$a})) { - $func = $symbols->{$a}->[0]; - $fullfunc = $symbols->{$a}->[2]; - $fileline = $symbols->{$a}->[1]; - } - - if ($main::opt_disasm || $main::opt_list) { - return $a; # We want just the address for the key - } elsif ($main::opt_addresses) { - return "$a $func $fileline"; - } elsif ($main::opt_lines) { - return "$func $fileline"; - } elsif ($main::opt_functions) { - return $func; - } elsif ($main::opt_files) { - my $f = ($fileline eq '') ? $a : $fileline; - $f =~ s/:\d+$//; - return $f; - } else { - return $a; + my $k = shift; + + my @addrs = split(/\n/, $k); + my @result = (); + for (my $i = 0; $i <= $#addrs; $i++) { + my $a = $addrs[$i]; + + # Skip large addresses since they sometimes show up as fake entries on RH9 + if (length($a) > 8 && $a gt "7fffffffffffffff") { + next; + } + + if ($main::opt_disasm || $main::opt_list) { + # We want just the address for the key + push(@result, $a); + next; + } + + my $symlist = $symbols->{$a}; + if (!defined($symlist)) { + $symlist = [$a, "", $a]; + } + + # We can have a sequence of symbols for a particular entry + # (more than one symbol in the case of inlining). Callers + # come before callees in symlist, so walk backwards since + # the translated stack should contain callees before callers. + for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { + my $func = $symlist->[$j-2]; + my $fileline = $symlist->[$j-1]; + my $fullfunc = $symlist->[$j]; + if ($j > 2) { + $func = "$func (inline)"; + } + if ($main::opt_addresses) { + push(@result, "$a $func $fileline"); + } elsif ($main::opt_lines) { + if ($func eq '??' 
&& $fileline eq '??:0') { + push(@result, "$a"); + } else { + push(@result, "$func $fileline"); + } + } elsif ($main::opt_functions) { + if ($func eq '??') { + push(@result, "$a"); + } else { + push(@result, $func); + } + } elsif ($main::opt_files) { + if ($fileline eq '??:0' || $fileline eq '') { + push(@result, "$a"); + } else { + my $f = $fileline; + $f =~ s/:\d+$//; + push(@result, $f); + } + } else { + push(@result, $a); + last; # Do not print inlined info + } + } } + + # print join(",", @addrs), " => ", join(",", @result), "\n"; + return @result; } # Generate percent string for a number and a total @@ -1978,17 +2056,16 @@ sub ReduceProfile { my $result = {}; foreach my $k (keys(%{$profile})) { my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); + my @translated = TranslateStack($symbols, $k); my @path = (); my %seen = (); $seen{''} = 1; # So that empty keys are skipped - foreach my $a (@addrs) { + foreach my $e (@translated) { # To avoid double-counting due to recursion, skip a stack-trace # entry if it has already been seen - my $key = OutputKey($symbols, $a); - if (!$seen{$key}) { - $seen{$key} = 1; - push(@path, $key); + if (!$seen{$e}) { + $seen{$e} = 1; + push(@path, $e); } } my $reduced_path = join("\n", @path); @@ -1997,6 +2074,20 @@ sub ReduceProfile { return $result; } +# Does the specified symbol array match the regexp? 
+sub SymbolMatches { + my $sym = shift; + my $re = shift; + if (defined($sym)) { + for (my $i = 0; $i < $#{$sym}; $i += 3) { + if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { + return 1; + } + } + } + return 0; +} + # Focus only on paths involving specified regexps sub FocusProfile { my $symbols = shift; @@ -2008,10 +2099,7 @@ sub FocusProfile { my @addrs = split(/\n/, $k); foreach my $a (@addrs) { # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$focus/) || - (exists($symbols->{$a}) && - (($symbols->{$a}->[0] =~ m/$focus/) || - ($symbols->{$a}->[1] =~ m/$focus/)))) { + if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { AddEntry($result, $k, $count); last; } @@ -2032,10 +2120,7 @@ sub IgnoreProfile { my $matched = 0; foreach my $a (@addrs) { # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$ignore/) || - (exists($symbols->{$a}) && - (($symbols->{$a}->[0] =~ m/$ignore/) || - ($symbols->{$a}->[1] =~ m/$ignore/)))) { + if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { $matched = 1; last; } @@ -2195,7 +2280,7 @@ sub IsSymbolizedProfileFile { sub CheckSymbolPage { my $url = SymbolPageURL(); - open(SYMBOL, "$WGET -qO- '$url' |"); + open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |"); my $line = <SYMBOL>; $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines close(SYMBOL); @@ -2240,7 +2325,7 @@ sub SymbolPageURL { sub FetchProgramName() { my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); my $url = "http://$host:$port$PROGRAM_NAME_PAGE"; - my $command_line = "$WGET -qO- '$url'"; + my $command_line = "$WGET $WGET_FLAGS -qO- '$url'"; open(CMDLINE, "$command_line |") or error($command_line); my $cmdline = <CMDLINE>; $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2346,13 +2431,21 @@ sub FetchSymbols { # /symbol, the symbols match and are retrievable from the map. 
my $shortpc = $pc; $shortpc =~ s/^0*//; + # Each line may have a list of names, which includes the function + # and also other functions it has inlined. They are separated + # (in PrintSymbolizedFile), by --, which is illegal in function names. + my $fullnames; if (defined($symbol_map->{$shortpc})) { - $fullname = $symbol_map->{$shortpc}; + $fullnames = $symbol_map->{$shortpc}; } else { - $fullname = "0x" . $pc; # Just use addresses + $fullnames = "0x" . $pc; # Just use addresses + } + my $sym = []; + $symbols->{$pc} = $sym; + foreach my $fullname (split("--", $fullnames)) { + my $name = ShortFunctionName($fullname); + push(@{$sym}, $name, "?", $fullname); } - my $name = ShortFunctionName($fullname); - $symbols->{$pc} = [$name, "?", $fullname]; } return $symbols; } @@ -2427,7 +2520,7 @@ sub FetchDynamicProfile { return $real_profile; } - my $cmd = "$WGET $wget_timeout -q -O $tmp_profile '$url'"; + my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'"; if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){ print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; if ($encourage_patience) { @@ -2752,12 +2845,26 @@ sub ReadCPUProfile { # Make key out of the stack entries my @k = (); - for (my $j = $d; $j--; ) { + for (my $j = 0; $j < $d; $j++) { my $pclo = $slots->get($i++); my $pchi = $slots->get($i++); if ($pclo == -1 || $pchi == -1) { error("$fname: Unexpected EOF when reading stack of depth $d\n"); } + + # Subtract one from caller pc so we map back to call instr. + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. 
+ if ($j > 0 && !$main::use_symbolized_profile) { + if ($pclo == 0) { + $pchi--; + $pclo = 0xffffffff; + } else { + $pclo--; + } + } + my $pc = sprintf("%08x%08x", $pchi, $pclo); $pcs->{$pc} = 1; push @k, $pc; @@ -3516,87 +3623,111 @@ sub MapToSymbols { my $pclist = shift; my $symbols = shift; + my $debug = 0; + # Ignore empty binaries if ($#{$pclist} < 0) { return; } - my $got_symbols = MapSymbolsWithNM($image, $offset, $pclist, $symbols); - if ($main::opt_interactive || - $main::opt_addresses || - $main::opt_lines || - $main::opt_files || - $main::opt_list || - $main::opt_callgrind || - !$got_symbols) { - GetLineNumbers($image, $offset, $pclist, $symbols); + # Figure out the addr2line command to use + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = "$addr2line -f -C -e $image"; + if (exists $obj_tool_map{"addr2line_pdb"}) { + $addr2line = $obj_tool_map{"addr2line_pdb"}; + $cmd = "$addr2line --demangle -f -C -e $image"; } -} -# The file $tmpfile_sym must already have been created before calling this. -sub GetLineNumbersViaAddr2Line { - my $addr2line_command = shift; - my $pclist = shift; - my $symbols = shift; + # If "addr2line" isn't installed on the system at all, just use + # nm to get what info we can (function names, but not line numbers). + if (system("$addr2line --help >/dev/null 2>&1") != 0) { + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + return; + } + + # "addr2line -i" can produce a variable number of lines per input + # address, with no separator that allows us to tell when data for + # the next address starts. So we find the address for a special + # symbol (_fini) and interleave this address between all real + # addresses passed to addr2line. The name of this special symbol + # can then be used as a separator. + $sep_address = undef; # May be filled in by MapSymbolsWithNM() + my $nm_symbols = {}; + MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); + # TODO(csilvers): only add '-i' if addr2line supports it. 
+ if (defined($sep_address)) { + # Only add " -i" to addr2line if the binary supports it. + # addr2line --help returns 0, but not if it sees an unknown flag first. + if (system("$cmd -i --help >/dev/null 2>&1") == 0) { + $cmd .= " -i"; + } else { + $sep_address = undef; # no need for sep_address if we don't support -i + } + } + + # Make file with all PC values with intervening 'sep_address' so + # that we can reliably detect the end of inlined function list + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + if ($debug) { print("---- $image ---\n"); } + for (my $i = 0; $i <= $#{$pclist}; $i++) { + # addr2line always reads hex addresses, and does not need '0x' prefix. + if ($debug) { printf("%s\n", $pclist->[$i]); } + printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); + if (defined($sep_address)) { + printf ADDRESSES ("%s\n", $sep_address); + } + } + close(ADDRESSES); + if ($debug) { + print("----\n"); + system("cat $main::tmpfile_sym"); + print("----\n"); + system("$cmd <$main::tmpfile_sym"); + print("----\n"); + } - open(SYMBOLS, "$addr2line_command <$main::tmpfile_sym |") - || error("$addr2line_command: $!\n"); - my $count = 0; + open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n"); + my $count = 0; # Index in pclist while (<SYMBOLS>) { + # Read fullfunction and filelineinfo from next pair of lines s/\r?\n$//g; my $fullfunction = $_; - $_ = <SYMBOLS>; s/\r?\n$//g; my $filelinenum = $_; - $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths - if (!$main::opt_list) { - $filelinenum =~ s|^.*/([^/]+:\d+)$|$1|; # Remove directory name + + if (defined($sep_address) && $fullfunction eq $sep_symbol) { + # Terminating marker for data for this address + $count++; + next; } + $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths + my $pcstr = $pclist->[$count]; - if (defined($symbols->{$pcstr})) { - # Override just the line-number portion. 
The function name portion - # is less buggy when computed using nm instead of addr2line. But - # don't override if addr2line is giving ??'s and nm didn't. (This - # may be seen mostly/entirely on cygwin's addr2line/nm.) - if (($filelinenum ne "??:0") || ($symbols->{$pcstr}->[1] eq "?")) { - $symbols->{$pcstr}->[1] = $filelinenum; + my $function = ShortFunctionName($fullfunction); + if ($fullfunction eq '??') { + # See if nm found a symbol + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + $function = $nms->[0]; + $fullfunction = $nms->[2]; } - } else { - my $function = ShortFunctionName($fullfunction); - $symbols->{$pcstr} = [$function, $filelinenum, $fullfunction]; } - $count++; - } - close(SYMBOLS); - return $count; -} -sub GetLineNumbers { - my $image = shift; - my $offset = shift; - my $pclist = shift; - my $symbols = shift; - - # Make file with all PC values - open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); - for (my $i = 0; $i <= $#{$pclist}; $i++) { - # addr2line always reads hex addresses, and does not need '0x' prefix. 
- printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); - } - close(ADDRESSES); - - # Pass to addr2line - my $addr2line = $obj_tool_map{"addr2line"}; - my @addr2line_commands = ("$addr2line -f -C -e $image"); - if (exists $obj_tool_map{"addr2line_pdb"}) { - my $addr2line_pdb = $obj_tool_map{"addr2line_pdb"}; - push(@addr2line_commands, "$addr2line_pdb --demangle -f -C -e $image"); - } - foreach my $addr2line_command (@addr2line_commands) { - if (GetLineNumbersViaAddr2Line("$addr2line_command", $pclist, $symbols)) { - last; + # Prepend to accumulated symbols for pcstr + # (so that caller comes before callee) + my $sym = $symbols->{$pcstr}; + if (!defined($sym)) { + $sym = []; + $symbols->{$pcstr} = $sym; + } + unshift(@{$sym}, $function, $filelinenum, $fullfunction); + if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if (!defined($sep_address)) { + # Inlining is off, se this entry ends immediately + $count++; } } + close(SYMBOLS); } # Use nm to map the list of referenced PCs to symbols. Return true iff we @@ -3646,7 +3777,7 @@ sub MapSymbolsWithNM { } return 1; } - + sub ShortFunctionName { my $function = shift; while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types @@ -3813,6 +3944,10 @@ sub GetProcedureBoundariesViaNm { next; } + if ($this_routine eq $sep_symbol) { + $sep_address = HexExtend($start_val); + } + # Tag this routine with the starting address in case the image # has multiple occurrences of this routine. We use a syntax # that resembles template paramters that are automatically diff --git a/src/stacktrace_config.h b/src/stacktrace_config.h index 3bd0fb3..b58ab1d 100644 --- a/src/stacktrace_config.h +++ b/src/stacktrace_config.h @@ -46,17 +46,8 @@ #ifndef BASE_STACKTRACE_CONFIG_H_ #define BASE_STACKTRACE_CONFIG_H_ -// First, the i386 case. 
-#if defined(__i386__) && __GNUC__ >= 2 -# if !defined(NO_FRAME_POINTER) -# define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h" -# define STACKTRACE_SKIP_CONTEXT_ROUTINES 1 -# else -# define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h" -# endif - -// Now, the x86_64 case. -#elif defined(__x86_64__) && __GNUC__ >= 2 +// First, the i386 and x86_64 case. +#if (defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 2 # if !defined(NO_FRAME_POINTER) # define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h" # define STACKTRACE_SKIP_CONTEXT_ROUTINES 1 diff --git a/src/symbolize.cc b/src/symbolize.cc index b7cdf0e..6fe44b9 100644 --- a/src/symbolize.cc +++ b/src/symbolize.cc @@ -166,7 +166,7 @@ extern bool Symbolize(char *out, int out_size, return false; // make the symbolization_table values point to the output vector SymbolMap::iterator fill = symbolization_table->begin(); - char *current_name = out; + const char *current_name = out; for (int i = 0; i < total_bytes_read; i++) { if (out[i] == '\n') { fill->second = current_name; diff --git a/src/symbolize.h b/src/symbolize.h index 72196f6..8fb0366 100644 --- a/src/symbolize.h +++ b/src/symbolize.h @@ -33,6 +33,10 @@ #ifndef TCMALLOC_SYMBOLIZE_H_ #define TCMALLOC_SYMBOLIZE_H_ +#include "config.h" +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif #include <map> using std::map; @@ -42,7 +46,7 @@ static const int kSymbolSize = 1024; // TODO(glider): it's better to make SymbolMap a class that encapsulates the // address operations and has the Symbolize() method. 
-typedef map<uintptr_t, char*> SymbolMap; +typedef map<uintptr_t, const char*> SymbolMap; extern bool Symbolize(char *out, int out_size, SymbolMap *symbolization_table); diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index daa01d0..450c1ab 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -136,6 +136,7 @@ # define WIN32_DO_PATCHING 1 #endif +using std::max; using tcmalloc::PageHeap; using tcmalloc::PageHeapAllocator; using tcmalloc::SizeMap; @@ -382,12 +383,11 @@ size_t InvalidGetAllocatedSize(void* ptr) { // Extract interesting stats struct TCMallocStats { - uint64_t system_bytes; // Bytes alloced from system - uint64_t thread_bytes; // Bytes in thread caches - uint64_t central_bytes; // Bytes in central cache - uint64_t transfer_bytes; // Bytes in central transfer cache - uint64_t pageheap_bytes; // Bytes in page heap - uint64_t metadata_bytes; // Bytes alloced for metadata + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t transfer_bytes; // Bytes in central transfer cache + uint64_t metadata_bytes; // Bytes alloced for metadata + PageHeap::Stats pageheap; // Stats from page heap }; // Get stats into "r". 
Also get per-size-class counts if class_count != NULL @@ -409,13 +409,8 @@ static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { { // scope SpinLockHolder h(Static::pageheap_lock()); ThreadCache::GetThreadStats(&r->thread_bytes, class_count); - } - - { //scope - SpinLockHolder h(Static::pageheap_lock()); - r->system_bytes = Static::pageheap()->SystemBytes(); r->metadata_bytes = tcmalloc::metadata_system_bytes(); - r->pageheap_bytes = Static::pageheap()->FreeBytes(); + r->pageheap = Static::pageheap()->stats(); } } @@ -453,8 +448,9 @@ static void DumpStats(TCMalloc_Printer* out, int level) { DumpSystemAllocatorStats(out); } - const uint64_t bytes_in_use = stats.system_bytes - - stats.pageheap_bytes + const uint64_t bytes_in_use = stats.pageheap.system_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes - stats.central_bytes - stats.transfer_bytes - stats.thread_bytes; @@ -463,6 +459,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) { "MALLOC: %12" PRIu64 " (%7.1f MB) Heap size\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes in use by application\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in page heap\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes unmapped in page heap\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in central cache\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in transfer cache\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in thread caches\n" @@ -470,9 +467,10 @@ static void DumpStats(TCMalloc_Printer* out, int level) { "MALLOC: %12" PRIu64 " Thread heaps in use\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n" "------------------------------------------------\n", - stats.system_bytes, stats.system_bytes / MB, + stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB, bytes_in_use, bytes_in_use / MB, - stats.pageheap_bytes, stats.pageheap_bytes / MB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MB, 
stats.central_bytes, stats.central_bytes / MB, stats.transfer_bytes, stats.transfer_bytes / MB, stats.thread_bytes, stats.thread_bytes / MB, @@ -536,9 +534,50 @@ static void** DumpHeapGrowthStackTraces() { return result; } +static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) { + PageID page = 1; // Some code may assume that page==0 is never used + bool done = false; + while (!done) { + // Accumulate a small number of ranges in a local buffer + static const int kNumRanges = 16; + static base::MallocRange ranges[kNumRanges]; + int n = 0; + { + SpinLockHolder h(Static::pageheap_lock()); + while (n < kNumRanges) { + if (!Static::pageheap()->GetNextRange(page, &ranges[n])) { + done = true; + break; + } else { + uintptr_t limit = ranges[n].address + ranges[n].length; + page = (limit + kPageSize - 1) >> kPageShift; + n++; + } + } + } + + for (int i = 0; i < n; i++) { + (*func)(arg, &ranges[i]); + } + } +} + // TCMalloc's support for extra malloc interfaces class TCMallocImplementation : public MallocExtension { + private: + // ReleaseToSystem() might release more than the requested bytes because + // the page heap releases at the span granularity, and spans are of wildly + // different sizes. This member keeps track of the extra bytes bytes + // released so that the app can periodically call ReleaseToSystem() to + // release memory at a constant rate. + // NOTE: Protected by Static::pageheap_lock(). 
+ size_t extra_bytes_released_; + public: + TCMallocImplementation() + : extra_bytes_released_(0) { + } + virtual void GetStats(char* buffer, int buffer_length) { ASSERT(buffer_length > 0); TCMalloc_Printer printer(buffer, buffer_length); @@ -568,32 +607,51 @@ class TCMallocImplementation : public MallocExtension { return DumpHeapGrowthStackTraces(); } + virtual void Ranges(void* arg, RangeFunction func) { + IterateOverRanges(arg, func); + } + virtual bool GetNumericProperty(const char* name, size_t* value) { ASSERT(name != NULL); if (strcmp(name, "generic.current_allocated_bytes") == 0) { TCMallocStats stats; ExtractStats(&stats, NULL); - *value = stats.system_bytes + *value = stats.pageheap.system_bytes - stats.thread_bytes - stats.central_bytes - stats.transfer_bytes - - stats.pageheap_bytes; + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes; return true; } if (strcmp(name, "generic.heap_size") == 0) { TCMallocStats stats; ExtractStats(&stats, NULL); - *value = stats.system_bytes; + *value = stats.pageheap.system_bytes; return true; } if (strcmp(name, "tcmalloc.slack_bytes") == 0) { // We assume that bytes in the page heap are not fragmented too - // badly, and are therefore available for allocation. + // badly, and are therefore available for allocation without + // growing the pageheap system byte count. 
+ SpinLockHolder l(Static::pageheap_lock()); + PageHeap::Stats stats = Static::pageheap()->stats(); + *value = stats.free_bytes + stats.unmapped_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) { SpinLockHolder l(Static::pageheap_lock()); - *value = Static::pageheap()->FreeBytes(); + *value = Static::pageheap()->stats().free_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().unmapped_bytes; return true; } @@ -631,9 +689,32 @@ class TCMallocImplementation : public MallocExtension { virtual void MarkThreadBusy(); // Implemented below - virtual void ReleaseFreeMemory() { + virtual void ReleaseToSystem(ssize_t num_bytes) { + if (num_bytes <= 0) { + return; + } SpinLockHolder h(Static::pageheap_lock()); - Static::pageheap()->ReleaseFreePages(); + if (num_bytes <= extra_bytes_released_) { + // We released too much on a prior call, so don't release any + // more this time. + extra_bytes_released_ = extra_bytes_released_ - num_bytes; + return; + } + num_bytes = num_bytes - extra_bytes_released_; + // num_bytes might be less than one page. If we pass zero to + // ReleaseAtLeastNPages, it won't do anything, so we release a whole + // page now and let extra_bytes_released_ smooth it out over time. + Length num_pages = max<Length>(num_bytes >> kPageShift, 1); + size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages( + num_pages) << kPageShift; + if (bytes_released > num_bytes) { + extra_bytes_released_ = bytes_released - num_bytes; + } else { + // The PageHeap wasn't able to release num_bytes. Don't try to + // compensate with a big release next time. Specifically, + // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX). 
+ extra_bytes_released_ = 0; + } } virtual void SetMemoryReleaseRate(double rate) { @@ -1063,16 +1144,18 @@ inline struct mallinfo do_mallinfo() { // Unfortunately, the struct contains "int" field, so some of the // size values will be truncated. - info.arena = static_cast<int>(stats.system_bytes); + info.arena = static_cast<int>(stats.pageheap.system_bytes); info.fsmblks = static_cast<int>(stats.thread_bytes + stats.central_bytes + stats.transfer_bytes); - info.fordblks = static_cast<int>(stats.pageheap_bytes); - info.uordblks = static_cast<int>(stats.system_bytes + info.fordblks = static_cast<int>(stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); + info.uordblks = static_cast<int>(stats.pageheap.system_bytes - stats.thread_bytes - stats.central_bytes - stats.transfer_bytes - - stats.pageheap_bytes); + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes); return info; } diff --git a/src/tests/malloc_extension_c_test.c b/src/tests/malloc_extension_c_test.c index aad2d4b..b6319a1 100644 --- a/src/tests/malloc_extension_c_test.c +++ b/src/tests/malloc_extension_c_test.c @@ -108,6 +108,7 @@ void TestMallocExtension(void) { } MallocExtension_MarkThreadIdle(); MallocExtension_MarkThreadBusy(); + MallocExtension_ReleaseToSystem(1); MallocExtension_ReleaseFreeMemory(); if (MallocExtension_GetEstimatedAllocatedSize(10) < 10) { FAIL("GetEstimatedAllocatedSize returned a bad value (too small)"); diff --git a/src/tests/page_heap_test.cc b/src/tests/page_heap_test.cc new file mode 100644 index 0000000..9120b78 --- /dev/null +++ b/src/tests/page_heap_test.cc @@ -0,0 +1,55 @@ +// Copyright 2009 Google Inc. All Rights Reserved. 
+// Author: fikes@google.com (Andrew Fikes) + +#include <stdio.h> +#include "config_for_unittests.h" +#include "base/logging.h" +#include "common.h" +#include "page_heap.h" + +namespace { + +static void CheckStats(const tcmalloc::PageHeap* ph, + uint64_t system_pages, + uint64_t free_pages, + uint64_t unmapped_pages) { + tcmalloc::PageHeap::Stats stats = ph->stats(); + EXPECT_EQ(system_pages, stats.system_bytes >> kPageShift); + EXPECT_EQ(free_pages, stats.free_bytes >> kPageShift); + EXPECT_EQ(unmapped_pages, stats.unmapped_bytes >> kPageShift); +} + +static void TestPageHeap_Stats() { + tcmalloc::PageHeap* ph = new tcmalloc::PageHeap(); + + // Empty page heap + CheckStats(ph, 0, 0, 0); + + // Allocate a span 's1' + tcmalloc::Span* s1 = ph->New(256); + CheckStats(ph, 256, 0, 0); + + // Split span 's1' into 's1', 's2'. Delete 's2' + tcmalloc::Span* s2 = ph->Split(s1, 128); + Length s2_len = s2->length; + ph->Delete(s2); + CheckStats(ph, 256, 128, 0); + + // Unmap deleted span 's2' + EXPECT_EQ(s2_len, ph->ReleaseAtLeastNPages(1)); + CheckStats(ph, 256, 0, 128); + + // Delete span 's1' + ph->Delete(s1); + CheckStats(ph, 256, 128, 128); + + delete ph; +} + +} // namespace + +int main(int argc, char **argv) { + TestPageHeap_Stats(); + printf("PASS\n"); + return 0; +} diff --git a/src/tests/pagemap_unittest.cc b/src/tests/pagemap_unittest.cc index dcf6c9a..83e76e2 100644 --- a/src/tests/pagemap_unittest.cc +++ b/src/tests/pagemap_unittest.cc @@ -113,6 +113,53 @@ void TestMap(int limit, bool limit_is_below_the_overflow_boundary) { } } +// REQUIRES: BITS==10, i.e., valid range is [0,1023]. 
+// Representations for different types will end up being: +// PageMap1: array[1024] +// PageMap2: array[32][32] +// PageMap3: array[16][16][4] +template <class Type> +void TestNext(const char* name) { + RAW_LOG(ERROR, "Running NextTest %s\n", name); + Type map(malloc); + char a, b, c, d, e; + + // When map is empty + CHECK(map.Next(0) == NULL); + CHECK(map.Next(5) == NULL); + CHECK(map.Next(1<<30) == NULL); + + // Add a single value + map.Ensure(40, 1); + map.set(40, &a); + CHECK(map.Next(0) == &a); + CHECK(map.Next(39) == &a); + CHECK(map.Next(40) == &a); + CHECK(map.Next(41) == NULL); + CHECK(map.Next(1<<30) == NULL); + + // Add a few values + map.Ensure(41, 1); + map.Ensure(100, 3); + map.set(41, &b); + map.set(100, &c); + map.set(101, &d); + map.set(102, &e); + CHECK(map.Next(0) == &a); + CHECK(map.Next(39) == &a); + CHECK(map.Next(40) == &a); + CHECK(map.Next(41) == &b); + CHECK(map.Next(42) == &c); + CHECK(map.Next(63) == &c); + CHECK(map.Next(64) == &c); + CHECK(map.Next(65) == &c); + CHECK(map.Next(99) == &c); + CHECK(map.Next(100) == &c); + CHECK(map.Next(101) == &d); + CHECK(map.Next(102) == &e); + CHECK(map.Next(103) == NULL); +} + int main(int argc, char** argv) { TestMap< TCMalloc_PageMap1<10> > (100, true); TestMap< TCMalloc_PageMap1<10> > (1 << 10, false); @@ -121,6 +168,10 @@ int main(int argc, char** argv) { TestMap< TCMalloc_PageMap3<20> > (100, true); TestMap< TCMalloc_PageMap3<20> > (1 << 20, false); + TestNext< TCMalloc_PageMap1<10> >("PageMap1"); + TestNext< TCMalloc_PageMap2<10> >("PageMap2"); + TestNext< TCMalloc_PageMap3<10> >("PageMap3"); + printf("PASS\n"); return 0; } diff --git a/src/tests/profile-handler_unittest.cc b/src/tests/profile-handler_unittest.cc index 4b247c7..1e72b2e 100644 --- a/src/tests/profile-handler_unittest.cc +++ b/src/tests/profile-handler_unittest.cc @@ -8,8 +8,9 @@ #include "profile-handler.h" #include <assert.h> -#include <sys/time.h> #include <pthread.h> +#include <sys/time.h> +#include <time.h> #include 
"base/logging.h" #include "base/simple_mutex.h" @@ -46,11 +47,11 @@ class Thread { bool joinable_; }; -// Sleep interval in usecs. To ensure a SIGPROF timer interrupt under heavy -// load, this is set to a 20x of ProfileHandler timer interval (i.e 100Hz) +// timespec of the sleep interval. To ensure a SIGPROF timer interrupt under +// heavy load, this is set to a 20x of ProfileHandler timer interval (i.e 100Hz) // TODO(nabeelmian) Under very heavy loads, the worker thread may not accumulate // enough cpu usage to get a profile tick. -int kSleepInterval = 200000; +const struct timespec sleep_interval = { 0, 200000000 }; // 200 ms // Whether each thread has separate timers. static bool timer_separate_ = false; @@ -213,7 +214,7 @@ class ProfileHandlerTest { busy_worker_->Start(); // Wait for worker to start up and register with the ProfileHandler. // TODO(nabeelmian) This may not work under very heavy load. - usleep(kSleepInterval); + nanosleep(&sleep_interval, NULL); } // Stops the worker thread. @@ -257,7 +258,7 @@ class ProfileHandlerTest { uint64 interrupts_before = GetInterruptCount(); // Sleep for a bit and check that tick counter is making progress. int old_tick_count = tick_counter; - usleep(kSleepInterval); + nanosleep(&sleep_interval, NULL); int new_tick_count = tick_counter; EXPECT_GT(new_tick_count, old_tick_count); uint64 interrupts_after = GetInterruptCount(); @@ -268,7 +269,7 @@ class ProfileHandlerTest { void VerifyUnregistration(const int& tick_counter) { // Sleep for a bit and check that tick counter is not making progress. int old_tick_count = tick_counter; - usleep(kSleepInterval); + nanosleep(&sleep_interval, NULL); int new_tick_count = tick_counter; EXPECT_EQ(new_tick_count, old_tick_count); // If no callbacks, signal handler and shared timer should be disabled. @@ -297,7 +298,7 @@ class ProfileHandlerTest { } // Verify that the ProfileHandler is not accumulating profile ticks. 
uint64 interrupts_before = GetInterruptCount(); - usleep(kSleepInterval); + nanosleep(&sleep_interval, NULL); uint64 interrupts_after = GetInterruptCount(); EXPECT_EQ(interrupts_after, interrupts_before); } diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc index 713fbe1..8eccb18 100644 --- a/src/tests/tcmalloc_unittest.cc +++ b/src/tests/tcmalloc_unittest.cc @@ -124,6 +124,9 @@ using std::vector; using std::string; +DECLARE_double(tcmalloc_release_rate); +DECLARE_int32(max_free_queue_size); // in debugallocation.cc + namespace testing { static const int FLAGS_numtests = 50000; @@ -747,6 +750,127 @@ static void TestHugeThreadCache() { delete[] array; } +namespace { + +struct RangeCallbackState { + uintptr_t ptr; + base::MallocRange::Type expected_type; + size_t min_size; + bool matched; +}; + +static void RangeCallback(void* arg, const base::MallocRange* r) { + RangeCallbackState* state = reinterpret_cast<RangeCallbackState*>(arg); + if (state->ptr >= r->address && + state->ptr < r->address + r->length) { + CHECK_EQ(r->type, state->expected_type); + CHECK_GE(r->length, state->min_size); + state->matched = true; + } +} + +// Check that at least one of the callbacks from Ranges() contains +// the specified address with the specified type, and has size +// >= min_size. 
+static void CheckRangeCallback(void* ptr, base::MallocRange::Type type,
+                               size_t min_size) {
+  RangeCallbackState state;
+  state.ptr = reinterpret_cast<uintptr_t>(ptr);
+  state.expected_type = type;
+  state.min_size = min_size;
+  state.matched = false;
+  MallocExtension::instance()->Ranges(&state, RangeCallback);
+  CHECK(state.matched);
+}
+
+}
+
+static void TestRanges() {
+  static const int MB = 1048576;
+  void* a = malloc(MB);
+  void* b = malloc(MB);
+  CheckRangeCallback(a, base::MallocRange::INUSE, MB);
+  CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+  free(a);
+  CheckRangeCallback(a, base::MallocRange::FREE, MB);
+  CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+  MallocExtension::instance()->ReleaseFreeMemory();
+  CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB);
+  CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+  free(b);
+  CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB);
+  CheckRangeCallback(b, base::MallocRange::FREE, MB);
+}
+
+static size_t GetUnmappedBytes() {
+  size_t bytes;
+  CHECK(MallocExtension::instance()->GetNumericProperty(
+      "tcmalloc.pageheap_unmapped_bytes", &bytes));
+  return bytes;
+}
+
+static void TestReleaseToSystem() {
+  // Debug allocation mode adds overhead to each allocation which
+  // messes up all the equality tests here. I just disable the
+  // test in this mode. TODO(csilvers): get it to work for debugalloc?
+#ifndef DEBUGALLOCATION
+  const double old_tcmalloc_release_rate = FLAGS_tcmalloc_release_rate;
+  FLAGS_tcmalloc_release_rate = 0;
+
+  static const int MB = 1048576;
+  void* a = malloc(MB);
+  void* b = malloc(MB);
+  MallocExtension::instance()->ReleaseFreeMemory();
+  size_t starting_bytes = GetUnmappedBytes();
+
+  // Calling ReleaseFreeMemory() a second time shouldn't do anything.
+  MallocExtension::instance()->ReleaseFreeMemory();
+  EXPECT_EQ(starting_bytes, GetUnmappedBytes());
+
+  // ReleaseToSystem shouldn't do anything either.
+ MallocExtension::instance()->ReleaseToSystem(MB); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + free(a); + + // Negative numbers should be ignored. + MallocExtension::instance()->ReleaseToSystem(-5); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + // The span to release should be 1MB. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + // Should do nothing since the previous call released too much. + MallocExtension::instance()->ReleaseToSystem(MB/4); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + free(b); + + // Use up the extra MB/4 bytes from 'a' and also release 'b'. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + // Should do nothing since the previous call released too much. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + // Nothing else to release. + MallocExtension::instance()->ReleaseFreeMemory(); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + a = malloc(MB); + free(a); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + // Releasing less than a page should still trigger a release. + MallocExtension::instance()->ReleaseToSystem(1); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + FLAGS_tcmalloc_release_rate = old_tcmalloc_release_rate; +#endif // #ifndef DEBUGALLOCATION +} + static int RunAllTests(int argc, char** argv) { // Optional argv[1] is the seed AllocatorState rnd(argc > 1 ? 
atoi(argv[1]) : 100); @@ -1023,6 +1147,8 @@ static int RunAllTests(int argc, char** argv) { #endif TestHugeThreadCache(); + TestRanges(); + TestReleaseToSystem(); return 0; } @@ -1032,6 +1158,10 @@ static int RunAllTests(int argc, char** argv) { using testing::RunAllTests; int main(int argc, char** argv) { +#ifdef DEBUGALLOCATION // debug allocation takes forever for huge allocs + FLAGS_max_free_queue_size = 0; // return freed blocks to tcmalloc immediately +#endif + RunAllTests(argc, argv); // Test tc_version() |