summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorcsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2007-03-22 04:55:49 +0000
committercsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2007-03-22 04:55:49 +0000
commit8e188310f7d8732d81b7b04f193f89964b7af6c5 (patch)
treefea586faa0bc381027f2f47aee513fc9ea4b6e96 /src
parentc3b96b3ac552160abde541bba8ac7b4f8338efa0 (diff)
downloadgperftools-8e188310f7d8732d81b7b04f193f89964b7af6c5.tar.gz
Wed Jun 14 15:11:14 2006 Google Inc. <opensource@google.com>
* google-perftools: version 0.8 release * Experimental support for remote profiling added to pprof (many) * Fixed race condition in ProfileData::FlushTable (etune) * Better support for weird /proc maps (maxim, mec) * Fix heap-checker interaction with gdb (markus) * Better 64-bit support in pprof (aruns) * Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay) * Cast syscall(SYS_mmap); works on more 64-bit systems now (menage) * Document the text output of pprof! (csilvers) * Better compiler support for no-THREADS and for old compilers (csilvers) * Make libunwind the default stack unwinder for x86-64 (aruns) * Somehow the COPYING file got erased. Regenerate it (csilvers) git-svn-id: http://gperftools.googlecode.com/svn/trunk@23 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src')
-rw-r--r--src/base/linux_syscall_support.h125
-rw-r--r--src/base/linuxthreads.c37
-rw-r--r--src/base/thread_lister.c16
-rw-r--r--src/google/heap-checker.h13
-rw-r--r--src/heap-checker.cc123
-rw-r--r--src/malloc_extension.cc15
-rw-r--r--src/malloc_hook.cc2
-rwxr-xr-xsrc/pprof989
-rw-r--r--src/profiler.cc10
-rw-r--r--src/stacktrace.cc9
-rw-r--r--src/stacktrace_libunwind-inl.h4
-rw-r--r--src/tcmalloc.cc56
-rw-r--r--src/tests/heap-checker_unittest.cc26
-rw-r--r--src/tests/tcmalloc_unittest.cc48
14 files changed, 1044 insertions, 429 deletions
diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h
index 319455e..0dfdd8d 100644
--- a/src/base/linux_syscall_support.h
+++ b/src/base/linux_syscall_support.h
@@ -45,6 +45,14 @@
#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) && \
defined(__linux)
+#ifdef __cplusplus
+/* Some system header files in older versions of gcc neglect to properly
+ * handle being included from C++. As it appears to be harmless to have
+ * multiple nested 'extern "C"' blocks, just add another one here.
+ */
+extern "C" {
+#endif
+
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
@@ -79,35 +87,47 @@
#if defined(__i386__)
#ifndef __NR_getdents64
-#define __NR_getdents64 220
+#define __NR_getdents64 220
#endif
#ifndef __NR_gettid
-#define __NR_gettid 224
+#define __NR_gettid 224
#endif
#ifndef __NR_futex
-#define __NR_futex 240
+#define __NR_futex 240
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 241
+#define __NR_sched_getaffinity 242
#endif
/* End of i386 definitions */
#elif defined(__ARM_ARCH_3__)
#ifndef __NR_getdents64
-#define __NR_getdents64 217
+#define __NR_getdents64 (__NR_SYSCALL_BASE + 217)
#endif
#ifndef __NR_gettid
-#define __NR_gettid 224
+#define __NR_gettid (__NR_SYSCALL_BASE + 224)
#endif
#ifndef __NR_futex
-#define __NR_futex 240
+#define __NR_futex (__NR_SYSCALL_BASE + 240)
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241)
+#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242)
#endif
/* End of ARM 3 definitions */
#elif defined(__x86_64__)
#ifndef __NR_getdents64
-#define __NR_getdents64 217
+#define __NR_getdents64 217
#endif
#ifndef __NR_gettid
-#define __NR_gettid 186
+#define __NR_gettid 186
#endif
#ifndef __NR_futex
-#define __NR_futex 202
+#define __NR_futex 202
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 203
+#define __NR_sched_getaffinity 204
#endif
/* End of x86-64 definitions */
#endif
@@ -306,9 +326,11 @@ struct dirent64;
#endif
#if defined(__x86_64__)
struct msghdr;
+ struct sockaddr;
#define __NR_sys_mmap __NR_mmap
#define __NR_sys_recvmsg __NR_recvmsg
#define __NR_sys_sendmsg __NR_sendmsg
+ #define __NR_sys_sendto __NR_sendto
#define __NR_sys_shutdown __NR_shutdown
#define __NR_sys_rt_sigaction __NR_rt_sigaction
#define __NR_sys_rt_sigprocmask __NR_rt_sigprocmask
@@ -322,6 +344,10 @@ struct dirent64;
struct msghdr*, m, int, f);
static inline _syscall3(int, sys_sendmsg, int, s,
const struct msghdr*, m, int, f);
+ static inline _syscall6(int, sys_sendto, int, s,
+ const void*, m, size_t, l,
+ int, f,
+ const struct sockaddr*, a, int, t);
static inline _syscall2(int, sys_shutdown, int, s,
int, h);
static inline _syscall4(int, sys_rt_sigaction, int, s,
@@ -378,6 +404,8 @@ struct dirent64;
}
#define sys_recvmsg(s,m,f) sys_socketcall(17, (s), (m), (f))
#define sys_sendmsg(s,m,f) sys_socketcall(16, (s), (m), (f))
+ #define sys_sendto(s,m,l,f,a,t) sys_socketcall(11, (s), (m), (l),(f),\
+ (a), (t))
#define sys_shutdown(s,h) sys_socketcall(13, (s), (h))
#define sys_socket(d,t,p) sys_socketcall(1, (d), (t), (p))
#define sys_socketpair(d,t,p,s) sys_socketcall(8, (d), (t), (p),(s))
@@ -387,39 +415,41 @@ struct dirent64;
static inline _syscall3(pid_t, sys_waitpid, pid_t, p,
int*, s, int, o);
#endif
- #define __NR_sys_close __NR_close
- #define __NR_sys_dup __NR_dup
- #define __NR_sys_dup2 __NR_dup2
- #define __NR_sys_execve __NR_execve
- #define __NR_sys__exit __NR_exit
- #define __NR_sys_fcntl __NR_fcntl
- #define __NR_sys_fork __NR_fork
- #define __NR_sys_fstat __NR_fstat
- #define __NR_sys_getdents __NR_getdents
- #define __NR_sys_getdents64 __NR_getdents64
- #define __NR_sys_getegid __NR_getegid
- #define __NR_sys_geteuid __NR_geteuid
- #define __NR_sys_getpgrp __NR_getpgrp
- #define __NR_sys_getpid __NR_getpid
- #define __NR_sys_getppid __NR_getppid
- #define __NR_sys_getpriority __NR_getpriority
- #define __NR_sys_getrlimit __NR_getrlimit
- #define __NR_sys_getsid __NR_getsid
- #define __NR__gettid __NR_gettid
- #define __NR_sys_kill __NR_kill
- #define __NR_sys_lseek __NR_lseek
- #define __NR_sys_munmap __NR_munmap
- #define __NR_sys_open __NR_open
- #define __NR_sys_pipe __NR_pipe
- #define __NR_sys_prctl __NR_prctl
- #define __NR_sys_ptrace __NR_ptrace
- #define __NR_sys_read __NR_read
- #define __NR_sys_readlink __NR_readlink
- #define __NR_sys_sched_yield __NR_sched_yield
- #define __NR_sys_sigaltstack __NR_sigaltstack
- #define __NR_sys_stat __NR_stat
- #define __NR_sys_write __NR_write
- #define __NR_sys_futex __NR_futex
+ #define __NR_sys_close __NR_close
+ #define __NR_sys_dup __NR_dup
+ #define __NR_sys_dup2 __NR_dup2
+ #define __NR_sys_execve __NR_execve
+ #define __NR_sys__exit __NR_exit
+ #define __NR_sys_fcntl __NR_fcntl
+ #define __NR_sys_fork __NR_fork
+ #define __NR_sys_fstat __NR_fstat
+ #define __NR_sys_futex __NR_futex
+ #define __NR_sys_getdents __NR_getdents
+ #define __NR_sys_getdents64 __NR_getdents64
+ #define __NR_sys_getegid __NR_getegid
+ #define __NR_sys_geteuid __NR_geteuid
+ #define __NR_sys_getpgrp __NR_getpgrp
+ #define __NR_sys_getpid __NR_getpid
+ #define __NR_sys_getppid __NR_getppid
+ #define __NR_sys_getpriority __NR_getpriority
+ #define __NR_sys_getrlimit __NR_getrlimit
+ #define __NR_sys_getsid __NR_getsid
+ #define __NR__gettid __NR_gettid
+ #define __NR_sys_kill __NR_kill
+ #define __NR_sys_lseek __NR_lseek
+ #define __NR_sys_munmap __NR_munmap
+ #define __NR_sys_open __NR_open
+ #define __NR_sys_pipe __NR_pipe
+ #define __NR_sys_prctl __NR_prctl
+ #define __NR_sys_ptrace __NR_ptrace
+ #define __NR_sys_read __NR_read
+ #define __NR_sys_readlink __NR_readlink
+ #define __NR_sys_sched_getaffinity __NR_sched_getaffinity
+ #define __NR_sys_sched_setaffinity __NR_sched_setaffinity
+ #define __NR_sys_sched_yield __NR_sched_yield
+ #define __NR_sys_sigaltstack __NR_sigaltstack
+ #define __NR_sys_stat __NR_stat
+ #define __NR_sys_write __NR_write
static inline _syscall1(int, sys_close, int, f);
static inline _syscall1(int, sys_dup, int, f);
static inline _syscall2(int, sys_dup2, int, s,
@@ -432,6 +462,8 @@ struct dirent64;
static inline _syscall0(pid_t, sys_fork);
static inline _syscall2(int, sys_fstat, int, f,
struct stat*, b);
+ static inline _syscall4(int, sys_futex, int*, addrx, int, opx, int, valx,
+ struct timespec *, timeoutx);
static inline _syscall3(int, sys_getdents, int, f,
struct dirent*, d, int, c);
static inline _syscall3(int, sys_getdents64, int, f,
@@ -464,6 +496,10 @@ struct dirent64;
void *, b, size_t, c);
static inline _syscall3(int, sys_readlink, const char*, p,
char*, b, size_t, s);
+ static inline _syscall3(int, sys_sched_getaffinity, pid_t, pid,
+ unsigned int, len, unsigned long *, mask);
+ static inline _syscall3(int, sys_sched_setaffinity, pid_t, pid,
+ unsigned int, len, unsigned long *, mask);
static inline _syscall0(int, sys_sched_yield);
static inline _syscall2(int, sys_sigaltstack, const stack_t*, s,
const stack_t*, o);
@@ -471,8 +507,6 @@ struct dirent64;
struct stat*, b);
static inline _syscall3(ssize_t, sys_write, int, f,
const void *, b, size_t, c);
- static inline _syscall4(int, sys_futex, int*, addrx, int, opx, int, valx,
- struct timespec *, timeoutx);
static inline int sys_sysconf(int name) {
extern int __getpagesize(void);
@@ -517,6 +551,9 @@ struct dirent64;
#undef RETURN
#endif
+#ifdef __cplusplus
+}
+#endif
#endif
#endif
diff --git a/src/base/linuxthreads.c b/src/base/linuxthreads.c
index e721582..3696987 100644
--- a/src/base/linuxthreads.c
+++ b/src/base/linuxthreads.c
@@ -51,6 +51,10 @@
#include "base/linux_syscall_support.h"
#include "base/thread_lister.h"
+#ifndef CLONE_UNTRACED
+#define CLONE_UNTRACED 0x00800000
+#endif
+
/* itoa() is not a standard function, and we cannot safely call printf()
* after suspending threads. So, we just implement our own copy. A
@@ -97,8 +101,19 @@ static int local_clone (int (*fn)(void *), void *arg, ...) {
* Leave 4kB of gap between the callers stack and the new clone. This
* should be more than sufficient for the caller to call waitpid() until
* the cloned thread terminates.
+ *
+ * It is important that we set the CLONE_UNTRACED flag, because newer
+ * versions of "gdb" otherwise attempt to attach to our thread, and will
+ * attempt to reap its status codes. This subsequently results in the
+ * caller hanging indefinitely in waitpid(), waiting for a change in
+ * status that will never happen. By setting the CLONE_UNTRACED flag, we
+ * prevent "gdb" from stealing events, but we still expect the thread
+ * lister to fail, because it cannot PTRACE_ATTACH to the process that
+ * is being debugged. This is OK and the error code will be reported
+ * correctly.
*/
- return clone(fn, (char *)&arg - 4096, CLONE_VM|CLONE_FS|CLONE_FILES, arg);
+ return clone(fn, (char *)&arg - 4096,
+ CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg);
}
@@ -209,7 +224,8 @@ struct ListerParams {
static void ListerThread(struct ListerParams *args) {
static const int signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
SIGXCPU, SIGXFSZ };
- pid_t clone_pid = sys_gettid();
+ int found_parent = 0;
+ pid_t clone_pid = sys_gettid(), ppid = sys_getppid();
char proc_self_task[80], marker_name[48], *marker_path;
const char *proc_paths[3];
const char *const *proc_path = proc_paths;
@@ -239,8 +255,7 @@ static void ListerThread(struct ListerParams *args) {
}
/* Compute search paths for finding thread directories in /proc */
- local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'),
- sys_getppid());
+ local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid);
marker_path = strrchr(strcpy(marker_name, proc_self_task), '\000');
strcat(proc_self_task, "/task/");
proc_paths[0] = proc_self_task; /* /proc/$$/task/ */
@@ -417,6 +432,7 @@ static void ListerThread(struct ListerParams *args) {
num_threads--;
sig_num_threads = num_threads;
} else {
+ found_parent |= pid == ppid;
added_entries++;
}
}
@@ -435,6 +451,16 @@ static void ListerThread(struct ListerParams *args) {
NO_INTR(sys_close(marker));
sig_marker = marker = -1;
+ /* If we never found the parent process, something is very wrong.
+ * Most likely, we are running in a debugger. Any attempt to operate
+ * on the threads would be very incomplete. Let's just report an
+ * error to the caller.
+ */
+ if (!found_parent) {
+ ResumeAllProcessThreads(num_threads, pids);
+ sys__exit(3);
+ }
+
/* Now we are ready to call the callback,
* which takes care of resuming the threads for us.
*/
@@ -530,6 +556,9 @@ int ListAllProcessThreads(void *parameter,
case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected */
args.result = -1;
break;
+ case 3: args.err = EPERM; /* Process is already being traced */
+ args.result = -1;
+ break;
default:args.err = ECHILD; /* Child died unexpectedly */
args.result = -1;
break;
diff --git a/src/base/thread_lister.c b/src/base/thread_lister.c
index 6def758..f3df16b 100644
--- a/src/base/thread_lister.c
+++ b/src/base/thread_lister.c
@@ -31,7 +31,8 @@
* Author: Markus Gutschke
*/
-#include <stdio.h> // needed for NULL on some powerpc platforms (?!)
+#include <stdio.h> /* needed for NULL on some powerpc platforms (?!) */
+#include <sys/prctl.h>
#include "base/thread_lister.h"
#include "base/linuxthreads.h"
/* Include other thread listers here that define THREADS macro
@@ -46,16 +47,23 @@
int ListAllProcessThreads(void *parameter,
ListAllProcessThreadsCallBack callback, ...) {
- int rc;
+ int rc;
va_list ap;
+ int dumpable = prctl(PR_GET_DUMPABLE, 0);
+ if (!dumpable)
+ prctl(PR_SET_DUMPABLE, 1);
va_start(ap, callback);
- rc = callback(parameter, 0, NULL, ap);
+ pid_t pid = getpid();
+ rc = callback(parameter, 1, &pid, ap);
va_end(ap);
+ if (!dumpable)
+ prctl(PR_SET_DUMPABLE, 0);
return rc;
}
-void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+int ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+ return 1;
}
#endif
diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h
index 66d23de..f888ae0 100644
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@@ -255,6 +255,19 @@ class HeapCleaner {
};
class HeapLeakChecker {
+ public: // Static functions for working with (whole-program) leak checking.
+
+ // Returns whether heap leak checking is currently active in some mode,
+ // e.g. if leak checking was started (and is still active now)
+ // due to any valid non-empty --heap_check flag value
+ // (including "local") on the command-line
+ // or via a dependency on //base:heapcheck.
+ // The return value tells whether HeapLeakChecker objects manually
+ // constructed right now will actually do leak checking or do nothing.
+ // Note that we can go from active to inactive state during InitGoogle()
+ // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle().
+ static bool IsActive();
+
public: // Non-static functions for starting and doing leak checking.
// Start checking and name the leak check performed.
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index dc9c46d..4e8e2dc 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -468,6 +468,18 @@ static bool RecordGlobalDataLocked(uint64 start_address,
if (inode == 0)
return true;
+ // Sometimes people mmap their own files read-write. That would cause
+ // the strict ELF checker later to reject them. We do not want to loosen
+ // up the ELF checker, because we need to catch freaky files if they
+ // show up. So, make an exception for common files that we have seen.
+ //
+ // TODO(mec): the longer this gets, the more attractive it is to
+ // check for the ELF header and just accept all non-ELF files.
+ if (inode != 0) {
+ if (filename && strcmp(filename, "/dev/zero") == 0)
+ return true;
+ }
+
// Grab some ELF types.
#ifdef _LP64
typedef Elf64_Ehdr ElfFileHeader;
@@ -692,8 +704,15 @@ HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
"Looking at /proc/self/maps line:\n %s\n",
proc_map_line);
- if (start_address >= end_address)
- abort();
+ if (start_address >= end_address) {
+ // Crash if a line we may be interested in is ill-formed:
+ if (inode != 0) abort();
+ // Skip other ill-formed lines: some can occur,
+ // probably due to the interplay of how /proc/self/maps is updated
+ // while we read it in chunks in ProcMapsIterator and
+ // do things in this loop.
+ continue;
+ }
// Determine if any shared libraries are present.
if (inode != 0 && strstr(filename, "lib") && strstr(filename, ".so")) {
@@ -738,6 +757,14 @@ static int64 live_bytes_total = 0;
// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
static pid_t self_thread_pid = 0;
+// Status of our thread listing callback execution
+// (protected by our lock; used from within IgnoreAllLiveObjectsLocked)
+static enum {
+ CALLBACK_NOT_STARTED,
+ CALLBACK_STARTED,
+ CALLBACK_COMPLETED,
+} thread_listing_status = CALLBACK_NOT_STARTED;
+
// Ideally to avoid deadlocks this function should not result in any libc
// or other function calls that might need to lock a mutex:
// It is called when all threads of a process are stopped
@@ -774,6 +801,7 @@ int HeapLeakChecker::IgnoreLiveThreads(void* parameter,
int num_threads,
pid_t* thread_pids,
va_list ap) {
+ thread_listing_status = CALLBACK_STARTED;
if (HeapProfiler::kMaxLogging) {
HeapProfiler::MESSAGE(2, "HeapChecker: Found %d threads (from pid %d)\n",
num_threads, getpid());
@@ -838,6 +866,7 @@ int HeapLeakChecker::IgnoreLiveThreads(void* parameter,
IgnoreNonThreadLiveObjectsLocked();
// Can now resume the threads:
ResumeAllProcessThreads(num_threads, thread_pids);
+ thread_listing_status = CALLBACK_COMPLETED;
return failures;
}
@@ -928,7 +957,8 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
UseProcMaps(RECORD_GLOBAL_DATA_LOCKED);
}
// Ignore all thread stacks:
- bool executed_with_threads_stopped = false;
+ thread_listing_status = CALLBACK_NOT_STARTED;
+ bool need_to_ignore_non_thread_objects = true;
self_thread_pid = getpid();
self_thread_stack = self_stack;
if (FLAGS_heap_check_ignore_thread_live) {
@@ -939,10 +969,22 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
// if not suspended they could still mess with the pointer
// graph while we walk it).
int r = ListAllProcessThreads(NULL, IgnoreLiveThreads);
- executed_with_threads_stopped = (r >= 0);
- if (r == -1) {
- HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; "
- "may get false leak reports\n");
+ need_to_ignore_non_thread_objects = r < 0;
+ if (r < 0) {
+ HeapProfiler::MESSAGE(0, "HeapChecker: thread finding failed "
+ "with %d errno=%d\n", r, errno);
+ if (thread_listing_status == CALLBACK_COMPLETED) {
+ HeapProfiler::MESSAGE(0, "HeapChecker: thread finding callback "
+ "finished ok; hopefully everything is fine\n");
+ need_to_ignore_non_thread_objects = false;
+ } else if (thread_listing_status == CALLBACK_STARTED) {
+ HeapProfiler::MESSAGE(0, "HeapChecker: thread finding callback was "
+ "interrupted or crashed; can't fix this\n");
+ abort();
+ } else { // CALLBACK_NOT_STARTED
+ HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; "
+ "may get false leak reports\n");
+ }
} else if (r != 0) {
HeapProfiler::MESSAGE(0, "HeapChecker: Thread stacks not found "
"for %d threads; may get false leak reports\n",
@@ -960,7 +1002,7 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
}
// Do all other live data ignoring here if we did not do it
// within thread listing callback with all threads stopped.
- if (!executed_with_threads_stopped) IgnoreNonThreadLiveObjectsLocked();
+ if (need_to_ignore_non_thread_objects) IgnoreNonThreadLiveObjectsLocked();
if (live_objects_total) {
HeapProfiler::MESSAGE(0, "HeapChecker: "
"Ignoring "LLD" reachable "
@@ -1349,10 +1391,13 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
(same_heap ? (inuse_bytes_increase_ != 0 || inuse_allocs_increase_ != 0)
: (inuse_bytes_increase_ > 0 || inuse_allocs_increase_ > 0));
if (see_leaks || do_full) {
+ bool pprof_can_ignore = false;
+ const char* command_tail = " --text 2>/dev/null"; // normal command
const char* gv_command_tail
= " --edgefraction=1e-10 --nodefraction=1e-10 --gv 2>/dev/null";
string ignore_re;
if (disabled_regexp) {
+ pprof_can_ignore = true;
ignore_re += " --ignore='^";
ignore_re += *disabled_regexp;
ignore_re += "$'";
@@ -1361,22 +1406,29 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
// some STLs can give us spurious leak alerts (since the STL tries to
// do its own memory pooling), so we avoid it by using STL as little
// as possible for "big" objects that might require "lots" of memory.
- char command[6 * PATH_MAX + 200];
+ char base_command[6 * PATH_MAX + 200];
+ char beg_profile[PATH_MAX+1], end_profile[PATH_MAX+1];
if (use_initial_profile) {
+ snprintf(beg_profile, sizeof(beg_profile), "%s.%s-beg.heap",
+ profile_prefix->c_str(), name_);
// compare against initial profile only if need to
const char* drop_negative = same_heap ? "" : " --drop_negative";
- snprintf(command, sizeof(command), "%s --base=\"%s.%s-beg.heap\" %s ",
- pprof_path(), profile_prefix->c_str(), name_,
- drop_negative);
+ snprintf(base_command, sizeof(base_command),
+ "%s --base=\"%s\" %s ",
+ pprof_path(), beg_profile, drop_negative);
} else {
- snprintf(command, sizeof(command), "%s",
+ beg_profile[0] = '\0';
+ snprintf(base_command, sizeof(base_command), "%s",
pprof_path());
}
- snprintf(command + strlen(command), sizeof(command) - strlen(command),
- " %s \"%s.%s-end.heap\" %s --inuse_objects --lines",
- invocation_path(), profile_prefix->c_str(),
- name_, ignore_re.c_str());
+ snprintf(end_profile, sizeof(end_profile), "%s.%s-end.heap",
+ profile_prefix->c_str(), name_);
+ snprintf(base_command + strlen(base_command),
+ sizeof(base_command) - strlen(base_command),
+ " %s \"%s\" %s --inuse_objects --lines",
+ invocation_path(), end_profile, ignore_re.c_str());
// --lines is important here to catch leaks when !see_leaks
+
char cwd[PATH_MAX+1];
if (getcwd(cwd, sizeof(cwd)) != cwd) abort();
if (see_leaks) {
@@ -1390,7 +1442,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
"To investigate leaks manually use e.g.\n"
"cd %s; " // for proper symbol resolution
"%s%s\n\n",
- cwd, command, gv_command_tail);
+ cwd, base_command, gv_command_tail);
}
string output;
int checked_leaks = 0;
@@ -1403,14 +1455,18 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
} else {
// We don't care about pprof's stderr as long as it
// succeeds with empty report:
- checked_leaks = GetStatusOutput(command, &output);
+ char full_command[6 * PATH_MAX + 200]; // needed to concatenate
+ snprintf(full_command, sizeof(full_command), "%s%s",
+ base_command, command_tail);
+ checked_leaks = GetStatusOutput(full_command, &output);
if (checked_leaks != 0) {
HeapProfiler::MESSAGE(-1, "ERROR: Could not run pprof at %s\n",
pprof_path());
abort();
}
}
- if (see_leaks && output.empty() && checked_leaks == 0) {
+ if (see_leaks && pprof_can_ignore &&
+ output.empty() && checked_leaks == 0) {
HeapProfiler::MESSAGE(-1, "HeapChecker: "
"These must be leaks that we disabled"
" (pprof succeeded)! This check WILL FAIL"
@@ -1420,7 +1476,24 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
// do not fail the check just due to us being a stripped binary
if (!see_leaks && strstr(output.c_str(), "nm: ") != NULL &&
strstr(output.c_str(), ": no symbols") != NULL) output.resize(0);
- if (!(see_leaks || checked_leaks == 0)) abort();
+ }
+ // Make sure the profiles we created are still there.
+ // They can get deleted e.g. if the program forks/executes itself
+ // and FLAGS_cleanup_old_heap_profiles was kept as true.
+ if (access(end_profile, R_OK) != 0 ||
+ (beg_profile[0] && access(beg_profile, R_OK) != 0)) {
+ HeapProfiler::MESSAGE(-1, "HeapChecker: "
+ "One of the heap profiles is gone: %s %s\n",
+ beg_profile, end_profile);
+ abort();
+ }
+ if (!(see_leaks || checked_leaks == 0)) {
+ // Crash if something went wrong with executing pprof
+ // and we rely on pprof to do its work:
+ HeapProfiler::MESSAGE(-1, "HeapChecker: "
+ "pprof command failed: %s%s\n",
+ base_command, command_tail);
+ abort();
}
if (see_leaks && use_initial_profile) {
HeapProfiler::MESSAGE(-1, "HeapChecker: "
@@ -1438,7 +1511,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
"To investigate leaks manually uge e.g.\n"
"cd %s; " // for proper symbol resolution
"%s%s\n\n",
- name_, cwd, command, gv_command_tail);
+ name_, cwd, base_command, gv_command_tail);
if (use_initial_profile) {
HeapProfiler::MESSAGE(-1, "HeapChecker: "
"CAVEAT: Some of the reported leaks might have "
@@ -1491,6 +1564,10 @@ HeapLeakChecker::~HeapLeakChecker() {
// HeapLeakChecker overall heap check components
//----------------------------------------------------------------------
+bool HeapLeakChecker::IsActive() {
+ return heap_checker_on;
+}
+
vector<HeapCleaner::void_function>* HeapCleaner::heap_cleanups_ = NULL;
// When a HeapCleaner object is intialized, add its function to the static list
@@ -1653,7 +1730,7 @@ void HeapLeakChecker::DoMainHeapCheck() {
HeapProfiler::MESSAGE(0, "HeapChecker: "
"Checking for whole-program memory leaks\n");
if (!main_heap_checker->DoNoLeaks(same_heap, do_full, do_report)) {
- HeapProfiler::MESSAGE(-1, "ERROR: Leaks found in main heap check, aborting\n");
+ HeapProfiler::MESSAGE(-1, "HeapChecker: crashing because of leaks\n");
abort();
}
delete main_heap_checker;
diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc
index 0260a34..686b4bc 100644
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@@ -166,6 +166,21 @@ struct StackTraceHash {
}
return h;
}
+ // Less operator for MSVC's hash containers.
+ bool operator()(void** entry1, void** entry2) const {
+ if (Depth(entry1) != Depth(entry2))
+ return Depth(entry1) < Depth(entry2);
+ for (int i = 0; i < Depth(entry1); i++) {
+ if (PC(entry1, i) != PC(entry2, i)) {
+ return PC(entry1, i) < PC(entry2, i);
+ }
+ }
+ return false; // entries are equal
+ }
+ // These two public members are required by msvc. 4 and 8 are the
+ // default values.
+ static const size_t bucket_size = 4;
+ static const size_t min_buckets = 8;
};
struct StackTraceEqual {
diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc
index 8499c73..613e612 100644
--- a/src/malloc_hook.cc
+++ b/src/malloc_hook.cc
@@ -115,7 +115,7 @@ extern "C" void* mmap64(void *start, size_t length,
int fd, __off64_t offset) __THROW {
void *result;
- result = syscall(SYS_mmap, start, length, prot, flags, fd, offset);
+ result = (void *)syscall(SYS_mmap, start, length, prot, flags, fd, offset);
MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
return result;
}
diff --git a/src/pprof b/src/pprof
index 5df1798..24b5b74 100755
--- a/src/pprof
+++ b/src/pprof
@@ -41,6 +41,9 @@
# Examples:
#
# % tools/pprof "program" "profile"
+# Enters "interactive" mode
+#
+# % tools/pprof --text "program" "profile"
# Generates one line per procedure
#
# % tools/pprof --gv "program" "profile"
@@ -68,6 +71,8 @@
use strict;
use Getopt::Long;
+my $PPROF_VERSION = "0.8";
+
# These are the object tools we use, which come from various sources.
# We want to invoke them directly, rather than via users' aliases and/or
# search paths, because some people have colorizing versions of them that
@@ -79,9 +84,22 @@ my %obj_tool_map = (
"objdump" => "objdump",
"nm" => "nm",
"addr2line" => "addr2line",
+ "c++filt" => "c++filt",
);
my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local
my $GV = "gv";
+# These are used for dynamic profiles
+my $WGET = "wget";
+my $CURL = "curl";
+
+# These are the web pages that servers need to support for dynamic profiles
+my $HEAP_PAGE = "/pprof/heap";
+my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#"
+my $GROWTH_PAGE = "/pprof/growth";
+my $CONTENTION_PAGE = "/pprof/contention";
+my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST
+my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
+
# There is a pervasive dependency on the length (in hex characters, i.e.,
# nibbles) of an address, distinguishing between 32-bit and 64-bit profiles:
@@ -90,23 +108,40 @@ my $address_length = 8; # Hope for 32-bit, reset if 64-bit detected.
##### Argument parsing #####
sub usage_string {
- return <<'EOF';
-Usage: pprof [options] <program> <profile> ...
- Prints specified cpu- or heap-profile
-
+ return <<EOF;
+Usage:
+pprof [options] <program> <profiles>
+ <profiles> is a space separated list of profile names.
+pprof [options] <profile>
+ <profile> is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE
+
+ Each profile name can be:
+ /path/to/profile - a path to a profile file
+ host:port[/<service>] - a location of a service to get profile from
+
+ The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, $GROWTH_PAGE, or $CONTENTION_PAGE.
+ For instance: "pprof http://myserver.com:80$HEAP_PAGE".
+ If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
+
+ For more help with querying remote servers, including how to add the
+ necessary server-side support code, see this filename (or one like it):
+
+ /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html
+
Options:
--cum Sort by cumulative data
--base=<base> Subtract <base> from <profile> before display
- --interactive Run in interactive mode (interactive "help" gives help)
-
+ --interactive Run in interactive mode (interactive "help" gives help) [default]
+ --seconds=<n> Length of time for dynamic profiles [default=30 secs]
+
Reporting Granularity:
--addresses Report at address level
--lines Report at source line level
--functions Report at function level [default]
--files Report at source file level
-
+
Output type:
- --text Generate text report [default]
+ --text Generate text report
--gv Generate Postscript and display
--list=<regexp> Generate source listing of matching routines
--disasm=<regexp> Generate disassembly of matching routines
@@ -114,7 +149,7 @@ Output type:
--ps Generate Postcript to stdout
--pdf Generate PDF to stdout
--gif Generate GIF to stdout
-
+
Heap-Profile Options:
--inuse_space Display in-use (mega)bytes [default]
--inuse_objects Display in-use objects
@@ -122,7 +157,12 @@ Heap-Profile Options:
--alloc_objects Display allocated objects
--show_bytes Display space in bytes
--drop_negative Ignore negative differences
-
+
+Contention-profile options:
+ --total_delay Display total delay at each region [default]
+ --contentions Display number of delays at each region
+ --mean_delay Display mean delay at each region
+
Call-graph Options:
--nodecount=<n> Show at most so many nodes [default=80]
--nodefraction=<f> Hide nodes below <f>*total [default=.005]
@@ -130,7 +170,7 @@ Call-graph Options:
--focus=<regexp> Focus on nodes matching <regexp>
--ignore=<regexp> Ignore nodes matching <regexp>
--scale=<n> Set GV scaling [default=0]
-
+
Miscellaneous:
--tools=<prefix> Prefix for object tool pathnames
--test Run unit tests
@@ -138,7 +178,7 @@ Miscellaneous:
--version Version information
Examples:
-
+
pprof /bin/ls ls.prof
Outputs one line per procedure
pprof --gv /bin/ls ls.prof
@@ -151,12 +191,14 @@ pprof --list=getdir /bin/ls ls.prof
(Per-line) annotated source listing for getdir()
pprof --disasm=getdir /bin/ls ls.prof
(Per-PC) annotated disassembly for getdir()
+pprof localhost:1234
+ Outputs one line per procedure for localhost:1234
EOF
}
sub version_string {
- return <<'EOF'
-pprof (part of google-perftools 0.7)
+ return <<EOF
+pprof (part of google-perftools $PPROF_VERSION)
Copyright 1998-2006 Google Inc.
@@ -175,301 +217,387 @@ sub usage {
exit(1);
}
+sub Init() {
+ # Setup tmp-file name and handler to clean it up.
+ # We do this at the very beginning so that we can use the
+ # error() and cleanup() functions at any point afterwards.
+ $main::tmpfile_sym = "/tmp/pprof$$.sym";
+ $main::tmpfile_ps = "/tmp/pprof$$";
+ $main::next_tmpfile = 0;
+ $SIG{'INT'} = \&sighandler;
-$main::opt_help = 0;
-$main::opt_version = 0;
-
-$main::opt_cum = 0;
-$main::opt_base = '';
-$main::opt_addresses = 0;
-$main::opt_lines = 0;
-$main::opt_functions = 0;
-$main::opt_files = 0;
-
-$main::opt_text = 0;
-$main::opt_list = "";
-$main::opt_disasm = "";
-$main::opt_gv = 0;
-$main::opt_dot = 0;
-$main::opt_ps = 0;
-$main::opt_pdf = 0;
-$main::opt_gif = 0;
-
-$main::opt_nodecount = 80;
-$main::opt_nodefraction = 0.005;
-$main::opt_edgefraction = 0.001;
-$main::opt_focus = '';
-$main::opt_ignore = '';
-$main::opt_scale = 0;
-
-$main::opt_inuse_space = 0;
-$main::opt_inuse_objects = 0;
-$main::opt_alloc_space = 0;
-$main::opt_alloc_objects = 0;
-$main::opt_show_bytes = 0;
-$main::opt_drop_negative = 0;
-$main::opt_interactive = 0;
-
-$main::opt_tools = "";
-$main::opt_debug = 0;
-$main::opt_test = 0;
-
-# Are we printing a heap profile?
-$main::heap_profile = 0;
-
-# Are we printing a lock profile?
-$main::lock_profile = 0;
-
-GetOptions("help!" => \$main::opt_help,
- "version!" => \$main::opt_version,
- "cum!" => \$main::opt_cum,
- "base=s" => \$main::opt_base,
- "functions!" => \$main::opt_functions,
- "lines!" => \$main::opt_lines,
- "addresses!" => \$main::opt_addresses,
- "files!" => \$main::opt_files,
- "text!" => \$main::opt_text,
- "list=s" => \$main::opt_list,
- "disasm=s" => \$main::opt_disasm,
- "gv!" => \$main::opt_gv,
- "dot!" => \$main::opt_dot,
- "ps!" => \$main::opt_ps,
- "pdf!" => \$main::opt_pdf,
- "gif!" => \$main::opt_gif,
- "interactive!" => \$main::opt_interactive,
- "nodecount=i" => \$main::opt_nodecount,
- "nodefraction=f" => \$main::opt_nodefraction,
- "edgefraction=f" => \$main::opt_edgefraction,
- "focus=s" => \$main::opt_focus,
- "ignore=s" => \$main::opt_ignore,
- "scale=i" => \$main::opt_scale,
- "inuse_space!" => \$main::opt_inuse_space,
- "inuse_objects!" => \$main::opt_inuse_objects,
- "alloc_space!" => \$main::opt_alloc_space,
- "alloc_objects!" => \$main::opt_alloc_objects,
- "show_bytes!" => \$main::opt_show_bytes,
- "drop_negative!" => \$main::opt_drop_negative,
- "tools=s" => \$main::opt_tools,
- "test!" => \$main::opt_test,
- "debug!" => \$main::opt_debug,
- ) || usage("Invalid option(s)");
-
-# Deal with the standard --help and --version
-if ($main::opt_help) {
- print usage_string();
- exit(0);
-}
-if ($main::opt_version) {
- print version_string();
- exit(0);
-}
+ $main::opt_help = 0;
+ $main::opt_version = 0;
-# Disassembly/listing mode requires address-level info
-if ($main::opt_disasm || $main::opt_list) {
- $main::opt_functions = 0;
+ $main::opt_cum = 0;
+ $main::opt_base = '';
+ $main::opt_addresses = 0;
$main::opt_lines = 0;
- $main::opt_addresses = 1;
+ $main::opt_functions = 0;
$main::opt_files = 0;
-}
-
-# Check heap-profiling flags
-if ($main::opt_inuse_space +
- $main::opt_inuse_objects +
- $main::opt_alloc_space +
- $main::opt_alloc_objects > 1) {
- usage("Specify at most on of --inuse/--alloc options");
-}
-# Check output granularities
-my $grains =
- $main::opt_functions +
- $main::opt_lines +
- $main::opt_addresses +
- $main::opt_files +
- 0;
-if ($grains > 1) {
- usage("Only specify one output granularity option");
-}
-if ($grains == 0) {
- $main::opt_functions = 1;
-}
+ $main::opt_text = 0;
+ $main::opt_list = "";
+ $main::opt_disasm = "";
+ $main::opt_gv = 0;
+ $main::opt_dot = 0;
+ $main::opt_ps = 0;
+ $main::opt_pdf = 0;
+ $main::opt_gif = 0;
+
+ $main::opt_nodecount = 80;
+ $main::opt_nodefraction = 0.005;
+ $main::opt_edgefraction = 0.001;
+ $main::opt_focus = '';
+ $main::opt_ignore = '';
+ $main::opt_scale = 0;
+ $main::opt_seconds = 30;
+
+ $main::opt_inuse_space = 0;
+ $main::opt_inuse_objects = 0;
+ $main::opt_alloc_space = 0;
+ $main::opt_alloc_objects = 0;
+ $main::opt_show_bytes = 0;
+ $main::opt_drop_negative = 0;
+ $main::opt_interactive = 0;
+
+ $main::opt_total_delay = 0;
+ $main::opt_contentions = 0;
+ $main::opt_mean_delay = 0;
+
+ $main::opt_tools = "";
+ $main::opt_debug = 0;
+ $main::opt_test = 0;
+
+ # Are we using $SYMBOL_PAGE?
+ $main::use_symbol_page = 0;
+
+ # Are we printing a heap profile?
+ $main::heap_profile = 0;
-# Check output modes
-my $modes =
- $main::opt_text +
- $main::opt_gv +
- $main::opt_dot +
- $main::opt_ps +
- $main::opt_pdf +
- $main::opt_gif +
- 0;
-if ($modes > 1) {
- usage("Only specify one output mode");
-}
-if ($modes == 0) {
- $main::opt_text = 1;
-}
+ # Are we printing a lock profile?
+ $main::lock_profile = 0;
-if ($main::opt_test) {
- RunUnitTests();
- # Should not return
- exit(1);
-}
+ GetOptions("help!" => \$main::opt_help,
+ "version!" => \$main::opt_version,
+ "cum!" => \$main::opt_cum,
+ "base=s" => \$main::opt_base,
+ "seconds=i" => \$main::opt_seconds,
+ "functions!" => \$main::opt_functions,
+ "lines!" => \$main::opt_lines,
+ "addresses!" => \$main::opt_addresses,
+ "files!" => \$main::opt_files,
+ "text!" => \$main::opt_text,
+ "list=s" => \$main::opt_list,
+ "disasm=s" => \$main::opt_disasm,
+ "gv!" => \$main::opt_gv,
+ "dot!" => \$main::opt_dot,
+ "ps!" => \$main::opt_ps,
+ "pdf!" => \$main::opt_pdf,
+ "gif!" => \$main::opt_gif,
+ "interactive!" => \$main::opt_interactive,
+ "nodecount=i" => \$main::opt_nodecount,
+ "nodefraction=f" => \$main::opt_nodefraction,
+ "edgefraction=f" => \$main::opt_edgefraction,
+ "focus=s" => \$main::opt_focus,
+ "ignore=s" => \$main::opt_ignore,
+ "scale=i" => \$main::opt_scale,
+ "inuse_space!" => \$main::opt_inuse_space,
+ "inuse_objects!" => \$main::opt_inuse_objects,
+ "alloc_space!" => \$main::opt_alloc_space,
+ "alloc_objects!" => \$main::opt_alloc_objects,
+ "show_bytes!" => \$main::opt_show_bytes,
+ "drop_negative!" => \$main::opt_drop_negative,
+ "total_delay!" => \$main::opt_total_delay,
+ "contentions!" => \$main::opt_contentions,
+ "mean_delay!" => \$main::opt_mean_delay,
+ "tools=s" => \$main::opt_tools,
+ "test!" => \$main::opt_test,
+ "debug!" => \$main::opt_debug,
+ ) || usage("Invalid option(s)");
+
+ # Deal with the standard --help and --version
+ if ($main::opt_help) {
+ print usage_string();
+ exit(0);
+ }
-# Binary name and profile arguments list
-$main::prog = "";
-@main::pfile_args = ();
+ if ($main::opt_version) {
+ print version_string();
+ exit(0);
+ }
-$main::prog = shift || usage("Did not specify program");
-scalar(@ARGV) || usage("Did not specify profile file");
+ # Disassembly/listing mode requires address-level info
+ if ($main::opt_disasm || $main::opt_list) {
+ $main::opt_functions = 0;
+ $main::opt_lines = 0;
+ $main::opt_addresses = 1;
+ $main::opt_files = 0;
+ }
+
+ # Check heap-profiling flags
+ if ($main::opt_inuse_space +
+ $main::opt_inuse_objects +
+ $main::opt_alloc_space +
+ $main::opt_alloc_objects > 1) {
+ usage("Specify at most on of --inuse/--alloc options");
+ }
+
+ # Check output granularities
+ my $grains =
+ $main::opt_functions +
+ $main::opt_lines +
+ $main::opt_addresses +
+ $main::opt_files +
+ 0;
+ if ($grains > 1) {
+ usage("Only specify one output granularity option");
+ }
+ if ($grains == 0) {
+ $main::opt_functions = 1;
+ }
+
+ # Check output modes
+ my $modes =
+ $main::opt_text +
+ $main::opt_gv +
+ $main::opt_dot +
+ $main::opt_ps +
+ $main::opt_pdf +
+ $main::opt_gif +
+ $main::opt_interactive +
+ 0;
+ if ($modes > 1) {
+ usage("Only specify one output mode");
+ }
+ if ($modes == 0) {
+ if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode
+ $main::opt_interactive = 1;
+ } else {
+ $main::opt_text = 1;
+ }
+ }
-# Parse profile file/location arguments
-foreach my $farg (@ARGV) {
- unshift(@main::pfile_args, $farg);
-}
-ConfigureObjTools($main::prog);
+ if ($main::opt_test) {
+ RunUnitTests();
+ # Should not return
+ exit(1);
+ }
-##### Main section #####
+ # Binary name and profile arguments list
+ $main::prog = "";
+ @main::pfile_args = ();
-# Setup tmp-file name and handler to clean it up
-$main::tmpfile_sym = "/tmp/pprof$$.sym";
-$main::tmpfile_ps = "/tmp/pprof$$";
-$main::next_tmpfile = 0;
-$main::collected_profile = undef;
-@main::profile_files = ();
-#$main::op_time = time();
-$SIG{'INT'} = \&sighandler;
+ # Remote profiling without a binary (using $SYMBOL_PAGE instead)
+ if (IsProfileURL($ARGV[0])) {
+ $main::use_symbol_page = 1;
+ }
-# Fetch all profile data
-FetchDynamicProfiles();
+ if ($main::use_symbol_page) { # We don't need a binary!
+ my %disabled = ('--lines' => $main::opt_lines,
+ '--disasm' => $main::opt_disasm);
+ for my $option (keys %disabled) {
+ usage("$option cannot be used without a binary") if $disabled{$option};
+ }
+ # Set $main::prog later...
+ scalar(@ARGV) || usage("Did not specify profile file");
+ } else {
+ $main::prog = shift(@ARGV) || usage("Did not specify program");
+ scalar(@ARGV) || usage("Did not specify profile file");
+ }
-# Read one profile, pick the last item on the list
-my $data = ReadProfile($main::prog, pop(@main::profile_files));
-my $profile = $data->{profile};
-my $libs = $data->{libs}; # Info about main program and shared libraries
+ # Parse profile file/location arguments
+ foreach my $farg (@ARGV) {
+ if ($farg =~ m/(.*)\@([0-9]+)/ ) {
+ my $machine = $1;
+ my $num_machines = $2;
+ for (my $i = 0; $i < $num_machines; $i++) {
+ unshift(@main::pfile_args, "$i.$machine");
+ }
+ } else {
+ unshift(@main::pfile_args, $farg);
+ }
+ }
-# List of function names to skip
-$main::skip = ();
-$main::skip_regexp = 'NOMATCH';
-if ($main::heap_profile) {
- foreach my $name ('calloc',
- 'cfree',
- 'malloc',
- 'free',
- 'memalign',
- 'pvalloc',
- 'valloc',
- 'realloc',
- 'do_malloc',
- 'DoSampledAllocation',
- '__builtin_delete',
- '__builtin_new',
- '__builtin_vec_delete',
- '__builtin_vec_new') {
- $main::skip{$name} = 1;
+ if ($main::use_symbol_page) {
+ unless (IsProfileURL($main::pfile_args[0])) {
+ error("The first profile should be a remote form to use $SYMBOL_PAGE\n");
+ }
+ CheckSymbolPage();
+ $main::prog = FetchProgramName();
+ } else {
+ ConfigureObjTools($main::prog)
}
- $main::skip_regexp = "TCMalloc";
}
-if ($main::lock_profile) {
- foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') {
- $main::skip{$vname} = 1;
+
+sub Main() {
+ Init();
+ $main::collected_profile = undef;
+ @main::profile_files = ();
+ $main::op_time = time();
+
+ # Fetch all profile data
+ FetchDynamicProfiles();
+
+ # Read one profile, pick the last item on the list
+ my $data = ReadProfile($main::prog, pop(@main::profile_files));
+ my $profile = $data->{profile};
+ my $libs = $data->{libs}; # Info about main program and shared libraries
+
+ # List of function names to skip
+ $main::skip = ();
+ $main::skip_regexp = 'NOMATCH';
+ if ($main::heap_profile) {
+ foreach my $name ('calloc',
+ 'cfree',
+ 'malloc',
+ 'free',
+ 'memalign',
+ 'pvalloc',
+ 'valloc',
+ 'realloc',
+ 'do_malloc',
+ 'DoSampledAllocation',
+ 'simple_alloc::allocate',
+ '__malloc_alloc_template::allocate',
+ '__builtin_delete',
+ '__builtin_new',
+ '__builtin_vec_delete',
+ '__builtin_vec_new') {
+ $main::skip{$name} = 1;
+ }
+ $main::skip_regexp = "TCMalloc";
+ }
+ if ($main::lock_profile) {
+ foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') {
+ $main::skip{$vname} = 1;
+ }
}
-}
-# Add additional profiles, if available.
-if (scalar(@main::profile_files) > 0) {
- foreach my $pname (@main::profile_files) {
- my $p = ReadProfile($main::prog, $pname)->{profile};
- $profile = AddProfile($profile, $p);
+ # Add additional profiles, if available.
+ if (scalar(@main::profile_files) > 0) {
+ foreach my $pname (@main::profile_files) {
+ my $p = ReadProfile($main::prog, $pname)->{profile};
+ $profile = AddProfile($profile, $p);
+ }
}
-}
-# Subtract base from profile, if specified
-if ($main::opt_base ne '') {
- my $base = ReadProfile($main::prog, $main::opt_base)->{profile};
- $profile = SubtractProfile($profile, $base);
-}
+ # Subtract base from profile, if specified
+ if ($main::opt_base ne '') {
+ my $base = ReadProfile($main::prog, $main::opt_base)->{profile};
+ $profile = SubtractProfile($profile, $base);
+ }
-# Get total data in profile
-my $total = TotalProfile($profile);
+ # Get total data in profile
+ my $total = TotalProfile($profile);
-# Extract symbols
-my $symbols = ExtractSymbols($libs, $profile, $data->{pcs});
+ # Collect symbols
+ my $symbols = undef;
+ if ($main::use_symbol_page) {
+ $symbols = FetchSymbols($data->{pcs});
+ } else {
+ $symbols = ExtractSymbols($libs, $profile, $data->{pcs});
+ }
-# Focus?
-if ($main::opt_focus ne '') {
- $profile = FocusProfile($symbols, $profile, $main::opt_focus);
-}
+ # Focus?
+ if ($main::opt_focus ne '') {
+ $profile = FocusProfile($symbols, $profile, $main::opt_focus);
+ }
-# Ignore?
-if ($main::opt_ignore ne '') {
- $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore);
-}
+ # Ignore?
+ if ($main::opt_ignore ne '') {
+ $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore);
+ }
-# Reduce profiles to required output granularity, and also clean
-# each stack trace so a given entry exists at most once.
-my $reduced = ReduceProfile($symbols, $profile);
+ # Reduce profiles to required output granularity, and also clean
+ # each stack trace so a given entry exists at most once.
+ my $reduced = ReduceProfile($symbols, $profile);
-# Get derived profiles
-my $flat = FlatProfile($reduced);
-my $cumulative = CumulativeProfile($reduced);
+ # Get derived profiles
+ my $flat = FlatProfile($reduced);
+ my $cumulative = CumulativeProfile($reduced);
-# Print
-if (!$main::opt_interactive) {
- if ($main::opt_disasm) {
- PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm);
- } elsif ($main::opt_list) {
- PrintListing($libs, $flat, $cumulative, $main::opt_list);
- } elsif ($main::opt_text) {
- PrintText($symbols, $flat, $cumulative, $total, -1);
- } else {
- if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
- if ($main::opt_gv) {
- if (!system("$GV --version >/dev/null 2>&1")) {
- # Options using double dash are supported by this gv version.
- system("$GV --scale=$main::opt_scale " .
- PsTempName($main::next_tmpfile));
- } else {
- # Old gv version - only supports options that use single dash.
- system("$GV -scale $main::opt_scale " .
- PsTempName($main::next_tmpfile));
+ # Print
+ if (!$main::opt_interactive) {
+ if ($main::opt_disasm) {
+ PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm);
+ } elsif ($main::opt_list) {
+ PrintListing($libs, $flat, $cumulative, $main::opt_list);
+ } elsif ($main::opt_text) {
+ PrintText($symbols, $flat, $cumulative, $total, -1);
+ } else {
+ if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
+ if ($main::opt_gv) {
+ if (!system("$GV --version >/dev/null 2>&1")) {
+ # Options using double dash are supported by this gv version.
+ system("$GV --scale=$main::opt_scale " .
+ PsTempName($main::next_tmpfile));
+ } else {
+ # Old gv version - only supports options that use single dash.
+ system("$GV -scale $main::opt_scale " .
+ PsTempName($main::next_tmpfile));
+ }
}
+ } else {
+ exit(1);
}
- } else {
- exit(1);
}
+ } else {
+ InteractiveMode($profile, $symbols, $libs, $total);
}
-} else {
- InteractiveMode();
+
+ cleanup();
+ exit(0);
}
-cleanup();
-exit(0);
+##### Entry Point #####
+Main();
+
+# Temporary code to detect if we're running on a Goobuntu system.
+# These systems don't have the right stuff installed for the special
+# Readline libraries to work, so as a temporary workaround, we default
+# to using the normal stdio code, rather than the fancier readline-based
+# code
+sub ReadlineMightFail {
+  # Returns 0 when /lib/libtermcap.so.2 exists (readline should be okay),
+  # and 1 otherwise.
+  return (-e '/lib/libtermcap.so.2') ? 0 : 1;
+}
##### Interactive helper routines #####
sub InteractiveMode {
- $| = 1; # Make output unbuffered for interactive mode
- my $orig_profile = $profile;
+ $| = 1; # Make output unbuffered for interactive mode
+ my ($orig_profile, $symbols, $libs, $total) = @_;
# Use ReadLine if it's installed.
- if ( defined(eval {require Term::ReadLine}) ) {
+ if ( !ReadlineMightFail() &&
+ defined(eval {require Term::ReadLine}) ) {
my $term = new Term::ReadLine 'pprof';
while ( defined ($_ = $term->readline('(pprof) '))) {
$term->addhistory($_) if /\S/;
- if (!InteractiveCommand($orig_profile, $_)) {
- last; # exit when we get an interactive command to quit
+ if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
+ last; # exit when we get an interactive command to quit
}
}
} else { # don't have readline
while (1) {
print "(pprof) ";
$_ = <STDIN>;
- if (!InteractiveCommand($orig_profile, $_)) {
- last; # exit when we get an interactive command to quit
+
+ # Save some flags that might be reset by InteractiveCommand()
+ my $save_opt_lines = $main::opt_lines;
+
+ if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
+ last; # exit when we get an interactive command to quit
}
+
+ # Restore flags
+ $main::opt_lines = $save_opt_lines;
}
}
}
@@ -477,7 +605,7 @@ sub InteractiveMode {
# Takes five args: orig profile, symbols, libs, total, and command to run.
# Returns 1 if we should keep going, or 0 if we were asked to quit
sub InteractiveCommand {
- my($orig_profile, $command) = @_;
+ my($orig_profile, $symbols, $libs, $total, $command) = @_;
$_ = $command; # just to make future m//'s easier
if (!defined($_)) {
print "\n";
@@ -490,8 +618,7 @@ sub InteractiveCommand {
InteractiveHelpMessage();
return 1;
}
- # Clear all the options
- $main::opt_lines = 0;
+ # Clear all the mode options -- mode is controlled by "$command"
$main::opt_text = 0;
$main::opt_disasm = 0;
$main::opt_list = 0;
@@ -507,7 +634,7 @@ sub InteractiveCommand {
my $ignore;
($routine, $ignore) = ParseInteractiveArgs($3);
- my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
my $reduced = ReduceProfile($symbols, $profile);
# Get derived profiles
@@ -524,7 +651,7 @@ sub InteractiveCommand {
my $ignore;
($routine, $ignore) = ParseInteractiveArgs($1);
- my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
my $reduced = ReduceProfile($symbols, $profile);
# Get derived profiles
@@ -542,7 +669,7 @@ sub InteractiveCommand {
($routine, $ignore) = ParseInteractiveArgs($1);
# Process current profile to account for various settings
- my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
my $reduced = ReduceProfile($symbols, $profile);
# Get derived profiles
@@ -560,7 +687,7 @@ sub InteractiveCommand {
($focus, $ignore) = ParseInteractiveArgs($1);
# Process current profile to account for various settings
- my $profile = ProcessProfile($orig_profile, $focus, $ignore);
+ my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore);
my $reduced = ReduceProfile($symbols, $profile);
# Get derived profiles
@@ -587,6 +714,7 @@ sub InteractiveCommand {
sub ProcessProfile {
my $orig_profile = shift;
+ my $symbols = shift;
my $focus = shift;
my $ignore = shift;
@@ -598,18 +726,18 @@ sub ProcessProfile {
$profile = FocusProfile($symbols, $profile, $focus);
my $focus_count = TotalProfile($profile);
printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n",
- $focus,
- Unparse($focus_count), Units(),
- Unparse($total_count), ($focus_count*100.0) / $total_count);
+ $focus,
+ Unparse($focus_count), Units(),
+ Unparse($total_count), ($focus_count*100.0) / $total_count);
}
if ($ignore ne '') {
$profile = IgnoreProfile($symbols, $profile, $ignore);
my $ignore_count = TotalProfile($profile);
printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n",
- $ignore,
- Unparse($ignore_count), Units(),
- Unparse($total_count),
- ($ignore_count*100.0) / $total_count);
+ $ignore,
+ Unparse($ignore_count), Units(),
+ Unparse($total_count),
+ ($ignore_count*100.0) / $total_count);
}
return $profile;
@@ -637,7 +765,7 @@ Commands:
Show top lines ordered by flat profile count, or cumulative count
if --cum is specified. If a number is present after 'top', the
top K routines will be shown (defaults to showing the top 10)
-
+
disasm [routine_regexp] [-ignore1] [-ignore2]
Show disassembly of routines whose names match "routine_regexp",
annotated with sample counts.
@@ -649,6 +777,10 @@ For commands that accept optional -ignore tags, samples where any routine in
the stack trace matches the regular expression in any of the -ignore
parameters will be ignored.
+Further pprof details are available at this location (or one similar):
+
+ /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html
+
ENDOFHELP
}
sub ParseInteractiveArgs {
@@ -1023,8 +1155,8 @@ sub PrintDot {
if ($nodelimit > 0 || $edgelimit > 0) {
printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n",
- Unparse($nodelimit), Units(),
- Unparse($edgelimit), Units());
+ Unparse($nodelimit), Units(),
+ Unparse($edgelimit), Units());
}
# Open DOT output file
@@ -1160,8 +1292,6 @@ sub OutputKey {
# Skip large addresses since they sometimes show up as fake entries on RH9
if (length($a) > 8) {
if ($a gt "7fffffffffffffff") { return ''; }
- } else {
- if (hex($a) > 0x7fffffff) { return ''; }
}
# Extract symbolic info for address
@@ -1220,7 +1350,7 @@ sub Unparse {
return sprintf("%.1f", $num / 1048576.0);
}
}
- } elsif ($main::lock_profile) {
+ } elsif ($main::lock_profile && !$main::opt_contentions) {
return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
} else {
return sprintf("%d", $num);
@@ -1249,7 +1379,7 @@ sub Units {
return "MB";
}
}
- } elsif ($main::lock_profile) {
+ } elsif ($main::lock_profile && !$main::opt_contentions) {
return "seconds";
} else {
return "samples";
@@ -1267,7 +1397,9 @@ sub FlatProfile {
foreach my $k (keys(%{$profile})) {
my $count = $profile->{$k};
my @addrs = split(/\n/, $k);
- AddEntry($result, $addrs[0], $count);
+ if ($#addrs >= 0) {
+ AddEntry($result, $addrs[0], $count);
+ }
}
return $result;
}
@@ -1458,14 +1590,191 @@ sub AddEntries {
##### Code to profile a server dynamically #####
+# Fetches the symbol page named by SymbolPageURL() and checks that its
+# first line reports a non-zero "num_symbols:" count.  Calls error() if
+# the fetch command cannot be started, nothing is returned, the count is
+# zero, or the first line does not have the expected form.
+sub CheckSymbolPage {
+  my $url = SymbolPageURL();
+  my $command_line = "$WGET -qO- '$url'";
+  # Check the pipe open, as FetchProgramName() does, so that a failure to
+  # start the fetch command is not misreported as a missing page.
+  open(SYMBOL, "$command_line |") or error($command_line);
+  my $line = <SYMBOL>;
+  close(SYMBOL);
+  unless (defined($line)) {
+    error("$url doesn't exist\n");
+  }
+
+  if ($line =~ /^num_symbols:\s+(\d+)$/) {
+    if ($1 == 0) {
+      error("Stripped binary. No symbols available.\n");
+    }
+  } else {
+    error("Failed to get the number of symbols from $url\n");
+  }
+}
+
+# Returns true if $profile_name is accepted by ParseProfileURL(), i.e. it
+# yields a host, a port and a page type; returns false otherwise,
+# including when $profile_name is undefined.
+sub IsProfileURL {
+  my $profile_name = shift;
+  # Init() passes $ARGV[0] before checking that any argument was given.
+  return 0 unless defined($profile_name);
+  my ($host, $port, $type) = ParseProfileURL($profile_name);
+  # Use '&&', not 'and': 'return' binds tighter than 'and', so
+  # "return A and B and C" returns A alone and never looks at B or C.
+  return defined($host) && defined($port) && defined($type);
+}
+
+# Splits a remote profile location of the form [http://]host:port[page]
+# into the list (host, port, page).  Returns the empty list when
+# $profile_name does not have that form.
+sub ParseProfileURL {
+  my ($profile_name) = @_;
+  return ($2, $3, $4)
+    if $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|/|$PROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE)$,o;
+  return ();
+}
+
+# Symbols are fetched from the first profile argument: this builds the
+# $SYMBOL_PAGE URL from that argument's host and port.
+sub SymbolPageURL {
+  my ($host, $port) = ParseProfileURL($main::pfile_args[0]);
+  return "http://$host:$port$SYMBOL_PAGE";
+}
+
+# Fetches $PROGRAM_NAME_PAGE from the host and port of the first profile
+# argument and returns the first line of the response, truncated at the
+# first NUL byte and with line feeds removed.  Calls error() if the fetch
+# command cannot be started or returns nothing.
+sub FetchProgramName() {
+  my ($host, $port, $type) = ParseProfileURL($main::pfile_args[0]);
+  my $url = "http://$host:$port$PROGRAM_NAME_PAGE";
+  my $command_line = "$WGET -qO- '$url'";
+  open(CMDLINE, "$command_line |") or error($command_line);
+  my $cmdline = <CMDLINE>;
+  close(CMDLINE);
+  error("Failed to get program name from $url\n") unless defined($cmdline);
+  # Remove argv[1] and later arguments.  '.*' with /s (rather than '.+')
+  # also removes a NUL that has nothing after it, so a command line
+  # consisting of just "prog\0" does not keep a trailing NUL in the name.
+  $cmdline =~ s/\x00.*//s;
+  $cmdline =~ s!\n!!g;  # Remove LFs.
+  return $cmdline;
+}
+
+# Gee, curl's -L (--location) option isn't reliable at least
+# with its 7.12.3 version. Curl will forget to post data if
+# there is a redirection. This function is a workaround for
+# curl. Redirection happens on borg hosts.
+#
+# Issues a HEAD request for $url and returns the value of the last
+# "Location:" header in the response, or $url unchanged if there is none.
+sub ResolveRedirectionForCurl {
+  my $url = shift;
+  my $command_line = "$CURL -s --head '$url'";
+  open(CMDLINE, "$command_line |") or error($command_line);
+  while (<CMDLINE>) {
+    # HTTP header lines end in CRLF, and '.' matches "\r"; strip it so the
+    # carriage return does not become part of the returned URL.
+    s/\r//g;
+    if (/^Location: (.*)/) {
+      $url = $1;
+    }
+  }
+  close(CMDLINE);
+  return $url;
+}
+
+# Fetch symbols from $SYMBOL_PAGE for all PC values found in profile.
+# $pcset is a hash reference whose keys are the PCs (presumably hex strings
+# without a "0x" prefix, since one is prepended before posting).
+# Returns a hash reference mapping each PC to [short name, "?", full name];
+# a PC with no entry in the response is named by its "0x"-prefixed address.
+sub FetchSymbols {
+  my $pcset = shift;
+
+  my %seen = ();
+  my @pcs = grep { !$seen{$_}++ } keys(%$pcset);  # uniq
+  my $post_data = join("+", sort((map {"0x" . "$_"} @pcs)));
+  # Stop if the POST body cannot be written; otherwise curl would be
+  # handed a missing or stale file.
+  open(POSTFILE, ">$main::tmpfile_sym")
+    || error("$main::tmpfile_sym: $!\n");
+  print POSTFILE $post_data;
+  close(POSTFILE);
+
+  my $url = SymbolPageURL();
+  # Here we use curl for sending data via POST since old
+  # wgets don't have --post-file option.
+  $url = ResolveRedirectionForCurl($url);
+  my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'";
+  # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
+  my $cppfilt = $obj_tool_map{"c++filt"};
+  open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
+
+  # Response lines of the form "0x<hex> <name>" map a PC to its symbol.
+  my %map;
+  while (<SYMBOL>) {
+    if (m/^0x([0-9a-f]+)\s+(.+)/) {
+      $map{$1} = $2;
+    }
+  }
+  close(SYMBOL);
+
+  my $symbols = {};
+  for my $pc (@pcs) {
+    my $fullname;
+    if (defined($map{$pc})) {
+      $fullname = $map{$pc};
+    } else {
+      $fullname = "0x" . $pc;  # Just use addresses
+    }
+    my $name = ShortFunctionName($fullname);
+    $symbols->{$pc} = [$name, "?", $fullname];
+  }
+  return $symbols;
+}
+
+# Returns $file_name with its leading directory part (everything up to and
+# including the last '/') removed.
+sub BaseName {
+  my ($path) = @_;
+  $path =~ s!^.*/!!;  # Strip the directory name
+  return $path;
+}
+
+# Builds the file base name used for a fetched profile:
+#   <basename of binary>.<$main::op_time>.<host>-port<port>
+# where host and port come from parsing $profile_name.
+sub MakeProfileBaseName {
+  my ($binary_name, $profile_name) = @_;
+  my ($host, $port) = ParseProfileURL($profile_name);
+  return sprintf("%s.%s.%s-port%s",
+                 BaseName($binary_name), $main::op_time, $host, $port);
+}
+
sub FetchDynamicProfile {
my $binary_name = shift;
my $profile_name = shift;
my $fetch_name_only = shift;
my $encourage_patience = shift;
- # TODO: Add support for fetching profiles dynamically from a server
- return $profile_name;
+ my $user_dir = $ENV{HOME};
+ my $profile_dir = $user_dir . "/pprof";
+ if (!(-d $profile_dir)) {
+ mkdir($profile_dir) || die("Unable to create profile directory $profile_dir\n");
+ }
+ if (!IsProfileURL($profile_name)) {
+ return $profile_name;
+ } else {
+ my ($host, $port, $type) = ParseProfileURL($profile_name);
+ if ($type eq "" || $type eq "/") {
+ # Missing type specifier defaults to cpu-profile
+ $type = $PROFILE_PAGE;
+ }
+
+ my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
+
+ my $url;
+ my $wget_timeout;
+ if ($type eq $PROFILE_PAGE) {
+ $url = sprintf("http://$host:$port$PROFILE_PAGE?seconds=%d",
+ $main::opt_seconds);
+ $wget_timeout = sprintf("--timeout=%d",
+ int($main::opt_seconds * 1.01 + 60));
+ } else {
+ # For non-CPU profiles, we add a type-extension to
+ # the target profile file name.
+ my $suffix = $type;
+ $suffix =~ s,/,.,g;
+ $profile_file .= "$suffix";
+ $url = "http://$host:$port$type";
+ $wget_timeout = "";
+ }
+ my $tmp_profile = "$profile_dir/.tmp.$profile_file";
+ my $real_profile = "$profile_dir/$profile_file";
+
+ if ($fetch_name_only > 0) {
+ return $real_profile;
+ }
+
+ my $cmd = "$WGET $wget_timeout -q -O $tmp_profile '$url'";
+ if ($type eq $PROFILE_PAGE) {
+ print STDERR "Gathering CPU profile from $host:$port for $main::opt_seconds seconds to\n ${real_profile}\n";
+ if ($encourage_patience) {
+ print STDERR "Be patient...\n";
+ }
+ } else {
+ print STDERR "Fetching $type profile from $host:$port to\n ${real_profile}\n";
+ }
+
+ (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
+ (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n");
+ print STDERR "Wrote profile to $real_profile\n";
+ $main::collected_profile = $real_profile;
+ return $main::collected_profile;
+ }
}
# Collect profiles in parallel
@@ -1543,10 +1852,11 @@ sub ReadProfile {
open(PROFILE, "<$fname") || error("$fname: $!\n");
binmode PROFILE; # New perls do UTF-8 processing
my $header = <PROFILE>;
+ my $contention_marker = substr($CONTENTION_PAGE, 1); # remove leading /
if ($header =~ m/^heap profile:/) {
$main::heap_profile = 1;
return ReadHeapProfile($prog, $fname, $header);
- } elsif ($header =~ m/^--- *contentionz/ ) {
+ } elsif ($header =~ m/^--- *$contention_marker/o ) {
$main::lock_profile = 1;
return ReadSynchProfile($prog, $fname);
} elsif ($header =~ m/^--- *Stacks:/ ) {
@@ -1581,17 +1891,11 @@ sub ReadCPUProfile {
my $pcs = {};
# Parse string into array of slots.
- # L! is needed for 64-bit # platforms, but not supported on 5.005
- # (despite the manpage claims)
+ # L! cannot be used because with a native 64-bit build, it will cause
+ # 1) a valid 64-bit profile to use the 32-bit codepath, and
+ # 2) a valid 32-bit profile to be unrecognized.
- my $format;
- if ($] >= 5.008) {
- $format = "L!*";
- } else {
- $format = "L*";
- }
-
- my @slots = unpack($format, $str);
+ my @slots = unpack("L*", $str);
# Read header. The current header version is a 5-element structure
# containing:
@@ -1713,15 +2017,55 @@ sub ReadHeapProfile {
$index = 2;
}
- # Find the type of this profile
+ # Find the type of this profile. The header line looks like:
+ # heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053
+ # There are two pairs <count: size>, the first inuse objects/space, and the
+ # second allocated objects/space. This is followed optionally by a profile
+ # type, and if that is present, optionally by a sampling frequency. The
+ # interpretation of the sampling frequency is that the profiler, for each
+ # sample, calculates a uniformly distributed random integer less than the
+ # given value, and records the next sample after that many bytes have been
+ # allocated. Therefore, the expected sample interval is half of the given
+ # frequency. By default, if not specified, the expected sample interval is
+ # 128KB. Only remote-heap-page profiles are adjusted for sample size.
+ my $should_adjust_sample = 0;
+ my $sample_adjustment = 0;
chomp($header);
my $type = "unknown";
- if ($header =~ m/^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*(.*))?/) {
+ if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") {
if (defined($6) && ($6 ne '')) {
$type = $6;
+ # The regex test here is to see if type is a substring of HEAP_PAGE
+ if (($HEAP_PAGE =~ /$type/)) {
+ $should_adjust_sample = 1;
+ if (defined($8) && ($8 ne '')) {
+ $sample_adjustment = int($8)/2;
+ printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n",
+ $sample_adjustment);
+ }
+ }
+ } else {
+ # We detect whether or not this is a remote-heap profile by checking
+ # that the total-allocated stats ($n2,$s2) are exactly the
+ # same as the in-use stats ($n1,$s1). It is remotely conceivable
+ # that a non-remote-heap profile may pass this check, but it is hard
+ # to imagine how that could happen.
+ my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+ if (($n1 == $n2) && ($s1 == $s2)) {
+ # This is likely to be a remote-heap based sample profile
+ $should_adjust_sample = 1;
+ }
}
}
+ # For remote-heap generated profiles, adjust the counts and sizes to
+ # account for the sample rate (we sample once every 128KB by default).
+ if ($should_adjust_sample && ($sample_adjustment == 0)) {
+ # Turn on profile adjustment.
+ $sample_adjustment = 128*1024;
+ print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n";
+ }
+
my $profile = {};
my $pcs = {};
my $map = "";
@@ -1739,13 +2083,13 @@ sub ReadHeapProfile {
# Read /proc/self/maps data as formatted by DumpAddressMap()
my $buildvar = "";
while (<PROFILE>) {
- # Parse "build=<dir>" specification if supplied
- if (m/^\s*build=(.*)\n/) {
- $buildvar = $1;
- }
+ # Parse "build=<dir>" specification if supplied
+ if (m/^\s*build=(.*)\n/) {
+ $buildvar = $1;
+ }
- # Expand "$build" variable if available
- $_ =~ s/\$build\b/$buildvar/g;
+ # Expand "$build" variable if available
+ $_ =~ s/\$build\b/$buildvar/g;
$map .= $_;
}
@@ -1760,6 +2104,20 @@ sub ReadHeapProfile {
my $stack = $5;
my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+ if ($sample_adjustment) {
+ my $ratio;
+ $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+ if ($ratio < 1) {
+ $n1 /= $ratio;
+ $s1 /= $ratio;
+ }
+ $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+ if ($ratio < 1) {
+ $n2 /= $ratio;
+ $s2 /= $ratio;
+ }
+ }
+
my @counts = ($n1, $s1, $n2, $s2);
AddEntries($profile, $pcs, $stack, $counts[$index]);
}
@@ -1785,17 +2143,35 @@ sub ReadSynchProfile {
my $seen_clockrate = 0;
my $line;
+ my $index = 0;
+ if ($main::opt_total_delay) {
+ $index = 0;
+ } elsif ($main::opt_contentions) {
+ $index = 1;
+ } elsif ($main::opt_mean_delay) {
+ $index = 2;
+ }
+
while ( $line = <PROFILE> ) {
- if ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
- $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
- my ($count, $stack) = ($1, $2);
- if ($count !~ /^\d+$/) {
+ if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) {
+ my ($cycles, $count, $stack) = ($1, $2, $3);
+
+ # Convert cycles to nanoseconds
+ $cycles /= $cyclespernanosec;
+
+ my @values = ($cycles, $count, $cycles / $count);
+ AddEntries($profile, $pcs, $stack, $values[$index]);
+
+ } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
+ $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
+ my ($cycles, $stack) = ($1, $2);
+ if ($cycles !~ /^\d+$/) {
next;
}
# Convert cycles to nanoseconds
- $count /= $cyclespernanosec;
- AddEntries($profile, $pcs, $stack, $count);
+ $cycles /= $cyclespernanosec;
+ AddEntries($profile, $pcs, $stack, $cycles);
} elsif ( $line =~ m|cycles/second = (\d+)|) {
$cyclespernanosec = $1 / 1e9;
@@ -1838,6 +2214,7 @@ sub HexExtend {
# Split /proc/pid/maps dump into a list of libraries
sub ParseLibraries {
+ return if $main::use_symbol_page; # We don't need libraries info.
my $prog = shift;
my $map = shift;
my $pcs = shift;
diff --git a/src/profiler.cc b/src/profiler.cc
index 5843720..8ddcc41 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -460,12 +460,12 @@ void ProfileData::SetHandler(void (*handler)(int)) {
}
void ProfileData::FlushTable() {
- if (out_ < 0) {
- // Profiling is not enabled
- return;
- }
-
LOCK(&state_lock_); {
+ if (out_ < 0) {
+ // Profiling is not enabled
+ UNLOCK(&state_lock_);
+ return;
+ }
SetHandler(SIG_IGN); // Disable timer interrupts while we're flushing
LOCK(&table_lock_); {
// Move data from hash table to eviction buffer
diff --git a/src/stacktrace.cc b/src/stacktrace.cc
index 859d52a..da20659 100644
--- a/src/stacktrace.cc
+++ b/src/stacktrace.cc
@@ -45,17 +45,14 @@
#include "stacktrace_x86-inl.h"
#endif
-#if !defined(IMPLEMENTED_STACK_TRACE) && defined(USE_LIBUNWIND) && HAVE_LIBUNWIND_H
+#if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_LIBUNWIND_H
#define IMPLEMENTED_STACK_TRACE
-// This is turned off by default. Possible reasons for turning on in the
-// future:
-// 1. Compiler independence
-// 2. Architecture independence
-// 3. A more liberal MIT license, which allows use with multiple compilers
+#define UNW_LOCAL_ONLY
#include "stacktrace_libunwind-inl.h"
#endif
#if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_UNWIND_H
+// This implementation suffers from deadlocks. Don't enable it.
#define IMPLEMENTED_STACK_TRACE
#include "stacktrace_x86_64-inl.h"
#endif
diff --git a/src/stacktrace_libunwind-inl.h b/src/stacktrace_libunwind-inl.h
index 42c28d3..bf39633 100644
--- a/src/stacktrace_libunwind-inl.h
+++ b/src/stacktrace_libunwind-inl.h
@@ -51,14 +51,14 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
do {
ret = unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip);
- assert(ret == 0);
+ if (ret < 0)
+ break;
if (skip_count > 0) {
skip_count--;
} else {
result[n++] = ip;
}
ret = unw_step(&cursor);
- assert(ret >= 0);
} while ((n < max_depth) && (ret > 0));
return n;
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 5dc062e..bf45dfb 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -79,6 +79,7 @@
#include <unistd.h>
#include <errno.h>
#include <stdarg.h>
+#include "base/commandlineflags.h"
#include "google/malloc_hook.h"
#include "google/malloc_extension.h"
#include "google/stacktrace.h"
@@ -147,12 +148,27 @@ static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
// REQUIRED: kMaxPages >= kMinSystemAlloc;
static const size_t kMaxPages = kMinSystemAlloc;
+/* The smallest prime > 2^n */
+static unsigned int primes_list[] = {
+ // Small values might cause high rates of sampling
+ // and are hence commented out.
+ // 2, 5, 11, 17, 37, 67, 131, 257,
+ // 521, 1031, 2053, 4099, 8209, 16411,
+ 32771, 65537, 131101, 262147, 524309, 1048583,
+ 2097169, 4194319, 8388617, 16777259, 33554467 };
+
// Twice the approximate gap between sampling actions.
// I.e., we take one sample approximately once every
-// kSampleParameter/2
+// tcmalloc_sample_parameter/2
// bytes of allocation, i.e., ~ once every 128KB.
// Must be a prime number.
-static const size_t kSampleParameter = 266053;
+DEFINE_int64(tcmalloc_sample_parameter, 262147,
+ "Twice the approximate gap between sampling actions."
+ " Must be a prime number. Otherwise will be rounded up to a "
+ " larger prime number");
+static size_t sample_period = 262147;
+// Protects sample_period above
+static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
//-------------------------------------------------------------------
// Mapping from size to size_class and vice versa
@@ -303,6 +319,17 @@ static int NumMoveSize(size_t size) {
// and thread caches.
if (num > static_cast<int>(0.8 * kMaxFreeListLength))
num = static_cast<int>(0.8 * kMaxFreeListLength);
+
+ // Also, avoid bringing in too many objects into small object free
+ // lists. There are lots of such lists, and if we allow each one to
+ // fetch too many at a time, we end up having to scavenge too often
+ // (especially when there are lots of threads and each thread gets a
+ // small allowance for its thread cache).
+ //
+ // TODO: Make thread cache free list sizes dynamic so that we do not
+ // have to equally divide a fixed resource amongst lots of threads.
+ if (num > 32) num = 32;
+
return num;
}
@@ -918,7 +945,7 @@ void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
uint64_t large_pages = 0;
int large_spans = 0;
for (Span* s = large_.next; s != &large_; s = s->next) {
- out->printf(" [ %6" PRIuS " spans ]\n", s->length);
+ out->printf(" [ %6" PRIuS " pages ]\n", s->length);
large_pages += s->length;
large_spans++;
}
@@ -1057,6 +1084,7 @@ class TCMalloc_ThreadCache_FreeList {
SLL_PopRange(&list_, N, start, end);
ASSERT(length_ >= N);
length_ -= N;
+ if (length_ < lowater_) lowater_ = length_;
}
};
@@ -1669,9 +1697,23 @@ void TCMalloc_ThreadCache::PickNextSample() {
uint32_t r = rnd_;
rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
- // Next point is "rnd_ % (2*sample_period)". I.e., average
- // increment is "sample_period".
- bytes_until_sample_ = rnd_ % kSampleParameter;
+ // Next point is "rnd_ % (sample_period)". I.e., average
+ // increment is "sample_period/2".
+ const int flag_value = FLAGS_tcmalloc_sample_parameter;
+ static int last_flag_value = -1;
+
+ if (flag_value != last_flag_value) {
+ SpinLockHolder h(&sample_period_lock);
+ int i;
+ for (i = 0; i < (sizeof(primes_list)/sizeof(primes_list[0]) - 1); i++) {
+ if (primes_list[i] >= flag_value) {
+ break;
+ }
+ }
+ sample_period = primes_list[i];
+ last_flag_value = flag_value;
+ }
+ bytes_until_sample_ = rnd_ % sample_period;
}
void TCMalloc_ThreadCache::InitModule() {
@@ -2118,7 +2160,7 @@ static inline void* do_malloc(size_t size) {
}
// The following call forces module initialization
TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
- if (heap->SampleAllocation(size)) {
+ if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
Span* span = DoSampledAllocation(size);
if (span != NULL) {
ret = reinterpret_cast<void*>(span->start << kPageShift);
diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc
index e9ec6c3..3e85e7a 100644
--- a/src/tests/heap-checker_unittest.cc
+++ b/src/tests/heap-checker_unittest.cc
@@ -273,6 +273,7 @@ static void DoRunHidden(Closure* c, int n) {
if (n) {
run_hidden_ptr(c, n-1);
wipe_stack_ptr(n);
+ sleep(0); // undo -foptimize-sibling-calls
} else {
c->Run();
}
@@ -284,6 +285,7 @@ static void DoWipeStack(int n) {
volatile int arr[sz];
for (int i = 0; i < sz; ++i) arr[i] = 0;
wipe_stack_ptr(n-1);
+ sleep(0); // undo -foptimize-sibling-calls
}
}
@@ -463,14 +465,14 @@ static void TestHeapLeakCheckerPProf() {
// trick heap change: same total # of bytes and objects, but
// different individual object sizes
static void TestHeapLeakCheckerTrick() {
- void* bar1 = AllocHidden(60 * sizeof(int));
+ void* bar1 = AllocHidden(240 * sizeof(int));
Use(&bar1);
- void* bar2 = AllocHidden(40 * sizeof(int));
+ void* bar2 = AllocHidden(160 * sizeof(int));
Use(&bar2);
HeapLeakChecker check("trick");
- void* foo1 = AllocHidden(70 * sizeof(int));
+ void* foo1 = AllocHidden(280 * sizeof(int));
Use(&foo1);
- void* foo2 = AllocHidden(30 * sizeof(int));
+ void* foo2 = AllocHidden(120 * sizeof(int));
Use(&foo2);
DeAllocHidden(&bar1);
DeAllocHidden(&bar2);
@@ -482,16 +484,16 @@ static void TestHeapLeakCheckerTrick() {
// no false negatives from pprof
static void TestHeapLeakCheckerDeathTrick() {
- void* bar1 = AllocHidden(60 * sizeof(int));
+ void* bar1 = AllocHidden(240 * sizeof(int));
Use(&bar1);
- void* bar2 = AllocHidden(40 * sizeof(int));
+ void* bar2 = AllocHidden(160 * sizeof(int));
Use(&bar2);
HeapLeakChecker check("death_trick");
DeAllocHidden(&bar1);
DeAllocHidden(&bar2);
- void* foo1 = AllocHidden(70 * sizeof(int));
+ void* foo1 = AllocHidden(280 * sizeof(int));
Use(&foo1);
- void* foo2 = AllocHidden(30 * sizeof(int));
+ void* foo2 = AllocHidden(120 * sizeof(int));
Use(&foo2);
// TODO(maxim): use the above if we make pprof work in automated test runs
if (!FLAGS_maybe_stripped) {
@@ -733,13 +735,19 @@ static void* HeapBusyThreadBody(void* a) {
}
}
if (FLAGS_test_register_leak) {
- // Hide the register pointer value with an xor mask.
+ // Hide the register "ptr" value with an xor mask.
// If one provides --test_register_leak flag, the test should
// (with very high probability) crash on some leak check
// with a leak report (of some x * sizeof(int) + y * sizeof(int*) bytes)
// pointing at the two lines above in this function
// with "new (initialized) int" in them as the allocators
// of the leaked objects.
+ // CAVEAT: We can't really prevent a compiler from saving some
+ // temporary values of "ptr" on the stack, which lets us find
+ // the heap objects by a route other than the register.
+ // Hence it's normal if for certain compilers or optimization modes
+ // --test_register_leak does not cause a leak crash of the above form
+ // (this happens e.g. for gcc 4.0.1 in opt mode).
ptr = reinterpret_cast<int **>(
reinterpret_cast<uintptr_t>(ptr) ^ kHideMask);
// busy loop to get the thread interrupted at:
diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc
index b030e32..9f2df59 100644
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@@ -399,11 +399,14 @@ static void TestHugeAllocations() {
for (size_t i = 0; i < 10000; i++) {
TryHugeAllocation(kMaxSize - i);
}
-
- // Check that asking for stuff near signed/unsigned boundary returns NULL
+ // Asking for memory sizes near signed/unsigned boundary (kMaxSignedSize)
+ // might work or not, depending on the amount of virtual memory.
for (size_t i = 0; i < 100; i++) {
- TryHugeAllocation(kMaxSignedSize - i);
- TryHugeAllocation(kMaxSignedSize + i);
+ void* p = NULL;
+ p = malloc(kMaxSignedSize + i);
+ if (p) free(p); // if: free(NULL) is not necessarily defined
+ p = malloc(kMaxSignedSize - i);
+ if (p) free(p);
}
}
@@ -560,18 +563,6 @@ int main(int argc, char** argv) {
free(p);
}
- // Check that large allocations fail with NULL instead of crashing
- fprintf(LOGSTREAM, "==== Testing out of memory\n");
- for (int s = 0; ; s += (10<<20)) {
- void* large_object = malloc(s);
- if (large_object == NULL) break;
- free(large_object);
- }
-
- // Check that huge allocations fail with NULL instead of crashing
- fprintf(LOGSTREAM, "==== Testing huge allocations\n");
- TestHugeAllocations();
-
// Check calloc() with various arguments
fprintf(LOGSTREAM, "==== Testing calloc\n");
TestCalloc(0, 0, true);
@@ -611,10 +602,16 @@ int main(int argc, char** argv) {
threads[i] = new TesterThread(i);
}
- // Start
+ // Start the threads.
+ // Set the stack size to a small value to avoid inheriting a 120MB+
+ // limit when running under the google make system.
+ pthread_attr_t attr;
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 1 << 20);
for (int i = 0; i < FLAGS_numthreads; ++i) {
- CHECK_EQ(pthread_create(&thread_ids[i], NULL, RunThread, threads[i]), 0);
+ CHECK_EQ(pthread_create(&thread_ids[i], &attr, RunThread, threads[i]), 0);
}
+ pthread_attr_destroy(&attr);
// Wait
for (int i = 0; i < FLAGS_numthreads; ++i) {
@@ -624,6 +621,21 @@ int main(int argc, char** argv) {
for (int i = 0; i < FLAGS_numthreads; ++i) delete threads[i]; // Cleanup
+ // Do the memory-intensive tests after threads are done, since exhausting
+ // the available address space can cause pthread_create to fail.
+
+ // Check that huge allocations fail with NULL instead of crashing
+ fprintf(LOGSTREAM, "==== Testing huge allocations\n");
+ TestHugeAllocations();
+
+ // Check that large allocations fail with NULL instead of crashing
+ fprintf(LOGSTREAM, "==== Testing out of memory\n");
+ for (int s = 0; ; s += (10<<20)) {
+ void* large_object = malloc(s);
+ if (large_object == NULL) break;
+ free(large_object);
+ }
+
fprintf(LOGSTREAM, "PASS\n");
return 0;
}