summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2007-03-22 04:46:29 +0000
committercsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2007-03-22 04:46:29 +0000
commit60a3a2ce77ed2713b2eedd20952d9cfc56ff7ccf (patch)
tree234b959ff2d934386ecfc08a7477f5c94290b709
parent298274f8d4f474d2b16a35c8babc58817088c59e (diff)
downloadgperftools-60a3a2ce77ed2713b2eedd20952d9cfc56ff7ccf.tar.gz
Fri Jan 27 14:04:27 2006 Google Inc. <opensource@google.com>
* google-perftools: version 0.6 release * More sophisticated stacktrace usage, possibly using libunwind (aruns) * Update pprof to handle 64-bit profiles (dehnert) * Fix GetStackTrace to correctly return top stackframe (sanjay) * Add ANSI compliance for new and new[], including new_handler (jkearney) * More accuracy by reading ELF files directly rather than objdump (mec) * Add readline support for pprof (addi) * Add #includes for PPC (csilvers) * New PC-detection routine for ibook powerpc (asbestoshead) * Vastly improved tcmalloc unittest (csilvers) * Move documentation from /usr/doc to /usr/share/doc git-svn-id: http://gperftools.googlecode.com/svn/trunk@19 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
-rw-r--r--ChangeLog14
-rw-r--r--Makefile.am13
-rwxr-xr-xconfigure113
-rw-r--r--configure.ac5
-rw-r--r--packages/rpm/rpm.spec1
-rw-r--r--src/base/thread_lister.c1
-rw-r--r--src/config.h.in3
-rw-r--r--src/heap-checker.cc303
-rw-r--r--src/heap-profiler.cc12
-rw-r--r--src/malloc_extension.cc1
-rwxr-xr-xsrc/pprof1232
-rw-r--r--src/profiler.cc39
-rw-r--r--src/stacktrace.cc148
-rw-r--r--src/stacktrace_generic-inl.h61
-rw-r--r--src/stacktrace_libunwind-inl.h69
-rw-r--r--src/stacktrace_x86-inl.h119
-rw-r--r--src/stacktrace_x86_64-inl.h134
-rw-r--r--src/tcmalloc.cc546
-rw-r--r--src/tests/stacktrace_unittest.cc19
-rw-r--r--src/tests/tcmalloc_unittest.cc607
20 files changed, 2843 insertions, 597 deletions
diff --git a/ChangeLog b/ChangeLog
index d52a139..dd3827d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -61,3 +61,17 @@ Mon Nov 14 17:28:59 2005 Google Inc. <opensource@google.com>
* Add va_start/va_end calls around vsnprintf() (csilvers)
* Write our own __syscall_return(), since it's not defined
consistently on all 64-bit linux distros (markus)
+
+Fri Jan 27 14:04:27 2006 Google Inc. <opensource@google.com>
+
+ * google-perftools: version 0.6 release
+ * More sophisticated stacktrace usage, possibly using libunwind (aruns)
+ * Update pprof to handle 64-bit profiles (dehnert)
+ * Fix GetStackTrace to correctly return top stackframe (sanjay)
+ * Add ANSI compliance for new and new[], including new_handler (jkearney)
+ * More accuracy by reading ELF files directly rather than objdump (mec)
+ * Add readline support for pprof (addi)
+ * Add #includes for PPC (csilvers)
+ * New PC-detection routine for ibook powerpc (asbestoshead)
+ * Vastly improved tcmalloc unittest (csilvers)
+ * Move documentation from /usr/doc to /usr/share/doc
diff --git a/Makefile.am b/Makefile.am
index cab257c..8d39bbb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -21,7 +21,7 @@ perftoolsincludedir = $(googleincludedir)/perftools
# We'll add to this later, on a library-by-library basis
perftoolsinclude_HEADERS =
-docdir = $(prefix)/doc/$(PACKAGE)-$(VERSION)
+docdir = $(prefix)/share/doc/$(PACKAGE)-$(VERSION)
# This is for HTML and other documentation you want to install.
# Add your documentation files (in doc/) in addition to these
# top-level boilerplate files. Also add a TODO file if you have one.
@@ -53,7 +53,11 @@ dist_doc_DATA += doc/index.html
### The header files we use. We divide into categories based on directory
S_STACKTRACE_INCLUDES =
SG_STACKTRACE_INCLUDES = src/google/stacktrace.h
-SGP_STACKTRACE_INCLUDES = src/config.h
+SGP_STACKTRACE_INCLUDES = src/config.h \
+ src/stacktrace_generic-inl.h \
+ src/stacktrace_libunwind-inl.h \
+ src/stacktrace_x86_64-inl.h \
+ src/stacktrace_x86-inl.h
STACKTRACE_INCLUDES = $(S_STACKTRACE_INCLUDES) $(SG_STACKTRACE_INCLUDES) $(SGP_STACKTRACE_INCLUDES)
googleinclude_HEADERS += $(SG_STACKTRACE_INCLUDES)
perftoolsinclude_HEADERS += $(SGP_STACKTRACE_INCLUDES)
@@ -324,7 +328,7 @@ CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisab
libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS)
### Unittests
-check_SCRIPTS += profiler_unittest_sh
+check_SCRIPTS += profiler_unittest_sh pprof_unittest
noinst_SCRIPTS += src/tests/profiler_unittest.sh
# These are sub-programs used by profiler_unittest.sh
@@ -352,6 +356,9 @@ profiler4_unittest_CXXFLAGS = -g $(PTHREAD_CFLAGS) -DWITH_THREADS
profiler4_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
profiler4_unittest_LDADD = -lprofiler $(PTHREAD_LIBS)
+pprof_unittest: $(top_srcdir)/src/pprof
+ $< -test
+
### Documentation
dist_man_MANS = doc/pprof.1
dist_doc_DATA += doc/cpu_profiler.html \
diff --git a/configure b/configure
index 6db11b6..5b99e4d 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.57 for google-perftools 0.5.
+# Generated by GNU Autoconf 2.57 for google-perftools 0.6.
#
# Report bugs to <opensource@google.com>.
#
@@ -422,8 +422,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='google-perftools'
PACKAGE_TARNAME='google-perftools'
-PACKAGE_VERSION='0.5'
-PACKAGE_STRING='google-perftools 0.5'
+PACKAGE_VERSION='0.6'
+PACKAGE_STRING='google-perftools 0.6'
PACKAGE_BUGREPORT='opensource@google.com'
ac_unique_file="README"
@@ -953,7 +953,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures google-perftools 0.5 to adapt to many kinds of systems.
+\`configure' configures google-perftools 0.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1019,7 +1019,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of google-perftools 0.5:";;
+ short | recursive ) echo "Configuration of google-perftools 0.6:";;
esac
cat <<\_ACEOF
@@ -1125,7 +1125,7 @@ fi
test -n "$ac_init_help" && exit 0
if $ac_init_version; then
cat <<\_ACEOF
-google-perftools configure 0.5
+google-perftools configure 0.6
generated by GNU Autoconf 2.57
Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
@@ -1140,7 +1140,7 @@ cat >&5 <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by google-perftools $as_me 0.5, which was
+It was created by google-perftools $as_me 0.6, which was
generated by GNU Autoconf 2.57. Invocation command line was
$ $0 $@
@@ -1733,7 +1733,7 @@ fi
# Define the identity of the package.
PACKAGE=google-perftools
- VERSION=0.5
+ VERSION=0.6
cat >>confdefs.h <<_ACEOF
@@ -20799,6 +20799,99 @@ _ACEOF
fi
+echo "$as_me:$LINENO: checking for struct sigcontext.regs->nip" >&5
+echo $ECHO_N "checking for struct sigcontext.regs->nip... $ECHO_C" >&6
+if test "${ac_cv_member_struct_sigcontext_regs__nip+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <signal.h>
+
+int
+main ()
+{
+static struct sigcontext ac_aggr;
+if (ac_aggr.regs->nip)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_member_struct_sigcontext_regs__nip=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <signal.h>
+
+int
+main ()
+{
+static struct sigcontext ac_aggr;
+if (sizeof ac_aggr.regs->nip)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_member_struct_sigcontext_regs__nip=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_member_struct_sigcontext_regs__nip=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_member_struct_sigcontext_regs__nip" >&5
+echo "${ECHO_T}$ac_cv_member_struct_sigcontext_regs__nip" >&6
+if test $ac_cv_member_struct_sigcontext_regs__nip = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_SIGCONTEXT_REGS__NIP 1
+_ACEOF
+
+
+fi
# Defines PRIuS
@@ -22300,7 +22393,7 @@ _ASBOX
} >&5
cat >&5 <<_CSEOF
-This file was extended by google-perftools $as_me 0.5, which was
+This file was extended by google-perftools $as_me 0.6, which was
generated by GNU Autoconf 2.57. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -22363,7 +22456,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-google-perftools config.status 0.5
+google-perftools config.status 0.6
configured by $0, generated by GNU Autoconf 2.57,
with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/configure.ac b/configure.ac
index c908272..aa1826b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,7 +5,7 @@
# make sure we're interpreted by some minimal autoconf
AC_PREREQ(2.57)
-AC_INIT(google-perftools, 0.5, opensource@google.com)
+AC_INIT(google-perftools, 0.6, opensource@google.com)
# The argument here is just something that should be in the current directory
# (for sanity checking)
AC_CONFIG_SRCDIR(README)
@@ -43,7 +43,8 @@ AC_CHECK_MEMBERS([struct sigcontext.sc_eip,
struct sigcontext.eip,
struct sigcontext.rip,
struct sigcontext.sc_ip,
- struct siginfo.si_faddr],,,
+ struct siginfo.si_faddr,
+ struct sigcontext.regs->nip],,,
[#include <signal.h>])
# Defines PRIuS
diff --git a/packages/rpm/rpm.spec b/packages/rpm/rpm.spec
index 24c0432..60e903a 100644
--- a/packages/rpm/rpm.spec
+++ b/packages/rpm/rpm.spec
@@ -15,7 +15,6 @@ Packager: Google <opensource@google.com>
Source: http://goog-perftools.sourceforge.net/%{NAME}-%{PACKAGE_VERSION}.tar.gz
Distribution: Redhat 7 and above.
Buildroot: %{_tmppath}/%{name}-root
-Docdir: %prefix/doc
Prefix: %prefix
%description
diff --git a/src/base/thread_lister.c b/src/base/thread_lister.c
index 7eca594..6def758 100644
--- a/src/base/thread_lister.c
+++ b/src/base/thread_lister.c
@@ -31,6 +31,7 @@
* Author: Markus Gutschke
*/
+#include <stdio.h> // needed for NULL on some powerpc platforms (?!)
#include "base/thread_lister.h"
#include "base/linuxthreads.h"
/* Include other thread listers here that define THREADS macro
diff --git a/src/config.h.in b/src/config.h.in
index 365c4f8..2abb5bd 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -78,6 +78,9 @@
/* Define to 1 if `eip' is member of `struct sigcontext'. */
#undef HAVE_STRUCT_SIGCONTEXT_EIP
+/* Define to 1 if `regs->nip' is member of `struct sigcontext'. */
+#undef HAVE_STRUCT_SIGCONTEXT_REGS__NIP
+
/* Define to 1 if `rip' is member of `struct sigcontext'. */
#undef HAVE_STRUCT_SIGCONTEXT_RIP
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index f081f97..b34eea2 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -45,13 +45,14 @@
#include <google/perftools/hash_set.h>
#include <algorithm>
+#include <fcntl.h>
+#include <string.h>
#include <errno.h>
#include <unistd.h>
-#include <string.h>
-#include <sys/stat.h>
+#include <sys/mman.h>
#include <sys/poll.h>
+#include <sys/stat.h>
#include <sys/types.h>
-#include <fcntl.h>
#include <assert.h>
#ifdef HAVE_LINUX_PTRACE_H
@@ -61,6 +62,8 @@
#include <syscall.h>
#endif
+#include <elf.h>
+
#include <google/stacktrace.h>
#include <google/heap-profiler.h>
#include <google/heap-checker.h>
@@ -393,51 +396,219 @@ static int GetStatusOutput(const char* command, string* output) {
return pclose(f);
}
-// A ProcMapsTask to record global data to ignore later
-// that belongs to 'library' mapped at 'start_address' with 'file_offset'.
-static void RecordGlobalDataLocked(const char* library,
- uint64 start_address,
- uint64 file_offset) {
- HeapProfiler::MESSAGE(2, "HeapChecker: Looking into %s\n", library);
- string command("/usr/bin/objdump -hw ");
- command.append(library);
- string output;
- if (GetStatusOutput(command.c_str(), &output) != 0) {
- HeapProfiler::MESSAGE(-1, "HeapChecker: "
- "Failed executing %s\n", command.c_str());
- abort();
- }
- const char* output_start = output.c_str();
+// RAII class for a file descriptor.
+
+class FileDescriptor {
+ public:
+ FileDescriptor(int fd) : fd_(fd) { ; }
+ ~FileDescriptor() { if (fd_ >= 0) close(fd_); }
+ int Close() { int fd = fd_; fd_ = -1; return close(fd); }
+ operator int() { return fd_; }
+ private:
+ int fd_;
+};
+
+// This function takes the fields from a /proc/self/maps line:
+//
+// start_address start address of a memory region.
+// end_address end address of a memory region
+// permissions rwx + private/shared bit
+// file_offset file offset within the mapped file
+// (major:minor) major and minor device number of the mapped file
+// inode inode number of the mapped file
+// filename filename of the mapped file
+//
+// First, if the region is not writeable, then it cannot have any heap
+// pointers in it.
+//
+// It would be simple to just mark every writeable memory region as live.
+// However, that would pick up unused bottom pieces of thread stacks
+// and other rot. So we need more complexity:
+//
+// Second, if the region is anonymous, we ignore it. That skips the
+// main stack and the thread stacks which are picked up elsewhere.
+// Unfortunately that also skips the BSS portions of shared library
+// segments, and we need to pick those up.
+//
+// So, for each writable memory region that is mapped to a file,
+// we recover the original segment information from that file
+// (segment headers, not section headers). We pick out the segment
+// that contains the given "file_offset", figure out where that
+// segment was loaded into memory, and register all of the memory
+// addresses for that segment.
+//
+// Picking out the right segment is a bit of a mess because the
+// same file offset can appear in multiple segments! For example:
+//
+// LOAD 0x000000 0x08048000 0x08048000 0x231a8 0x231a8 R E 0x1000
+// LOAD 0x0231a8 0x0806c1a8 0x0806c1a8 0x01360 0x014e8 RW 0x1000
+//
+// File offset 0x23000 appears in both segments. The second segment
+// starts at 0x23000 because of rounding.  Fortunately, we skip the
+// first segment because it is not writeable. Most of the shared
+// objects we see have one read-executable segment and one read-write
+// segment so we really skate.
+//
+// If a shared library has no initialized data, only BSS, then the
+// size of the read-write LOAD segment will be zero, the dynamic loader
+// will create an anonymous memory region for the BSS but no named
+// segment [I think -- gotta check ld-linux.so -- mec]. We will
+// overlook that segment and never get here. This is a bug.
+
+static bool RecordGlobalDataLocked(uint64 start_address,
+ uint64 end_address,
+ const char* permissions,
+ uint64 file_offset,
+ int64 inode,
+ const char* filename) {
+ // Ignore non-writeable regions.
+ if (strchr(permissions, 'w') == NULL)
+ return true;
+
+ // Ignore anonymous regions.
+ // This drops BSS regions which causes us much work later.
+ if (inode == 0)
+ return true;
+
+ // Grab some ELF types.
+#ifdef _LP64
+ typedef Elf64_Ehdr ElfFileHeader;
+ typedef Elf64_Phdr ElfProgramHeader;
+#else
+ typedef Elf32_Ehdr ElfFileHeader;
+ typedef Elf32_Phdr ElfProgramHeader;
+#endif
- if (FLAGS_heap_profile_log >= 5) {
- HeapProfiler::MESSAGE(5, "HeapChecker: Looking at objdump\n");
- write(STDERR_FILENO, output.data(), output.size());
+ // Cannot mmap the ELF file because of the live ProcMapsIterator.
+ // Have to read little pieces. Fortunately there are just two.
+ HeapProfiler::MESSAGE(2, "HeapChecker: Looking into %s\n", filename);
+ FileDescriptor fd_elf(open(filename, O_RDONLY));
+ if (fd_elf < 0)
+ return false;
+
+ // Read and validate the file header.
+ ElfFileHeader efh;
+ if (read(fd_elf, &efh, sizeof(efh)) != sizeof(efh))
+ return false;
+ if (memcmp(&efh.e_ident[0], ELFMAG, SELFMAG) != 0)
+ return false;
+ if (efh.e_version != EV_CURRENT)
+ return false;
+ if (efh.e_type != ET_EXEC && efh.e_type != ET_DYN)
+ return false;
+
+ // Read the segment headers.
+ if (efh.e_phentsize != sizeof(ElfProgramHeader))
+ return false;
+ if (lseek(fd_elf, efh.e_phoff, SEEK_SET) != efh.e_phoff)
+ return false;
+ const size_t phsize = efh.e_phnum * efh.e_phentsize;
+ ElfProgramHeader* eph = new ElfProgramHeader[efh.e_phnum];
+ if (read(fd_elf, eph, phsize) != phsize) {
+ delete[] eph;
+ return false;
}
- while (1) {
- char sec_name[11];
- uint64 sec_size, sec_vmaddr, sec_lmaddr, sec_offset;
- if (sscanf(output_start, "%*d .%10s "LLX" "LLX" "LLX" "LLX" ",
- sec_name, &sec_size, &sec_vmaddr,
- &sec_lmaddr, &sec_offset) == 5) {
- if (strcmp(sec_name, "data") == 0 ||
- strcmp(sec_name, "bss") == 0) {
- uint64 real_start = start_address + sec_offset - file_offset;
- HeapProfiler::MESSAGE(4, "HeapChecker: "
- "Got section %s: %p of "LLX" bytes\n",
- sec_name,
- reinterpret_cast<void*>(real_start),
- sec_size);
- (*library_live_objects)[library].
- push_back(AllocObject(reinterpret_cast<void*>(real_start),
- sec_size, IN_GLOBAL_DATA));
+ // Gonna need this page size for page boundary considerations.
+ // Better be a power of 2.
+ const int int_page_size = getpagesize();
+ if (int_page_size <= 0 || int_page_size & (int_page_size-1))
+ abort();
+ const uint64 page_size = int_page_size;
+
+ // Walk the segment headers.
+ // Find the segment header that contains the given file offset.
+ bool found_load_segment = false;
+ for (int iph = 0; iph < efh.e_phnum; ++iph) {
+ HeapProfiler::MESSAGE(3, "HeapChecker: %s %d: p_type: %d p_flags: %x",
+ filename, iph, eph[iph].p_type, eph[iph].p_flags);
+ if (eph[iph].p_type == PT_LOAD && eph[iph].p_flags & PF_W) {
+ // Sanity check the segment header.
+ if (eph[iph].p_vaddr != eph[iph].p_paddr) {
+ delete[] eph;
+ return false;
+ }
+ if ((eph[iph].p_vaddr & (page_size-1)) !=
+ (eph[iph].p_offset & (page_size-1))) {
+ delete[] eph;
+ return false;
+ }
+
+ // The segment is not aligned in the ELF file, but will be
+ // aligned in memory. So round it to page boundaries.
+ // Note: we lose if p_end is on the last page.
+ const uint64 p_start = eph[iph].p_offset &~ (page_size-1);
+ const uint64 p_end = ((eph[iph].p_offset + eph[iph].p_memsz)
+ + (page_size-1)) &~ (page_size-1);
+ if (p_end < p_start) {
+ delete[] eph;
+ return false;
+ }
+ if (file_offset >= p_start && file_offset < p_end) {
+ // Found it.
+ if (found_load_segment) {
+ delete[] eph;
+ return false;
+ }
+ found_load_segment = true;
+
+ // [p_start, p_end) is the file segment, where p_end is extended
+ // for BSS (which does not actually come from the file).
+ //
+ // [start_address, end_address) is the memory region from
+ // /proc/self/maps.
+ //
+ // The point of correspondence is:
+ // file_offset in filespace <-> start_address in memoryspace
+ //
+ // This point of correspondence is reliable because the kernel
+ // virtual memory system actually uses this information for
+ // demand-paging the file.
+ //
+ // A single file segment can get loaded into several contiguous
+ // memory regions with different permissions. I have seen as
+ // many as four regions: no-permission alignment region +
+ // read-only-after-relocation region + read-write data region +
+ // read-write anonymous bss region. So file_offset from the
+  // memory region may not be equal to p_start from the file segment;
+ // file_offset can be anywhere in [p_start, p_end).
+
+ // Check that [start_address, end_address) is wholly contained
+ // in [p_start, p_end).
+ if (end_address < start_address ||
+ end_address - start_address > p_end - file_offset) {
+ delete[] eph;
+ return false;
+ }
+
+ // Calculate corresponding address and length for [p_start, p_end).
+ if (file_offset - p_start > start_address) {
+ delete[] eph;
+ return false;
+ }
+ void* addr = reinterpret_cast<void*>(start_address -
+ (file_offset - p_start));
+ const uintptr_t length = p_end - p_start;
+
+ // That is what we need.
+ (*library_live_objects)[filename].
+ push_back(AllocObject(addr, length, IN_GLOBAL_DATA));
}
}
- // skip to the next line
- const char* next = strpbrk(output_start, "\n\r");
- if (next == NULL) break;
- output_start = next + 1;
}
+ delete[] eph;
+
+ if (!found_load_segment) {
+ HeapProfiler::MESSAGE(-1,
+ "HeapChecker: no LOAD segment found in %s\n",
+ filename);
+ return false;
+ }
+
+ if (fd_elf.Close() < 0)
+ return false;
+
+ return true;
}
// See if 'library' from /proc/self/maps has base name 'library_base'
@@ -522,26 +693,32 @@ HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
HeapProfiler::MESSAGE(4, "HeapChecker: "
"Looking at /proc/self/maps line:\n %s\n",
proc_map_line);
- if (proc_maps_task == DISABLE_LIBRARY_ALLOCS &&
- strncmp(permissions, "r-xp", 4) == 0 && inode != 0) {
- if (start_address >= end_address) abort();
- DisableLibraryAllocs(filename,
- reinterpret_cast<void*>(start_address),
- reinterpret_cast<void*>(end_address));
- }
- if (proc_maps_task == RECORD_GLOBAL_DATA_LOCKED &&
- // grhat based on Red Hat Linux 9
- (strncmp(permissions, "rw-p", 4) == 0 ||
- // Fedora Core 3
- strncmp(permissions, "rwxp", 4) == 0) &&
- inode != 0) {
- if (start_address >= end_address) abort();
- RecordGlobalDataLocked(proc_map_line + size, start_address, file_offset);
- }
+
+ if (start_address >= end_address)
+ abort();
+
// Determine if any shared libraries are present.
- if (strstr(filename, "lib") && strstr(filename, ".so")) {
+ if (inode != 0 && strstr(filename, "lib") && strstr(filename, ".so")) {
saw_shared_lib = true;
}
+
+ if (proc_maps_task == DISABLE_LIBRARY_ALLOCS) {
+ if (inode != 0 && strncmp(permissions, "r-xp", 4) == 0) {
+ DisableLibraryAllocs(filename,
+ reinterpret_cast<void*>(start_address),
+ reinterpret_cast<void*>(end_address));
+ }
+ }
+
+ if (proc_maps_task == RECORD_GLOBAL_DATA_LOCKED) {
+ if (!RecordGlobalDataLocked(start_address, end_address, permissions,
+ file_offset, inode, filename)) {
+ HeapProfiler::MESSAGE(
+ -1, "HeapChecker: failed RECORD_GLOBAL_DATA_LOCKED on %s\n",
+ filename);
+ abort();
+ }
+ }
}
fclose(fp);
@@ -825,7 +1002,7 @@ void HeapLeakChecker::DisableChecksUp(int stack_frames) {
if (!heap_checker_on) return;
if (stack_frames < 1) abort();
void* stack[1];
- if (GetStackTrace(stack, 1, stack_frames) != 1) abort();
+ if (GetStackTrace(stack, 1, stack_frames+1) != 1) abort();
DisableChecksAt(stack[0]);
}
@@ -840,7 +1017,7 @@ bool HeapLeakChecker::HaveDisabledChecksUp(int stack_frames) {
if (!heap_checker_on) return false;
if (stack_frames < 1) abort();
void* stack[1];
- if (GetStackTrace(stack, 1, stack_frames) != 1) abort();
+ if (GetStackTrace(stack, 1, stack_frames+1) != 1) abort();
return HaveDisabledChecksAt(stack[0]);
}
@@ -865,14 +1042,14 @@ void HeapLeakChecker::DisableChecksIn(const char* pattern) {
void* HeapLeakChecker::GetDisableChecksStart() {
if (!heap_checker_on) return NULL;
void* start_address;
- if (GetStackTrace(&start_address, 1, 0) != 1) abort();
+ if (GetStackTrace(&start_address, 1, 1) != 1) abort();
return start_address;
}
void HeapLeakChecker::DisableChecksToHereFrom(void* start_address) {
if (!heap_checker_on) return;
void* end_address;
- if (GetStackTrace(&end_address, 1, 0) != 1) abort();
+ if (GetStackTrace(&end_address, 1, 1) != 1) abort();
if (start_address > end_address) swap(start_address, end_address);
DisableChecksFromTo(start_address, end_address,
10000); // practically no stack depth limit:
diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc
index 45eb908..7cb7d56 100644
--- a/src/heap-profiler.cc
+++ b/src/heap-profiler.cc
@@ -869,10 +869,14 @@ void HeapProfiler::EarlyStartLocked() {
// Our first allocation after registering our hook is treated specially by
// RecordAlloc(); It looks at the stack and counts how many frames up we
// are. First we record the current stack pointer.
- void* here[1];
- GetStackTrace(here, 1, 0);
- // This actually records the frame above this one. We take this into account
- // in RecordAlloc.
+ // Note: The stacktrace implementations differ about how many args they
+ // fill when skip is non-zero. Safest just to reserve maxdepth space.
+ void* here[2];
+ GetStackTrace(here, 2, 1);
+ // Skip the first frame. It points to the current offset within this
+ // function, which will have changed by the time we get to the malloc()
+ // call which triggers. Instead, we store our parent function's offset,
+ // which is shared by both us and by the malloc() call below.
recordalloc_reference_stack_position_ = here[0];
done_first_alloc_ = false; // Initialization has not occured yet
void* first_alloc = malloc(kFirstAllocationNumBytes);
diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc
index 1a42e6e..0260a34 100644
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@@ -249,6 +249,7 @@ void MallocExtension::GetHeapSample(string* result) {
// TODO(menage) Get this working in google-perftools
//DumpAddressMap(DebugStringWriter, result);
+ delete[] entries;
}
void MallocExtension::GetHeapGrowthStacks(std::string* result) {
diff --git a/src/pprof b/src/pprof
index 46366bc..e80063c 100755
--- a/src/pprof
+++ b/src/pprof
@@ -1,6 +1,6 @@
#! /usr/bin/perl -w
-# Copyright (c) 2005, Google Inc.
+# Copyright (c) 1998-2006, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -68,21 +68,31 @@
use strict;
use Getopt::Long;
-# These are the external binaries we use. We hard-code in the path
-# because some people have colorizing versions of these binaries which
-# can cause trouble.
-my $OBJDUMP = "/usr/bin/objdump";
-my $NM = "/usr/bin/nm";
-my $ADDR2LINE = "/usr/bin/addr2line";
+# These are the object tools we use, which come from various sources.
+# We want to invoke them directly, rather than via users' aliases and/or
+# search paths, because some people have colorizing versions of them that
+# can cause trouble, or because the default versions on a system may not
+# deal with 64-bit objects if 64-bit profiles are being analyzed.
+# (However, we will default to the user's defaults if we can't find better.)
+# See ConfigureObjTools near the bottom of this script.
+my %obj_tool_map = (
+ "objdump" => "objdump",
+ "nm" => "nm",
+ "addr2line" => "addr2line",
+);
my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local
my $GV = "gv";
+# There is a pervasive dependency on the length (in hex characters, i.e.,
+# nibbles) of an address, distinguishing between 32-bit and 64-bit profiles:
+my $address_length = 8; # Hope for 32-bit, reset if 64-bit detected.
+
##### Argument parsing #####
sub usage_string {
return <<'EOF';
-Usage: pprof [options] <program> <profile>
- Prints specified cpu- or heap-profile
+Usage: pprof [options] <program> <profile> ...
+ Prints specified cpu- or heap-profile
Options:
--cum Sort by cumulative data
@@ -111,7 +121,7 @@ Heap-Profile Options:
--alloc_space Display allocated (mega)bytes
--alloc_objects Display allocated objects
--show_bytes Display space in bytes
- --drop_negative Ignore negaive differences
+ --drop_negative Ignore negative differences
Call-graph Options:
--nodecount=<n> Show at most so many nodes [default=80]
@@ -121,6 +131,12 @@ Call-graph Options:
--ignore=<regexp> Ignore nodes matching <regexp>
--scale=<n> Set GV scaling [default=0]
+Miscellaneous:
+ --tools=<prefix> Prefix for object tool pathnames
+ --test Run unit tests
+ --help This message
+ --version Version information
+
Examples:
pprof /bin/ls ls.prof
@@ -140,9 +156,9 @@ EOF
sub version_string {
return <<'EOF'
-pprof (part of google-perftools 0.1)
+pprof (part of google-perftools 0.6)
-Copyright 1998-2005 Google Inc.
+Copyright 1998-2006 Google Inc.
This is BSD licensed software; see the source for copying conditions
and license information.
@@ -151,14 +167,14 @@ PARTICULAR PURPOSE.
EOF
}
-sub fatal {
+sub usage {
my $msg = shift;
print STDERR "$msg\n\n";
print STDERR usage_string();
print STDERR "\nFATAL ERROR: $msg\n"; # just as a reminder
exit(1);
}
-
+
$main::opt_help = 0;
$main::opt_version = 0;
@@ -194,6 +210,10 @@ $main::opt_show_bytes = 0;
$main::opt_drop_negative = 0;
$main::opt_interactive = 0;
+$main::opt_tools = "";
+$main::opt_debug = 0;
+$main::opt_test = 0;
+
# Are we printing a heap profile?
$main::heap_profile = 0;
@@ -229,7 +249,10 @@ GetOptions("help!" => \$main::opt_help,
"alloc_objects!" => \$main::opt_alloc_objects,
"show_bytes!" => \$main::opt_show_bytes,
"drop_negative!" => \$main::opt_drop_negative,
- ) || fatal("Invalid option(s)");
+ "tools=s" => \$main::opt_tools,
+ "test!" => \$main::opt_test,
+ "debug!" => \$main::opt_debug,
+ ) || usage("Invalid option(s)");
# Deal with the standard --help and --version
if ($main::opt_help) {
@@ -255,7 +278,7 @@ if ($main::opt_inuse_space +
$main::opt_inuse_objects +
$main::opt_alloc_space +
$main::opt_alloc_objects > 1) {
- fatal("Specify at most on of --inuse/--alloc options");
+  usage("Specify at most one of --inuse/--alloc options");
}
# Check output granularities
@@ -266,7 +289,7 @@ my $grains =
$main::opt_files +
0;
if ($grains > 1) {
- fatal("Only specify one output granularity option");
+ usage("Only specify one output granularity option");
}
if ($grains == 0) {
$main::opt_functions = 1;
@@ -282,14 +305,30 @@ my $modes =
$main::opt_gif +
0;
if ($modes > 1) {
- fatal("Only specify one output mode");
+ usage("Only specify one output mode");
}
if ($modes == 0) {
$main::opt_text = 1;
}
-my $prog = shift || fatal("Did not specify program");
-my $pfile_arg = shift || fatal("Did not specify profile file");
+if ($main::opt_test) {
+ RunUnitTests();
+ # Should not return
+ exit(1);
+}
+
+# Binary name and profile arguments list
+$main::prog = "";
+@main::pfile_args = ();
+
+$main::prog = shift || usage("Did not specify program");
+scalar(@ARGV) || usage("Did not specify profile file");
+
+# Parse profile file/location arguments
+foreach my $farg (@ARGV) {
+ unshift(@main::pfile_args, $farg);
+}
+ConfigureObjTools($main::prog);
##### Main section #####
@@ -298,16 +337,21 @@ $main::tmpfile_sym = "/tmp/pprof$$.sym";
$main::tmpfile_ps = "/tmp/pprof$$";
$main::next_tmpfile = 0;
$main::collected_profile = undef;
+@main::profile_files = ();
+#$main::op_time = time();
$SIG{'INT'} = \&sighandler;
-# Read profile data
-my $pfile = FetchDynamicProfile($prog, $pfile_arg);
-my $data = ReadProfile($prog, $pfile);
+# Fetch all profile data
+FetchDynamicProfiles();
+
+# Read one profile, pick the last item on the list
+my $data = ReadProfile($main::prog, pop(@main::profile_files));
my $profile = $data->{profile};
my $libs = $data->{libs}; # Info about main program and shared libraries
# List of function names to skip
$main::skip = ();
+$main::skip_regexp = 'NOMATCH';
if ($main::heap_profile) {
foreach my $name ('calloc',
'cfree',
@@ -317,22 +361,33 @@ if ($main::heap_profile) {
'pvalloc',
'valloc',
'realloc',
+ 'do_malloc',
+ 'DoSampledAllocation',
'__builtin_delete',
'__builtin_new',
'__builtin_vec_delete',
'__builtin_vec_new') {
$main::skip{$name} = 1;
}
+ $main::skip_regexp = "TCMalloc";
}
if ($main::lock_profile) {
- foreach my $name ('Mutex::Unlock') {
- $main::skip{$name} = 1;
+ foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') {
+ $main::skip{$vname} = 1;
+ }
+}
+
+# Add additional profiles, if available.
+if (scalar(@main::profile_files) > 0) {
+ foreach my $pname (@main::profile_files) {
+ my $p = ReadProfile($main::prog, $pname)->{profile};
+ $profile = AddProfile($profile, $p);
}
}
# Subtract base from profile, if specified
if ($main::opt_base ne '') {
- my $base = ReadProfile($prog, $main::opt_base)->{profile};
+ my $base = ReadProfile($main::prog, $main::opt_base)->{profile};
$profile = SubtractProfile($profile, $base);
}
@@ -369,13 +424,17 @@ if (!$main::opt_interactive) {
} elsif ($main::opt_text) {
PrintText($symbols, $flat, $cumulative, $total, -1);
} else {
- if (PrintDot($prog, $symbols, $profile, $flat, $cumulative, $total)) {
+ if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
if ($main::opt_gv) {
- # Some versions of gv use -scale, and some use --scale. *sigh*
- # We use --help to determine if gv expects one dash or two.
- system("$GV --help >/dev/null 2>&1 " .
- "&& $GV --scale=$main::opt_scale $main::tmpfile_ps " .
- "|| $GV -scale $main::opt_scale $main::tmpfile_ps")
+ if (!system("$GV --version >/dev/null 2>&1")) {
+ # Options using double dash are supported by this gv version.
+ system("$GV --scale=$main::opt_scale " .
+ PsTempName($main::next_tmpfile));
+ } else {
+ # Old gv version - only supports options that use single dash.
+ system("$GV -scale $main::opt_scale " .
+ PsTempName($main::next_tmpfile));
+ }
}
} else {
exit(1);
@@ -390,108 +449,139 @@ exit(0);
##### Interactive helper routines #####
+
sub InteractiveMode {
$| = 1; # Make output unbuffered for interactive mode
my $orig_profile = $profile;
- while (1) {
- print "(pprof) ";
- $_ = <STDIN>;
- if (!defined($_)) {
- print "\n";
- last;
- }
- if (m/^ *quit/) {
- last;
+
+ # Use ReadLine if it's installed.
+ if ( defined(eval {require Term::ReadLine}) ) {
+ my $term = new Term::ReadLine 'pprof';
+ while ( defined ($_ = $term->readline('(pprof) '))) {
+ $term->addhistory($_) if /\S/;
+ if (!InteractiveCommand($orig_profile, $_)) {
+ last; # exit when we get an interactive command to quit
+ }
}
- if (m/^ *help/) {
- InteractiveHelpMessage();
- next;
+ } else { # don't have readline
+ while (1) {
+ print "(pprof) ";
+ $_ = <STDIN>;
+ if (!InteractiveCommand($orig_profile, $_)) {
+ last; # exit when we get an interactive command to quit
+ }
}
- # Clear all the options
- $main::opt_lines = 0;
- $main::opt_text = 0;
- $main::opt_disasm = 0;
- $main::opt_list = 0;
- $main::opt_gv = 0;
- $main::opt_cum = 0;
+ }
+}
+
+# Takes two args: orig profile, and command to run.
+# Returns 1 if we should keep going, or 0 if we were asked to quit
+sub InteractiveCommand {
+ my($orig_profile, $command) = @_;
+ $_ = $command; # just to make future m//'s easier
+ if (!defined($_)) {
+ print "\n";
+ return 0;
+ }
+ if (m/^ *quit/) {
+ return 0;
+ }
+ if (m/^ *help/) {
+ InteractiveHelpMessage();
+ return 1;
+ }
+ # Clear all the options
+ $main::opt_lines = 0;
+ $main::opt_text = 0;
+ $main::opt_disasm = 0;
+ $main::opt_list = 0;
+ $main::opt_gv = 0;
+ $main::opt_cum = 0;
- if (m/^ *(text|top)(\d*) *(.*)/) {
- $main::opt_text = 1;
+ if (m/^ *(text|top)(\d*) *(.*)/) {
+ $main::opt_text = 1;
- my $line_limit = ($2 ne "") ? int($2) : 10;
+ my $line_limit = ($2 ne "") ? int($2) : 10;
- my $routine;
- my $ignore;
- ($routine, $ignore) = ParseInteractiveArgs($3);
+ my $routine;
+ my $ignore;
+ ($routine, $ignore) = ParseInteractiveArgs($3);
- my $profile = ProcessProfile($orig_profile, "", $ignore);
- my $reduced = ReduceProfile($symbols, $profile);
+ my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $reduced = ReduceProfile($symbols, $profile);
- # Get derived profiles
- my $flat = FlatProfile($reduced);
- my $cumulative = CumulativeProfile($reduced);
+ # Get derived profiles
+ my $flat = FlatProfile($reduced);
+ my $cumulative = CumulativeProfile($reduced);
- PrintText($symbols, $flat, $cumulative, $total, $line_limit);
- next;
- }
- if (m/^ *list *(.+)/) {
- $main::opt_list = 1;
+ PrintText($symbols, $flat, $cumulative, $total, $line_limit);
+ return 1;
+ }
+ if (m/^ *list *(.+)/) {
+ $main::opt_list = 1;
- my $routine;
- my $ignore;
- ($routine, $ignore) = ParseInteractiveArgs($1);
+ my $routine;
+ my $ignore;
+ ($routine, $ignore) = ParseInteractiveArgs($1);
- my $profile = ProcessProfile($orig_profile, "", $ignore);
- my $reduced = ReduceProfile($symbols, $profile);
+ my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $reduced = ReduceProfile($symbols, $profile);
- # Get derived profiles
- my $flat = FlatProfile($reduced);
- my $cumulative = CumulativeProfile($reduced);
+ # Get derived profiles
+ my $flat = FlatProfile($reduced);
+ my $cumulative = CumulativeProfile($reduced);
- PrintListing($libs, $flat, $cumulative, $routine);
- next;
- }
- if (m/^ *disasm *(.+)/) {
- $main::opt_disasm = 1;
+ PrintListing($libs, $flat, $cumulative, $routine);
+ return 1;
+ }
+ if (m/^ *disasm *(.+)/) {
+ $main::opt_disasm = 1;
- my $routine;
- my $ignore;
- ($routine, $ignore) = ParseInteractiveArgs($1);
+ my $routine;
+ my $ignore;
+ ($routine, $ignore) = ParseInteractiveArgs($1);
- # Process current profile to account for various settings
- my $profile = ProcessProfile($orig_profile, "", $ignore);
- my $reduced = ReduceProfile($symbols, $profile);
+ # Process current profile to account for various settings
+ my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $reduced = ReduceProfile($symbols, $profile);
- # Get derived profiles
- my $flat = FlatProfile($reduced);
- my $cumulative = CumulativeProfile($reduced);
+ # Get derived profiles
+ my $flat = FlatProfile($reduced);
+ my $cumulative = CumulativeProfile($reduced);
- PrintDisassembly($libs, $flat, $cumulative, $routine);
- next;
- }
- if (m/^ *gv *(.*)/) {
- $main::opt_gv = 1;
+ PrintDisassembly($libs, $flat, $cumulative, $routine);
+ return 1;
+ }
+ if (m/^ *gv *(.*)/) {
+ $main::opt_gv = 1;
- my $focus;
- my $ignore;
- ($focus, $ignore) = ParseInteractiveArgs($1);
+ my $focus;
+ my $ignore;
+ ($focus, $ignore) = ParseInteractiveArgs($1);
- # Process current profile to account for various settings
- my $profile = ProcessProfile($orig_profile, $focus, $ignore);
- my $reduced = ReduceProfile($symbols, $profile);
+ # Process current profile to account for various settings
+ my $profile = ProcessProfile($orig_profile, $focus, $ignore);
+ my $reduced = ReduceProfile($symbols, $profile);
- # Get derived profiles
- my $flat = FlatProfile($reduced);
- my $cumulative = CumulativeProfile($reduced);
+ # Get derived profiles
+ my $flat = FlatProfile($reduced);
+ my $cumulative = CumulativeProfile($reduced);
- if (PrintDot($prog, $symbols, $profile, $flat, $cumulative, $total)) {
- system("gv -scale $main::opt_scale -noresize " .
- PsTempName($main::next_tmpfile) . " &");
- $main::next_tmpfile++;
+ if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
+ if (!system("$GV --version >/dev/null 2>&1")) {
+ # Options using double dash are supported by this gv version.
+ system("$GV --scale=$main::opt_scale --noresize " .
+ PsTempName($main::next_tmpfile) . " &");
+ } else {
+ # Old gv version - only supports options that use single dash.
+ system("$GV -scale $main::opt_scale -noresize " .
+ PsTempName($main::next_tmpfile) . " &");
}
- next;
+ $main::next_tmpfile++;
}
+ return 1;
}
+ return 1;
}
@@ -503,19 +593,23 @@ sub ProcessProfile {
# Process current profile to account for various settings
my $profile = $orig_profile;
my $total_count = TotalProfile($profile);
- print "Total: ", $total_count, " samples\n";
+ printf("Total: %s %s\n", Unparse($total_count), Units());
if ($focus ne '') {
$profile = FocusProfile($symbols, $profile, $focus);
my $focus_count = TotalProfile($profile);
- printf "After focusing on '%s': %d samples of %d (%0.1f%%)\n",
- $focus, $focus_count, $total_count, ($focus_count*100.0) / $total_count;
+ printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n",
+ $focus,
+ Unparse($focus_count), Units(),
+ Unparse($total_count), ($focus_count*100.0) / $total_count);
}
if ($ignore ne '') {
$profile = IgnoreProfile($symbols, $profile, $ignore);
my $ignore_count = TotalProfile($profile);
- printf "After ignoring '%s': %d samples of %d (%0.1f%%)\n",
- $ignore, $ignore_count, $total_count,
- ($ignore_count*100.0) / $total_count;
+ printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n",
+ $ignore,
+ Unparse($ignore_count), Units(),
+ Unparse($total_count),
+ ($ignore_count*100.0) / $total_count);
}
return $profile;
@@ -531,12 +625,12 @@ Commands:
Show graphical hierarchical display of current profile. Without
any arguments, shows all samples in the profile. With the optional
"focus" argument, restricts the samples shown to just those where
- the "focus" regular expression matches a routine name on the stack
+ the "focus" regular expression matches a routine name on the stack
trace.
list [routine_regexp] [-ignore1] [-ignore2]
Show source listing of routines whose names match "routine_regexp"
-
+
top [--cum] [-ignore1] [-ignore2]
top20 [--cum] [-ignore1] [-ignore2]
top37 [--cum] [-ignore1] [-ignore2]
@@ -585,7 +679,7 @@ sub PsTempName {
my $fnum = shift;
return "$main::tmpfile_ps" . "." . "$fnum" . ".ps";
}
-
+
# Print text output
sub PrintText {
my $symbols = shift;
@@ -636,24 +730,27 @@ sub PrintDisassembly {
foreach my $lib (@{$libs}) {
my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts);
- my $offset = $lib->[1] - $lib->[3];
- foreach my $routine (keys(%{$symbol_table})) {
+ my $offset = AddressSub($lib->[1], $lib->[3]);
+ foreach my $routine (sort ByName keys(%{$symbol_table})) {
my $start_addr = $symbol_table->{$routine}->[0];
my $end_addr = $symbol_table->{$routine}->[1];
# See if there are any samples in this routine
my $total_flat = 0;
my $total_cum = 0;
- for (my $addr = $start_addr; $addr < $end_addr; $addr++) {
- $total_flat += GetEntry($flat, sprintf("0x%x", $addr+$offset));
- $total_cum += GetEntry($cumulative, sprintf("0x%x", $addr+$offset));
+ my $length = hex(AddressSub($end_addr, $start_addr));
+ my $addr = AddressAdd($start_addr, $offset);
+ for (my $i = 0; $i < $length; $i++) {
+ $total_cum += GetEntry($cumulative, $addr);
+ $total_flat += GetEntry($flat, $addr);
+ $addr = AddressInc($addr);
}
# Skip disassembly if there are no samples in routine
next if ($total_cum == 0);
print "ROUTINE ====================== $routine\n";
- printf "%6s %6s Total samples (flat / cumulative)\n",
- Unparse($total_flat), Unparse($total_cum);
+ printf "%6s %6s Total %s (flat / cumulative)\n",
+ Unparse($total_flat), Unparse($total_cum), Units();
my @instructions = Disassemble($lib->[0], $offset,
$start_addr, $end_addr);
@@ -666,7 +763,8 @@ sub PrintDisassembly {
}
my $f = GetEntry($flat, $e->[0]);
my $c = GetEntry($cumulative, $e->[0]);
- my $address = $e->[0]; $address =~ s/^0x//;
+ my $address = $e->[0];
+ $address =~ s/^0x//;
printf("%6s %6s %-20s %8s: %6s\n",
UnparseAlt($f),
UnparseAlt($c),
@@ -689,10 +787,11 @@ sub Disassemble {
my $start_addr = shift;
my $end_addr = shift;
- my $cmd = sprintf("$OBJDUMP -d -l --no-show-raw-insn " .
- "--start-address=%d --stop-address=%d $prog",
- $start_addr, $end_addr);
- open(OBJDUMP, "$cmd |") || error("$OBJDUMP: $!\n");
+ my $objdump = $obj_tool_map{"objdump"};
+ my $cmd = sprintf("$objdump -d -l --no-show-raw-insn " .
+ "--start-address=0x$start_addr " .
+ "--stop-address=0x$end_addr $prog");
+ open(OBJDUMP, "$cmd |") || error("$objdump: $!\n");
my @result = ();
my $filename = "";
my $linenumber = -1;
@@ -704,8 +803,9 @@ sub Disassemble {
$filename = $1;
$linenumber = $2;
} elsif (m/^ +([0-9a-f]+):\s*(.*)/) {
- # Disassembly line
- my $k = sprintf("0x%x", hex($1) + $offset);
+ # Disassembly line -- zero-extend address to full length
+ my $addr = HexExtend($1);
+ my $k = AddressAdd($addr, $offset);
push(@result, [$k, $filename, $linenumber, $2]);
}
}
@@ -727,18 +827,21 @@ sub PrintListing {
foreach my $lib (@{$libs}) {
my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts);
- my $offset = $lib->[1] - $lib->[3];
+ my $offset = AddressSub($lib->[1], $lib->[3]);
foreach my $routine (sort ByName keys(%{$symbol_table})) {
# Print if there are any samples in this routine
my $start_addr = $symbol_table->{$routine}->[0];
my $end_addr = $symbol_table->{$routine}->[1];
- for (my $addr = $start_addr; $addr < $end_addr; $addr++) {
- if (defined($cumulative->{sprintf("0x%x", $addr+$offset)})) {
+ my $length = hex(AddressSub($end_addr, $start_addr));
+ my $addr = AddressAdd($start_addr, $offset);
+ for (my $i = 0; $i < $length; $i++) {
+ if (defined($cumulative->{$addr})) {
PrintSource($lib->[0], $offset,
$routine, $flat, $cumulative,
$start_addr, $end_addr);
last;
}
+ $addr = AddressInc($addr);
}
}
}
@@ -794,7 +897,7 @@ sub PrintSource {
}
}
- # Hack 3: Extend last line forward until its indentation is less than
+ # Hack 3: Extend last line forward until its indentation is less than
# the indentation we saw on $firstline
my $oldlastline = $lastline;
{
@@ -808,17 +911,17 @@ sub PrintSource {
$l++;
my $indent = Indentation($_);
if ($l >= $firstline) {
- if ($first_indentation < 0 && $indent >= 0) {
- $first_indentation = $indent;
- last if ($first_indentation == 0);
- }
+ if ($first_indentation < 0 && $indent >= 0) {
+ $first_indentation = $indent;
+ last if ($first_indentation == 0);
+ }
}
if ($l >= $lastline && $indent >= 0) {
- if ($indent >= $first_indentation) {
- $lastline = $l+1;
- } else {
- last;
- }
+ if ($indent >= $first_indentation) {
+ $lastline = $l+1;
+ } else {
+ last;
+ }
}
}
close(FILE);
@@ -871,8 +974,8 @@ sub PrintSource {
my $l = 0;
while (<FILE>) {
$l++;
- if ($l >= $firstline - 5 &&
- (($l <= $oldlastline + 5) || ($l <= $lastline))) {
+ if ($l >= $firstline - 5 &&
+ (($l <= $oldlastline + 5) || ($l <= $lastline))) {
chop;
my $text = $_;
printf("%6s %6s %4d: %s\n",
@@ -1000,20 +1103,21 @@ sub PrintDot {
# Get edges and counts per edge
my %edge = ();
+ my $n;
foreach my $k (keys(%{$raw})) {
# TODO: omit low %age edges
- my $n = $raw->{$k};
+ $n = $raw->{$k};
my @addrs = split(/\n/, $k);
for (my $i = 1; $i <= $#addrs; $i++) {
my $src = OutputKey($symbols, $addrs[$i]);
my $dst = OutputKey($symbols, $addrs[$i-1]);
#next if ($src eq $dst); # Avoid self-edges?
if (exists($node{$src}) && exists($node{$dst})) {
- my $e = "$src\001$dst";
- if (!exists($edge{$e})) {
- $edge{$e} = 0;
+ my $edge_label = "$src\001$dst";
+ if (!exists($edge{$edge_label})) {
+ $edge{$edge_label} = 0;
}
- $edge{$e} += $n;
+ $edge{$edge_label} += $n;
}
}
}
@@ -1021,7 +1125,7 @@ sub PrintDot {
# Print edges
foreach my $e (keys(%edge)) {
my @x = split(/\001/, $e);
- my $n = $edge{$e};
+ $n = $edge{$e};
if (abs($n) > $edgelimit) {
# Compute line width based on edge count
@@ -1054,8 +1158,10 @@ sub OutputKey {
my $a = shift;
# Skip large addresses since they sometimes show up as fake entries on RH9
- if (hex($a) > 0x7fffffff) {
- return '';
+ if (length($a) > 8) {
+ if ($a gt "7fffffffffffffff") { return ''; }
+ } else {
+ if (hex($a) > 0x7fffffff) { return ''; }
}
# Extract symbolic info for address
@@ -1069,7 +1175,7 @@ sub OutputKey {
}
# We drop a few well-known names
- if ($main::skip{$func}) {
+ if ($main::skip{$func} || ($func =~ m/$main::skip_regexp/)) {
return '';
}
@@ -1114,6 +1220,8 @@ sub Unparse {
return sprintf("%.1f", $num / 1048576.0);
}
}
+ } elsif ($main::lock_profile) {
+ return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
} else {
return sprintf("%d", $num);
}
@@ -1141,6 +1249,8 @@ sub Units {
return "MB";
}
}
+ } elsif ($main::lock_profile) {
+ return "seconds";
} else {
return "samples";
}
@@ -1264,6 +1374,25 @@ sub TotalProfile {
return $result;
}
+# Add A to B
+sub AddProfile {
+ my $A = shift;
+ my $B = shift;
+
+ my $R = {};
+ # add all keys in A
+ foreach my $k (keys(%{$A})) {
+ my $v = $A->{$k};
+ AddEntry($R, $k, $v);
+ }
+ # add all keys in B
+ foreach my $k (keys(%{$B})) {
+ my $v = $B->{$k};
+ AddEntry($R, $k, $v);
+ }
+ return $R;
+}
+
# Subtract B from A
sub SubtractProfile {
my $A = shift;
@@ -1278,7 +1407,7 @@ sub SubtractProfile {
AddEntry($R, $k, $v);
}
if (!$main::opt_drop_negative) {
- # take care of when substracted profile has more things
+ # Take care of when subtracted profile has more entries
foreach my $k (keys(%{$B})) {
if (!exists($A->{$k})) {
AddEntry($R, $k, 0 - $B->{$k});
@@ -1310,16 +1439,90 @@ sub AddEntry {
$profile->{$k} += $n;
}
+# Add a stack of entries to specified profile, and add them to the $pcs
+# list.
+sub AddEntries {
+ my $profile = shift;
+ my $pcs = shift;
+ my $stack = shift;
+ my $count = shift;
+ my @k = ();
+
+ foreach my $e (split(/\s+/, $stack)) {
+ my $pc = HexExtend($e);
+ $pcs->{$pc} = 1;
+ push @k, $pc;
+ }
+ AddEntry($profile, (join "\n", @k), $count);
+}
+
##### Code to profile a server dynamically #####
sub FetchDynamicProfile {
my $binary_name = shift;
my $profile_name = shift;
+ my $fetch_name_only = shift;
+ my $encourage_patience = shift;
# TODO: Add support for fetching profiles dynamically from a server
return $profile_name;
}
+# Collect profiles in parallel
+sub FetchDynamicProfiles {
+ my $items = scalar(@main::pfile_args);
+ my $levels = log($items) / log(2);
+
+ if ($items == 1) {
+ $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1);
+ } else {
+ # math rounding issues
+ if ((2 ** $levels) < $items) {
+ $levels++;
+ }
+ my $count = scalar(@main::pfile_args);
+ for (my $i = 0; $i < $count; $i++) {
+ $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0);
+ }
+ print STDERR "Fetching $count profiles, be patient...\n";
+ FetchDynamicProfilesRecurse($levels, 0, 0);
+ $main::collected_profile = join(" \\\n ", @main::profile_files);
+ }
+}
+
+# Recursively fork a process to get enough processes
+# collecting profiles
+sub FetchDynamicProfilesRecurse {
+ my $maxlevel = shift;
+ my $level = shift;
+ my $position = shift;
+
+ if (my $pid = fork()) {
+ $position = 0 | ($position << 1);
+ TryCollectProfile($maxlevel, $level, $position);
+ wait;
+ } else {
+ $position = 1 | ($position << 1);
+ TryCollectProfile($maxlevel, $level, $position);
+ exit(0);
+ }
+}
+
+# Collect a single profile
+sub TryCollectProfile {
+ my $maxlevel = shift;
+ my $level = shift;
+ my $position = shift;
+
+ if ($level >= ($maxlevel - 1)) {
+ if ($position < scalar(@main::pfile_args)) {
+ FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0);
+ }
+ } else {
+ FetchDynamicProfilesRecurse($maxlevel, $level+1, $position);
+ }
+}
+
##### Parsing code #####
# Parse profile generated by common/profiler.cc and return a reference
@@ -1328,7 +1531,7 @@ sub FetchDynamicProfile {
# $result->{period} Sampling period (in microseconds)
# $result->{profile} Profile object
# $result->{map} Memory map info from profile
-# $result->{pcs} List of all PC values seen
+# $result->{pcs} Hash of all PC values seen, key is hex address
sub ReadProfile {
my $prog = shift;
my $fname = shift;
@@ -1371,6 +1574,12 @@ sub ReadCPUProfile {
my $nbytes = read(PROFILE, $str, (stat PROFILE)[7]); # read entire file
close(PROFILE);
+ my $version;
+ my $period;
+ my $i;
+ my $profile = {};
+ my $pcs = {};
+
# Parse string into array of slots.
# L! is needed for 64-bit # platforms, but not supported on 5.005
# (despite the manpage claims)
@@ -1384,37 +1593,95 @@ sub ReadCPUProfile {
my @slots = unpack($format, $str);
- # Read header
- if ($#slots < 1 || $slots[0] != 0 || $slots[1] < 3) {
+ # Read header. The current header version is a 5-element structure
+ # containing:
+ # 0: header count (always 0)
+ # 1: header "words" (after this one: 3)
+ # 2: format version (0)
+ # 3: sampling period (usec)
+ # 4: unused padding (always 0)
+ # The header words are 32-bit or 64-bit depending on the ABI of the program
+ # that generated the profile. In the 64-bit case, since our x86-architecture
+ # machines are little-endian, the actual value of each of these elements is
+ # in the first 32-bit word, and the second is always zero. The @slots array
+ # above was read as a sequence of 32-bit words in both cases, so we need to
+ # explicitly check for both cases. A typical slot sequence for each is:
+ # 32-bit: 0 3 0 100 0
+ # 64-bit: 0 0 3 0 0 0 100 0 0 0
+ #
+ if ($#slots < 4 || $slots[0] != 0 ) {
error("$fname: not a profile file, or old format profile file\n");
}
- my $version = $slots[2];
- my $period = $slots[3];
- my $i = 2 + $slots[1];
+ if ($slots[1] >= 3) {
+ # Normal 32-bit header:
+ $version = $slots[2];
+ $period = $slots[3];
+ $i = 2 + $slots[1];
+ $address_length = 8;
+
+ # Parse profile
+ while ($i <= $#slots) {
+ my $n = $slots[$i++];
+ my $d = $slots[$i++];
+ if ($slots[$i] == 0) {
+ # End of profile data marker
+ $i += $d;
+ last;
+ }
- # Parse profile
- my $profile = {};
- my $pcs = {};
- while ($i <= $#slots) {
- my $n = $slots[$i++];
- my $d = $slots[$i++];
- if ($slots[$i] == 0) {
- # End of profile data marker
+ # Make key out of the stack entries
+ my @k = ();
+ for (my $j = 0; $j < $d; $j++) {
+ my $pc = sprintf("%08x", $slots[$i+$j]);
+ $pcs->{$pc} = 1;
+ push @k, $pc;
+ }
+
+ AddEntry($profile, (join "\n", @k), $n);
$i += $d;
- last;
}
- # Make key out of the stack entries
- my $k = "";
- for (my $j = 0; $j < $d; $j++) {
- my $pc = $slots[$i+$j];
- $pcs->{$pc} = 1;
- $k .= sprintf("\n0x%x", $pc);
- }
- $k =~ s/^\n//;
+ # Normal 64-bit header: All entries are doubled in size. The first
+ # word (little-endian) should contain the real value, the second should
+ # be zero.
+ } elsif ($#slots < 9 || $slots[1] != 0 || $slots[2] < 3 || $slots[3] != 0
+ || $slots[5] != 0 || $slots[7] != 0) {
+ error("$fname: not a profile file, or old format profile file\n");
+ } else {
+ $version = $slots[4];
+ $period = $slots[6];
+ $i = 4 + 2 * $slots[2];
+ $address_length = 16;
+
+ # Parse profile
+ while ($i <= $#slots) {
+ my $n = $slots[$i++];
+ my $nhi = $slots[$i++];
+ # Huge counts may coerce to floating point, keeping scale, not precision
+ if ($nhi != 0) { $n += $nhi*(2**32); }
+ my $d = $slots[$i++];
+ if ($slots[$i++] != 0) {
+ my $addr = sprintf("%o", 4 * $i);
+ print STDERR "At index $i ($addr):\n";
+ error("$fname: stack trace depth >= 2**32\n");
+ }
+ if ($slots[$i] == 0 && $slots[$i+1] == 0) {
+ # End of profile data marker
+ $i += 2 * $d;
+ last;
+ }
- AddEntry($profile, $k, $n);
- $i += $d;
+ # Make key out of the stack entries
+ my @k = ();
+ for (my $j = $d; $j--; ) {
+ my $pclo = $slots[$i++];
+ my $pchi = $slots[$i++];
+ my $pc = sprintf("%08x%08x", $pchi, $pclo);
+ $pcs->{$pc} = 1;
+ push @k, $pc;
+ }
+ AddEntry($profile, (join "\n", @k), $n);
+ }
}
# Parse map
@@ -1458,6 +1725,7 @@ sub ReadHeapProfile {
my $profile = {};
my $pcs = {};
my $map = "";
+
while (<PROFILE>) {
if (/^MAPPED_LIBRARIES:/) {
# Read the /proc/self/maps data
@@ -1493,15 +1761,7 @@ sub ReadHeapProfile {
my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
my @counts = ($n1, $s1, $n2, $s2);
- my $n = $counts[$index];
- my $k = "";
- foreach my $e (split(/\s+/, $stack)) {
- my $pc = hex($e);
- $pcs->{$pc} = 1;
- $k .= sprintf("\n0x%x", $pc);
- }
- $k =~ s/^\n//;
- AddEntry($profile, $k, $n);
+ AddEntries($profile, $pcs, $stack, $counts[$index]);
}
}
@@ -1516,28 +1776,33 @@ sub ReadHeapProfile {
sub ReadSynchProfile {
my ($prog, $fname, $header) = @_;
- my ($line, $map, $pc, @k, $count, $stack);
- $map = '';
+ my $map = '';
my $profile = {};
my $pcs = {};
my $sampling_period = 1;
+ my $cyclespernanosec = 2.8; # Default assumption for old binaries
+ my $seen_clockrate = 0;
+ my $line;
while ( $line = <PROFILE> ) {
if ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
$line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
- # Sample entry
- ($count, $stack) = ($1, $2);
- $count = 1 if $count !~ /^\d+$/;
-
- @k = ();
- foreach $pc (split /\s+/, $stack) {
- $pcs->{hex($pc)} = 1;
- push @k, $pc;
+ my ($count, $stack) = ($1, $2);
+ if ($count !~ /^\d+$/) {
+ next;
}
- AddEntry($profile, (join "\n", @k), $count);
+
+ # Convert cycles to nanoseconds
+ $count /= $cyclespernanosec;
+ AddEntries($profile, $pcs, $stack, $count);
+
+ } elsif ( $line =~ m|cycles/second = (\d+)|) {
+ $cyclespernanosec = $1 / 1e9;
+ $seen_clockrate = 1;
} elsif ( $line =~ /sampling period = (\d+)/ ) {
$sampling_period = $1;
+
} else {
# Memory map entry
$map .= $line;
@@ -1545,15 +1810,30 @@ sub ReadSynchProfile {
}
close PROFILE;
+ if (!$seen_clockrate) {
+ printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n",
+ $cyclespernanosec);
+ }
+
my $r = {};
- $r->{'version'} = 0;
- $r->{'period'} = $sampling_period;
- $r->{'profile'} = $profile;
- $r->{'libs'} = ParseLibraries($prog, $map, $pcs);
- $r->{'pcs'} = $pcs;
+ $r->{version} = 0;
+ $r->{period} = $sampling_period;
+ $r->{profile} = $profile;
+ $r->{libs} = ParseLibraries($prog, $map, $pcs);
+ $r->{pcs} = $pcs;
return $r;
}
+# Given a hex value in the form "0x1abcd" return "0001abcd" or
+# "000000000001abcd", depending on the current address length.
+# There's probably a more idiomatic (or faster) way to do this...
+sub HexExtend {
+ my $addr = shift;
+
+ $addr =~ s/^0x//;
+ return substr("000000000000000".$addr, -$address_length);
+}
+
##### Symbol extraction #####
# Split /proc/pid/maps dump into a list of libraries
@@ -1564,7 +1844,14 @@ sub ParseLibraries {
my $result = [];
my $h = "[a-f0-9]+";
+ my $zero_offset = HexExtend("0");
+
+ my $buildvar = "";
foreach my $l (split("\n", $map)) {
+ if ($l =~ m/^\s*build=(.*)$/) {
+ $buildvar = $1;
+ }
+
my $start;
my $finish;
my $offset;
@@ -1572,32 +1859,40 @@ sub ParseLibraries {
if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.so(\.\d+)*\w*)/) {
# Full line from /proc/self/maps. Example:
# 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so
- $start = hex($1);
- $finish = hex($2);
- $offset = hex($3);
+ $start = HexExtend($1);
+ $finish = HexExtend($2);
+ $offset = HexExtend($3);
$lib = $4;
} elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) {
# Cooked line from DumpAddressMap. Example:
# 40000000-40015000: /lib/ld-2.3.2.so
- $start = hex($1);
- $finish = hex($2);
- $offset = 0;
+ $start = HexExtend($1);
+ $finish = HexExtend($2);
+ $offset = $zero_offset;
$lib = $3;
} else {
next;
}
+ # Expand "$build" variable if available
+ $lib =~ s/\$build\b/$buildvar/g;
+
# Get objdump output from the library file to figure out how to
# map between mapped addresses and addresses in the library.
- open(OBJDUMP, "$OBJDUMP -h $lib |") || error("$OBJDUMP $lib: $!\n");
+ my $objdump = $obj_tool_map{"objdump"};
+ open(OBJDUMP, "$objdump -h $lib |")
+ || error("$objdump $lib: $!\n");
while (<OBJDUMP>) {
# Idx Name Size VMA LMA File off Algn
# 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4
+ # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file
+ # offset may still be 8. But AddressSub below will still handle that.
my @x = split;
if (($#x >= 6) && ($x[1] eq '.text')) {
- my $vma = hex($x[3]);
- my $file_offset = hex($x[5]);
- $offset += $vma - $file_offset;
+ my $vma = $x[3];
+ my $file_offset = $x[5];
+ my $vma_offset = AddressSub($vma, $file_offset);
+ $offset = AddressAdd($offset, $vma_offset);
last;
}
}
@@ -1607,15 +1902,173 @@ sub ParseLibraries {
}
# Append special entry for the main program
- my $max_pc = 0;
+ my $max_pc = "0";
foreach my $pc (keys(%{$pcs})) {
- if ($pc > $max_pc) { $max_pc = $pc; }
+ if ($pc gt $max_pc) { $max_pc = $pc; }
}
- push(@{$result}, [$prog, 0, $max_pc, 0]);
+ push(@{$result}, [$prog, $zero_offset, $max_pc, $zero_offset]);
return $result;
}
+# Add two hex addresses of length $address_length.
+# Run pprof --test for unit test if this is changed.
+sub AddressAdd {
+ my $addr1 = shift;
+ my $addr2 = shift;
+ my $sum;
+
+ if ($address_length == 8) {
+ # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
+ $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16);
+ return sprintf("%08x", $sum);
+
+ } else {
+ # Do the addition in 7-nibble chunks to trivialize carry handling.
+
+ if ($main::opt_debug and $main::opt_test) {
+ print STDERR "AddressAdd $addr1 + $addr2 = ";
+ }
+
+ my $a1 = substr($addr1,-7);
+ $addr1 = substr($addr1,0,-7);
+ my $a2 = substr($addr2,-7);
+ $addr2 = substr($addr2,0,-7);
+ $sum = hex($a1) + hex($a2);
+ my $c = 0;
+ if ($sum > 0xfffffff) {
+ $c = 1;
+ $sum -= 0x10000000;
+ }
+ my $r = sprintf("%07x", $sum);
+
+ $a1 = substr($addr1,-7);
+ $addr1 = substr($addr1,0,-7);
+ $a2 = substr($addr2,-7);
+ $addr2 = substr($addr2,0,-7);
+ $sum = hex($a1) + hex($a2) + $c;
+ $c = 0;
+ if ($sum > 0xfffffff) {
+ $c = 1;
+ $sum -= 0x10000000;
+ }
+ $r = sprintf("%07x", $sum) . $r;
+
+ $sum = hex($addr1) + hex($addr2) + $c;
+ if ($sum > 0xff) { $sum -= 0x100; }
+ $r = sprintf("%02x", $sum) . $r;
+
+ if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; }
+
+ return $r;
+ }
+}
+
+
+# Subtract two hex addresses of length $address_length.
+# Run pprof --test for unit test if this is changed.
+sub AddressSub {
+ my $addr1 = shift;
+ my $addr2 = shift;
+ my $diff;
+
+ if ($address_length == 8) {
+ # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
+ $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16);
+ return sprintf("%08x", $diff);
+
+ } else {
+ # Do the addition in 7-nibble chunks to trivialize borrow handling.
+ # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; }
+
+ my $a1 = hex(substr($addr1,-7));
+ $addr1 = substr($addr1,0,-7);
+ my $a2 = hex(substr($addr2,-7));
+ $addr2 = substr($addr2,0,-7);
+ my $b = 0;
+ if ($a2 > $a1) {
+ $b = 1;
+ $a1 += 0x10000000;
+ }
+ $diff = $a1 - $a2;
+ my $r = sprintf("%07x", $diff);
+
+ $a1 = hex(substr($addr1,-7));
+ $addr1 = substr($addr1,0,-7);
+ $a2 = hex(substr($addr2,-7)) + $b;
+ $addr2 = substr($addr2,0,-7);
+ $b = 0;
+ if ($a2 > $a1) {
+ $b = 1;
+ $a1 += 0x10000000;
+ }
+ $diff = $a1 - $a2;
+ $r = sprintf("%07x", $diff) . $r;
+
+ $a1 = hex($addr1);
+ $a2 = hex($addr2) + $b;
+ if ($a2 > $a1) { $a1 += 0x100; }
+ $diff = $a1 - $a2;
+ $r = sprintf("%02x", $diff) . $r;
+
+ # if ($main::opt_debug) { print STDERR "$r\n"; }
+
+ return $r;
+ }
+}
+
+# Increment a hex address of length $address_length.
+# Run pprof --test for unit test if this is changed.
+sub AddressInc {
+ my $addr = shift;
+ my $sum;
+
+ if ($address_length == 8) {
+ # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
+ $sum = (hex($addr)+1) % (0x10000000 * 16);
+ return sprintf("%08x", $sum);
+
+ } else {
+ # Do the addition in 7-nibble chunks to trivialize carry handling.
+ # We are always doing this to step through the addresses in a function,
+ # and will almost never overflow the first chunk, so we check for this
+ # case and exit early.
+
+ # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; }
+
+ my $a1 = substr($addr,-7);
+ $addr = substr($addr,0,-7);
+ $sum = hex($a1) + 1;
+ my $r = sprintf("%07x", $sum);
+ if ($sum <= 0xfffffff) {
+ $r = $addr . $r;
+ # if ($main::opt_debug) { print STDERR "$r\n"; }
+ return HexExtend($r);
+ } else {
+ $r = "0000000";
+ }
+
+ $a1 = substr($addr,-7);
+ $addr = substr($addr,0,-7);
+ $sum = hex($a1) + 1;
+ $r = sprintf("%07x", $sum) . $r;
+ if ($sum <= 0xfffffff) {
+ $r = $addr . $r;
+ # if ($main::opt_debug) { print STDERR "$r\n"; }
+ return HexExtend($r);
+ } else {
+ $r = "00000000000000";
+ }
+
+ $sum = hex($addr) + 1;
+ if ($sum > 0xff) { $sum -= 0x100; }
+ $r = sprintf("%02x", $sum) . $r;
+
+ # if ($main::opt_debug) { print STDERR "$r\n"; }
+ return $r;
+ }
+}
+
# Extract symbols for all PC values found in profile
sub ExtractSymbols {
my $libs = shift;
@@ -1635,13 +2088,13 @@ sub ExtractSymbols {
# Get list of pcs that belong in this library.
my $contained = [];
foreach my $pc (keys(%{$pcset})) {
- if (!$seen{$pc} && ($pc >= $start) && ($pc <= $finish)) {
+ if (!$seen{$pc} && ($pc ge $start) && ($pc le $finish)) {
$seen{$pc} = 1;
push(@{$contained}, $pc);
}
}
# Map to symbols
- MapToSymbols($libname, $start - $offset, $contained, $symbols);
+ MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols);
}
return $symbols;
@@ -1675,13 +2128,15 @@ sub GetLineNumbers {
# Make file with all PC values
open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n");
for (my $i = 0; $i <= $#{$pclist}; $i++) {
- printf ADDRESSES ("0x%x\n", $pclist->[$i] - $offset);
+ # addr2line always reads hex addresses, and does not need '0x' prefix.
+ printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset));
}
close(ADDRESSES);
# Pass to addr2line
- open(SYMBOLS, "$ADDR2LINE -f -C -e $image <$main::tmpfile_sym |")
- || error("$ADDR2LINE: $!\n");
+ my $addr2line = $obj_tool_map{"addr2line"};
+ open(SYMBOLS, "$addr2line -f -C -e $image <$main::tmpfile_sym |")
+ || error("$addr2line: $!\n");
my $count = 0;
while (<SYMBOLS>) {
chop;
@@ -1694,7 +2149,7 @@ sub GetLineNumbers {
$filelinenum =~ s|^.*/([^/]+:\d+)$|$1|; # Remove directory name
}
- my $pcstr = sprintf("0x%x", $pclist->[$count]);
+ my $pcstr = $pclist->[$count];
if (defined($symbols->{$pcstr})) {
# Override just the line-number portion. The function name portion
# is less buggy when computed using nm instead of addr2line.
@@ -1708,7 +2163,7 @@ sub GetLineNumbers {
close(SYMBOLS);
}
-# Alternate implementation
+# Use nm to map the list of referenced PCs to symbols
sub MapSymbolsWithNM {
my $image = shift;
my $offset = shift;
@@ -1717,14 +2172,15 @@ sub MapSymbolsWithNM {
# Get nm output sorted by increasing address
my $symbol_table = GetProcedureBoundaries($image, ".");
- my @names = sort { $symbol_table->{$a}->[0] <=> $symbol_table->{$b}->[0] }
+ # Start addresses are already the right length (8 or 16 hex digits).
+ my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] }
keys(%{$symbol_table});
if ($#names < 0) {
- # No symbols: just use address
+ # No symbols: just use addresses
foreach my $pc (@{$pclist}) {
- my $pcstr = sprintf("0x%x", $pc);
- $symbols->{$pcstr} = [$pcstr, "?", $pcstr];
+ my $pcstr = "0x" . $pc;
+ $symbols->{$pc} = [$pcstr, "?", $pcstr];
}
return;
}
@@ -1733,22 +2189,21 @@ sub MapSymbolsWithNM {
my $index = 0;
my $fullname = $names[0];
my $name = ShortFunctionName($fullname);
- foreach my $pc (sort { $a <=> $b } @{$pclist}) {
+ foreach my $pc (sort { $a cmp $b } @{$pclist}) {
# Adjust for mapped offset
- my $mpc = $pc - $offset;
- while (($index < $#names) && ($mpc >= $symbol_table->{$fullname}->[1])){
+ my $mpc = AddressSub($pc, $offset);
+ while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){
$index++;
$fullname = $names[$index];
$name = ShortFunctionName($fullname);
}
- my $pcstr = sprintf("0x%x", $pc);
- $symbols->{$pcstr} = [$name, "?", $fullname];
+ $symbols->{$pc} = [$name, "?", $fullname];
}
}
sub ShortFunctionName {
my $function = shift;
- while ($function =~ s/\([^()]*\)//g) { } # Remove argument types
+ while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types
while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments
$function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type
return $function;
@@ -1756,6 +2211,70 @@ sub ShortFunctionName {
##### Miscellaneous #####
+# Find the right versions of the above object tools to use. The argument
+# is the program file being analyzed, and should be an ELF 32-bit or ELF
+# 64-bit executable file. If it is, we select the default tools location
+# based on that; otherwise, we'll emit a warning and take the user's
+# defaults.
+sub ConfigureObjTools {
+ my $prog_file = shift;
+
+ # Figure out the right default pathname prefix based on the program file
+ # type:
+ my $default_prefix = "/usr/bin/";
+ my $file_type = `/usr/bin/file $prog_file`;
+ if ($file_type =~ /ELF 32-bit/) {
+ $default_prefix = "/usr/bin/";
+ } elsif ($file_type =~ /ELF 64-bit/) {
+ # Change $address_length to 16 if the program file is ELF 64-bit.
+ # We can't detect this from many (most?) heap or lock contention
+ # profiles, since the actual addresses referenced are generally in low
+ # memory even for 64-bit programs.
+ $address_length = 16;
+ } else {
+ print STDERR "WARNING: program $prog_file is apparently not an ELF file\n";
+ # Don't change the default prefix.
+ }
+
+ # Go fill in %obj_tool_map with the pathnames to use:
+ foreach my $tool (keys %obj_tool_map) {
+ $obj_tool_map{$tool} = ConfigureTool($tool, $default_prefix);
+ }
+}
+
+sub ConfigureTool {
+ my $tool = shift;
+ my $prefix = shift;
+ my $path;
+
+ if ($main::opt_debug) {
+ print STDERR "Configuring '$tool' with default prefix '$prefix'\n";
+ }
+
+ # Try a specific prefix specified by the user:
+ if ($main::opt_tools ne "") {
+ $path = $main::opt_tools . $tool;
+ if ($main::opt_debug) { print STDERR " (a) Trying '$path'\n"; }
+ if (-x $path) { return $path; }
+ }
+
+ # Try the default prefix passed to us:
+ $path = $prefix . $tool;
+ if ($main::opt_debug) { print STDERR " (b) Trying '$path'\n"; }
+ if (-x $path) { return $path; }
+
+ # Try the normal system default (/usr/bin/):
+ if ($prefix ne "/usr/bin") {
+ $path = "/usr/bin/$tool";
+ if ($main::opt_debug) { print STDERR " (c) Trying '$path'\n"; }
+ if (-x $path) { return $path; }
+ }
+
+ # If all else fails, hope the bare toolname works:
+ if ($main::opt_debug) { print STDERR " Returning '$tool'\n"; }
+ return $tool;
+}
+
sub cleanup {
unlink($main::tmpfile_sym);
for (my $i = 0; $i < $main::next_tmpfile; $i++) {
@@ -1763,12 +2282,15 @@ sub cleanup {
}
# We leave any collected profiles in $HOME/pprof in case the user wants
# to look at them later. We print a message informing them of this.
- if (defined($main::collected_profile)) {
- print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
+ if ((scalar(@main::profile_files) > 0) &&
+ defined($main::collected_profile)) {
+ if (scalar(@main::profile_files) == 1) {
+ print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
+ }
print STDERR "If you want to investigate this profile further, you can do:\n";
print STDERR "\n";
print STDERR " pprof \\\n";
- print STDERR " $prog \\\n";
+ print STDERR " $main::prog \\\n";
print STDERR " $main::collected_profile\n";
print STDERR "\n";
}
@@ -1786,16 +2308,6 @@ sub error {
exit(1);
}
-# Return a list of all routines that match $regexp.
-# For each routine, the following list is returned:
-# $result->[i]->[0] Routine name
-# $result->[i]->[1] Start address
-# $result->[i]->[2] Finish address
-# $result->[i]->[3] Image file name (program or shared library)
-# $result->[i]->[4] Offset for image in address space
-sub GetMatchingRoutines {
-}
-
# Gets the procedure boundaries for all routines in "$image" whose names
# match "$regexp" and returns them in a hashtable mapping from procedure
@@ -1805,15 +2317,16 @@ sub GetProcedureBoundaries {
my $regexp = shift;
my $symbol_table = {};
- open(NM, "$NM -C -n $image |") || error("$NM: $!\n");
- my $last_start = "0x0";
+ my $nm = $obj_tool_map{"nm"};
+ open(NM, "$nm -C -n $image |") || error("$nm: $!\n");
+ my $last_start = "0";
my $routine = "";
while (<NM>) {
if (m/^([0-9a-f]+) . (..*)/) {
my $start_val = $1;
my $this_routine = $2;
if (defined($routine) && $routine =~ m/$regexp/) {
- $symbol_table->{$routine} = [hex($last_start), hex($start_val)];
+ $symbol_table->{$routine} = [$last_start, $start_val];
}
$last_start = $start_val;
$routine = $this_routine;
@@ -1823,3 +2336,218 @@ sub GetProcedureBoundaries {
return $symbol_table;
}
+
+
+# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings.
+# To make them more readable, we add underscores at interesting places.
+# This routine removes the underscores, producing the canonical representation
+# used by pprof to represent addresses, particularly in the tested routines.
+sub CanonicalHex {
+ my $arg = shift;
+ return join '', (split '_',$arg);
+}
+
+
+# Unit test for AddressAdd:
+sub AddressAddUnitTest {
+ my $test_data_8 = shift;
+ my $test_data_16 = shift;
+ my $error_count = 0;
+ my $fail_count = 0;
+ my $pass_count = 0;
+ # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n";
+
+ # First a few 8-nibble addresses. Note that this implementation uses
+ # plain old arithmetic, so a quick sanity check along with verifying what
+ # happens to overflow (we want it to wrap):
+ $address_length = 8;
+ foreach my $row (@{$test_data_8}) {
+ if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+ my $sum = AddressAdd ($row->[0], $row->[1]);
+ if ($sum ne $row->[2]) {
+ printf STDERR "ERROR: %s != %s + %s = %s\n", $sum,
+ $row->[0], $row->[1], $row->[2];
+ ++$fail_count;
+ } else {
+ ++$pass_count;
+ }
+ }
+ printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n",
+ $pass_count, $fail_count;
+ $error_count = $fail_count;
+ $fail_count = 0;
+ $pass_count = 0;
+
+ # Now 16-nibble addresses.
+ $address_length = 16;
+ foreach my $row (@{$test_data_16}) {
+ if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+ my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1]));
+ my $expected = join '', (split '_',$row->[2]);
+ if ($sum ne CanonicalHex($row->[2])) {
+ printf STDERR "ERROR: %s != %s + %s = %s\n", $sum,
+ $row->[0], $row->[1], $row->[2];
+ ++$fail_count;
+ } else {
+ ++$pass_count;
+ }
+ }
+ printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n",
+ $pass_count, $fail_count;
+ $error_count += $fail_count;
+
+ return $error_count;
+}
+
+
+# Unit test for AddressSub:
+sub AddressSubUnitTest {
+ my $test_data_8 = shift;
+ my $test_data_16 = shift;
+ my $error_count = 0;
+ my $fail_count = 0;
+ my $pass_count = 0;
+ # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n";
+
+ # First a few 8-nibble addresses. Note that this implementation uses
+ # plain old arithmetic, so a quick sanity check along with verifying what
+ # happens to overflow (we want it to wrap):
+ $address_length = 8;
+ foreach my $row (@{$test_data_8}) {
+ if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+ my $sum = AddressSub ($row->[0], $row->[1]);
+ if ($sum ne $row->[3]) {
+ printf STDERR "ERROR: %s != %s - %s = %s\n", $sum,
+ $row->[0], $row->[1], $row->[3];
+ ++$fail_count;
+ } else {
+ ++$pass_count;
+ }
+ }
+ printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n",
+ $pass_count, $fail_count;
+ $error_count = $fail_count;
+ $fail_count = 0;
+ $pass_count = 0;
+
+ # Now 16-nibble addresses.
+ $address_length = 16;
+ foreach my $row (@{$test_data_16}) {
+ if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+ my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1]));
+ if ($sum ne CanonicalHex($row->[3])) {
+ printf STDERR "ERROR: %s != %s - %s = %s\n", $sum,
+ $row->[0], $row->[1], $row->[3];
+ ++$fail_count;
+ } else {
+ ++$pass_count;
+ }
+ }
+ printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n",
+ $pass_count, $fail_count;
+ $error_count += $fail_count;
+
+ return $error_count;
+}
+
+
+# Unit test for AddressInc:
+sub AddressIncUnitTest {
+ my $test_data_8 = shift;
+ my $test_data_16 = shift;
+ my $error_count = 0;
+ my $fail_count = 0;
+ my $pass_count = 0;
+ # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n";
+
+ # First a few 8-nibble addresses. Note that this implementation uses
+ # plain old arithmetic, so a quick sanity check along with verifying what
+ # happens to overflow (we want it to wrap):
+ $address_length = 8;
+ foreach my $row (@{$test_data_8}) {
+ if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+ my $sum = AddressInc ($row->[0]);
+ if ($sum ne $row->[4]) {
+ printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum,
+ $row->[0], $row->[4];
+ ++$fail_count;
+ } else {
+ ++$pass_count;
+ }
+ }
+ printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n",
+ $pass_count, $fail_count;
+ $error_count = $fail_count;
+ $fail_count = 0;
+ $pass_count = 0;
+
+ # Now 16-nibble addresses.
+ $address_length = 16;
+ foreach my $row (@{$test_data_16}) {
+ if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+ my $sum = AddressInc (CanonicalHex($row->[0]));
+ if ($sum ne CanonicalHex($row->[4])) {
+ printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum,
+ $row->[0], $row->[4];
+ ++$fail_count;
+ } else {
+ ++$pass_count;
+ }
+ }
+ printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n",
+ $pass_count, $fail_count;
+ $error_count += $fail_count;
+
+ return $error_count;
+}
+
+
+# Driver for unit tests.
+# Currently just the address add/subtract/increment routines for 64-bit.
+sub RunUnitTests {
+ my $error_count = 0;
+
+ # This is a list of tuples [a, b, a+b, a-b, a+1]
+ my $unit_test_data_8 = [
+ [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)],
+ [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)],
+ [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)],
+ [qw(00000001 ffffffff 00000000 00000002 00000002)],
+ [qw(00000001 fffffff0 fffffff1 00000011 00000002)],
+ ];
+ my $unit_test_data_16 = [
+ # The implementation handles data in 7-nibble chunks, so those are the
+ # interesting boundaries.
+ [qw(aaaaaaaa 50505050
+ 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)],
+ [qw(50505050 aaaaaaaa
+ 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)],
+ [qw(ffffffff aaaaaaaa
+ 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)],
+ [qw(00000001 ffffffff
+ 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)],
+ [qw(00000001 fffffff0
+ 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)],
+
+ [qw(00_a00000a_aaaaaaa 50505050
+ 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)],
+ [qw(0f_fff0005_0505050 aaaaaaaa
+ 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)],
+ [qw(00_000000f_fffffff 01_800000a_aaaaaaa
+ 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)],
+ [qw(00_0000000_0000001 ff_fffffff_fffffff
+ 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)],
+ [qw(00_0000000_0000001 ff_fffffff_ffffff0
+ ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)],
+ ];
+
+ $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16);
+ $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16);
+ $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16);
+ if ($error_count > 0) {
+ print STDERR $error_count, " errors: FAILED\n";
+ } else {
+ print STDERR "PASS\n";
+ }
+ exit ($error_count);
+}
diff --git a/src/profiler.cc b/src/profiler.cc
index 7197d42..3633f6f 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -1,10 +1,10 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
-//
+//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
@@ -14,7 +14,7 @@
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
-//
+//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -96,7 +96,7 @@ DEFINE_string(cpu_profile, "",
typedef struct siginfo SigStructure;
inline void* GetPC(const SigStructure& sig_structure ) {
return (void*)sig_structure.si_faddr; // maybe not correct
-}
+}
#elif defined HAVE_STRUCT_SIGCONTEXT_SC_EIP
typedef struct sigcontext SigStructure;
@@ -126,6 +126,13 @@ inline void* GetPC(const SigStructure& sig_structure ) {
typedef struct ucontext SigStructure;
inline void* GetPC(const SigStructure& sig_structure ) {
return (void*)sig_structure.uc_mcontext.gregs[REG_RIP];
+}
+
+#elif defined HAVE_STRUCT_SIGCONTEXT_REGS__NIP
+typedef struct sigcontext SigStructure;
+inline void* GetPC(const SigStructure& sig_structure ) {
+ return (void*)sig_structure.regs->nip;
+}
#else
#error I dont know what your PC is
@@ -141,7 +148,7 @@ class ProfileData {
// Is profiling turned on at all
inline bool enabled() { return out_ >= 0; }
-
+
// What is the frequency of interrupts (ticks per second)
inline int frequency() { return frequency_; }
@@ -156,7 +163,7 @@ class ProfileData {
void Stop();
void GetCurrentState(ProfilerState* state);
-
+
private:
static const int kMaxStackDepth = 64; // Max stack depth profiled
static const int kMaxFrequency = 4000; // Largest allowed frequency
@@ -189,7 +196,7 @@ class ProfileData {
pthread_mutex_t table_lock_; // Cannot use "Mutex" in signal handlers
#endif
Bucket* hash_; // hash table
-
+
Slot* evict_; // evicted entries
int num_evicted_; // how many evicted entries?
int out_; // fd for output file
@@ -297,8 +304,8 @@ ProfileData::ProfileData() :
if (!Start(fname)) {
fprintf(stderr, "Can't turn on cpu profiling: ");
- perror(fname);
- exit(1);
+ perror(fname);
+ exit(1);
}
}
@@ -322,8 +329,8 @@ bool ProfileData::Start(const char* fname) {
fname_ = strdup(fname);
LOCK(&table_lock_);
-
- // Reset counters
+
+ // Reset counters
num_evicted_ = 0;
count_ = 0;
evictions_ = 0;
@@ -484,8 +491,8 @@ void ProfileData::FlushTable() {
void ProfileData::Add(unsigned long pc) {
void* stack[kMaxStackDepth];
stack[0] = (void*)pc;
- int depth = GetStackTrace(stack+1, kMaxStackDepth-1,
- 3/*Removes sighandlers*/);
+ int depth = GetStackTrace(stack+1, kMaxStackDepth-1,
+ 4/*Removes Add,prof_handler,sighandlers*/);
depth++; // To account for pc value
// Make hash-value
@@ -519,7 +526,7 @@ void ProfileData::Add(unsigned long pc) {
}
}
}
-
+
if (!done) {
// Evict entry with smallest count
Entry* e = &bucket->entry[0];
@@ -532,7 +539,7 @@ void ProfileData::Add(unsigned long pc) {
evictions_++;
Evict(*e);
}
-
+
// Use the newly evicted entry
e->depth = depth;
e->count = 1;
@@ -594,7 +601,7 @@ void ProfilerFlush() {
pdata.FlushTable();
}
-bool ProfilingIsEnabledForAllThreads() {
+bool ProfilingIsEnabledForAllThreads() {
return pdata.enabled();
}
diff --git a/src/stacktrace.cc b/src/stacktrace.cc
index 708d6ce..859d52a 100644
--- a/src/stacktrace.cc
+++ b/src/stacktrace.cc
@@ -40,145 +40,31 @@
// Linux/x86 implementation (requires the binary to be compiled with
// frame pointers)
-#if (defined(__i386__) || defined(__x86_64__)) && \
- defined(__linux) && !defined(NO_FRAME_POINTER)
+#if defined(__i386__) && defined(__linux) && !defined(NO_FRAME_POINTER)
#define IMPLEMENTED_STACK_TRACE
-
-#include <stdint.h> // for uintptr_t
-
-// Given a pointer to a stack frame, locate and return the calling
-// stackframe, or return NULL if no stackframe can be found. Perform
-// sanity checks to reduce the chance that a bad pointer is returned.
-static void **NextStackFrame(void **old_sp) {
- void **new_sp = (void **) *old_sp;
-
- // Check that the transition from frame pointer old_sp to frame
- // pointer new_sp isn't clearly bogus
- if (new_sp <= old_sp) return NULL;
- if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
-#ifdef __i386__
- // On 64-bit machines, the stack pointer can be very close to
- // 0xffffffff, so we explicitly check for a pointer into the
- // last two pages in the address space
- if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
-#endif
- if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
- return new_sp;
-}
-
-// Note: the code for GetStackExtent below is pretty similar to this one;
-// change both if chaning one.
-int GetStackTrace(void** result, int max_depth, int skip_count) {
- void **sp;
-#ifdef __i386__
- // Stack frame format:
- // sp[0] pointer to previous frame
- // sp[1] caller address
- // sp[2] first argument
- // ...
- sp = (void **)&result - 2;
-#endif
-
-#ifdef __x86_64__
- // Arguments are passed in registers on x86-64, so we can't just
- // offset from &result
- sp = (void **) __builtin_frame_address(0);
-#endif
-
- int n = 0;
- skip_count++; // Do not include the "GetStackTrace" frame
- while (sp && n < max_depth) {
- if (*(sp+1) == (void *)0) {
- // In 64-bit code, we often see a frame that
- // points to itself and has a return address of 0.
- break;
- }
- if (skip_count > 0) {
- skip_count--;
- } else {
- result[n++] = *(sp+1);
- }
- void** new_sp = NextStackFrame(sp);
- if (!new_sp) break;
- sp = new_sp;
- }
- return n;
-}
-
-// Note: the code is pretty similar to GetStackTrace above;
-// change both if chaning one.
-bool GetStackExtent(void* sp, void** stack_top, void** stack_bottom) {
- void** cur_sp;
-
- if (sp != NULL) {
- cur_sp = (void**)sp;
- *stack_top = sp;
- } else {
-#ifdef __i386__
- // Stack frame format:
- // sp[0] pointer to previous frame
- // sp[1] caller address
- // sp[2] first argument
- // ...
- cur_sp = (void**)&sp - 2;
+#include "stacktrace_x86-inl.h"
#endif
-#ifdef __x86_64__
- // Arguments are passed in registers on x86-64, so we can't just
- // offset from &result
- cur_sp = (void**)__builtin_frame_address(0);
+#if !defined(IMPLEMENTED_STACK_TRACE) && defined(USE_LIBUNWIND) && HAVE_LIBUNWIND_H
+#define IMPLEMENTED_STACK_TRACE
+// This is turned off by default. Possible reasons for turning on in the
+// future:
+// 1. Compiler independence
+// 2. Architecture independence
+// 3. A more liberal MIT license, which allows use with multiple compilers
+#include "stacktrace_libunwind-inl.h"
#endif
- *stack_top = NULL;
- }
-
- while (cur_sp) {
- void** new_sp = NextStackFrame(cur_sp);
- if (!new_sp) {
- *stack_bottom = (void*)cur_sp;
- return true;
- }
- cur_sp = new_sp;
- if (*stack_top == NULL) *stack_top = (void*)cur_sp;
- // get out of the stack frame for this call
- }
- return false;
-}
+#if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_UNWIND_H
+#define IMPLEMENTED_STACK_TRACE
+#include "stacktrace_x86_64-inl.h"
#endif
-// Portable implementation - just use glibc
-//
-// Note: The glibc implementation may cause a call to malloc.
-// This can cause a deadlock in HeapProfiler.
-#if !defined(IMPLEMENTED_STACK_TRACE) && defined(HAVE_EXECINFO_H)
-#include <stdlib.h>
-#include <execinfo.h>
-
-int GetStackTrace(void** result, int max_depth, int skip_count) {
- static const int kStackLength = 64;
- void * stack[kStackLength];
- int size;
-
- size = backtrace(stack, kStackLength);
- skip_count++; // we want to skip the current frame as well
- int result_count = size - skip_count;
- if ( result_count < 0 )
- result_count = 0;
- else if ( result_count > max_depth )
- result_count = max_depth;
-
- for (int i = 0; i < result_count; i++)
- result[i] = stack[i + skip_count];
-
- return result_count;
-}
-
-bool GetStackExtent(void* sp, void** stack_bottom, void** stack_top) {
- return false; // can't climb up
-}
-
+#if !defined(IMPLEMENTED_STACK_TRACE) && !defined(__x86_64__) && HAVE_EXECINFO_H
+#define IMPLEMENTED_STACK_TRACE
+#include "stacktrace_generic-inl.h"
#endif
-#if !defined(IMPLEMENTED_STACK_TRACE) && !defined(HAVE_EXECINFO_H)
+#ifndef IMPLEMENTED_STACK_TRACE
#error Cannot calculate stack trace: will need to write for your environment
#endif
diff --git a/src/stacktrace_generic-inl.h b/src/stacktrace_generic-inl.h
new file mode 100644
index 0000000..28a539a
--- /dev/null
+++ b/src/stacktrace_generic-inl.h
@@ -0,0 +1,61 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//
+// Portable implementation - just use glibc
+//
+// Note: The glibc implementation may cause a call to malloc.
+// This can cause a deadlock in HeapProfiler.
+#include <execinfo.h>
+#include <stdlib.h>
+#include "google/stacktrace.h"
+
+int GetStackTrace(void** result, int max_depth, int skip_count) {
+ static const int kStackLength = 64;
+ void * stack[kStackLength];
+ int size;
+
+ size = backtrace(stack, kStackLength);
+ skip_count++; // we want to skip the current frame as well
+ int result_count = size - skip_count;
+ if (result_count < 0)
+ result_count = 0;
+ if (result_count > max_depth)
+ result_count = max_depth;
+ for (int i = 0; i < result_count; i++)
+ result[i] = stack[i + skip_count];
+
+ return result_count;
+}
+
+bool GetStackExtent(void* sp, void** stack_top, void** stack_bottom) {
+ return false; // can't climb up
+}
diff --git a/src/stacktrace_libunwind-inl.h b/src/stacktrace_libunwind-inl.h
new file mode 100644
index 0000000..42c28d3
--- /dev/null
+++ b/src/stacktrace_libunwind-inl.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Arun Sharma
+//
+// Produce stack trace using libunwind
+
+extern "C" {
+#include <assert.h>
+#include <libunwind.h>
+}
+#include "google/stacktrace.h"
+
+int GetStackTrace(void** result, int max_depth, int skip_count) {
+ void *ip;
+ int ret, n = 0;
+ unw_cursor_t cursor;
+ unw_context_t uc;
+
+ unw_getcontext(&uc);
+ ret = unw_init_local(&cursor, &uc);
+ assert(ret >= 0);
+ skip_count++; // Do not include the "GetStackTrace" frame
+
+ do {
+ ret = unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip);
+ assert(ret == 0);
+ if (skip_count > 0) {
+ skip_count--;
+ } else {
+ result[n++] = ip;
+ }
+ ret = unw_step(&cursor);
+ assert(ret >= 0);
+ } while ((n < max_depth) && (ret > 0));
+
+ return n;
+}
+
+bool GetStackExtent(void* sp, void** stack_top, void** stack_bottom) {
+ return false; // Not implemented yet
+}
diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h
new file mode 100644
index 0000000..cdab19b
--- /dev/null
+++ b/src/stacktrace_x86-inl.h
@@ -0,0 +1,119 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//
+// Produce stack trace
+
+#include <stdint.h> // for uintptr_t
+#include <stdlib.h> // for NULL
+#include "google/stacktrace.h"
+
+// Given a pointer to a stack frame, locate and return the calling
+// stackframe, or return NULL if no stackframe can be found. Perform
+// sanity checks to reduce the chance that a bad pointer is returned.
+static void **NextStackFrame(void **old_sp) {
+ void **new_sp = (void **) *old_sp;
+
+ // Check that the transition from frame pointer old_sp to frame
+ // pointer new_sp isn't clearly bogus
+ if (new_sp <= old_sp) return NULL;
+ if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
+#ifdef __i386__
+ // On 64-bit machines, the stack pointer can be very close to
+ // 0xffffffff, so we explicitly check for a pointer into the
+ // last two pages in the address space
+ if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
+#endif
+ if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
+ return new_sp;
+}
+
+// Note: the code for GetStackExtent below is pretty similar to this one;
+// change both if chaning one.
+int GetStackTrace(void** result, int max_depth, int skip_count) {
+ void **sp;
+ // Stack frame format:
+ // sp[0] pointer to previous frame
+ // sp[1] caller address
+ // sp[2] first argument
+ // ...
+ sp = (void **)&result - 2;
+
+ int n = 0;
+ while (sp && n < max_depth) {
+ if (*(sp+1) == (void *)0) {
+ // In 64-bit code, we often see a frame that
+ // points to itself and has a return address of 0.
+ break;
+ }
+ if (skip_count > 0) {
+ skip_count--;
+ } else {
+ result[n++] = *(sp+1);
+ }
+ void** new_sp = NextStackFrame(sp);
+ if (!new_sp) break;
+ sp = new_sp;
+ }
+ return n;
+}
+
+// Note: the code is pretty similar to GetStackTrace above;
+// change both if changing one.
+bool GetStackExtent(void* sp, void** stack_top, void** stack_bottom) {
+ void** cur_sp;
+
+ if (sp != NULL) {
+ cur_sp = (void**)sp;
+ *stack_top = sp;
+ } else {
+ // Stack frame format:
+ // sp[0] pointer to previous frame
+ // sp[1] caller address
+ // sp[2] first argument
+ // ...
+ cur_sp = (void**)&sp - 2;
+
+ *stack_top = NULL;
+ }
+
+ while (cur_sp) {
+ void** new_sp = NextStackFrame(cur_sp);
+ if (!new_sp) {
+ *stack_bottom = (void*)cur_sp;
+ return true;
+ }
+ cur_sp = new_sp;
+ if (*stack_top == NULL) *stack_top = (void*)cur_sp;
+ // get out of the stack frame for this call
+ }
+ return false;
+}
diff --git a/src/stacktrace_x86_64-inl.h b/src/stacktrace_x86_64-inl.h
new file mode 100644
index 0000000..3cdfe13
--- /dev/null
+++ b/src/stacktrace_x86_64-inl.h
@@ -0,0 +1,134 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Arun Sharma
+//
+// Produce stack trace using libgcc
+
+extern "C" {
+#include <stdlib.h> // for NULL
+#include <unwind.h> // ABI defined unwinder
+}
+#include "google/stacktrace.h"
+
+typedef struct {
+ void **result;
+ int max_depth;
+ int skip_count;
+ int count;
+ void *top;
+ void *bottom;
+} trace_arg_t;
+
+
+// Workaround for the malloc() in _Unwind_Backtrace() issue.
+static _Unwind_Reason_Code nop_backtrace(struct _Unwind_Context *uc, void *opq) {
+ return _URC_NO_REASON;
+}
+
+
+// This code is not considered ready to run until
+// static initializers run so that we are guaranteed
+// that any malloc-related initialization is done.
+static bool ready_to_run = false;
+class StackTraceInit {
+ public:
+ StackTraceInit() {
+ // Extra call to force initialization
+ _Unwind_Backtrace(nop_backtrace, NULL);
+ ready_to_run = true;
+ }
+};
+
+static StackTraceInit module_initializer; // Force initialization
+
+static _Unwind_Reason_Code GetOneFrame(struct _Unwind_Context *uc, void *opq) {
+ trace_arg_t *targ = (trace_arg_t *) opq;
+
+ if (targ->skip_count > 0) {
+ targ->skip_count--;
+ } else {
+ targ->result[targ->count++] = (void *) _Unwind_GetIP(uc);
+ }
+
+ if (targ->count == targ->max_depth)
+ return _URC_END_OF_STACK;
+
+ return _URC_NO_REASON;
+}
+
+int GetStackTrace(void** result, int max_depth, int skip_count) {
+ if (!ready_to_run)
+ return 0;
+
+ trace_arg_t targ;
+
+ skip_count += 1; // Do not include the "GetStackTrace" frame
+
+ targ.result = result;
+ targ.max_depth = max_depth;
+ targ.skip_count = skip_count;
+ targ.count = 0;
+
+ _Unwind_Backtrace(GetOneFrame, &targ);
+
+ return targ.count;
+}
+
+static _Unwind_Reason_Code SearchExtents(struct _Unwind_Context *uc, void *opq) {
+ trace_arg_t *targ = (trace_arg_t *) opq;
+
+ targ->bottom = (void *) _Unwind_GetCFA(uc);
+
+ if (targ->top == NULL)
+ targ->top = targ->bottom;
+
+ return _URC_NO_REASON;
+}
+
+bool GetStackExtent(void* sp, void** stack_top, void** stack_bottom) {
+ if (!ready_to_run)
+ return false;
+
+ trace_arg_t targ;
+
+ targ.top = NULL;
+ targ.bottom = NULL;
+
+ _Unwind_Backtrace(SearchExtents, &targ);
+
+ if ((targ.top != NULL) && (targ.bottom != NULL)) {
+ *stack_bottom = targ.bottom;
+ *stack_top = targ.top;
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 2eb9ef4..5dc062e 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -1,10 +1,10 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
-//
+//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
@@ -14,7 +14,7 @@
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
-//
+//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -127,7 +127,7 @@ static const int kMinSystemAlloc = 1 << (20 - kPageShift);
// amortize the lock overhead for accessing the central list. Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
-static const int kNumObjectsToMove = 32;
+static int num_objects_to_move[kNumClasses];
// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list. We
@@ -170,6 +170,22 @@ static size_t class_to_size[kNumClasses];
// Mapping from size class to number of pages to allocate at a time
static size_t class_to_pages[kNumClasses];
+
+
+// TransferCache is used to cache transfers of num_objects_to_move[size_class]
+// back and forth between thread caches and the central cache for a given size
+// class.
+struct TCEntry {
+ void *head; // Head of chain of objects.
+ void *tail; // Tail of chain of objects.
+};
+// A central cache freelist can have anywhere from 0 to kNumTransferEntries
+// slots to put link list chains into. To keep memory usage bounded the total
+// number of TCEntries across size classes is fixed. Currently each size
+// class is initially given one TCEntry which also means that the maximum any
+// one class can have is kNumClasses.
+static const int kNumTransferEntries = kNumClasses;
+
// Return floor(log2(n)) for n > 0.
#if (defined __i386__ || defined __x86_64__) && defined __GNUC__
static inline int LgFloor(size_t n) {
@@ -201,6 +217,70 @@ static inline int LgFloor(size_t n) {
}
#endif
+
+// Some very basic linked list functions for dealing with using void * as
+// storage.
+
+static inline void *SLL_Next(void *t) {
+ return *(reinterpret_cast<void**>(t));
+}
+
+static inline void SLL_SetNext(void *t, void *n) {
+ *(reinterpret_cast<void**>(t)) = n;
+}
+
+static inline void SLL_Push(void **list, void *element) {
+ SLL_SetNext(element, *list);
+ *list = element;
+}
+
+static inline void *SLL_Pop(void **list) {
+ void *result = *list;
+ *list = SLL_Next(*list);
+ return result;
+}
+
+
+// Remove N elements from a linked list to which head points. head will be
+// modified to point to the new head. start and end will point to the first
+// and last nodes of the range. Note that end will point to NULL after this
+// function is called.
+static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
+ if (N == 0) {
+ *start = NULL;
+ *end = NULL;
+ return;
+ }
+
+ void *tmp = *head;
+ for (int i = 1; i < N; ++i) {
+ tmp = SLL_Next(tmp);
+ }
+
+ *start = *head;
+ *end = tmp;
+ *head = SLL_Next(tmp);
+ // Unlink range from list.
+ SLL_SetNext(tmp, NULL);
+}
+
+static inline void SLL_PushRange(void **head, void *start, void *end) {
+ if (!start) return;
+ SLL_SetNext(end, *head);
+ *head = start;
+}
+
+static inline size_t SLL_Size(void *head) {
+ int count = 0;
+ while (head) {
+ count++;
+ head = SLL_Next(head);
+ }
+ return count;
+}
+
+// Setup helper functions.
+
static inline int SizeClass(size_t size) {
if (size == 0) size = 1;
const int lg = LgFloor(size);
@@ -213,6 +293,19 @@ static inline size_t ByteSizeForClass(size_t cl) {
return class_to_size[cl];
}
+
+static int NumMoveSize(size_t size) {
+ if (size == 0) return 0;
+ // Use approx 64k transfers between thread and central caches.
+ int num = static_cast<int>(64.0 * 1024.0 / size);
+ if (num < 2) num = 2;
+ // Clamp well below kMaxFreeListLength to avoid ping pong between central
+ // and thread caches.
+ if (num > static_cast<int>(0.8 * kMaxFreeListLength))
+ num = static_cast<int>(0.8 * kMaxFreeListLength);
+ return num;
+}
+
// Initialize the mapping arrays
static void InitSizeClasses() {
// Special initialization for small sizes
@@ -290,6 +383,11 @@ static void InitSizeClasses() {
abort();
}
}
+
+ // Initialize the num_objects_to_move array.
+ for (size_t cl = 1; cl < kNumClasses; ++cl) {
+ num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
+ }
}
// -------------------------------------------------------------------------
@@ -833,7 +931,7 @@ void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
static void RecordGrowth(size_t growth) {
StackTrace* t = stacktrace_allocator.New();
- t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 4);
+ t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3);
t->size = growth;
t->stack[kMaxStackDepth-1] = reinterpret_cast<void*>(growth_stacks);
growth_stacks = t;
@@ -939,18 +1037,26 @@ class TCMalloc_ThreadCache_FreeList {
void clear_lowwatermark() { lowater_ = length_; }
void Push(void* ptr) {
- *(reinterpret_cast<void**>(ptr)) = list_;
- list_ = ptr;
+ SLL_Push(&list_, ptr);
length_++;
}
void* Pop() {
ASSERT(list_ != NULL);
- void* result = list_;
- list_ = *(reinterpret_cast<void**>(result));
length_--;
if (length_ < lowater_) lowater_ = length_;
- return result;
+ return SLL_Pop(&list_);
+ }
+
+ void PushRange(int N, void *start, void *end) {
+ SLL_PushRange(&list_, start, end);
+ length_ += N;
+ }
+
+ void PopRange(int N, void **start, void **end) {
+ SLL_PopRange(&list_, N, start, end);
+ ASSERT(length_ >= N);
+ length_ -= N;
}
};
@@ -1017,34 +1123,98 @@ class TCMalloc_Central_FreeList {
public:
void Init(size_t cl);
- // REQUIRES: lock_ is held
- // Insert object.
- // May temporarily release lock_.
- void Insert(void* object);
+ // These methods all do internal locking.
+ // Insert the specified range into the central freelist. N is the number of
+ // elements in the range.
+ void InsertRange(void *start, void *end, int N);
+
+ // Returns the actual number of fetched elements into N.
+ void RemoveRange(void **start, void **end, int *N);
+
+ // Returns the number of free objects in cache.
+ int length() {
+ SpinLockHolder h(&lock_);
+ return counter_;
+ }
+
+ // Returns the number of free objects in the transfer cache.
+ int tc_length() {
+ SpinLockHolder h(&lock_);
+ return used_slots_ * num_objects_to_move[size_class_];
+ }
+
+ private:
// REQUIRES: lock_ is held
// Remove object from cache and return.
// Return NULL if no free entries in cache.
- void* Remove();
+ void* FetchFromSpans();
// REQUIRES: lock_ is held
- // Populate cache by fetching from the page heap.
+ // Remove object from cache and return. Fetches
+ // from pageheap if cache is empty. Only returns
+ // NULL on allocation failure.
+ void* FetchFromSpansSafe();
+
+ // REQUIRES: lock_ is held
+ // Release a linked list of objects to spans.
// May temporarily release lock_.
- void Populate();
+ void ReleaseListToSpans(void *start);
// REQUIRES: lock_ is held
- // Number of free objects in cache
- int length() const { return counter_; }
+ // Release an object to spans.
+ // May temporarily release lock_.
+ void ReleaseToSpans(void* object);
+
+ // REQUIRES: lock_ is held
+ // Populate cache by fetching from the page heap.
+ // May temporarily release lock_.
+ void Populate();
- // Lock -- exposed because caller grabs it before touching this object
+ // REQUIRES: lock is held.
+ // Tries to make room for a TCEntry. If the cache is full it will try to
+ // expand it at the cost of some other cache size. Return false if there is
+ // no space.
+ bool MakeCacheSpace();
+
+ // REQUIRES: lock_ for locked_size_class is held.
+ // Picks a "random" size class to steal TCEntry slot from. In reality it
+ // just iterates over the sizeclasses but does so without taking a lock.
+ // Returns true on success.
+ // May temporarily lock a "random" size class.
+ static bool EvictRandomSizeClass(int locked_size_class, bool force);
+
+  // REQUIRES: lock_ is *not* held.
+  // Tries to shrink the Cache.  If force is true it will release objects to
+  // spans if it allows it to shrink the cache.  Return false if it failed to
+  // shrink the cache.  Decrements cache_size_ on success.
+  // May temporarily take lock_.  If it takes lock_, the locked_size_class
+  // lock is released to keep the thread from holding two size class locks
+  // concurrently which could lead to a deadlock.
+ bool ShrinkCache(int locked_size_class, bool force);
+
+ // This lock protects all the data members. cached_entries and cache_size_
+ // may be looked at without holding the lock.
SpinLock lock_;
- private:
- // We keep linked lists of empty and non-emoty spans.
+ // We keep linked lists of empty and non-empty spans.
size_t size_class_; // My size class
Span empty_; // Dummy header for list of empty spans
Span nonempty_; // Dummy header for list of non-empty spans
size_t counter_; // Number of free objects in cache entry
+
+ // Here we reserve space for TCEntry cache slots. Since one size class can
+ // end up getting all the TCEntries quota in the system we just preallocate
+ // sufficient number of entries here.
+ TCEntry tc_slots_[kNumTransferEntries];
+
+ // Number of currently used cached entries in tc_slots_. This variable is
+ // updated under a lock but can be read without one.
+ int32_t used_slots_;
+ // The current number of slots for this size class. This is an
+ // adaptive value that is increased if there is lots of traffic
+ // on a given size class.
+ int32_t cache_size_;
};
// Pad each CentralCache object to multiple of 64 bytes
@@ -1104,9 +1274,21 @@ void TCMalloc_Central_FreeList::Init(size_t cl) {
DLL_Init(&empty_);
DLL_Init(&nonempty_);
counter_ = 0;
+
+ cache_size_ = 1;
+ used_slots_ = 0;
+ ASSERT(cache_size_ <= kNumTransferEntries);
+}
+
+void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
+ while (start) {
+ void *next = SLL_Next(start);
+ ReleaseToSpans(start);
+ start = next;
+ }
}
-void TCMalloc_Central_FreeList::Insert(void* object) {
+void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
Span* span = pageheap->GetDescriptor(p);
ASSERT(span != NULL);
@@ -1151,7 +1333,139 @@ void TCMalloc_Central_FreeList::Insert(void* object) {
}
}
-void* TCMalloc_Central_FreeList::Remove() {
+bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
+ int locked_size_class, bool force) {
+ static int race_counter = 0;
+ int t = race_counter++; // Updated without a lock, but who cares.
+ if (t >= kNumClasses) {
+ while (t >= kNumClasses) {
+ t -= kNumClasses;
+ }
+ race_counter = t;
+ }
+ ASSERT(t >= 0);
+ ASSERT(t < kNumClasses);
+ if (t == locked_size_class) return false;
+ return central_cache[t].ShrinkCache(locked_size_class, force);
+}
+
+bool TCMalloc_Central_FreeList::MakeCacheSpace() {
+ // Is there room in the cache?
+ if (used_slots_ < cache_size_) return true;
+ // Check if we can expand this cache?
+ if (cache_size_ == kNumTransferEntries) return false;
+ // Ok, we'll try to grab an entry from some other size class.
+ if (EvictRandomSizeClass(size_class_, false) ||
+ EvictRandomSizeClass(size_class_, true)) {
+ // Succeeded in evicting, we're going to make our cache larger.
+ cache_size_++;
+ return true;
+ }
+ return false;
+}
+
+
+namespace {
+class LockInverter {
+ private:
+ TCMalloc_SpinLock *held_, *temp_;
+ public:
+ inline explicit LockInverter(TCMalloc_SpinLock* held, TCMalloc_SpinLock *temp)
+ : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
+ inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
+};
+}
+
+bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
+ // Start with a quick check without taking a lock.
+ if (cache_size_ == 0) return false;
+ // We don't evict from a full cache unless we are 'forcing'.
+ if (force == false && used_slots_ == cache_size_) return false;
+
+ // Grab lock, but first release the other lock held by this thread. We use
+ // the lock inverter to ensure that we never hold two size class locks
+ // concurrently. That can create a deadlock because there is no well
+ // defined nesting order.
+ LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
+ ASSERT(used_slots_ <= cache_size_);
+ ASSERT(0 <= cache_size_);
+ if (cache_size_ == 0) return false;
+ if (used_slots_ == cache_size_) {
+ if (force == false) return false;
+ // ReleaseListToSpans releases the lock, so we have to make all the
+ // updates to the central list before calling it.
+ cache_size_--;
+ used_slots_--;
+ ReleaseListToSpans(tc_slots_[used_slots_].head);
+ return true;
+ }
+ cache_size_--;
+ return true;
+}
+
+void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
+ SpinLockHolder h(&lock_);
+ if (N == num_objects_to_move[size_class_] &&
+ MakeCacheSpace()) {
+ int slot = used_slots_++;
+ ASSERT(slot >=0);
+ ASSERT(slot < kNumTransferEntries);
+ TCEntry *entry = &tc_slots_[slot];
+ entry->head = start;
+ entry->tail = end;
+ return;
+ }
+ ReleaseListToSpans(start);
+}
+
+void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
+ int num = *N;
+ ASSERT(num > 0);
+
+ SpinLockHolder h(&lock_);
+ if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
+ int slot = --used_slots_;
+ ASSERT(slot >= 0);
+ TCEntry *entry = &tc_slots_[slot];
+ *start = entry->head;
+ *end = entry->tail;
+ return;
+ }
+
+ // TODO: Prefetch multiple TCEntries?
+ void *tail = FetchFromSpansSafe();
+ if (!tail) {
+ // We are completely out of memory.
+ *start = *end = NULL;
+ *N = 0;
+ return;
+ }
+
+ SLL_SetNext(tail, NULL);
+ void *head = tail;
+ int count = 1;
+ while (count < num) {
+ void *t = FetchFromSpans();
+ if (!t) break;
+ SLL_Push(&head, t);
+ count++;
+ }
+ *start = head;
+ *end = tail;
+ *N = count;
+}
+
+
+void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
+ void *t = FetchFromSpans();
+ if (!t) {
+ Populate();
+ t = FetchFromSpans();
+ }
+ return t;
+}
+
+void* TCMalloc_Central_FreeList::FetchFromSpans() {
if (DLL_IsEmpty(&nonempty_)) return NULL;
Span* span = nonempty_.next;
@@ -1244,11 +1558,8 @@ void TCMalloc_ThreadCache::Init(pthread_t tid) {
void TCMalloc_ThreadCache::Cleanup() {
// Put unused memory back into central cache
for (int cl = 0; cl < kNumClasses; ++cl) {
- FreeList* src = &list_[cl];
- TCMalloc_Central_FreeList* dst = &central_cache[cl];
- SpinLockHolder h(&dst->lock_);
- while (!src->empty()) {
- dst->Insert(src->Pop());
+ if (list_[cl].length() > 0) {
+ ReleaseToCentralCache(cl, list_[cl].length());
}
}
}
@@ -1271,43 +1582,39 @@ inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
list->Push(ptr);
// If enough data is free, put back into central cache
if (list->length() > kMaxFreeListLength) {
- ReleaseToCentralCache(cl, kNumObjectsToMove);
+ ReleaseToCentralCache(cl, num_objects_to_move[cl]);
}
if (size_ >= per_thread_cache_size) Scavenge();
}
// Remove some objects of class "cl" from central cache and add to thread heap
void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl) {
- TCMalloc_Central_FreeList* src = &central_cache[cl];
- FreeList* dst = &list_[cl];
- SpinLockHolder h(&src->lock_);
- for (int i = 0; i < kNumObjectsToMove; i++) {
- void* object = src->Remove();
- if (object == NULL) {
- if (i == 0) {
- src->Populate(); // Temporarily releases src->lock_
- object = src->Remove();
- }
- if (object == NULL) {
- break;
- }
- }
- dst->Push(object);
- size_ += ByteSizeForClass(cl);
- }
+ int fetch_count = num_objects_to_move[cl];
+ void *start, *end;
+ central_cache[cl].RemoveRange(&start, &end, &fetch_count);
+ list_[cl].PushRange(fetch_count, start, end);
+ size_ += ByteSizeForClass(cl) * fetch_count;
}
// Remove some objects of class "cl" from thread heap and add to central cache
void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
+ ASSERT(N > 0);
FreeList* src = &list_[cl];
- TCMalloc_Central_FreeList* dst = &central_cache[cl];
- SpinLockHolder h(&dst->lock_);
if (N > src->length()) N = src->length();
size_ -= N*ByteSizeForClass(cl);
- while (N-- > 0) {
- void* ptr = src->Pop();
- dst->Insert(ptr);
+
+ // We return prepackaged chains of the correct size to the central cache.
+ // TODO: Use the same format internally in the thread caches?
+ int batch_size = num_objects_to_move[cl];
+ while (N > batch_size) {
+ void *tail, *head;
+ src->PopRange(batch_size, &head, &tail);
+ central_cache[cl].InsertRange(head, tail, batch_size);
+ N -= batch_size;
}
+ void *tail, *head;
+ src->PopRange(N, &head, &tail);
+ central_cache[cl].InsertRange(head, tail, N);
}
// Release idle memory to the central cache
@@ -1395,7 +1702,7 @@ void TCMalloc_ThreadCache::InitTSD() {
ASSERT(!tsd_inited);
perftools_pthread_key_create(&heap_key, DeleteCache);
tsd_inited = true;
-
+
// We may have used a fake pthread_t for the main thread. Fix it.
pthread_t zero;
memset(&zero, 0, sizeof(zero));
@@ -1499,6 +1806,7 @@ struct TCMallocStats {
uint64_t system_bytes; // Bytes alloced from system
uint64_t thread_bytes; // Bytes in thread caches
uint64_t central_bytes; // Bytes in central cache
+ uint64_t transfer_bytes; // Bytes in central transfer cache
uint64_t pageheap_bytes; // Bytes in page heap
uint64_t metadata_bytes; // Bytes alloced for metadata
};
@@ -1506,11 +1814,14 @@ struct TCMallocStats {
// Get stats into "r". Also get per-size-class counts if class_count != NULL
static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
r->central_bytes = 0;
+ r->transfer_bytes = 0;
for (int cl = 0; cl < kNumClasses; ++cl) {
- SpinLockHolder h(&central_cache[cl].lock_);
const int length = central_cache[cl].length();
+ const int tc_length = central_cache[cl].tc_length();
r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
- if (class_count) class_count[cl] = length;
+ r->transfer_bytes +=
+ static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
+ if (class_count) class_count[cl] = length + tc_length;
}
// Add stats from per-thread heaps
@@ -1564,6 +1875,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
const uint64_t bytes_in_use = stats.system_bytes
- stats.pageheap_bytes
- stats.central_bytes
+ - stats.transfer_bytes
- stats.thread_bytes;
out->printf("------------------------------------------------\n"
@@ -1571,6 +1883,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
"MALLOC: %12" LLU " Bytes in use by application\n"
"MALLOC: %12" LLU " Bytes free in page heap\n"
"MALLOC: %12" LLU " Bytes free in central cache\n"
+ "MALLOC: %12" LLU " Bytes free in transfer cache\n"
"MALLOC: %12" LLU " Bytes free in thread caches\n"
"MALLOC: %12" LLU " Spans in use\n"
"MALLOC: %12" LLU " Thread heaps in use\n"
@@ -1580,6 +1893,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
bytes_in_use,
stats.pageheap_bytes,
stats.central_bytes,
+ stats.transfer_bytes,
stats.thread_bytes,
uint64_t(span_allocator.inuse()),
uint64_t(threadheap_allocator.inuse()),
@@ -1787,7 +2101,7 @@ static Span* DoSampledAllocation(size_t size) {
}
// Fill stack trace and record properly
- stack->depth = GetStackTrace(stack->stack, kMaxStackDepth, 2);
+ stack->depth = GetStackTrace(stack->stack, kMaxStackDepth, 1);
stack->size = size;
span->sample = 1;
span->objects = stack;
@@ -1797,28 +2111,34 @@ static Span* DoSampledAllocation(size_t size) {
}
static inline void* do_malloc(size_t size) {
+ void* ret = NULL;
- if (TCMallocDebug::level >= TCMallocDebug::kVerbose)
+ if (TCMallocDebug::level >= TCMallocDebug::kVerbose) {
MESSAGE("In tcmalloc do_malloc(%" PRIuS")\n", size);
+ }
// The following call forces module initialization
TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
if (heap->SampleAllocation(size)) {
Span* span = DoSampledAllocation(size);
- if (span == NULL) return NULL;
- return reinterpret_cast<void*>(span->start << kPageShift);
+ if (span != NULL) {
+ ret = reinterpret_cast<void*>(span->start << kPageShift);
+ }
} else if (size > kMaxSize) {
// Use page-level allocator
SpinLockHolder h(&pageheap_lock);
Span* span = pageheap->New(pages(size));
- if (span == NULL) return NULL;
- return reinterpret_cast<void*>(span->start << kPageShift);
+ if (span != NULL) {
+ ret = reinterpret_cast<void*>(span->start << kPageShift);
+ }
} else {
- return heap->Allocate(size);
+ ret = heap->Allocate(size);
}
+ if (ret == NULL) errno = ENOMEM;
+ return ret;
}
static inline void do_free(void* ptr) {
- if (TCMallocDebug::level >= TCMallocDebug::kVerbose)
+ if (TCMallocDebug::level >= TCMallocDebug::kVerbose)
MESSAGE("In tcmalloc do_free(%p)\n", ptr);
if (ptr == NULL) return;
ASSERT(pageheap != NULL); // Should not call free() before malloc()
@@ -1835,8 +2155,8 @@ static inline void do_free(void* ptr) {
heap->Deallocate(ptr, cl);
} else {
// Delete directly into central cache
- SpinLockHolder h(&central_cache[cl].lock_);
- central_cache[cl].Insert(ptr);
+ SLL_SetNext(ptr, NULL);
+ central_cache[cl].InsertRange(ptr, ptr, 1);
}
} else {
SpinLockHolder h(&pageheap_lock);
@@ -2045,39 +2365,88 @@ extern "C" void* realloc(void* old_ptr, size_t new_size) {
}
#ifndef COMPILER_INTEL
-#define OPNEW_THROW
-#define OPDELETE_THROW
+#define OP_THROWNOTHING
+#define OP_THROWBADALLOC
#else
-#define OPNEW_THROW throw(std::bad_alloc)
-#define OPDELETE_THROW throw()
+#define OP_THROWNOTHING throw()
+#define OP_THROWBADALLOC throw(std::bad_alloc)
#endif
-void* operator new(size_t size) OPNEW_THROW {
- void* p = do_malloc(size);
- if (p == NULL) {
- MESSAGE("Unable to allocate %" PRIuS " bytes: new failed\n", size);
- abort();
+static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
+
+static inline void* cpp_alloc(size_t size, bool nothrow) {
+ for (;;) {
+ void* p = do_malloc(size);
+#ifdef PREANSINEW
+ MallocHook::InvokeNewHook(p, size);
+ return p;
+#else
+ if (p == NULL) { // allocation failed
+ // Get the current new handler. NB: this function is not
+ // thread-safe. We make a feeble stab at making it so here, but
+ // this lock only protects against tcmalloc interfering with
+ // itself, not with other libraries calling set_new_handler.
+ std::new_handler nh;
+ {
+ SpinLockHolder h(&set_new_handler_lock);
+ nh = std::set_new_handler(0);
+ (void) std::set_new_handler(nh);
+ }
+ // If no new_handler is established, the allocation failed.
+ if (!nh) {
+ if (nothrow) return 0;
+ throw std::bad_alloc();
+ }
+ // Otherwise, try the new_handler. If it returns, retry the
+ // allocation. If it throws std::bad_alloc, fail the allocation.
+ // if it throws something else, don't interfere.
+ try {
+ (*nh)();
+ } catch (const std::bad_alloc&) {
+ if (!nothrow) throw;
+ MallocHook::InvokeNewHook(p, size);
+ return p;
+ }
+ } else { // allocation success
+ MallocHook::InvokeNewHook(p, size);
+ return p;
+ }
+#endif
}
- MallocHook::InvokeNewHook(p, size);
- return p;
}
-void operator delete(void* p) OPDELETE_THROW {
+void* operator new(size_t size) OP_THROWBADALLOC {
+ return cpp_alloc(size, false);
+}
+
+void* operator new(size_t size, const std::nothrow_t&) OP_THROWNOTHING {
+ return cpp_alloc(size, true);
+}
+
+void operator delete(void* p) OP_THROWNOTHING {
MallocHook::InvokeDeleteHook(p);
do_free(p);
}
-void* operator new[](size_t size) OPNEW_THROW {
- void* p = do_malloc(size);
- if (p == NULL) {
- MESSAGE("Unable to allocate %" PRIuS " bytes: new failed\n", size);
- abort();
- }
- MallocHook::InvokeNewHook(p, size);
- return p;
+void operator delete(void* p, const std::nothrow_t&) OP_THROWNOTHING {
+ MallocHook::InvokeDeleteHook(p);
+ do_free(p);
+}
+
+void* operator new[](size_t size) OP_THROWBADALLOC {
+ return cpp_alloc(size, false);
+}
+
+void* operator new[](size_t size, const std::nothrow_t&) OP_THROWNOTHING {
+ return cpp_alloc(size, true);
+}
+
+void operator delete[](void* p) OP_THROWNOTHING {
+ MallocHook::InvokeDeleteHook(p);
+ do_free(p);
}
-void operator delete[](void* p) OPDELETE_THROW {
+void operator delete[](void* p, const std::nothrow_t&) OP_THROWNOTHING {
MallocHook::InvokeDeleteHook(p);
do_free(p);
}
@@ -2143,11 +2512,14 @@ extern "C" struct mallinfo mallinfo(void) {
// Unfortunately, the struct contains "int" field, so some of the
// size values will be truncated.
info.arena = static_cast<int>(stats.system_bytes);
- info.fsmblks = static_cast<int>(stats.thread_bytes + stats.central_bytes);
+ info.fsmblks = static_cast<int>(stats.thread_bytes
+ + stats.central_bytes
+ + stats.transfer_bytes);
info.fordblks = static_cast<int>(stats.pageheap_bytes);
info.uordblks = static_cast<int>(stats.system_bytes
- stats.thread_bytes
- stats.central_bytes
+ - stats.transfer_bytes
- stats.pageheap_bytes);
return info;
diff --git a/src/tests/stacktrace_unittest.cc b/src/tests/stacktrace_unittest.cc
index 34a7718..b73cf4b 100644
--- a/src/tests/stacktrace_unittest.cc
+++ b/src/tests/stacktrace_unittest.cc
@@ -42,6 +42,7 @@
// backtrace, and maybe print the backtrace to stdout.
//-----------------------------------------------------------------------//
+void CheckStackTraceLeaf();
void CheckStackTrace4(int i);
void CheckStackTrace3(int i);
void CheckStackTrace2(int i);
@@ -51,8 +52,9 @@ void CheckStackTrace(int i);
// The sequence of functions whose return addresses we expect to see in the
// backtrace.
-const int BACKTRACE_STEPS = 5;
+const int BACKTRACE_STEPS = 6;
void * expected_stack[BACKTRACE_STEPS] = {
+ (void *) &CheckStackTraceLeaf,
(void *) &CheckStackTrace4,
(void *) &CheckStackTrace3,
(void *) &CheckStackTrace2,
@@ -77,10 +79,6 @@ void * expected_stack[BACKTRACE_STEPS] = {
// ...
// stack[5] is ret addr within CheckStackTrace
-// The google version does not include the caller in the
-// backtrace. Some other version might. (glibc backtrace()?)
-const int self_in_backtrace = 0;
-
//-----------------------------------------------------------------------//
void CheckRetAddrIsInFunction( void * ret_addr, void * function_start_addr)
@@ -96,14 +94,20 @@ void CheckStackTraceLeaf(void) {
const int STACK_LEN = 10;
void *stack[STACK_LEN];
int size;
+ void *top, *bottom;
size = GetStackTrace(stack, STACK_LEN, 0);
printf("Obtained %d stack frames.\n", size);
CHECK_LE(size, STACK_LEN);
-
+
+ // GetStackExtent() may not be implemented
+ if (GetStackExtent(NULL, &top, &bottom)) {
+ printf("%p %p\n", top, bottom);
+ }
+
for (int i = 0; i < BACKTRACE_STEPS; i++)
{
- CheckRetAddrIsInFunction(stack[i + self_in_backtrace], expected_stack[i]);
+ CheckRetAddrIsInFunction(stack[i], expected_stack[i]);
}
@@ -111,6 +115,7 @@ void CheckStackTraceLeaf(void) {
{
char **strings = backtrace_symbols(stack, size);
+ printf("Obtained %d stack frames.\n", size);
for (int i = 0; i < size; i++)
printf("%s\n", strings[i]);
printf("CheckStackTrace() addr: %p\n", &CheckStackTrace);
diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc
index 22a6c29..c151c2a 100644
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@@ -1,10 +1,10 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
-//
+//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
@@ -14,7 +14,7 @@
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
-//
+//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -28,33 +28,598 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
-// Author: Paul Menage
+// Unittest for the TCMalloc implementation.
//
-// TODO(menage) Turn this into a real unittest ...
+// * The test consists of a set of threads.
+// * Each thread maintains a set of allocated objects, with
+// a bound on the total amount of data in the set.
+// * Each allocated object's contents are generated by
+// hashing the object pointer, and a generation count
+// in the object. This allows us to easily check for
+// data corruption.
+// * At any given step, the thread can do any of the following:
+// a. Allocate an object
+// b. Increment an object's generation count and update
+// its contents.
+// c. Pass the object to another thread
+// d. Free an object
+// Also, at the end of every step, object(s) are freed to maintain
+// the memory upper-bound.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
-#include <stdint.h>
-#include "google/malloc_extension.h"
+#include <stdint.h> // for intptr_t
+#include <unistd.h> // for getpid()
+#include <pthread.h>
+#include <vector>
+#include <string>
+#include <new>
+#include "base/logging.h"
+
+#define LOGSTREAM stdout
+
+using std::vector;
+using std::string;
+
+static const int FLAGS_numtests = 50000;
+static const int FLAGS_log_every_n_tests = 10000;
+
+// Testing parameters
+static const int FLAGS_lgmaxsize = 16; // lg() of the max size object to alloc
+static const int FLAGS_numthreads = 10; // Number of threads
+static const int FLAGS_threadmb = 4; // Max memory size allocated by thread
+
+// Weights of different operations
+static const int FLAGS_mallocweight = 50; // Weight for picking malloc
+static const int FLAGS_freeweight = 50; // Weight for picking free
+static const int FLAGS_updateweight = 10; // Weight for picking update
+static const int FLAGS_passweight = 1; // Weight for passing object
+
+static const int kSizeBits = 8 * sizeof(size_t);
+static const size_t kMaxSize = ~static_cast<size_t>(0);
+static const size_t kMaxSignedSize = ((size_t(1) << (kSizeBits-1)) - 1);
+
+static const size_t kNotTooBig = 100000;
+static const size_t kTooBig = kMaxSignedSize;
+
+static int news_handled = 0;
+
+// Global array of threads
+class TesterThread;
+static TesterThread** threads;
+
+// To help with generating random numbers
+class TestHarness {
+ private:
+ // Information kept per type
+ struct Type {
+ string name;
+ int type;
+ int weight;
+ };
+
+ public:
+ TestHarness(int seed)
+ : types_(new vector<Type>), total_weight_(0), num_tests_(0) {
+ srandom(seed);
+ }
+ ~TestHarness() {
+ delete types_;
+ }
+
+ // Add operation type with specified weight. When starting a new
+ // iteration, an operation type is picked with probability
+ // proportional to its weight.
+ //
+ // "type" must be non-negative.
+ // "weight" must be non-negative.
+ void AddType(int type, int weight, const char* name);
+
+ // Call this to get the type of operation for the next iteration.
+ // It returns a random operation type from the set of registered
+ // operations. Returns -1 if tests should finish.
+ int PickType();
+
+ // If n == 0, returns the next pseudo-random number in the range [0 .. 0]
+ // If n != 0, returns the next pseudo-random number in the range [0 .. n)
+ int Uniform(int n) {
+ if (n == 0) {
+ return random() * 0;
+ } else {
+ return random() % n;
+ }
+ }
+ // Pick "base" uniformly from range [0,max_log] and then return
+ // "base" random bits. The effect is to pick a number in the range
+ // [0,2^max_log-1] with bias towards smaller numbers.
+ int Skewed(int max_log) {
+ const int base = random() % (max_log+1);
+ return random() % (1 << base);
+ }
+
+ private:
+ vector<Type>* types_; // Registered types
+ int total_weight_; // Total weight of all types
+ int num_tests_; // Num tests run so far
+};
+
+void TestHarness::AddType(int type, int weight, const char* name) {
+ Type t;
+ t.name = name;
+ t.type = type;
+ t.weight = weight;
+ types_->push_back(t);
+ total_weight_ += weight;
+}
+
+int TestHarness::PickType() {
+ if (num_tests_ >= FLAGS_numtests) return -1;
+
+ assert(total_weight_ > 0);
+ // This is a little skewed if total_weight_ doesn't divide 2^31, but it's close
+ int v = Uniform(total_weight_);
+ int i;
+ for (i = 0; i < types_->size(); i++) {
+ v -= (*types_)[i].weight;
+ if (v < 0) {
+ break;
+ }
+ }
+
+ assert(i < types_->size());
+ if ((num_tests_ % FLAGS_log_every_n_tests) == 0) {
+ fprintf(LOGSTREAM, "Test %d out of %d: %s\n",
+ num_tests_, FLAGS_numtests, (*types_)[i].name.c_str());
+ }
+ num_tests_++;
+ return (*types_)[i].type;
+}
+
+
+// Info kept per thread
+class TesterThread {
+ private:
+ // Info kept per allocated object
+ struct Object {
+ char* ptr; // Allocated pointer
+ int size; // Allocated size
+ int generation; // Generation counter of object contents
+ };
+
+ pthread_mutex_t lock_; // For passing in another thread's obj
+ int id_; // My thread id
+ TestHarness rnd_; // For generating random numbers
+ vector<Object> heap_; // This thread's heap
+ vector<Object> passed_; // Pending objects passed from others
+ size_t heap_size_; // Current heap size
+ int locks_ok_; // Number of OK TryLock() ops
+ int locks_failed_; // Number of failed TryLock() ops
+
+ // Type of operations
+ enum Type { MALLOC, FREE, UPDATE, PASS };
+
+ public:
+ TesterThread(int id)
+ : id_(id),
+ rnd_(id+1),
+ heap_size_(0),
+ locks_ok_(0),
+ locks_failed_(0) {
+ CHECK_EQ(pthread_mutex_init(&lock_, NULL), 0);
+ }
+
+ virtual ~TesterThread() {
+ fprintf(LOGSTREAM, "Thread %2d: locks %6d ok; %6d trylocks failed\n",
+ id_, locks_ok_, locks_failed_);
+ if (locks_ok_ + locks_failed_ >= 1000) {
+ CHECK_LE(locks_failed_, locks_ok_ / 2);
+ }
+ }
+
+ virtual void Run() {
+ rnd_.AddType(MALLOC, FLAGS_mallocweight, "malloc");
+ rnd_.AddType(FREE, FLAGS_freeweight, "free");
+ rnd_.AddType(UPDATE, FLAGS_updateweight, "update");
+ rnd_.AddType(PASS, FLAGS_passweight, "pass");
+
+ while (true) {
+ AcquirePassedObjects();
+
+ switch (rnd_.PickType()) {
+ case MALLOC: AllocateObject(); break;
+ case FREE: FreeObject(); break;
+ case UPDATE: UpdateObject(); break;
+ case PASS: PassObject(); break;
+ case -1: goto done;
+ default: assert("" == "Unknown type");
+ }
+
+ ShrinkHeap();
+ }
+
+ done:
+ DeleteHeap();
+ }
+
+ // Allocate a new object
+ void AllocateObject() {
+ Object object;
+ object.size = rnd_.Skewed(FLAGS_lgmaxsize);
+ object.ptr = reinterpret_cast<char*>(malloc(object.size));
+ object.generation = 0;
+ FillContents(&object);
+ heap_.push_back(object);
+ heap_size_ += object.size;
+ }
+
+ // Mutate a random object
+ void UpdateObject() {
+ if (heap_.empty()) return;
+ const int index = rnd_.Uniform(heap_.size());
+ CheckContents(heap_[index]);
+ heap_[index].generation++;
+ FillContents(&heap_[index]);
+ }
+
+ // Free a random object
+ void FreeObject() {
+ if (heap_.empty()) return;
+ const int index = rnd_.Uniform(heap_.size());
+ Object object = heap_[index];
+ CheckContents(object);
+ free(object.ptr);
+ heap_size_ -= object.size;
+ heap_[index] = heap_[heap_.size()-1];
+ heap_.pop_back();
+ }
+
+ // Delete all objects in the heap
+ void DeleteHeap() {
+ while (!heap_.empty()) {
+ FreeObject();
+ }
+ }
+
+ // Free objects until our heap is small enough
+ void ShrinkHeap() {
+ while (heap_size_ > FLAGS_threadmb << 20) {
+ assert(!heap_.empty());
+ FreeObject();
+ }
+ }
+
+ // Pass a random object to another thread
+ void PassObject() {
+ // Pick object to pass
+ if (heap_.empty()) return;
+ const int index = rnd_.Uniform(heap_.size());
+ Object object = heap_[index];
+ CheckContents(object);
+
+ // Pick thread to pass
+ const int tid = rnd_.Uniform(FLAGS_numthreads);
+ TesterThread* thread = threads[tid];
+
+ if (pthread_mutex_trylock(&thread->lock_) == 0) {
+ // Pass the object
+ locks_ok_++;
+ thread->passed_.push_back(object);
+ CHECK_EQ(pthread_mutex_unlock(&thread->lock_), 0);
+ heap_size_ -= object.size;
+ heap_[index] = heap_[heap_.size()-1];
+ heap_.pop_back();
+ } else {
+ locks_failed_++;
+ }
+ }
+
+ // Grab any objects passed to this thread by another thread
+ void AcquirePassedObjects() {
+ // We do not create unnecessary contention by always using
+ // TryLock(). Plus we unlock immediately after swapping passed
+ // objects into a local vector.
+ vector<Object> copy;
+ { // Locking scope
+ if (pthread_mutex_trylock(&lock_) != 0) {
+ locks_failed_++;
+ return;
+ }
+ locks_ok_++;
+ swap(copy, passed_);
+ CHECK_EQ(pthread_mutex_unlock(&lock_), 0);
+ }
+
+ for (int i = 0; i < copy.size(); ++i) {
+ const Object& object = copy[i];
+ CheckContents(object);
+ heap_.push_back(object);
+ heap_size_ += object.size;
+ }
+ }
+
+ // Fill object contents according to ptr/generation
+ void FillContents(Object* object) {
+ // It doesn't need to be a great random number generator, but it does
+ // need to be a re-entrant one, since this is run in a thread
+ unsigned short xsubi[3]; // arguments for nrand48()
+ xsubi[0] = (reinterpret_cast<intptr_t>(object->ptr) & 0x7fff0000) >> 16;
+ xsubi[1] = (reinterpret_cast<intptr_t>(object->ptr) & 0x0000ffff);
+ xsubi[2] = 0x1234;
+
+ for (int i = 0; i < object->generation; ++i) {
+ nrand48(xsubi);
+ }
+ const char c = static_cast<char>(nrand48(xsubi));
+ memset(object->ptr, c, object->size);
+ }
+
+ // Check object contents
+ void CheckContents(const Object& object) {
+ // It doesn't need to be a great random number generator, but it does
+ // need to be a re-entrant one, since this is run in a thread
+ unsigned short xsubi[3]; // arguments for nrand48()
+ xsubi[0] = (reinterpret_cast<intptr_t>(object.ptr) & 0x7fff0000) >> 16;
+ xsubi[1] = (reinterpret_cast<intptr_t>(object.ptr) & 0x0000ffff);
+ xsubi[2] = 0x1234;
+
+ for (int i = 0; i < object.generation; ++i) {
+ nrand48(xsubi);
+ }
+
+ // For large objects, we just check a prefix/suffix
+ const char expected = static_cast<char>(nrand48(xsubi));
+ const int limit1 = object.size < 32 ? object.size : 32;
+ const int start2 = limit1 > object.size - 32 ? limit1 : object.size - 32;
+ for (int i = 0; i < limit1; ++i) {
+ CHECK_EQ(object.ptr[i], expected);
+ }
+ for (int i = start2; i < object.size; ++i) {
+ CHECK_EQ(object.ptr[i], expected);
+ }
+ }
+};
+
+static void* RunThread(void *vthread) {
+ TesterThread* t = reinterpret_cast<TesterThread*>(vthread);
+ t->Run();
+ return NULL;
+}
+
+static void TryHugeAllocation(size_t s) {
+ void* p = malloc(s);
+ CHECK(p == NULL); // huge allocation s should fail!
+}
+
+static void TestHugeAllocations() {
+ // Check that asking for stuff tiny bit smaller than largest possible
+ // size returns NULL.
+ for (size_t i = 0; i < 10000; i++) {
+ TryHugeAllocation(kMaxSize - i);
+ }
+
+ // Check that asking for stuff near signed/unsigned boundary returns NULL
+ for (size_t i = 0; i < 100; i++) {
+ TryHugeAllocation(kMaxSignedSize - i);
+ TryHugeAllocation(kMaxSignedSize + i);
+ }
+}
+
+static void TestCalloc(size_t n, size_t s, bool ok) {
+ char* p = reinterpret_cast<char*>(calloc(n, s));
+ fprintf(LOGSTREAM, "calloc(%x, %x): %p\n", n, s, p);
+  if (!ok) {
+    CHECK(p == NULL); // calloc(n, s) should fail
+  } else {
+    CHECK(p != NULL); // calloc(n, s) should succeed
+ for (int i = 0; i < n*s; i++) {
+ CHECK(p[i] == '\0');
+ }
+ free(p);
+ }
+}
+
+static void TestNewHandler() throw (std::bad_alloc) {
+ ++news_handled;
+ throw std::bad_alloc();
+}
+
+static void TestOneNew(void* (*func)(size_t)) {
+ // success test
+ try {
+ void* ptr = (*func)(kNotTooBig);
+ if (0 == ptr) {
+ fprintf(LOGSTREAM, "allocation should not have failed.\n");
+ abort();
+ }
+ } catch (...) {
+ fprintf(LOGSTREAM, "allocation threw unexpected exception.\n");
+ abort();
+ }
+
+ // failure test
+ // we should always receive a bad_alloc exception
+ try {
+ (*func)(kTooBig);
+ fprintf(LOGSTREAM, "allocation should have failed.\n");
+ abort();
+ } catch (const std::bad_alloc&) {
+ // correct
+ } catch (...) {
+ fprintf(LOGSTREAM, "allocation threw unexpected exception.\n");
+ abort();
+ }
+}
+
+static void TestNew(void* (*func)(size_t)) {
+ news_handled = 0;
+
+ // test without new_handler:
+ std::new_handler saved_handler = std::set_new_handler(0);
+ TestOneNew(func);
+
+ // test with new_handler:
+ std::set_new_handler(TestNewHandler);
+ TestOneNew(func);
+ if (news_handled != 1) {
+ fprintf(LOGSTREAM, "new_handler was not called.\n");
+ abort();
+ }
+ std::set_new_handler(saved_handler);
+}
+
+static void TestOneNothrowNew(void* (*func)(size_t, const std::nothrow_t&)) {
+ // success test
+ try {
+ void* ptr = (*func)(kNotTooBig, std::nothrow);
+ if (0 == ptr) {
+ fprintf(LOGSTREAM, "allocation should not have failed.\n");
+ abort();
+ }
+ } catch (...) {
+ fprintf(LOGSTREAM, "allocation threw unexpected exception.\n");
+ abort();
+ }
+
+ // failure test
+ // we should always receive a bad_alloc exception
+ try {
+ if ((*func)(kTooBig, std::nothrow) != 0) {
+ fprintf(LOGSTREAM, "allocation should have failed.\n");
+ abort();
+ }
+ } catch (...) {
+ fprintf(LOGSTREAM, "nothrow allocation threw unexpected exception.\n");
+ abort();
+ }
+}
+
+static void TestNothrowNew(void* (*func)(size_t, const std::nothrow_t&)) {
+ news_handled = 0;
+
+ // test without new_handler:
+ std::new_handler saved_handler = std::set_new_handler(0);
+ TestOneNothrowNew(func);
+
+ // test with new_handler:
+ std::set_new_handler(TestNewHandler);
+ TestOneNothrowNew(func);
+ if (news_handled != 1) {
+ fprintf(LOGSTREAM, "nothrow new_handler was not called.\n");
+ abort();
+ }
+ std::set_new_handler(saved_handler);
+}
+
+
+int main(int argc, char** argv) {
+ // TODO(csilvers): port MemoryUsage() over so the test can use that
+#if 0
+ // Allocate and deallocate blocks of increasing sizes to check if the alloc
+ // metadata fragments the memory. (Do not put other allocations/deallocations
+ // before this test, it may break).
+ {
+ size_t memory_usage = MemoryUsage(getpid());
+ fprintf(LOGSTREAM, "==== Testing fragmentation\n");
+ for ( int i = 200; i < 240; ++i ) {
+ int size = i << 20;
+ void *test1 = malloc(size);
+ for ( int j = 0; j < size; j += (1 << 12) ) {
+ static_cast<char*>(test1)[j] = 1;
+ }
+ free(test1);
+ }
+ // There may still be a bit of fragmentation at the beginning, until we
+ // reach kPageMapBigAllocationThreshold bytes so we check for
+ // 200 + 240 + margin.
+ CHECK_LT(MemoryUsage(getpid()), memory_usage + (450 << 20) );
+ }
+#endif
+
+ // Check that empty allocation works
+ fprintf(LOGSTREAM, "==== Testing empty allocation\n");
+ {
+ void* p1 = malloc(0);
+ CHECK(p1 != NULL);
+ void* p2 = malloc(0);
+ CHECK(p2 != NULL);
+ CHECK(p1 != p2);
+ free(p1);
+ free(p2);
+ }
+
+ // Check that 512MB can be allocated
+ fprintf(LOGSTREAM, "==== Testing large allocation\n");
+ {
+ void* p = malloc(512<<20);
+ CHECK(p != NULL); // could not allocate 512MB
+ free(p);
+ }
+
+ // Check that large allocations fail with NULL instead of crashing
+ fprintf(LOGSTREAM, "==== Testing out of memory\n");
+ for (int s = 0; ; s += (10<<20)) {
+ void* large_object = malloc(s);
+ if (large_object == NULL) break;
+ free(large_object);
+ }
+
+ // Check that huge allocations fail with NULL instead of crashing
+ fprintf(LOGSTREAM, "==== Testing huge allocations\n");
+ TestHugeAllocations();
+
+ // Check calloc() with various arguments
+ fprintf(LOGSTREAM, "==== Testing calloc\n");
+ TestCalloc(0, 0, true);
+ TestCalloc(0, 1, true);
+ TestCalloc(1, 1, true);
+ TestCalloc(1<<10, 0, true);
+ TestCalloc(1<<20, 0, true);
+ TestCalloc(0, 1<<10, true);
+ TestCalloc(0, 1<<20, true);
+ TestCalloc(1<<20, 2, true);
+ TestCalloc(2, 1<<20, true);
+ TestCalloc(1000, 1000, true);
+
+ TestCalloc(kMaxSize, 2, false);
+ TestCalloc(2, kMaxSize, false);
+ TestCalloc(kMaxSize, kMaxSize, false);
+
+ TestCalloc(kMaxSignedSize, 3, false);
+ TestCalloc(3, kMaxSignedSize, false);
+ TestCalloc(kMaxSignedSize, kMaxSignedSize, false);
+
+ fprintf(LOGSTREAM, "Testing operator new(nothrow).\n");
+ TestNothrowNew(&::operator new);
+ fprintf(LOGSTREAM, "Testing operator new[](nothrow).\n");
+ TestNothrowNew(&::operator new[]);
+ fprintf(LOGSTREAM, "Testing operator new.\n");
+ TestNew(&::operator new);
+ fprintf(LOGSTREAM, "Testing operator new[].\n");
+ TestNew(&::operator new[]);
+ fprintf(LOGSTREAM, "Done testing C++ operators.\n");
-#define BUFSIZE (100 << 10)
+ // Create threads
+ fprintf(LOGSTREAM, "==== Testing threaded allocation/deallocation\n");
+ threads = new TesterThread*[FLAGS_numthreads];
+ pthread_t* thread_ids = new pthread_t[FLAGS_numthreads];
+ for (int i = 0; i < FLAGS_numthreads; ++i) {
+ threads[i] = new TesterThread(i);
+ }
-int main(int argc, char **argv) {
- char *buf1 = (char *)malloc(BUFSIZE);
- memset(buf1, 0, BUFSIZE);
- printf("Allocated buf1 via malloc() at %p\n", buf1);
+ // Start
+ for (int i = 0; i < FLAGS_numthreads; ++i) {
+ CHECK_EQ(pthread_create(&thread_ids[i], NULL, RunThread, threads[i]), 0);
+ }
- char *buf2 = new char[BUFSIZE];
- memset(buf2, 0, BUFSIZE);
- printf("Allocated buf2 via new at %p\n", buf2);
-
- free(buf1);
- delete[] buf2;
+ // Wait
+ for (int i = 0; i < FLAGS_numthreads; ++i) {
+ void* junk;
+ CHECK_EQ(pthread_join(thread_ids[i], &junk), 0);
+ }
- char buffer[10 << 10];
- MallocExtension::instance()->GetStats(buffer, sizeof(buffer));
- printf("Malloc stats:\n%s\n", buffer);
+ for (int i = 0; i < FLAGS_numthreads; ++i) delete threads[i]; // Cleanup
+ fprintf(LOGSTREAM, "PASS\n");
return 0;
}