summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> 2007-03-22 04:55:49 +0000
committer csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> 2007-03-22 04:55:49 +0000
commit 8e188310f7d8732d81b7b04f193f89964b7af6c5 (patch)
tree fea586faa0bc381027f2f47aee513fc9ea4b6e96
parent c3b96b3ac552160abde541bba8ac7b4f8338efa0 (diff)
download gperftools-8e188310f7d8732d81b7b04f193f89964b7af6c5.tar.gz
Wed Jun 14 15:11:14 2006 Google Inc. <opensource@google.com>
* google-perftools: version 0.8 release
* Experimental support for remote profiling added to pprof (many)
* Fixed race condition in ProfileData::FlushTable (etune)
* Better support for weird /proc maps (maxim, mec)
* Fix heap-checker interaction with gdb (markus)
* Better 64-bit support in pprof (aruns)
* Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay)
* Cast syscall(SYS_mmap); works on more 64-bit systems now (menage)
* Document the text output of pprof! (csilvers)
* Better compiler support for no-THREADS and for old compilers (csilvers)
* Make libunwind the default stack unwinder for x86-64 (aruns)
* Somehow the COPYING file got erased. Regenerate it (csilvers)

git-svn-id: http://gperftools.googlecode.com/svn/trunk@23 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
-rw-r--r--COPYING28
-rw-r--r--ChangeLog15
-rw-r--r--Makefile.am33
-rw-r--r--aclocal.m4184
-rwxr-xr-xconfigure259
-rw-r--r--configure.ac2
-rw-r--r--doc/cpu_profiler.html18
-rw-r--r--doc/pprof_remote_servers.html190
-rw-r--r--src/base/linux_syscall_support.h125
-rw-r--r--src/base/linuxthreads.c37
-rw-r--r--src/base/thread_lister.c16
-rw-r--r--src/google/heap-checker.h13
-rw-r--r--src/heap-checker.cc123
-rw-r--r--src/malloc_extension.cc15
-rw-r--r--src/malloc_hook.cc2
-rwxr-xr-xsrc/pprof989
-rw-r--r--src/profiler.cc10
-rw-r--r--src/stacktrace.cc9
-rw-r--r--src/stacktrace_libunwind-inl.h4
-rw-r--r--src/tcmalloc.cc56
-rw-r--r--src/tests/heap-checker_unittest.cc26
-rw-r--r--src/tests/tcmalloc_unittest.cc48
22 files changed, 1719 insertions, 483 deletions
diff --git a/COPYING b/COPYING
index e69de29..e4956cf 100644
--- a/COPYING
+++ b/COPYING
@@ -0,0 +1,28 @@
+Copyright (c) 2005, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/ChangeLog b/ChangeLog
index 22597c3..90bf766 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -85,3 +85,18 @@ Thu Apr 13 20:59:09 2006 Google Inc. <opensource@google.com>
* Syscall support for older kernels, including _syscall6 (markus)
* Support PIC mode (markus, mbland, iant)
* Better support for running in non-threaded contexts (csilvers)
+
+Wed Jun 14 15:11:14 2006 Google Inc. <opensource@google.com>
+
+ * google-perftools: version 0.8 release
+ * Experimental support for remote profiling added to pprof (many)
+ * Fixed race condition in ProfileData::FlushTable (etune)
+ * Better support for weird /proc maps (maxim, mec)
+ * Fix heap-checker interaction with gdb (markus)
+ * Better 64-bit support in pprof (aruns)
+ * Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay)
+ * Cast syscall(SYS_mmap); works on more 64-bit systems now (menage)
+ * Document the text output of pprof! (csilvers)
+ * Better compiler support for no-THREADS and for old compilers (csilvers)
+ * Make libunwind the default stack unwinder for x86-64 (aruns)
+ * Somehow the COPYING file got erased. Regenerate it (csilvers)
diff --git a/Makefile.am b/Makefile.am
index 8d39bbb..83fa966 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -115,21 +115,24 @@ libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -export-symbols-regex $(TCMAL
libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
### Unittests
-TESTS += malloc_unittest
-MALLOC_UNITEST_INCLUDES = src/config.h \
- src/google/malloc_extension.h \
- src/google/malloc_hook.h \
- src/base/basictypes.h \
- src/google/perftools/hash_set.h \
- src/maybe_threads.h
-malloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
- src/malloc_hook.cc \
- src/malloc_extension.cc \
- src/maybe_threads.cc \
- $(MALLOC_UNITTEST_INCLUDES)
-malloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS)
-malloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
-malloc_unittest_LDADD = $(PTHREAD_LIBS)
+
+# Commented out for the moment because malloc(very_big_num) is broken in
+# standard libc! At least, in some situations, some of the time.
+## TESTS += malloc_unittest
+## MALLOC_UNITTEST_INCLUDES = src/config.h \
+## src/google/malloc_extension.h \
+## src/google/malloc_hook.h \
+## src/base/basictypes.h \
+## src/google/perftools/hash_set.h \
+## src/maybe_threads.h
+## malloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
+## src/malloc_hook.cc \
+## src/malloc_extension.cc \
+## src/maybe_threads.cc \
+## $(MALLOC_UNITTEST_INCLUDES)
+## malloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS)
+## malloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
+## malloc_unittest_LDADD = $(PTHREAD_LIBS)
TESTS += tcmalloc_unittest
TCMALLOC_UNITTEST_INCLUDES = src/google/malloc_extension.h
diff --git a/aclocal.m4 b/aclocal.m4
index 0b68740..d98f614 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -6751,7 +6751,61 @@ AC_DEFUN([AC_COMPILER_CHARACTERISTICS],
# This was retrieved from
-# http://www.gnu.org/software/ac-archive/htmldoc/acx_pthread.html
+# http://0pointer.de/cgi-bin/viewcvs.cgi/trunk/common/acx_pthread.m4?rev=1220
+# See also (perhaps for new versions?)
+# http://0pointer.de/cgi-bin/viewcvs.cgi/trunk/common/acx_pthread.m4
+
+dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+dnl
+dnl @summary figure out how to build C programs using POSIX threads
+dnl
+dnl This macro figures out how to build C programs using POSIX threads.
+dnl It sets the PTHREAD_LIBS output variable to the threads library and
+dnl linker flags, and the PTHREAD_CFLAGS output variable to any special
+dnl C compiler flags that are needed. (The user can also force certain
+dnl compiler flags/libs to be tested by setting these environment
+dnl variables.)
+dnl
+dnl Also sets PTHREAD_CC to any special C compiler that is needed for
+dnl multi-threaded programs (defaults to the value of CC otherwise).
+dnl (This is necessary on AIX to use the special cc_r compiler alias.)
+dnl
+dnl NOTE: You are assumed to not only compile your program with these
+dnl flags, but also link it with them as well. e.g. you should link
+dnl with $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS
+dnl $LIBS
+dnl
+dnl If you are only building threads programs, you may wish to use
+dnl these variables in your default LIBS, CFLAGS, and CC:
+dnl
+dnl LIBS="$PTHREAD_LIBS $LIBS"
+dnl CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+dnl CC="$PTHREAD_CC"
+dnl
+dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute
+dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to
+dnl that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
+dnl
+dnl ACTION-IF-FOUND is a list of shell commands to run if a threads
+dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands to
+dnl run it if it is not found. If ACTION-IF-FOUND is not specified, the
+dnl default action will define HAVE_PTHREAD.
+dnl
+dnl Please let the authors know if this macro fails on any platform, or
+dnl if you have any other suggestions or comments. This macro was based
+dnl on work by SGJ on autoconf scripts for FFTW (www.fftw.org) (with
+dnl help from M. Frigo), as well as ac_pthread and hb_pthread macros
+dnl posted by Alejandro Forero Cuervo to the autoconf macro repository.
+dnl We are also grateful for the helpful feedback of numerous users.
+dnl
+dnl @category InstalledPackages
+dnl @author Steven G. Johnson <stevenj@alum.mit.edu>
+dnl @version 2005-06-15
+dnl @license GPLWithACException
+dnl
+dnl Checks for GCC shared/pthread inconsistency based on work by
+dnl Marcin Owsiany <marcin@owsiany.pl>
+
AC_DEFUN([ACX_PTHREAD], [
AC_REQUIRE([AC_CANONICAL_HOST])
@@ -6809,6 +6863,7 @@ acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -m
# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
# doesn't hurt to check since this sometimes defines pthreads too;
# also defines -D_REENTRANT)
+# ... -mt is also the pthreads flag for HP/aCC
# pthread: Linux, etcetera
# --thread-safe: KAI C++
# pthread-config: use pthread-config program (for GNU Pth library)
@@ -6818,13 +6873,13 @@ case "${host_cpu}-${host_os}" in
# On Solaris (at least, for some versions), libc contains stubbed
# (non-functional) versions of the pthreads routines, so link-based
- # tests will erroneously succeed. (We need to link with -pthread or
+ # tests will erroneously succeed. (We need to link with -pthreads/-mt/
# -lpthread.) (The stubs are missing pthread_cleanup_push, or rather
# a function called by this macro, so we could check for that, but
# who knows whether they'll stub that too in a future libc.) So,
# we'll just look for -pthreads and -lpthread first:
- acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags"
+ acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags"
;;
esac
@@ -6841,12 +6896,12 @@ for flag in $acx_pthread_flags; do
PTHREAD_CFLAGS="$flag"
;;
- pthread-config)
- AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no)
- if test x"$acx_pthread_config" = xno; then continue; fi
- PTHREAD_CFLAGS="`pthread-config --cflags`"
- PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
- ;;
+ pthread-config)
+ AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no)
+ if test x"$acx_pthread_config" = xno; then continue; fi
+ PTHREAD_CFLAGS="`pthread-config --cflags`"
+ PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
+ ;;
*)
AC_MSG_CHECKING([for the pthreads library -l$flag])
@@ -6895,12 +6950,12 @@ if test "x$acx_pthread_ok" = xyes; then
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
# Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
- AC_MSG_CHECKING([for joinable pthread attribute])
- attr_name=unknown
- for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
- AC_TRY_LINK([#include <pthread.h>], [int attr=$attr;],
+ AC_MSG_CHECKING([for joinable pthread attribute])
+ attr_name=unknown
+ for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
+ AC_TRY_LINK([#include <pthread.h>], [int attr=$attr; return attr;],
[attr_name=$attr; break])
- done
+ done
AC_MSG_RESULT($attr_name)
if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name,
@@ -6924,6 +6979,107 @@ if test "x$acx_pthread_ok" = xyes; then
# More AIX lossage: must compile with cc_r
AC_CHECK_PROG(PTHREAD_CC, cc_r, cc_r, ${CC})
+
+ # The next part tries to detect GCC inconsistency with -shared on some
+ # architectures and systems. The problem is that in certain
+ # configurations, when -shared is specified, GCC "forgets" to
+ # internally use various flags which are still necessary.
+
+ # The -shared checks below only concern GCC, so skip them for any
+ # other compiler.
+ AC_MSG_CHECKING([whether to check for GCC pthread/shared inconsistencies])
+ if test x"$GCC" != xyes; then
+ AC_MSG_RESULT([no])
+ else
+ AC_MSG_RESULT([yes])
+
+ # In order not to create several levels of indentation, we test
+ # the value of "$ok" until we find out the cure or run out of
+ # ideas.
+ ok="no"
+
+ #
+ # Prepare the flags
+ #
+ save_CFLAGS="$CFLAGS"
+ save_LIBS="$LIBS"
+ save_CC="$CC"
+ # Try with the flags determined by the earlier checks.
+ #
+ # -Wl,-z,defs forces link-time symbol resolution, so that the
+ # linking checks with -shared actually have any value
+ #
+ # FIXME: -fPIC is required for -shared on many architectures,
+ # so we specify it here, but the right way would probably be to
+ # properly detect whether it is actually required.
+ CFLAGS="-shared -fPIC -Wl,-z,defs $CFLAGS $PTHREAD_CFLAGS"
+ LIBS="$PTHREAD_LIBS $LIBS"
+ CC="$PTHREAD_CC"
+
+ AC_MSG_CHECKING([whether -pthread is sufficient with -shared])
+ AC_TRY_LINK([#include <pthread.h>],
+ [pthread_t th; pthread_join(th, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
+ [ok=yes])
+
+ if test "x$ok" = xyes; then
+ AC_MSG_RESULT([yes])
+ else
+ AC_MSG_RESULT([no])
+ fi
+
+ #
+ # Linux gcc on some architectures such as mips/mipsel forgets
+ # about -lpthread
+ #
+ if test x"$ok" = xno; then
+ AC_MSG_CHECKING([whether -lpthread fixes that])
+ LIBS="-lpthread $PTHREAD_LIBS $save_LIBS"
+ AC_TRY_LINK([#include <pthread.h>],
+ [pthread_t th; pthread_join(th, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
+ [ok=yes])
+
+ if test "x$ok" = xyes; then
+ AC_MSG_RESULT([yes])
+ PTHREAD_LIBS="-lpthread $PTHREAD_LIBS"
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ #
+ # FreeBSD 4.10 gcc forgets to use -lc_r instead of -lc
+ #
+ if test x"$ok" = xno; then
+ AC_MSG_CHECKING([whether -lc_r fixes that])
+ LIBS="-lc_r $PTHREAD_LIBS $save_LIBS"
+ AC_TRY_LINK([#include <pthread.h>],
+ [pthread_t th; pthread_join(th, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
+ [ok=yes])
+
+ if test "x$ok" = xyes; then
+ AC_MSG_RESULT([yes])
+ PTHREAD_LIBS="-lc_r $PTHREAD_LIBS"
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ if test x"$ok" = xno; then
+ # OK, we have run out of ideas
+ AC_MSG_WARN([Impossible to determine how to use pthreads with shared libraries])
+
+ # so it's not safe to assume that we may use pthreads
+ acx_pthread_ok=no
+ fi
+
+ CFLAGS="$save_CFLAGS"
+ LIBS="$save_LIBS"
+ CC="$save_CC"
+ fi
else
PTHREAD_CC="$CC"
fi
diff --git a/configure b/configure
index 2a11d9e..9147d97 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.57 for google-perftools 0.7.
+# Generated by GNU Autoconf 2.57 for google-perftools 0.8.
#
# Report bugs to <opensource@google.com>.
#
@@ -422,8 +422,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='google-perftools'
PACKAGE_TARNAME='google-perftools'
-PACKAGE_VERSION='0.7'
-PACKAGE_STRING='google-perftools 0.7'
+PACKAGE_VERSION='0.8'
+PACKAGE_STRING='google-perftools 0.8'
PACKAGE_BUGREPORT='opensource@google.com'
ac_unique_file="README"
@@ -953,7 +953,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures google-perftools 0.7 to adapt to many kinds of systems.
+\`configure' configures google-perftools 0.8 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1019,7 +1019,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of google-perftools 0.7:";;
+ short | recursive ) echo "Configuration of google-perftools 0.8:";;
esac
cat <<\_ACEOF
@@ -1125,7 +1125,7 @@ fi
test -n "$ac_init_help" && exit 0
if $ac_init_version; then
cat <<\_ACEOF
-google-perftools configure 0.7
+google-perftools configure 0.8
generated by GNU Autoconf 2.57
Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
@@ -1140,7 +1140,7 @@ cat >&5 <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by google-perftools $as_me 0.7, which was
+It was created by google-perftools $as_me 0.8, which was
generated by GNU Autoconf 2.57. Invocation command line was
$ $0 $@
@@ -1733,7 +1733,7 @@ fi
# Define the identity of the package.
PACKAGE=google-perftools
- VERSION=0.7
+ VERSION=0.8
cat >>confdefs.h <<_ACEOF
@@ -21171,6 +21171,7 @@ acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -m
# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
# doesn't hurt to check since this sometimes defines pthreads too;
# also defines -D_REENTRANT)
+# ... -mt is also the pthreads flag for HP/aCC
# pthread: Linux, etcetera
# --thread-safe: KAI C++
# pthread-config: use pthread-config program (for GNU Pth library)
@@ -21180,13 +21181,13 @@ case "${host_cpu}-${host_os}" in
# On Solaris (at least, for some versions), libc contains stubbed
# (non-functional) versions of the pthreads routines, so link-based
- # tests will erroneously succeed. (We need to link with -pthread or
+ # tests will erroneously succeed. (We need to link with -pthreads/-mt/
# -lpthread.) (The stubs are missing pthread_cleanup_push, or rather
# a function called by this macro, so we could check for that, but
# who knows whether they'll stub that too in a future libc.) So,
# we'll just look for -pthreads and -lpthread first:
- acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags"
+ acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags"
;;
esac
@@ -21205,8 +21206,8 @@ echo $ECHO_N "checking whether pthreads work with $flag... $ECHO_C" >&6
PTHREAD_CFLAGS="$flag"
;;
- pthread-config)
- # Extract the first word of "pthread-config", so it can be a program name with args.
+ pthread-config)
+ # Extract the first word of "pthread-config", so it can be a program name with args.
set dummy pthread-config; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
@@ -21242,10 +21243,10 @@ else
echo "${ECHO_T}no" >&6
fi
- if test x"$acx_pthread_config" = xno; then continue; fi
- PTHREAD_CFLAGS="`pthread-config --cflags`"
- PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
- ;;
+ if test x"$acx_pthread_config" = xno; then continue; fi
+ PTHREAD_CFLAGS="`pthread-config --cflags`"
+ PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
+ ;;
*)
echo "$as_me:$LINENO: checking for the pthreads library -l$flag" >&5
@@ -21328,11 +21329,11 @@ if test "x$acx_pthread_ok" = xyes; then
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
# Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
- echo "$as_me:$LINENO: checking for joinable pthread attribute" >&5
+ echo "$as_me:$LINENO: checking for joinable pthread attribute" >&5
echo $ECHO_N "checking for joinable pthread attribute... $ECHO_C" >&6
- attr_name=unknown
- for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
- cat >conftest.$ac_ext <<_ACEOF
+ attr_name=unknown
+ for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
+ cat >conftest.$ac_ext <<_ACEOF
#line $LINENO "configure"
/* confdefs.h. */
_ACEOF
@@ -21343,7 +21344,7 @@ cat >>conftest.$ac_ext <<_ACEOF
int
main ()
{
-int attr=$attr;
+int attr=$attr; return attr;
;
return 0;
}
@@ -21367,7 +21368,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
- done
+ done
echo "$as_me:$LINENO: result: $attr_name" >&5
echo "${ECHO_T}$attr_name" >&6
if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
@@ -21431,6 +21432,216 @@ else
echo "${ECHO_T}no" >&6
fi
+
+ # The next part tries to detect GCC inconsistency with -shared on some
+ # architectures and systems. The problem is that in certain
+ # configurations, when -shared is specified, GCC "forgets" to
+ # internally use various flags which are still necessary.
+
+ # First, check whether caller wants us to skip -shared checks
+ # this is useful
+ echo "$as_me:$LINENO: checking whether to check for GCC pthread/shared inconsistencies" >&5
+echo $ECHO_N "checking whether to check for GCC pthread/shared inconsistencies... $ECHO_C" >&6
+ if test x"$GCC" != xyes; then
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+ else
+ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+
+ # In order not to create several levels of indentation, we test
+ # the value of "$ok" until we find out the cure or run out of
+ # ideas.
+ ok="no"
+
+ #
+ # Prepare the flags
+ #
+ save_CFLAGS="$CFLAGS"
+ save_LIBS="$LIBS"
+ save_CC="$CC"
+ # Try with the flags determined by the earlier checks.
+ #
+ # -Wl,-z,defs forces link-time symbol resolution, so that the
+ # linking checks with -shared actually have any value
+ #
+ # FIXME: -fPIC is required for -shared on many architectures,
+ # so we specify it here, but the right way would probably be to
+ # properly detect whether it is actually required.
+ CFLAGS="-shared -fPIC -Wl,-z,defs $CFLAGS $PTHREAD_CFLAGS"
+ LIBS="$PTHREAD_LIBS $LIBS"
+ CC="$PTHREAD_CC"
+
+ echo "$as_me:$LINENO: checking whether -pthread is sufficient with -shared" >&5
+echo $ECHO_N "checking whether -pthread is sufficient with -shared... $ECHO_C" >&6
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <pthread.h>
+int
+main ()
+{
+pthread_t th; pthread_join(th, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_create(0,0,0,0); pthread_cleanup_pop(0);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ok=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+
+ if test "x$ok" = xyes; then
+ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+ else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+ fi
+
+ #
+ # Linux gcc on some architectures such as mips/mipsel forgets
+ # about -lpthread
+ #
+ if test x"$ok" = xno; then
+ echo "$as_me:$LINENO: checking whether -lpthread fixes that" >&5
+echo $ECHO_N "checking whether -lpthread fixes that... $ECHO_C" >&6
+ LIBS="-lpthread $PTHREAD_LIBS $save_LIBS"
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <pthread.h>
+int
+main ()
+{
+pthread_t th; pthread_join(th, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_create(0,0,0,0); pthread_cleanup_pop(0);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ok=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+
+ if test "x$ok" = xyes; then
+ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+ PTHREAD_LIBS="-lpthread $PTHREAD_LIBS"
+ else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+ fi
+ fi
+ #
+ # FreeBSD 4.10 gcc forgets to use -lc_r instead of -lc
+ #
+ if test x"$ok" = xno; then
+ echo "$as_me:$LINENO: checking whether -lc_r fixes that" >&5
+echo $ECHO_N "checking whether -lc_r fixes that... $ECHO_C" >&6
+ LIBS="-lc_r $PTHREAD_LIBS $save_LIBS"
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <pthread.h>
+int
+main ()
+{
+pthread_t th; pthread_join(th, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_create(0,0,0,0); pthread_cleanup_pop(0);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ok=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+
+ if test "x$ok" = xyes; then
+ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+ PTHREAD_LIBS="-lc_r $PTHREAD_LIBS"
+ else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+ fi
+ fi
+ if test x"$ok" = xno; then
+ # OK, we have run out of ideas
+ { echo "$as_me:$LINENO: WARNING: Impossible to determine how to use pthreads with shared libraries" >&5
+echo "$as_me: WARNING: Impossible to determine how to use pthreads with shared libraries" >&2;}
+
+ # so it's not safe to assume that we may use pthreads
+ acx_pthread_ok=no
+ fi
+
+ CFLAGS="$save_CFLAGS"
+ LIBS="$save_LIBS"
+ CC="$save_CC"
+ fi
else
PTHREAD_CC="$CC"
fi
@@ -22393,7 +22604,7 @@ _ASBOX
} >&5
cat >&5 <<_CSEOF
-This file was extended by google-perftools $as_me 0.7, which was
+This file was extended by google-perftools $as_me 0.8, which was
generated by GNU Autoconf 2.57. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -22456,7 +22667,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-google-perftools config.status 0.7
+google-perftools config.status 0.8
configured by $0, generated by GNU Autoconf 2.57,
with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/configure.ac b/configure.ac
index 2e1ab8b..f72e687 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,7 +5,7 @@
# make sure we're interpreted by some minimal autoconf
AC_PREREQ(2.57)
-AC_INIT(google-perftools, 0.7, opensource@google.com)
+AC_INIT(google-perftools, 0.8, opensource@google.com)
# The argument here is just something that should be in the current directory
# (for sanity checking)
AC_CONFIG_SRCDIR(README)
diff --git a/doc/cpu_profiler.html b/doc/cpu_profiler.html
index bc18940..ff98321 100644
--- a/doc/cpu_profiler.html
+++ b/doc/cpu_profiler.html
@@ -109,6 +109,24 @@ detail below.</p>
annotated with the flat and cumulative sample counts at each PC value.
</pre>
+<h3>Analyzing Text Output</h3>
+
+<p>Text mode has lines of output that look like this:</p>
+<pre>
+ 14 2.1% 17.2% 58 8.7% std::_Rb_tree::find
+</pre>
+
+<p>Here is how to interpret the columns:</p>
+<ol>
+ <li> Number of profiling samples in this function
+ <li> Percentage of profiling samples in this function
+ <li> Percentage of profiling samples in the functions printed so far
+ <li> Number of profiling samples in this function and its callees
+ <li> Percentage of profiling samples in this function and its callees
+ <li> Function name
+</ol>
+
+
<h3>Node Information</h3>
<p>In the various graphical modes of pprof, the output is a call graph
diff --git a/doc/pprof_remote_servers.html b/doc/pprof_remote_servers.html
new file mode 100644
index 0000000..b93ccd3
--- /dev/null
+++ b/doc/pprof_remote_servers.html
@@ -0,0 +1,190 @@
+<HTML>
+
+<HEAD>
+<title>pprof and Remote Servers</title>
+</HEAD>
+
+<BODY>
+
+<h1><code>pprof</code> and Remote Servers</h1>
+
+<p>In mid-2006, we added an experimental facility to <A
+HREF="cpu_profiler.html">pprof</A>, the tool that analyzes CPU and
+heap profiles. This facility allows you to collect profile
+information from running applications. It makes it easy to collect
+profile information without having to stop the program first, and
+without having to log into the machine where the application is
+running. This is meant to be used on webservers, but will work on any
+application that can be modified to accept TCP connections on a port
+of its choosing, and to respond to HTTP requests on that port.</p>
+
+<p>We do not currently have infrastructure, such as apache modules,
+that you can pop into a webserver or other application to get the
+necessary functionality "for free." However, it's easy to generate
+the necessary data, which should allow the interested developer to add
+the necessary support into his or her applications.</p>
+
+<p>To use <code>pprof</code> in this experimental "server" mode, you
+give the script a host and port it should query, replacing the normal
+commandline arguments of application + profile file:</p>
+<pre>
+ % pprof internalweb.mycompany.com:80
+</pre>
+
+<p>The host must be listening on that port, and be able to accept HTTP/1.0
+requests -- sent via <code>wget</code> and <code>curl</code> -- for
+several urls. The following sections list the urls that
+<code>pprof</code> can send, and the responses it expects in
+return.</p>
+
+
+<ul><li> <code><b>/pprof/heap</b></code>
+
+<p><code>pprof</code> asks for the url <code>/pprof/heap</code> to
+get heap information. The actual url is controlled via the variable
+<code>HEAP_PAGE</code> in the <code>pprof</code> script, so you
+can change it if you'd like.</p>
+
+<p>The server should respond by calling</p>
+<pre>
+ MallocExtension::instance()->GetHeapSample(&output);
+</pre>
+<p>and sending <code>output</code> back as an HTTP response to
+<code>pprof</code>. <code>MallocExtension</code> is defined in the
+header file <code>google/malloc_extension.h</code>.</p>
+
+<p>Here's an example, from an actual Google webserver, of what the
+output should look like:</p>
+<pre>
+heap profile: 9369: 126987529 [ 9369: 126987529] @ heap
+ 2: 1024 [ 2: 1024] @ 0x87da913 0x8923ad4 0x891d4c2 0x892de12 0x8930519 0x83a16c2 0x836cb38 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
+ 1: 36 [ 1: 36] @ 0x87da913 0x83a0929 0x836cb38 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
+ 308: 10092544 [ 308: 10092544] @ 0x87da913 0x8970d66 0x8970e64 0x896e8e2 0x88e69d2 0x88e6add 0x88e6dec 0x88e7384 0x88e73fa 0x8838793 0x8838b36 0x88395f8 0x88f5a4b 0x890d03a 0x890d65a 0x8917666 0x890d1f3 0x890e6e4 0x8349c1b 0x10a3177 0x8349961
+[...]
+</pre>
+
+
+</li><li> <code><b>/pprof/growth</b></code>
+
+<p><code>pprof</code> asks for the url <code>/pprof/growth</code> to
+get heap-profiling delta (growth) information. The actual url is
+controlled via the variable <code>GROWTH_PAGE</code> in the
+<code>pprof</code> script, so you can change it if you'd like.</p>
+
+<p>The server should respond by calling</p>
+<pre>
+ MallocExtension::instance()->GetHeapGrowthStacks(&output);
+</pre>
+<p>and sending <code>output</code> back as an HTTP response to
+<code>pprof</code>. <code>MallocExtension</code> is defined in the
+header file <code>google/malloc_extension.h</code>.</p>
+
+<p>Here's an example, from an actual Google webserver, of what the
+output should look like:</p>
+<pre>
+heap profile: 741: 812122112 [ 741: 812122112] @ growth
+ 1: 1572864 [ 1: 1572864] @ 0x87da564 0x87db8a3 0x84787a4 0x846e851 0x836d12f 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
+ 1: 1048576 [ 1: 1048576] @ 0x87d92e8 0x87d9213 0x87d9178 0x87d94d3 0x87da9da 0x8a364ff 0x8a437e7 0x8ab7d23 0x8ab7da9 0x8ac7454 0x8348465 0x10a3161 0x8349961
+[...]
+</pre>
+
+
+</li><li> <code><b>/pprof/profile</b></code>
+
+<p><code>pprof</code> asks for the url
+<code>/pprof/profile?seconds=XX</code> to get cpu-profiling
+information. The actual url is controlled via the variable
+<code>PROFILE_PAGE</code> in the <code>pprof</code> script, so you can
+change it if you'd like.</p>
+
+<p>The server should respond by calling
+<code>ProfilerStart(filename)</code>, continuing to do its work, and
+then, XX seconds later, calling <code>ProfilerStop()</code>. (These
+functions are declared in <code>google/profiler.h</code>.) The
+application is responsible for picking a unique filename for
+<code>ProfilerStart()</code>. After calling
+<code>ProfilerStop()</code>, the server should read the contents of
+<code>filename</code> and send them back as an HTTP response to
+<code>pprof</code>.</p>
+
+<p>Obviously, to get useful profile information the application must
+continue to run in the XX seconds that the profiler is running. Thus,
+the profile start-stop calls should be done in a separate thread, or
+be otherwise non-blocking.</p>
+
+<p>The profiler output file is binary, but near the end of it, it
+should have lines of text somewhat like this:</p>
+<pre>
+01016000-01017000 rw-p 00015000 03:01 59314 /lib/ld-2.2.2.so
+</pre>
+
+
+</li><li> <code><b>/pprof/contention</b></code>
+
+<p>This is intended to be able to profile (thread) lock contention in
+addition to CPU and memory use. It's not yet usable.</p>
+
+
+</li><li> <code><b>/pprof/cmdline</b></code>
+
+<p><code>pprof</code> asks for the url <code>/pprof/cmdline</code> to
+figure out what application it's profiling. The actual url is
+controlled via the variable <code>PROGRAM_NAME_PAGE</code> in the
+<code>pprof</code> script, so you can change it if you'd like.</p>
+
+<p>The server should respond by reading the contents of
+<code>/proc/self/cmdline</code>, converting all internal NUL (\0)
+characters to newlines, and sending the result back as an HTTP
+response to <code>pprof</code>.</p>
+
+<p>Here's an example return value:</p>
+<pre>
+/root/server/custom_webserver
+80
+--configfile=/root/server/ws.config
+</pre>
+
+
+</li><li> <code><b>/pprof/symbol</b></code>
+
+<p><code>pprof</code> asks for the url <code>/pprof/symbol</code> to
+map from hex addresses to function names. The actual url is
+controlled via the variable <code>SYMBOL_PAGE</code> in the
+<code>pprof</code> script, so you can change it if you'd like.</p>
+
+<p>This is perhaps the hardest request to write code for, because
+it must accept POST requests. This means that after the HTTP headers,
+pprof will pass in a list of hex addresses connected by
+<code>+</code>, like so:</p>
+<pre>
+ curl -d '0x0824d061+0x0824d1cf' http://remote_host:80/pprof/symbol
+</pre>
+
+<p>The server should read the POST data, which will be in one line,
+and for each hex value, should write one line of output to the output
+stream, like so:</p>
+<pre>
+&lt;hex address&gt;&lt;tab&gt;&lt;function name&gt;
+</pre>
+<p>For instance:</p>
+<pre>
+0x08b2dabd _Update
+</pre>
+
+<p>The other reason this is the most difficult request to implement
+is that the application will have to figure out for itself how to map
+from address to function name. One possibility is to run <code>nm -C
+-n &lt;program name&gt;</code> to get the mappings, either statically
+(say at program-compile time), or dynamically, by having the
+application call out to <code>nm</code> for every
+<code>pprof/symbol</code> call (presumably with some caching!).</p>
+
+<p><code>pprof</code> itself does just this for local profiles (not
+ones that talk to remote servers); look at the subroutine
+<code>GetProcedureBoundaries</code>.</p>
+
+</li></ul>
+<hr>
+Last modified: Mon Jun 12 21:30:14 PDT 2006
+</body>
+</html>
diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h
index 319455e..0dfdd8d 100644
--- a/src/base/linux_syscall_support.h
+++ b/src/base/linux_syscall_support.h
@@ -45,6 +45,14 @@
#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) && \
defined(__linux)
+#ifdef __cplusplus
+/* Some system header files in older versions of gcc neglect to properly
+ * handle being included from C++. As it appears to be harmless to have
+ * multiple nested 'extern "C"' blocks, just add another one here.
+ */
+extern "C" {
+#endif
+
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
@@ -79,35 +87,47 @@
#if defined(__i386__)
#ifndef __NR_getdents64
-#define __NR_getdents64 220
+#define __NR_getdents64 220
#endif
#ifndef __NR_gettid
-#define __NR_gettid 224
+#define __NR_gettid 224
#endif
#ifndef __NR_futex
-#define __NR_futex 240
+#define __NR_futex 240
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 241
+#define __NR_sched_getaffinity 242
#endif
/* End of i386 definitions */
#elif defined(__ARM_ARCH_3__)
#ifndef __NR_getdents64
-#define __NR_getdents64 217
+#define __NR_getdents64 (__NR_SYSCALL_BASE + 217)
#endif
#ifndef __NR_gettid
-#define __NR_gettid 224
+#define __NR_gettid (__NR_SYSCALL_BASE + 224)
#endif
#ifndef __NR_futex
-#define __NR_futex 240
+#define __NR_futex (__NR_SYSCALL_BASE + 240)
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241)
+#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242)
#endif
/* End of ARM 3 definitions */
#elif defined(__x86_64__)
#ifndef __NR_getdents64
-#define __NR_getdents64 217
+#define __NR_getdents64 217
#endif
#ifndef __NR_gettid
-#define __NR_gettid 186
+#define __NR_gettid 186
#endif
#ifndef __NR_futex
-#define __NR_futex 202
+#define __NR_futex 202
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 203
+#define __NR_sched_getaffinity 204
#endif
/* End of x86-64 definitions */
#endif
@@ -306,9 +326,11 @@ struct dirent64;
#endif
#if defined(__x86_64__)
struct msghdr;
+ struct sockaddr;
#define __NR_sys_mmap __NR_mmap
#define __NR_sys_recvmsg __NR_recvmsg
#define __NR_sys_sendmsg __NR_sendmsg
+ #define __NR_sys_sendto __NR_sendto
#define __NR_sys_shutdown __NR_shutdown
#define __NR_sys_rt_sigaction __NR_rt_sigaction
#define __NR_sys_rt_sigprocmask __NR_rt_sigprocmask
@@ -322,6 +344,10 @@ struct dirent64;
struct msghdr*, m, int, f);
static inline _syscall3(int, sys_sendmsg, int, s,
const struct msghdr*, m, int, f);
+ static inline _syscall6(int, sys_sendto, int, s,
+ const void*, m, size_t, l,
+ int, f,
+ const struct sockaddr*, a, int, t);
static inline _syscall2(int, sys_shutdown, int, s,
int, h);
static inline _syscall4(int, sys_rt_sigaction, int, s,
@@ -378,6 +404,8 @@ struct dirent64;
}
#define sys_recvmsg(s,m,f) sys_socketcall(17, (s), (m), (f))
#define sys_sendmsg(s,m,f) sys_socketcall(16, (s), (m), (f))
+ #define sys_sendto(s,m,l,f,a,t) sys_socketcall(11, (s), (m), (l),(f),\
+ (a), (t))
#define sys_shutdown(s,h) sys_socketcall(13, (s), (h))
#define sys_socket(d,t,p) sys_socketcall(1, (d), (t), (p))
#define sys_socketpair(d,t,p,s) sys_socketcall(8, (d), (t), (p),(s))
@@ -387,39 +415,41 @@ struct dirent64;
static inline _syscall3(pid_t, sys_waitpid, pid_t, p,
int*, s, int, o);
#endif
- #define __NR_sys_close __NR_close
- #define __NR_sys_dup __NR_dup
- #define __NR_sys_dup2 __NR_dup2
- #define __NR_sys_execve __NR_execve
- #define __NR_sys__exit __NR_exit
- #define __NR_sys_fcntl __NR_fcntl
- #define __NR_sys_fork __NR_fork
- #define __NR_sys_fstat __NR_fstat
- #define __NR_sys_getdents __NR_getdents
- #define __NR_sys_getdents64 __NR_getdents64
- #define __NR_sys_getegid __NR_getegid
- #define __NR_sys_geteuid __NR_geteuid
- #define __NR_sys_getpgrp __NR_getpgrp
- #define __NR_sys_getpid __NR_getpid
- #define __NR_sys_getppid __NR_getppid
- #define __NR_sys_getpriority __NR_getpriority
- #define __NR_sys_getrlimit __NR_getrlimit
- #define __NR_sys_getsid __NR_getsid
- #define __NR__gettid __NR_gettid
- #define __NR_sys_kill __NR_kill
- #define __NR_sys_lseek __NR_lseek
- #define __NR_sys_munmap __NR_munmap
- #define __NR_sys_open __NR_open
- #define __NR_sys_pipe __NR_pipe
- #define __NR_sys_prctl __NR_prctl
- #define __NR_sys_ptrace __NR_ptrace
- #define __NR_sys_read __NR_read
- #define __NR_sys_readlink __NR_readlink
- #define __NR_sys_sched_yield __NR_sched_yield
- #define __NR_sys_sigaltstack __NR_sigaltstack
- #define __NR_sys_stat __NR_stat
- #define __NR_sys_write __NR_write
- #define __NR_sys_futex __NR_futex
+ #define __NR_sys_close __NR_close
+ #define __NR_sys_dup __NR_dup
+ #define __NR_sys_dup2 __NR_dup2
+ #define __NR_sys_execve __NR_execve
+ #define __NR_sys__exit __NR_exit
+ #define __NR_sys_fcntl __NR_fcntl
+ #define __NR_sys_fork __NR_fork
+ #define __NR_sys_fstat __NR_fstat
+ #define __NR_sys_futex __NR_futex
+ #define __NR_sys_getdents __NR_getdents
+ #define __NR_sys_getdents64 __NR_getdents64
+ #define __NR_sys_getegid __NR_getegid
+ #define __NR_sys_geteuid __NR_geteuid
+ #define __NR_sys_getpgrp __NR_getpgrp
+ #define __NR_sys_getpid __NR_getpid
+ #define __NR_sys_getppid __NR_getppid
+ #define __NR_sys_getpriority __NR_getpriority
+ #define __NR_sys_getrlimit __NR_getrlimit
+ #define __NR_sys_getsid __NR_getsid
+ #define __NR__gettid __NR_gettid
+ #define __NR_sys_kill __NR_kill
+ #define __NR_sys_lseek __NR_lseek
+ #define __NR_sys_munmap __NR_munmap
+ #define __NR_sys_open __NR_open
+ #define __NR_sys_pipe __NR_pipe
+ #define __NR_sys_prctl __NR_prctl
+ #define __NR_sys_ptrace __NR_ptrace
+ #define __NR_sys_read __NR_read
+ #define __NR_sys_readlink __NR_readlink
+ #define __NR_sys_sched_getaffinity __NR_sched_getaffinity
+ #define __NR_sys_sched_setaffinity __NR_sched_setaffinity
+ #define __NR_sys_sched_yield __NR_sched_yield
+ #define __NR_sys_sigaltstack __NR_sigaltstack
+ #define __NR_sys_stat __NR_stat
+ #define __NR_sys_write __NR_write
static inline _syscall1(int, sys_close, int, f);
static inline _syscall1(int, sys_dup, int, f);
static inline _syscall2(int, sys_dup2, int, s,
@@ -432,6 +462,8 @@ struct dirent64;
static inline _syscall0(pid_t, sys_fork);
static inline _syscall2(int, sys_fstat, int, f,
struct stat*, b);
+ static inline _syscall4(int, sys_futex, int*, addrx, int, opx, int, valx,
+ struct timespec *, timeoutx);
static inline _syscall3(int, sys_getdents, int, f,
struct dirent*, d, int, c);
static inline _syscall3(int, sys_getdents64, int, f,
@@ -464,6 +496,10 @@ struct dirent64;
void *, b, size_t, c);
static inline _syscall3(int, sys_readlink, const char*, p,
char*, b, size_t, s);
+ static inline _syscall3(int, sys_sched_getaffinity, pid_t, pid,
+ unsigned int, len, unsigned long *, mask);
+ static inline _syscall3(int, sys_sched_setaffinity, pid_t, pid,
+ unsigned int, len, unsigned long *, mask);
static inline _syscall0(int, sys_sched_yield);
static inline _syscall2(int, sys_sigaltstack, const stack_t*, s,
const stack_t*, o);
@@ -471,8 +507,6 @@ struct dirent64;
struct stat*, b);
static inline _syscall3(ssize_t, sys_write, int, f,
const void *, b, size_t, c);
- static inline _syscall4(int, sys_futex, int*, addrx, int, opx, int, valx,
- struct timespec *, timeoutx);
static inline int sys_sysconf(int name) {
extern int __getpagesize(void);
@@ -517,6 +551,9 @@ struct dirent64;
#undef RETURN
#endif
+#ifdef __cplusplus
+}
+#endif
#endif
#endif
diff --git a/src/base/linuxthreads.c b/src/base/linuxthreads.c
index e721582..3696987 100644
--- a/src/base/linuxthreads.c
+++ b/src/base/linuxthreads.c
@@ -51,6 +51,10 @@
#include "base/linux_syscall_support.h"
#include "base/thread_lister.h"
+#ifndef CLONE_UNTRACED
+#define CLONE_UNTRACED 0x00800000
+#endif
+
/* itoa() is not a standard function, and we cannot safely call printf()
* after suspending threads. So, we just implement our own copy. A
@@ -97,8 +101,19 @@ static int local_clone (int (*fn)(void *), void *arg, ...) {
* Leave 4kB of gap between the callers stack and the new clone. This
* should be more than sufficient for the caller to call waitpid() until
* the cloned thread terminates.
+ *
+ * It is important that we set the CLONE_UNTRACED flag, because newer
+ * versions of "gdb" otherwise attempt to attach to our thread, and will
+ * attempt to reap its status codes. This subsequently results in the
+ * caller hanging indefinitely in waitpid(), waiting for a change in
+ * status that will never happen. By setting the CLONE_UNTRACED flag, we
+ * prevent "gdb" from stealing events, but we still expect the thread
+ * lister to fail, because it cannot PTRACE_ATTACH to the process that
+ * is being debugged. This is OK and the error code will be reported
+ * correctly.
*/
- return clone(fn, (char *)&arg - 4096, CLONE_VM|CLONE_FS|CLONE_FILES, arg);
+ return clone(fn, (char *)&arg - 4096,
+ CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg);
}
@@ -209,7 +224,8 @@ struct ListerParams {
static void ListerThread(struct ListerParams *args) {
static const int signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
SIGXCPU, SIGXFSZ };
- pid_t clone_pid = sys_gettid();
+ int found_parent = 0;
+ pid_t clone_pid = sys_gettid(), ppid = sys_getppid();
char proc_self_task[80], marker_name[48], *marker_path;
const char *proc_paths[3];
const char *const *proc_path = proc_paths;
@@ -239,8 +255,7 @@ static void ListerThread(struct ListerParams *args) {
}
/* Compute search paths for finding thread directories in /proc */
- local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'),
- sys_getppid());
+ local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid);
marker_path = strrchr(strcpy(marker_name, proc_self_task), '\000');
strcat(proc_self_task, "/task/");
proc_paths[0] = proc_self_task; /* /proc/$$/task/ */
@@ -417,6 +432,7 @@ static void ListerThread(struct ListerParams *args) {
num_threads--;
sig_num_threads = num_threads;
} else {
+ found_parent |= pid == ppid;
added_entries++;
}
}
@@ -435,6 +451,16 @@ static void ListerThread(struct ListerParams *args) {
NO_INTR(sys_close(marker));
sig_marker = marker = -1;
+ /* If we never found the parent process, something is very wrong.
+ * Most likely, we are running in debugger. Any attempt to operate
+ * on the threads would be very incomplete. Let's just report an
+ * error to the caller.
+ */
+ if (!found_parent) {
+ ResumeAllProcessThreads(num_threads, pids);
+ sys__exit(3);
+ }
+
/* Now we are ready to call the callback,
* which takes care of resuming the threads for us.
*/
@@ -530,6 +556,9 @@ int ListAllProcessThreads(void *parameter,
case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected */
args.result = -1;
break;
+ case 3: args.err = EPERM; /* Process is already being traced */
+ args.result = -1;
+ break;
default:args.err = ECHILD; /* Child died unexpectedly */
args.result = -1;
break;
diff --git a/src/base/thread_lister.c b/src/base/thread_lister.c
index 6def758..f3df16b 100644
--- a/src/base/thread_lister.c
+++ b/src/base/thread_lister.c
@@ -31,7 +31,8 @@
* Author: Markus Gutschke
*/
-#include <stdio.h> // needed for NULL on some powerpc platforms (?!)
+#include <stdio.h> /* needed for NULL on some powerpc platforms (?!) */
+#include <sys/prctl.h>
#include "base/thread_lister.h"
#include "base/linuxthreads.h"
/* Include other thread listers here that define THREADS macro
@@ -46,16 +47,23 @@
int ListAllProcessThreads(void *parameter,
ListAllProcessThreadsCallBack callback, ...) {
- int rc;
+ int rc;
va_list ap;
+ int dumpable = prctl(PR_GET_DUMPABLE, 0);
+ if (!dumpable)
+ prctl(PR_SET_DUMPABLE, 1);
va_start(ap, callback);
- rc = callback(parameter, 0, NULL, ap);
+ pid_t pid = getpid();
+ rc = callback(parameter, 1, &pid, ap);
va_end(ap);
+ if (!dumpable)
+ prctl(PR_SET_DUMPABLE, 0);
return rc;
}
-void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+int ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+ return 1;
}
#endif
diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h
index 66d23de..f888ae0 100644
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@@ -255,6 +255,19 @@ class HeapCleaner {
};
class HeapLeakChecker {
+ public: // Static functions for working with (whole-program) leak checking.
+
+ // If heap leak checking is currently active in some mode
+ // e.g. if leak checking was started (and is still active now)
+ // due to any valid non-empty --heap_check flag value
+ // (including "local") on the command-line
+ // or via a dependency on //base:heapcheck.
+ // The return value reflects iff HeapLeakChecker objects manually
+ // constructed right now will be doing leak checking or nothing.
+ // Note that we can go from active to inactive state during InitGoogle()
+ // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle().
+ static bool IsActive();
+
public: // Non-static functions for starting and doing leak checking.
// Start checking and name the leak check performed.
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index dc9c46d..4e8e2dc 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -468,6 +468,18 @@ static bool RecordGlobalDataLocked(uint64 start_address,
if (inode == 0)
return true;
+ // Sometimes people mmap their own files read-write. That would cause
+ // the strict ELF checker later to reject them. We do not want to loosen
+ // up the ELF checker, because we need to catch freaky files if they
+ // show up. So, make an exception for common files that we have seen.
+ //
+ // TODO(mec): the longer this gets, the more attractive it is to
+ // check for the ELF header and just accept all non-ELF files.
+ if (inode != 0) {
+ if (filename && strcmp(filename, "/dev/zero") == 0)
+ return true;
+ }
+
// Grab some ELF types.
#ifdef _LP64
typedef Elf64_Ehdr ElfFileHeader;
@@ -692,8 +704,15 @@ HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
"Looking at /proc/self/maps line:\n %s\n",
proc_map_line);
- if (start_address >= end_address)
- abort();
+ if (start_address >= end_address) {
+ // Crash if a line we can be interested in is ill-formed:
+ if (inode != 0) abort();
+ // Skip other ill-formed lines: some are possible
+ // probably due to the interplay of how /proc/self/maps is updated
+ // while we read it in chunks in ProcMapsIterator and
+ // do things in this loop.
+ continue;
+ }
// Determine if any shared libraries are present.
if (inode != 0 && strstr(filename, "lib") && strstr(filename, ".so")) {
@@ -738,6 +757,14 @@ static int64 live_bytes_total = 0;
// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
static pid_t self_thread_pid = 0;
+// Status of our thread listing callback execution
+// (protected by our lock; used from within IgnoreAllLiveObjectsLocked)
+static enum {
+ CALLBACK_NOT_STARTED,
+ CALLBACK_STARTED,
+ CALLBACK_COMPLETED,
+} thread_listing_status = CALLBACK_NOT_STARTED;
+
// Ideally to avoid deadlocks this function should not result in any libc
// or other function calls that might need to lock a mutex:
// It is called when all threads of a process are stopped
@@ -774,6 +801,7 @@ int HeapLeakChecker::IgnoreLiveThreads(void* parameter,
int num_threads,
pid_t* thread_pids,
va_list ap) {
+ thread_listing_status = CALLBACK_STARTED;
if (HeapProfiler::kMaxLogging) {
HeapProfiler::MESSAGE(2, "HeapChecker: Found %d threads (from pid %d)\n",
num_threads, getpid());
@@ -838,6 +866,7 @@ int HeapLeakChecker::IgnoreLiveThreads(void* parameter,
IgnoreNonThreadLiveObjectsLocked();
// Can now resume the threads:
ResumeAllProcessThreads(num_threads, thread_pids);
+ thread_listing_status = CALLBACK_COMPLETED;
return failures;
}
@@ -928,7 +957,8 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
UseProcMaps(RECORD_GLOBAL_DATA_LOCKED);
}
// Ignore all thread stacks:
- bool executed_with_threads_stopped = false;
+ thread_listing_status = CALLBACK_NOT_STARTED;
+ bool need_to_ignore_non_thread_objects = true;
self_thread_pid = getpid();
self_thread_stack = self_stack;
if (FLAGS_heap_check_ignore_thread_live) {
@@ -939,10 +969,22 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
// if not suspended they could still mess with the pointer
// graph while we walk it).
int r = ListAllProcessThreads(NULL, IgnoreLiveThreads);
- executed_with_threads_stopped = (r >= 0);
- if (r == -1) {
- HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; "
- "may get false leak reports\n");
+ need_to_ignore_non_thread_objects = r < 0;
+ if (r < 0) {
+ HeapProfiler::MESSAGE(0, "HeapChecker: thread finding failed "
+ "with %d errno=%d\n", r, errno);
+ if (thread_listing_status == CALLBACK_COMPLETED) {
+ HeapProfiler::MESSAGE(0, "HeapChecker: thread finding callback "
+ "finished ok; hopefully everything is fine\n");
+ need_to_ignore_non_thread_objects = false;
+ } else if (thread_listing_status == CALLBACK_STARTED) {
+ HeapProfiler::MESSAGE(0, "HeapChecker: thread finding callback was "
+ "interrupted or crashed; can't fix this\n");
+ abort();
+ } else { // CALLBACK_NOT_STARTED
+ HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; "
+ "may get false leak reports\n");
+ }
} else if (r != 0) {
HeapProfiler::MESSAGE(0, "HeapChecker: Thread stacks not found "
"for %d threads; may get false leak reports\n",
@@ -960,7 +1002,7 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
}
// Do all other live data ignoring here if we did not do it
// within thread listing callback with all threads stopped.
- if (!executed_with_threads_stopped) IgnoreNonThreadLiveObjectsLocked();
+ if (need_to_ignore_non_thread_objects) IgnoreNonThreadLiveObjectsLocked();
if (live_objects_total) {
HeapProfiler::MESSAGE(0, "HeapChecker: "
"Ignoring "LLD" reachable "
@@ -1349,10 +1391,13 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
(same_heap ? (inuse_bytes_increase_ != 0 || inuse_allocs_increase_ != 0)
: (inuse_bytes_increase_ > 0 || inuse_allocs_increase_ > 0));
if (see_leaks || do_full) {
+ bool pprof_can_ignore = false;
+ const char* command_tail = " --text 2>/dev/null"; // normal command
const char* gv_command_tail
= " --edgefraction=1e-10 --nodefraction=1e-10 --gv 2>/dev/null";
string ignore_re;
if (disabled_regexp) {
+ pprof_can_ignore = true;
ignore_re += " --ignore='^";
ignore_re += *disabled_regexp;
ignore_re += "$'";
@@ -1361,22 +1406,29 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
// some STLs can give us spurious leak alerts (since the STL tries to
// do its own memory pooling), so we avoid it by using STL as little
// as possible for "big" objects that might require "lots" of memory.
- char command[6 * PATH_MAX + 200];
+ char base_command[6 * PATH_MAX + 200];
+ char beg_profile[PATH_MAX+1], end_profile[PATH_MAX+1];
if (use_initial_profile) {
+ snprintf(beg_profile, sizeof(beg_profile), "%s.%s-beg.heap",
+ profile_prefix->c_str(), name_);
// compare against initial profile only if need to
const char* drop_negative = same_heap ? "" : " --drop_negative";
- snprintf(command, sizeof(command), "%s --base=\"%s.%s-beg.heap\" %s ",
- pprof_path(), profile_prefix->c_str(), name_,
- drop_negative);
+ snprintf(base_command, sizeof(base_command),
+ "%s --base=\"%s\" %s ",
+ pprof_path(), beg_profile, drop_negative);
} else {
- snprintf(command, sizeof(command), "%s",
+ beg_profile[0] = '\0';
+ snprintf(base_command, sizeof(base_command), "%s",
pprof_path());
}
- snprintf(command + strlen(command), sizeof(command) - strlen(command),
- " %s \"%s.%s-end.heap\" %s --inuse_objects --lines",
- invocation_path(), profile_prefix->c_str(),
- name_, ignore_re.c_str());
+ snprintf(end_profile, sizeof(end_profile), "%s.%s-end.heap",
+ profile_prefix->c_str(), name_);
+ snprintf(base_command + strlen(base_command),
+ sizeof(base_command) - strlen(base_command),
+ " %s \"%s\" %s --inuse_objects --lines",
+ invocation_path(), end_profile, ignore_re.c_str());
// --lines is important here to catch leaks when !see_leaks
+
char cwd[PATH_MAX+1];
if (getcwd(cwd, sizeof(cwd)) != cwd) abort();
if (see_leaks) {
@@ -1390,7 +1442,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
"To investigate leaks manually use e.g.\n"
"cd %s; " // for proper symbol resolution
"%s%s\n\n",
- cwd, command, gv_command_tail);
+ cwd, base_command, gv_command_tail);
}
string output;
int checked_leaks = 0;
@@ -1403,14 +1455,18 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
} else {
// We don't care about pprof's stderr as long as it
// succeeds with empty report:
- checked_leaks = GetStatusOutput(command, &output);
+ char full_command[6 * PATH_MAX + 200]; // needed to concatenate
+ snprintf(full_command, sizeof(full_command), "%s%s",
+ base_command, command_tail);
+ checked_leaks = GetStatusOutput(full_command, &output);
if (checked_leaks != 0) {
HeapProfiler::MESSAGE(-1, "ERROR: Could not run pprof at %s\n",
pprof_path());
abort();
}
}
- if (see_leaks && output.empty() && checked_leaks == 0) {
+ if (see_leaks && pprof_can_ignore &&
+ output.empty() && checked_leaks == 0) {
HeapProfiler::MESSAGE(-1, "HeapChecker: "
"These must be leaks that we disabled"
" (pprof succeeded)! This check WILL FAIL"
@@ -1420,7 +1476,24 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
// do not fail the check just due to us being a stripped binary
if (!see_leaks && strstr(output.c_str(), "nm: ") != NULL &&
strstr(output.c_str(), ": no symbols") != NULL) output.resize(0);
- if (!(see_leaks || checked_leaks == 0)) abort();
+ }
+ // Make sure the profiles we created are still there.
+ // They can get deleted e.g. if the program forks/executes itself
+ // and FLAGS_cleanup_old_heap_profiles was kept as true.
+ if (access(end_profile, R_OK) != 0 ||
+ (beg_profile[0] && access(beg_profile, R_OK) != 0)) {
+ HeapProfiler::MESSAGE(-1, "HeapChecker: "
+ "One of the heap profiles is gone: %s %s\n",
+ beg_profile, end_profile);
+ abort();
+ }
+ if (!(see_leaks || checked_leaks == 0)) {
+ // Crash if something went wrong with executing pprof
+ // and we rely on pprof to do its work:
+ HeapProfiler::MESSAGE(-1, "HeapChecker: "
+ "pprof command failed: %s%s\n",
+ base_command, command_tail);
+ abort();
}
if (see_leaks && use_initial_profile) {
HeapProfiler::MESSAGE(-1, "HeapChecker: "
@@ -1438,7 +1511,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
"To investigate leaks manually uge e.g.\n"
"cd %s; " // for proper symbol resolution
"%s%s\n\n",
- name_, cwd, command, gv_command_tail);
+ name_, cwd, base_command, gv_command_tail);
if (use_initial_profile) {
HeapProfiler::MESSAGE(-1, "HeapChecker: "
"CAVEAT: Some of the reported leaks might have "
@@ -1491,6 +1564,10 @@ HeapLeakChecker::~HeapLeakChecker() {
// HeapLeakChecker overall heap check components
//----------------------------------------------------------------------
+bool HeapLeakChecker::IsActive() {
+ return heap_checker_on;
+}
+
vector<HeapCleaner::void_function>* HeapCleaner::heap_cleanups_ = NULL;
// When a HeapCleaner object is intialized, add its function to the static list
@@ -1653,7 +1730,7 @@ void HeapLeakChecker::DoMainHeapCheck() {
HeapProfiler::MESSAGE(0, "HeapChecker: "
"Checking for whole-program memory leaks\n");
if (!main_heap_checker->DoNoLeaks(same_heap, do_full, do_report)) {
- HeapProfiler::MESSAGE(-1, "ERROR: Leaks found in main heap check, aborting\n");
+ HeapProfiler::MESSAGE(-1, "HeapChecker: crashing because of leaks\n");
abort();
}
delete main_heap_checker;
diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc
index 0260a34..686b4bc 100644
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@@ -166,6 +166,21 @@ struct StackTraceHash {
}
return h;
}
+ // Less operator for MSVC's hash containers.
+ bool operator()(void** entry1, void** entry2) const {
+ if (Depth(entry1) != Depth(entry2))
+ return Depth(entry1) < Depth(entry2);
+ for (int i = 0; i < Depth(entry1); i++) {
+ if (PC(entry1, i) != PC(entry2, i)) {
+ return PC(entry1, i) < PC(entry2, i);
+ }
+ }
+ return false; // entries are equal
+ }
+ // These two public members are required by msvc. 4 and 8 are the
+ // default values.
+ static const size_t bucket_size = 4;
+ static const size_t min_buckets = 8;
};
struct StackTraceEqual {
diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc
index 8499c73..613e612 100644
--- a/src/malloc_hook.cc
+++ b/src/malloc_hook.cc
@@ -115,7 +115,7 @@ extern "C" void* mmap64(void *start, size_t length,
int fd, __off64_t offset) __THROW {
void *result;
- result = syscall(SYS_mmap, start, length, prot, flags, fd, offset);
+ result = (void *)syscall(SYS_mmap, start, length, prot, flags, fd, offset);
MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
return result;
}
diff --git a/src/pprof b/src/pprof
index 5df1798..24b5b74 100755
--- a/src/pprof
+++ b/src/pprof
@@ -41,6 +41,9 @@
# Examples:
#
# % tools/pprof "program" "profile"
+# Enters "interactive" mode
+#
+# % tools/pprof --text "program" "profile"
# Generates one line per procedure
#
# % tools/pprof --gv "program" "profile"
@@ -68,6 +71,8 @@
use strict;
use Getopt::Long;
+my $PPROF_VERSION = "0.8";
+
# These are the object tools we use, which come from various sources.
# We want to invoke them directly, rather than via users' aliases and/or
# search paths, because some people have colorizing versions of them that
@@ -79,9 +84,22 @@ my %obj_tool_map = (
"objdump" => "objdump",
"nm" => "nm",
"addr2line" => "addr2line",
+ "c++filt" => "c++filt",
);
my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local
my $GV = "gv";
+# These are used for dynamic profiles
+my $WGET = "wget";
+my $CURL = "curl";
+
+# These are the web pages that servers need to support for dynamic profiles
+my $HEAP_PAGE = "/pprof/heap";
+my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#"
+my $GROWTH_PAGE = "/pprof/growth";
+my $CONTENTION_PAGE = "/pprof/contention";
+my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST
+my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
+
# There is a pervasive dependency on the length (in hex characters, i.e.,
# nibbles) of an address, distinguishing between 32-bit and 64-bit profiles:
@@ -90,23 +108,40 @@ my $address_length = 8; # Hope for 32-bit, reset if 64-bit detected.
##### Argument parsing #####
sub usage_string {
- return <<'EOF';
-Usage: pprof [options] <program> <profile> ...
- Prints specified cpu- or heap-profile
-
+ return <<EOF;
+Usage:
+pprof [options] <program> <profiles>
+ <profiles> is a space separated list of profile names.
+pprof [options] <profile>
+ <profile> is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE
+
+ Each profile name can be:
+ /path/to/profile - a path to a profile file
+ host:port[/<service>] - a location of a service to get profile from
+
+ The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, $GROWTH_PAGE, or $CONTENTION_PAGE.
+ For instance: "pprof http://myserver.com:80$HEAP_PAGE".
+ If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
+
+ For more help with querying remote servers, including how to add the
+ necessary server-side support code, see this filename (or one like it):
+
+ /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html
+
Options:
--cum Sort by cumulative data
--base=<base> Subtract <base> from <profile> before display
- --interactive Run in interactive mode (interactive "help" gives help)
-
+ --interactive Run in interactive mode (interactive "help" gives help) [default]
+ --seconds=<n> Length of time for dynamic profiles [default=30 secs]
+
Reporting Granularity:
--addresses Report at address level
--lines Report at source line level
--functions Report at function level [default]
--files Report at source file level
-
+
Output type:
- --text Generate text report [default]
+ --text Generate text report
--gv Generate Postscript and display
--list=<regexp> Generate source listing of matching routines
--disasm=<regexp> Generate disassembly of matching routines
@@ -114,7 +149,7 @@ Output type:
--ps Generate Postcript to stdout
--pdf Generate PDF to stdout
--gif Generate GIF to stdout
-
+
Heap-Profile Options:
--inuse_space Display in-use (mega)bytes [default]
--inuse_objects Display in-use objects
@@ -122,7 +157,12 @@ Heap-Profile Options:
--alloc_objects Display allocated objects
--show_bytes Display space in bytes
--drop_negative Ignore negative differences
-
+
+Contention-profile options:
+ --total_delay Display total delay at each region [default]
+ --contentions Display number of delays at each region
+ --mean_delay Display mean delay at each region
+
Call-graph Options:
--nodecount=<n> Show at most so many nodes [default=80]
--nodefraction=<f> Hide nodes below <f>*total [default=.005]
@@ -130,7 +170,7 @@ Call-graph Options:
--focus=<regexp> Focus on nodes matching <regexp>
--ignore=<regexp> Ignore nodes matching <regexp>
--scale=<n> Set GV scaling [default=0]
-
+
Miscellaneous:
--tools=<prefix> Prefix for object tool pathnames
--test Run unit tests
@@ -138,7 +178,7 @@ Miscellaneous:
--version Version information
Examples:
-
+
pprof /bin/ls ls.prof
Outputs one line per procedure
pprof --gv /bin/ls ls.prof
@@ -151,12 +191,14 @@ pprof --list=getdir /bin/ls ls.prof
(Per-line) annotated source listing for getdir()
pprof --disasm=getdir /bin/ls ls.prof
(Per-PC) annotated disassembly for getdir()
+pprof localhost:1234
+ Outputs one line per procedure for localhost:1234
EOF
}
sub version_string {
- return <<'EOF'
-pprof (part of google-perftools 0.7)
+ return <<EOF
+pprof (part of google-perftools $PPROF_VERSION)
Copyright 1998-2006 Google Inc.
@@ -175,301 +217,387 @@ sub usage {
exit(1);
}
+sub Init() {
+ # Setup tmp-file name and handler to clean it up.
+ # We do this in the very beginning so that we can use
+ # error() and cleanup() function anytime here after.
+ $main::tmpfile_sym = "/tmp/pprof$$.sym";
+ $main::tmpfile_ps = "/tmp/pprof$$";
+ $main::next_tmpfile = 0;
+ $SIG{'INT'} = \&sighandler;
-$main::opt_help = 0;
-$main::opt_version = 0;
-
-$main::opt_cum = 0;
-$main::opt_base = '';
-$main::opt_addresses = 0;
-$main::opt_lines = 0;
-$main::opt_functions = 0;
-$main::opt_files = 0;
-
-$main::opt_text = 0;
-$main::opt_list = "";
-$main::opt_disasm = "";
-$main::opt_gv = 0;
-$main::opt_dot = 0;
-$main::opt_ps = 0;
-$main::opt_pdf = 0;
-$main::opt_gif = 0;
-
-$main::opt_nodecount = 80;
-$main::opt_nodefraction = 0.005;
-$main::opt_edgefraction = 0.001;
-$main::opt_focus = '';
-$main::opt_ignore = '';
-$main::opt_scale = 0;
-
-$main::opt_inuse_space = 0;
-$main::opt_inuse_objects = 0;
-$main::opt_alloc_space = 0;
-$main::opt_alloc_objects = 0;
-$main::opt_show_bytes = 0;
-$main::opt_drop_negative = 0;
-$main::opt_interactive = 0;
-
-$main::opt_tools = "";
-$main::opt_debug = 0;
-$main::opt_test = 0;
-
-# Are we printing a heap profile?
-$main::heap_profile = 0;
-
-# Are we printing a lock profile?
-$main::lock_profile = 0;
-
-GetOptions("help!" => \$main::opt_help,
- "version!" => \$main::opt_version,
- "cum!" => \$main::opt_cum,
- "base=s" => \$main::opt_base,
- "functions!" => \$main::opt_functions,
- "lines!" => \$main::opt_lines,
- "addresses!" => \$main::opt_addresses,
- "files!" => \$main::opt_files,
- "text!" => \$main::opt_text,
- "list=s" => \$main::opt_list,
- "disasm=s" => \$main::opt_disasm,
- "gv!" => \$main::opt_gv,
- "dot!" => \$main::opt_dot,
- "ps!" => \$main::opt_ps,
- "pdf!" => \$main::opt_pdf,
- "gif!" => \$main::opt_gif,
- "interactive!" => \$main::opt_interactive,
- "nodecount=i" => \$main::opt_nodecount,
- "nodefraction=f" => \$main::opt_nodefraction,
- "edgefraction=f" => \$main::opt_edgefraction,
- "focus=s" => \$main::opt_focus,
- "ignore=s" => \$main::opt_ignore,
- "scale=i" => \$main::opt_scale,
- "inuse_space!" => \$main::opt_inuse_space,
- "inuse_objects!" => \$main::opt_inuse_objects,
- "alloc_space!" => \$main::opt_alloc_space,
- "alloc_objects!" => \$main::opt_alloc_objects,
- "show_bytes!" => \$main::opt_show_bytes,
- "drop_negative!" => \$main::opt_drop_negative,
- "tools=s" => \$main::opt_tools,
- "test!" => \$main::opt_test,
- "debug!" => \$main::opt_debug,
- ) || usage("Invalid option(s)");
-
-# Deal with the standard --help and --version
-if ($main::opt_help) {
- print usage_string();
- exit(0);
-}
-if ($main::opt_version) {
- print version_string();
- exit(0);
-}
+ $main::opt_help = 0;
+ $main::opt_version = 0;
-# Disassembly/listing mode requires address-level info
-if ($main::opt_disasm || $main::opt_list) {
- $main::opt_functions = 0;
+ $main::opt_cum = 0;
+ $main::opt_base = '';
+ $main::opt_addresses = 0;
$main::opt_lines = 0;
- $main::opt_addresses = 1;
+ $main::opt_functions = 0;
$main::opt_files = 0;
-}
-
-# Check heap-profiling flags
-if ($main::opt_inuse_space +
- $main::opt_inuse_objects +
- $main::opt_alloc_space +
- $main::opt_alloc_objects > 1) {
- usage("Specify at most on of --inuse/--alloc options");
-}
-# Check output granularities
-my $grains =
- $main::opt_functions +
- $main::opt_lines +
- $main::opt_addresses +
- $main::opt_files +
- 0;
-if ($grains > 1) {
- usage("Only specify one output granularity option");
-}
-if ($grains == 0) {
- $main::opt_functions = 1;
-}
+ $main::opt_text = 0;
+ $main::opt_list = "";
+ $main::opt_disasm = "";
+ $main::opt_gv = 0;
+ $main::opt_dot = 0;
+ $main::opt_ps = 0;
+ $main::opt_pdf = 0;
+ $main::opt_gif = 0;
+
+ $main::opt_nodecount = 80;
+ $main::opt_nodefraction = 0.005;
+ $main::opt_edgefraction = 0.001;
+ $main::opt_focus = '';
+ $main::opt_ignore = '';
+ $main::opt_scale = 0;
+ $main::opt_seconds = 30;
+
+ $main::opt_inuse_space = 0;
+ $main::opt_inuse_objects = 0;
+ $main::opt_alloc_space = 0;
+ $main::opt_alloc_objects = 0;
+ $main::opt_show_bytes = 0;
+ $main::opt_drop_negative = 0;
+ $main::opt_interactive = 0;
+
+ $main::opt_total_delay = 0;
+ $main::opt_contentions = 0;
+ $main::opt_mean_delay = 0;
+
+ $main::opt_tools = "";
+ $main::opt_debug = 0;
+ $main::opt_test = 0;
+
+ # Are we using $SYMBOL_PAGE?
+ $main::use_symbol_page = 0;
+
+ # Are we printing a heap profile?
+ $main::heap_profile = 0;
-# Check output modes
-my $modes =
- $main::opt_text +
- $main::opt_gv +
- $main::opt_dot +
- $main::opt_ps +
- $main::opt_pdf +
- $main::opt_gif +
- 0;
-if ($modes > 1) {
- usage("Only specify one output mode");
-}
-if ($modes == 0) {
- $main::opt_text = 1;
-}
+ # Are we printing a lock profile?
+ $main::lock_profile = 0;
-if ($main::opt_test) {
- RunUnitTests();
- # Should not return
- exit(1);
-}
+ GetOptions("help!" => \$main::opt_help,
+ "version!" => \$main::opt_version,
+ "cum!" => \$main::opt_cum,
+ "base=s" => \$main::opt_base,
+ "seconds=i" => \$main::opt_seconds,
+ "functions!" => \$main::opt_functions,
+ "lines!" => \$main::opt_lines,
+ "addresses!" => \$main::opt_addresses,
+ "files!" => \$main::opt_files,
+ "text!" => \$main::opt_text,
+ "list=s" => \$main::opt_list,
+ "disasm=s" => \$main::opt_disasm,
+ "gv!" => \$main::opt_gv,
+ "dot!" => \$main::opt_dot,
+ "ps!" => \$main::opt_ps,
+ "pdf!" => \$main::opt_pdf,
+ "gif!" => \$main::opt_gif,
+ "interactive!" => \$main::opt_interactive,
+ "nodecount=i" => \$main::opt_nodecount,
+ "nodefraction=f" => \$main::opt_nodefraction,
+ "edgefraction=f" => \$main::opt_edgefraction,
+ "focus=s" => \$main::opt_focus,
+ "ignore=s" => \$main::opt_ignore,
+ "scale=i" => \$main::opt_scale,
+ "inuse_space!" => \$main::opt_inuse_space,
+ "inuse_objects!" => \$main::opt_inuse_objects,
+ "alloc_space!" => \$main::opt_alloc_space,
+ "alloc_objects!" => \$main::opt_alloc_objects,
+ "show_bytes!" => \$main::opt_show_bytes,
+ "drop_negative!" => \$main::opt_drop_negative,
+ "total_delay!" => \$main::opt_total_delay,
+ "contentions!" => \$main::opt_contentions,
+ "mean_delay!" => \$main::opt_mean_delay,
+ "tools=s" => \$main::opt_tools,
+ "test!" => \$main::opt_test,
+ "debug!" => \$main::opt_debug,
+ ) || usage("Invalid option(s)");
+
+ # Deal with the standard --help and --version
+ if ($main::opt_help) {
+ print usage_string();
+ exit(0);
+ }
-# Binary name and profile arguments list
-$main::prog = "";
-@main::pfile_args = ();
+ if ($main::opt_version) {
+ print version_string();
+ exit(0);
+ }
-$main::prog = shift || usage("Did not specify program");
-scalar(@ARGV) || usage("Did not specify profile file");
+ # Disassembly/listing mode requires address-level info
+ if ($main::opt_disasm || $main::opt_list) {
+ $main::opt_functions = 0;
+ $main::opt_lines = 0;
+ $main::opt_addresses = 1;
+ $main::opt_files = 0;
+ }
+
+ # Check heap-profiling flags
+ if ($main::opt_inuse_space +
+ $main::opt_inuse_objects +
+ $main::opt_alloc_space +
+ $main::opt_alloc_objects > 1) {
+ usage("Specify at most on of --inuse/--alloc options");
+ }
+
+ # Check output granularities
+ my $grains =
+ $main::opt_functions +
+ $main::opt_lines +
+ $main::opt_addresses +
+ $main::opt_files +
+ 0;
+ if ($grains > 1) {
+ usage("Only specify one output granularity option");
+ }
+ if ($grains == 0) {
+ $main::opt_functions = 1;
+ }
+
+ # Check output modes
+ my $modes =
+ $main::opt_text +
+ $main::opt_gv +
+ $main::opt_dot +
+ $main::opt_ps +
+ $main::opt_pdf +
+ $main::opt_gif +
+ $main::opt_interactive +
+ 0;
+ if ($modes > 1) {
+ usage("Only specify one output mode");
+ }
+ if ($modes == 0) {
+ if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode
+ $main::opt_interactive = 1;
+ } else {
+ $main::opt_text = 1;
+ }
+ }
-# Parse profile file/location arguments
-foreach my $farg (@ARGV) {
- unshift(@main::pfile_args, $farg);
-}
-ConfigureObjTools($main::prog);
+ if ($main::opt_test) {
+ RunUnitTests();
+ # Should not return
+ exit(1);
+ }
-##### Main section #####
+ # Binary name and profile arguments list
+ $main::prog = "";
+ @main::pfile_args = ();
-# Setup tmp-file name and handler to clean it up
-$main::tmpfile_sym = "/tmp/pprof$$.sym";
-$main::tmpfile_ps = "/tmp/pprof$$";
-$main::next_tmpfile = 0;
-$main::collected_profile = undef;
-@main::profile_files = ();
-#$main::op_time = time();
-$SIG{'INT'} = \&sighandler;
+ # Remote profiling without a binary (using $SYMBOL_PAGE instead)
+ if (IsProfileURL($ARGV[0])) {
+ $main::use_symbol_page = 1;
+ }
-# Fetch all profile data
-FetchDynamicProfiles();
+ if ($main::use_symbol_page) { # We don't need a binary!
+ my %disabled = ('--lines' => $main::opt_lines,
+ '--disasm' => $main::opt_disasm);
+ for my $option (keys %disabled) {
+ usage("$option cannot be used without a binary") if $disabled{$option};
+ }
+ # Set $main::prog later...
+ scalar(@ARGV) || usage("Did not specify profile file");
+ } else {
+ $main::prog = shift(@ARGV) || usage("Did not specify program");
+ scalar(@ARGV) || usage("Did not specify profile file");
+ }
-# Read one profile, pick the last item on the list
-my $data = ReadProfile($main::prog, pop(@main::profile_files));
-my $profile = $data->{profile};
-my $libs = $data->{libs}; # Info about main program and shared libraries
+ # Parse profile file/location arguments
+ foreach my $farg (@ARGV) {
+ if ($farg =~ m/(.*)\@([0-9]+)/ ) {
+ my $machine = $1;
+ my $num_machines = $2;
+ for (my $i = 0; $i < $num_machines; $i++) {
+ unshift(@main::pfile_args, "$i.$machine");
+ }
+ } else {
+ unshift(@main::pfile_args, $farg);
+ }
+ }
-# List of function names to skip
-$main::skip = ();
-$main::skip_regexp = 'NOMATCH';
-if ($main::heap_profile) {
- foreach my $name ('calloc',
- 'cfree',
- 'malloc',
- 'free',
- 'memalign',
- 'pvalloc',
- 'valloc',
- 'realloc',
- 'do_malloc',
- 'DoSampledAllocation',
- '__builtin_delete',
- '__builtin_new',
- '__builtin_vec_delete',
- '__builtin_vec_new') {
- $main::skip{$name} = 1;
+ if ($main::use_symbol_page) {
+ unless (IsProfileURL($main::pfile_args[0])) {
+ error("The first profile should be a remote form to use $SYMBOL_PAGE\n");
+ }
+ CheckSymbolPage();
+ $main::prog = FetchProgramName();
+ } else {
+ ConfigureObjTools($main::prog)
}
- $main::skip_regexp = "TCMalloc";
}
-if ($main::lock_profile) {
- foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') {
- $main::skip{$vname} = 1;
+
+sub Main() {
+ Init();
+ $main::collected_profile = undef;
+ @main::profile_files = ();
+ $main::op_time = time();
+
+ # Fetch all profile data
+ FetchDynamicProfiles();
+
+ # Read one profile, pick the last item on the list
+ my $data = ReadProfile($main::prog, pop(@main::profile_files));
+ my $profile = $data->{profile};
+ my $libs = $data->{libs}; # Info about main program and shared libraries
+
+ # List of function names to skip
+ $main::skip = ();
+ $main::skip_regexp = 'NOMATCH';
+ if ($main::heap_profile) {
+ foreach my $name ('calloc',
+ 'cfree',
+ 'malloc',
+ 'free',
+ 'memalign',
+ 'pvalloc',
+ 'valloc',
+ 'realloc',
+ 'do_malloc',
+ 'DoSampledAllocation',
+ 'simple_alloc::allocate',
+ '__malloc_alloc_template::allocate',
+ '__builtin_delete',
+ '__builtin_new',
+ '__builtin_vec_delete',
+ '__builtin_vec_new') {
+ $main::skip{$name} = 1;
+ }
+ $main::skip_regexp = "TCMalloc";
+ }
+ if ($main::lock_profile) {
+ foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') {
+ $main::skip{$vname} = 1;
+ }
}
-}
-# Add additional profiles, if available.
-if (scalar(@main::profile_files) > 0) {
- foreach my $pname (@main::profile_files) {
- my $p = ReadProfile($main::prog, $pname)->{profile};
- $profile = AddProfile($profile, $p);
+ # Add additional profiles, if available.
+ if (scalar(@main::profile_files) > 0) {
+ foreach my $pname (@main::profile_files) {
+ my $p = ReadProfile($main::prog, $pname)->{profile};
+ $profile = AddProfile($profile, $p);
+ }
}
-}
-# Subtract base from profile, if specified
-if ($main::opt_base ne '') {
- my $base = ReadProfile($main::prog, $main::opt_base)->{profile};
- $profile = SubtractProfile($profile, $base);
-}
+ # Subtract base from profile, if specified
+ if ($main::opt_base ne '') {
+ my $base = ReadProfile($main::prog, $main::opt_base)->{profile};
+ $profile = SubtractProfile($profile, $base);
+ }
-# Get total data in profile
-my $total = TotalProfile($profile);
+ # Get total data in profile
+ my $total = TotalProfile($profile);
-# Extract symbols
-my $symbols = ExtractSymbols($libs, $profile, $data->{pcs});
+ # Collect symbols
+ my $symbols = undef;
+ if ($main::use_symbol_page) {
+ $symbols = FetchSymbols($data->{pcs});
+ } else {
+ $symbols = ExtractSymbols($libs, $profile, $data->{pcs});
+ }
-# Focus?
-if ($main::opt_focus ne '') {
- $profile = FocusProfile($symbols, $profile, $main::opt_focus);
-}
+ # Focus?
+ if ($main::opt_focus ne '') {
+ $profile = FocusProfile($symbols, $profile, $main::opt_focus);
+ }
-# Ignore?
-if ($main::opt_ignore ne '') {
- $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore);
-}
+ # Ignore?
+ if ($main::opt_ignore ne '') {
+ $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore);
+ }
-# Reduce profiles to required output granularity, and also clean
-# each stack trace so a given entry exists at most once.
-my $reduced = ReduceProfile($symbols, $profile);
+ # Reduce profiles to required output granularity, and also clean
+ # each stack trace so a given entry exists at most once.
+ my $reduced = ReduceProfile($symbols, $profile);
-# Get derived profiles
-my $flat = FlatProfile($reduced);
-my $cumulative = CumulativeProfile($reduced);
+ # Get derived profiles
+ my $flat = FlatProfile($reduced);
+ my $cumulative = CumulativeProfile($reduced);
-# Print
-if (!$main::opt_interactive) {
- if ($main::opt_disasm) {
- PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm);
- } elsif ($main::opt_list) {
- PrintListing($libs, $flat, $cumulative, $main::opt_list);
- } elsif ($main::opt_text) {
- PrintText($symbols, $flat, $cumulative, $total, -1);
- } else {
- if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
- if ($main::opt_gv) {
- if (!system("$GV --version >/dev/null 2>&1")) {
- # Options using double dash are supported by this gv version.
- system("$GV --scale=$main::opt_scale " .
- PsTempName($main::next_tmpfile));
- } else {
- # Old gv version - only supports options that use single dash.
- system("$GV -scale $main::opt_scale " .
- PsTempName($main::next_tmpfile));
+ # Print
+ if (!$main::opt_interactive) {
+ if ($main::opt_disasm) {
+ PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm);
+ } elsif ($main::opt_list) {
+ PrintListing($libs, $flat, $cumulative, $main::opt_list);
+ } elsif ($main::opt_text) {
+ PrintText($symbols, $flat, $cumulative, $total, -1);
+ } else {
+ if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
+ if ($main::opt_gv) {
+ if (!system("$GV --version >/dev/null 2>&1")) {
+ # Options using double dash are supported by this gv version.
+ system("$GV --scale=$main::opt_scale " .
+ PsTempName($main::next_tmpfile));
+ } else {
+ # Old gv version - only supports options that use single dash.
+ system("$GV -scale $main::opt_scale " .
+ PsTempName($main::next_tmpfile));
+ }
}
+ } else {
+ exit(1);
}
- } else {
- exit(1);
}
+ } else {
+ InteractiveMode($profile, $symbols, $libs, $total);
}
-} else {
- InteractiveMode();
+
+ cleanup();
+ exit(0);
}
-cleanup();
-exit(0);
+##### Entry Point #####
+Main();
+
+# Temporary workaround for Goobuntu systems, which lack what the
+# special Readline libraries need.  The presence of
+# /lib/libtermcap.so.2 is used as the signal: returns 0 when it exists
+# (readline should be okay) and 1 when it does not, in which case the
+# interactive loop uses the normal stdio code instead of the fancier
+# readline-based code.
+sub ReadlineMightFail {
+  return (-e '/lib/libtermcap.so.2') ? 0 : 1;
+}
##### Interactive helper routines #####
sub InteractiveMode {
- $| = 1; # Make output unbuffered for interactive mode
- my $orig_profile = $profile;
+ $| = 1; # Make output unbuffered for interactive mode
+ my ($orig_profile, $symbols, $libs, $total) = @_;
# Use ReadLine if it's installed.
- if ( defined(eval {require Term::ReadLine}) ) {
+ if ( !ReadlineMightFail() &&
+ defined(eval {require Term::ReadLine}) ) {
my $term = new Term::ReadLine 'pprof';
while ( defined ($_ = $term->readline('(pprof) '))) {
$term->addhistory($_) if /\S/;
- if (!InteractiveCommand($orig_profile, $_)) {
- last; # exit when we get an interactive command to quit
+ if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
+ last; # exit when we get an interactive command to quit
}
}
} else { # don't have readline
while (1) {
print "(pprof) ";
$_ = <STDIN>;
- if (!InteractiveCommand($orig_profile, $_)) {
- last; # exit when we get an interactive command to quit
+
+ # Save some flags that might be reset by InteractiveCommand()
+ my $save_opt_lines = $main::opt_lines;
+
+ if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
+ last; # exit when we get an interactive command to quit
}
+
+ # Restore flags
+ $main::opt_lines = $save_opt_lines;
}
}
}
@@ -477,7 +605,7 @@ sub InteractiveMode {
# Takes two args: orig profile, and command to run.
# Returns 1 if we should keep going, or 0 if we were asked to quit
sub InteractiveCommand {
- my($orig_profile, $command) = @_;
+ my($orig_profile, $symbols, $libs, $total, $command) = @_;
$_ = $command; # just to make future m//'s easier
if (!defined($_)) {
print "\n";
@@ -490,8 +618,7 @@ sub InteractiveCommand {
InteractiveHelpMessage();
return 1;
}
- # Clear all the options
- $main::opt_lines = 0;
+ # Clear all the mode options -- mode is controlled by "$command"
$main::opt_text = 0;
$main::opt_disasm = 0;
$main::opt_list = 0;
@@ -507,7 +634,7 @@ sub InteractiveCommand {
my $ignore;
($routine, $ignore) = ParseInteractiveArgs($3);
- my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
my $reduced = ReduceProfile($symbols, $profile);
# Get derived profiles
@@ -524,7 +651,7 @@ sub InteractiveCommand {
my $ignore;
($routine, $ignore) = ParseInteractiveArgs($1);
- my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
my $reduced = ReduceProfile($symbols, $profile);
# Get derived profiles
@@ -542,7 +669,7 @@ sub InteractiveCommand {
($routine, $ignore) = ParseInteractiveArgs($1);
# Process current profile to account for various settings
- my $profile = ProcessProfile($orig_profile, "", $ignore);
+ my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
my $reduced = ReduceProfile($symbols, $profile);
# Get derived profiles
@@ -560,7 +687,7 @@ sub InteractiveCommand {
($focus, $ignore) = ParseInteractiveArgs($1);
# Process current profile to account for various settings
- my $profile = ProcessProfile($orig_profile, $focus, $ignore);
+ my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore);
my $reduced = ReduceProfile($symbols, $profile);
# Get derived profiles
@@ -587,6 +714,7 @@ sub InteractiveCommand {
sub ProcessProfile {
my $orig_profile = shift;
+ my $symbols = shift;
my $focus = shift;
my $ignore = shift;
@@ -598,18 +726,18 @@ sub ProcessProfile {
$profile = FocusProfile($symbols, $profile, $focus);
my $focus_count = TotalProfile($profile);
printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n",
- $focus,
- Unparse($focus_count), Units(),
- Unparse($total_count), ($focus_count*100.0) / $total_count);
+ $focus,
+ Unparse($focus_count), Units(),
+ Unparse($total_count), ($focus_count*100.0) / $total_count);
}
if ($ignore ne '') {
$profile = IgnoreProfile($symbols, $profile, $ignore);
my $ignore_count = TotalProfile($profile);
printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n",
- $ignore,
- Unparse($ignore_count), Units(),
- Unparse($total_count),
- ($ignore_count*100.0) / $total_count);
+ $ignore,
+ Unparse($ignore_count), Units(),
+ Unparse($total_count),
+ ($ignore_count*100.0) / $total_count);
}
return $profile;
@@ -637,7 +765,7 @@ Commands:
Show top lines ordered by flat profile count, or cumulative count
if --cum is specified. If a number is present after 'top', the
top K routines will be shown (defaults to showing the top 10)
-
+
disasm [routine_regexp] [-ignore1] [-ignore2]
Show disassembly of routines whose names match "routine_regexp",
annotated with sample counts.
@@ -649,6 +777,10 @@ For commands that accept optional -ignore tags, samples where any routine in
the stack trace matches the regular expression in any of the -ignore
parameters will be ignored.
+Further pprof details are available at this location (or one similar):
+
+ /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html
+
ENDOFHELP
}
sub ParseInteractiveArgs {
@@ -1023,8 +1155,8 @@ sub PrintDot {
if ($nodelimit > 0 || $edgelimit > 0) {
printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n",
- Unparse($nodelimit), Units(),
- Unparse($edgelimit), Units());
+ Unparse($nodelimit), Units(),
+ Unparse($edgelimit), Units());
}
# Open DOT output file
@@ -1160,8 +1292,6 @@ sub OutputKey {
# Skip large addresses since they sometimes show up as fake entries on RH9
if (length($a) > 8) {
if ($a gt "7fffffffffffffff") { return ''; }
- } else {
- if (hex($a) > 0x7fffffff) { return ''; }
}
# Extract symbolic info for address
@@ -1220,7 +1350,7 @@ sub Unparse {
return sprintf("%.1f", $num / 1048576.0);
}
}
- } elsif ($main::lock_profile) {
+ } elsif ($main::lock_profile && !$main::opt_contentions) {
return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
} else {
return sprintf("%d", $num);
@@ -1249,7 +1379,7 @@ sub Units {
return "MB";
}
}
- } elsif ($main::lock_profile) {
+ } elsif ($main::lock_profile && !$main::opt_contentions) {
return "seconds";
} else {
return "samples";
@@ -1267,7 +1397,9 @@ sub FlatProfile {
foreach my $k (keys(%{$profile})) {
my $count = $profile->{$k};
my @addrs = split(/\n/, $k);
- AddEntry($result, $addrs[0], $count);
+ if ($#addrs >= 0) {
+ AddEntry($result, $addrs[0], $count);
+ }
}
return $result;
}
@@ -1458,14 +1590,191 @@ sub AddEntries {
##### Code to profile a server dynamically #####
+# Verifies that the server's symbol page is usable.  Fetches
+# SymbolPageURL() with wget and looks only at the first line of the
+# response, which must have the form "num_symbols: <N>".  Calls error()
+# when the fetch command cannot be started, when nothing comes back,
+# when the line is malformed, or when N is 0 (stripped binary).
+sub CheckSymbolPage {
+  my $url = SymbolPageURL();
+  my $command_line = "$WGET -qO- '$url'";
+  # Check the piped open, as FetchProgramName() does, rather than
+  # silently reading from a handle that was never opened.
+  open(SYMBOL, "$command_line |") or error($command_line);
+  my $line = <SYMBOL>;
+  close(SYMBOL);
+  unless (defined($line)) {
+    error("$url doesn't exist\n");
+  }
+
+  if ($line =~ /^num_symbols:\s+(\d+)$/) {
+    if ($1 == 0) {
+      error("Stripped binary. No symbols available.\n");
+    }
+  } else {
+    error("Failed to get the number of symbols from $url\n");
+  }
+}
+
+# Returns true iff $profile_name has the remote host:port[/service]
+# form, i.e. ParseProfileURL() yields a host, a port and a service.
+sub IsProfileURL {
+  my $profile_name = shift;
+  my ($host, $port, $type) = ParseProfileURL($profile_name);
+  # Use '&&', not 'and': 'return A and B' parses as '(return A) and B',
+  # so the port and type tests were never part of the returned value.
+  # (The result only happened to be right because ParseProfileURL()
+  # returns either all three fields or none.)
+  return (defined($host) && defined($port) && defined($type));
+}
+
+# Splits a remote profile name of the form
+#   [http://]host:port[<service>]
+# into the list (host, port, service).  <service> may be empty, "/",
+# or one of $PROFILE_PAGE, $HEAP_PAGE, $GROWTH_PAGE, $CONTENTION_PAGE.
+# Returns the empty list when the name is undefined or has another form.
+sub ParseProfileURL {
+  my $profile_name = shift;
+  # Init() probes $ARGV[0] before checking that any argument was given,
+  # so tolerate undef instead of pattern-matching against it.
+  return () unless defined($profile_name);
+  if ($profile_name =~ m,^(http://|)([^/:]+):(\d+)(|/|$PROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE)$,o) {
+    return ($2, $3, $4);
+  }
+  return ();
+}
+
+# Builds the URL of the symbol-lookup page.  Host and port come from
+# the first profile argument; its service component is not used.
+sub SymbolPageURL {
+  my ($server, $tcp_port) = ParseProfileURL($main::pfile_args[0]);
+  my $authority = "$server:$tcp_port";
+  return "http://$authority$SYMBOL_PAGE";
+}
+
+# Asks the server named by the first profile argument for its command
+# line ($PROGRAM_NAME_PAGE) and returns the program name: the first
+# line of the response, cut at the first NUL byte, with line feeds
+# removed.  Calls error() if wget cannot be started or no data arrives.
+sub FetchProgramName() {
+  my ($host, $port) = ParseProfileURL($main::pfile_args[0]);
+  my $url = "http://$host:$port$PROGRAM_NAME_PAGE";
+  my $command_line = "$WGET -qO- '$url'";
+  open(CMDLINE, "$command_line |") or error($command_line);
+  my $cmdline = <CMDLINE>;
+  close(CMDLINE);
+  error("Failed to get program name from $url\n") unless defined($cmdline);
+  # Remove argv[1] and everything after it.  ".*" with /s is used rather
+  # than ".+" so that a NUL which ends the line, or is followed only by
+  # "\n" (which "." does not match without /s), is removed as well.
+  $cmdline =~ s/\x00.*//s;
+  $cmdline =~ s!\n!!g;   # Remove LFs.
+  return $cmdline;
+}
+
+# Gee, curl's -L (--location) option isn't reliable at least
+# with its 7.12.3 version.  Curl will forget to post data if
+# there is a redirection.  This function is a workaround for
+# curl.  Redirection happens on borg hosts.
+#
+# Issues a HEAD request for $url and returns the value of the last
+# "Location:" header in the response, or $url itself if there is none.
+sub ResolveRedirectionForCurl {
+  my $url = shift;
+  my $command_line = "$CURL -s --head '$url'";
+  open(CMDLINE, "$command_line |") or error($command_line);
+  while (<CMDLINE>) {
+    # HTTP header lines end in CRLF and "." matches "\r", so strip CRs
+    # first; otherwise the returned URL would carry a trailing "\r".
+    s/\r//g;
+    # Header field names are case-insensitive, hence /i.
+    if (/^Location: (.*)/i) {
+      $url = $1;
+    }
+  }
+  close(CMDLINE);
+  return $url;
+}
+
+# Fetch symbols from $SYMBOL_PAGE for all PC values found in profile.
+#
+# $pcset is a hash reference whose keys are the PCs to look up; they
+# are sent as "0x<key>" and matched against "0x<hex> <name>" response
+# lines, so keys are expected to be lowercase hex without a prefix.
+# Returns a hash reference mapping each PC to
+# [short function name, "?", full name]; a PC the server did not
+# resolve gets "0x<pc>" as its full name.
+sub FetchSymbols {
+  my $pcset = shift;
+
+  # Hash keys are already unique, so no extra de-duplication is needed.
+  my @pcs = keys(%$pcset);
+  my $post_data = join("+", sort((map {"0x" . "$_"} @pcs)));
+  # Fail loudly if the POST body cannot be written; otherwise curl
+  # would be pointed at a missing or stale file.
+  open(POSTFILE, ">$main::tmpfile_sym")
+    or error("$main::tmpfile_sym: $!\n");
+  print POSTFILE $post_data;
+  close(POSTFILE);
+
+  my $url = SymbolPageURL();
+  # Here we use curl for sending data via POST since old
+  # wgets don't have --post-file option.
+  $url = ResolveRedirectionForCurl($url);
+  my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'";
+  # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
+  my $cppfilt = $obj_tool_map{"c++filt"};
+  open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
+
+  # Response lines look like "0x<hex> <symbol name>".
+  my %map;
+  while (<SYMBOL>) {
+    if (m/^0x([0-9a-f]+)\s+(.+)/) {
+      $map{$1} = $2;
+    }
+  }
+  close(SYMBOL);
+
+  my $symbols = {};
+  for my $pc (@pcs) {
+    my $fullname;
+    if (defined($map{$pc})) {
+      $fullname = $map{$pc};
+    } else {
+      $fullname = "0x" . $pc;   # Just use addresses
+    }
+    my $name = ShortFunctionName($fullname);
+    $symbols->{$pc} = [$name, "?", $fullname];
+  }
+  return $symbols;
+}
+
+# Returns $file_name with everything up to and including its last "/"
+# removed; a name without "/" is returned unchanged.
+sub BaseName {
+  my ($path) = @_;
+  (my $leaf = $path) =~ s!^.*/!!;   # Remove directory name
+  return $leaf;
+}
+
+# Builds the base file name for a fetched remote profile:
+#   <basename of binary>.<$main::op_time>.<host>-port<port>
+# Host and port are parsed out of $profile_name; its service part is
+# not used here.
+sub MakeProfileBaseName {
+  my ($binary_name, $profile_name) = @_;
+  my ($server, $tcp_port) = ParseProfileURL($profile_name);
+  my @parts = (BaseName($binary_name), $main::op_time, $server, $tcp_port);
+  return sprintf("%s.%s.%s-port%s", @parts);
+}
+
sub FetchDynamicProfile {
my $binary_name = shift;
my $profile_name = shift;
my $fetch_name_only = shift;
my $encourage_patience = shift;
- # TODO: Add support for fetching profiles dynamically from a server
- return $profile_name;
+ my $user_dir = $ENV{HOME};
+ my $profile_dir = $user_dir . "/pprof";
+ if (!(-d $profile_dir)) {
+ mkdir($profile_dir) || die("Unable to create profile directory $profile_dir\n");
+ }
+ if (!IsProfileURL($profile_name)) {
+ return $profile_name;
+ } else {
+ my ($host, $port, $type) = ParseProfileURL($profile_name);
+ if ($type eq "" || $type eq "/") {
+ # Missing type specifier defaults to cpu-profile
+ $type = $PROFILE_PAGE;
+ }
+
+ my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
+
+ my $url;
+ my $wget_timeout;
+ if ($type eq $PROFILE_PAGE) {
+ $url = sprintf("http://$host:$port$PROFILE_PAGE?seconds=%d",
+ $main::opt_seconds);
+ $wget_timeout = sprintf("--timeout=%d",
+ int($main::opt_seconds * 1.01 + 60));
+ } else {
+ # For non-CPU profiles, we add a type-extension to
+ # the target profile file name.
+ my $suffix = $type;
+ $suffix =~ s,/,.,g;
+ $profile_file .= "$suffix";
+ $url = "http://$host:$port$type";
+ $wget_timeout = "";
+ }
+ my $tmp_profile = "$profile_dir/.tmp.$profile_file";
+ my $real_profile = "$profile_dir/$profile_file";
+
+ if ($fetch_name_only > 0) {
+ return $real_profile;
+ }
+
+ my $cmd = "$WGET $wget_timeout -q -O $tmp_profile '$url'";
+ if ($type eq $PROFILE_PAGE) {
+ print STDERR "Gathering CPU profile from $host:$port for $main::opt_seconds seconds to\n ${real_profile}\n";
+ if ($encourage_patience) {
+ print STDERR "Be patient...\n";
+ }
+ } else {
+ print STDERR "Fetching $type profile from $host:$port to\n ${real_profile}\n";
+ }
+
+ (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
+ (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n");
+ print STDERR "Wrote profile to $real_profile\n";
+ $main::collected_profile = $real_profile;
+ return $main::collected_profile;
+ }
}
# Collect profiles in parallel
@@ -1543,10 +1852,11 @@ sub ReadProfile {
open(PROFILE, "<$fname") || error("$fname: $!\n");
binmode PROFILE; # New perls do UTF-8 processing
my $header = <PROFILE>;
+ my $contention_marker = substr($CONTENTION_PAGE, 1); # remove leading /
if ($header =~ m/^heap profile:/) {
$main::heap_profile = 1;
return ReadHeapProfile($prog, $fname, $header);
- } elsif ($header =~ m/^--- *contentionz/ ) {
+ } elsif ($header =~ m/^--- *$contention_marker/o ) {
$main::lock_profile = 1;
return ReadSynchProfile($prog, $fname);
} elsif ($header =~ m/^--- *Stacks:/ ) {
@@ -1581,17 +1891,11 @@ sub ReadCPUProfile {
my $pcs = {};
# Parse string into array of slots.
- # L! is needed for 64-bit # platforms, but not supported on 5.005
- # (despite the manpage claims)
+ # L! cannot be used because with a native 64-bit build, it will cause
+ # 1) a valid 64-bit profile to use the 32-bit codepath, and
+ # 2) a valid 32-bit profile to be unrecognized.
- my $format;
- if ($] >= 5.008) {
- $format = "L!*";
- } else {
- $format = "L*";
- }
-
- my @slots = unpack($format, $str);
+ my @slots = unpack("L*", $str);
# Read header. The current header version is a 5-element structure
# containing:
@@ -1713,15 +2017,55 @@ sub ReadHeapProfile {
$index = 2;
}
- # Find the type of this profile
+ # Find the type of this profile. The header line looks like:
+ # heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053
+ # There are two pairs <count: size>, the first inuse objects/space, and the
+ # second allocated objects/space. This is followed optionally by a profile
+ # type, and if that is present, optionally by a sampling frequency. The
+ # interpretation of the sampling frequency is that the profiler, for each
+ # sample, calculates a uniformly distributed random integer less than the
+ # given value, and records the next sample after that many bytes have been
+ # allocated. Therefore, the expected sample interval is half of the given
+ # frequency. By default, if not specified, the expected sample interval is
+ # 128KB. Only remote-heap-page profiles are adjusted for sample size.
+ my $should_adjust_sample = 0;
+ my $sample_adjustment = 0;
chomp($header);
my $type = "unknown";
- if ($header =~ m/^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*(.*))?/) {
+ if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") {
if (defined($6) && ($6 ne '')) {
$type = $6;
+ # The regex test here is to see if type is a substring of HEAP_PAGE
+ if (($HEAP_PAGE =~ /$type/)) {
+ $should_adjust_sample = 1;
+ if (defined($8) && ($8 ne '')) {
+ $sample_adjustment = int($8)/2;
+ printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n",
+ $sample_adjustment);
+ }
+ }
+ } else {
+ # We detect whether or not this is a remote-heap profile by checking
+ # that the total-allocated stats ($n2,$s2) are exactly the
+ # same as the in-use stats ($n1,$s1). It is remotely conceivable
+ # that a non-remote-heap profile may pass this check, but it is hard
+ # to imagine how that could happen.
+ my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+ if (($n1 == $n2) && ($s1 == $s2)) {
+ # This is likely to be a remote-heap based sample profile
+ $should_adjust_sample = 1;
+ }
}
}
+ # For remote-heap generated profiles, adjust the counts and sizes to
+ # account for the sample rate (we sample once every 128KB by default).
+ if ($should_adjust_sample && ($sample_adjustment == 0)) {
+ # Turn on profile adjustment.
+ $sample_adjustment = 128*1024;
+ print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n";
+ }
+
my $profile = {};
my $pcs = {};
my $map = "";
@@ -1739,13 +2083,13 @@ sub ReadHeapProfile {
# Read /proc/self/maps data as formatted by DumpAddressMap()
my $buildvar = "";
while (<PROFILE>) {
- # Parse "build=<dir>" specification if supplied
- if (m/^\s*build=(.*)\n/) {
- $buildvar = $1;
- }
+ # Parse "build=<dir>" specification if supplied
+ if (m/^\s*build=(.*)\n/) {
+ $buildvar = $1;
+ }
- # Expand "$build" variable if available
- $_ =~ s/\$build\b/$buildvar/g;
+ # Expand "$build" variable if available
+ $_ =~ s/\$build\b/$buildvar/g;
$map .= $_;
}
@@ -1760,6 +2104,20 @@ sub ReadHeapProfile {
my $stack = $5;
my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+ if ($sample_adjustment) {
+ my $ratio;
+ $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+ if ($ratio < 1) {
+ $n1 /= $ratio;
+ $s1 /= $ratio;
+ }
+ $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+ if ($ratio < 1) {
+ $n2 /= $ratio;
+ $s2 /= $ratio;
+ }
+ }
+
my @counts = ($n1, $s1, $n2, $s2);
AddEntries($profile, $pcs, $stack, $counts[$index]);
}
@@ -1785,17 +2143,35 @@ sub ReadSynchProfile {
my $seen_clockrate = 0;
my $line;
+ my $index = 0;
+ if ($main::opt_total_delay) {
+ $index = 0;
+ } elsif ($main::opt_contentions) {
+ $index = 1;
+ } elsif ($main::opt_mean_delay) {
+ $index = 2;
+ }
+
while ( $line = <PROFILE> ) {
- if ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
- $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
- my ($count, $stack) = ($1, $2);
- if ($count !~ /^\d+$/) {
+ if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) {
+ my ($cycles, $count, $stack) = ($1, $2, $3);
+
+ # Convert cycles to nanoseconds
+ $cycles /= $cyclespernanosec;
+
+ my @values = ($cycles, $count, $cycles / $count);
+ AddEntries($profile, $pcs, $stack, $values[$index]);
+
+ } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
+ $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
+ my ($cycles, $stack) = ($1, $2);
+ if ($cycles !~ /^\d+$/) {
next;
}
# Convert cycles to nanoseconds
- $count /= $cyclespernanosec;
- AddEntries($profile, $pcs, $stack, $count);
+ $cycles /= $cyclespernanosec;
+ AddEntries($profile, $pcs, $stack, $cycles);
} elsif ( $line =~ m|cycles/second = (\d+)|) {
$cyclespernanosec = $1 / 1e9;
@@ -1838,6 +2214,7 @@ sub HexExtend {
# Split /proc/pid/maps dump into a list of libraries
sub ParseLibraries {
+ return if $main::use_symbol_page; # We don't need libraries info.
my $prog = shift;
my $map = shift;
my $pcs = shift;
diff --git a/src/profiler.cc b/src/profiler.cc
index 5843720..8ddcc41 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -460,12 +460,12 @@ void ProfileData::SetHandler(void (*handler)(int)) {
}
void ProfileData::FlushTable() {
- if (out_ < 0) {
- // Profiling is not enabled
- return;
- }
-
LOCK(&state_lock_); {
+ if (out_ < 0) {
+ // Profiling is not enabled
+ UNLOCK(&state_lock_);
+ return;
+ }
SetHandler(SIG_IGN); // Disable timer interrupts while we're flushing
LOCK(&table_lock_); {
// Move data from hash table to eviction buffer
diff --git a/src/stacktrace.cc b/src/stacktrace.cc
index 859d52a..da20659 100644
--- a/src/stacktrace.cc
+++ b/src/stacktrace.cc
@@ -45,17 +45,14 @@
#include "stacktrace_x86-inl.h"
#endif
-#if !defined(IMPLEMENTED_STACK_TRACE) && defined(USE_LIBUNWIND) && HAVE_LIBUNWIND_H
+#if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_LIBUNWIND_H
#define IMPLEMENTED_STACK_TRACE
-// This is turned off by default. Possible reasons for turning on in the
-// future:
-// 1. Compiler independence
-// 2. Architecture independence
-// 3. A more liberal MIT license, which allows use with multiple compilers
+#define UNW_LOCAL_ONLY
#include "stacktrace_libunwind-inl.h"
#endif
#if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_UNWIND_H
+// This implementation suffers from deadlocks. Don't enable it.
#define IMPLEMENTED_STACK_TRACE
#include "stacktrace_x86_64-inl.h"
#endif
diff --git a/src/stacktrace_libunwind-inl.h b/src/stacktrace_libunwind-inl.h
index 42c28d3..bf39633 100644
--- a/src/stacktrace_libunwind-inl.h
+++ b/src/stacktrace_libunwind-inl.h
@@ -51,14 +51,14 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
do {
ret = unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip);
- assert(ret == 0);
+ if (ret < 0)
+ break;
if (skip_count > 0) {
skip_count--;
} else {
result[n++] = ip;
}
ret = unw_step(&cursor);
- assert(ret >= 0);
} while ((n < max_depth) && (ret > 0));
return n;
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 5dc062e..bf45dfb 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -79,6 +79,7 @@
#include <unistd.h>
#include <errno.h>
#include <stdarg.h>
+#include "base/commandlineflags.h"
#include "google/malloc_hook.h"
#include "google/malloc_extension.h"
#include "google/stacktrace.h"
@@ -147,12 +148,27 @@ static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
// REQUIRED: kMaxPages >= kMinSystemAlloc;
static const size_t kMaxPages = kMinSystemAlloc;
+/* The smallest prime > 2^n */
+static unsigned int primes_list[] = {
+ // Small values might cause high rates of sampling
+ // and are hence commented out.
+ // 2, 5, 11, 17, 37, 67, 131, 257,
+ // 521, 1031, 2053, 4099, 8209, 16411,
+ 32771, 65537, 131101, 262147, 524309, 1048583,
+ 2097169, 4194319, 8388617, 16777259, 33554467 };
+
// Twice the approximate gap between sampling actions.
// I.e., we take one sample approximately once every
-// kSampleParameter/2
+// tcmalloc_sample_parameter/2
// bytes of allocation, i.e., ~ once every 128KB.
// Must be a prime number.
-static const size_t kSampleParameter = 266053;
+DEFINE_int64(tcmalloc_sample_parameter, 262147,
+ "Twice the approximate gap between sampling actions."
+ " Must be a prime number. Otherwise will be rounded up to a "
+ " larger prime number");
+static size_t sample_period = 262147;
+// Protects sample_period above
+static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
//-------------------------------------------------------------------
// Mapping from size to size_class and vice versa
@@ -303,6 +319,17 @@ static int NumMoveSize(size_t size) {
// and thread caches.
if (num > static_cast<int>(0.8 * kMaxFreeListLength))
num = static_cast<int>(0.8 * kMaxFreeListLength);
+
+ // Also, avoid bringing in too many objects into small object free
+ // lists. There are lots of such lists, and if we allow each one to
+ // fetch too many at a time, we end up having to scavenge too often
+ // (especially when there are lots of threads and each thread gets a
+ // small allowance for its thread cache).
+ //
+ // TODO: Make thread cache free list sizes dynamic so that we do not
+ // have to equally divide a fixed resource amongst lots of threads.
+ if (num > 32) num = 32;
+
return num;
}
@@ -918,7 +945,7 @@ void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
uint64_t large_pages = 0;
int large_spans = 0;
for (Span* s = large_.next; s != &large_; s = s->next) {
- out->printf(" [ %6" PRIuS " spans ]\n", s->length);
+ out->printf(" [ %6" PRIuS " pages ]\n", s->length);
large_pages += s->length;
large_spans++;
}
@@ -1057,6 +1084,7 @@ class TCMalloc_ThreadCache_FreeList {
SLL_PopRange(&list_, N, start, end);
ASSERT(length_ >= N);
length_ -= N;
+ if (length_ < lowater_) lowater_ = length_;
}
};
@@ -1669,9 +1697,23 @@ void TCMalloc_ThreadCache::PickNextSample() {
uint32_t r = rnd_;
rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
- // Next point is "rnd_ % (2*sample_period)". I.e., average
- // increment is "sample_period".
- bytes_until_sample_ = rnd_ % kSampleParameter;
+ // Next point is "rnd_ % (sample_period)". I.e., average
+ // increment is "sample_period/2".
+ const int flag_value = FLAGS_tcmalloc_sample_parameter;
+ static int last_flag_value = -1;
+
+ if (flag_value != last_flag_value) {
+ SpinLockHolder h(&sample_period_lock);
+ int i;
+ for (i = 0; i < (sizeof(primes_list)/sizeof(primes_list[0]) - 1); i++) {
+ if (primes_list[i] >= flag_value) {
+ break;
+ }
+ }
+ sample_period = primes_list[i];
+ last_flag_value = flag_value;
+ }
+ bytes_until_sample_ = rnd_ % sample_period;
}
void TCMalloc_ThreadCache::InitModule() {
@@ -2118,7 +2160,7 @@ static inline void* do_malloc(size_t size) {
}
// The following call forces module initialization
TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
- if (heap->SampleAllocation(size)) {
+ if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
Span* span = DoSampledAllocation(size);
if (span != NULL) {
ret = reinterpret_cast<void*>(span->start << kPageShift);
diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc
index e9ec6c3..3e85e7a 100644
--- a/src/tests/heap-checker_unittest.cc
+++ b/src/tests/heap-checker_unittest.cc
@@ -273,6 +273,7 @@ static void DoRunHidden(Closure* c, int n) {
if (n) {
run_hidden_ptr(c, n-1);
wipe_stack_ptr(n);
+ sleep(0); // undo -foptimize-sibling-calls
} else {
c->Run();
}
@@ -284,6 +285,7 @@ static void DoWipeStack(int n) {
volatile int arr[sz];
for (int i = 0; i < sz; ++i) arr[i] = 0;
wipe_stack_ptr(n-1);
+ sleep(0); // undo -foptimize-sibling-calls
}
}
@@ -463,14 +465,14 @@ static void TestHeapLeakCheckerPProf() {
// trick heap change: same total # of bytes and objects, but
// different individual object sizes
static void TestHeapLeakCheckerTrick() {
- void* bar1 = AllocHidden(60 * sizeof(int));
+ void* bar1 = AllocHidden(240 * sizeof(int));
Use(&bar1);
- void* bar2 = AllocHidden(40 * sizeof(int));
+ void* bar2 = AllocHidden(160 * sizeof(int));
Use(&bar2);
HeapLeakChecker check("trick");
- void* foo1 = AllocHidden(70 * sizeof(int));
+ void* foo1 = AllocHidden(280 * sizeof(int));
Use(&foo1);
- void* foo2 = AllocHidden(30 * sizeof(int));
+ void* foo2 = AllocHidden(120 * sizeof(int));
Use(&foo2);
DeAllocHidden(&bar1);
DeAllocHidden(&bar2);
@@ -482,16 +484,16 @@ static void TestHeapLeakCheckerTrick() {
// no false negatives from pprof
static void TestHeapLeakCheckerDeathTrick() {
- void* bar1 = AllocHidden(60 * sizeof(int));
+ void* bar1 = AllocHidden(240 * sizeof(int));
Use(&bar1);
- void* bar2 = AllocHidden(40 * sizeof(int));
+ void* bar2 = AllocHidden(160 * sizeof(int));
Use(&bar2);
HeapLeakChecker check("death_trick");
DeAllocHidden(&bar1);
DeAllocHidden(&bar2);
- void* foo1 = AllocHidden(70 * sizeof(int));
+ void* foo1 = AllocHidden(280 * sizeof(int));
Use(&foo1);
- void* foo2 = AllocHidden(30 * sizeof(int));
+ void* foo2 = AllocHidden(120 * sizeof(int));
Use(&foo2);
// TODO(maxim): use the above if we make pprof work in automated test runs
if (!FLAGS_maybe_stripped) {
@@ -733,13 +735,19 @@ static void* HeapBusyThreadBody(void* a) {
}
}
if (FLAGS_test_register_leak) {
- // Hide the register pointer value with an xor mask.
+ // Hide the register "ptr" value with an xor mask.
// If one provides --test_register_leak flag, the test should
// (with very high probability) crash on some leak check
// with a leak report (of some x * sizeof(int) + y * sizeof(int*) bytes)
// pointing at the two lines above in this function
// with "new (initialized) int" in them as the allocators
// of the leaked objects.
+ // CAVEAT: We can't really prevent a compiler from saving some
+ // temporary values of "ptr" on the stack, thus letting us find
+ // the heap objects by means other than the register.
+ // Hence it's normal if, for certain compilers or optimization modes,
+ // --test_register_leak does not cause a leak crash of the above form
+ // (this happens e.g. for gcc 4.0.1 in opt mode).
ptr = reinterpret_cast<int **>(
reinterpret_cast<uintptr_t>(ptr) ^ kHideMask);
// busy loop to get the thread interrupted at:
diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc
index b030e32..9f2df59 100644
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@@ -399,11 +399,14 @@ static void TestHugeAllocations() {
for (size_t i = 0; i < 10000; i++) {
TryHugeAllocation(kMaxSize - i);
}
-
- // Check that asking for stuff near signed/unsigned boundary returns NULL
+ // Asking for memory sizes near signed/unsigned boundary (kMaxSignedSize)
+ // might work or not, depending on the amount of virtual memory.
for (size_t i = 0; i < 100; i++) {
- TryHugeAllocation(kMaxSignedSize - i);
- TryHugeAllocation(kMaxSignedSize + i);
+ void* p = NULL;
+ p = malloc(kMaxSignedSize + i);
+ if (p) free(p); // if: free(NULL) is not necessarily defined
+ p = malloc(kMaxSignedSize - i);
+ if (p) free(p);
}
}
@@ -560,18 +563,6 @@ int main(int argc, char** argv) {
free(p);
}
- // Check that large allocations fail with NULL instead of crashing
- fprintf(LOGSTREAM, "==== Testing out of memory\n");
- for (int s = 0; ; s += (10<<20)) {
- void* large_object = malloc(s);
- if (large_object == NULL) break;
- free(large_object);
- }
-
- // Check that huge allocations fail with NULL instead of crashing
- fprintf(LOGSTREAM, "==== Testing huge allocations\n");
- TestHugeAllocations();
-
// Check calloc() with various arguments
fprintf(LOGSTREAM, "==== Testing calloc\n");
TestCalloc(0, 0, true);
@@ -611,10 +602,16 @@ int main(int argc, char** argv) {
threads[i] = new TesterThread(i);
}
- // Start
+ // Start the threads.
+ // Set the stack size to a small value to avoid inheriting a 120MB+
+ // limit when running under the google make system.
+ pthread_attr_t attr;
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 1 << 20);
for (int i = 0; i < FLAGS_numthreads; ++i) {
- CHECK_EQ(pthread_create(&thread_ids[i], NULL, RunThread, threads[i]), 0);
+ CHECK_EQ(pthread_create(&thread_ids[i], &attr, RunThread, threads[i]), 0);
}
+ pthread_attr_destroy(&attr);
// Wait
for (int i = 0; i < FLAGS_numthreads; ++i) {
@@ -624,6 +621,21 @@ int main(int argc, char** argv) {
for (int i = 0; i < FLAGS_numthreads; ++i) delete threads[i]; // Cleanup
+ // Do the memory intensive tests after threads are done, since exhausting
+ // the available address space can cause pthread_create to fail.
+
+ // Check that huge allocations fail with NULL instead of crashing
+ fprintf(LOGSTREAM, "==== Testing huge allocations\n");
+ TestHugeAllocations();
+
+ // Check that large allocations fail with NULL instead of crashing
+ fprintf(LOGSTREAM, "==== Testing out of memory\n");
+ for (int s = 0; ; s += (10<<20)) {
+ void* large_object = malloc(s);
+ if (large_object == NULL) break;
+ free(large_object);
+ }
+
fprintf(LOGSTREAM, "PASS\n");
return 0;
}