diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2008-09-19 20:06:40 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2008-09-19 20:06:40 +0000 |
commit | 106aef86ce5697cf44bdbec90ab6833b9254d273 (patch) | |
tree | 5f105b677eb6b76afcbdc752778ed5ae14e63462 | |
parent | 100e657c5092bc274424286a728db5116a4bbc54 (diff) | |
download | gperftools-106aef86ce5697cf44bdbec90ab6833b9254d273.tar.gz |
Thu Sep 18 16:00:27 2008 Google Inc. <opensource@google.com>
* google-perftools: version 0.99 release
* Add IsHeapProfilerRunning (csilvers)
* Add C shims for some of the C++ header files (csilvers)
* Fix heap profile file clean-up logic (maxim)
* Rename linuxthreads.c to .cc for better compiler support (csilvers)
* Add source info to disassembly in pprof (sanjay)
* Use open instead of fopen to avoid memory alloc (csilvers)
* Disable malloc extensions when running under valgrind (kcc)
* BUG FIX: Fix out-of-bound error by reordering a check (larryz)
* Add Options struct to ProfileData (cgd)
* Correct PC-handling of --base in pprof (csilvers)
* Handle 1 function occurring twice in an image (sanjay)
* Improve stack-data cleaning (maxim)
* Use 'struct Foo' to make header C compatible (csilvers)
* Add 'total' line to pprof --text (csilvers)
* Pre-allocate buffer for heap-profiler to avoid OOM errors (csilvers)
* Allow a few more env-settings to control tcmalloc (csilvers)
* Document some of the issues involving thread-local storage (csilvers)
* BUG FIX: Define strtoll and friends for windows (csilvers)
git-svn-id: http://gperftools.googlecode.com/svn/trunk@54 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
50 files changed, 1473 insertions, 405 deletions
@@ -1,3 +1,25 @@ +Thu Sep 18 16:00:27 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.99 release + * Add IsHeapProfilerRunning (csilvers) + * Add C shims for some of the C++ header files (csilvers) + * Fix heap profile file clean-up logic (maxim) + * Rename linuxthreads.c to .cc for better compiler support (csilvers) + * Add source info to disassembly in pprof (sanjay) + * Use open instead of fopen to avoid memory alloc (csilvers) + * Disable malloc extensions when running under valgrind (kcc) + * BUG FIX: Fix out-of-bound error by reordering a check (larryz) + * Add Options struct to ProfileData (cgd) + * Correct PC-handling of --base in pprof (csilvers) + * Handle 1 function occurring twice in an image (sanjay) + * Improve stack-data cleaning (maxim) + * Use 'struct Foo' to make header C compatible (csilvers) + * Add 'total' line to pprof --text (csilvers) + * Pre-allocate buffer for heap-profiler to avoid OOM errors (csilvers) + * Allow a few more env-settings to control tcmalloc (csilvers) + * Document some of the issues involving thread-local storage (csilvers) + * BUG FIX: Define strtoll and friends for windows (csilvers) + Mon Jun 9 16:47:03 2008 Google Inc. <opensource@google.com> * google-perftools: version 0.98 release @@ -57,6 +57,19 @@ Even with the use of libunwind, there are still known problems with stack unwinding on 64-bit systems, particularly x86-64. See the "64-BIT ISSUES" section in README. +*** NOTE FOR ___tls_get_addr ERROR + +When compiling perftools on some old systems, like RedHat 8, you may +get an error like this: + ___tls_get_addr: symbol not found + +This means that you have a system where some parts are updated enough +to support Thread Local Storage, but others are not. The perftools +configure script can't always detect this kind of case, leading to +that error. To fix it, just comment out (or delete) the line + #define HAVE_TLS 1 +in your config.h file before building. 
+ *** COMPILING ON NON-LINUX SYSTEMS @@ -164,8 +177,8 @@ above, by linking in libtcmalloc_minimal. not yet implemented (see TODOs in src/windows/port.cc if you're interested in playing around with this). But as in other ports, the basic tcmalloc library functionality, overriding malloc and new - and such, is working fine, both with VC++ 7.1 (Visual Studio 2003) - and VC++ 8.0 (Visual Studio 2005). See README.windows for + and such, is working fine at least with VC++ 8.0 (Visual Studio + 2005) when compiled in debug mode. See README.windows for instructions on how to install on Windows using Visual Studio. This Windows functionality is also available using MinGW and Msys. diff --git a/Makefile.am b/Makefile.am index 65399cb..2703c72 100644 --- a/Makefile.am +++ b/Makefile.am @@ -149,6 +149,8 @@ WINDOWS_PROJECTS += vsprojects/low_level_alloc_unittest/low_level_alloc_unittest LOW_LEVEL_ALLOC_UNITTEST_INCLUDES = src/base/low_level_alloc.h \ src/base/basictypes.h \ src/google/malloc_hook.h \ + src/google/malloc_hook_c.h \ + src/google/malloc_hook_c.h \ src/malloc_hook-inl.h \ $(SPINLOCK_INCLUDES) \ $(LOGGING_INCLUDES) @@ -240,6 +242,7 @@ S_TCMALLOC_MINIMAL_INCLUDES = src/internal_logging.h \ src/malloc_hook-inl.h \ src/maybe_threads.h SG_TCMALLOC_MINIMAL_INCLUDES = src/google/malloc_hook.h \ + src/google/malloc_hook_c.h \ src/google/malloc_extension.h \ src/google/stacktrace.h TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) @@ -271,6 +274,7 @@ LIBTCMALLOC_MINIMAL = libtcmalloc_minimal.la ## TESTS += malloc_unittest ## MALLOC_UNITEST_INCLUDES = src/google/malloc_extension.h \ ## src/google/malloc_hook.h \ +## src/google/malloc_hook_c.h \ ## src/malloc_hook-inl.h \ ## src/base/basictypes.h \ ## src/maybe_threads.h @@ -461,6 +465,7 @@ S_TCMALLOC_INCLUDES = src/internal_logging.h \ $(SPINLOCK_INCLUDES) \ $(LOGGING_INCLUDES) SG_TCMALLOC_INCLUDES = src/google/malloc_hook.h \ + src/google/malloc_hook_c.h \ 
src/google/malloc_extension.h \ src/google/heap-profiler.h \ src/google/heap-checker.h \ @@ -482,7 +487,7 @@ libtcmalloc_la_SOURCES = src/internal_logging.cc \ src/heap-profiler.cc \ src/heap-profile-table.cc \ src/heap-checker.cc \ - src/base/linuxthreads.c \ + src/base/linuxthreads.cc \ src/base/thread_lister.c \ src/base/low_level_alloc.cc \ $(TCMALLOC_INCLUDES) \ diff --git a/Makefile.in b/Makefile.in index f34f0e4..e64d67c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -212,7 +212,7 @@ am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \ src/malloc_hook.cc src/malloc_extension.cc \ src/maybe_threads.cc src/memory_region_map.cc \ src/heap-profiler.cc src/heap-profile-table.cc \ - src/heap-checker.cc src/base/linuxthreads.c \ + src/heap-checker.cc src/base/linuxthreads.cc \ src/base/thread_lister.c src/base/low_level_alloc.cc \ src/internal_logging.h src/system-alloc.h src/pagemap.h \ src/addressmap-inl.h src/malloc_hook-inl.h \ @@ -228,9 +228,9 @@ am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h src/base/logging.h \ src/base/dynamic_annotations.h src/google/malloc_hook.h \ - src/google/malloc_extension.h src/google/heap-profiler.h \ - src/google/heap-checker.h src/google/stacktrace.h \ - src/heap-checker-bcad.cc + src/google/malloc_hook_c.h src/google/malloc_extension.h \ + src/google/heap-profiler.h src/google/heap-checker.h \ + src/google/stacktrace.h src/heap-checker-bcad.cc @MINGW_FALSE@am__objects_5 = libtcmalloc_la-system-alloc.lo @MINGW_FALSE@am__objects_6 = libtcmalloc_la-maybe_threads.lo @MINGW_FALSE@am__objects_7 = $(am__objects_1) $(am__objects_1) @@ -245,8 +245,8 @@ am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \ @MINGW_FALSE@ libtcmalloc_la-memory_region_map.lo \ @MINGW_FALSE@ libtcmalloc_la-heap-profiler.lo \ @MINGW_FALSE@ libtcmalloc_la-heap-profile-table.lo \ -@MINGW_FALSE@ libtcmalloc_la-heap-checker.lo linuxthreads.lo \ -@MINGW_FALSE@ 
thread_lister.lo \ +@MINGW_FALSE@ libtcmalloc_la-heap-checker.lo \ +@MINGW_FALSE@ libtcmalloc_la-linuxthreads.lo thread_lister.lo \ @MINGW_FALSE@ libtcmalloc_la-low_level_alloc.lo \ @MINGW_FALSE@ $(am__objects_8) \ @MINGW_FALSE@ libtcmalloc_la-heap-checker-bcad.lo @@ -265,7 +265,8 @@ am__libtcmalloc_minimal_la_SOURCES_DIST = src/internal_logging.cc \ src/base/atomicops-internals-x86.h src/base/commandlineflags.h \ src/base/basictypes.h src/pagemap.h src/malloc_hook-inl.h \ src/maybe_threads.h src/google/malloc_hook.h \ - src/google/malloc_extension.h src/google/stacktrace.h + src/google/malloc_hook_c.h src/google/malloc_extension.h \ + src/google/stacktrace.h @MINGW_FALSE@am__objects_9 = libtcmalloc_minimal_la-system-alloc.lo @MINGW_FALSE@am__objects_10 = libtcmalloc_minimal_la-maybe_threads.lo am__objects_11 = $(am__objects_1) @@ -390,8 +391,8 @@ am__low_level_alloc_unittest_SOURCES_DIST = \ src/base/low_level_alloc.cc src/malloc_hook.cc \ src/tests/low_level_alloc_unittest.cc \ src/base/low_level_alloc.h src/base/basictypes.h \ - src/google/malloc_hook.h src/malloc_hook-inl.h \ - src/base/spinlock.h src/base/atomicops.h \ + src/google/malloc_hook.h src/google/malloc_hook_c.h \ + src/malloc_hook-inl.h src/base/spinlock.h src/base/atomicops.h \ src/base/atomicops-internals-macosx.h \ src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ @@ -673,9 +674,9 @@ am__dist_doc_DATA_DIST = AUTHORS COPYING ChangeLog INSTALL NEWS README \ dist_docDATA_INSTALL = $(INSTALL_DATA) DATA = $(dist_doc_DATA) am__googleinclude_HEADERS_DIST = src/google/stacktrace.h \ - src/google/malloc_hook.h src/google/malloc_extension.h \ - src/google/heap-profiler.h src/google/heap-checker.h \ - src/google/profiler.h + src/google/malloc_hook.h src/google/malloc_hook_c.h \ + src/google/malloc_extension.h src/google/heap-profiler.h \ + src/google/heap-checker.h src/google/profiler.h googleincludeHEADERS_INSTALL = $(INSTALL_HEADER) HEADERS = $(googleinclude_HEADERS) 
ETAGS = etags @@ -993,6 +994,8 @@ libsysinfo_la_SOURCES = src/base/sysinfo.cc \ LOW_LEVEL_ALLOC_UNITTEST_INCLUDES = src/base/low_level_alloc.h \ src/base/basictypes.h \ src/google/malloc_hook.h \ + src/google/malloc_hook_c.h \ + src/google/malloc_hook_c.h \ src/malloc_hook-inl.h \ $(SPINLOCK_INCLUDES) \ $(LOGGING_INCLUDES) @@ -1064,6 +1067,7 @@ S_TCMALLOC_MINIMAL_INCLUDES = src/internal_logging.h \ src/maybe_threads.h SG_TCMALLOC_MINIMAL_INCLUDES = src/google/malloc_hook.h \ + src/google/malloc_hook_c.h \ src/google/malloc_extension.h \ src/google/stacktrace.h @@ -1188,6 +1192,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@ $(LOGGING_INCLUDES) @MINGW_FALSE@SG_TCMALLOC_INCLUDES = src/google/malloc_hook.h \ +@MINGW_FALSE@ src/google/malloc_hook_c.h \ @MINGW_FALSE@ src/google/malloc_extension.h \ @MINGW_FALSE@ src/google/heap-profiler.h \ @MINGW_FALSE@ src/google/heap-checker.h \ @@ -1206,7 +1211,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@ src/heap-profiler.cc \ @MINGW_FALSE@ src/heap-profile-table.cc \ @MINGW_FALSE@ src/heap-checker.cc \ -@MINGW_FALSE@ src/base/linuxthreads.c \ +@MINGW_FALSE@ src/base/linuxthreads.cc \ @MINGW_FALSE@ src/base/thread_lister.c \ @MINGW_FALSE@ src/base/low_level_alloc.cc \ @MINGW_FALSE@ $(TCMALLOC_INCLUDES) \ @@ -1590,6 +1595,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-heap-profile-table.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-heap-profiler.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-internal_logging.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-linuxthreads.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-low_level_alloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-malloc_extension.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-malloc_hook.Plo@am__quote@ @@ 
-1605,7 +1611,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_minimal_la-memfs_malloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_minimal_la-system-alloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_minimal_la-tcmalloc.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/linuxthreads.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/logging.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/low_level_alloc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/low_level_alloc_unittest.Po@am__quote@ @@ -1671,13 +1676,6 @@ distclean-compile: @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< -linuxthreads.lo: src/base/linuxthreads.c -@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT linuxthreads.lo -MD -MP -MF "$(DEPDIR)/linuxthreads.Tpo" -c -o linuxthreads.lo `test -f 'src/base/linuxthreads.c' || echo '$(srcdir)/'`src/base/linuxthreads.c; \ -@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/linuxthreads.Tpo" "$(DEPDIR)/linuxthreads.Plo"; else rm -f "$(DEPDIR)/linuxthreads.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/base/linuxthreads.c' object='linuxthreads.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o linuxthreads.lo `test -f 'src/base/linuxthreads.c' || echo '$(srcdir)/'`src/base/linuxthreads.c - thread_lister.lo: src/base/thread_lister.c @am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(AM_CFLAGS) $(CFLAGS) -MT thread_lister.lo -MD -MP -MF "$(DEPDIR)/thread_lister.Tpo" -c -o thread_lister.lo `test -f 'src/base/thread_lister.c' || echo '$(srcdir)/'`src/base/thread_lister.c; \ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/thread_lister.Tpo" "$(DEPDIR)/thread_lister.Plo"; else rm -f "$(DEPDIR)/thread_lister.Tpo"; exit 1; fi @@ -1867,6 +1865,13 @@ libtcmalloc_la-heap-checker.lo: src/heap-checker.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -c -o libtcmalloc_la-heap-checker.lo `test -f 'src/heap-checker.cc' || echo '$(srcdir)/'`src/heap-checker.cc +libtcmalloc_la-linuxthreads.lo: src/base/linuxthreads.cc +@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -MT libtcmalloc_la-linuxthreads.lo -MD -MP -MF "$(DEPDIR)/libtcmalloc_la-linuxthreads.Tpo" -c -o libtcmalloc_la-linuxthreads.lo `test -f 'src/base/linuxthreads.cc' || echo '$(srcdir)/'`src/base/linuxthreads.cc; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libtcmalloc_la-linuxthreads.Tpo" "$(DEPDIR)/libtcmalloc_la-linuxthreads.Plo"; else rm -f "$(DEPDIR)/libtcmalloc_la-linuxthreads.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/linuxthreads.cc' object='libtcmalloc_la-linuxthreads.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -c -o libtcmalloc_la-linuxthreads.lo `test -f 'src/base/linuxthreads.cc' || echo '$(srcdir)/'`src/base/linuxthreads.cc + libtcmalloc_la-low_level_alloc.lo: 
src/base/low_level_alloc.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -MT libtcmalloc_la-low_level_alloc.lo -MD -MP -MF "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo" -c -o libtcmalloc_la-low_level_alloc.lo `test -f 'src/base/low_level_alloc.cc' || echo '$(srcdir)/'`src/base/low_level_alloc.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo" "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Plo"; else rm -f "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo"; exit 1; fi @@ -142,6 +142,21 @@ win32's malloc. http://gaiacrtn.free.fr/articles/win32perftools.html +OLD SYSTEM ISSUES +----------------- + +When compiling perftools on some old systems, like RedHat 8, you may +get an error like this: + ___tls_get_addr: symbol not found + +This means that you have a system where some parts are updated enough +to support Thread Local Storage, but others are not. The perftools +configure script can't always detect this kind of case, leading to +that error. To fix it, just comment out (or delete) the line + #define HAVE_TLS 1 +in your config.h file before building. + + 64-BIT ISSUES ------------- diff --git a/README.windows b/README.windows index dd6aed1..8c16e0b 100644 --- a/README.windows +++ b/README.windows @@ -6,6 +6,12 @@ You can load this solution file into either VC++ 7.1 (Visual Studio 2003) or VC++ 8.0 (Visual Studio 2005) -- in the latter case, it will automatically convert the files to the latest format for you. +There are a few issues compiling with VC++ 7.1, due to its lack of +support for __VA_ARGS__. If you need to compile perftools in that +environment, send mail to google-perftools@googlegroups.com; there may +be ways to work around this issue. The code compiles without problem +in VC++ 8.0 (Visual Studio 2005). 
+ When you build the solution, it will create a number of unittests, which you can run by hand (or, more easily, under the Visual Studio debugger) to make sure everything is working properly on your system. @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.59 for google-perftools 0.98. +# Generated by GNU Autoconf 2.59 for google-perftools 0.99. # # Report bugs to <opensource@google.com>. # @@ -423,8 +423,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='google-perftools' PACKAGE_TARNAME='google-perftools' -PACKAGE_VERSION='0.98' -PACKAGE_STRING='google-perftools 0.98' +PACKAGE_VERSION='0.99' +PACKAGE_STRING='google-perftools 0.99' PACKAGE_BUGREPORT='opensource@google.com' ac_unique_file="README" @@ -954,7 +954,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures google-perftools 0.98 to adapt to many kinds of systems. +\`configure' configures google-perftools 0.99 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1021,7 +1021,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of google-perftools 0.98:";; + short | recursive ) echo "Configuration of google-perftools 0.99:";; esac cat <<\_ACEOF @@ -1162,7 +1162,7 @@ fi test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF -google-perftools configure 0.98 +google-perftools configure 0.99 generated by GNU Autoconf 2.59 Copyright (C) 2003 Free Software Foundation, Inc. @@ -1176,7 +1176,7 @@ cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
-It was created by google-perftools $as_me 0.98, which was +It was created by google-perftools $as_me 0.99, which was generated by GNU Autoconf 2.59. Invocation command line was $ $0 $@ @@ -1904,7 +1904,7 @@ fi # Define the identity of the package. PACKAGE='google-perftools' - VERSION='0.98' + VERSION='0.99' cat >>confdefs.h <<_ACEOF @@ -24323,7 +24323,7 @@ _ASBOX } >&5 cat >&5 <<_CSEOF -This file was extended by google-perftools $as_me 0.98, which was +This file was extended by google-perftools $as_me 0.99, which was generated by GNU Autoconf 2.59. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -24386,7 +24386,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -google-perftools config.status 0.98 +google-perftools config.status 0.99 configured by $0, generated by GNU Autoconf 2.59, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" diff --git a/configure.ac b/configure.ac index 72361d1..059450b 100644 --- a/configure.ac +++ b/configure.ac @@ -4,7 +4,7 @@ # make sure we're interpreted by some minimal autoconf AC_PREREQ(2.57) -AC_INIT(google-perftools, 0.98, opensource@google.com) +AC_INIT(google-perftools, 0.99, opensource@google.com) # The argument here is just something that should be in the current directory # (for sanity checking) AC_CONFIG_SRCDIR(README) diff --git a/doc/heapprofile.html b/doc/heapprofile.html index 6d544e0..8dac2eb 100644 --- a/doc/heapprofile.html +++ b/doc/heapprofile.html @@ -70,7 +70,9 @@ for a given run of an executable:</p> <code>HeapProfilerStart()</code> will take the profile-filename-prefix as an argument. You can then use <code>HeapProfilerDump()</code> or - <code>GetHeapProfile()</code> to examine the profile.</p> + <code>GetHeapProfile()</code> to examine the profile. 
+ In case it's useful, IsHeapProfilerRunning() will tell you + whether you've already called HeapProfilerStart() or not.</p> </ol> @@ -80,7 +82,7 @@ and is thus not usable -- for setuid programs.</p> <H2>Modifying Runtime Behavior</H2> <p>You can more finely control the behavior of the heap profiler via -commandline flags.</p> +environment variables.</p> <table frame=box rules=sides cellpadding=5 width=100%> diff --git a/doc/tcmalloc.html b/doc/tcmalloc.html index fd2d3cd..eb5a27b 100644 --- a/doc/tcmalloc.html +++ b/doc/tcmalloc.html @@ -349,6 +349,90 @@ time is being burned spinning waiting for locks in the heavily multi-threaded case).</p> +<H2>Modifying Runtime Behavior</H2> + +<p>You can more finely control the behavior of tcmalloc via +environment variables.</p> + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>TCMALLOC_SAMPLE_PARAMETER</code></td> + <td>default: 262147</td> + <td> + Twice the approximate gap between sampling actions. That is, we + take one sample approximately once every + <code>tcmalloc_sample_parameter/2</code> bytes of allocation. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_RELEASE_RATE</code></td> + <td>default: 1</td> + <td> + Rate at which we release unused memory to the system, via + <code>madvise(MADV_DONTNEED)</code>, on systems that support + it. Zero means we never release memory back to the system. + Increase this flag to return memory faster; decrease it + to return memory slower. Reasonable rates are in the + range [0,10]. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD</code></td> + <td>default: 1073741824</td> + <td> + Allocations larger than this value cause a stack trace to be + dumped to stderr. The threshold for dumping stack traces is + increased by a factor of 1.125 every time we print a message so + that the threshold automatically goes up by a factor of ~1000 + every 60 messages. 
This bounds the amount of extra logging + generated by this flag. Default value of this flag is very large + and therefore you should see no extra logging unless the flag is + overridden. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_DEVMEM_START</code></td> + <td>default: 0</td> + <td> + Physical memory starting location in MB for <code>/dev/mem</code> + allocation. Setting this to 0 disables <code>/dev/mem</code> + allocation. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_DEVMEM_LIMIT</code></td> + <td>default: 0</td> + <td> + Physical memory limit location in MB for <code>/dev/mem</code> + allocation. Setting this to 0 means no limit. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_DEVMEM_DEVICE</code></td> + <td>default: /dev/mem</td> + <td> + Device to use for allocating unmanaged memory. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_SKIP_MMAP</code></td> + <td>default: false</td> + <td> + If true, do not try to use <code>mmap</code> to obtain memory + from the kernel. + </td> +</tr> + +</table> + + <h2>Caveats</h2> <p>For some systems, TCMalloc may not work correctly on with diff --git a/packages/deb/changelog b/packages/deb/changelog index 11b0560..8828607 100644 --- a/packages/deb/changelog +++ b/packages/deb/changelog @@ -1,3 +1,9 @@ +google-perftools (0.99-1) unstable; urgency=low + + * New upstream release. + + -- Google Inc. <opensource@google.com> Thu, 18 Sep 2008 16:00:27 -0700 + google-perftools (0.98-1) unstable; urgency=low * New upstream release. diff --git a/src/addressmap-inl.h b/src/addressmap-inl.h index c6041b0..b122f17 100644 --- a/src/addressmap-inl.h +++ b/src/addressmap-inl.h @@ -169,7 +169,7 @@ class AddressMap { // The data for a cluster is represented as a dense array of // linked-lists, one list per contained block. 
static const int kClusterBits = 13; - static const int kClusterSize = 1 << (kBlockBits + kClusterBits); + static const Number kClusterSize = 1 << (kBlockBits + kClusterBits); static const int kClusterBlocks = 1 << kClusterBits; // We use a simple chaining hash-table to represent the clusters. diff --git a/src/base/basictypes.h b/src/base/basictypes.h index 97f96d6..eb7531d 100644 --- a/src/base/basictypes.h +++ b/src/base/basictypes.h @@ -177,9 +177,11 @@ struct CompileAssert { reinterpret_cast<char*>(16)) #ifdef HAVE___ATTRIBUTE__ -# define ATTRIBUTE_WEAK __attribute__((weak)) +# define ATTRIBUTE_WEAK __attribute__((weak)) +# define ATTRIBUTE_NOINLINE __attribute__((noinline)) #else # define ATTRIBUTE_WEAK +# define ATTRIBUTE_NOINLINE #endif // Section attributes are supported for both ELF and Mach-O, but in diff --git a/src/base/commandlineflags.h b/src/base/commandlineflags.h index a5fcb8a..741f0fe 100644 --- a/src/base/commandlineflags.h +++ b/src/base/commandlineflags.h @@ -125,4 +125,7 @@ #define EnvToInt64(envname, dflt) \ (!getenv(envname) ? (dflt) : strtoll(getenv(envname), NULL, 10)) +#define EnvToDouble(envname, dflt) \ + (!getenv(envname) ? (dflt) : strtod(getenv(envname), NULL)) + #endif // BASE_COMMANDLINEFLAGS_H_ diff --git a/src/base/cycleclock.h b/src/base/cycleclock.h index bf014c4..e188d43 100644 --- a/src/base/cycleclock.h +++ b/src/base/cycleclock.h @@ -32,6 +32,14 @@ // A CycleClock tells you the current time in Cycles. The "time" // is actually time since power-on. This is like time() but doesn't // involve a system call and is much more precise. +// +// NOTE: Not all cpu/platform/kernel combinations guarantee that this +// clock increments at a constant rate or is synchronized across all logical +// cpus in a system. +// +// Also, in some out of order CPU implementations, the CycleClock is not +// serializing. So if you're trying to count at cycles granularity, your +// data might be inaccurate due to out of order instruction execution. 
// ---------------------------------------------------------------------- #ifndef GOOGLE_BASE_CYCLECLOCK_H_ diff --git a/src/base/dynamic_annotations.cc b/src/base/dynamic_annotations.cc index 66b3c45..68e9ec3 100644 --- a/src/base/dynamic_annotations.cc +++ b/src/base/dynamic_annotations.cc @@ -51,6 +51,9 @@ extern "C" void AnnotateCondVarSignal(const char *file, int line, const volatile void *cv){} extern "C" void AnnotateCondVarSignalAll(const char *file, int line, const volatile void *cv){} +extern "C" void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} extern "C" void AnnotatePCQCreate(const char *file, int line, const volatile void *pcq){} extern "C" void AnnotatePCQDestroy(const char *file, int line, @@ -74,5 +77,11 @@ extern "C" void AnnotateTraceMemory(const char *file, int line, const volatile void *arg){} extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line){} extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line){} +extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line){} +extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line){} extern "C" void AnnotateNoOp(const char *file, int line, const volatile void *arg){} + +// When running under valgrind, this function will be intercepted +// and a non-zero value will be returned. +extern "C" int RunningOnValgrind() { return 0; } diff --git a/src/base/dynamic_annotations.h b/src/base/dynamic_annotations.h index 1a055cc..405a6c8 100644 --- a/src/base/dynamic_annotations.h +++ b/src/base/dynamic_annotations.h @@ -39,8 +39,8 @@ // Each such annotation is attached to a particular // instruction and/or to a particular object (address) in the program. // -// The annotations that should be used by users are macros -// (e.g. ANNOTATE_NEW_MEMORY). +// The annotations that should be used by users are macros in all upper-case +// (e.g., ANNOTATE_NEW_MEMORY). 
// // Actual implementation of these macros may differ depending on the // dynamic analysis tool being used. @@ -59,82 +59,100 @@ // All the annotation macros are in effect only in debug mode. #ifndef NDEBUG - // Report that "lock" has been created. - #define ANNOTATE_RWLOCK_CREATE(lock) \ - AnnotateRWLockCreate(__FILE__, __LINE__, lock) - - // Report that "lock" is about to be destroyed. - #define ANNOTATE_RWLOCK_DESTROY(lock) \ - AnnotateRWLockDestroy(__FILE__, __LINE__, lock) - - // Report that "lock" has been acquired. - // is_w=1 for writer lock, is_w=0 for reader lock. - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ - AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) - - // Report that "lock" is about to be relased. - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ - AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) - - // Report that wait on 'cv' has succeeded and 'lock' is held. + // ------------------------------------------------------------- + // Annotations useful when implementing condition variables such as CondVar, + // using conditional critical sections (Await/LockWhen) and when constructing + // user-defined synchronization mechanisms. + // + // The annotations ANNOTATE_CONDVAR_SIGNAL() and ANNOTATE_CONDVAR_WAIT() can + // be used to define happens-before arcs in user-defined synchronization + // mechanisms: the race detector will infer an arc from the former to the + // latter when they share the same argument pointer. + + // Report that wait on the condition variable at address "cv" has succeeded + // and the lock at address "lock" is held. #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) - // Report that wait on 'cv' has succeeded. Variant w/o lock. + // Report that wait on the condition variable at "cv" has succeeded. Variant + // w/o lock. 
+ // When used with user-defined synchronization mechanism at address "cv", + // indicates that an ANNOTATE_CONDVAR_SIGNAL(cv) "happened-before" this event. #define ANNOTATE_CONDVAR_WAIT(cv) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) - // Report that we are about to signal on 'cv'. + // Report that we are about to signal on the condition variable at address + // "cv". When used with user-defined synchronization mechanism at address + // "cv", indicates that an ANNOTATE_CONDVAR_WAIT(cv) "happened-after" this + // event. This call should be applied to the mutex in critical sections + // that make LockWhen() and Await() conditions true. #define ANNOTATE_CONDVAR_SIGNAL(cv) \ AnnotateCondVarSignal(__FILE__, __LINE__, cv) - // Report that we are about to signal_all on 'cv'. + // Report that we are about to signal_all on the condition variable at "cv". #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) - // Report that "pcq" (ProducerConsumerQueue) has been created. - // The ANNOTATE_PCQ_* annotations should be used only for FIFO queues. - // For non-FIFO queues use ANNOTATE_CONDVAR_SIGNAL (for put) and - // ANNOTATE_CONDVAR_WAIT (for get). + // Report that the bytes in the range [pointer, pointer+size) are about + // to be published safely. The race checker will create a happens-before + // arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + // subsequent accesses to this memory. + #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ + AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) + + // Instruct the tool to create a happens-before arc between mu->Unlock() and + // mu->Lock(). This annotation may slow down the race detector; normally it + // is used only when it would be difficult to annotate each of the mutex's + // critical sections individually using the annotations above.
+ #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + // ------------------------------------------------------------- + // Annotations useful when defining memory allocators, or when memory that + // was protected in one way starts to be protected in another. + + // Report that a new memory at "address" of size "size" has been allocated. + // This might be used when the memory has been retrieved from a free list and + // is about to be reused, or when the locking discipline for a variable + // changes. + #define ANNOTATE_NEW_MEMORY(address, size) \ + AnnotateNewMemory(__FILE__, __LINE__, address, size) + + // ------------------------------------------------------------- + // Annotations useful when defining FIFO queues that transfer data between + // threads. + + // Report that the producer-consumer queue (such as ProducerConsumerQueue) at + // address "pcq" has been created. The ANNOTATE_PCQ_* annotations + // should be used only for FIFO queues. For non-FIFO queues use + // ANNOTATE_CONDVAR_SIGNAL (for put) and ANNOTATE_CONDVAR_WAIT (for get). #define ANNOTATE_PCQ_CREATE(pcq) \ AnnotatePCQCreate(__FILE__, __LINE__, pcq) - // Report that "pcq" is about to be destroyed. + // Report that the queue at address "pcq" is about to be destroyed. #define ANNOTATE_PCQ_DESTROY(pcq) \ AnnotatePCQDestroy(__FILE__, __LINE__, pcq) - // Report that we are about to put an element into a FIFO queue 'pcq'. + // Report that we are about to put an element into a FIFO queue at address + // "pcq". #define ANNOTATE_PCQ_PUT(pcq) \ AnnotatePCQPut(__FILE__, __LINE__, pcq) - // Report that we've just got an element from a FIFO queue 'pcq'. + // Report that we've just got an element from a FIFO queue at address "pcq". #define ANNOTATE_PCQ_GET(pcq) \ AnnotatePCQGet(__FILE__, __LINE__, pcq) - // Report that a new memory 'mem' of size 'size' has been allocated.
- #define ANNOTATE_NEW_MEMORY(mem, size) \ - AnnotateNewMemory(__FILE__, __LINE__, mem, size) - - // Report that we expect a race on 'mem'. - // To use only in unit tests for a race detector. - #define ANNOTATE_EXPECT_RACE(mem, description) \ - AnnotateExpectRace(__FILE__, __LINE__, mem, description) + // ------------------------------------------------------------- + // Annotations that suppress errors. It is usually better to express the + // program's synchronization using the other annotations, but these can + // be used when all else fails. - // Report that we may have a benign race on 'mem'. - // Insert at the point where 'mem' exists, preferably close to the point - // where the race happens. + // Report that we may have a benign race at "address". + // Insert at the point where "address" has been allocated, preferably close + // to the point where the race happens. // See also ANNOTATE_BENIGN_RACE_STATIC. - #define ANNOTATE_BENIGN_RACE(mem, description) \ - AnnotateBenignRace(__FILE__, __LINE__, mem, description) - - // Instruct the tool to create a happens-before arc - // between mu->Unlock() and mu->Lock(). - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ - AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - - // Request to trace every access to 'arg'. - #define ANNOTATE_TRACE_MEMORY(arg) \ - AnnotateTraceMemory(__FILE__, __LINE__, arg) + #define ANNOTATE_BENIGN_RACE(address, description) \ + AnnotateBenignRace(__FILE__, __LINE__, address, description) // Request the analysis tool to ignore all reads in the current thread // until ANNOTATE_IGNORE_READS_END is called. @@ -148,6 +166,65 @@ #define ANNOTATE_IGNORE_READS_END() \ AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + // Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. + #define ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + // Stop ignoring writes.
+ #define ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + + // Start ignoring all memory accesses (reads and writes). + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do {\ + ANNOTATE_IGNORE_READS_BEGIN();\ + ANNOTATE_IGNORE_WRITES_BEGIN();\ + }while(0)\ + + // Stop ignoring all memory accesses. + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do {\ + ANNOTATE_IGNORE_WRITES_END();\ + ANNOTATE_IGNORE_READS_END();\ + }while(0)\ + + // ------------------------------------------------------------- + // Annotations useful for debugging. + + // Request to trace every access to "address". + #define ANNOTATE_TRACE_MEMORY(address) \ + AnnotateTraceMemory(__FILE__, __LINE__, address) + + // ------------------------------------------------------------- + // Annotations useful when implementing locks. They are not + // normally needed by modules that merely use locks. + // The "lock" argument is a pointer to the lock object. + + // Report that a lock has been created at address "lock". + #define ANNOTATE_RWLOCK_CREATE(lock) \ + AnnotateRWLockCreate(__FILE__, __LINE__, lock) + + // Report that the lock at address "lock" is about to be destroyed. + #define ANNOTATE_RWLOCK_DESTROY(lock) \ + AnnotateRWLockDestroy(__FILE__, __LINE__, lock) + + // Report that the lock at address "lock" has been acquired. + // is_w=1 for writer lock, is_w=0 for reader lock. + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ + AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) + + // Report that the lock at address "lock" is about to be released. + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ + AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) + + // ------------------------------------------------------------- + // Annotations useful for testing race detectors. + + // Report that we expect a race on the variable at "address". + // Use only in unit tests for a race detector. 
+ #define ANNOTATE_EXPECT_RACE(address, description) \ + AnnotateExpectRace(__FILE__, __LINE__, address, description) + // A no-op. Insert where you like to test the interceptors. #define ANNOTATE_NO_OP(arg) \ AnnotateNoOp(__FILE__, __LINE__, arg) @@ -162,17 +239,23 @@ #define ANNOTATE_CONDVAR_WAIT(cv) // empty #define ANNOTATE_CONDVAR_SIGNAL(cv) // empty #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) // empty + #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_PUBLISH_OBJECT(address) // empty #define ANNOTATE_PCQ_CREATE(pcq) // empty #define ANNOTATE_PCQ_DESTROY(pcq) // empty #define ANNOTATE_PCQ_PUT(pcq) // empty #define ANNOTATE_PCQ_GET(pcq) // empty - #define ANNOTATE_NEW_MEMORY(mem, size) // empty - #define ANNOTATE_EXPECT_RACE(mem, description) // empty - #define ANNOTATE_BENIGN_RACE(mem, description) // empty + #define ANNOTATE_NEW_MEMORY(address, size) // empty + #define ANNOTATE_EXPECT_RACE(address, description) // empty + #define ANNOTATE_BENIGN_RACE(address, description) // empty #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty #define ANNOTATE_TRACE_MEMORY(arg) // empty #define ANNOTATE_IGNORE_READS_BEGIN() // empty #define ANNOTATE_IGNORE_READS_END() // empty + #define ANNOTATE_IGNORE_WRITES_BEGIN() // empty + #define ANNOTATE_IGNORE_WRITES_END() // empty + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() // empty + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() // empty #define ANNOTATE_NO_OP(arg) // empty #endif // NDEBUG @@ -193,6 +276,9 @@ extern "C" void AnnotateCondVarSignal(const char *file, int line, const volatile void *cv); extern "C" void AnnotateCondVarSignalAll(const char *file, int line, const volatile void *cv); +extern "C" void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); extern "C" void AnnotatePCQCreate(const char *file, int line, const volatile void *pcq); extern "C" void AnnotatePCQDestroy(const char *file, int line, @@ -202,13 +288,13 @@ extern 
"C" void AnnotatePCQPut(const char *file, int line, extern "C" void AnnotatePCQGet(const char *file, int line, const volatile void *pcq); extern "C" void AnnotateNewMemory(const char *file, int line, - const volatile void *mem, + const volatile void *address, long size); extern "C" void AnnotateExpectRace(const char *file, int line, - const volatile void *mem, + const volatile void *address, const char *description); extern "C" void AnnotateBenignRace(const char *file, int line, - const volatile void *mem, + const volatile void *address, const char *description); extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, const volatile void *mu); @@ -216,38 +302,50 @@ extern "C" void AnnotateTraceMemory(const char *file, int line, const volatile void *arg); extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line); extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line); +extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line); +extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line); extern "C" void AnnotateNoOp(const char *file, int line, const volatile void *arg); +#ifndef NDEBUG -// ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. -// -// Instead of doing -// ANNOTATE_IGNORE_READS_BEGIN(); -// ... = x; -// ANNOTATE_IGNORE_READS_END(); -// one can use -// ... = ANNOTATE_UNPROTECTED_READ(x); -template <class T> -inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) { - ANNOTATE_IGNORE_READS_BEGIN(); - T res = x; - ANNOTATE_IGNORE_READS_END(); - return res; -} - -// Apply ANNOTATE_BENIGN_RACE to a static variable. 
-#define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ - namespace { \ - class static_var ## _annotator { \ - public: \ - static_var ## _annotator() { \ - ANNOTATE_BENIGN_RACE(&static_var, \ - # static_var ": " description); \ - } \ - }; \ - static static_var ## _annotator the ## static_var ## _annotator;\ + // ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. + // + // Instead of doing + // ANNOTATE_IGNORE_READS_BEGIN(); + // ... = x; + // ANNOTATE_IGNORE_READS_END(); + // one can use + // ... = ANNOTATE_UNPROTECTED_READ(x); + template <class T> + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) { + ANNOTATE_IGNORE_READS_BEGIN(); + T res = x; + ANNOTATE_IGNORE_READS_END(); + return res; } + // Apply ANNOTATE_BENIGN_RACE to a static variable. + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ + namespace { \ + class static_var ## _annotator { \ + public: \ + static_var ## _annotator() { \ + ANNOTATE_BENIGN_RACE(&static_var, \ + # static_var ": " description); \ + } \ + }; \ + static static_var ## _annotator the ## static_var ## _annotator;\ + } +#else // !NDEBUG + + #define ANNOTATE_UNPROTECTED_READ(x) (x) + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) // empty + +#endif // !NDEBUG + +// Return non-zero value if running under valgrind. 
+extern "C" int RunningOnValgrind(); + #endif // BASE_DYNAMIC_ANNOTATIONS_H_ diff --git a/src/base/linuxthreads.c b/src/base/linuxthreads.cc index e67ac26..19da400 100644 --- a/src/base/linuxthreads.c +++ b/src/base/linuxthreads.cc @@ -38,19 +38,14 @@ extern "C" { #endif -#include <asm/stat.h> #include <sched.h> #include <signal.h> #include <stdlib.h> #include <string.h> +#include <fcntl.h> #include <sys/socket.h> #include <sys/wait.h> -#include <asm/fcntl.h> -#include <asm/posix_types.h> -#include <asm/types.h> -#include <linux/dirent.h> - #include "base/linux_syscall_support.h" #include "base/thread_lister.h" diff --git a/src/base/spinlock.h b/src/base/spinlock.h index 8c54680..3ffeb4d 100644 --- a/src/base/spinlock.h +++ b/src/base/spinlock.h @@ -37,7 +37,7 @@ // of a compare-and-swap which is expensive). // SpinLock is async signal safe. -// If used within a signal handler, all lock holders +// If used within a signal handler, all lock holders // should block the signal even outside the signal handler. #ifndef BASE_SPINLOCK_H_ @@ -70,7 +70,7 @@ class LOCKABLE SpinLock { // A SpinLock constructed like this can be freely used from global // initializers without worrying about the order in which global // initializers run. 
- explicit SpinLock(base::LinkerInitialized x) { + explicit SpinLock(base::LinkerInitialized /*x*/) { // Does nothing; lockword_ is already initialized } diff --git a/src/base/stl_allocator.h b/src/base/stl_allocator.h index 5aae092..6f5eaba 100644 --- a/src/base/stl_allocator.h +++ b/src/base/stl_allocator.h @@ -82,7 +82,7 @@ class STL_Allocator { RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate"); return static_cast<T*>(Alloc::Allocate(n * sizeof(T))); } - void deallocate(pointer p, size_type n) { Alloc::Free(p); } + void deallocate(pointer p, size_type /*n*/) { Alloc::Free(p); } size_type max_size() const { return size_t(-1) / sizeof(T); } diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc index 823292e..679b4d1 100644 --- a/src/base/sysinfo.cc +++ b/src/base/sysinfo.cc @@ -478,7 +478,6 @@ void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, } ProcMapsIterator::~ProcMapsIterator() { - delete dynamic_buffer_; #if defined(WIN32) if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_); #elif defined(__MACH__) diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h index a043ceb..7390691 100644 --- a/src/google/heap-checker.h +++ b/src/google/heap-checker.h @@ -290,6 +290,10 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { // Type for DumpProfileLocked enum ProfileType { START_PROFILE, END_PROFILE }; + // Create the name of the heap profile file of profile_type; + // to be deleted via Allocator::Free(). + char* MakeProfileNameLocked(ProfileType profile_type); + // Helper for dumping start/end heap leak checking profiles // and getting the byte/object counts. void DumpProfileLocked(ProfileType profile_type, const void* self_stack_top, @@ -312,6 +316,10 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { CheckFullness fullness, ReportMode report_mode); + // Handle a leak profile file: delete it if !keep_profiles_. 
+ void HandleProfile(ProfileType profile_type); + void HandleProfileLocked(ProfileType profile_type); + // Helper for DisableChecksAt static void DisableChecksAtLocked(const void* address); @@ -433,6 +441,7 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { ssize_t inuse_bytes_increase_; // bytes-in-use increase for this checker ssize_t inuse_allocs_increase_; // allocations-in-use increase // for this checker + bool keep_profiles_; // iff we should keep the heap profiles we've made // ----------------------------------------------------------------------- // diff --git a/src/google/heap-profiler.h b/src/google/heap-profiler.h index 3c3f2b4..33d7fae 100644 --- a/src/google/heap-profiler.h +++ b/src/google/heap-profiler.h @@ -71,6 +71,13 @@ extern "C" { */ PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix); +/* Returns true if we are currently profiling the heap. This is true + * between calls to HeapProfilerStart() and HeapProfilerStop(), and + * also if the program has been run with HEAPPROFILER, or some other + * way to turn on whole-program profiling. + */ +bool IsHeapProfilerRunning(); + /* Stop heap profiling. Can be restarted again with HeapProfilerStart(), * but the currently accumulated profiling information will be cleared. */ diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h index d41f1ef..6be5d87 100644 --- a/src/google/malloc_extension.h +++ b/src/google/malloc_extension.h @@ -34,6 +34,9 @@ // extensions are accessed through a virtual base class so an // application can link against a malloc that does not implement these // extensions, and it will get default versions that do nothing. +// +// NOTE FOR C USERS: If you wish to use this functionality from within +// a C program, see malloc_extension_c.h. 
#ifndef BASE_MALLOC_EXTENSION_H_ #define BASE_MALLOC_EXTENSION_H_ diff --git a/src/google/malloc_extension_c.h b/src/google/malloc_extension_c.h new file mode 100644 index 0000000..dd6fdec --- /dev/null +++ b/src/google/malloc_extension_c.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * -- + * Author: Craig Silverstein + * + * C shims for the C++ malloc_extension.h. See malloc_extension.h for + * details. 
Note these C shims always work on + * MallocExtension::instance(); it is not possible to have more than + * one MallocExtension object in C applications. + */ + +#ifndef _MALLOC_EXTENSION_C_H_ +#define _MALLOC_EXTENSION_C_H_ + +#include <stddef.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +bool MallocExtension_VerifyAllMemory(); +bool MallocExtension_VerifyAllMemory(); +bool MallocExtension_VerifyNewMemory(void* p); +bool MallocExtension_VerifyArrayNewMemory(void* p); +bool MallocExtension_VerifyMallocMemory(void* p); +bool MallocExtension_MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]); + +void MallocExtension_GetStats(char* buffer, int buffer_length); + +/* NOTE: commented out because they require a c++ string. + * TODO(csilvers): write a C version of these routines, that perhaps + * take a fixed-size buffer. + */ +/* void MallocExtension_GetHeapSample(string* result); */ +/* void MallocExtension_GetHeapGrowthStacks(string* result); */ + +bool MallocExtension_GetNumericProperty(const char* property, size_t* value); +bool MallocExtension_SetNumericProperty(const char* property, size_t value); +void MallocExtension_MarkThreadIdle(); +void MallocExtension_ReleaseFreeMemory(); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* _MALLOC_EXTENSION_C_H_ */ diff --git a/src/google/malloc_hook.h b/src/google/malloc_hook.h index 8383c2e..88d3e77 100644 --- a/src/google/malloc_hook.h +++ b/src/google/malloc_hook.h @@ -56,6 +56,9 @@ // shouldn't unless you're part of tcmalloc), be sure to #include // malloc_hook-inl.h in addition to malloc_hook.h. // +// NOTE FOR C USERS: If you want to use malloc_hook functionality from +// a C program, #include malloc_hook_c.h instead of this file. +// // TODO(csilvers): support a non-inlined function called // Assert*HookIs()? This is the context in which I normally see // Get*Hook() called in non-tcmalloc code. 
@@ -65,6 +68,9 @@ #include <stddef.h> #include <sys/types.h> +extern "C" { +#include <google/malloc_hook_c.h> // a C version of the malloc_hook interface +} // Annoying stuff for windows -- makes sure clients can import these functions #ifndef PERFTOOLS_DLL_DECL @@ -75,33 +81,38 @@ # endif #endif +// Note: malloc_hook_c.h defines MallocHook_*Hook and +// MallocHook_Set*Hook. The version of these inside the MallocHook +// class are defined in terms of the malloc_hook_c version. See +// malloc_hook_c.h for details of these types/functions. + class PERFTOOLS_DLL_DECL MallocHook { public: // The NewHook is invoked whenever an object is allocated. // It may be passed NULL if the allocator returned NULL. - typedef void (*NewHook)(const void* ptr, size_t size); + typedef MallocHook_NewHook NewHook; inline static NewHook GetNewHook(); - static NewHook SetNewHook(NewHook hook); + inline static NewHook SetNewHook(NewHook hook) { + return MallocHook_SetNewHook(hook); + } inline static void InvokeNewHook(const void* p, size_t s); // The DeleteHook is invoked whenever an object is deallocated. // It may be passed NULL if the caller is trying to delete NULL. - typedef void (*DeleteHook)(const void* ptr); + typedef MallocHook_DeleteHook DeleteHook; inline static DeleteHook GetDeleteHook(); - static DeleteHook SetDeleteHook(DeleteHook hook); + inline static DeleteHook SetDeleteHook(DeleteHook hook) { + return MallocHook_SetDeleteHook(hook); + } inline static void InvokeDeleteHook(const void* p); // The MmapHook is invoked whenever a region of memory is mapped. // It may be passed MAP_FAILED if the mmap failed. 
- typedef void (*MmapHook)(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); + typedef MallocHook_MmapHook MmapHook; inline static MmapHook GetMmapHook(); - static MmapHook SetMmapHook(MmapHook hook); + inline static MmapHook SetMmapHook(MmapHook hook) { + return MallocHook_SetMmapHook(hook); + } inline static void InvokeMmapHook(const void* result, const void* start, size_t size, @@ -111,20 +122,19 @@ class PERFTOOLS_DLL_DECL MallocHook { off_t offset); // The MunmapHook is invoked whenever a region of memory is unmapped. - typedef void (*MunmapHook)(const void* ptr, size_t size); + typedef MallocHook_MunmapHook MunmapHook; inline static MunmapHook GetMunmapHook(); - static MunmapHook SetMunmapHook(MunmapHook hook); + inline static MunmapHook SetMunmapHook(MunmapHook hook) { + return MallocHook_SetMunmapHook(hook); + } inline static void InvokeMunmapHook(const void* p, size_t size); // The MremapHook is invoked whenever a region of memory is remapped. - typedef void (*MremapHook)(const void* result, - const void* old_addr, - size_t old_size, - size_t new_size, - int flags, - const void* new_addr); + typedef MallocHook_MremapHook MremapHook; inline static MremapHook GetMremapHook(); - static MremapHook SetMremapHook(MremapHook hook); + inline static MremapHook SetMremapHook(MremapHook hook) { + return MallocHook_SetMremapHook(hook); + } inline static void InvokeMremapHook(const void* result, const void* old_addr, size_t old_size, @@ -136,9 +146,11 @@ class PERFTOOLS_DLL_DECL MallocHook { // the increment is 0. This is because sbrk(0) is often called // to get the top of the memory stack, and is not actually a // memory-allocation call. 
- typedef void (*SbrkHook)(const void* result, ptrdiff_t increment); + typedef MallocHook_SbrkHook SbrkHook; inline static SbrkHook GetSbrkHook(); - static SbrkHook SetSbrkHook(SbrkHook hook); + inline static SbrkHook SetSbrkHook(SbrkHook hook) { + return MallocHook_SetSbrkHook(hook); + } inline static void InvokeSbrkHook(const void* result, ptrdiff_t increment); // Get the current stack trace. Try to skip all routines up to and @@ -146,7 +158,10 @@ class PERFTOOLS_DLL_DECL MallocHook { // Use "skip_count" (similarly to GetStackTrace from stacktrace.h) // as a hint about how many routines to skip if better information // is not available. - static int GetCallerStackTrace(void** result, int max_depth, int skip_count); + inline static int GetCallerStackTrace(void** result, int max_depth, + int skip_count) { + return MallocHook_GetCallerStackTrace(result, max_depth, skip_count); + } }; #endif /* _MALLOC_HOOK_H_ */ diff --git a/src/google/malloc_hook_c.h b/src/google/malloc_hook_c.h new file mode 100644 index 0000000..2100f28 --- /dev/null +++ b/src/google/malloc_hook_c.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * -- + * Author: Craig Silverstein + * + * C shims for the C++ malloc_hook.h. See malloc_hook.h for details + * on how to use these. + */ + +#ifndef _MALLOC_HOOK_C_H_ +#define _MALLOC_HOOK_C_H_ + +#include <stddef.h> +#include <sys/types.h> + +/* Get the current stack trace. Try to skip all routines up to and + * and including the caller of MallocHook::Invoke*. + * Use "skip_count" (similarly to GetStackTrace from stacktrace.h) + * as a hint about how many routines to skip if better information + * is not available. 
+ */ +int MallocHook_GetCallerStackTrace(void** result, int max_depth, + int skip_count); + + +typedef void (*MallocHook_NewHook)(const void* ptr, size_t size); +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook); + +typedef void (*MallocHook_DeleteHook)(const void* ptr); +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook); + +typedef void (*MallocHook_MmapHook)(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook); + +typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size); +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook); + +typedef void (*MallocHook_MremapHook)(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook); + +typedef void (*MallocHook_SbrkHook)(const void* result, ptrdiff_t increment); +MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook); + +#endif /* _MALLOC_HOOK_C_H_ */ diff --git a/src/google/profiler.h b/src/google/profiler.h index 46bd844..0a185d2 100644 --- a/src/google/profiler.h +++ b/src/google/profiler.h @@ -159,7 +159,7 @@ struct ProfilerState { char profile_name[1024]; /* Name of profile file being written, or '\0' */ int samples_gathered; /* Number of samples gatheered to far (or 0) */ }; -PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(ProfilerState* state); +PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state); #ifdef __cplusplus } // extern "C" diff --git a/src/google/stacktrace.h b/src/google/stacktrace.h index a59286c..1062ed6 100644 --- a/src/google/stacktrace.h +++ b/src/google/stacktrace.h @@ -87,7 +87,7 @@ extern PERFTOOLS_DLL_DECL int GetStackFrames(void** pcs, int* sizes, int max_dep // foo() { bar(); } // bar() { // void* result[10]; -// int 
depth = GetStackFrames(result, 10, 1); +// int depth = GetStackTrace(result, 10, 1); // } // // This produces: diff --git a/src/heap-checker.cc b/src/heap-checker.cc index 84ab03b..0d18c8d 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -1233,6 +1233,8 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); // Order tests by the likelyhood of the test failing in 64/32 bit modes. // Yes, this matters: we either lose 5..6% speed in 32 bit mode // (which is already slower) or by a factor of 1.5..1.91 in 64 bit mode. + // After the alignment test got dropped the above performance figures + // must have changed; might need to revisit this. #if defined(__x86_64__) addr < max_heap_address && min_heap_address <= addr; @@ -1412,6 +1414,19 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) { // HeapLeakChecker non-static functions //---------------------------------------------------------------------- +char* HeapLeakChecker::MakeProfileNameLocked(ProfileType profile_type) { + RAW_DCHECK(lock_.IsHeld(), ""); + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + const int len = profile_name_prefix->size() + strlen(name_) + 5 + + strlen(HeapProfileTable::kFileExt) + 1; + char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len)); + snprintf(file_name, len, "%s.%s%s%s", + profile_name_prefix->c_str(), name_, + profile_type == START_PROFILE ? "-beg" : "-end", + HeapProfileTable::kFileExt); + return file_name; +} + void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type, const void* self_stack_top, size_t* alloc_bytes, @@ -1437,12 +1452,7 @@ void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type, // Make the heap profile, other threads are locked out. 
const int alloc_count = Allocator::alloc_count(); IgnoreAllLiveObjectsLocked(self_stack_top); - const int len = profile_name_prefix->size() + strlen(name_) + 10 + 2; - char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len)); - snprintf(file_name, len, "%s.%s%s%s", - profile_name_prefix->c_str(), name_, - profile_type == START_PROFILE ? "-beg" : "-end", - HeapProfileTable::kFileExt); + char* file_name = MakeProfileNameLocked(profile_type); HeapProfileTable::Stats stats; bool ok = heap_profile->DumpNonLiveProfile( file_name, FLAGS_heap_check_identify_leaks, &stats); @@ -1461,6 +1471,7 @@ void HeapLeakChecker::Create(const char *name) { SpinLockHolder l(lock_); name_ = NULL; // checker is inactive has_checked_ = false; + keep_profiles_ = false; char* n = new char[strlen(name) + 1]; // do this before we lock IgnoreObject(n); // otherwise it might be treated as live due to our stack { // Heap activity in other threads is paused for this whole scope. @@ -1515,6 +1526,12 @@ ssize_t HeapLeakChecker::ObjectsLeaked() const { return inuse_allocs_increase_; } +// pprof command execution mode: +enum RunMode { + TO_RUN_NOW, // to be run from within the test itself + TO_RUN_BY_USER // to be run manually by a human via cut and paste +}; + // Save pid of main thread for using in naming dump files static int32 main_thread_pid = getpid(); #ifdef HAVE_PROGRAM_INVOCATION_NAME @@ -1527,7 +1544,8 @@ static const char* invocation_name() { return "<your binary>"; } static const char* invocation_path() { return "<your binary>"; } #endif -static void MakeCommand(const char* basename, +static void MakeCommand(RunMode run_mode, + const char* basename, bool check_type_is_no_leaks, bool use_initial_profile, const string& prefix, @@ -1560,6 +1578,7 @@ static void MakeCommand(const char* basename, *command += " --addresses"; // stronger than --lines and prints // unresolvable object addresses } + *command += " --heapcheck"; } static int GetStatusOutput(const char* command, string* 
output) { @@ -1757,25 +1776,39 @@ bool HeapLeakChecker::DoNoLeaksOnceLocked(CheckType check_type, const bool pprof_can_ignore = disabled_regexp != NULL; string beg_profile; string end_profile; - string base_command; - MakeCommand(name_, check_type == NO_LEAKS, + string command; + MakeCommand(TO_RUN_NOW, name_, check_type == NO_LEAKS, result.use_initial_profile, *result.profile_prefix, *result.pprof_path, - &beg_profile, &end_profile, &base_command); - // Make the two command lines out of the base command, with - // appropriate mode options - string command = base_command + " --text"; + &beg_profile, &end_profile, &command); + string dot_file = end_profile; + dot_file.resize(dot_file.size() - 4 - strlen(HeapProfileTable::kFileExt)); + dot_file += HeapProfileTable::kFileExt; + dot_file += ".dot"; + string const dot_command = + command + " --edgefraction=1e-10 --nodefraction=1e-10 --dot" + " > " + dot_file; + command += " --text"; + // Make the user-executed command-line: + string local_beg_profile; + string local_end_profile; + string local_command; + string const profile_name = *result.profile_prefix; + string const local_pprof_path = *result.pprof_path; + MakeCommand(TO_RUN_BY_USER, name_, check_type == NO_LEAKS, + result.use_initial_profile, profile_name, local_pprof_path, + &local_beg_profile, &local_end_profile, &local_command); string gv_command; - gv_command = base_command; - gv_command += - " --edgefraction=1e-10 --nodefraction=1e-10 --heapcheck --gv"; + string gv_command_note; + gv_command = local_command; + gv_command += " --edgefraction=1e-10 --nodefraction=1e-10 --gv"; if (see_leaks) { - RAW_LOG(ERROR, "Heap memory leaks of %"PRIdS" bytes and/or " - "%"PRIdS" allocations detected by check \"%s\".", - inuse_bytes_increase_, inuse_allocs_increase_, name_); - RAW_LOG(ERROR, "TO INVESTIGATE leaks RUN e.g. 
THIS shell command:\n" - "\n%s\n", gv_command.c_str()); + RAW_LOG(WARNING, "Heap memory leaks of %"PRIdS" bytes and/or " + "%"PRIdS" allocations detected by check \"%s\".", + inuse_bytes_increase_, inuse_allocs_increase_, name_); + RAW_LOG(WARNING, "TO INVESTIGATE leaks RUN e.g. THIS shell command:\n" + "\n%s\n", gv_command.c_str()); } string output; bool checked_leaks = true; @@ -1786,10 +1819,11 @@ bool HeapLeakChecker::DoNoLeaksOnceLocked(CheckType check_type, result.pprof_path->c_str()); checked_leaks = false; } else { - // We don't care about pprof's stderr as long as it - // succeeds with empty report: - checked_leaks = GetStatusOutput((command + " 2>/dev/null").c_str(), - &output) == 0; + RAW_VLOG(2, "Running pprof command: \"%s\"", command.c_str()); + // We expect 0 exit code and an empty stdout and stderr + // from pprof that saw empty profile (difference): + checked_leaks = GetStatusOutput(command.c_str(), &output) == 0; + RAW_VLOG(2, "pprof produced \"%s\"", output.c_str()); } if (see_leaks && pprof_can_ignore && output.empty() && checked_leaks) { RAW_LOG(WARNING, "These must be leaks that we disabled" @@ -1798,13 +1832,15 @@ bool HeapLeakChecker::DoNoLeaksOnceLocked(CheckType check_type, see_leaks = false; reported_no_leaks = true; } - // do not fail the check just due to us being a stripped binary + // Do not fail the check just due to us being a stripped binary: if (!see_leaks && strstr(output.c_str(), "nm: ") != NULL && - strstr(output.c_str(), ": no symbols") != NULL) output.clear(); + strstr(output.c_str(), ": no symbols") != NULL) { + RAW_LOG(WARNING, "Dropping irrelevant pprof output \"%s\"", + output.c_str()); + output.clear(); + } } // Make sure the profiles we created are still there. - // They can get deleted e.g. if the program forks/executes itself - // and FLAGS_cleanup_old_heap_profiles was kept as true. 
if (access(end_profile.c_str(), R_OK) != 0 || (!beg_profile.empty() && access(beg_profile.c_str(), R_OK) != 0)) { RAW_LOG(FATAL, "One of the heap profiles is gone: %s %s", @@ -1813,7 +1849,9 @@ bool HeapLeakChecker::DoNoLeaksOnceLocked(CheckType check_type, if (!(see_leaks || checked_leaks)) { // Crash if something went wrong with executing pprof // and we rely on pprof to do its work: - RAW_LOG(FATAL, "The pprof command failed: %s", command.c_str()); + RAW_LOG(FATAL, "The following pprof command failed\n%s\n" + "with this output:\n%s", + command.c_str(), output.c_str()); } if (see_leaks && result.use_initial_profile) { RAW_LOG(WARNING, "CAVEAT: Some of the reported leaks might have " @@ -1831,13 +1869,27 @@ bool HeapLeakChecker::DoNoLeaksOnceLocked(CheckType check_type, } see_leaks = true; } + if (see_leaks && !gv_command_note.empty()) { + RAW_LOG(WARNING, "NOTE:\n\n%s", gv_command_note.c_str()); + } if (see_leaks && report_mode == PPROF_REPORT) { if (checked_leaks) { RAW_LOG(ERROR, "Below is (less informative) textual version " "of this pprof command's output:"); RawLogLines(ERROR, output); + string dot_output; + RAW_VLOG(2, "Running pprof dot command: \"%s\"", dot_command.c_str()); + int code = GetStatusOutput(dot_command.c_str(), &dot_output); + if (code != 0 || !dot_output.empty()) { + RAW_LOG(ERROR, + "This dot pprof command failed: \"%s\"\n" + "with exit code %d, output: \"%s\"", + dot_command.c_str(), code, dot_output.c_str()); + } } else { - RAW_LOG(ERROR, "The pprof command has failed"); + RAW_LOG(ERROR, "The following pprof command failed\n%s\n" + "with this output:\n%s", + command.c_str(), output.c_str()); } } } @@ -1848,15 +1900,38 @@ bool HeapLeakChecker::DoNoLeaksOnceLocked(CheckType check_type, "found %"PRId64" reachable heap objects of %"PRId64" bytes", name_, result.live_objects, result.live_bytes); } + // Keep when had leaks or needed to look into profiles to convince ourselves + // that there were no leaks: + keep_profiles_ = see_leaks || 
reported_no_leaks; + HandleProfileLocked(END_PROFILE); return !see_leaks; } +void HeapLeakChecker::HandleProfile(ProfileType profile_type) { + SpinLockHolder l(lock_); + HandleProfileLocked(profile_type); +} + +void HeapLeakChecker::HandleProfileLocked(ProfileType profile_type) { + const char* type = profile_type == START_PROFILE ? "beg" : "end"; + if (keep_profiles_) { + RAW_VLOG(2, "Keeping %s profile for %s", type, name_); + } else { + SpinLockHolder l(&heap_checker_lock); + char* beg_profile = MakeProfileNameLocked(profile_type); + RAW_VLOG(2, "Removing %s profile %s", type, beg_profile); + unlink(beg_profile); + Allocator::Free(beg_profile); + } +} + HeapLeakChecker::~HeapLeakChecker() { if (name_ != NULL) { // had leak checking enabled when created the checker if (!has_checked_) { RAW_LOG(FATAL, "Some *NoLeaks|SameHeap method" " must be called on any created HeapLeakChecker"); } + HandleProfile(START_PROFILE); UnIgnoreObject(name_); delete[] name_; name_ = NULL; @@ -2030,7 +2105,6 @@ static bool internal_init_start_has_run = false; // make a good place and name for heap profile leak dumps string* profile_prefix = new string(FLAGS_heap_check_dump_directory + "/" + invocation_name()); - HeapProfileTable::CleanupOldProfiles(profile_prefix->c_str()); // Finalize prefix for dumping leak checking profiles. const int32 our_pid = getpid(); // safest to call getpid() outside lock @@ -2389,6 +2463,8 @@ void HeapLeakChecker_AfterDestructors() { // on a free() call from pthreads. 
} } + HeapLeakChecker* hc = HeapLeakChecker::GlobalChecker(); + if (hc) hc->HandleProfile(HeapLeakChecker::START_PROFILE); SpinLockHolder l(&heap_checker_lock); RAW_CHECK(!do_main_heap_check, "should have done it"); } diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc index 26eca4e..78812cf 100644 --- a/src/heap-profile-table.cc +++ b/src/heap-profile-table.cc @@ -320,9 +320,8 @@ int HeapProfileTable::UnparseBucket(const Bucket& b, return buflen; } -int HeapProfileTable::FillOrderedProfile(char buf[], int size) const { - // We can't allocate list on the stack, as this would overflow on threads - // running with a small stack size. +HeapProfileTable::Bucket** +HeapProfileTable::MakeSortedBucketList() const { Bucket** list = reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_)); @@ -336,6 +335,25 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const { sort(list, list + num_buckets_, ByAllocatedSpace); + return list; +} + +void HeapProfileTable::IterateOrderedAllocContexts( + AllocContextIterator callback) const { + Bucket** list = MakeSortedBucketList(); + AllocContextInfo info; + for (int i = 0; i < num_buckets_; ++i) { + *static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]); + info.stack_depth = list[i]->depth; + info.call_stack = list[i]->stack; + callback(info); + } + dealloc_(list); +} + +int HeapProfileTable::FillOrderedProfile(char buf[], int size) const { + Bucket** list = MakeSortedBucketList(); + // Our file format is "bucket, bucket, ..., bucket, proc_self_maps_info". // In the cases buf is too small, we'd rather leave out the last // buckets than leave out the /proc/self/maps info. 
To ensure that, @@ -356,7 +374,7 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const { int bucket_length = snprintf(buf, size, "%s", kProfileHeader); if (bucket_length < 0 || bucket_length >= size) return 0; bucket_length = UnparseBucket(total_, buf, bucket_length, size, &stats); - for (int i = 0; i < n; i++) { + for (int i = 0; i < num_buckets_; i++) { bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, &stats); } RAW_DCHECK(bucket_length < size, ""); diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h index fc60d1c..416b8bc 100644 --- a/src/heap-profile-table.h +++ b/src/heap-profile-table.h @@ -77,6 +77,14 @@ class HeapProfileTable { int stack_depth; // depth of call_stack }; + // Info we return about an allocation context. + // An allocation context is a unique caller stack trace + // of an allocation operation. + struct AllocContextInfo : public Stats { + int stack_depth; // Depth of stack trace + const void* const* call_stack; // Stack trace + }; + // Memory (de)allocator interface we'll use. typedef void* (*Allocator)(size_t size); typedef void (*DeAllocator)(void* ptr); @@ -131,6 +139,14 @@ class HeapProfileTable { allocation_->Iterate(MapArgsAllocIterator, callback); } + // Allocation context profile data iteration callback + typedef void (*AllocContextIterator)(const AllocContextInfo& info); + + // Iterate over the allocation context profile data calling "callback" + // for every allocation context. Allocation contexts are ordered by the + // size of allocated space. + void IterateOrderedAllocContexts(AllocContextIterator callback) const; + // Fill profile data into buffer 'buf' of size 'size' // and return the actual size occupied by the dump in 'buf'. 
// The profile buckets are dumped in the decreasing order @@ -232,6 +248,11 @@ class HeapProfileTable { inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v, const DumpArgs& args); + // Helper for IterateOrderedAllocContexts and FillOrderedProfile. + // Creates a sorted list of Buckets whose length is num_buckets_. + // The caller is responsible for deallocating the returned list. + Bucket** MakeSortedBucketList() const; + // data ---------------------------- // Size for table_. diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc index 6db35f5..ff16cb4 100644 --- a/src/heap-profiler.cc +++ b/src/heap-profiler.cc @@ -38,11 +38,15 @@ #ifdef HAVE_UNISTD_H #include <unistd.h> #endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> // for open() +#endif #ifdef HAVE_MMAP #include <sys/mman.h> #endif #include <errno.h> #include <assert.h> +#include <sys/types.h> #include <algorithm> #include <string> @@ -127,6 +131,17 @@ static void ProfilerFree(void* p) { LowLevelAlloc::Free(p); } +// We use buffers of this size in DoGetHeapProfile. +static const int kProfileBufferSize = 1 << 20; + +// This is a last-ditch buffer we use in DumpProfileLocked in case we +// can't allocate more memory from ProfilerMalloc. We expect this +// will be used by HeapProfileEndWriter when the application has to +// exit due to out-of-memory. This buffer is allocated in +// HeapProfilerStart. Access to this must be protected by heap_lock. +static char* global_profiler_buffer = NULL; + + //---------------------------------------------------------------------- // Profiling control/state data //---------------------------------------------------------------------- @@ -169,21 +184,19 @@ static void AddRemoveMMapDataLocked(AddOrRemove mode) { } } -static char* DoGetHeapProfile(void* (*alloc_func)(size_t)) { +// Input must be a buffer of size at least 1MB. 
+static char* DoGetHeapProfileLocked(char* buf, int buflen) { // We used to be smarter about estimating the required memory and // then capping it to 1MB and generating the profile into that. - // However it should not cost us much to allocate 1MB every time. - static const int size = 1 << 20; - char* buf = reinterpret_cast<char*>((*alloc_func)(size)); - if (buf == NULL) return NULL; + if (buf == NULL || buflen < 1) + return NULL; - // Grab the lock and generate the profile. - SpinLockHolder l(&heap_lock); - int buflen = 0; + RAW_DCHECK(heap_lock.IsHeld(), ""); + int bytes_written = 0; if (is_on) { HeapProfileTable::Stats const stats = heap_profile->total(); AddRemoveMMapDataLocked(ADD); - buflen = heap_profile->FillOrderedProfile(buf, size - 1); + bytes_written = heap_profile->FillOrderedProfile(buf, buflen - 1); // FillOrderedProfile should not reduce the set of active mmap-ed regions, // hence MemoryRegionMap will let us remove everything we've added above: AddRemoveMMapDataLocked(REMOVE); @@ -191,21 +204,36 @@ static char* DoGetHeapProfile(void* (*alloc_func)(size_t)) { // if this fails, we somehow removed by AddRemoveMMapDataLocked // more than we have added. } - buf[buflen] = '\0'; - RAW_DCHECK(buflen == strlen(buf), ""); + buf[bytes_written] = '\0'; + RAW_DCHECK(bytes_written == strlen(buf), ""); return buf; } extern "C" char* GetHeapProfile() { // Use normal malloc: we return the profile to the user to free it: - return DoGetHeapProfile(&malloc); + SpinLockHolder l(&heap_lock); + return DoGetHeapProfileLocked( + reinterpret_cast<char*>(malloc(kProfileBufferSize)), kProfileBufferSize); } // defined below static void NewHook(const void* ptr, size_t size); static void DeleteHook(const void* ptr); +// Dump data to fd. Copied from heap-profile-table.cc. 
+#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +static void FDWrite(int fd, const char* buf, size_t len) { + while (len > 0) { + ssize_t r; + NO_INTR(r = write(fd, buf, len)); + RAW_CHECK(r >= 0, ""); // strerror(errno) + buf += r; + len -= r; + } +} + // Helper for HeapProfilerDump. static void DumpProfileLocked(const char* reason) { RAW_DCHECK(heap_lock.IsHeld(), ""); @@ -231,24 +259,28 @@ static void DumpProfileLocked(const char* reason) { snprintf(file_name, sizeof(file_name), "%s.%04d%s", filename_prefix, dump_count, HeapProfileTable::kFileExt); - // Release allocation lock around the meat of this routine - // thus not blocking other threads too much. - heap_lock.Unlock(); - { - // Dump the profile - RAW_VLOG(0, "Dumping heap profile to %s (%s)", file_name, reason); - FILE* f = fopen(file_name, "w"); - if (f != NULL) { - // Use ProfilerMalloc not to put info about profile itself into itself: - char* profile = DoGetHeapProfile(&ProfilerMalloc); - fputs(profile, f); - ProfilerFree(profile); - fclose(f); - } else { - RAW_LOG(ERROR, "Failed dumping heap profile to %s", file_name); - } + // Dump the profile + RAW_VLOG(0, "Dumping heap profile to %s (%s)", file_name, reason); + // We must use file routines that don't access memory, since we hold + // a memory lock now. + int fd = open(file_name, O_WRONLY | O_CREAT | O_EXCL, 0664); + if (fd < 0) { + RAW_LOG(ERROR, "Failed dumping heap profile to %s", file_name); + dumping = false; + return; } - heap_lock.Lock(); + + // This case may be impossible, but it's best to be safe. + // It's safe to use the global buffer: we're protected by heap_lock. 
+ if (global_profiler_buffer == NULL) { + global_profiler_buffer = + reinterpret_cast<char*>(ProfilerMalloc(kProfileBufferSize)); + } + + char* profile = DoGetHeapProfileLocked(global_profiler_buffer, + kProfileBufferSize); + FDWrite(fd, profile, strlen(profile)); + close(fd); dumping = false; } @@ -431,6 +463,11 @@ extern "C" void HeapProfilerStart(const char* prefix) { heap_profiler_memory = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena()); + // Reserve space now for the heap profiler, so we can still write a + // heap profile even if the application runs out of memory. + global_profiler_buffer = + reinterpret_cast<char*>(ProfilerMalloc(kProfileBufferSize)); + heap_profile = new(ProfilerMalloc(sizeof(HeapProfileTable))) HeapProfileTable(ProfilerMalloc, ProfilerFree); @@ -463,6 +500,11 @@ extern "C" void HeapProfilerStart(const char* prefix) { MallocExtension::Initialize(); } +extern "C" bool IsHeapProfilerRunning() { + SpinLockHolder l(&heap_lock); + return is_on; +} + extern "C" void HeapProfilerStop() { SpinLockHolder l(&heap_lock); @@ -494,6 +536,9 @@ extern "C" void HeapProfilerStop() { ProfilerFree(heap_profile); heap_profile = NULL; + // free output-buffer memory + ProfilerFree(global_profiler_buffer); + // free prefix ProfilerFree(filename_prefix); filename_prefix = NULL; diff --git a/src/internal_logging.cc b/src/internal_logging.cc index 237922e..e80f1e3 100644 --- a/src/internal_logging.cc +++ b/src/internal_logging.cc @@ -37,24 +37,45 @@ #include <unistd.h> // for write() #endif #include <string.h> +#include <google/malloc_extension.h> #include "internal_logging.h" -void TCMalloc_MESSAGE(const char* format, ...) { - va_list ap; - va_start(ap, format); - char buf[800]; - vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); +static const int kLogBufSize = 800; + +void TCMalloc_MESSAGE(const char* filename, + int line_number, + const char* format, ...) 
{ + char buf[kLogBufSize]; + const int n = snprintf(buf, sizeof(buf), "%s:%d] ", filename, line_number); + if (n < kLogBufSize) { + va_list ap; + va_start(ap, format); + vsnprintf(buf + n, kLogBufSize - n, format, ap); + va_end(ap); + } write(STDERR_FILENO, buf, strlen(buf)); } -void TCMalloc_CRASH(const char* format, ...) { - va_list ap; - va_start(ap, format); - char buf[800]; - vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); +static const int kStatsBufferSize = 16 << 10; +static char stats_buffer[kStatsBufferSize] = { 0 }; + +void TCMalloc_CRASH(bool dump_stats, + const char* filename, + int line_number, + const char* format, ...) { + char buf[kLogBufSize]; + const int n = snprintf(buf, sizeof(buf), "%s:%d] ", filename, line_number); + if (n < kLogBufSize) { + va_list ap; + va_start(ap, format); + vsnprintf(buf + n, kLogBufSize - n, format, ap); + va_end(ap); + } write(STDERR_FILENO, buf, strlen(buf)); + if (dump_stats) { + MallocExtension::instance()->GetStats(stats_buffer, kStatsBufferSize); + write(STDERR_FILENO, stats_buffer, strlen(stats_buffer)); + } abort(); } diff --git a/src/internal_logging.h b/src/internal_logging.h index 8edaa47..59ff568 100644 --- a/src/internal_logging.h +++ b/src/internal_logging.h @@ -45,30 +45,42 @@ // Safe debugging routine: we write directly to the stderr file // descriptor and avoid FILE buffering because that may invoke // malloc() -extern void TCMalloc_MESSAGE(const char* format, ...) +extern void TCMalloc_MESSAGE(const char* filename, + int line_number, + const char* format, ...) #ifdef HAVE___ATTRIBUTE__ - __attribute__ ((__format__ (__printf__, 1, 2))) + __attribute__ ((__format__ (__printf__, 3, 4))) #endif ; // Short form for convenience -#define MESSAGE TCMalloc_MESSAGE +#define MESSAGE(format, ...) \ + TCMalloc_MESSAGE(__FILE__, __LINE__, format, __VA_ARGS__) -// Dumps the specified message and then calls abort() -extern void TCMalloc_CRASH(const char* format, ...) 
+// Dumps the specified message and then calls abort(). If +// "dump_stats" is specified, the first call will also dump the +// tcmalloc stats. +extern void TCMalloc_CRASH(bool dump_stats, + const char* filename, + int line_number, + const char* format, ...) #ifdef HAVE___ATTRIBUTE__ - __attribute__ ((__format__ (__printf__, 1, 2))) + __attribute__ ((__format__ (__printf__, 4, 5))) #endif ; -#define CRASH TCMalloc_CRASH +#define CRASH(format, ...) \ + TCMalloc_CRASH(false, __FILE__, __LINE__, format, __VA_ARGS__) + +#define CRASH_WITH_STATS(format, ...) \ + TCMalloc_CRASH(true, __FILE__, __LINE__, format, __VA_ARGS__) // Like assert(), but executed even in NDEBUG mode #undef CHECK_CONDITION #define CHECK_CONDITION(cond) \ do { \ if (!(cond)) { \ - CRASH("%s:%d: assertion failed: %s\n", __FILE__, __LINE__, #cond); \ + CRASH("assertion failed: %s\n", #cond); \ } \ } while (0) diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index 611f1d2..31497ca 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -43,6 +43,7 @@ #endif #include <string> #include HASH_SET_H // defined in config.h +#include "base/dynamic_annotations.h" #include "google/malloc_extension.h" #include "maybe_threads.h" @@ -141,7 +142,11 @@ MallocExtension* MallocExtension::instance() { void MallocExtension::Register(MallocExtension* implementation) { perftools_pthread_once(&module_init, InitModule); - current_instance = implementation; + // When running under valgrind, our custom malloc is replaced with + // valgrind's one and malloc extensions will not work. + if (!RunningOnValgrind()) { + current_instance = implementation; + } } // ----------------------------------------------------------------------- @@ -308,3 +313,28 @@ void MallocExtension::GetHeapGrowthStacks(string* result) { // TODO(menage) Get this working in google-perftools //DumpAddressMap(DebugStringWriter, result); } + +// These are C shims that work on the current instance. 
+ +#define C_SHIM(fn, retval, paramlist, arglist) \ + extern "C" retval MallocExtension_##fn paramlist { \ + return MallocExtension::instance()->fn arglist; \ + } + +C_SHIM(VerifyAllMemory, bool, (), ()); +C_SHIM(VerifyNewMemory, bool, (void* p), (p)); +C_SHIM(VerifyArrayNewMemory, bool, (void* p), (p)); +C_SHIM(VerifyMallocMemory, bool, (void* p), (p)); +C_SHIM(MallocMemoryStats, bool, + (int* blocks, size_t* total, int histogram[kMallocHistogramSize]), + (blocks, total, histogram)); + +C_SHIM(GetStats, void, + (char* buffer, int buffer_length), (buffer, buffer_length)); +C_SHIM(GetNumericProperty, bool, + (const char* property, size_t* value), (property, value)); +C_SHIM(SetNumericProperty, bool, + (const char* property, size_t value), (property, value)); + +C_SHIM(MarkThreadIdle, void, (), ()); +C_SHIM(ReleaseFreeMemory, void, (), ()); diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc index 0f1e61f..91f5680 100644 --- a/src/malloc_hook.cc +++ b/src/malloc_hook.cc @@ -123,33 +123,35 @@ using base::internal::mremap_hook_; using base::internal::sbrk_hook_; -// static -MallocHook::NewHook MallocHook::SetNewHook(NewHook hook) { +// These are available as C bindings as well as C++, hence their +// definition outside the MallocHook class. 
+extern "C" +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook) { return new_hook_.Exchange(hook); } -// static -MallocHook::DeleteHook MallocHook::SetDeleteHook(DeleteHook hook) { +extern "C" +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook) { return delete_hook_.Exchange(hook); } -// static -MallocHook::MmapHook MallocHook::SetMmapHook(MmapHook hook) { +extern "C" +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook) { return mmap_hook_.Exchange(hook); } -// static -MallocHook::MunmapHook MallocHook::SetMunmapHook(MunmapHook hook) { +extern "C" +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook) { return munmap_hook_.Exchange(hook); } -// static -MallocHook::MremapHook MallocHook::SetMremapHook(MremapHook hook) { +extern "C" +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook) { return mremap_hook_.Exchange(hook); } -// static -MallocHook::SbrkHook MallocHook::SetSbrkHook(SbrkHook hook) { +extern "C" +MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook) { return sbrk_hook_.Exchange(hook); } @@ -245,8 +247,8 @@ static inline void CheckInHookCaller() { // We can improve behavior/compactness of this function // if we pass a generic test function (with a generic arg) // into the implementations for GetStackTrace instead of the skip_count. 
-int MallocHook::GetCallerStackTrace(void** result, int max_depth, - int skip_count) { +extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth, + int skip_count) { #if defined(NO_TCMALLOC_SAMPLES) return 0; #elif !defined(HAVE_ATTRIBUTE_SECTION_START) diff --git a/src/memory_region_map.cc b/src/memory_region_map.cc index 0c2dd20..8c5ebad 100644 --- a/src/memory_region_map.cc +++ b/src/memory_region_map.cc @@ -358,8 +358,18 @@ MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { } inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { + RAW_VLOG(4, "Inserting region %p..%p from %p", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr), + reinterpret_cast<void*>(region.caller())); + RegionSet::const_iterator i = regions_->lower_bound(region); + if (i != regions_->end() && i->start_addr <= region.start_addr) { + RAW_DCHECK(region.end_addr <= i->end_addr, ""); // lower_bound ensures this + return; // 'region' is a subset of an already recorded region; do nothing + // We can be stricter and allow this only when *i has been created via + // an mmap with MAP_NORESERVE flag set. 
+ } if (DEBUG_MODE) { - RegionSet::const_iterator i = regions_->lower_bound(region); RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), "Wow, overlapping memory regions"); Region sample; @@ -368,10 +378,6 @@ inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), "Wow, overlapping memory regions"); } - RAW_VLOG(4, "Inserting region %p..%p from %p", - reinterpret_cast<void*>(region.start_addr), - reinterpret_cast<void*>(region.end_addr), - reinterpret_cast<void*>(region.caller())); region.AssertIsConsistent(); // just making sure // This inserts and allocates permanent storage for region // and its call stack data: it's safe to do it now: @@ -72,15 +72,18 @@ use strict; use warnings; use Getopt::Long; -my $PPROF_VERSION = "0.98"; - -# These are the object tools we use, which come from various sources. -# We want to invoke them directly, rather than via users' aliases and/or -# search paths, because some people have colorizing versions of them that -# can cause trouble, or because the default versions on a system may not -# deal with 64-bit objects if 64-bit profiles are being analyzed. -# (However, we will default to the user's defaults if we can't find better.) -# See ConfigureObjTools near the bottom of this script. +my $PPROF_VERSION = "0.99"; + +# These are the object tools we use which can come from a +# user-specified location using --tools, from the PPROF_TOOLS +# environment variable, or from the latest installed crosstool under +# /usr/crosstool. We avoid using the system versions for more +# predictable behavior. System versions may not support 64-bit +# binaries and older object tools may have difficulties with binaries +# produced with the latest crosstool versions. For example, +# name-mangling changed with GCC 4.3 which can cause problems with +# demangling in the object tools. See ConfigureObjTools near the +# bottom of this script for how the tool paths are determined. 
my %obj_tool_map = ( "objdump" => "objdump", "nm" => "nm", @@ -188,6 +191,10 @@ Miscellaneous: --help This message --version Version information +Environment Variables: + PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof + PPROF_TOOLS Prefix for object tools pathnames + Examples: pprof /bin/ls ls.prof @@ -239,6 +246,8 @@ sub Init() { $main::next_tmpfile = 0; $SIG{'INT'} = \&sighandler; + # Cache from filename/linenumber to source code + $main::source_cache = (); $main::opt_help = 0; $main::opt_version = 0; @@ -493,8 +502,9 @@ sub Main() { # Subtract base from profile, if specified if ($main::opt_base ne '') { - my $base = ReadProfile($main::prog, $main::opt_base)->{profile}; - $profile = SubtractProfile($profile, $base); + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); } # Get total data in profile @@ -534,10 +544,16 @@ sub Main() { # Print if (!$main::opt_interactive) { if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total); } elsif ($main::opt_list) { PrintListing($libs, $flat, $cumulative, $main::opt_list); } elsif ($main::opt_text) { + # Make sure the output is empty when we have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total: %s %s\n", Unparse($total), Units()); + } PrintText($symbols, $flat, $cumulative, $total, -1); } elsif ($main::opt_callgrind) { PrintCallgrind($calls); @@ -704,7 +720,7 @@ sub InteractiveCommand { my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintDisassembly($libs, $flat, $cumulative, $routine); + PrintDisassembly($libs, $flat, $cumulative, $routine, $total); return 1; } if (m/^ *gv *(.*)/) { @@ -905,6 +921,7 @@ sub PrintDisassembly { my $flat = shift; my 
$cumulative = shift; my $disasm_opts = shift; + my $total = shift; foreach my $lib (@{$libs}) { my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); @@ -913,42 +930,16 @@ sub PrintDisassembly { my $start_addr = $symbol_table->{$routine}->[0]; my $end_addr = $symbol_table->{$routine}->[1]; # See if there are any samples in this routine - my $total_flat = 0; - my $total_cum = 0; my $length = hex(AddressSub($end_addr, $start_addr)); my $addr = AddressAdd($start_addr, $offset); for (my $i = 0; $i < $length; $i++) { - $total_cum += GetEntry($cumulative, $addr); - $total_flat += GetEntry($flat, $addr); - $addr = AddressInc($addr); - } - - # Skip disassembly if there are no samples in routine - next if ($total_cum == 0); - - print "ROUTINE ====================== $routine\n"; - printf "%6s %6s Total %s (flat / cumulative)\n", - Unparse($total_flat), Unparse($total_cum), Units(); - - my @instructions = Disassemble($lib->[0], $offset, - $start_addr, $end_addr); - foreach my $e (@instructions) { - my $location = ($e->[2] >= 0) ? "$e->[1]:$e->[2]" : ""; - $location =~ s|.*/||; # Remove directory portion, if any - if (length($location) >= 20) { - # For long locations, just show the last 20 characters - $location = substr($location, -20); + if (defined($cumulative->{$addr})) { + PrintDisassembledFunction($lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, $total); + last; } - my $f = GetEntry($flat, $e->[0]); - my $c = GetEntry($cumulative, $e->[0]); - my $address = $e->[0]; - $address =~ s/^0x//; - printf("%6s %6s %-20s %8s: %6s\n", - UnparseAlt($f), - UnparseAlt($c), - $location, - $address, - $e->[3]); + $addr = AddressInc($addr); } } } @@ -965,7 +956,7 @@ sub Disassemble { my $end_addr = shift; my $objdump = $obj_tool_map{"objdump"}; - my $cmd = sprintf("$objdump -d -l --no-show-raw-insn " . + my $cmd = sprintf("$objdump -C -d -l --no-show-raw-insn " . "--start-address=0x$start_addr " . 
"--stop-address=0x$end_addr $prog"); open(OBJDUMP, "$cmd |") || error("$objdump: $!\n"); @@ -1179,6 +1170,160 @@ sub PrintSource { close(FILE); } +# Return the source line for the specified file/linenumber. +# Returns undef if not found. +sub SourceLine { + my $file = shift; + my $line = shift; + + # Look in cache + if (!defined($main::source_cache{$file})) { + if (100 < scalar keys(%main::source_cache)) { + # Clear the cache when it gets too big + $main::source_cache = (); + } + + # Read all lines from the file + if (!open(FILE, "<$file")) { + print STDERR "$file: $!\n"; + $main::source_cache{$file} = []; # Cache the negative result + return undef; + } + my $lines = []; + push(@{$lines}, ""); # So we can use 1-based line numbers as indices + while (<FILE>) { + push(@{$lines}, $_); + } + close(FILE); + + # Save the lines in the cache + $main::source_cache{$file} = $lines; + } + + my $lines = $main::source_cache{$file}; + if (($line < 0) || ($line > $#{$lines})) { + return undef; + } else { + return $lines->[$line]; + } +} + +# Print disassembly for one routine with interspersed source if available +sub PrintDisassembledFunction { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $total = shift; + + # Disassemble all instructions + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Make array of counts per instruction + my @flat_count = (); + my @cum_count = (); + my $flat_total = 0; + my $cum_total = 0; + foreach my $e (@instructions) { + my $c1 = GetEntry($flat, $e->[0]); + my $c2 = GetEntry($cumulative, $e->[0]); + push(@flat_count, $c1); + push(@cum_count, $c2); + $flat_total += $c1; + $cum_total += $c2; + } + + # Print header with total counts + printf("ROUTINE ====================== %s\n" . 
+ "%6s %6s %s (flat, cumulative) %.1f%% of total\n", + ShortFunctionName($routine), + Unparse($flat_total), + Unparse($cum_total), + Units(), + ($cum_total * 100.0) / $total); + + # Process instructions in order + my $current_file = ""; + for (my $i = 0; $i <= $#instructions; ) { + my $e = $instructions[$i]; + + # Print the new file name whenever we switch files + if ($e->[1] ne $current_file) { + $current_file = $e->[1]; + my $fname = $current_file; + $fname =~ s|^\./||; # Trim leading "./" + + # Shorten long file names + if (length($fname) >= 58) { + $fname = "..." . substr($fname, -55); + } + printf("-------------------- %s\n", $fname); + } + + # TODO: Compute range of lines to print together to deal with + # small reorderings. + my $first_line = $e->[2]; + my $last_line = $first_line; + my %flat_sum = (); + my %cum_sum = (); + for (my $l = $first_line; $l <= $last_line; $l++) { + $flat_sum{$l} = 0; + $cum_sum{$l} = 0; + } + + # Find run of instructions for this range of source lines + my $first_inst = $i; + while (($i <= $#instructions) && + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { + $e = $instructions[$i]; + $flat_sum{$e->[2]} += $flat_count[$i]; + $cum_sum{$e->[2]} += $cum_count[$i]; + $i++; + } + my $last_inst = $i - 1; + + # Print source lines + for (my $l = $first_line; $l <= $last_line; $l++) { + my $line = SourceLine($current_file, $l); + if (!defined($line)) { + $line = "?\n"; + next; + } else { + $line =~ s/^\s+//; + } + printf("%6s %6s %5d: %s", + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); + } + + # Print disassembly + for (my $x = $first_inst; $x <= $last_inst; $x++) { + my $e = $instructions[$x]; + my $address = $e->[0]; + $address = AddressSub($address, $offset); # Make relative to section + $address =~ s/^0x//; + $address =~ s/^0*//; + + # Trim symbols + my $d = $e->[3]; + while ($d =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types + while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } 
# Remove template arguments + + printf("%6s %6s %8s: %6s\n", + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + $address, + $d); + } + } +} + # Print DOT graph sub PrintDot { my $prog = shift; @@ -1582,6 +1727,7 @@ sub RemoveUninterestingFrames { foreach my $name ('ProfileData::Add', # historical 'ProfileData::prof_handler', # historical 'CpuProfiler::prof_handler', + '__FRAME_END__', '__pthread_sighandler', '__restore') { $skip{$name} = 1; @@ -2955,68 +3101,88 @@ sub ShortFunctionName { ##### Miscellaneous ##### -# Find the right versions of the above object tools to use. The argument -# is the program file being analyzed, and should be an ELF 32-bit or ELF -# 64-bit executable file. If it is, we select the default tools location -# based on that; otherwise, we'll emit a warning and take the user's -# defaults. +# Find the right versions of the above object tools to use. The +# argument is the program file being analyzed, and should be an ELF +# 32-bit or ELF 64-bit executable file. The location of the tools +# is determined by considering the following options in this order: +# 1) --tools option, if set +# 2) PPROF_TOOLS environment variable, if set +# 3) otherwise, the latest installed crosstool under /usr/crosstool +# It is a fatal error if --tools and PPROF_TOOLS are not specified and no +# tools exist under /usr/crosstool. sub ConfigureObjTools { my $prog_file = shift; - # Figure out the right default pathname prefix based on the program file - # type: - my $default_prefix = "/usr/bin/"; + # Check for the existence of $prog_file because /usr/bin/file does not + # predictably return error status in prod. 
+ (-e $prog_file) || error("$prog_file does not exist.\n"); + # Follow symlinks (at least for systems where "file" supports that) my $file_type = `/usr/bin/file -L $prog_file 2>/dev/null || /usr/bin/file $prog_file`; - if ($file_type !~ /executable/) { - warn "WARNING: program $prog_file is apparently not an executable\n"; - # Don't change the default prefix. - } elsif ($file_type =~ /64-bit/) { + if ($file_type =~ /64-bit/) { # Change $address_length to 16 if the program file is ELF 64-bit. # We can't detect this from many (most?) heap or lock contention # profiles, since the actual addresses referenced are generally in low # memory even for 64-bit programs. - $address_length = 16; + $address_length = 16; } # Go fill in %obj_tool_map with the pathnames to use: foreach my $tool (keys %obj_tool_map) { - $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}, - $default_prefix); + $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}, + LatestCrosstoolPrefix()); + } +} + +# Return the prefix of the latest 64-bit crosstool available in +# /usr/crosstool. 64-bit (x86_64) utilities are used because they +# handle both 32-bit and 64-bit binaries. +sub LatestCrosstoolPrefix { + my $prefix = ""; + my $latest_version = 0; + for my $gcc (glob("/usr/crosstool/v*/gcc-*/x86_64*/bin/*-gcc")) { + if (($gcc =~ m{/usr/crosstool/v([0-9]+)}) && + ($1 > $latest_version)) { + $latest_version = $1; + $prefix = $gcc; + $prefix =~ s/gcc$//; + } } + return $prefix; } +# Returns the path of a caller-specified object tool. If --tools or +# PPROF_TOOLS are specified, then returns the full path to the tool +# with that prefix. Otherwise, returns the path of the tool in the +# latest installed crosstool version. Raises a fatal error if the +# tool cannot be found in these directories. 
sub ConfigureTool { my $tool = shift; - my $prefix = shift; + my $crosstool_prefix = shift; my $path; - if ($main::opt_debug) { - print STDERR "Configuring '$tool' with default prefix '$prefix'\n"; - } - - # Try a specific prefix specified by the user: if ($main::opt_tools ne "") { + # Use a prefix specified by the --tools option... $path = $main::opt_tools . $tool; - if ($main::opt_debug) { print STDERR " (a) Trying '$path'\n"; } - if (-x $path) { return $path; } - } - - # Try the default prefix passed to us: - $path = $prefix . $tool; - if ($main::opt_debug) { print STDERR " (b) Trying '$path'\n"; } - if (-x $path) { return $path; } - - # Try the normal system default (/usr/bin/): - if ($prefix ne "/usr/bin/") { - $path = "/usr/bin/$tool"; - if ($main::opt_debug) { print STDERR " (c) Trying '$path'\n"; } - if (-x $path) { return $path; } + if (!-x $path) { + error("No '$tool' found with prefix specified by --tools $main::opt_tools\n"); + } + } elsif (exists $ENV{"PPROF_TOOLS"} && + $ENV{"PPROF_TOOLS"} ne "") { + #... or specified with the PPROF_TOOLS environment variable... + $path = $ENV{"PPROF_TOOLS"} . $tool; + if (!-x $path) { + error("No '$tool' found with prefix specified by PPROF_TOOLS=$ENV{PPROF_TOOLS}\n"); + } + } else { + # ... otherwise use the latest crosstool. + $path = $crosstool_prefix . $tool; + if ($crosstool_prefix eq "" || !-x $path) { + error("No '$tool' found in /usr/crosstool. Specify prefix for object tool pathnames with --tools.\n"); + } } - - # If all else fails, hope the bare toolname works: - if ($main::opt_debug) { print STDERR " Returning '$tool'\n"; } - return $tool; + if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; } + return $path; } sub cleanup { @@ -3068,6 +3234,13 @@ sub GetProcedureBoundariesViaNm { if (m/^([0-9a-f]+) . (..*)/) { my $start_val = $1; my $this_routine = $2; + + # Tag this routine with the starting address in case the image + # has multiple occurrences of this routine. 
We use a syntax + # that resembles template paramters that are automatically + # stripped out by ShortFunctionName() + $this_routine .= "<$start_val>"; + if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), HexExtend($start_val)]; @@ -3330,3 +3503,4 @@ sub RunUnitTests { } exit ($error_count); } + diff --git a/src/profiledata.cc b/src/profiledata.cc index ae79bdc..e622b28 100644 --- a/src/profiledata.cc +++ b/src/profiledata.cc @@ -56,6 +56,10 @@ const int ProfileData::kAssociativity; const int ProfileData::kBuckets; const int ProfileData::kBufferLength; +ProfileData::Options::Options() + : frequency_(1) { +} + // This function is safe to call from asynchronous signals (but is not // re-entrant). However, that's not part of its public interface. void ProfileData::Evict(const Entry& entry) { @@ -84,7 +88,8 @@ ProfileData::ProfileData() start_time_(0) { } -bool ProfileData::Start(const char* fname, int frequency) { +bool ProfileData::Start(const char* fname, + const ProfileData::Options& options) { if (enabled()) { return false; } @@ -113,7 +118,9 @@ bool ProfileData::Start(const char* fname, int frequency) { evict_[num_evicted_++] = 0; // count for header evict_[num_evicted_++] = 3; // depth for header evict_[num_evicted_++] = 0; // Version number - evict_[num_evicted_++] = 1000000 / frequency; // Period (microseconds) + CHECK_NE(0, options.frequency()); + int period = 1000000 / options.frequency(); + evict_[num_evicted_++] = period; // Period (microseconds) evict_[num_evicted_++] = 0; // Padding out_ = fd; diff --git a/src/profiledata.h b/src/profiledata.h index d9a8e7b..008c8a4 100644 --- a/src/profiledata.h +++ b/src/profiledata.h @@ -42,6 +42,7 @@ #include "config.h" #include <time.h> // for time_t +#include <stdint.h> #include "base/basictypes.h" // A class that accumulates profile samples and writes them to a file. 
@@ -83,19 +84,35 @@ class ProfileData { int samples_gathered; // Number of samples gathered to far (or 0) }; + class Options { + public: + Options(); + + // Get and set the sample frequency. + int frequency() const { + return frequency_; + } + void set_frequency(int frequency) { + frequency_ = frequency; + } + + private: + int frequency_; // Sample frequency. + }; + static const int kMaxStackDepth = 64; // Max stack depth stored in profile ProfileData(); ~ProfileData(); // If data collection is not already enabled start to collect data - // into fname. The data includes the frequency of samples, as - // provided by 'frequency'. + // into fname. Parameters related to this profiling run are specified + // by 'options'. // // Returns true if data collection could be started, otherwise (if an // error occurred or if data collection was already enabled) returns // false. - bool Start(const char* fname, int frequency); + bool Start(const char *fname, const Options& options); // If data collection is enabled, stop data collection and write the // data to disk. diff --git a/src/profiler.cc b/src/profiler.cc index 3149a89..5f7cdbe 100644 --- a/src/profiler.cc +++ b/src/profiler.cc @@ -229,7 +229,9 @@ bool CpuProfiler::Start(const char* fname, // recorded against the new profile, but there's no harm in that. SpinLockHolder sl(&signal_lock_); - if (!collector_.Start(fname, frequency_)) { + ProfileData::Options collector_options; + collector_options.set_frequency(frequency_); + if (!collector_.Start(fname, collector_options)) { return false; } diff --git a/src/system-alloc.cc b/src/system-alloc.cc index b262b42..0645735 100644 --- a/src/system-alloc.cc +++ b/src/system-alloc.cc @@ -89,13 +89,16 @@ static size_t pagesize = 0; // Configuration parameters. -DEFINE_int32(malloc_devmem_start, 0, +DEFINE_int32(malloc_devmem_start, + EnvToInt("TCMALLOC_DEVMEM_START", 0), "Physical memory starting location in MB for /dev/mem allocation." 
" Setting this to 0 disables /dev/mem allocation"); -DEFINE_int32(malloc_devmem_limit, 0, +DEFINE_int32(malloc_devmem_limit, + EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0), "Physical memory limit location in MB for /dev/mem allocation." " Setting this to 0 means no limit."); -DEFINE_bool(malloc_skip_mmap, false, +DEFINE_bool(malloc_skip_mmap, + EnvToBool("TCMALLOC_SKIP_MMAP", false), "Whether mmap can be used to obtain memory."); // static allocators @@ -129,7 +132,7 @@ static char devmem_space[sizeof(DevMemSysAllocator)]; static const int kStaticAllocators = 3; // kMaxDynamicAllocators + kStaticAllocators; static const int kMaxAllocators = 5; -SysAllocator *allocators[kMaxAllocators]; +static SysAllocator *allocators[kMaxAllocators]; bool RegisterSystemAllocator(SysAllocator *a, int priority) { SpinLockHolder lock_holder(&spinlock); diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index 94287ba..b0d9d04 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -245,23 +245,37 @@ DEFINE_int64(tcmalloc_sample_parameter, 0, "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); static size_t sample_period = 0; #else -DEFINE_int64(tcmalloc_sample_parameter, 262147, +DEFINE_int64(tcmalloc_sample_parameter, + EnvToInt("TCMALLOC_SAMPLE_PARAMETER", 262147), "Twice the approximate gap between sampling actions." " Must be a prime number. Otherwise will be rounded up to a " " larger prime number"); -static size_t sample_period = 262147; +static size_t sample_period = EnvToInt("TCMALLOC_SAMPLE_PARAMETER", 262147); #endif // Protects sample_period above static SpinLock sample_period_lock(SpinLock::LINKER_INITIALIZED); // Parameters for controlling how fast memory is returned to the OS. -DEFINE_double(tcmalloc_release_rate, 1, +DEFINE_double(tcmalloc_release_rate, + EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0), "Rate at which we release unused memory to the system. " "Zero means we never release memory back to the system. 
" "Increase this flag to return memory faster; decrease it " "to return memory slower. Reasonable rates are in the " - "range [0,10]"); + "range [0.0,10.0]"); + +DEFINE_int64(tcmalloc_large_alloc_report_threshold, + EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD", 1<<30), + "Allocations larger than this value cause a stack " + "trace to be dumped to stderr. The threshold for " + "dumping stack traces is increased by a factor of 1.125 " + "every time we print a message so that the threshold " + "automatically goes up by a factor of ~1000 every 60 " + "messages. This bounds the amount of extra logging " + "generated by this flag. Default value of this flag " + "is very large and therefore you should see no extra " + "logging unless the flag is overridden."); //------------------------------------------------------------------- // Mapping from size to size_class and vice versa @@ -504,16 +518,13 @@ static void InitSizeClasses() { // Double-check sizes just to be safe for (size_t size = 0; size <= kMaxSize; size++) { const int sc = SizeClass(size); - if (sc == 0) { + if (sc <= 0 || sc >= kNumClasses) { CRASH("Bad size class %d for %" PRIuS "\n", sc, size); } if (sc > 1 && size <= class_to_size[sc-1]) { CRASH("Allocating unnecessarily large class %d for %" PRIuS "\n", sc, size); } - if (sc >= kNumClasses) { - CRASH("Bad size class %d for %" PRIuS "\n", sc, size); - } const size_t s = class_to_size[sc]; if (size > s) { CRASH("Bad size %" PRIuS " for %" PRIuS " (sc = %d)\n", s, size, sc); @@ -2673,14 +2684,51 @@ static inline void* SpanToMallocResult(Span *span) { CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); } +// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with +// automatic increases factored in. +static int64_t large_alloc_threshold = + (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold + ? 
kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold); + +static void ReportLargeAlloc(Length num_pages, void* result) { + StackTrace stack; + stack.depth = GetStackTrace(stack.stack, kMaxStackDepth, 1); + + static const int N = 1000; + char buffer[N]; + TCMalloc_Printer printer(buffer, N); + printer.printf("tcmalloc: large alloc %lld bytes == %p @ ", + static_cast<long long>(num_pages << kPageShift), + result); + for (int i = 0; i < stack.depth; i++) { + printer.printf(" %p", stack.stack[i]); + } + printer.printf("\n"); + write(STDERR_FILENO, buffer, strlen(buffer)); +} + // Helper for do_malloc(). static inline void* do_malloc_pages(Length num_pages) { Span *span; + bool report_large = false; { SpinLockHolder h(&pageheap_lock); span = pageheap->New(num_pages); + const int64 threshold = large_alloc_threshold; + if (num_pages >= (threshold >> kPageShift)) { + // Increase the threshold by 1/8 every time we generate a report. + // We cap the threshold at 8GB to avoid overflow problems. + large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 + ? threshold + threshold/8 : 8ll<<30); + report_large = true; + } } - return span == NULL ? NULL : SpanToMallocResult(span); + + void* result = (span == NULL ? NULL : SpanToMallocResult(span)); + if (report_large) { + ReportLargeAlloc(num_pages, result); + } + return result; } static inline void* do_malloc(size_t size) { diff --git a/src/tests/heap-checker-death_unittest.sh b/src/tests/heap-checker-death_unittest.sh index 0b4de37..e6ec597 100755 --- a/src/tests/heap-checker-death_unittest.sh +++ b/src/tests/heap-checker-death_unittest.sh @@ -155,13 +155,18 @@ Test 20 1 "Exiting .* because of .* leaks$" "" \ Test 20 1 "Exiting .* because of .* leaks$" "" \ HEAP_CHECKER_TEST_TEST_LOOP_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 || exit 9 +# Test that we produce a reasonable textual leak report. 
+Test 60 1 "DoAllocHidden heap-checker_unittest[.]cc:" "" \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECK_TEST_NO_THREADS=1 \ + || exit 10 + # Test that very early log messages are present and controllable: Test 60 1 "Starting tracking the heap$" "" \ HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=1 \ - || exit 9 + || exit 11 Test 60 1 "" "Starting tracking the heap" \ HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-1 \ - || exit 10 + || exit 12 cd / # so we're not in TMPDIR when we delete it rm -rf $TMPDIR diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc index 36ebf81..d42a976 100644 --- a/src/tests/heap-checker_unittest.cc +++ b/src/tests/heap-checker_unittest.cc @@ -103,6 +103,7 @@ #include "base/googleinit.h" #include "base/logging.h" #include "base/commandlineflags.h" +#include "base/thread_lister.h" #include <google/heap-checker.h> #include "memory_region_map.h" #include <google/malloc_extension.h> @@ -257,6 +258,9 @@ void* operator new[](size_t size, const Initialized&) { return p; } +static void DoWipeStack(int n); // defined below +static void WipeStack() { DoWipeStack(20); } + static void Pause() { poll(NULL, 0, 77); // time for thread activity in HeapBusyThreadBody @@ -275,6 +279,8 @@ static void Pause() { } } } + WipeStack(); // e.g. 
MallocExtension::VerifyAllMemory + // can leave pointers to heap objects on stack } // Make gcc think a pointer is "used" @@ -315,30 +321,28 @@ static void LogHidden(const char* message, const void* ptr) { << ptr << " ^ " << reinterpret_cast<void*>(kHideMask) << endl; } -// non-static to fool the compiler against inlining -extern void (*run_hidden_ptr)(Closure* c, int n); -void (*run_hidden_ptr)(Closure* c, int n); -extern void (*wipe_stack_ptr)(int n); -void (*wipe_stack_ptr)(int n); +// volatile to fool the compiler against inlining the calls to these +void (*volatile run_hidden_ptr)(Closure* c, int n); +void (*volatile wipe_stack_ptr)(int n); static void DoRunHidden(Closure* c, int n) { if (n) { VLOG(10) << "Level " << n << " at " << &n; - run_hidden_ptr(c, n-1); - wipe_stack_ptr(n); + (*run_hidden_ptr)(c, n-1); + (*wipe_stack_ptr)(n); sleep(0); // undo -foptimize-sibling-calls } else { c->Run(); } } -static void DoWipeStack(int n) { +/*static*/ void DoWipeStack(int n) { VLOG(10) << "Wipe level " << n << " at " << &n; if (n) { const int sz = 30; volatile int arr[sz]; - for (int i = 0; i < sz; ++i) arr[i] = 0; - wipe_stack_ptr(n-1); + for (int i = 0; i < sz; ++i) arr[i] = 0; + (*wipe_stack_ptr)(n-1); sleep(0); // undo -foptimize-sibling-calls } } @@ -402,6 +406,7 @@ enum CheckType { SAME_HEAP, NO_LEAKS }; static void VerifyLeaks(HeapLeakChecker* check, CheckType type, int leaked_bytes, int leaked_objects) { + WipeStack(); // to help with can_create_leaks_reliably const bool no_leaks = type == NO_LEAKS ? 
RUN_SILENT(*check, BriefNoLeaks) : RUN_SILENT(*check, BriefSameHeap); @@ -595,6 +600,7 @@ static void TestHeapLeakCheckerTrick() { DeAllocHidden(&bar2); Pause(); if (can_create_leaks_reliably) { + WipeStack(); // to help with can_create_leaks_reliably // this might still fail occasionally, but it should be very rare: CHECK(check.BriefSameHeap()); } else { @@ -747,6 +753,7 @@ static void TestSTLAllocInverse() { RunHidden(NewCallback(DoTestSTLAllocInverse, &x)); LogHidden("Leaking", x); if (can_create_leaks_reliably) { + WipeStack(); // to help with can_create_leaks_reliably // these might still fail occasionally, but it should be very rare CHECK_EQ(RUN_SILENT(check, BriefNoLeaks), false); CHECK_GE(check.BytesLeaked(), 100 * sizeof(int)); @@ -904,7 +911,7 @@ static void* HeapBusyThreadBody(void* a) { ptr = reinterpret_cast<int **>( reinterpret_cast<uintptr_t>(ptr) ^ kHideMask); // busy loop to get the thread interrupted at: - for (int i = 1; i < 1000000; ++i) user += (1 + user * user * 5) / i; + for (int i = 1; i < 10000000; ++i) user += (1 + user * user * 5) / i; ptr = reinterpret_cast<int **>( reinterpret_cast<uintptr_t>(ptr) ^ kHideMask); } else { @@ -941,10 +948,19 @@ static void RunHeapBusyThreads() { Pause(); } +static int NumThreads(void* parameter, int num_threads, + pid_t* thread_pids, va_list ap) { + ResumeAllProcessThreads(num_threads, thread_pids); + return num_threads; +} + // tests disabling via function name REGISTER_MODULE_INITIALIZER(heap_checker_unittest, { // TODO(csilvers): figure out when this might be true can_create_leaks_reliably = false; + if (can_create_leaks_reliably) { // should have no threads: + CHECK_LE(ListAllProcessThreads(NULL, NumThreads), 1); + } HeapLeakChecker::DisableChecksIn("NamedDisabledLeaks"); }); @@ -1490,7 +1506,10 @@ int main(int argc, char** argv) { if (FLAGS_test_register_leak) { // make us fail only where the .sh test expects: Pause(); - for (int i = 0; i < 20; ++i) CHECK(HeapLeakChecker::NoGlobalLeaks()); + for (int 
i = 0; i < 100; ++i) { // give it some time to crash + CHECK(HeapLeakChecker::NoGlobalLeaks()); + Pause(); + } return Pass(); } @@ -1575,6 +1594,6 @@ int main(int argc, char** argv) { ThreadNamedDisabledLeaks(); CHECK(HeapLeakChecker::NoGlobalLeaks()); // so far, so good - + return Pass(); } diff --git a/src/tests/heap-profiler_unittest.cc b/src/tests/heap-profiler_unittest.cc index 9828cdd..d066880 100644 --- a/src/tests/heap-profiler_unittest.cc +++ b/src/tests/heap-profiler_unittest.cc @@ -40,23 +40,29 @@ #include "config_for_unittests.h" #include <stdlib.h> #include <stdio.h> +#include <fcntl.h> // for mkdir() #ifdef HAVE_UNISTD_H #include <unistd.h> // for fork() #endif #include <sys/wait.h> // for wait() +#include <string> +#include "base/basictypes.h" +#include "base/logging.h" #include <google/heap-profiler.h> +using std::string; + static const int kMaxCount = 100000; int* g_array[kMaxCount]; // an array of int-vectors -static void Allocate(int start, int end, int size) { +static ATTRIBUTE_NOINLINE void Allocate(int start, int end, int size) { for (int i = start; i < end; ++i) { if (i < kMaxCount) g_array[i] = new int[size]; } } -static void Allocate2(int start, int end, int size) { +static ATTRIBUTE_NOINLINE void Allocate2(int start, int end, int size) { for (int i = start; i < end; ++i) { if (i < kMaxCount) g_array[i] = new int[size]; @@ -70,6 +76,25 @@ static void Deallocate(int start, int end) { } } +static void TestHeapProfilerStartStopIsRunning() { + // If you run this with whole-program heap-profiling on, than + // IsHeapProfilerRunning should return true. 
+ if (!IsHeapProfilerRunning()) { + const char* tmpdir = getenv("TMPDIR"); + if (tmpdir == NULL) + tmpdir = "/tmp"; + mkdir(tmpdir, 0755); // if necessary + HeapProfilerStart((string(tmpdir) + "/start_stop").c_str()); + CHECK(IsHeapProfilerRunning()); + + Allocate(0, 40, 100); + Deallocate(0, 40); + + HeapProfilerStop(); + CHECK(!IsHeapProfilerRunning()); + } +} + int main(int argc, char** argv) { if (argc > 2 || (argc == 2 && argv[1][0] == '-')) { printf("USAGE: %s [number of children to fork]\n", argv[0]); @@ -80,6 +105,8 @@ int main(int argc, char** argv) { num_forks = atoi(argv[1]); } + TestHeapProfilerStartStopIsRunning(); + Allocate(0, 40, 100); Deallocate(0, 40); @@ -107,5 +134,7 @@ int main(int argc, char** argv) { } } + printf("DONE.\n"); + return 0; } diff --git a/src/tests/markidle_unittest.cc b/src/tests/markidle_unittest.cc index a4c6e7b..0d9178f 100644 --- a/src/tests/markidle_unittest.cc +++ b/src/tests/markidle_unittest.cc @@ -81,17 +81,19 @@ static size_t GetTotalThreadCacheSize() { // of per-thread memory. static void TestIdleUsage() { const size_t original = GetTotalThreadCacheSize(); - VLOG(0, "Original usage: %"PRIuS"\n", original); TestAllocation(); const size_t post_allocation = GetTotalThreadCacheSize(); - VLOG(0, "Post allocation: %"PRIuS"\n", post_allocation); CHECK_GT(post_allocation, original); MallocExtension::instance()->MarkThreadIdle(); const size_t post_idle = GetTotalThreadCacheSize(); - VLOG(0, "Post idle: %"PRIuS"\n", post_idle); CHECK_LE(post_idle, original); + + // Log after testing because logging can allocate heap memory. 
+ VLOG(0, "Original usage: %"PRIuS"\n", original); + VLOG(0, "Post allocation: %"PRIuS"\n", post_allocation); + VLOG(0, "Post idle: %"PRIuS"\n", post_idle); } int main(int argc, char** argv) { diff --git a/src/tests/profiledata_unittest.cc b/src/tests/profiledata_unittest.cc index a49257d..679b9e2 100644 --- a/src/tests/profiledata_unittest.cc +++ b/src/tests/profiledata_unittest.cc @@ -64,6 +64,22 @@ using std::string; namespace { +// Re-runs fn until it doesn't cause EINTR. +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +// Thin wrapper around a file descriptor so that the file descriptor +// gets closed for sure. +struct FileDescriptor { + const int fd_; + explicit FileDescriptor(int fd) : fd_(fd) {} + ~FileDescriptor() { + if (fd_ >= 0) { + NO_INTR(close(fd_)); + } + } + int get() { return fd_; } +}; + // must be the same as with ProfileData::Slot. typedef uintptr_t ProfileDataSlot; @@ -84,16 +100,27 @@ class ProfileDataChecker { string filename() const { return filename_; } void Check(const ProfileDataSlot* slots, int num_slots) { - size_t num_bytes = num_slots * sizeof slots[0]; - char* filedata = new char[num_bytes]; - - int fd = open(filename_.c_str(), O_RDONLY); - EXPECT_GE(fd, 0); - ssize_t numread = read(fd, filedata, num_bytes); - EXPECT_EQ(numread, num_bytes); - EXPECT_EQ(0, memcmp(slots, filedata, num_bytes)); - close(fd); + CheckWithSkips(slots, num_slots, NULL, 0); + } + void CheckWithSkips(const ProfileDataSlot* slots, int num_slots, + const int* skips, int num_skips) { + FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); + CHECK_GE(fd.get(), 0); + + ProfileDataSlot* filedata = new ProfileDataSlot[num_slots]; + size_t expected_bytes = num_slots * sizeof filedata[0]; + ssize_t bytes_read = read(fd.get(), filedata, expected_bytes); + CHECK_EQ(expected_bytes, bytes_read); + + for (int i = 0; i < num_slots; i++) { + if (num_skips > 0 && *skips == i) { + num_skips--; + skips++; + continue; + } + CHECK_EQ(slots[i], filedata[i]); // 
"first mismatch at slot " << i; + } delete[] filedata; } @@ -129,6 +156,7 @@ class ProfileDataTest { // The tests to run void OpsWhenStopped(); void StartStopEmpty(); + void StartStopNoOptionsEmpty(); void StartWhenStarted(); void StartStopEmpty2(); void CollectOne(); @@ -192,13 +220,36 @@ TEST_F(ProfileDataTest, StartStopEmpty) { }; ExpectStopped(); - EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), frequency)); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); ExpectRunningSamples(0); collector_.Stop(); ExpectStopped(); checker_.Check(slots, arraysize(slots)); } +// Start and Stop with no options, collecting no samples. Verify +// output contents. +TEST_F(ProfileDataTest, StartStopNoOptionsEmpty) { + // We're not requesting a specific period, implementation can do + // whatever it likes. + ProfileDataSlot slots[] = { + 0, 3, 0, 0 /* skipped */, 0, // binary header + 0, 1, 0 // binary trailer + }; + int slots_to_skip[] = { 3 }; + + ExpectStopped(); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), + ProfileData::Options())); + ExpectRunningSamples(0); + collector_.Stop(); + ExpectStopped(); + checker_.CheckWithSkips(slots, arraysize(slots), + slots_to_skip, arraysize(slots_to_skip)); +} + // Start after already started. Should return false and not impact // collected data or state. 
TEST_F(ProfileDataTest, StartWhenStarted) { @@ -208,12 +259,15 @@ TEST_F(ProfileDataTest, StartWhenStarted) { 0, 1, 0 // binary trailer }; - EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), frequency)); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); ProfileData::State state_before; collector_.GetCurrentState(&state_before); - CHECK(!collector_.Start("foobar", frequency * 2)); + options.set_frequency(frequency * 2); + CHECK(!collector_.Start("foobar", options)); ProfileData::State state_after; collector_.GetCurrentState(&state_after); @@ -233,7 +287,9 @@ TEST_F(ProfileDataTest, StartStopEmpty2) { }; ExpectStopped(); - EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), frequency)); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); ExpectRunningSamples(0); collector_.Stop(); ExpectStopped(); @@ -249,7 +305,9 @@ TEST_F(ProfileDataTest, CollectOne) { }; ExpectStopped(); - EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), frequency)); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); ExpectRunningSamples(0); const void *trace[] = { V(100), V(101), V(102), V(103), V(104) }; @@ -270,7 +328,9 @@ TEST_F(ProfileDataTest, CollectTwoMatching) { }; ExpectStopped(); - EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), frequency)); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); ExpectRunningSamples(0); for (int i = 0; i < 2; ++i) { @@ -294,7 +354,9 @@ TEST_F(ProfileDataTest, CollectTwoFlush) { }; ExpectStopped(); - EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), frequency)); + ProfileData::Options options; + options.set_frequency(frequency); + 
EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); ExpectRunningSamples(0); const void *trace[] = { V(100), V(201), V(302), V(403), V(504) }; diff --git a/src/windows/port.h b/src/windows/port.h index 8ab5fb8..989de93 100644 --- a/src/windows/port.h +++ b/src/windows/port.h @@ -233,6 +233,12 @@ extern PERFTOOLS_DLL_DECL int getpagesize(); // in port.cc #define random rand #define sleep(t) Sleep(t * 1000) +#define strtoq _strtoi64 +#define strtouq _strtoui64 +#define strtoll _strtoi64 +#define strtoull _strtoui64 +#define atoll _atoi64 + #define __THROW throw() // ----------------------------------- TCMALLOC-SPECIFIC |