Tue May 31 08:14:38 2005 Google Inc. <opensource@google.com>

* google-perftools: version 0.2 release * Use mmap2() instead of mmap(), to map more memory (menage) * Do correct pthread-local checking in heap-checker! (maxim) * Avoid overflow on 64-bit machines in pprof (sanjay) * Add a few more GetPC() functions, including for AMD (csilvers) * Better method for overriding pthread functions (menage) * (Hacky) fix to avoid overwriting profile files after fork() (csilvers) * Crashing bugfix involving dumping heaps on small-stack threads (tudor) * Allow library versions with letters at the end (csilvers) * Config fixes for systems that don't define PATH_MAX (csilvers) * Confix fixes so we no longer need config.h after install (csilvers) * Fix to pprof to correctly read very big cpu profiles (csilvers) * Fix to pprof to deal with new commandline flags in modern gv's * Better error reporting when we can't access /proc/maps (etune) * Get rid of the libc-preallocate code (which could crash on some systems); no longer needed with local-threads fix (csilvers) git-svn-id: http://gperftools.googlecode.com/svn/trunk@11 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> 2007-03-22 03:28:56 +0000
committer: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> 2007-03-22 03:28:56 +0000
commit: 91fad389784766782263133c5510976a8f76d89e (patch)
tree: 4058058dc6bd6eb12bf72efc06c1d1ac11cd170b
parent: 51b4875f8ade3e0930eed2dc2a842ec607a94a2c (diff)
download: gperftools-91fad389784766782263133c5510976a8f76d89e.tar.gz
82 files changed, 6498 insertions, 1589 deletions
diff --git a/ChangeLog b/ChangeLog
index 2f3ea0a..d5f9176 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,26 @@
-Tue Feb 8 09:57:17 2005  El Goog <opensource@google.com>
+Tue Feb 8 09:57:17 2005  Google Inc. <opensource@google.com>
 
 	* google-perftools: initial release:
 	  The google-perftools package contains some utilities to improve
 	  and analyze the performance of C++ programs.  This includes an
 	  optimized thread-caching malloc() and cpu and heap profiling
 	  utilities.
+
+Tue May 31 08:14:38 2005  Google Inc. <opensource@google.com>
+
+	* google-perftools: version 0.2 release
+	* Use mmap2() instead of mmap(), to map more memory (menage)
+	* Do correct pthread-local checking in heap-checker! (maxim)
+	* Avoid overflow on 64-bit machines in pprof (sanjay)
+	* Add a few more GetPC() functions, including for AMD (csilvers)
+	* Better method for overriding pthread functions (menage)
+	* (Hacky) fix to avoid overwriting profile files after fork() (csilvers)
+	* Crashing bugfix involving dumping heaps on small-stack threads (tudor)
+	* Allow library versions with letters at the end (csilvers)
+	* Config fixes for systems that don't define PATH_MAX (csilvers)
+	* Confix fixes so we no longer need config.h after install (csilvers)
+	* Fix to pprof to correctly read very big cpu profiles (csilvers)
+	* Fix to pprof to deal with new commandline flags in modern gv's
+	* Better error reporting when we can't access /proc/maps (etune)
+	* Get rid of the libc-preallocate code (which could crash on some
+	  systems); no longer needed with local-threads fix (csilvers)
diff --git a/Makefile.am b/Makefile.am
index 530aa47..21ab0af 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -26,7 +26,7 @@ docdir = $(prefix)/doc/$(PACKAGE)-$(VERSION)
 # Add your documentation files (in doc/) in addition to these
 # top-level boilerplate files.  Also add a TODO file if you have one.
 # We'll add to this later, on a library-by-library basis
-dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README TODO
+dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README TODO 
 
 # The libraries (.so's) you want to install
 # We'll add to this later, on a library-by-library basis
@@ -46,12 +46,14 @@ noinst_SCRIPTS =
 
 ## vvvv RULES TO MAKE THE LIBRARIES, BINARIES, AND UNITTESTS
 
+dist_doc_DATA += doc/index.html
+
 ### ------- stack trace
 
 ### The header files we use.  We divide into categories based on directory
 S_STACKTRACE_INCLUDES =
 SG_STACKTRACE_INCLUDES = src/google/stacktrace.h
-SGP_STACKTRACE_INCLUDES = src/google/perftools/config.h
+SGP_STACKTRACE_INCLUDES = src/config.h
 STACKTRACE_INCLUDES = $(S_STACKTRACE_INCLUDES) $(SG_STACKTRACE_INCLUDES) $(SGP_STACKTRACE_INCLUDES)
 googleinclude_HEADERS += $(SG_STACKTRACE_INCLUDES)
 perftoolsinclude_HEADERS += $(SGP_STACKTRACE_INCLUDES)
@@ -75,55 +77,58 @@ stacktrace_unittest_LDADD = libstacktrace.la
 ### Documentation
 dist_doc_DATA += 
 
-### ------- tcmalloc (thread-caching malloc)
+### ------- tcmalloc_minimal (thread-caching malloc)
 
 ### The header files we use.  We divide into categories based on directory
-S_TCMALLOC_INCLUDES = src/internal_logging.h \
-                      src/system-alloc.h \
-                      src/internal_spinlock.h \
-                      src/base/commandlineflags.h \
-                      src/pagemap.h 
-SG_TCMALLOC_INCLUDES = src/google/malloc_hook.h \
-                       src/google/malloc_interface.h \
-                       src/google/stacktrace.h 
-SGP_TCMALLOC_INCLUDES = src/google/perftools/config.h \
-                        src/google/perftools/basictypes.h \
-                        src/google/perftools/hash_set.h
-TCMALLOC_INCLUDES = $(S_TCMALLOC_INCLUDES) $(SG_TCMALLOC_INCLUDES) $(SGP_TCMALLOC_INCLUDES)
-googleinclude_HEADERS += $(SG_TCMALLOC_INCLUDES)
-perftoolsinclude_HEADERS += $(SGP_TCMALLOC_INCLUDES)
+S_TCMALLOC_MINIMAL_INCLUDES = src/config.h \
+                              src/internal_logging.h \
+                              src/system-alloc.h \
+                              src/internal_spinlock.h \
+                              src/base/commandlineflags.h \
+                              src/base/basictypes.h \
+                              src/pagemap.h \
+                              src/maybe_threads.h
+SG_TCMALLOC_MINIMAL_INCLUDES = src/google/malloc_hook.h \
+                               src/google/malloc_extension.h \
+                               src/google/stacktrace.h 
+SGP_TCMALLOC_MINIMAL_INCLUDES = src/google/perftools/hash_set.h
+TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) $(SGP_TCMALLOC_MINIMAL_INCLUDES)
+googleinclude_HEADERS += $(SG_TCMALLOC_MINIMAL_INCLUDES)
+perftoolsinclude_HEADERS += $(SGP_TCMALLOC_MINIMAL_INCLUDES)
 
 ### Making the library
-lib_LTLIBRARIES += libtcmalloc.la
-libtcmalloc_la_SOURCES = src/internal_logging.cc \
-                         src/system-alloc.cc \
-                         src/tcmalloc.cc \
-                         src/malloc_hook.cc \
-                         src/malloc_interface.cc \
-                         $(TCMALLOC_INCLUDES)
-libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG
-TCMALLOC_SYMBOLS = '(malloc|free|realloc|calloc|cfree|memalign|valloc|pvalloc|posix_memalign|malloc_stats|MallocInterface|MallocHook)'
-libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -export-symbols-regex $(TCMALLOC_SYMBOLS)
-libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
+lib_LTLIBRARIES += libtcmalloc_minimal.la
+libtcmalloc_minimal_la_SOURCES = src/internal_logging.cc \
+                                 src/system-alloc.cc \
+                                 src/tcmalloc.cc \
+                                 src/malloc_hook.cc \
+                                 src/malloc_extension.cc \
+                                 src/maybe_threads.cc \
+                                 $(TCMALLOC_MINIMAL_INCLUDES)
+libtcmalloc_minimal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG
+TCMALLOC_MINIMAL_SYMBOLS = '(malloc|free|realloc|calloc|cfree|memalign|valloc|pvalloc|posix_memalign|malloc_stats|MallocExtension|MallocHook)'
+libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -export-symbols-regex $(TCMALLOC_MINIMAL_SYMBOLS)
+libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
 
 ### Unittests
 TESTS += malloc_unittest
-MALLOC_UNITEST_INCLUDES = src/google/malloc_interface.h \
+MALLOC_UNITEST_INCLUDES = src/config.h \
+                          src/google/malloc_extension.h \
                           src/google/malloc_hook.h \
-                          src/google/perftools/basictypes.h \
-                          src/google/perftools/config.h \
+                          src/base/basictypes.h \
                           src/google/perftools/hash_set.h \
-                          src/google/malloc_interface.h
+                          src/maybe_threads.h
 malloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
                           src/malloc_hook.cc \
-                          src/malloc_interface.cc \
+                          src/malloc_extension.cc \
+                          src/maybe_threads.cc \
                           $(MALLOC_UNITTEST_INCLUDES)
 malloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS)
 malloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
 malloc_unittest_LDADD = $(PTHREAD_LIBS)
 
 TESTS += tcmalloc_unittest
-TCMALLOC_UNITTEST_INCLUDES = src/google/malloc_interface.h
+TCMALLOC_UNITTEST_INCLUDES = src/google/malloc_extension.h
 tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
                             $(TCMALLOC_UNITTEST_INCLUDES)
 tcmalloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS)
@@ -153,7 +158,29 @@ dist_doc_DATA += doc/tcmalloc.html \
                  doc/overview.gif \
                  doc/pageheap.gif \
                  doc/spanmap.gif \
-                 doc/threadheap.gif
+                 doc/threadheap.gif \
+                 doc/t-test1.times.txt \
+                 doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png 	\
+                 doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png 	\
+                 doc/tcmalloc-opspersec.vs.size.1.threads.png 		\
+                 doc/tcmalloc-opspersec.vs.size.12.threads.png 		\
+                 doc/tcmalloc-opspersec.vs.size.16.threads.png 		\
+                 doc/tcmalloc-opspersec.vs.size.2.threads.png 		\
+                 doc/tcmalloc-opspersec.vs.size.20.threads.png 		\
+                 doc/tcmalloc-opspersec.vs.size.3.threads.png 		\
+                 doc/tcmalloc-opspersec.vs.size.4.threads.png 		\
+                 doc/tcmalloc-opspersec.vs.size.5.threads.png 		\
+                 doc/tcmalloc-opspersec.vs.size.8.threads.png 
 
 # I don't know how to say "distribute the .dot files but don't install them";
 # noinst doesn't seem to work with data.  I separate them out anyway, in case
@@ -164,15 +191,120 @@ dist_doc_DATA += doc/overview.dot \
                  doc/spanmap.dot \
                  doc/threadheap.dot
 
+### ------- tcmalloc (thread-caching malloc + heap profiler + heap checker)
+
+### The header files we use.  We divide into categories based on directory
+S_TCMALLOC_INCLUDES = src/config.h \
+                      src/internal_logging.h \
+                      src/system-alloc.h \
+                      src/internal_spinlock.h \
+                      src/pagemap.h \
+                      src/heap-profiler-inl.h \
+                      src/addressmap-inl.h \
+                      src/base/basictypes.h \
+                      src/base/commandlineflags.h \
+                      src/base/logging.h \
+                      src/base/googleinit.h \
+                      src/base/elfcore.h \
+                      src/base/linuxthreads.h \
+                      src/base/thread_lister.h \
+                      src/maybe_threads.h
+SG_TCMALLOC_INCLUDES = src/google/malloc_hook.h \
+                       src/google/malloc_extension.h \
+                       src/google/heap-profiler.h \
+                       src/google/heap-checker.h \
+                       src/google/stacktrace.h 
+SGP_TCMALLOC_INCLUDES = src/google/perftools/hash_set.h
+TCMALLOC_INCLUDES = $(S_TCMALLOC_INCLUDES) $(SG_TCMALLOC_INCLUDES) $(SGP_TCMALLOC_INCLUDES)
+googleinclude_HEADERS += $(SG_TCMALLOC_INCLUDES)
+perftoolsinclude_HEADERS += $(SGP_TCMALLOC_INCLUDES)
+
+### Making the library
+lib_LTLIBRARIES += libtcmalloc.la
+libtcmalloc_la_SOURCES = src/internal_logging.cc \
+                         src/system-alloc.cc \
+                         src/tcmalloc.cc \
+                         src/malloc_hook.cc \
+                         src/malloc_extension.cc \
+                         src/maybe_threads.cc \
+                         src/heap-profiler.cc \
+                         src/heap-checker.cc \
+                         src/heap-checker-bcad.cc \
+                         src/base/elfcore.c \
+                         src/base/linuxthreads.c \
+                         src/base/thread_lister.c \
+                         $(TCMALLOC_INCLUDES)
+libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG
+TCMALLOC_SYMBOLS = '(malloc|free|realloc|calloc|cfree|memalign|valloc|pvalloc|posix_memalign|malloc_stats|MallocExtension|MallocHook|HeapProfilerStart|HeapProfilerStop|HeapProfilerDump|GetHeapProfile|HeapCleaner|HeapLeakChecker)'
+libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -export-symbols-regex $(TCMALLOC_SYMBOLS)
+libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
+
+### Unittests
+TESTS += addressmap_unittest
+ADDRESSMAP_UNITTEST_INCLUDES = src/addressmap-inl.h \
+                               src/base/logging.h \
+                               src/base/commandlineflags.h
+addressmap_unittest_SOURCES = src/tests/addressmap_unittest.cc \
+                              $(ADDRESSMAP_UNITTEST_INCLUDES)
+addressmap_unittest_CXXFLAGS = -g
+
+check_SCRIPTS += heap-profiler_unittest_sh
+noinst_SCRIPTS += src/tests/heap-profiler_unittest.sh
+
+# These are sub-programs used by heap-profiler_unittest.sh
+HEAP_PROFILER_UNITTESTS = heap-profiler_unittest
+heap-profiler_unittest_sh: $(HEAP_PROFILER_UNITTESTS)
+	$(top_srcdir)/src/tests/heap-profiler_unittest.sh . $(top_srcdir)/src
+
+HEAP_PROFILER_UNITTEST_INCLUDES = src/google/heap-profiler.h
+heap_profiler_unittest_SOURCES = src/tests/heap-profiler_unittest.cc \
+                                 $(HEAP_PROFILER_UNITTEST_INCLUDES)
+heap_profiler_unittest_CXXFLAGS = -g
+heap_profiler_unittest_LDFLAGS = -g $(PTHREAD_CFLAGS)
+heap_profiler_unittest_LDADD = libtcmalloc.la $(PTHREAD_LIBS)
+
+check_SCRIPTS += heap_checker_unittest_sh
+noinst_SCRIPTS += src/tests/heap-checker_unittest.sh
+
+# These are sub-programs used by heap-checker_unittest.sh
+HEAP_CHECKER_UNITTESTS = heap-checker_unittest
+heap_checker_unittest_sh: $(HEAP_CHECKER_UNITTESTS)
+	$(top_srcdir)/src/tests/heap-checker_unittest.sh . $(top_srcdir)/src
+
+HEAP_CHECKER_UNITTEST_INCLUDES = src/config.h \
+                                 src/base/logging.h \
+                                 src/base/googleinit.h \
+                                 src/google/heap-profiler.h \
+                                 src/google/heap-checker.h
+heap_checker_unittest_SOURCES = src/tests/heap-checker_unittest.cc \
+                                $(HEAP_CHECKER_UNITTEST_INCLUDES)
+heap_checker_unittest_CXXFLAGS = -g $(PTHREAD_CFLAGS)
+heap_checker_unittest_LDFLAGS = -g $(PTHREAD_CFLAGS)
+# tcmalloc has to be specified last!
+heap_checker_unittest_LDADD =  $(PTHREAD_LIBS) -ltcmalloc
+
+check_SCRIPTS += heap-checker-death_unittest
+noinst_SCRIPTS += src/tests/heap-checker-death_unittest.sh
+
+heap-checker-death_unittest:
+	PPROF_PATH=$(top_srcdir)/src/pprof sh $(top_srcdir)/src/tests/heap-checker-death_unittest.sh
+
+### Documentation (above and beyond tcmalloc_minimal documentation)
+dist_doc_DATA += doc/heap_profiler.html \
+                 doc/heap-example1.png \
+                 doc/heap_checker.html
+
+
 ### ------- CPU profiler
 
 ### The header files we use.  We divide into categories based on directory
-S_CPU_PROFILER_INCLUDES = src/base/commandlineflags.h \
+S_CPU_PROFILER_INCLUDES = src/config.h \
+                          src/base/commandlineflags.h \
                           src/base/googleinit.h \
                           src/base/logging.h
 SG_CPU_PROFILER_INCLUDES = src/google/profiler.h \
                            src/google/stacktrace.h
-SGP_CPU_PROFILER_INCLUDES = src/google/perftools/config.h
+SGP_CPU_PROFILER_INCLUDES = 
 CPU_PROFILER_INCLUDES = $(S_CPU_PROFILER_INCLUDES) $(SG_CPU_PROFILER_INCLUDES) $(SGP_CPU_PROFILER_INCLUDES)
 googleinclude_HEADERS += $(SG_CPU_PROFILER_INCLUDES)
 perftoolsinclude_HEADERS += $(SGP_CPU_PROFILER_INCLUDES)
@@ -186,16 +318,16 @@ CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisab
 libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS)
 
 ### Unittests
-check_SCRIPTS += profiler_unittest
+check_SCRIPTS += profiler_unittest_sh
 noinst_SCRIPTS += src/tests/profiler_unittest.sh
 
 # These are sub-programs used by profiler_unittest.sh
 PROFILER_UNITTESTS = profiler1_unittest profiler2_unittest profiler3_unittest \
                      profiler4_unittest
-profiler_unittest: $(PROFILER_UNITTESTS)
+profiler_unittest_sh: $(PROFILER_UNITTESTS)
 	$(top_srcdir)/src/tests/profiler_unittest.sh . $(top_srcdir)/src
 
-PROFILER_UNITTEST_INCLUDES = src/google/perftools/config.h \
+PROFILER_UNITTEST_INCLUDES = src/config.h \
                             src/google/profiler.h
 PROFILER_UNITTEST_SRCS = src/tests/profiler_unittest.cc \
                         $(PROFILER_UNITTEST_INCLUDES)
@@ -222,112 +354,14 @@ dist_doc_DATA += doc/cpu_profiler.html \
                  doc/pprof-vsnprintf-big.gif \
                  doc/pprof-vsnprintf.gif
 
-
-### ------- Heap profiler
-
-### The header files we use.  We divide into categories based on directory
-S_HEAP_PROFILER_INCLUDES = src/heap-profiler-inl.h \
-                           src/internal_spinlock.h \
-                           src/addressmap-inl.h \
-                           src/base/commandlineflags.h \
-                           src/base/logging.h \
-                           src/base/googleinit.h
-SG_HEAP_PROFILER_INCLUDES = src/google/malloc_hook.h \
-                            src/google/heap-profiler.h \
-                            src/google/stacktrace.h 
-SGP_HEAP_PROFILER_INCLUDES = src/google/perftools/config.h \
-                             src/google/perftools/basictypes.h \
-                             src/google/perftools/hash_set.h
-HEAP_PROFILER_INCLUDES = $(S_HEAP_PROFILER_INCLUDES) $(SG_HEAP_PROFILER_INCLUDES) $(SGP_HEAP_PROFILER_INCLUDES)
-googleinclude_HEADERS += $(SG_HEAP_PROFILER_INCLUDES)
-perftoolsinclude_HEADERS += $(SGP_HEAP_PROFILER_INCLUDES)
-
-### Making the library
-lib_LTLIBRARIES += libheapprofiler.la
-libheapprofiler_la_SOURCES = src/heap-profiler.cc \
-                             src/heap-checker-bcad.cc \
-                             $(HEAP_PROFILER_INCLUDES)
-# We only need pthreads because we're linking with tcmalloc
-libheapprofiler_la_CXXFLAGS = $(PTHREAD_CFLAGS)
-# TODO: The lowercased symbols in this list are member variables of
-# HeapProfiler that the HeapLeakChecker requires access to in order to do its
-# job.  We should refactor the HeapProfiler so that these are proper accessors
-# (probably protected ones)
-HEAP_PROFILER_SYMBOLS = '(HeapProfilerStart|HeapProfilerStop|HeapProfilerDump|GetHeapProfile|filename_prefix_|is_on_|dumping_|temp_disable_|dump_for_leaks_|profile_)'
-libheapprofiler_la_LDFLAGS = $(PTHREAD_CFLAGS) -export-symbols-regex $(HEAP_PROFILER_SYMBOLS)
-# We may as well depend of libtcmalloc since it is part of the same package,
-# but it is worth noting that it would be easy to break this dependency by
-# depending on a malloc_normal.cc that overrides the system malloc & friends
-# with a version that has hooks into malloc_hook.cc.
-libheapprofiler_la_LIBADD = libtcmalloc.la $(PTHREAD_LIBS)
-
-### Unittests
-TESTS += addressmap_unittest
-ADDRESSMAP_UNITTEST_INCLUDES = src/addressmap-inl.h \
-                               src/base/logging.h \
-                               src/base/commandlineflags.h
-addressmap_unittest_SOURCES = src/tests/addressmap_unittest.cc \
-                              $(ADDRESSMAP_UNITTEST_INCLUDES)
-addressmap_unittest_CXXFLAGS = -g
-
-### Documentation
-dist_doc_DATA += doc/heap_profiler.html \
-                 doc/heap-example1.png
-
-### ------- Heap checker
-
-### The header files we use.  We divide into categories based on directory
-S_HEAP_CHECKER_INCLUDES = src/heap-profiler-inl.h \
-                          src/base/commandlineflags.h \
-                          src/base/logging.h
-SG_HEAP_CHECKER_INCLUDES = src/google/heap-profiler.h \
-                           src/google/heap-checker.h \
-                           src/google/stacktrace.h 
-SGP_HEAP_CHECKER_INCLUDES = src/google/perftools/config.h
-HEAP_CHECKER_INCLUDES = $(S_HEAP_CHECKER_INCLUDES) $(SG_HEAP_CHECKER_INCLUDES) $(SGP_HEAP_CHECKER_INCLUDES)
-googleinclude_HEADERS += $(SG_HEAP_CHECKER_INCLUDES)
-perftoolsinclude_HEADERS += $(SGP_HEAP_CHECKER_INCLUDES)
-
-### Making the library
-lib_LTLIBRARIES += libheapchecker.la
-libheapchecker_la_SOURCES = src/heap-checker.cc \
-                            src/heap-checker-bcad.cc \
-                            $(HEAP_CHECKER_INCLUDES)
-libheapchecker_la_CXXFLAGS = $(PTHREAD_CFLAGS)
-HEAP_CHECKER_SYMBOLS = '(HeapCleaner|HeapLeakChecker)'
-libheapchecker_la_LDFLAGS = $(PTHREAD_CFLAGS) -export-symbols-regex $(HEAP_CHECKER_SYMBOLS)
-libheapchecker_la_LIBADD = libheapprofiler.la $(PTHREAD_LIBS)
-
-### Unittests
-TESTS += heap-checker_unittest
-TESTS_ENVIRONMENT += PPROF_PATH=$(top_srcdir)/src/pprof
-HEAP_CHECKER_UNITTEST_INCLUDES = src/google/perftools/config.h \
-                                 src/base/logging.h \
-                                 src/base/googleinit.h \
-                                 src/google/heap-profiler.h \
-                                 src/google/heap-checker.h
-heap_checker_unittest_SOURCES = src/tests/heap-checker_unittest.cc \
-                                $(HEAP_CHECKER_UNITTEST_INCLUDES)
-heap_checker_unittest_CXXFLAGS = -g $(PTHREAD_CFLAGS)
-heap_checker_unittest_LDFLAGS = -g $(PTHREAD_CFLAGS)
-heap_checker_unittest_LDADD = libheapchecker.la $(PTHREAD_LIBS)
-
-check_SCRIPTS += heap-checker-death_unittest
-noinst_SCRIPTS += src/tests/heap-checker-death_unittest.sh
-heap-checker-death_unittest:
-	PPROF_PATH=$(top_srcdir)/src/pprof sh $(top_srcdir)/src/tests/heap-checker-death_unittest.sh
-
-### Documentation
-# TODO
-dist_doc_DATA +=
-
 ## ^^^^ END OF RULES TO MAKE YOUR LIBRARIES, BINARIES, AND UNITTESTS
 
 
 # This should always include $(TESTS), but may also include other
 # binaries that you compile but don't want automatically installed.
 # We'll add to this later, on a library-by-library basis
-noinst_PROGRAMS = $(TESTS) $(PROFILER_UNITTESTS)
+noinst_PROGRAMS = $(TESTS) $(PROFILER_UNITTESTS) $(HEAP_PROFILER_UNITTESTS) \
+                  $(HEAP_CHECKER_UNITTESTS)
 bin_SCRIPTS = src/pprof
 
 rpm: dist-gzip packages/rpm.sh packages/rpm/rpm.spec
diff --git a/README b/README
index 2ba6079..ea7edea 100644
--- a/README
+++ b/README
@@ -16,32 +16,87 @@ There are other environment variables, besides CPUPROFILE, you can set
 to adjust the cpu-profiler behavior; cf "ENVIRONMENT VARIABLES" below.
 
 
+TCMALLOC
+--------
+Just link in -ltcmalloc to get the advantages of tcmalloc.  See below
+for some environment variables you can use with tcmalloc, as well.
+
+
+HEAP PROFILER
+-------------
+See doc/heap-profiler.html for information about how to use tcmalloc's
+heap profiler and analyze its output.
+
+As a quick-start, do the following after installing this package:
+
+1) Link your executable with -ltcmalloc
+2) Run your executable with the HEAPPROFILE environment var set:
+     $ HEAPROFILE=/tmp/heapprof <path/to/binary> [binary args]
+3) Run pprof to analyze the heap usage
+     $ pprof <path/to/binary> /tmp/heapprof.0045.heap  # run 'ls' to see options
+     $ pprof --gv <path/to/binary> /tmp/heapprof.0045.heap
+
+You can also use LD_PRELOAD to heap-profile an executable that you
+didn't compile.
+
+There are other environment variables, besides HEAPPROFILE, you can
+set to adjust the heap-profiler behavior; cf "ENVIRONMENT VARIABLES"
+below.
+
+
 HEAP CHECKER
 ------------
-In order to catch all heap leaks, this library must be linked *last*
-into your executable.  It will not find leaks in libraries listed
-after it on the link line.
+See doc/heap-checker.html for information about how to use tcmalloc's
+heap checker.
 
+In order to catch all heap leaks, tcmalloc must be linked *last* into
+your executable.  The heap checker may mischaracterize some memory
+accesses in libraries listed after it on the link line.  For instance,
+it may report these libraries as leaking memory when they're not.
+(See the source code for more details.)
 
-TCMALLOC
---------
-Just link this in to get the advantages of tcmalloc.  See below for
-some environment variables you can use with tcmalloc, as well.
+Here's a quick-start for how to use:
+
+As a quick-start, do the following after installing this package:
+
+1) Link your executable with -ltcmalloc
+2) Run your executable with the HEAPCHECK environment var set:
+     $ HEAPCHECK=1 <path/to/binary> [binary args]
+
+Other values for HEAPCHECK: normal (equivalent to "1"), strict, draconian
+
+You can also use LD_PRELOAD to heap-check an executable that you
+didn't compile.
+
+IMPORTANT NOTE: pthreads handling is currently incomplete.  Heap leak
+checks will fail with bogus leaks if there are pthreads live at
+construction or leak checking time.  One solution, for global
+heap-checking, is to make sure all threads but the main thread have
+exited at program-end time.  We hope (as of March 2005) to have a fix
+soon.
 
 
 ENVIRONMENT VARIABLES
 ---------------------
-These libraries were written to be able to be linked into your
-applications all the time.  They'll lie dormant, using no memory or
-CPU, until you turn them on.  The easiest way to turn them on is by
-setting the appropriate environment variables.  We have many variables
-that let you enable/disable features as well as tweak parameters.
+The cpu profiler, heap checker, and heap profiler will lie dormant,
+using no memory or CPU, until you turn them on.  (Thus, there's no
+harm in linking -lprofiler into every application, and also -ltcmalloc
+assuming you're ok using the non-libc malloc library.)
+
+The easiest way to turn them on is by setting the appropriate
+environment variables.  We have several variables that let you
+enable/disable features as well as tweak parameters.
 
 CPUPROFILE=<file> -- turns on cpu profiling and dumps data to this file.
 PROFILESELECTED=1 -- if set, cpu-profiler will only profile regions of code
                      surrounded with ProfilerEnable()/ProfilerDisable().
-FREQUENCY         -- how may interrupts/second the cpu-profiler samples.
+PROFILEFREQUENCY=x-- how many interrupts/second the cpu-profiler samples.
+
+HEAPPROFILE=<pre> -- turns on heap profiling and dumps data using this prefix
+HEAPCHECK=<type>  -- turns on heap checking with strictness 'type'
 
 TCMALLOC_DEBUG=<level> -- the higher level, the more messages malloc emits
 MALLOCSTATS=<level>    -- prints memory-use stats at program-exit
 
+---
+16 March 2005
diff --git a/TODO b/TODO
index 3bbd885..929f802 100644
--- a/TODO
+++ b/TODO
@@ -1,16 +1,20 @@
 HEAP PROFILER
 
-1) Fix heap profiling under STL
+1) Fix heap profiling under all STLs
    * Find out how to force non-glibc STL libraries to call new() and
      delete() for every allocation / deallocation.
    * Make heap profiler ignore STL-internal allocations for those
      libraries under which we cannot profile accurately, so we only
      see object-level leaks.
-2) Remove dependency on tcmalloc
+2) Remove dependency on tcmalloc?
 3) Port to non-linux O/Ses (right now code uses /proc for library info)
 4) Port to non-x86 architectures (locking code in internal_spinlock is
    x86-specific)
 5) Port to C?
+6) Figure out how to get setenv() to work properly before main() in
+   shared libaries, and get rid of the profile-naming hack once we
+   do.  (See HeapProfiler::Init().)
+
 
 HEAP CHECKER
 
@@ -18,18 +22,27 @@ HEAP CHECKER
 2) Remove requirement that the heap-checker must be linked last into
    an application (hard! -- it needs its global constructor to run
    first)
+3) Improve heap_checker.html documentation.
 
 TCMALLOC
 
 1) Implement mallinfo/mallopt
 2) Have tcmalloc work correctly when libpthread is not linked in
+   (currently working for glibc, could use other libc's too)
 3) Return memory to the system when requirements drop
 4) Explore coloring allocated objects to avoid cache conflicts
 5) Explore biasing reclamation to larger addresses
 
+CPU PROFILER
+
+1) Figure out how to get setenv() to work properly before main() in
+   shared libaries(), and get rid of the profile-naming hack once we
+   do.  (See ProfileData::ProfileData().)
+
 STACKTRACE
 
 1) Document and advertise libstacktrace
 2) Remove dependency on linux/x86
 
-28 February 2005
+---
+20 May 2005
diff --git a/configure b/configure
index 1c02c17..13dc54a 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.57 for google-perftools 0.1.
+# Generated by GNU Autoconf 2.57 for google-perftools 0.2.
 #
 # Report bugs to <opensource@google.com>.
 #
@@ -422,8 +422,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='google-perftools'
 PACKAGE_TARNAME='google-perftools'
-PACKAGE_VERSION='0.1'
-PACKAGE_STRING='google-perftools 0.1'
+PACKAGE_VERSION='0.2'
+PACKAGE_STRING='google-perftools 0.2'
 PACKAGE_BUGREPORT='opensource@google.com'
 
 ac_unique_file="README"
@@ -953,7 +953,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures google-perftools 0.1 to adapt to many kinds of systems.
+\`configure' configures google-perftools 0.2 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1019,7 +1019,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of google-perftools 0.1:";;
+     short | recursive ) echo "Configuration of google-perftools 0.2:";;
    esac
   cat <<\_ACEOF
 
@@ -1125,7 +1125,7 @@ fi
 test -n "$ac_init_help" && exit 0
 if $ac_init_version; then
   cat <<\_ACEOF
-google-perftools configure 0.1
+google-perftools configure 0.2
 generated by GNU Autoconf 2.57
 
 Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
@@ -1140,7 +1140,7 @@ cat >&5 <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by google-perftools $as_me 0.1, which was
+It was created by google-perftools $as_me 0.2, which was
 generated by GNU Autoconf 2.57.  Invocation command line was
 
   $ $0 $@
@@ -1733,7 +1733,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE=google-perftools
- VERSION=0.1
+ VERSION=0.2
 
 
 cat >>confdefs.h <<_ACEOF
@@ -1861,7 +1861,7 @@ INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s"
 
 # Add the stamp file to the list of files AC keeps track of,
 # along with our hook.
-          ac_config_headers="$ac_config_headers src/google/perftools/config.h"
+          ac_config_headers="$ac_config_headers src/config.h"
 
 
 
@@ -19388,7 +19388,7 @@ _ACEOF
 fi
 
 done
-    # for stacktrace?
+    # for stacktrace? and heapchecker_unittest
 
 for ac_header in conflict-signal.h
 do
@@ -19531,6 +19531,432 @@ fi
 
 done
       # defined on some windows platforms
+
+for ac_header in linux/ptrace.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+else
+  # Is the header compilable?
+echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest.$ac_objext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_header_compiler=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_header_compiler=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6
+
+# Is the header present?
+echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <$ac_header>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+  (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc in
+  yes:no )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    (
+      cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to bug-autoconf@gnu.org. ##
+## ------------------------------------ ##
+_ASBOX
+    ) |
+      sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+  no:yes )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    (
+      cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to bug-autoconf@gnu.org. ##
+## ------------------------------------ ##
+_ASBOX
+    ) |
+      sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  eval "$as_ac_Header=$ac_header_preproc"
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+for ac_header in syscall.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+else
+  # Is the header compilable?
+echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest.$ac_objext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_header_compiler=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_header_compiler=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6
+
+# Is the header present?
+echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <$ac_header>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+  (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc in
+  yes:no )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    (
+      cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to bug-autoconf@gnu.org. ##
+## ------------------------------------ ##
+_ASBOX
+    ) |
+      sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+  no:yes )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    (
+      cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to bug-autoconf@gnu.org. ##
+## ------------------------------------ ##
+_ASBOX
+    ) |
+      sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  eval "$as_ac_Header=$ac_header_preproc"
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+for ac_header in grp.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+else
+  # Is the header compilable?
+echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest.$ac_objext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_header_compiler=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_header_compiler=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6
+
+# Is the header present?
+echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <$ac_header>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+  (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc in
+  yes:no )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    (
+      cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to bug-autoconf@gnu.org. ##
+## ------------------------------------ ##
+_ASBOX
+    ) |
+      sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+  no:yes )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    (
+      cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to bug-autoconf@gnu.org. ##
+## ------------------------------------ ##
+_ASBOX
+    ) |
+      sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  eval "$as_ac_Header=$ac_header_preproc"
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+         # for heapchecker_unittest
 echo "$as_me:$LINENO: checking for struct sigcontext.sc_eip" >&5
 echo $ECHO_N "checking for struct sigcontext.sc_eip... $ECHO_C" >&6
 if test "${ac_cv_member_struct_sigcontext_sc_eip+set}" = set; then
@@ -19624,11 +20050,9 @@ _ACEOF
 
 
 fi
-echo "$as_me:$LINENO: checking for # for the cpu-profiler
-                  struct ucontext.uc_mcontext" >&5
-echo $ECHO_N "checking for # for the cpu-profiler
-                  struct ucontext.uc_mcontext... $ECHO_C" >&6
-if test "${ac_cv_member___for_the_cpu_profiler___________________struct_ucontext_uc_mcontext+set}" = set; then
+echo "$as_me:$LINENO: checking for struct ucontext.uc_mcontext" >&5
+echo $ECHO_N "checking for struct ucontext.uc_mcontext... $ECHO_C" >&6
+if test "${ac_cv_member_struct_ucontext_uc_mcontext+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
   cat >conftest.$ac_ext <<_ACEOF
@@ -19643,8 +20067,7 @@ cat >>conftest.$ac_ext <<_ACEOF
 int
 main ()
 {
-static # for the cpu-profiler
-                  struct ucontext ac_aggr;
+static struct ucontext ac_aggr;
 if (ac_aggr.uc_mcontext)
 return 0;
   ;
@@ -19663,7 +20086,7 @@ if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_member___for_the_cpu_profiler___________________struct_ucontext_uc_mcontext=yes
+  ac_cv_member_struct_ucontext_uc_mcontext=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -19680,8 +20103,7 @@ cat >>conftest.$ac_ext <<_ACEOF
 int
 main ()
 {
-static # for the cpu-profiler
-                  struct ucontext ac_aggr;
+static struct ucontext ac_aggr;
 if (sizeof ac_aggr.uc_mcontext)
 return 0;
   ;
@@ -19700,23 +20122,23 @@ if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_member___for_the_cpu_profiler___________________struct_ucontext_uc_mcontext=yes
+  ac_cv_member_struct_ucontext_uc_mcontext=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-ac_cv_member___for_the_cpu_profiler___________________struct_ucontext_uc_mcontext=no
+ac_cv_member_struct_ucontext_uc_mcontext=no
 fi
 rm -f conftest.$ac_objext conftest.$ac_ext
 fi
 rm -f conftest.$ac_objext conftest.$ac_ext
 fi
-echo "$as_me:$LINENO: result: $ac_cv_member___for_the_cpu_profiler___________________struct_ucontext_uc_mcontext" >&5
-echo "${ECHO_T}$ac_cv_member___for_the_cpu_profiler___________________struct_ucontext_uc_mcontext" >&6
-if test $ac_cv_member___for_the_cpu_profiler___________________struct_ucontext_uc_mcontext = yes; then
+echo "$as_me:$LINENO: result: $ac_cv_member_struct_ucontext_uc_mcontext" >&5
+echo "${ECHO_T}$ac_cv_member_struct_ucontext_uc_mcontext" >&6
+if test $ac_cv_member_struct_ucontext_uc_mcontext = yes; then
 
 cat >>confdefs.h <<_ACEOF
-#define HAVE___FOR_THE_CPU_PROFILER___________________STRUCT_UCONTEXT_UC_MCONTEXT 1
+#define HAVE_STRUCT_UCONTEXT_UC_MCONTEXT 1
 _ACEOF
 
 
@@ -19814,6 +20236,99 @@ _ACEOF
 
 
 fi
+echo "$as_me:$LINENO: checking for struct sigcontext.rip" >&5
+echo $ECHO_N "checking for struct sigcontext.rip... $ECHO_C" >&6
+if test "${ac_cv_member_struct_sigcontext_rip+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <signal.h>
+
+int
+main ()
+{
+static struct sigcontext ac_aggr;
+if (ac_aggr.rip)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest.$ac_objext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_member_struct_sigcontext_rip=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <signal.h>
+
+int
+main ()
+{
+static struct sigcontext ac_aggr;
+if (sizeof ac_aggr.rip)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest.$ac_objext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_member_struct_sigcontext_rip=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_member_struct_sigcontext_rip=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_member_struct_sigcontext_rip" >&5
+echo "${ECHO_T}$ac_cv_member_struct_sigcontext_rip" >&6
+if test $ac_cv_member_struct_sigcontext_rip = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_SIGCONTEXT_RIP 1
+_ACEOF
+
+
+fi
 echo "$as_me:$LINENO: checking for struct sigcontext.sc_ip" >&5
 echo $ECHO_N "checking for struct sigcontext.sc_ip... $ECHO_C" >&6
 if test "${ac_cv_member_struct_sigcontext_sc_ip+set}" = set; then
@@ -20126,6 +20641,53 @@ _ACEOF
 
 
 
+# Check if __builtin_stack_pointer() is available (for elfcore.h)
+echo "$as_me:$LINENO: checking for __builtin_stack_pointer()" >&5
+echo $ECHO_N "checking for __builtin_stack_pointer()... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+void *sp = __builtin_stack_pointer();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest$ac_exeext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_BUILTIN_STACK_POINTER 1
+_ACEOF
+
+                echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+
 # A lot of the code in this directory depends on pthreads
 
 
@@ -21453,7 +22015,7 @@ _ASBOX
 } >&5
 cat >&5 <<_CSEOF
 
-This file was extended by google-perftools $as_me 0.1, which was
+This file was extended by google-perftools $as_me 0.2, which was
 generated by GNU Autoconf 2.57.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -21516,7 +22078,7 @@ _ACEOF
 
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-google-perftools config.status 0.1
+google-perftools config.status 0.2
 configured by $0, generated by GNU Autoconf 2.57,
   with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
 
@@ -21629,7 +22191,7 @@ do
   # Handling of arguments.
   "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
   "depfiles" ) CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
-  "src/google/perftools/config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS src/google/perftools/config.h" ;;
+  "src/config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS src/config.h" ;;
   *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
 echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
    { (exit 1); exit 1; }; };;
@@ -22205,8 +22767,8 @@ echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
   fi
   # Run the commands associated with the file.
   case $ac_file in
-    src/google/perftools/config.h ) # update the timestamp
-echo 'timestamp for src/google/perftools/config.h' >"src/google/perftools/stamp-h1"
+    src/config.h ) # update the timestamp
+echo 'timestamp for src/config.h' >"src/stamp-h1"
  ;;
   esac
 done
diff --git a/configure.ac b/configure.ac
index 875336d..0226ef5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,12 +5,12 @@
 # make sure we're interpreted by some minimal autoconf
 AC_PREREQ(2.57)
 
-AC_INIT(google-perftools, 0.1, opensource@google.com)
+AC_INIT(google-perftools, 0.2, opensource@google.com)
 # The argument here is just something that should be in the current directory
 # (for sanity checking)
 AC_CONFIG_SRCDIR(README)
 AM_INIT_AUTOMAKE
-AM_CONFIG_HEADER(src/google/perftools/config.h)
+AM_CONFIG_HEADER(src/config.h)
 
 # Checks for programs.
 AC_PROG_CC
@@ -31,11 +31,15 @@ AC_CHECK_TYPES([__int64])       # defined in some windows platforms
 AC_CHECK_FUNCS(sbrk)            # for tcmalloc to get memory
 AC_CHECK_FUNCS(munmap)
 AC_FUNC_MMAP
-AC_CHECK_HEADERS(execinfo.h)    # for stacktrace?
+AC_CHECK_HEADERS(execinfo.h)    # for stacktrace? and heapchecker_unittest
 AC_CHECK_HEADERS(conflict-signal.h)      # defined on some windows platforms
-AC_CHECK_MEMBERS([struct sigcontext.sc_eip,           # for the cpu-profiler
+AC_CHECK_HEADERS(linux/ptrace.h)
+AC_CHECK_HEADERS(syscall.h)
+AC_CHECK_HEADERS(grp.h)         # for heapchecker_unittest
+AC_CHECK_MEMBERS([struct sigcontext.sc_eip,
                   struct ucontext.uc_mcontext,
                   struct sigcontext.eip,
+                  struct sigcontext.rip,
                   struct sigcontext.sc_ip,
                   struct siginfo.si_faddr],,,
                  [#include <signal.h>])
@@ -43,6 +47,14 @@ AC_CHECK_MEMBERS([struct sigcontext.sc_eip,           # for the cpu-profiler
 # Defines PRIuS
 AC_COMPILER_CHARACTERISTICS
 
+# Check if __builtin_stack_pointer() is available (for elfcore.h)
+AC_MSG_CHECKING([for __builtin_stack_pointer()])
+AC_LINK_IFELSE([AC_LANG_PROGRAM(, [void *sp = __builtin_stack_pointer();])],
+               [AC_DEFINE(HAVE_BUILTIN_STACK_POINTER, 1,
+                      Define to 1 if compiler supports __builtin_stack_pointer)
+                AC_MSG_RESULT([yes])],
+               [AC_MSG_RESULT([no])])
+
 # A lot of the code in this directory depends on pthreads
 ACX_PTHREAD
 
diff --git a/doc/cpu_profiler.html b/doc/cpu_profiler.html
index 1f55443..ad0e9fd 100644
--- a/doc/cpu_profiler.html
+++ b/doc/cpu_profiler.html
@@ -47,13 +47,38 @@ given run of an executable:</p>
 
 <p>Profiling works correctly with threads.  To use, just call
 ProfilerRegisterThread() at the beginning of the routine the thread
-runs.</p>
+runs.  Profiling also works correctly with sub-processes: each child
+process gets its own profile with its own name (generated by combining
+CPUPROFILE with the child's process id).</p>
 
 <p>You can also turn profiling on and off throughout the code, and do
 other tweaks.  This functionality will not frequently be needed.  See
 /usr/local/include/google/profiler.h (or src/google/profiler.h in this
 directory) for more details.</p>
 
+<p>For security reasons, CPU profiling will not write to a file -- and
+is thus not usable -- for setuid programs.</p>
+
+<H2>Controlling Behavior via the Environment</H2>
+
+<p>In addition to the environment variable <code>CPUPROFILE</code>,
+which determines where profiles are written, there are several
+environment variables which control the performance of the CPU
+profile.</p>
+
+<table frame=box rules=sides cellpadding=5 width=100%>
+<tr>
+<td><code>PROFILESELECTED=1</code></td>
+    <td>If set, cpu-profiler will only profile regions of code
+        surrounded with
+        <code>ProfilerEnable()</code>/<code>ProfilerDisable()</code>.
+    </td>
+</tr><tr>
+<td><code>PROFILEFREQUENCY=<i>x</i></code></td>
+    <td>How many interrupts/second the cpu-profiler samples.
+    </td>
+</tr>
+</table>
 
 <H1>Analyzing the Output</H1>
 
@@ -387,9 +412,20 @@ only 40.</p>
      and the shared libraries are different on the two machines, the
      profiling output may be confusing: samples that fall within
      the shared libaries may be assigned to arbitrary procedures.
+<li> If your program forks, the children will also be profiled (since
+     they inherit the same CPUPROFILE setting).  Each process is
+     profiled separately; to distinguish the child profiles from the
+     parent profile and from each other, all children will have their
+     process-id appended to the CPUPROFILE name.
+<li> Due to a hack we make to work around a possible gcc bug, your
+     profiles may end up named strangely if the first character of
+     your CPUPROFILE variable has ascii value greater than 127.  This
+     should be exceedingly rare, but if you need to use such a name,
+     just set prepend <code>./</code> to your filename:
+     <code>CPUPROFILE=./&Auml;gypten</code>.
 </ul>
 
 <hr>
-Last modified: Tue Jan 25 18:06:38 PST 2005
+Last modified: Wed Apr 20 04:54:23 PDT 2005
 </body>
 </html>
diff --git a/doc/heap_checker.html b/doc/heap_checker.html
new file mode 100644
index 0000000..a8fa26c
--- /dev/null
+++ b/doc/heap_checker.html
@@ -0,0 +1,142 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+<head>
+<title>Google Heap Checker</title>
+</head>
+
+<body>
+<h1>Automatic Leaks Checking Support</h1>
+
+This document describes how to check the heap usage of a C++
+program.  This facility can be useful for automatically detecting
+memory leaks.
+
+<h2>Linking in the Heap Checker</h2>
+
+<p>
+You can heap-check any program that has the tcmalloc library linked
+in.  No recompilation is necessary to use the heap checker.
+</p>
+
+<p>
+In order to catch all heap leaks, tcmalloc must be linked <i>last</i> into
+your executable.  The heap checker may mischaracterize some memory
+accesses in libraries listed after it on the link line.  For instance,
+it may report these libraries as leaking memory when they're not.
+(See the source code for more details.)
+</p>
+
+<p>
+It's safe to link in tcmalloc even if you don't expect to
+heap-check your program.  Your programs will not run any slower
+as long as you don't use any of the heap-checker features.
+</p>
+
+<p>
+You can run the heap checker on applications you didn't compile
+yourself, by using LD_PRELOAD:
+</p>
+<pre>
+   $ LD_PRELOAD="/usr/lib/libtcmalloc.so" HEAPCHECK=normal <binary>
+</pre>
+<p>
+We don't necessarily recommend this mode of usage.
+</p>
+
+<h2>Turning On Heap Checking</h2>
+
+<p>There are two alternatives to actually turn on heap checking for a
+given run of an executable.</p>
+
+<ul>
+<li> For whole-program heap-checking, define the environment variable
+     HEAPCHECK to the type of heap
+     checking you want: normal, strict, or draconian.  For instance,
+     to heap-check <code>/bin/ls</code>:
+     <pre>
+      $ HEAPCHECK=normal /bin/ls
+      % setenv HEAPCHECK normal; /bin/ls   # csh
+     </pre>
+     OR
+
+<li> For partial-code heap-checking, you need to modify your code.
+     For each piece of code you want heap-checked, bracket the code
+     by creating a <code>HeapLeakChecker</code> object
+     (which takes a descriptive label as an argument), and calling
+     <code>check.NoLeaks()</code> at the end of the code you want
+     checked.  This will verify no more memory is allocated at the
+     end of the code segment than was allocated in the beginning.  To
+     actually turn on the heap-checking, set the environment variable
+     HEAPCHECK to <code>local</code>.
+</ol>
+
+<p>
+Here is an example of the second usage.  The following code will
+die if <code>Foo()</code> leaks any memory
+(i.e. it allocates memory that is not freed by the time it returns):
+</p>
+<pre>
+    HeapProfileLeakChecker checker("foo");
+    Foo();
+    assert(checker.NoLeaks());
+</pre>
+
+<p>
+When the <code>checker</code> object is allocated, it creates
+one heap profile.  When <code>checker.NoLeaks()</code> is invoked,
+it creates another heap profile and compares it to the previously
+created profile.  If the new profile indicates memory growth
+(or any memory allocation change if one
+uses <code>checker.SameHeap()</code> instead), <code>NoLeaks()</code>
+will return false and the program will abort.  An error message will
+also be printed out saying how <code>pprof</code> command can be run
+to get a detailed analysis of the actual leaks.
+</p>
+
+<p>
+See the comments for <code>HeapProfileLeakChecker</code> class in
+<code>heap-checker.h</code> and the code in
+<code>heap-checker_unittest.cc</code> for more information and
+examples.  (TODO: document it all here instead!)
+</p>
+
+<p>
+<b>IMPORTANT NOTE</b>: pthreads handling is currently incomplete.
+Heap leak checks will fail with bogus leaks if there are pthreads live
+at construction or leak checking time.  One solution, for global
+heap-checking, is to make sure all threads but the main thread have
+exited at program-end time.  We hope (as of March 2005) to have a fix
+soon.
+</p>
+
+<h2>Disabling Heap-checking of Known Leaks</h2>
+
+<p>
+Sometimes your code has leaks that you know about and are willing to
+accept.  You would like the heap checker to ignore them when checking
+your program.  You can do this by bracketing the code in question with
+an appropriate heap-checking object:
+</p>
+<pre>
+   #include <google/heap-checker.h>
+   ...
+   void *mark = HeapLeakChecker::GetDisableChecksStart();
+   &lt;leaky code&gt;
+   HeapLeakChecker::DisableChecksToHereFrom(mark);
+</pre>
+
+<p>
+Some libc routines allocate memory, and may need to be 'disabled' in
+this way.  As time goes on, we hope to encode proper handling of
+these routines into the heap-checker library code, so applications
+needn't worry about them, but that process is not yet complete.
+</p>
+
+<hr>
+<address><a href="mailto:opensource@google.com">Maxim Lifantsev</a></address>
+<!-- Created: Tue Dec 19 10:43:14 PST 2000 -->
+<!-- hhmts start -->
+Last modified: Thu Mar  3 05:51:40 PST 2005
+<!-- hhmts end -->
+</body>
+</html>
diff --git a/doc/heap_profiler.html b/doc/heap_profiler.html
index 97516ae..ce7168c 100644
--- a/doc/heap_profiler.html
+++ b/doc/heap_profiler.html
@@ -1,117 +1,96 @@
 <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
 <html>
 <head>
-    <link rel="stylesheet" href="../../docstyle.css">
-<style type="text/css">
-  h1 { font-size: 24pt; }
-</style>
-<title>Profiling heap usage</title>
+<title>Google Heap Profiler</title>
 </head>
 
 <body>
 <h1>Profiling heap usage</h1>
 
-This document describes how to profile the heap usage of a Google3 C++
+This document describes how to profile the heap usage of a C++
 program.  This facility can be useful for
 <ul>
 <li> Figuring out what is in the program heap at any given time
-<li> Locating and automatically detecting memory leaks
+<li> Locating memory leaks
 <li> Finding places that do a lot of allocation
 </ul>
 
-<h2>Turning On Heap-Profiling</h2>
+<h2>Linking in the Heap Profiler</h2>
 
 <p>
-Start your program with the <code>--heap_profile=&lt;prefix&gt;</code>
-command-line flag.
+You can profile any program that has the tcmalloc library linked
+in.  No recompilation is necessary to use the heap profiler.
+</p>
 
-<h2>Caveats</h2>
+<p>
+It's safe to link in tcmalloc even if you don't expect to
+heap-profiler your program.  Your programs will not run any slower
+as long as you don't use any of the heap-profiler features.
+</p>
 
-<ul>
-<li> <p>
-     Heap profiling can only be used with programs that are
-     using either the <a href="../../designdocs/tcmalloc/tcmalloc.html">
-     tcmalloc</a> library, or Google's <code>debugallocation</code>
-     library.  The vast majority of Google3 programs fall into this
-     category.  If you have a program that is using a different
-     malloc library (perhaps the glibc malloc library), you will
-     have to edit the <code>BUILD</code> file and remove the
-     <code>malloc = &lt;lib&gt;</code> rule for the program.
+<p>
+You can run the heap profiler on applications you didn't compile
+yourself, by using LD_PRELOAD:
+</p>
+<pre>
+   $ LD_PRELOAD="/usr/lib/libtcmalloc.so" HEAPPROFILE=... <binary>
+</pre>
+<p>
+We don't necessarily recommend this mode of usage.
+</p>
 
-<li> <p>
-     If the program linked in a library that was not compiled
-     with enough symbolic information, all samples associated
-     with the library may be charged to the last symbol found
-     in the program before the libary.  This will artificially
-     inflate the count for that symbol.
 
-<li> <p>
-     If you run the program on one machine, and profile it on another,
-     and the shared libraries are different on the two machines, the
-     profiling output may be confusing: samples that fall within
-     the shared libaries may be assigned to arbitrary procedures.
+<h2>Turning On Heap Profiling</h2>
+
+<p>
+Define the environment variable HEAPPROFILE to the filename to dump the
+profile to.  For instance, to profile /usr/local/netscape:
+</p>
+<pre>
+ $ HEAPPROFILE=/tmp/profile /usr/local/netscape           # sh
+ % setenv HEAPPROFILE /tmp/profile; /usr/local/netscape   # csh
+</pre>
+
+<p>Profiling also works correctly with sub-processes: each child
+process gets its own profile with its own name (generated by combining
+HEAPPROFILE with the child's process id).</p>
+
+<p>For security reasons, heap profiling will not write to a file --
+and it thus not usable -- for setuid programs.</p>
 
-<li> <p>
-     Several places in the Google code-base do their own memory
-     management.  If the profile shows strange allocation patterns in
-     the DataBuffer code, you may want to uncomment
-     <code>HEAP_PROFILE</code> in <code>google3/iobuffer/databuffer.cc</code>
-     to turn-off DataBuffer free-list management code.
-
-     <p>
-     Similarly, if your program makes significant use of STL, you may
-     want to force STL to use the system allocator.  This is already
-     the default for most google3 programs, but may be different if
-     you are using a non-standard compiler or build setup.
-
-     <p>
-     To do this when building with gcc-3.3.3, just
-     set the environment variable GLIBCPP_FORCE_NEW=1, 
-     e.g. <code>export GLIBCPP_FORCE_NEW=1</code>,
-     before running your program.  
-     (In gcc-3.4.0, the variable to set is GLIBCXX_FORCE_NEW.)
-
-     <p>
-     To do this when building with gcc2, uncomment <code>HEAP_PROFILE</code> in
-     <code>google3/third_party/stl/gcc2/stl_alloc.h</code>,
-     and do a <code>make clean</code>.  
 
-</ul>
 
 <h2>Extracting a profile</h2>
 
+<p>
 If heap-profiling is turned on in a program, the program will periodically
 write profiles to the filesystem.  The sequence of profiles will be named:
+</p>
 <pre>
            &lt;prefix&gt;.0000.heap
            &lt;prefix&gt;.0001.heap
            &lt;prefix&gt;.0002.heap
            ...
 </pre>
-where <code>&lt;prefix&gt;</code> is the value supplied for the
-<code>--heap_profile</code> flag.  Note that if the supplied prefix
+<p>
+where <code>&lt;prefix&gt;</code> is the value supplied in
+<code>HEAPPROFILE</code>.  Note that if the supplied prefix
 does not start with a <code>/</code>, the profile files will be
 written to the program's working directory.
+</p>
 
 <p>
 By default, a new profile file is written after every 1GB of
-allocation.  The profile-writing interval can be adjusted by setting
-the command-line flag <code>--heap_profile_allocation_interval</code>
-to a numeric value that indicates the number of bytes of allocation
+allocation.  The profile-writing interval can be adjusted by calling
+HeapProfilerSetAllocationInterval() from your program.  This takes one
+argument: a numeric value that indicates the number of bytes of allocation
 between each profile dump.
-
-<p>
-If the program you are profiling is a Google2 server on which
-heap-profiling has been turned on, you can also telnet to it at any
-time and send it the "v hidden-heap" command.  The resulting output is
-the heap profile.  If the server has an HTTP interface built using the
-Google2 RPC infrastructure, you can fetch the heap profile by
-asking for the URL
-<code>http://server:port/varz?var=hidden-heap</code>
+</p>
 
 <p>
 You can also generate profiles from specific points in the program
 by inserting a call to <code>HeapProfile()</code>.  Example:
+</p>
 <pre>
     extern const char* HeapProfile();
     const char* profile = HeapProfile();
@@ -127,74 +106,19 @@ allocation site is defined as the active stack trace at the call to
 <code>malloc</code>, <code>calloc</code>, <code>realloc</code>, or,
 <code>new</code>.
 
-<h2>Automatic leaks checking support</h2>
-
-The profiling system provides a way to automatically check for heap
-memory leaks from your code.  This is typically very useful in
-unittests and regression tests.
-
-<p>
-The easiest (and the preferred) way to enable end-to-end leaks checking
-(from right after <code>InitGoogle</code> to right before program exit)
-in your Google3 test or regular binary is to add a dependency on
-<code>//base:heapcheck</code> for it.
-Alternatively you can e.g. set
-<code>FLAGS_heap_check = "normal";</code>
-before <code>InitGoogle</code> call.
-
-<p>
-If you need a more localized heap leaks check, here is an example:
-The following code will
-die if <code>Foo()</code> leaks any memory
-(i.e. it allocates memory that is not freed by the time it returns):
-<pre>
-    HeapProfileLeakChecker checker("foo");
-    Foo();
-    CHECK(checker.NoLeaks());
-</pre>
-When the <code>checker</code> object is allocated, it creates
-one heap profile.  When <code>checker.NoLeaks()</code> is invoked,
-it creates another heap profile and compares it to the previously
-created profile.  If the new profile indicates memory growth
-(or any memory allocation change if one
- uses <code>checker.SameHeap()</code> instead),
-<code>NoLeaks()</code> will return false and the program will
-abort.  An error message will also be printed out saying how
-<code>pprof</code> command can be run to get a detailed
-analysis of the actual leaks.
-
-<p>
-In the case of localized leaks checking
-you must enable heap profiling to activate leaks checking,
-for example, by adding <code>FLAGS_heap_profile = "tmpdir/foo";</code>
-before <code>InitGoogle</code> call
-or by calling <code>HeapProfilerStart("tmpdir/foo")</code>.
-
-<p>
-On the way of making and keeping Google's code leak free,
-please make the effort to fix discovered (pseudo) leaks properly.
-That is, fix the leaks in the libraries and add appropriate
-<code>REGISTER_HEAPCHECK_CLEANUP</code> code
-so that end-to-end leaks checking works,
-instead of just making your test succeed with the least effort possible.
-
-<p>
-See the comments for <code>HeapProfileLeakChecker</code> class
-in <code>heap-checker.h</code> and the code in
-<code>heap-checker_unittest.cc</code>
-for more information and examples.
-
 <h2>Interpreting the profile</h2>
 
 The profile output can be viewed by passing it to the
 <code>pprof</code> tool.  The <code>pprof</code> tool can print both
 CPU usage and heap usage information.  It is documented in detail
-on the <a href="cpuprofile.html">CPU Profiling</a> page.
+on the <a href="cpu_profiler.html">CPU Profiling</a> page.
+Heap-profile-specific flags and usage are explained below.
 
 <p>
 Here are some examples.  These examples assume the binary is named
 <code>gfs_master</code>, and a sequence of heap profile files can be
 found in files named:
+</p>
 <pre>
   profile.0001.heap
   profile.0002.heap
@@ -205,7 +129,7 @@ found in files named:
 <h3>Why is a process so big</h3>
 
 <pre>
-    % /home/build/google3/perftools/pprof --gv bin/gfs_master profile.0100.heap
+    % pprof --gv gfs_master profile.0100.heap
 </pre>
 
 This command will pop-up a <code>gv</code> window that displays
@@ -238,18 +162,21 @@ a program so you can find gradual memory leaks.  One simple way to do
 this is to compare two profiles -- both collected after the program
 has been running for a while.  Specify the name of the first profile
 using the <code>--base</code> option.  Example:
+</p>
 <pre>
-   % /home/build/google3/perftools/pprof --base=profile.0004.heap prog profile.0100.heap
+   % pprof --base=profile.0004.heap gfs_master profile.0100.heap
 </pre>
 
+<p>
 The memory-usage in <code>profile.0004.heap</code> will be subtracted from
 the memory-usage in <code>profile.0100.heap</code> and the result will
 be displayed.
+</p>
 
 <h3>Text display</h3>
 
 <pre>
-% /home/build/google3/perftools/pprof bin/gfs_master profile.0100.heap
+% pprof gfs_master profile.0100.heap
    255.6  24.7%  24.7%    255.6  24.7% GFS_MasterChunk::AddServer
    184.6  17.8%  42.5%    298.8  28.8% GFS_MasterChunkTable::Create
    176.2  17.0%  59.5%    729.9  70.5% GFS_MasterChunkTable::UpdateState
@@ -277,25 +204,25 @@ The following command will give a graphical display of a subset of
 the call-graph.  Only paths in the call-graph that match the
 regular expression <code>DataBuffer</code> are included:
 <pre>
-% /home/build/google3/perftools/pprof --gv --focus=DataBuffer bin/gfs_master profile.0100.heap
+% pprof --gv --focus=DataBuffer gfs_master profile.0100.heap
 </pre>
 
 Similarly, the following command will omit all paths subset of the
 call-graph.  All paths in the call-graph that match the regular
 expression <code>DataBuffer</code> are discarded:
 <pre>
-% /home/build/google3/perftools/pprof --gv --ignore=DataBuffer bin/gfs_master profile.0100.heap
+% pprof --gv --ignore=DataBuffer gfs_master profile.0100.heap
 </pre>
 
-<p>
-
 <h3>Total allocations + object-level information</h3>
 
+<P>
 All of the previous examples have displayed the amount of in-use
 space.  I.e., the number of bytes that have been allocated but not
 freed.  You can also get other types of information by supplying
 a flag to <code>pprof</code>:
-<p>
+</p>
+
 <center>
 <table frame=box rules=sides cellpadding=5 width=100%>
 
@@ -337,11 +264,62 @@ a flag to <code>pprof</code>:
 </table>
 </center>
 
+<h2>Caveats</h2>
+
+<ul>
+<li> <p>
+     Heap profiling requires the use of libtcmalloc.  This requirement
+     may be removed in a future version of the heap profiler, and the
+     heap profiler separated out into its own library.
+     </p>
+     
+<li> <p>
+     If the program linked in a library that was not compiled
+     with enough symbolic information, all samples associated
+     with the library may be charged to the last symbol found
+     in the program before the libary.  This will artificially
+     inflate the count for that symbol.
+     </p>
+
+<li> <p>
+     If you run the program on one machine, and profile it on another,
+     and the shared libraries are different on the two machines, the
+     profiling output may be confusing: samples that fall within
+     the shared libaries may be assigned to arbitrary procedures.
+     </p>
+
+<li> <p>
+     Several libraries, such as some STL implementations, do their own
+     memory management.  This may cause strange profiling results.  We
+     have code in libtcmalloc to cause STL to use tcmalloc for memory
+     management (which in our tests is better than STL's internal
+     management), though it only works for some STL implementations.
+     </p>
+
+<li> <p>
+     If your program forks, the children will also be profiled (since
+     they inherit the same HEAPPROFILE setting).  Each process is
+     profiled separately; to distinguish the child profiles from the
+     parent profile and from each other, all children will have their
+     process-id attached to the HEAPPROFILE name.
+     </p>
+     
+<li> <p>
+     Due to a hack we make to work around a possible gcc bug, your
+     profiles may end up named strangely if the first character of
+     your HEAPPROFILE variable has ascii value greater than 127.  This
+     should be exceedingly rare, but if you need to use such a name,
+     just set prepend <code>./</code> to your filename:
+     <code>HEAPPROFILE=./&Auml;gypten</code>.
+     </p>
+
+</ul>
+
 <hr>
-<address><a href="mailto:sanjay@google.com">Sanjay Ghemawat</a></address>
+<address><a href="mailto:opensource@google.com">Sanjay Ghemawat</a></address>
 <!-- Created: Tue Dec 19 10:43:14 PST 2000 -->
 <!-- hhmts start -->
-Last modified: Tue Nov 23 13:07:11 PST 2004
+Last modified: Wed Apr 20 05:46:16 PDT 2005
 <!-- hhmts end -->
 </body>
 </html>
diff --git a/doc/index.html b/doc/index.html
new file mode 100644
index 0000000..94226d5
--- /dev/null
+++ b/doc/index.html
@@ -0,0 +1,20 @@
+<HTML>
+
+<HEAD>
+<title>Google Performance Tools</title>
+</HEAD>
+
+<BODY>
+<ul>
+  <li> <A HREF="tcmalloc.html">thread-caching malloc</A>
+  <li> <A HREF="heap_checker.html">heap-checking using tcmalloc</A>
+  <li> <A HREF="heap_profiler.html">heap-profiling using tcmalloc</A>
+  <li> <A HREF="cpu_profiler.html">CPU profiler</A>
+</ul>
+
+<hr>
+Last modified: Fri Mar 11 05:58:27 PST 2005
+
+</BODY>
+
+</HTML>
diff --git a/doc/t-test1.times.txt b/doc/t-test1.times.txt
new file mode 100644
index 0000000..0163693
--- /dev/null
+++ b/doc/t-test1.times.txt
@@ -0,0 +1,480 @@
+time.1.ptmalloc.64:0.56 user 0.02 system 0.57 elapsed 100% CPU
+time.1.tcmalloc.64:0.38 user 0.02 system 0.40 elapsed 98% CPU
+time.1.ptmalloc.128:0.61 user 0.01 system 0.61 elapsed 101% CPU
+time.1.tcmalloc.128:0.35 user 0.00 system 0.35 elapsed 99% CPU
+time.1.ptmalloc.256:0.59 user 0.01 system 0.60 elapsed 100% CPU
+time.1.tcmalloc.256:0.27 user 0.02 system 0.28 elapsed 102% CPU
+time.1.ptmalloc.512:0.57 user 0.00 system 0.57 elapsed 100% CPU
+time.1.tcmalloc.512:0.25 user 0.01 system 0.25 elapsed 101% CPU
+time.1.ptmalloc.1024:0.52 user 0.00 system 0.52 elapsed 99% CPU
+time.1.tcmalloc.1024:0.22 user 0.02 system 0.24 elapsed 97% CPU
+time.1.ptmalloc.2048:0.47 user 0.00 system 0.47 elapsed 99% CPU
+time.1.tcmalloc.2048:0.22 user 0.02 system 0.25 elapsed 95% CPU
+time.1.ptmalloc.4096:0.48 user 0.01 system 0.48 elapsed 100% CPU
+time.1.tcmalloc.4096:0.25 user 0.01 system 0.25 elapsed 100% CPU
+time.1.ptmalloc.8192:0.49 user 0.02 system 0.49 elapsed 102% CPU
+time.1.tcmalloc.8192:0.27 user 0.02 system 0.28 elapsed 101% CPU
+time.1.ptmalloc.16384:0.51 user 0.04 system 0.55 elapsed 99% CPU
+time.1.tcmalloc.16384:0.35 user 0.02 system 0.37 elapsed 100% CPU
+time.1.ptmalloc.32768:0.53 user 0.14 system 0.66 elapsed 100% CPU
+time.1.tcmalloc.32768:0.67 user 0.02 system 0.69 elapsed 99% CPU
+time.1.ptmalloc.65536:0.68 user 0.31 system 0.98 elapsed 100% CPU
+time.1.tcmalloc.65536:0.71 user 0.01 system 0.72 elapsed 99% CPU
+time.1.ptmalloc.131072:0.90 user 0.72 system 1.62 elapsed 99% CPU
+time.1.tcmalloc.131072:0.94 user 0.03 system 0.97 elapsed 99% CPU
+time.2.ptmalloc.64:1.05 user 0.00 system 0.53 elapsed 196% CPU
+time.2.tcmalloc.64:0.66 user 0.03 system 0.37 elapsed 185% CPU
+time.2.ptmalloc.128:1.77 user 0.01 system 0.89 elapsed 198% CPU
+time.2.tcmalloc.128:0.53 user 0.01 system 0.29 elapsed 184% CPU
+time.2.ptmalloc.256:1.14 user 0.01 system 0.62 elapsed 182% CPU
+time.2.tcmalloc.256:0.45 user 0.02 system 0.26 elapsed 180% CPU
+time.2.ptmalloc.512:1.26 user 0.40 system 1.79 elapsed 92% CPU
+time.2.tcmalloc.512:0.43 user 0.02 system 0.27 elapsed 166% CPU
+time.2.ptmalloc.1024:0.98 user 0.03 system 0.56 elapsed 179% CPU
+time.2.tcmalloc.1024:0.44 user 0.02 system 0.34 elapsed 134% CPU
+time.2.ptmalloc.2048:0.87 user 0.02 system 0.44 elapsed 199% CPU
+time.2.tcmalloc.2048:0.49 user 0.02 system 0.34 elapsed 148% CPU
+time.2.ptmalloc.4096:0.92 user 0.03 system 0.48 elapsed 196% CPU
+time.2.tcmalloc.4096:0.50 user 0.02 system 0.49 elapsed 105% CPU
+time.2.ptmalloc.8192:1.05 user 0.04 system 0.55 elapsed 196% CPU
+time.2.tcmalloc.8192:0.59 user 0.01 system 0.51 elapsed 116% CPU
+time.2.ptmalloc.16384:1.30 user 0.14 system 0.72 elapsed 198% CPU
+time.2.tcmalloc.16384:0.63 user 0.03 system 0.68 elapsed 96% CPU
+time.2.ptmalloc.32768:1.33 user 0.56 system 1.00 elapsed 189% CPU
+time.2.tcmalloc.32768:1.16 user 0.01 system 1.17 elapsed 99% CPU
+time.2.ptmalloc.65536:1.86 user 1.79 system 2.01 elapsed 181% CPU
+time.2.tcmalloc.65536:1.35 user 0.01 system 1.35 elapsed 100% CPU
+time.2.ptmalloc.131072:2.61 user 5.19 system 4.81 elapsed 162% CPU
+time.2.tcmalloc.131072:1.86 user 0.04 system 1.90 elapsed 100% CPU
+time.3.ptmalloc.64:1.79 user 0.03 system 0.67 elapsed 268% CPU
+time.3.tcmalloc.64:1.58 user 0.04 system 0.62 elapsed 260% CPU
+time.3.ptmalloc.128:2.77 user 1.34 system 3.07 elapsed 133% CPU
+time.3.tcmalloc.128:1.19 user 0.01 system 0.50 elapsed 236% CPU
+time.3.ptmalloc.256:2.14 user 0.02 system 0.85 elapsed 252% CPU
+time.3.tcmalloc.256:0.96 user 0.01 system 0.41 elapsed 236% CPU
+time.3.ptmalloc.512:3.37 user 1.31 system 3.33 elapsed 140% CPU
+time.3.tcmalloc.512:0.93 user 0.04 system 0.39 elapsed 243% CPU
+time.3.ptmalloc.1024:1.66 user 0.01 system 0.64 elapsed 260% CPU
+time.3.tcmalloc.1024:0.81 user 0.02 system 0.44 elapsed 187% CPU
+time.3.ptmalloc.2048:2.07 user 0.01 system 0.82 elapsed 252% CPU
+time.3.tcmalloc.2048:1.10 user 0.04 system 0.59 elapsed 191% CPU
+time.3.ptmalloc.4096:2.01 user 0.03 system 0.79 elapsed 258% CPU
+time.3.tcmalloc.4096:0.87 user 0.03 system 0.65 elapsed 137% CPU
+time.3.ptmalloc.8192:2.22 user 0.11 system 0.83 elapsed 280% CPU
+time.3.tcmalloc.8192:0.96 user 0.06 system 0.75 elapsed 135% CPU
+time.3.ptmalloc.16384:2.56 user 0.47 system 1.02 elapsed 295% CPU
+time.3.tcmalloc.16384:0.99 user 0.04 system 1.03 elapsed 99% CPU
+time.3.ptmalloc.32768:3.29 user 1.75 system 1.96 elapsed 256% CPU
+time.3.tcmalloc.32768:1.67 user 0.02 system 1.69 elapsed 99% CPU
+time.3.ptmalloc.65536:4.04 user 6.62 system 4.92 elapsed 216% CPU
+time.3.tcmalloc.65536:1.91 user 0.02 system 1.98 elapsed 97% CPU
+time.3.ptmalloc.131072:5.55 user 17.86 system 12.44 elapsed 188% CPU
+time.3.tcmalloc.131072:2.78 user 0.02 system 2.82 elapsed 99% CPU
+time.4.ptmalloc.64:3.42 user 1.36 system 3.20 elapsed 149% CPU
+time.4.tcmalloc.64:2.42 user 0.02 system 0.71 elapsed 341% CPU
+time.4.ptmalloc.128:3.98 user 1.79 system 3.89 elapsed 148% CPU
+time.4.tcmalloc.128:1.87 user 0.02 system 0.58 elapsed 325% CPU
+time.4.ptmalloc.256:4.06 user 2.14 system 4.12 elapsed 150% CPU
+time.4.tcmalloc.256:1.69 user 0.02 system 0.51 elapsed 331% CPU
+time.4.ptmalloc.512:4.48 user 2.15 system 4.39 elapsed 150% CPU
+time.4.tcmalloc.512:1.62 user 0.03 system 0.52 elapsed 314% CPU
+time.4.ptmalloc.1024:3.18 user 0.03 system 0.84 elapsed 381% CPU
+time.4.tcmalloc.1024:1.53 user 0.02 system 0.56 elapsed 274% CPU
+time.4.ptmalloc.2048:3.24 user 0.02 system 0.84 elapsed 384% CPU
+time.4.tcmalloc.2048:1.44 user 0.04 system 0.66 elapsed 221% CPU
+time.4.ptmalloc.4096:3.50 user 0.04 system 0.91 elapsed 389% CPU
+time.4.tcmalloc.4096:1.31 user 0.01 system 0.89 elapsed 148% CPU
+time.4.ptmalloc.8192:6.77 user 3.85 system 4.14 elapsed 256% CPU
+time.4.tcmalloc.8192:1.20 user 0.05 system 0.97 elapsed 127% CPU
+time.4.ptmalloc.16384:7.08 user 5.06 system 4.63 elapsed 262% CPU
+time.4.tcmalloc.16384:1.27 user 0.03 system 1.25 elapsed 103% CPU
+time.4.ptmalloc.32768:5.57 user 4.22 system 3.31 elapsed 295% CPU
+time.4.tcmalloc.32768:2.17 user 0.03 system 2.25 elapsed 97% CPU
+time.4.ptmalloc.65536:6.11 user 15.05 system 9.19 elapsed 230% CPU
+time.4.tcmalloc.65536:2.51 user 0.02 system 2.57 elapsed 98% CPU
+time.4.ptmalloc.131072:7.58 user 33.15 system 21.28 elapsed 191% CPU
+time.4.tcmalloc.131072:3.57 user 0.07 system 3.66 elapsed 99% CPU
+time.5.ptmalloc.64:4.44 user 2.08 system 4.37 elapsed 148% CPU
+time.5.tcmalloc.64:2.87 user 0.02 system 0.79 elapsed 361% CPU
+time.5.ptmalloc.128:4.77 user 2.77 system 5.14 elapsed 146% CPU
+time.5.tcmalloc.128:2.65 user 0.03 system 0.72 elapsed 367% CPU
+time.5.ptmalloc.256:5.82 user 2.88 system 5.49 elapsed 158% CPU
+time.5.tcmalloc.256:2.33 user 0.01 system 0.66 elapsed 352% CPU
+time.5.ptmalloc.512:6.27 user 3.11 system 5.34 elapsed 175% CPU
+time.5.tcmalloc.512:2.14 user 0.03 system 0.70 elapsed 307% CPU
+time.5.ptmalloc.1024:6.82 user 3.18 system 5.23 elapsed 191% CPU
+time.5.tcmalloc.1024:2.20 user 0.02 system 0.70 elapsed 313% CPU
+time.5.ptmalloc.2048:6.57 user 3.46 system 5.22 elapsed 192% CPU
+time.5.tcmalloc.2048:2.15 user 0.03 system 0.82 elapsed 264% CPU
+time.5.ptmalloc.4096:8.75 user 5.09 system 5.26 elapsed 263% CPU
+time.5.tcmalloc.4096:1.68 user 0.03 system 1.08 elapsed 158% CPU
+time.5.ptmalloc.8192:4.48 user 0.61 system 1.51 elapsed 335% CPU
+time.5.tcmalloc.8192:1.47 user 0.07 system 1.18 elapsed 129% CPU
+time.5.ptmalloc.16384:5.71 user 1.98 system 2.14 elapsed 358% CPU
+time.5.tcmalloc.16384:1.58 user 0.03 system 1.52 elapsed 105% CPU
+time.5.ptmalloc.32768:7.19 user 7.81 system 5.53 elapsed 270% CPU
+time.5.tcmalloc.32768:2.63 user 0.05 system 2.72 elapsed 98% CPU
+time.5.ptmalloc.65536:8.45 user 23.51 system 14.30 elapsed 223% CPU
+time.5.tcmalloc.65536:3.12 user 0.05 system 3.21 elapsed 98% CPU
+time.5.ptmalloc.131072:10.22 user 43.63 system 27.84 elapsed 193% CPU
+time.5.tcmalloc.131072:4.42 user 0.07 system 4.51 elapsed 99% CPU
+time.6.ptmalloc.64:5.57 user 2.56 system 5.08 elapsed 159% CPU
+time.6.tcmalloc.64:3.20 user 0.01 system 0.89 elapsed 360% CPU
+time.6.ptmalloc.128:5.98 user 3.52 system 5.71 elapsed 166% CPU
+time.6.tcmalloc.128:2.76 user 0.02 system 0.78 elapsed 355% CPU
+time.6.ptmalloc.256:4.61 user 0.02 system 1.19 elapsed 389% CPU
+time.6.tcmalloc.256:2.65 user 0.02 system 0.74 elapsed 356% CPU
+time.6.ptmalloc.512:8.28 user 3.88 system 6.61 elapsed 183% CPU
+time.6.tcmalloc.512:2.60 user 0.02 system 0.72 elapsed 362% CPU
+time.6.ptmalloc.1024:4.75 user 0.00 system 1.22 elapsed 387% CPU
+time.6.tcmalloc.1024:2.56 user 0.02 system 0.79 elapsed 325% CPU
+time.6.ptmalloc.2048:8.90 user 4.59 system 6.15 elapsed 219% CPU
+time.6.tcmalloc.2048:2.37 user 0.06 system 0.96 elapsed 250% CPU
+time.6.ptmalloc.4096:11.41 user 7.02 system 6.31 elapsed 291% CPU
+time.6.tcmalloc.4096:1.82 user 0.03 system 1.19 elapsed 154% CPU
+time.6.ptmalloc.8192:11.64 user 8.25 system 5.97 elapsed 332% CPU
+time.6.tcmalloc.8192:1.83 user 0.07 system 1.38 elapsed 136% CPU
+time.6.ptmalloc.16384:7.44 user 2.98 system 3.01 elapsed 345% CPU
+time.6.tcmalloc.16384:1.83 user 0.08 system 1.80 elapsed 105% CPU
+time.6.ptmalloc.32768:8.69 user 12.35 system 8.04 elapsed 261% CPU
+time.6.tcmalloc.32768:3.14 user 0.06 system 3.24 elapsed 98% CPU
+time.6.ptmalloc.65536:10.52 user 35.43 system 20.75 elapsed 221% CPU
+time.6.tcmalloc.65536:3.62 user 0.03 system 3.72 elapsed 98% CPU
+time.6.ptmalloc.131072:11.74 user 59.00 system 36.93 elapsed 191% CPU
+time.6.tcmalloc.131072:5.33 user 0.04 system 5.42 elapsed 98% CPU
+time.7.ptmalloc.64:6.60 user 3.45 system 6.01 elapsed 167% CPU
+time.7.tcmalloc.64:3.50 user 0.04 system 0.94 elapsed 376% CPU
+time.7.ptmalloc.128:7.09 user 4.25 system 6.69 elapsed 169% CPU
+time.7.tcmalloc.128:3.13 user 0.03 system 0.84 elapsed 374% CPU
+time.7.ptmalloc.256:9.28 user 4.85 system 7.20 elapsed 196% CPU
+time.7.tcmalloc.256:3.06 user 0.02 system 0.82 elapsed 375% CPU
+time.7.ptmalloc.512:9.13 user 4.78 system 6.79 elapsed 204% CPU
+time.7.tcmalloc.512:2.99 user 0.03 system 0.83 elapsed 359% CPU
+time.7.ptmalloc.1024:10.85 user 6.41 system 7.52 elapsed 229% CPU
+time.7.tcmalloc.1024:3.05 user 0.04 system 0.89 elapsed 345% CPU
+time.7.ptmalloc.2048:5.65 user 0.08 system 1.47 elapsed 388% CPU
+time.7.tcmalloc.2048:3.01 user 0.01 system 0.98 elapsed 306% CPU
+time.7.ptmalloc.4096:6.09 user 0.08 system 1.58 elapsed 389% CPU
+time.7.tcmalloc.4096:2.25 user 0.03 system 1.32 elapsed 171% CPU
+time.7.ptmalloc.8192:6.73 user 0.85 system 1.99 elapsed 379% CPU
+time.7.tcmalloc.8192:2.22 user 0.08 system 1.61 elapsed 142% CPU
+time.7.ptmalloc.16384:8.87 user 4.66 system 4.04 elapsed 334% CPU
+time.7.tcmalloc.16384:2.07 user 0.07 system 2.07 elapsed 103% CPU
+time.7.ptmalloc.32768:10.61 user 17.85 system 11.22 elapsed 253% CPU
+time.7.tcmalloc.32768:3.68 user 0.06 system 3.79 elapsed 98% CPU
+time.7.ptmalloc.65536:13.05 user 45.97 system 27.28 elapsed 216% CPU
+time.7.tcmalloc.65536:4.16 user 0.07 system 4.31 elapsed 98% CPU
+time.7.ptmalloc.131072:13.22 user 62.67 system 41.33 elapsed 183% CPU
+time.7.tcmalloc.131072:6.10 user 0.06 system 6.25 elapsed 98% CPU
+time.8.ptmalloc.64:7.31 user 3.92 system 6.39 elapsed 175% CPU
+time.8.tcmalloc.64:4.00 user 0.01 system 1.04 elapsed 383% CPU
+time.8.ptmalloc.128:9.40 user 5.41 system 7.67 elapsed 192% CPU
+time.8.tcmalloc.128:3.61 user 0.02 system 0.94 elapsed 386% CPU
+time.8.ptmalloc.256:10.61 user 6.35 system 7.96 elapsed 212% CPU
+time.8.tcmalloc.256:3.30 user 0.02 system 0.99 elapsed 335% CPU
+time.8.ptmalloc.512:12.42 user 7.10 system 8.79 elapsed 221% CPU
+time.8.tcmalloc.512:3.35 user 0.04 system 0.94 elapsed 358% CPU
+time.8.ptmalloc.1024:13.63 user 8.54 system 8.95 elapsed 247% CPU
+time.8.tcmalloc.1024:3.44 user 0.02 system 0.96 elapsed 359% CPU
+time.8.ptmalloc.2048:6.45 user 0.03 system 1.67 elapsed 386% CPU
+time.8.tcmalloc.2048:3.55 user 0.05 system 1.09 elapsed 328% CPU
+time.8.ptmalloc.4096:6.83 user 0.26 system 1.80 elapsed 393% CPU
+time.8.tcmalloc.4096:2.78 user 0.06 system 1.53 elapsed 185% CPU
+time.8.ptmalloc.8192:7.59 user 1.29 system 2.36 elapsed 376% CPU
+time.8.tcmalloc.8192:2.57 user 0.07 system 1.84 elapsed 142% CPU
+time.8.ptmalloc.16384:10.15 user 6.20 system 5.20 elapsed 314% CPU
+time.8.tcmalloc.16384:2.40 user 0.05 system 2.42 elapsed 101% CPU
+time.8.ptmalloc.32768:11.82 user 24.48 system 14.60 elapsed 248% CPU
+time.8.tcmalloc.32768:4.37 user 0.05 system 4.47 elapsed 98% CPU
+time.8.ptmalloc.65536:15.41 user 58.94 system 34.42 elapsed 215% CPU
+time.8.tcmalloc.65536:4.90 user 0.04 system 4.96 elapsed 99% CPU
+time.8.ptmalloc.131072:16.07 user 82.93 system 52.51 elapsed 188% CPU
+time.8.tcmalloc.131072:7.13 user 0.04 system 7.19 elapsed 99% CPU
+time.9.ptmalloc.64:8.44 user 4.59 system 6.92 elapsed 188% CPU
+time.9.tcmalloc.64:4.00 user 0.02 system 1.05 elapsed 382% CPU
+time.9.ptmalloc.128:10.92 user 6.14 system 8.31 elapsed 205% CPU
+time.9.tcmalloc.128:3.88 user 0.02 system 1.01 elapsed 382% CPU
+time.9.ptmalloc.256:13.01 user 7.75 system 9.12 elapsed 227% CPU
+time.9.tcmalloc.256:3.89 user 0.01 system 1.00 elapsed 386% CPU
+time.9.ptmalloc.512:14.96 user 8.89 system 9.73 elapsed 244% CPU
+time.9.tcmalloc.512:3.80 user 0.03 system 1.01 elapsed 377% CPU
+time.9.ptmalloc.1024:15.42 user 10.20 system 9.80 elapsed 261% CPU
+time.9.tcmalloc.1024:3.86 user 0.03 system 1.19 elapsed 325% CPU
+time.9.ptmalloc.2048:7.24 user 0.02 system 1.87 elapsed 388% CPU
+time.9.tcmalloc.2048:3.98 user 0.05 system 1.26 elapsed 319% CPU
+time.9.ptmalloc.4096:7.96 user 0.18 system 2.06 elapsed 394% CPU
+time.9.tcmalloc.4096:3.27 user 0.04 system 1.69 elapsed 195% CPU
+time.9.ptmalloc.8192:9.00 user 1.63 system 2.79 elapsed 380% CPU
+time.9.tcmalloc.8192:3.00 user 0.06 system 2.05 elapsed 148% CPU
+time.9.ptmalloc.16384:12.07 user 8.13 system 6.55 elapsed 308% CPU
+time.9.tcmalloc.16384:2.85 user 0.05 system 2.75 elapsed 105% CPU
+time.9.ptmalloc.32768:13.99 user 29.65 system 18.02 elapsed 242% CPU
+time.9.tcmalloc.32768:4.98 user 0.06 system 5.13 elapsed 98% CPU
+time.9.ptmalloc.65536:16.89 user 70.42 system 42.11 elapsed 207% CPU
+time.9.tcmalloc.65536:5.55 user 0.04 system 5.65 elapsed 98% CPU
+time.9.ptmalloc.131072:18.53 user 94.11 system 61.17 elapsed 184% CPU
+time.9.tcmalloc.131072:8.06 user 0.04 system 8.16 elapsed 99% CPU
+time.10.ptmalloc.64:9.81 user 5.70 system 7.42 elapsed 208% CPU
+time.10.tcmalloc.64:4.43 user 0.03 system 1.20 elapsed 370% CPU
+time.10.ptmalloc.128:12.69 user 7.81 system 9.02 elapsed 227% CPU
+time.10.tcmalloc.128:4.27 user 0.02 system 1.13 elapsed 378% CPU
+time.10.ptmalloc.256:15.04 user 9.53 system 9.92 elapsed 247% CPU
+time.10.tcmalloc.256:4.23 user 0.02 system 1.09 elapsed 388% CPU
+time.10.ptmalloc.512:17.30 user 10.46 system 10.61 elapsed 261% CPU
+time.10.tcmalloc.512:4.14 user 0.05 system 1.10 elapsed 379% CPU
+time.10.ptmalloc.1024:16.96 user 9.38 system 9.30 elapsed 283% CPU
+time.10.tcmalloc.1024:4.27 user 0.06 system 1.18 elapsed 366% CPU
+time.10.ptmalloc.2048:8.07 user 0.03 system 2.06 elapsed 393% CPU
+time.10.tcmalloc.2048:4.49 user 0.07 system 1.33 elapsed 342% CPU
+time.10.ptmalloc.4096:8.66 user 0.25 system 2.25 elapsed 394% CPU
+time.10.tcmalloc.4096:3.61 user 0.05 system 1.78 elapsed 205% CPU
+time.10.ptmalloc.8192:21.52 user 17.43 system 10.41 elapsed 374% CPU
+time.10.tcmalloc.8192:3.59 user 0.10 system 2.33 elapsed 158% CPU
+time.10.ptmalloc.16384:20.55 user 24.85 system 12.55 elapsed 361% CPU
+time.10.tcmalloc.16384:3.29 user 0.04 system 3.22 elapsed 103% CPU
+time.10.ptmalloc.32768:15.23 user 38.13 system 22.49 elapsed 237% CPU
+time.10.tcmalloc.32768:5.62 user 0.05 system 5.72 elapsed 99% CPU
+time.10.ptmalloc.65536:19.80 user 85.42 system 49.98 elapsed 210% CPU
+time.10.tcmalloc.65536:6.23 user 0.09 system 6.36 elapsed 99% CPU
+time.10.ptmalloc.131072:20.91 user 106.97 system 69.08 elapsed 185% CPU
+time.10.tcmalloc.131072:8.94 user 0.09 system 9.09 elapsed 99% CPU
+time.11.ptmalloc.64:10.82 user 6.34 system 7.92 elapsed 216% CPU
+time.11.tcmalloc.64:4.80 user 0.03 system 1.24 elapsed 387% CPU
+time.11.ptmalloc.128:14.58 user 8.61 system 9.81 elapsed 236% CPU
+time.11.tcmalloc.128:4.65 user 0.03 system 1.21 elapsed 384% CPU
+time.11.ptmalloc.256:17.38 user 10.98 system 10.75 elapsed 263% CPU
+time.11.tcmalloc.256:4.51 user 0.03 system 1.18 elapsed 384% CPU
+time.11.ptmalloc.512:19.18 user 11.71 system 10.95 elapsed 282% CPU
+time.11.tcmalloc.512:4.57 user 0.02 system 1.19 elapsed 384% CPU
+time.11.ptmalloc.1024:19.94 user 12.41 system 10.48 elapsed 308% CPU
+time.11.tcmalloc.1024:4.71 user 0.05 system 1.29 elapsed 367% CPU
+time.11.ptmalloc.2048:8.70 user 0.04 system 2.35 elapsed 371% CPU
+time.11.tcmalloc.2048:4.97 user 0.07 system 1.43 elapsed 350% CPU
+time.11.ptmalloc.4096:22.47 user 18.43 system 10.82 elapsed 377% CPU
+time.11.tcmalloc.4096:4.22 user 0.03 system 1.91 elapsed 221% CPU
+time.11.ptmalloc.8192:11.61 user 2.38 system 3.73 elapsed 374% CPU
+time.11.tcmalloc.8192:3.74 user 0.09 system 2.46 elapsed 155% CPU
+time.11.ptmalloc.16384:14.13 user 13.38 system 9.60 elapsed 286% CPU
+time.11.tcmalloc.16384:3.61 user 0.03 system 3.63 elapsed 100% CPU
+time.11.ptmalloc.32768:17.92 user 43.84 system 26.74 elapsed 230% CPU
+time.11.tcmalloc.32768:6.31 user 0.03 system 6.45 elapsed 98% CPU
+time.11.ptmalloc.65536:22.40 user 96.38 system 58.30 elapsed 203% CPU
+time.11.tcmalloc.65536:6.92 user 0.12 system 6.98 elapsed 100% CPU
+time.11.ptmalloc.131072:21.03 user 108.04 system 72.78 elapsed 177% CPU
+time.11.tcmalloc.131072:9.79 user 0.08 system 9.94 elapsed 99% CPU
+time.12.ptmalloc.64:12.23 user 7.16 system 8.38 elapsed 231% CPU
+time.12.tcmalloc.64:5.21 user 0.05 system 1.41 elapsed 371% CPU
+time.12.ptmalloc.128:16.97 user 10.19 system 10.47 elapsed 259% CPU
+time.12.tcmalloc.128:5.10 user 0.02 system 1.31 elapsed 390% CPU
+time.12.ptmalloc.256:19.99 user 12.10 system 11.57 elapsed 277% CPU
+time.12.tcmalloc.256:5.01 user 0.03 system 1.29 elapsed 390% CPU
+time.12.ptmalloc.512:21.85 user 12.66 system 11.46 elapsed 300% CPU
+time.12.tcmalloc.512:5.05 user 0.00 system 1.32 elapsed 379% CPU
+time.12.ptmalloc.1024:9.40 user 0.04 system 2.40 elapsed 393% CPU
+time.12.tcmalloc.1024:5.14 user 0.02 system 1.39 elapsed 369% CPU
+time.12.ptmalloc.2048:9.72 user 0.04 system 2.49 elapsed 391% CPU
+time.12.tcmalloc.2048:5.74 user 0.05 system 1.62 elapsed 355% CPU
+time.12.ptmalloc.4096:10.64 user 0.20 system 2.75 elapsed 393% CPU
+time.12.tcmalloc.4096:4.45 user 0.03 system 2.04 elapsed 218% CPU
+time.12.ptmalloc.8192:12.66 user 3.30 system 4.30 elapsed 371% CPU
+time.12.tcmalloc.8192:4.21 user 0.13 system 2.65 elapsed 163% CPU
+time.12.ptmalloc.16384:15.73 user 15.68 system 11.14 elapsed 281% CPU
+time.12.tcmalloc.16384:4.17 user 0.06 system 4.10 elapsed 102% CPU
+time.12.ptmalloc.32768:19.45 user 56.00 system 32.74 elapsed 230% CPU
+time.12.tcmalloc.32768:6.96 user 0.08 system 7.14 elapsed 98% CPU
+time.12.ptmalloc.65536:23.33 user 110.45 system 65.06 elapsed 205% CPU
+time.12.tcmalloc.65536:7.77 user 0.15 system 7.72 elapsed 102% CPU
+time.12.ptmalloc.131072:24.03 user 124.74 system 82.94 elapsed 179% CPU
+time.12.tcmalloc.131072:10.81 user 0.06 system 10.94 elapsed 99% CPU
+time.13.ptmalloc.64:14.08 user 7.60 system 8.85 elapsed 244% CPU
+time.13.tcmalloc.64:5.51 user 0.01 system 1.47 elapsed 375% CPU
+time.13.ptmalloc.128:18.20 user 10.98 system 10.99 elapsed 265% CPU
+time.13.tcmalloc.128:5.34 user 0.01 system 1.39 elapsed 382% CPU
+time.13.ptmalloc.256:21.48 user 13.94 system 12.25 elapsed 289% CPU
+time.13.tcmalloc.256:5.33 user 0.01 system 1.39 elapsed 381% CPU
+time.13.ptmalloc.512:24.22 user 14.84 system 12.97 elapsed 301% CPU
+time.13.tcmalloc.512:5.49 user 0.02 system 1.41 elapsed 389% CPU
+time.13.ptmalloc.1024:25.26 user 17.03 system 12.85 elapsed 328% CPU
+time.13.tcmalloc.1024:5.65 user 0.04 system 1.50 elapsed 378% CPU
+time.13.ptmalloc.2048:10.41 user 0.03 system 2.69 elapsed 387% CPU
+time.13.tcmalloc.2048:5.93 user 0.10 system 1.77 elapsed 339% CPU
+time.13.ptmalloc.4096:11.37 user 0.52 system 3.04 elapsed 391% CPU
+time.13.tcmalloc.4096:5.08 user 0.11 system 2.22 elapsed 233% CPU
+time.13.ptmalloc.8192:21.76 user 18.54 system 10.58 elapsed 380% CPU
+time.13.tcmalloc.8192:5.04 user 0.16 system 2.93 elapsed 177% CPU
+time.13.ptmalloc.16384:26.35 user 34.47 system 17.01 elapsed 357% CPU
+time.13.tcmalloc.16384:4.66 user 0.04 system 4.66 elapsed 100% CPU
+time.13.ptmalloc.32768:21.41 user 63.59 system 38.14 elapsed 222% CPU
+time.13.tcmalloc.32768:7.71 user 0.03 system 7.83 elapsed 98% CPU
+time.13.ptmalloc.65536:24.99 user 120.80 system 71.59 elapsed 203% CPU
+time.13.tcmalloc.65536:8.87 user 0.64 system 8.37 elapsed 113% CPU
+time.13.ptmalloc.131072:25.97 user 142.27 system 96.00 elapsed 175% CPU
+time.13.tcmalloc.131072:11.48 user 0.06 system 11.67 elapsed 98% CPU
+time.14.ptmalloc.64:15.01 user 9.11 system 9.41 elapsed 256% CPU
+time.14.tcmalloc.64:5.98 user 0.02 system 1.58 elapsed 378% CPU
+time.14.ptmalloc.128:20.34 user 12.72 system 11.62 elapsed 284% CPU
+time.14.tcmalloc.128:5.88 user 0.04 system 1.51 elapsed 392% CPU
+time.14.ptmalloc.256:24.26 user 14.95 system 12.92 elapsed 303% CPU
+time.14.tcmalloc.256:5.72 user 0.02 system 1.50 elapsed 381% CPU
+time.14.ptmalloc.512:27.28 user 16.45 system 13.89 elapsed 314% CPU
+time.14.tcmalloc.512:5.99 user 0.02 system 1.54 elapsed 388% CPU
+time.14.ptmalloc.1024:25.84 user 16.99 system 12.61 elapsed 339% CPU
+time.14.tcmalloc.1024:5.94 user 0.06 system 1.59 elapsed 375% CPU
+time.14.ptmalloc.2048:11.96 user 0.01 system 3.12 elapsed 382% CPU
+time.14.tcmalloc.2048:6.39 user 0.07 system 1.79 elapsed 359% CPU
+time.14.ptmalloc.4096:20.19 user 11.77 system 8.26 elapsed 386% CPU
+time.14.tcmalloc.4096:5.65 user 0.05 system 2.32 elapsed 244% CPU
+time.14.ptmalloc.8192:22.01 user 16.39 system 9.89 elapsed 387% CPU
+time.14.tcmalloc.8192:5.44 user 0.11 system 3.07 elapsed 180% CPU
+time.14.ptmalloc.16384:18.15 user 22.40 system 15.02 elapsed 269% CPU
+time.14.tcmalloc.16384:5.29 user 0.08 system 5.34 elapsed 100% CPU
+time.14.ptmalloc.32768:24.29 user 72.07 system 42.63 elapsed 225% CPU
+time.14.tcmalloc.32768:8.47 user 0.02 system 8.62 elapsed 98% CPU
+time.14.ptmalloc.65536:27.63 user 130.56 system 78.64 elapsed 201% CPU
+time.14.tcmalloc.65536:9.85 user 1.61 system 9.04 elapsed 126% CPU
+time.14.ptmalloc.131072:28.87 user 146.38 system 100.54 elapsed 174% CPU
+time.14.tcmalloc.131072:12.46 user 0.11 system 12.71 elapsed 98% CPU
+time.15.ptmalloc.64:16.25 user 10.05 system 9.82 elapsed 267% CPU
+time.15.tcmalloc.64:6.30 user 0.02 system 1.64 elapsed 385% CPU
+time.15.ptmalloc.128:22.33 user 13.23 system 12.24 elapsed 290% CPU
+time.15.tcmalloc.128:6.08 user 0.03 system 1.59 elapsed 384% CPU
+time.15.ptmalloc.256:26.56 user 16.57 system 13.70 elapsed 314% CPU
+time.15.tcmalloc.256:6.14 user 0.03 system 1.61 elapsed 382% CPU
+time.15.ptmalloc.512:29.68 user 18.08 system 14.56 elapsed 327% CPU
+time.15.tcmalloc.512:6.12 user 0.04 system 1.68 elapsed 364% CPU
+time.15.ptmalloc.1024:17.07 user 6.22 system 6.26 elapsed 371% CPU
+time.15.tcmalloc.1024:6.38 user 0.02 system 1.75 elapsed 364% CPU
+time.15.ptmalloc.2048:26.64 user 17.25 system 11.51 elapsed 381% CPU
+time.15.tcmalloc.2048:6.77 user 0.18 system 1.92 elapsed 361% CPU
+time.15.ptmalloc.4096:13.21 user 0.74 system 3.57 elapsed 390% CPU
+time.15.tcmalloc.4096:6.03 user 0.09 system 2.36 elapsed 258% CPU
+time.15.ptmalloc.8192:22.92 user 17.51 system 10.50 elapsed 385% CPU
+time.15.tcmalloc.8192:5.96 user 0.12 system 3.36 elapsed 180% CPU
+time.15.ptmalloc.16384:19.37 user 24.87 system 16.69 elapsed 264% CPU
+time.15.tcmalloc.16384:5.88 user 0.07 system 5.84 elapsed 101% CPU
+time.15.ptmalloc.32768:25.43 user 82.30 system 48.98 elapsed 219% CPU
+time.15.tcmalloc.32768:9.11 user 0.05 system 9.30 elapsed 98% CPU
+time.15.ptmalloc.65536:29.31 user 140.07 system 83.78 elapsed 202% CPU
+time.15.tcmalloc.65536:8.51 user 1.59 system 9.75 elapsed 103% CPU
+time.15.ptmalloc.131072:30.22 user 163.15 system 109.50 elapsed 176% CPU
+time.15.tcmalloc.131072:13.35 user 0.10 system 13.54 elapsed 99% CPU
+time.16.ptmalloc.64:17.69 user 10.11 system 10.11 elapsed 274% CPU
+time.16.tcmalloc.64:6.63 user 0.04 system 1.72 elapsed 387% CPU
+time.16.ptmalloc.128:23.05 user 14.37 system 12.75 elapsed 293% CPU
+time.16.tcmalloc.128:6.61 user 0.02 system 1.71 elapsed 387% CPU
+time.16.ptmalloc.256:29.11 user 19.35 system 14.57 elapsed 332% CPU
+time.16.tcmalloc.256:6.62 user 0.03 system 1.73 elapsed 382% CPU
+time.16.ptmalloc.512:31.65 user 18.71 system 14.71 elapsed 342% CPU
+time.16.tcmalloc.512:6.63 user 0.04 system 1.73 elapsed 383% CPU
+time.16.ptmalloc.1024:31.99 user 21.22 system 14.87 elapsed 357% CPU
+time.16.tcmalloc.1024:6.81 user 0.04 system 1.79 elapsed 382% CPU
+time.16.ptmalloc.2048:30.35 user 21.36 system 13.30 elapsed 388% CPU
+time.16.tcmalloc.2048:6.91 user 0.50 system 2.01 elapsed 367% CPU
+time.16.ptmalloc.4096:18.85 user 7.18 system 6.61 elapsed 393% CPU
+time.16.tcmalloc.4096:6.70 user 0.10 system 2.62 elapsed 259% CPU
+time.16.ptmalloc.8192:22.19 user 14.30 system 9.37 elapsed 389% CPU
+time.16.tcmalloc.8192:6.18 user 0.19 system 3.58 elapsed 177% CPU
+time.16.ptmalloc.16384:31.22 user 46.78 system 22.92 elapsed 340% CPU
+time.16.tcmalloc.16384:6.79 user 0.07 system 6.86 elapsed 99% CPU
+time.16.ptmalloc.32768:27.31 user 87.32 system 52.00 elapsed 220% CPU
+time.16.tcmalloc.32768:9.85 user 0.06 system 10.07 elapsed 98% CPU
+time.16.ptmalloc.65536:32.83 user 160.62 system 95.67 elapsed 202% CPU
+time.16.tcmalloc.65536:10.18 user 0.09 system 10.41 elapsed 98% CPU
+time.16.ptmalloc.131072:31.99 user 173.41 system 115.98 elapsed 177% CPU
+time.16.tcmalloc.131072:14.52 user 0.05 system 14.67 elapsed 99% CPU
+time.17.ptmalloc.64:19.38 user 11.61 system 10.61 elapsed 291% CPU
+time.17.tcmalloc.64:7.11 user 0.02 system 1.84 elapsed 386% CPU
+time.17.ptmalloc.128:26.25 user 16.15 system 13.53 elapsed 313% CPU
+time.17.tcmalloc.128:6.97 user 0.02 system 1.78 elapsed 390% CPU
+time.17.ptmalloc.256:30.66 user 18.36 system 14.97 elapsed 327% CPU
+time.17.tcmalloc.256:6.94 user 0.04 system 1.80 elapsed 387% CPU
+time.17.ptmalloc.512:33.71 user 22.79 system 15.95 elapsed 354% CPU
+time.17.tcmalloc.512:7.00 user 0.02 system 1.83 elapsed 381% CPU
+time.17.ptmalloc.1024:33.49 user 22.47 system 15.00 elapsed 373% CPU
+time.17.tcmalloc.1024:7.20 user 0.03 system 1.90 elapsed 380% CPU
+time.17.ptmalloc.2048:23.87 user 11.92 system 9.26 elapsed 386% CPU
+time.17.tcmalloc.2048:6.01 user 1.83 system 2.15 elapsed 363% CPU
+time.17.ptmalloc.4096:14.69 user 0.95 system 3.98 elapsed 392% CPU
+time.17.tcmalloc.4096:7.25 user 0.10 system 2.62 elapsed 279% CPU
+time.17.ptmalloc.8192:22.44 user 13.52 system 9.39 elapsed 382% CPU
+time.17.tcmalloc.8192:7.21 user 0.24 system 3.95 elapsed 188% CPU
+time.17.ptmalloc.16384:23.33 user 33.67 system 21.89 elapsed 260% CPU
+time.17.tcmalloc.16384:7.28 user 0.06 system 7.10 elapsed 103% CPU
+time.17.ptmalloc.32768:29.35 user 103.11 system 60.36 elapsed 219% CPU
+time.17.tcmalloc.32768:10.53 user 0.07 system 10.71 elapsed 98% CPU
+time.17.ptmalloc.65536:33.21 user 170.89 system 100.84 elapsed 202% CPU
+time.17.tcmalloc.65536:10.85 user 0.05 system 11.04 elapsed 98% CPU
+time.17.ptmalloc.131072:34.98 user 182.87 system 122.05 elapsed 178% CPU
+time.17.tcmalloc.131072:15.27 user 0.09 system 15.49 elapsed 99% CPU
+time.18.ptmalloc.64:21.08 user 12.15 system 11.43 elapsed 290% CPU
+time.18.tcmalloc.64:7.45 user 0.03 system 1.95 elapsed 383% CPU
+time.18.ptmalloc.128:27.65 user 17.26 system 14.03 elapsed 320% CPU
+time.18.tcmalloc.128:7.46 user 0.03 system 1.92 elapsed 389% CPU
+time.18.ptmalloc.256:32.78 user 20.55 system 15.70 elapsed 339% CPU
+time.18.tcmalloc.256:7.31 user 0.02 system 1.88 elapsed 389% CPU
+time.18.ptmalloc.512:33.31 user 20.06 system 15.05 elapsed 354% CPU
+time.18.tcmalloc.512:7.33 user 0.02 system 1.91 elapsed 383% CPU
+time.18.ptmalloc.1024:35.46 user 24.83 system 16.30 elapsed 369% CPU
+time.18.tcmalloc.1024:7.60 user 0.06 system 2.05 elapsed 373% CPU
+time.18.ptmalloc.2048:19.98 user 6.80 system 6.76 elapsed 395% CPU
+time.18.tcmalloc.2048:6.89 user 1.29 system 2.28 elapsed 357% CPU
+time.18.ptmalloc.4096:15.99 user 0.93 system 4.32 elapsed 391% CPU
+time.18.tcmalloc.4096:7.70 user 0.10 system 2.77 elapsed 280% CPU
+time.18.ptmalloc.8192:23.51 user 14.84 system 9.97 elapsed 384% CPU
+time.18.tcmalloc.8192:8.16 user 0.27 system 4.25 elapsed 197% CPU
+time.18.ptmalloc.16384:35.79 user 52.41 system 26.47 elapsed 333% CPU
+time.18.tcmalloc.16384:7.81 user 0.07 system 7.61 elapsed 103% CPU
+time.18.ptmalloc.32768:33.17 user 116.07 system 68.64 elapsed 217% CPU
+time.18.tcmalloc.32768:11.34 user 0.13 system 11.57 elapsed 99% CPU
+time.18.ptmalloc.65536:35.91 user 177.82 system 106.75 elapsed 200% CPU
+time.18.tcmalloc.65536:11.54 user 0.06 system 11.74 elapsed 98% CPU
+time.18.ptmalloc.131072:36.38 user 187.18 system 126.91 elapsed 176% CPU
+time.18.tcmalloc.131072:16.34 user 0.05 system 16.43 elapsed 99% CPU
+time.19.ptmalloc.64:22.90 user 13.23 system 11.82 elapsed 305% CPU
+time.19.tcmalloc.64:7.81 user 0.02 system 2.01 elapsed 388% CPU
+time.19.ptmalloc.128:30.13 user 18.58 system 14.77 elapsed 329% CPU
+time.19.tcmalloc.128:7.74 user 0.02 system 2.01 elapsed 386% CPU
+time.19.ptmalloc.256:35.33 user 21.41 system 16.35 elapsed 347% CPU
+time.19.tcmalloc.256:7.79 user 0.04 system 2.04 elapsed 382% CPU
+time.19.ptmalloc.512:39.30 user 26.22 system 17.84 elapsed 367% CPU
+time.19.tcmalloc.512:7.80 user 0.06 system 2.05 elapsed 381% CPU
+time.19.ptmalloc.1024:35.70 user 23.90 system 15.66 elapsed 380% CPU
+time.19.tcmalloc.1024:8.08 user 0.06 system 2.16 elapsed 376% CPU
+time.19.ptmalloc.2048:18.33 user 3.28 system 5.47 elapsed 394% CPU
+time.19.tcmalloc.2048:8.71 user 0.05 system 2.40 elapsed 363% CPU
+time.19.ptmalloc.4096:16.94 user 0.89 system 4.64 elapsed 383% CPU
+time.19.tcmalloc.4096:8.21 user 0.07 system 2.85 elapsed 289% CPU
+time.19.ptmalloc.8192:25.61 user 17.15 system 11.33 elapsed 377% CPU
+time.19.tcmalloc.8192:8.79 user 0.30 system 4.58 elapsed 198% CPU
+time.19.ptmalloc.16384:27.11 user 46.66 system 29.67 elapsed 248% CPU
+time.19.tcmalloc.16384:8.64 user 0.05 system 8.58 elapsed 101% CPU
+time.19.ptmalloc.32768:33.80 user 117.69 system 70.65 elapsed 214% CPU
+time.19.tcmalloc.32768:11.88 user 0.07 system 12.04 elapsed 99% CPU
+time.19.ptmalloc.65536:36.90 user 180.21 system 109.01 elapsed 199% CPU
+time.19.tcmalloc.65536:12.17 user 0.07 system 12.40 elapsed 98% CPU
+time.19.ptmalloc.131072:38.50 user 195.15 system 132.81 elapsed 175% CPU
+time.19.tcmalloc.131072:17.44 user 0.10 system 17.65 elapsed 99% CPU
+time.20.ptmalloc.64:23.37 user 13.74 system 11.86 elapsed 312% CPU
+time.20.tcmalloc.64:8.18 user 0.02 system 2.10 elapsed 389% CPU
+time.20.ptmalloc.128:31.29 user 19.97 system 15.53 elapsed 329% CPU
+time.20.tcmalloc.128:8.03 user 0.02 system 2.12 elapsed 378% CPU
+time.20.ptmalloc.256:38.40 user 25.65 system 18.25 elapsed 350% CPU
+time.20.tcmalloc.256:8.05 user 0.05 system 2.12 elapsed 380% CPU
+time.20.ptmalloc.512:40.60 user 27.70 system 18.46 elapsed 369% CPU
+time.20.tcmalloc.512:8.22 user 0.08 system 2.20 elapsed 375% CPU
+time.20.ptmalloc.1024:40.02 user 28.52 system 17.56 elapsed 390% CPU
+time.20.tcmalloc.1024:8.50 user 0.07 system 2.19 elapsed 391% CPU
+time.20.ptmalloc.2048:16.13 user 0.23 system 4.23 elapsed 386% CPU
+time.20.tcmalloc.2048:8.98 user 0.03 system 2.45 elapsed 367% CPU
+time.20.ptmalloc.4096:17.14 user 0.87 system 4.60 elapsed 391% CPU
+time.20.tcmalloc.4096:8.93 user 0.20 system 2.97 elapsed 306% CPU
+time.20.ptmalloc.8192:25.24 user 17.16 system 11.14 elapsed 380% CPU
+time.20.tcmalloc.8192:9.78 user 0.30 system 5.14 elapsed 195% CPU
+time.20.ptmalloc.16384:39.93 user 60.36 system 30.24 elapsed 331% CPU
+time.20.tcmalloc.16384:9.57 user 0.09 system 9.43 elapsed 102% CPU
+time.20.ptmalloc.32768:36.44 user 130.23 system 76.79 elapsed 217% CPU
+time.20.tcmalloc.32768:12.71 user 0.09 system 12.97 elapsed 98% CPU
+time.20.ptmalloc.65536:39.79 user 202.09 system 120.34 elapsed 200% CPU
+time.20.tcmalloc.65536:12.93 user 0.06 system 13.15 elapsed 98% CPU
+time.20.ptmalloc.131072:41.91 user 202.76 system 138.51 elapsed 176% CPU
+time.20.tcmalloc.131072:18.23 user 0.07 system 18.42 elapsed 99% CPU
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png
new file mode 100644
index 0000000..8c0ae6b
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png
new file mode 100644
index 0000000..24b2a27
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png
new file mode 100644
index 0000000..183a77b
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png
new file mode 100644
index 0000000..db59d61
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png
new file mode 100644
index 0000000..169546f
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png
new file mode 100644
index 0000000..6213021
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png
new file mode 100644
index 0000000..18715e3
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png
new file mode 100644
index 0000000..642e245
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png
new file mode 100644
index 0000000..aea1d67
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png
new file mode 100644
index 0000000..3a080de
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png
new file mode 100644
index 0000000..48ebdb6
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png
diff --git a/doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png b/doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png
new file mode 100644
index 0000000..3a99cbc
--- /dev/null
+++ b/doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png
diff --git a/doc/tcmalloc-opspersec.vs.size.1.threads.png b/doc/tcmalloc-opspersec.vs.size.1.threads.png
new file mode 100644
index 0000000..37d406d
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.1.threads.png
diff --git a/doc/tcmalloc-opspersec.vs.size.12.threads.png b/doc/tcmalloc-opspersec.vs.size.12.threads.png
new file mode 100644
index 0000000..d45458a
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.12.threads.png
diff --git a/doc/tcmalloc-opspersec.vs.size.16.threads.png b/doc/tcmalloc-opspersec.vs.size.16.threads.png
new file mode 100644
index 0000000..e8a3c9f
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.16.threads.png
diff --git a/doc/tcmalloc-opspersec.vs.size.2.threads.png b/doc/tcmalloc-opspersec.vs.size.2.threads.png
new file mode 100644
index 0000000..52d7aee
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.2.threads.png
diff --git a/doc/tcmalloc-opspersec.vs.size.20.threads.png b/doc/tcmalloc-opspersec.vs.size.20.threads.png
new file mode 100644
index 0000000..da0328a
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.20.threads.png
diff --git a/doc/tcmalloc-opspersec.vs.size.3.threads.png b/doc/tcmalloc-opspersec.vs.size.3.threads.png
new file mode 100644
index 0000000..1093e81
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.3.threads.png
diff --git a/doc/tcmalloc-opspersec.vs.size.4.threads.png b/doc/tcmalloc-opspersec.vs.size.4.threads.png
new file mode 100644
index 0000000..d7c79ef
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.4.threads.png
diff --git a/doc/tcmalloc-opspersec.vs.size.5.threads.png b/doc/tcmalloc-opspersec.vs.size.5.threads.png
new file mode 100644
index 0000000..779eec6
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.5.threads.png
diff --git a/doc/tcmalloc-opspersec.vs.size.8.threads.png b/doc/tcmalloc-opspersec.vs.size.8.threads.png
new file mode 100644
index 0000000..76c125a
--- /dev/null
+++ b/doc/tcmalloc-opspersec.vs.size.8.threads.png
diff --git a/doc/tcmalloc.html b/doc/tcmalloc.html
index 8ffa71b..9ea3a1a 100644
--- a/doc/tcmalloc.html
+++ b/doc/tcmalloc.html
@@ -3,7 +3,6 @@
 <html>
 <head>
 <title>TCMalloc : Thread-Caching Malloc</title>
-<link rel="stylesheet" href="../../designdocs/designstyle.css">
 <style type="text/css">
   em {
     color: red;
@@ -15,11 +14,12 @@
 
 <h1>TCMalloc : Thread-Caching Malloc</h1>
 
-<address>Sanjay Ghemawat</address>
+<address>Sanjay Ghemawat, Paul Menage &lt;opensource@google.com&gt;</address>
 
 <h2>Motivation</h2>
 
-TCMalloc is faster than the glibc malloc, ptmalloc2 and other mallocs
+TCMalloc is faster than the glibc 2.3 malloc (available as a separate
+library called ptmalloc2) and other mallocs
 that I have tested.  ptmalloc2 takes approximately 300 nanoseconds to
 execute a malloc/free pair on a 2.8 GHz P4 (for small objects).  The
 TCMalloc implementation takes approximately 50 nanoseconds for the
@@ -37,15 +37,14 @@ objects, TCMalloc tries to use fine grained and efficient spinlocks.
 ptmalloc2 also reduces lock contention by using per-thread arenas but
 there is a big problem with ptmalloc2's use of per-thread arenas.  In
 ptmalloc2 memory can never move from one arena to another.  This can
-lead to huge amounts of wasted space.  For example, in one of the
-MapReduce operations used by the segment-indexer, the map phase would
-allocate approximately 300MB of memory for URL canonicalization data
-structures.  When the map phase finished, another map phase would be
-started in the same address space.  If this map phase was assigned a
+lead to huge amounts of wasted space.  For example, in one Google application, the first phase would
+allocate approximately 300MB of memory for its data
+structures.  When the first phase finished, a second phase would be
+started in the same address space.  If this second phase was assigned a
 different arena than the one used by the first phase, this phase would
 not reuse any of the memory left after the first phase and would add
 another 300MB to the address space.  Similar memory blowup problems
-were also noticed in <code>gfs_chunkserver</code>.
+were also noticed in other applications.
 
 <p>
 Another benefit of TCMalloc is space-efficient representation of small
@@ -55,6 +54,33 @@ space overhead.  ptmalloc2 uses a four-byte header for each object and
 (I think) rounds up the size to a multiple of 8 bytes and ends up
 using <code>16N</code> bytes.
 
+
+<h2>Usage</h2>
+
+<p>To use TCmalloc, just link tcmalloc into your application via the
+"-ltcmalloc" linker flag.</p>
+
+<p>
+You can use tcmalloc in applications you didn't compile yourself, by
+using LD_PRELOAD:
+</p>
+<pre>
+   $ LD_PRELOAD="/usr/lib/libtcmalloc.so" <binary>
+</pre>
+<p>
+LD_PRELOAD is tricky, and we don't necessarily recommend this mode of
+usage.
+</p>
+
+<p>TCMalloc includes a <A HREF="heap_checker.html">heap checker</A>
+and <A HREF="heap_profiler.html">heap profiler</A> as well.</p>
+
+<p>If you'd rather link in a version of TCMalloc that does not include
+the heap profiler and checker (perhaps to reduce binary size for a
+static binary), you can link in <code>libtcmalloc_minimal</code>
+instead.</p>
+
+
 <h2>Overview</h2>
 
 TCMalloc assigns each thread a thread-local cache.  Small allocations
@@ -217,13 +243,122 @@ nice property that if a thread stops using a particular size, all
 objects of that size will quickly move from the thread cache to the
 central free list where they can be used by other threads.
 
+<h2>Performance Notes</h2>
+
+<h3>PTMalloc2 unittest</h3>
+The PTMalloc2 package (now part of glibc) contains a unittest program
+t-test1.c. This forks a number of threads and performs a series of
+allocations and deallocations in each thread; the threads do not
+communicate other than by synchronization in the memory allocator.
+
+<p> t-test1 (included in google-perftools/tests/tcmalloc, and compiled
+as ptmalloc_unittest1) was run with a varying numbers of threads
+(1-20) and maximum allocation sizes (64 bytes - 32Kbytes). These tests
+were run on a 2.4GHz dual Xeon system with hyper-threading enabled,
+using Linux glibc-2.3.2 from RedHat 9, with one million operations per
+thread in each test. In each case, the test was run once normally, and
+once with LD_PRELOAD=libtcmalloc.so.
+
+<p>The graphs below show the performance of TCMalloc vs PTMalloc2 for
+several different metrics. Firstly, total operations (millions) per elapsed
+second vs max allocation size, for varying numbers of threads. The raw
+data used to generate these graphs (the output of the "time" utility)
+is available in t-test1.times.txt.
+
+<p>
+<table>
+<tr>
+<td><img src="tcmalloc-opspersec.vs.size.1.threads.png"></td>
+<td><img src="tcmalloc-opspersec.vs.size.2.threads.png"></td>
+<td><img src="tcmalloc-opspersec.vs.size.3.threads.png"></td>
+</tr>
+<tr>
+<td><img src="tcmalloc-opspersec.vs.size.4.threads.png"></td>
+<td><img src="tcmalloc-opspersec.vs.size.5.threads.png"></td>
+<td><img src="tcmalloc-opspersec.vs.size.8.threads.png"></td>
+</tr>
+<tr>
+<td><img src="tcmalloc-opspersec.vs.size.12.threads.png"></td>
+<td><img src="tcmalloc-opspersec.vs.size.16.threads.png"></td>
+<td><img src="tcmalloc-opspersec.vs.size.20.threads.png"></td>
+</tr>
+</table>
+
+
+<ul> 
+
+<li> TCMalloc is much more consistently scalable than PTMalloc2 - for
+all thread counts >1 it achieves ~7-9 million ops/sec for small
+allocations, falling to ~2 million ops/sec for larger allocations. The
+single-thread case is an obvious outlier, since it is only able to
+keep a single processor busy and hence can achieve fewer
+ops/sec. PTMalloc2 has a much higher variance on operations/sec -
+peaking somewhere around 4 million ops/sec for small allocations and
+falling to <1 million ops/sec for larger allocations.
+
+<li> TCMalloc is faster than PTMalloc2 in the vast majority of cases,
+and particularly for small allocations. Contention between threads is
+less of a problem in TCMalloc.
+
+<li> TCMalloc's performance drops off as the allocation size
+increases. This is because the per-thread cache is garbage-collected
+when it hits a threshold (defaulting to 2MB). With larger allocation
+sizes, fewer objects can be stored in the cache before it is
+garbage-collected.
+
+<li> There is a noticeably drop in the TCMalloc performance at ~32K
+maximum allocation size; at larger sizes performance drops less
+quickly. This is due to the 32K maximum size of objects in the
+per-thread caches; for objects larger than this tcmalloc allocates
+from the central page heap.
+
+</ul>
+
+<p> Next, operations (millions) per second of CPU time vs number of threads, for
+max allocation size 64 bytes - 128 Kbytes.
+
+<p>
+<table>
+<tr>
+<td><img src="tcmalloc-opspercpusec.vs.threads.64.bytes.png"></td>
+<td><img src="tcmalloc-opspercpusec.vs.threads.256.bytes.png"></td>
+<td><img src="tcmalloc-opspercpusec.vs.threads.1024.bytes.png"></td>
+</tr>
+<tr>
+<td><img src="tcmalloc-opspercpusec.vs.threads.4096.bytes.png"></td>
+<td><img src="tcmalloc-opspercpusec.vs.threads.8192.bytes.png"></td>
+<td><img src="tcmalloc-opspercpusec.vs.threads.16384.bytes.png"></td>
+</tr>
+<tr>
+<td><img src="tcmalloc-opspercpusec.vs.threads.32768.bytes.png"></td>
+<td><img src="tcmalloc-opspercpusec.vs.threads.65536.bytes.png"></td>
+<td><img src="tcmalloc-opspercpusec.vs.threads.131072.bytes.png"></td>
+</tr>
+</table>
+
+<p> Here we see again that TCMalloc is both more consistent and more
+efficient than PTMalloc2. For max allocation sizes &lt;32K, TCMalloc
+typically achieves ~2-2.5 million ops per second of CPU time with a
+large number of threads, whereas PTMalloc achieves generally 0.5-1
+million ops per second of CPU time, with a lot of cases achieving much
+less than this figure. Above 32K max allocation size, TCMalloc drops
+to 1-1.5 million ops per second of CPU time, and PTMalloc drops almost
+to zero for large numbers of threads (i.e. with PTMalloc, lots of CPU
+time is being burned spinning waiting for locks in the heavily
+multi-threaded case).
+
 <h2>Caveats</h2>
 
-TCMalloc may be somewhat more memory hungry than other mallocs, (but
-tends not to have the huge blowups that can happen with other
-mallocs).  In particular, at startup TCMalloc allocates approximately
-6 MB of memory.  It would be easy to roll a specialized version
-that trades-off a little bit of speed for more space efficiency.
+<p>For some systems, TCMalloc may not work correctly on with
+applications that aren't linked against libpthread.so (or the
+equivalent on your OS). It should work on Linux using glibc 2.3, but
+other OS/libc combinations have not been tested.
+
+<p>TCMalloc may be somewhat more memory hungry than other mallocs,
+though it tends not to have the huge blowups that can happen with
+other mallocs.  In particular, at startup TCMalloc allocates
+approximately 6 MB of memory.  It would be easy to roll a specialized
+version that trades a little bit of speed for more space efficiency.
 
 <p>
 TCMalloc currently does not return any memory to the system.
@@ -235,28 +370,6 @@ objects using the system malloc, and may try to pass them
 to TCMalloc for deallocation.  TCMalloc will not be able
 to handle such objects.
 
-<h2>Performance Notes</h2>
-
-Here is a log of some of the performance improvements seen
-by switching to tcmalloc:
-<p>
-
-<center>
-<table frame=box rules=all cellpadding=5>
-<tr> <th>Date       <th>Program                <th>Tester        <th>Improvement            </tr>
-<tr> <td>2003/10/30 <td>indexserver            <td>Gauthum       <td>5.8% speedup</tr>
-<tr> <td>2003/10/30 <td>Caribou storage server <td>Peter Mattis  <td>10% speedup</tr>
-<tr> <td>2003/11/28 <td>indexserver            <td>Paul Menage   <td>Allows 9 microshards instead of 8 on 4GB Xeons</tr>
-<tr> <td>2003/12/15 <td>concentrator           <td>Andrew Kirmse <td>Stopped "leak" of several hundred KB per minute</tr>
-</table>
-</center>
-
-<p>
-<address>
-October 26, 2003<br>
-This document is <A HREF="http://www.corp.google.com/confidential.html">
-Google Confidential</A>.
-</address>
 
 </body>
 </html>
diff --git a/packages/deb/changelog b/packages/deb/changelog
index 7db9caa..97a5436 100644
--- a/packages/deb/changelog
+++ b/packages/deb/changelog
@@ -2,4 +2,4 @@ google-perftools (0.1-1) unstable; urgency=low
 
   * Initial release.
 
- -- El Goog <opensource@google.com>  Tue, 15 Feb 2005 08:07:33 -0800
+ -- Google Inc. <opensource@google.com>  Fri, 11 Mar 2005 08:07:33 -0800
diff --git a/packages/deb/control b/packages/deb/control
index 51692d2..379a5b1 100644
--- a/packages/deb/control
+++ b/packages/deb/control
@@ -1,7 +1,7 @@
 Source: google-perftools
 Priority: optional
-Maintainer: El Goog <opensource@google.com>
-Build-Depends: debhelper (>= 4.0.0), binutils, coreutils
+Maintainer: Google Inc. <opensource@google.com>
+Build-Depends: debhelper (>= 4.0.0), binutils
 Standards-Version: 3.6.1
 
 Package: libgoogle-perftools-dev
@@ -11,7 +11,7 @@ Depends: libgoogle-perftools0 (= ${Source-Version})
 Description: libraries for CPU and heap analysis, plus an efficient thread-caching malloc
  The google-perftools package contains some utilities to improve and
  analyze the performance of C++ programs.  This includes an optimized
- thread-caching  malloc() and cpu and heap profiling utilities.  The
+ thread-caching malloc() and cpu and heap profiling utilities.  The
  devel package contains static and debug libraries and header files
  for developing applications that use the google-perftools package.
 
@@ -22,4 +22,4 @@ Depends: ${shlibs:Depends}
 Description: libraries for CPU and heap analysis, plus an efficient thread-caching malloc
  The google-perftools package contains some utilities to improve and
  analyze the performance of C++ programs.  This includes an optimized
- thread-caching  malloc() and cpu and heap profiling utilities.
+ thread-caching malloc() and cpu and heap profiling utilities.
diff --git a/packages/deb/copyright b/packages/deb/copyright
index 401e871..725a37d 100644
--- a/packages/deb/copyright
+++ b/packages/deb/copyright
@@ -1,4 +1,4 @@
-This package was debianized by El Goog <opensource@google.com> on
+This package was debianized by Google Inc. <opensource@google.com> on
 15 February 2005.
 
 It was downloaded from http://code.google.com/
diff --git a/packages/deb/files b/packages/deb/files
deleted file mode 100644
index e69de29..0000000
--- a/packages/deb/files
+++ /dev/null
diff --git a/packages/deb/libgoogle-perftools-dev.install b/packages/deb/libgoogle-perftools-dev.install
index cc83888..d81ff20 100644
--- a/packages/deb/libgoogle-perftools-dev.install
+++ b/packages/deb/libgoogle-perftools-dev.install
@@ -2,3 +2,7 @@ usr/include/google/*
 usr/lib/lib*.so
 usr/lib/lib*.a
 usr/lib/*.la
+debian/tmp/usr/include/google/*
+debian/tmp/usr/lib/lib*.so
+debian/tmp/usr/lib/lib*.a
+debian/tmp/usr/lib/*.la
diff --git a/packages/deb/libgoogle-perftools0.install b/packages/deb/libgoogle-perftools0.install
index 983962c..047eed5 100644
--- a/packages/deb/libgoogle-perftools0.install
+++ b/packages/deb/libgoogle-perftools0.install
@@ -1,2 +1,4 @@
 usr/lib/lib*.so.*
-usr/bin/pprof
+usr/bin/pprof*
+debian/tmp/usr/lib/lib*.so.*
+debian/tmp/usr/bin/pprof*
diff --git a/packages/rpm.sh b/packages/rpm.sh
index de936a3..a5699a2 100755
--- a/packages/rpm.sh
+++ b/packages/rpm.sh
@@ -68,7 +68,8 @@ fi
 
 rm -rf "$destdir"
 mkdir -p "$destdir"
-mv "$RPM_SOURCE_DIR"/*/"$fullname"*.rpm "$destdir"
+# We want to get not only the main package but devel etc, hence the middle *
+mv "$RPM_SOURCE_DIR"/*/"${PACKAGE}"-*"${VERSION}"*.rpm "$destdir"
 
 echo
 echo "The rpm package file(s) are located in $PWD/$destdir"
diff --git a/packages/rpm/rpm.spec b/packages/rpm/rpm.spec
index 9bb8238..24c0432 100644
--- a/packages/rpm/rpm.spec
+++ b/packages/rpm/rpm.spec
@@ -8,10 +8,11 @@ Summary: Performance tools for C++
 Version: %ver
 Release: %rel
 Group: Development/Libraries
-URL: http://google.sourceforge.net
+URL: http://goog-perftools.sourceforge.net
 Copyright: BSD
-Packager: El Goog <opensource@google.com>
-Source: http://google.sourceforge.net/%{NAME}-%{PACKAGE_VERSION}.tar.gz
+Vendor: Google
+Packager: Google <opensource@google.com>
+Source: http://goog-perftools.sourceforge.net/%{NAME}-%{PACKAGE_VERSION}.tar.gz
 Distribution: Redhat 7 and above.
 Buildroot: %{_tmppath}/%{name}-root
 Docdir: %prefix/doc
@@ -31,7 +32,7 @@ The %name-devel package contains static and debug libraries and header
 files for developing applications that use the %name package.
 
 %changelog
-    * Tue Feb 8 2005 <opensource@google.com>
+    * Fri Mar 11 2005 <opensource@google.com>
     - First draft
 
 %prep
@@ -51,18 +52,16 @@ rm -rf $RPM_BUILD_ROOT
 %files
 %defattr(-,root,root)
 
-%doc AUTHORS COPYING ChangeLog INSTALL NEWS README TODO doc/cpu_profiler.html doc/heap-example1.png doc/heap_profiler.html doc/overview.gif doc/pageheap.gif doc/pprof-test-big.gif doc/pprof-test.gif doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif doc/spanmap.gif doc/tcmalloc.html doc/threadheap.gif
+%doc AUTHORS COPYING ChangeLog INSTALL NEWS README TODO doc/cpu_profiler.html doc/heap-example1.png doc/heap_checker.html doc/heap_profiler.html doc/index.html doc/overview.dot doc/overview.gif doc/pageheap.dot doc/pageheap.gif doc/pprof-test-big.gif doc/pprof-test.gif doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif doc/spanmap.dot doc/spanmap.gif doc/t-test1.times.txt doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png doc/tcmalloc-opspersec.vs.size.1.threads.png doc/tcmalloc-opspersec.vs.size.12.threads.png doc/tcmalloc-opspersec.vs.size.16.threads.png doc/tcmalloc-opspersec.vs.size.2.threads.png doc/tcmalloc-opspersec.vs.size.20.threads.png doc/tcmalloc-opspersec.vs.size.3.threads.png doc/tcmalloc-opspersec.vs.size.4.threads.png doc/tcmalloc-opspersec.vs.size.5.threads.png doc/tcmalloc-opspersec.vs.size.8.threads.png doc/tcmalloc.html doc/threadheap.dot doc/threadheap.gif
 
 %{prefix}/lib/libstacktrace.so.0
 %{prefix}/lib/libstacktrace.so.0.0.0
 %{prefix}/lib/libtcmalloc.so.0
 %{prefix}/lib/libtcmalloc.so.0.0.0
+%{prefix}/lib/libtcmalloc_minimal.so.0
+%{prefix}/lib/libtcmalloc_minimal.so.0.0.0
 %{prefix}/lib/libprofiler.so.0
 %{prefix}/lib/libprofiler.so.0.0.0
-%{prefix}/lib/libheapprofiler.so.0
-%{prefix}/lib/libheapprofiler.so.0.0.0
-%{prefix}/lib/libheapchecker.so.0
-%{prefix}/lib/libheapchecker.so.0.0.0
 %{prefix}/bin/pprof
 %{prefix}/man/man1/pprof.1.gz
 
@@ -77,13 +76,9 @@ rm -rf $RPM_BUILD_ROOT
 %{prefix}/lib/libtcmalloc.a
 %{prefix}/lib/libtcmalloc.la
 %{prefix}/lib/libtcmalloc.so
+%{prefix}/lib/libtcmalloc_minimal.a
+%{prefix}/lib/libtcmalloc_minimal.la
+%{prefix}/lib/libtcmalloc_minimal.so
 %{prefix}/lib/libprofiler.a
 %{prefix}/lib/libprofiler.la
 %{prefix}/lib/libprofiler.so
-%{prefix}/lib/libheapprofiler.a
-%{prefix}/lib/libheapprofiler.la
-%{prefix}/lib/libheapprofiler.so
-%{prefix}/lib/libheapchecker.a
-%{prefix}/lib/libheapchecker.la
-%{prefix}/lib/libheapchecker.so
-
diff --git a/src/addressmap-inl.h b/src/addressmap-inl.h
index 1daaeb2..93fe0a0 100644
--- a/src/addressmap-inl.h
+++ b/src/addressmap-inl.h
@@ -80,7 +80,7 @@
 #ifndef _ADDRESSMAP_H
 #define _ADDRESSMAP_H
 
-#include "google/perftools/config.h"
+#include "config.h"
 #include <stddef.h>
 #include <string.h>
 #if defined HAVE_STDINT_H
@@ -97,6 +97,7 @@ class AddressMap {
   typedef void* (*Allocator)(size_t);
   typedef void  (*DeAllocator)(void*);
   typedef void* Key;
+  typedef void  (*IteratorCallback)(Key, Value);
 
   // Create an AddressMap that uses the specified allocator/deallocator.
   // The allocator/deallocator should behave like malloc/free.
@@ -117,6 +118,10 @@ class AddressMap {
   // and returns true.  Else returns false.
   bool FindAndRemove(Key key, Value* removed_value);
 
+  // Iterate over the address map calling 'callback'
+  // for all stored key-value pairs.
+  void Iterate(IteratorCallback callback) const;
+
  private:
   typedef uintptr_t Number;
 
@@ -322,4 +327,17 @@ bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) {
   return false;
 }
 
+template <class Value>
+void AddressMap<Value>::Iterate(IteratorCallback callback) const {
+  for (int h = 0; h < kHashSize; ++h) {
+    for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
+      for (int b = 0; b < kClusterBlocks; ++b) {
+        for (const Entry* e = c->blocks[b]; e != NULL; e = e->next) {
+          callback(e->key, e->value);
+        }
+      }
+    }
+  }
+}
+
 #endif /* _ADDRESSMAP_H */
diff --git a/src/google/perftools/basictypes.h b/src/base/basictypes.h
index ed6af90..37e0196 100644
--- a/src/google/perftools/basictypes.h
+++ b/src/base/basictypes.h
@@ -30,7 +30,7 @@
 #ifndef _BASICTYPES_H_
 #define _BASICTYPES_H_
 
-#include <google/perftools/config.h>
+#include "config.h"
 
 // To use this in an autoconf setting, make sure you run the following
 // autoconf macros:
diff --git a/src/base/commandlineflags.h b/src/base/commandlineflags.h
index a9aceba..c0d11ca 100644
--- a/src/base/commandlineflags.h
+++ b/src/base/commandlineflags.h
@@ -49,7 +49,7 @@
 #define BASE_COMMANDLINEFLAGS_H__
 
 #include <string>
-#include <google/perftools/basictypes.h>
+#include "base/basictypes.h"
 
 #define DECLARE_VARIABLE(type, name)                                          \
   namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead {  \
diff --git a/src/base/elfcore.c b/src/base/elfcore.c
new file mode 100644
index 0000000..d7bce9a
--- /dev/null
+++ b/src/base/elfcore.c
@@ -0,0 +1,1046 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#include "base/elfcore.h"
+#if defined DUMPER
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/unistd.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "base/thread_lister.h"
+
+/* Definitions missing from the standard header files                        */
+#ifndef NT_PRFPXREG
+#define NT_PRFPXREG       20
+#endif
+#ifndef PTRACE_GETFPXREGS
+#define PTRACE_GETFPXREGS ((enum __ptrace_request)18)
+#endif
+#ifndef PR_GET_DUMPABLE
+#define PR_GET_DUMPABLE   3
+#endif
+#ifndef PR_SET_DUMPABLE
+#define PR_SET_DUMPABLE   4
+#endif
+
+
+/* Data structures found in x86-32/64 core dumps on Linux; similar data
+ * structures are defined in /usr/include/{linux,asm}/... but those
+ * headers conflict with the rest of the libc headers. So we cannot
+ * include them here.
+ */
+
+typedef struct i386_fpxregs {   /* SSE registers                             */
+  uint16_t  cwd;
+  uint16_t  swd;
+  uint16_t  twd;
+  uint16_t  fop;
+  uint32_t  fip;
+  uint32_t  fcs;
+  uint32_t  foo;
+  uint32_t  fos;
+  uint32_t  mxcsr;
+  uint32_t  mxcsr_mask;
+  uint32_t  st_space[32];       /*  8*16 bytes for each FP-reg  = 128 bytes  */
+  uint32_t  xmm_space[64];      /* 16*16 bytes for each XMM-reg = 128 bytes  */
+  uint32_t  padding[24];
+} i386_fpxregs;
+
+
+#ifdef __x86_64__
+/* Linux on x86-64 stores all FPU registers in the SSE structure             */
+typedef  i386_fpxregs i386_fpregs;
+#else
+typedef struct i386_fpregs {    /* FPU registers                             */
+  uint32_t  cwd;
+  uint32_t  swd;
+  uint32_t  twd;
+  uint32_t  fip;
+  uint32_t  fcs;
+  uint32_t  foo;
+  uint32_t  fos;
+  uint32_t  st_space[20];       /* 8*10 bytes for each FP-reg = 80 bytes     */
+} i386_fpregs;
+#endif
+
+
+typedef struct i386_timeval {   /* Time value with microsecond resolution    */
+  long tv_sec;                  /* Seconds                                   */
+  long tv_usec;                 /* Microseconds                              */
+} i386_timeval;
+
+
+typedef struct i386_siginfo {   /* Information about signal (unused)         */
+  int32_t si_signo;             /* Signal number                             */
+  int32_t si_code;              /* Extra code                                */
+  int32_t si_errno;             /* Errno                                     */
+} i386_siginfo;
+
+
+typedef struct i386_prstatus {  /* Information about thread; includes CPU reg*/
+  struct i386_siginfo pr_info;  /* Info associated with signal               */
+  uint16_t       pr_cursig;     /* Current signal                            */
+  unsigned long  pr_sigpend;    /* Set of pending signals                    */
+  unsigned long  pr_sighold;    /* Set of held signals                       */
+  pid_t          pr_pid;        /* Process ID                                */
+  pid_t          pr_ppid;       /* Parent's process ID                       */
+  pid_t          pr_pgrp;       /* Group ID                                  */
+  pid_t          pr_sid;        /* Session ID                                */
+  i386_timeval   pr_utime;      /* User time                                 */
+  i386_timeval   pr_stime;      /* System time                               */
+  i386_timeval   pr_cutime;     /* Cumulative user time                      */
+  i386_timeval   pr_cstime;     /* Cumulative system time                    */
+  i386_regs      pr_reg;        /* CPU registers                             */
+  uint32_t       pr_fpvalid;    /* True if math co-processor being used      */
+} i386_prstatus;
+
+
+typedef struct i386_prpsinfo {  /* Information about process                 */
+  unsigned char  pr_state;      /* Numeric process state                     */
+  char           pr_sname;      /* Char for pr_state                         */
+  unsigned char  pr_zomb;       /* Zombie                                    */
+  signed char    pr_nice;       /* Nice val                                  */
+  unsigned long  pr_flag;       /* Flags                                     */
+#ifdef __x86_64__
+  uint32_t       pr_uid;        /* User ID                                   */
+  uint32_t       pr_gid;        /* Group ID                                  */
+#else
+  uint16_t       pr_uid;        /* User ID                                   */
+  uint16_t       pr_gid;        /* Group ID                                  */
+#endif
+  pid_t          pr_pid;        /* Process ID                                */
+  pid_t          pr_ppid;       /* Parent's process ID                       */
+  pid_t          pr_pgrp;       /* Group ID                                  */
+  pid_t          pr_sid;        /* Session ID                                */
+  char           pr_fname[16];  /* Filename of executable                    */
+  char           pr_psargs[80]; /* Initial part of arg list                  */
+} i386_prpsinfo;
+
+
+typedef struct i386_user {      /* Ptrace returns this data for thread state */
+  i386_regs      regs;          /* CPU registers                             */
+  unsigned long  fpvalid;       /* True if math co-processor being used      */
+  i386_fpregs    fpregs;        /* FPU registers                             */
+  unsigned long  tsize;         /* Text segment size in pages                */
+  unsigned long  dsize;         /* Data segment size in pages                */
+  unsigned long  ssize;         /* Stack segment size in pages               */
+  unsigned long  start_code;    /* Starting virtual address of text          */
+  unsigned long  start_stack;   /* Starting virtual address of stack area    */
+  unsigned long  signal;        /* Signal that caused the core dump          */
+  unsigned long  reserved;      /* No longer used                            */
+  i386_regs      *regs_ptr;     /* Used by gdb to help find the CPU registers*/
+  i386_fpregs    *fpregs_ptr;   /* Pointer to FPU registers                  */
+  unsigned long  magic;         /* Magic for old A.OUT core files            */
+  char           comm[32];      /* User command that was responsible         */
+  unsigned long  debugreg[8];
+  unsigned long  error_code;    /* CPU error code or 0                       */
+  unsigned long  fault_address; /* CR3 or 0                                  */
+} i386_user;
+
+
+#ifdef __x86_64__
+  #define ELF_CLASS ELFCLASS64
+  #define ELF_ARCH  EM_X86_64
+  #define Ehdr      Elf64_Ehdr
+  #define Phdr      Elf64_Phdr
+  #define Shdr      Elf64_Shdr
+  #define Nhdr      Elf64_Nhdr
+#else
+  #define ELF_CLASS ELFCLASS32
+  #define ELF_ARCH  EM_386
+  #define Ehdr      Elf32_Ehdr
+  #define Phdr      Elf32_Phdr
+  #define Shdr      Elf32_Shdr
+  #define Nhdr      Elf32_Nhdr
+#endif
+
+
+/* After forking, we must make sure to only call system calls.               */
+#if __BOUNDED_POINTERS__
+  #error "Need to port invocations of syscalls for bounded ptrs"
+#else
+  /* The code in this file gets executed after threads have been suspended.
+   * As a consequence, we cannot call any functions that acquire locks.
+   * Unfortunately, libc wraps most system calls (e.g. in order to implement
+   * pthread_atfork, and to make calls cancellable), which means we cannot
+   * call these functions. Instead, we have to call syscall() directly.
+   */
+  #include <stdarg.h>
+  #include <syscall.h>
+  #ifdef __x86_64__
+    #define sys_recvmsg(s,m,f)      syscall(SYS_recvmsg,    (s), (m), (f))
+    #define sys_sendmsg(s,m,f)      syscall(SYS_sendmsg,    (s), (m), (f))
+    #define sys_shutdown(s,h)       syscall(SYS_shutdown,   (s), (h))
+    #define sys_sigaction(s,a,o)    syscall(SYS_rt_sigaction,    (s), (a),(o),\
+                                                                       _NSIG/8)
+    #define sys_sigprocmask(h,s,o)  syscall(SYS_rt_sigprocmask,  (h), (s),(o),\
+                                                                       _NSIG/8)
+    #define sys_socketpair(d,t,p,s) syscall(SYS_socketpair, (d), (t), (p),(s))
+    #define sys_waitpid(p,s,o)      syscall(SYS_wait4, (p), (s), (o),(void *)0)
+  #else
+    static int sys_socketcall(int op, ...) {
+      int rc;
+      va_list ap;
+      va_start(ap, op);
+      rc = syscall(SYS_socketcall, op, ap);
+      va_end(ap);
+      return rc;
+    }
+    #define sys_recvmsg(s,m,f)      sys_socketcall(17,      (s), (m), (f))
+    #define sys_sendmsg(s,m,f)      sys_socketcall(16,      (s), (m), (f))
+    #define sys_shutdown(s,h)       sys_socketcall(13,      (s), (h))
+    #define sys_sigaction(s,a,o)    syscall(SYS_sigaction,  (s), (a), (o))
+    #define sys_sigprocmask(h,s,o)  syscall(SYS_sigprocmask,(h), (s), (o))
+    #define sys_socketpair(d,t,p,s) sys_socketcall(8,       (d), (t), (p),(s))
+    #define sys_waitpid(p,s,o)      syscall(SYS_waitpid,    (p), (s), (o))
+  #endif
+  #define sys_close(f)              syscall(SYS_close,      (f))
+  #define sys_exit(r)               syscall(SYS_exit,       (r))
+  #define sys_fork()                syscall(SYS_fork)
+  #define sys_getegid()             syscall(SYS_getegid)
+  #define sys_geteuid()             syscall(SYS_geteuid)
+  #define sys_getpgrp()             syscall(SYS_getpgrp)
+  #define sys_getpid()              syscall(SYS_getpid)
+  #define sys_getppid()             syscall(SYS_getppid)
+  #define sys_getpriority(a,b)      syscall(SYS_getpriority)
+  #define sys_getrlimit(r,l)        syscall(SYS_getrlimit,  (r), (l))
+  #define sys_getsid(p)             syscall(SYS_getsid,     (p))
+  #define sys_open(f,p,m)           syscall(SYS_open,       (f), (p), (m))
+  #define sys_pipe(f)               syscall(SYS_pipe,       (f))
+  #define sys_prctl(o,a)            syscall(SYS_prctl,      (o), (a))
+  #define sys_ptrace(r,p,a,d)       syscall(SYS_ptrace,     (r), (p), (a),(d))
+  #define sys_read(f,b,c)           syscall(SYS_read,       (f), (b), (c))
+  #define sys_readlink(p,b,s)       syscall(SYS_readlink,   (p), (b), (s))
+  #define sys_write(f,b,c)          syscall(SYS_write,      (f), (b), (c))
+
+  static int sys_sysconf(int name) {
+    extern int __getpagesize(void);
+    switch (name) {
+      case _SC_OPEN_MAX: {
+        struct rlimit ru;
+        return sys_getrlimit(RLIMIT_NOFILE, &ru) < 0 ? 8192 : ru.rlim_cur;
+      }
+      case _SC_PAGESIZE:
+        return __getpagesize();
+      default:
+        errno = ENOSYS;
+        return -1;
+    }
+  }
+
+  static pid_t sys_gettid() {
+    #ifndef SYS_gettid
+      #define SYS_gettid 224
+    #endif
+    pid_t tid = syscall(SYS_gettid);
+    if (tid != -1) {
+      return tid;
+    }
+    return sys_getpid();
+  }
+#endif
+
+
+/* Re-runs fn until it doesn't cause EINTR
+ */
+
+#define NO_INTR(fn)   do {} while ((fn) < 0 && errno == EINTR)
+
+/* Wrapper for read() which is guaranteed to never return EINTR.
+ */
+static ssize_t c_read(int f, const void *buf, size_t bytes) {
+  if (bytes > 0) {
+    ssize_t rc;
+    NO_INTR(rc = sys_read(f, buf, bytes));
+    return rc;
+  }
+  return 0;
+}
+
+/* Wrapper for write() which is guaranteed to never return EINTR nor
+ * short writes.
+ */
+static ssize_t c_write(int f, const void *void_buf, size_t bytes) {
+  const unsigned char *buf = (const unsigned char*)void_buf;
+  size_t len = bytes;
+  while (len > 0) {
+    ssize_t rc;
+    NO_INTR(rc = sys_write(f, buf, len));
+    if (rc < 0)
+      return rc;
+    else if (rc == 0)
+      break;
+    buf += rc;
+    len -= rc;
+  }
+  return bytes;
+}
+
+
+struct io {
+  int fd;
+  unsigned char *data, *end;
+  unsigned char buf[4096];
+};
+
+
+/* Reads one character from the "io" file. This function has the same
+ * semantics as fgetc(), but we cannot call any library functions at this
+ * time.
+ */
+static int GetChar(struct io *io) {
+  unsigned char *ptr = io->data;
+  if (ptr == io->end) {
+    /* Even though we are parsing one character at a time, read in larger
+     * chunks.
+     */
+    ssize_t n = c_read(io->fd, io->buf, sizeof(io->buf));
+    if (n <= 0) {
+      if (n == 0)
+        errno = 0;
+      return -1;
+    }
+    ptr = &io->buf[0];
+    io->end = &io->buf[n];
+  }
+  io->data = ptr+1;
+  return *ptr;
+}
+
+
+/* Place the hex number read from "io" into "*hex".  The first non-hex
+ * character is returned (or -1 in the case of end-of-file).
+ */
+static int GetHex(struct io *io, size_t *hex) {
+  int ch;
+  *hex = 0;
+  while (((ch = GetChar(io)) >= '0' && ch <= '9') ||
+         (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'))
+    *hex = (*hex << 4) | (ch < 'A' ? ch - '0' : (ch & 0xF) + 9);
+  return ch;
+}
+
+
+/* Computes the amount of leading zeros in a memory region.
+ */
+static size_t LeadingZeros(int *loopback, void *mem, size_t len,
+                           size_t pagesize) {
+  char   buf[pagesize];
+  size_t count;
+  char   *ptr = 0;
+  for (count = 0; count < len; ) {
+    /* Read a page by going through the pipe. Assume that we can write at
+     * least one page without blocking.
+     *
+     * "Normal" kernels do not require this hack. But some of the security
+     * patches (e.g. grsec) can be configured to disallow read access of
+     * executable pages. So, directly scanning the memory range would
+     * result in a segmentation fault.
+     *
+     * If we cannot access a page, we assume that it was all zeros.
+     */
+    if ((count % pagesize) == 0) {
+      if (c_write(loopback[1], (char *)mem + count, pagesize) < 0 ||
+          c_read(loopback[0],  buf,                 pagesize) < 0) {
+        count += pagesize;
+        continue;
+      } else
+        ptr = buf;
+    }
+    if (*ptr++)
+      break;
+    count++;
+  }
+  return count & ~(pagesize-1);
+}
+
+
+/* This function is invoked from a seperate process. It has access to a
+ * copy-on-write copy of the parents address space, and all crucial
+ * information about the parent has been computed by the caller.
+ */
+static void CreateElfCore(int fd, i386_prpsinfo *prpsinfo, i386_user *user,
+                          i386_prstatus *prstatus, int num_threads,
+                          pid_t *pids, i386_regs *regs, i386_fpregs *fpregs,
+                          i386_fpxregs *fpxregs, size_t pagesize) {
+  /* Count the number of mappings in "/proc/self/maps". We are guaranteed
+   * that this number is not going to change while this function executes.
+   */
+  int       num_mappings = 0;
+  struct io io;
+  int       loopback[2] = { -1, -1 };
+
+  if (sys_pipe(loopback) < 0)
+    goto done;
+
+  io.data = io.end = 0;
+  NO_INTR(io.fd = sys_open("/proc/self/maps", O_RDONLY, 0));
+  if (io.fd >= 0) {
+    int i, ch;
+    while ((ch = GetChar(&io)) >= 0) {
+      num_mappings += (ch == '\n');
+    }
+    if (errno != 0) {
+   read_error:
+      NO_INTR(sys_close(io.fd));
+      goto done;
+    }
+    NO_INTR(sys_close(io.fd));
+
+    /* Read all mappings. This requires re-opening "/proc/self/maps"         */
+    /* scope */ {
+      struct {
+        size_t start_address, end_address, offset;
+        int   flags;
+      } mappings[num_mappings];
+      io.data = io.end = 0;
+      NO_INTR(io.fd = sys_open("/proc/self/maps", O_RDONLY, 0));
+      if (io.fd >= 0) {
+        size_t note_align;
+        /* Parse entries of the form:
+         * "^[0-9A-F]*-[0-9A-F]* [r-][w-][x-][p-] [0-9A-F]*.*$"
+         */
+        for (i = 0; i < num_mappings;) {
+          static const char * const dev_zero = "/dev/zero";
+          const char *dev = dev_zero;
+          int    j, is_device;
+          size_t zeros;
+
+          memset(&mappings[i], 0, sizeof(mappings[i]));
+
+          /* Read start and end addresses                                    */
+          if (GetHex(&io, &mappings[i].start_address) != '-' ||
+              GetHex(&io, &mappings[i].end_address)   != ' ')
+            goto read_error;
+
+          /* Read flags                                                      */
+          while ((ch = GetChar(&io)) != ' ') {
+            if (ch < 0)
+              goto read_error;
+            mappings[i].flags = (mappings[i].flags << 1) | (ch != '-');
+          }
+          /* Drop the private/shared bit. This makes the flags compatible with
+           * the ELF access bits
+           */
+          mappings[i].flags >>= 1;
+
+          /* Read offset                                                     */
+          if ((ch = GetHex(&io, &mappings[i].offset)) != ' ')
+            goto read_error;
+
+          /* Skip over device numbers, and inode number                      */
+          for (j = 0; j < 2; j++) {
+            while (ch == ' ') {
+              ch = GetChar(&io);
+            }
+            while (ch != ' ' && ch != '\n') {
+              if (ch < 0)
+                goto read_error;
+              ch = GetChar(&io);
+            }
+            while (ch == ' ') {
+              ch = GetChar(&io);
+            }
+            if (ch < 0)
+              goto read_error;
+          }
+
+          /* Check whether this is a mapping for a device                    */
+          while (*dev && ch == *dev) {
+            ch = GetChar(&io);
+            dev++;
+          }
+          is_device = dev >= dev_zero + 5 &&
+                      ((ch != '\n' && ch != ' ') || *dev != '\000');
+
+          /* Skip until end of line                                          */
+          while (ch != '\n') {
+            if (ch < 0)
+              goto read_error;
+            ch = GetChar(&io);
+          }
+
+          /* Skip leading zeroed pages (as found in the stack segment)       */
+          if ((mappings[i].flags & PF_R) && !is_device) {
+            zeros = LeadingZeros(loopback, (void *)mappings[i].start_address,
+                         mappings[i].end_address - mappings[i].start_address,
+                         pagesize);
+            mappings[i].start_address += zeros;
+          }
+
+          /* Remove mapping, if it was not readable, or completely zero
+           * anyway. The former is usually the case of stack guard pages, and
+           * the latter occasionally happens for unused memory.
+           * Also, be careful not to touch mapped devices.
+           */
+          if ((mappings[i].flags & PF_R) == 0 ||
+              mappings[i].start_address == mappings[i].end_address ||
+              is_device) {
+            num_mappings--;
+          } else {
+            i++;
+          }
+        }
+        NO_INTR(sys_close(io.fd));
+
+        /* Write out the ELF header                                          */
+        /* scope */ {
+          Ehdr ehdr;
+          memset(&ehdr, 0, sizeof(ehdr));
+          ehdr.e_ident[0] = ELFMAG0;
+          ehdr.e_ident[1] = ELFMAG1;
+          ehdr.e_ident[2] = ELFMAG2;
+          ehdr.e_ident[3] = ELFMAG3;
+          ehdr.e_ident[4] = ELF_CLASS;
+          ehdr.e_ident[5] = ELFDATA2LSB;
+          ehdr.e_ident[6] = EV_CURRENT;
+          ehdr.e_type     = ET_CORE;
+          ehdr.e_machine  = ELF_ARCH;
+          ehdr.e_version  = EV_CURRENT;
+          ehdr.e_phoff    = sizeof(ehdr);
+          ehdr.e_ehsize   = sizeof(ehdr);
+          ehdr.e_phentsize= sizeof(Phdr);
+          ehdr.e_phnum    = num_mappings + 1;
+          ehdr.e_shentsize= sizeof(Shdr);
+          if (c_write(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
+            goto done;
+          }
+        }
+
+        /* Write program headers, starting with the PT_NOTE entry            */
+        /* scope */ {
+          Phdr   phdr;
+          size_t offset   = sizeof(Ehdr) + (num_mappings + 1)*sizeof(Phdr);
+          size_t filesz   = sizeof(Nhdr) + 4 + sizeof(i386_prpsinfo) +
+                            sizeof(Nhdr) + 4 + sizeof(i386_user) +
+                            num_threads*(
+                            + sizeof(Nhdr) + 4 + sizeof(i386_prstatus)
+                            + sizeof(Nhdr) + 4 + sizeof(i386_fpregs));
+          #ifndef __x86_64__
+          if (fpxregs) {
+            filesz       += num_threads*(
+                              sizeof(Nhdr) + 4 + sizeof(i386_fpxregs));
+          }
+          #endif
+          memset(&phdr, 0, sizeof(phdr));
+          phdr.p_type     = PT_NOTE;
+          phdr.p_offset   = offset;
+          phdr.p_filesz   = filesz;
+          if (c_write(fd, &phdr, sizeof(phdr)) != sizeof(phdr)) {
+            goto done;
+          }
+
+          /* Now follow with program headers for each of the memory segments */
+          phdr.p_type     = PT_LOAD;
+          phdr.p_align    = pagesize;
+          phdr.p_paddr    = 0;
+          note_align      = phdr.p_align - ((offset+filesz) % phdr.p_align);
+          if (note_align == phdr.p_align)
+            note_align    = 0;
+          offset         += note_align;
+          for (i = 0; i < num_mappings; i++) {
+            offset       += filesz;
+            filesz        = mappings[i].end_address -mappings[i].start_address;
+            phdr.p_offset = offset;
+            phdr.p_vaddr  = mappings[i].start_address;
+            phdr.p_memsz  = filesz;
+
+            /* Do not write contents for memory segments that are read-only  */
+            if ((mappings[i].flags & PF_W) == 0)
+              filesz      = 0;
+            phdr.p_filesz = filesz;
+            phdr.p_flags  = mappings[i].flags;
+            if (c_write(fd, &phdr, sizeof(phdr)) != sizeof(phdr)) {
+              goto done;
+            }
+          }
+        }
+
+        /* Write note section                                                */
+        /* scope */ {
+          Nhdr nhdr;
+          memset(&nhdr, 0, sizeof(nhdr));
+          nhdr.n_namesz   = 4;
+          nhdr.n_descsz   = sizeof(i386_prpsinfo);
+          nhdr.n_type     = NT_PRPSINFO;
+          if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
+              c_write(fd, "CORE", 4) != 4 ||
+              c_write(fd, prpsinfo, sizeof(i386_prpsinfo)) !=
+              sizeof(i386_prpsinfo)) {
+            goto done;
+          }
+          nhdr.n_descsz   = sizeof(i386_user);
+          nhdr.n_type     = NT_PRXREG;
+          if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
+              c_write(fd, "CORE", 4) != 4 ||
+              c_write(fd, user, sizeof(i386_user)) != sizeof(i386_user)) {
+            goto done;
+          }
+
+          for (i = num_threads; i-- > 0; ) {
+            /* Process status and integer registers                          */
+            nhdr.n_descsz = sizeof(i386_prstatus);
+            nhdr.n_type   = NT_PRSTATUS;
+            prstatus->pr_pid = pids[i];
+            prstatus->pr_reg = regs[i];
+            if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
+                c_write(fd, "CORE", 4) != 4 ||
+                c_write(fd, prstatus, sizeof(i386_prstatus)) !=
+                sizeof(i386_prstatus)) {
+              goto done;
+            }
+
+            /* FPU registers                                                 */
+            nhdr.n_descsz = sizeof(i386_fpregs);
+            nhdr.n_type   = NT_FPREGSET;
+            if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
+                c_write(fd, "CORE", 4) != 4 ||
+                c_write(fd, fpregs+1, sizeof(i386_fpregs)) !=
+                sizeof(i386_fpregs)) {
+              goto done;
+            }
+
+            /* SSE registers                                                 */
+            #ifndef __x86_64__
+            /* Linux on x86-64 stores all FPU registers in the SSE structure */
+            if (fpxregs) {
+              nhdr.n_descsz = sizeof(i386_fpxregs);
+              nhdr.n_type   = NT_PRFPXREG;
+              if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
+                  c_write(fd, "CORE", 4) != 4 ||
+                  c_write(fd, fpxregs+1, sizeof(i386_fpxregs)) !=
+                  sizeof(i386_fpxregs)) {
+                goto done;
+              }
+            }
+            #endif
+          }
+        }
+
+        /* Align all following segments to multiples of page size            */
+        if (note_align) {
+          char scratch[note_align];
+          memset(scratch, 0, sizeof(scratch));
+          if (c_write(fd, scratch, sizeof(scratch)) != sizeof(scratch)) {
+            goto done;
+          }
+        }
+
+        /* Write all memory segments                                         */
+        for (i = 0; i < num_mappings; i++) {
+          if (mappings[i].flags & PF_W &&
+              c_write(fd, (void *)mappings[i].start_address,
+                      mappings[i].end_address - mappings[i].start_address) !=
+                      mappings[i].end_address - mappings[i].start_address) {
+            goto done;
+          }
+        }
+      }
+    }
+  }
+
+done:
+  if (loopback[0] >= 0)
+    NO_INTR(sys_close(loopback[0]));
+  if (loopback[1] >= 0)
+    NO_INTR(sys_close(loopback[1]));
+  NO_INTR(sys_close(fd));
+  return;
+}
+
+
+/* Internal function for generating a core file. This function works for
+ * both single- and multi-threaded core files. It assumes that all threads
+ * are already suspended, and will resume them before returning.
+ *
+ * The caller must make sure that prctl(PR_SET_DUMPABLE, 1) has been called,
+ * or this function might fail.
+ */
+int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids) {
+  long          i;
+  int           rc = -1, fd = -1, threads = num_threads, hasSSE = 0;
+  i386_prpsinfo prpsinfo;
+  i386_prstatus prstatus;
+  pid_t         pids[threads           + 1];
+  i386_regs     thread_regs[threads    + 1];
+  i386_fpregs   thread_fpregs[threads  + 1];
+  i386_fpxregs  thread_fpxregs[threads + 1];
+  int           pair[2];
+  int           main_pid = sys_gettid();
+
+  /* Get thread status                                                       */
+  if (threads)
+    memcpy(pids, thread_pids, threads * sizeof(pid_t));
+  memset(thread_regs,    0, (threads + 1) * sizeof(i386_regs));
+  memset(thread_fpregs,  0, (threads + 1) * sizeof(i386_fpregs));
+  memset(thread_fpxregs, 0, (threads + 1) * sizeof(i386_fpxregs));
+
+  /* Threads are already attached, read their registers now                  */
+  for (i = 0; i < threads; i++) {
+    char scratch[4096];
+    memset(scratch, 0xFF, sizeof(scratch));
+    if (sys_ptrace(PTRACE_GETREGS, pids[i], scratch, scratch) == 0) {
+      memcpy(thread_regs + i, scratch, sizeof(i386_regs));
+      memset(scratch, 0xFF, sizeof(scratch));
+      if (sys_ptrace(PTRACE_GETFPREGS, pids[i], scratch, scratch) == 0) {
+        memcpy(thread_fpregs + i, scratch, sizeof(i386_fpregs));
+        memset(scratch, 0xFF, sizeof(scratch));
+        #ifndef __x86_64__
+        /* Linux on x86-64 stores all FPU registers in the SSE structure     */
+        if (sys_ptrace(PTRACE_GETFPXREGS, pids[i], scratch, scratch) == 0) {
+          memcpy(thread_fpxregs + i, scratch, sizeof(i386_fpxregs));
+        } else {
+          hasSSE = 0;
+        }
+        #endif
+      } else
+        goto ptrace;
+    } else {
+   ptrace: /* Oh, well, undo everything and get out of here                  */
+      ResumeAllProcessThreads(threads, pids);
+      goto error;
+    }
+  }
+
+  /* Build the PRPSINFO data structure                                       */
+  memset(&prpsinfo, 0, sizeof(prpsinfo));
+  prpsinfo.pr_sname = 'R';
+  prpsinfo.pr_nice  = sys_getpriority(PRIO_PROCESS, 0);
+  prpsinfo.pr_uid   = sys_geteuid();
+  prpsinfo.pr_gid   = sys_getegid();
+  prpsinfo.pr_pid   = main_pid;
+  prpsinfo.pr_ppid  = sys_getppid();
+  prpsinfo.pr_pgrp  = sys_getpgrp();
+  prpsinfo.pr_sid   = sys_getsid(0);
+  /* scope */ {
+    char scratch[4096], *cmd = scratch, *ptr;
+    ssize_t size, len;
+    int cmd_fd;
+    memset(&scratch, 0, sizeof(scratch));
+    size = sys_readlink("/proc/self/exe", scratch, sizeof(scratch));
+    len = 0;
+    for (ptr = cmd; *ptr != '\000' && size-- > 0; ptr++) {
+      if (*ptr == '/') {
+        cmd = ptr+1;
+        len = 0;
+      } else
+        len++;
+    }
+    memcpy(prpsinfo.pr_fname, cmd,
+           len > sizeof(prpsinfo.pr_fname) ? sizeof(prpsinfo.pr_fname) : len);
+    NO_INTR(cmd_fd = sys_open("/proc/self/cmdline", O_RDONLY, 0));
+    if (cmd_fd >= 0) {
+      char *ptr;
+      ssize_t size = c_read(cmd_fd, &prpsinfo.pr_psargs,
+                            sizeof(prpsinfo.pr_psargs));
+      for (ptr = prpsinfo.pr_psargs; size-- > 0; ptr++)
+        if (*ptr == '\000')
+          *ptr = ' ';
+      NO_INTR(sys_close(cmd_fd));
+    }
+  }
+
+  /* Build the PRSTATUS data structure                                       */
+  /* scope */ {
+    int stat_fd;
+    memset(&prstatus, 0, sizeof(prstatus));
+    prstatus.pr_pid     = prpsinfo.pr_pid;
+    prstatus.pr_ppid    = prpsinfo.pr_ppid;
+    prstatus.pr_pgrp    = prpsinfo.pr_pgrp;
+    prstatus.pr_sid     = prpsinfo.pr_sid;
+    prstatus.pr_fpvalid = 1;
+    NO_INTR(stat_fd = sys_open("/proc/self/stat", O_RDONLY, 0));
+    if (stat_fd >= 0) {
+      char scratch[4096];
+      ssize_t size = c_read(stat_fd, scratch, sizeof(scratch) - 1);
+      if (size >= 0) {
+        unsigned long tms;
+        char *ptr = scratch;
+        scratch[size] = '\000';
+
+        /* User time                                                         */
+        for (i = 13; i && *ptr; ptr++) if (*ptr == ' ') i--;
+        tms = 0;
+        while (*ptr && *ptr != ' ') tms = 10*tms + *ptr++ - '0';
+        prstatus.pr_utime.tv_sec  = tms / 1000;
+        prstatus.pr_utime.tv_usec = (tms % 1000) * 1000;
+
+        /* System time                                                       */
+        if (*ptr) ptr++;
+        tms = 0;
+        while (*ptr && *ptr != ' ') tms = 10*tms + *ptr++ - '0';
+        prstatus.pr_stime.tv_sec  = tms / 1000;
+        prstatus.pr_stime.tv_usec = (tms % 1000) * 1000;
+
+        /* Cumulative user time                                              */
+        if (*ptr) ptr++;
+        tms = 0;
+        while (*ptr && *ptr != ' ') tms = 10*tms + *ptr++ - '0';
+        prstatus.pr_cutime.tv_sec  = tms / 1000;
+        prstatus.pr_cutime.tv_usec = (tms % 1000) * 1000;
+
+        /* Cumulative system time                                            */
+        if (*ptr) ptr++;
+        tms = 0;
+        while (*ptr && *ptr != ' ') tms = 10*tms + *ptr++ - '0';
+        prstatus.pr_cstime.tv_sec  = tms / 1000;
+        prstatus.pr_cstime.tv_usec = (tms % 1000) * 1000;
+
+        /* Pending signals                                                   */
+        for (i = 14; i && *ptr; ptr++) if (*ptr == ' ') i--;
+        while (*ptr && *ptr != ' ')
+          prstatus.pr_sigpend = 10*prstatus.pr_sigpend + *ptr++ - '0';
+
+        /* Held signals                                                      */
+        if (*ptr) ptr++;
+        while (*ptr && *ptr != ' ')
+          prstatus.pr_sigpend = 10*prstatus.pr_sigpend + *ptr++ - '0';
+      }
+      NO_INTR(sys_close(stat_fd));
+    }
+  }
+
+  /* Create a file descriptor that can be used for reading data from
+   * our child process. This is a little complicated because we need
+   * to make sure there is no race condition with other threads
+   * calling fork() at the same time (this is somewhat mitigated,
+   * because our threads are supposedly suspended at this time). We
+   * have to avoid other processes holding our file handles open. We
+   * can do this by creating the pipe in the child and passing the
+   * file handle back to the parent.
+   */
+  if (sys_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0) {
+    int openmax  = sys_sysconf(_SC_OPEN_MAX);
+    int pagesize = sys_sysconf(_SC_PAGESIZE);
+
+    /* Block signals prior to forking. Technically, POSIX requires us to call
+     * pthread_sigmask(), if this is a threaded application. When using
+     * glibc, we are OK calling sigprocmask(), though. We will end up
+     * blocking additional signals that libpthread uses internally, but that
+     * is actually exactly what we want.
+     *
+     * Also, POSIX claims that this should not actually be necessarily, but
+     * reality says otherwise.
+     */
+    sigset_t old_signals, blocked_signals;
+    sigfillset(&blocked_signals);
+    sys_sigprocmask(SIG_BLOCK, &blocked_signals, &old_signals);
+
+    /* Create a new core dump in child process; call sys_fork() in order to
+     * avoid complications with pthread_atfork() handlers. In the child
+     * process, we should only ever call system calls.
+     */
+    if ((rc = sys_fork()) == 0) {
+      i386_user user;
+      int       fds[2];
+
+      /* All signals are blocked at this time, but we could still end up
+       * executing synchronous signals (such as SIGILL, SIGFPE, SIGSEGV,
+       * SIGBUS, or SIGTRAP). Reset them to SIG_DFL.
+       */
+      static const int signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS};
+      for (i = 0; i < sizeof(signals)/sizeof(*signals); i++) {
+        struct sigaction act;
+        memset(&act, 0, sizeof(act));
+        act.sa_handler = SIG_DFL;
+        act.sa_flags   = SA_RESTART;
+        sys_sigaction(signals[i], &act, NULL);
+      }
+
+      /* Get parent's CPU registers, and user data structure                 */
+      if (sys_ptrace(PTRACE_ATTACH, main_pid, (void *)0, (void *)0) >= 0) {
+        char scratch[4096];
+        while (sys_waitpid(main_pid, (void *)0, __WALL) < 0) {
+          if (errno != EINTR)
+            sys_exit(1);
+        }
+        for (i = 0; i < sizeof(user); i += sizeof(int))
+          ((int *)&user)[i/sizeof(int)] = sys_ptrace(PTRACE_PEEKUSER,
+                                              main_pid, (void *)i, (void *) i);
+        memset(scratch, 0xFF, sizeof(scratch));
+        if (sys_ptrace(PTRACE_GETREGS, main_pid, scratch, scratch) == 0) {
+          memcpy(thread_regs + threads, scratch, sizeof(i386_regs));
+          memset(scratch, 0xFF, sizeof(scratch));
+          if (sys_ptrace(PTRACE_GETFPREGS, main_pid, scratch, scratch) == 0) {
+            memcpy(thread_fpregs + threads, scratch, sizeof(i386_fpregs));
+            memset(scratch, 0xFF, sizeof(scratch));
+            #ifndef __x86_64__
+            /* Linux on x86-64 stores all FPU regs in the SSE structure      */
+            if (sys_ptrace(PTRACE_GETFPXREGS,main_pid,scratch,scratch) == 0) {
+              memcpy(thread_fpxregs +threads,scratch,sizeof(i386_fpxregs));
+            } else {
+              hasSSE = 0;
+            }
+            #endif
+          } else
+            sys_exit(1);
+        } else
+          sys_exit(1);
+      } else
+        sys_exit(1);
+      sys_ptrace(PTRACE_DETACH, main_pid, (void *)0, (void *)0);
+
+      /* Fake a somewhat reasonable looking stack frame for the
+       * getCoreDump() function.
+       */
+      SET_FRAME(*(Frame *)frame, thread_regs[threads]);
+      memcpy(&user.regs, thread_regs + threads, sizeof(i386_regs));
+      pids[threads++] = main_pid;
+
+      /* Create a pipe for communicating with parent                         */
+      if (sys_pipe(fds) < 0)
+        sys_exit(1);
+
+      /* Pass file handle to parent                                          */
+      /* scope */ {
+        char cmsg_buf[CMSG_SPACE(sizeof(int))];
+        struct iovec  iov;
+        struct msghdr msg;
+        struct cmsghdr *cmsg;
+        memset(&iov, 0, sizeof(iov));
+        memset(&msg, 0, sizeof(msg));
+        iov.iov_base            = (void *)"";
+        iov.iov_len             = 1;
+        msg.msg_iov             = &iov;
+        msg.msg_iovlen          = 1;
+        msg.msg_control         = &cmsg_buf;
+        msg.msg_controllen      = sizeof(cmsg_buf);
+        cmsg                    = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_level        = SOL_SOCKET;
+        cmsg->cmsg_type         = SCM_RIGHTS;
+        cmsg->cmsg_len          = CMSG_LEN(sizeof(int));
+        *(int *)CMSG_DATA(cmsg) = fds[0];
+        while (sys_sendmsg(pair[1], &msg, 0) < 0) {
+          if (errno != EINTR)
+            sys_exit(1);
+        }
+        while (sys_shutdown(pair[1], SHUT_RDWR) < 0) {
+          if (errno != EINTR)
+            sys_exit(1);
+        }
+      }
+
+      /* Close all file handles other than the write end of our pipe         */
+      for (i = 0; i < openmax; i++)
+        if (i != fds[1])
+          NO_INTR(sys_close(i));
+
+      /* Turn into a daemon process, so that "init" can reap us              */
+      if ((rc = sys_fork()) == 0) {
+        CreateElfCore(fds[1], &prpsinfo, &user, &prstatus, threads,
+                      pids, thread_regs, thread_fpregs,
+                      hasSSE ? thread_fpxregs : NULL, pagesize);
+        sys_exit(0);
+      } else {
+        sys_exit(rc < 0 ? 1 : 0);
+      }
+
+      /* Make the compiler happy. We never actually get here.                */
+      return 0;
+    }
+
+    /* In the parent                                                         */
+    sys_sigprocmask(SIG_SETMASK, &old_signals, (void *)0);
+    NO_INTR(sys_close(pair[1]));
+
+    /* Get pipe file handle from child                                       */
+    /* scope */ {
+      char buffer[1], cmsg_buf[CMSG_SPACE(sizeof(int))];
+      struct iovec  iov;
+      struct msghdr msg;
+      for (;;) {
+        int nbytes;
+        memset(&iov, 0, sizeof(iov));
+        memset(&msg, 0, sizeof(msg));
+        iov.iov_base       = buffer;
+        iov.iov_len        = 1;
+        msg.msg_iov        = &iov;
+        msg.msg_iovlen     = 1;
+        msg.msg_control    = &cmsg_buf;
+        msg.msg_controllen = sizeof(cmsg_buf);
+        if ((nbytes = sys_recvmsg(pair[0], &msg, 0)) > 0) {
+          struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+          if (cmsg != NULL && cmsg->cmsg_level == SOL_SOCKET &&
+              cmsg->cmsg_type == SCM_RIGHTS)
+            fd = *(int *)CMSG_DATA(cmsg);
+          break;
+        } else if (nbytes == 0 || errno != EINTR) {
+          break;
+        }
+      }
+    }
+    sys_shutdown(pair[0], SHUT_RDWR);
+    NO_INTR(sys_close(pair[0]));
+  }
+
+  ResumeAllProcessThreads(threads, pids);
+
+  /* Wait for child to detach itself                                         */
+  if (rc > 0) {
+    int status;
+    while (sys_waitpid(rc, &status, 0) < 0) {
+      if (errno != EINTR)
+        goto error;
+    }
+    rc = WEXITSTATUS(status) ? -1 : 0;
+  }
+
+  /* Check if child process ran successfully                                 */
+  if (rc >= 0) {
+    return fd;
+  }
+
+error:
+  if (fd > 0)
+    NO_INTR(sys_close(fd));
+  return -1;
+}
+#endif
diff --git a/src/base/elfcore.h b/src/base/elfcore.h
new file mode 100644
index 0000000..90dea58
--- /dev/null
+++ b/src/base/elfcore.h
@@ -0,0 +1,212 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#ifndef _ELFCORE_H
+#define _ELFCORE_H
+
+/* We currently only support x86-32 and x86-64 on Linux. Porting to
+ * other related platforms should not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__linux)
+
+#include <stdint.h>
+#include <sys/types.h>
+#include "config.h"
+
+/* Define the DUMPER symbol to make sure that there is exactly one
+ * core dumper built into the library.
+ */
+#define DUMPER "ELF"
+
+/* By the time that we get a chance to read CPU registers in the
+ * calling thread, they are already in a not particularly useful
+ * state. Besides, there will be multiple frames on the stack that are
+ * just making the core file confusing. To fix this problem, we take a
+ * snapshot of the frame pointer, stack pointer, and instruction
+ * pointer at an earlier time, and then insert these values into the
+ * core file.
+ */
+
+typedef struct i386_regs {      /* Normal (non-FPU) CPU registers            */
+#ifdef __x86_64__
+  #define BP rbp
+  #define SP rsp
+  #define IP rip
+  uint64_t  r15,r14,r13,r12,rbp,rbx,r11,r10;
+  uint64_t  r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
+  uint64_t  rip,cs,eflags;
+  uint64_t  rsp,ss;
+  uint64_t  fs_base, gs_base;
+  uint64_t  ds,es,fs,gs;
+#else
+  #define BP ebp
+  #define SP esp
+  #define IP eip
+  uint32_t  ebx, ecx, edx, esi, edi, ebp, eax;
+  uint16_t  ds, __ds, es, __es;
+  uint16_t  fs, __fs, gs, __gs;
+  uint32_t  orig_eax, eip;
+  uint16_t  cs, __cs;
+  uint32_t  eflags, esp;
+  uint16_t  ss, __ss;
+#endif
+} i386_regs;
+
+#if defined(__i386__) && defined(__GNUC__)
+  /* On x86 we provide an optimized version of the FRAME() macro, if the
+   * compiler supports a GCC-style asm() directive. This results in somewhat
+   * more accurate values for CPU registers.
+   */
+  typedef struct Frame {
+    struct i386_regs regs;
+    int              errno_;
+  } Frame;
+  #define FRAME(f) Frame f;                                           \
+                   do {                                               \
+                     f.errno_ = errno;                                \
+                     __asm__ volatile (                               \
+                       "push %%eax\n"                                 \
+                       "mov  %%ebp,%%eax\n"                           \
+                       "push %%ebp\n"                                 \
+                       "lea  %0,%%ebp\n"                              \
+                       "mov  %%ebx,0(%%ebp)\n"                        \
+                       "mov  %%ecx,4(%%ebp)\n"                        \
+                       "mov  %%edx,8(%%ebp)\n"                        \
+                       "mov  %%esi,12(%%ebp)\n"                       \
+                       "mov  %%edi,16(%%ebp)\n"                       \
+                       "mov  %%eax,20(%%ebp)\n"                       \
+                       "mov  4(%%esp),%%eax\n"                        \
+                       "mov  %%eax,24(%%ebp)\n"                       \
+                       "mov  %%ds,%%eax\n"                            \
+                       "mov  %%eax,28(%%ebp)\n"                       \
+                       "mov  %%es,%%eax\n"                            \
+                       "mov  %%eax,32(%%ebp)\n"                       \
+                       "mov  %%fs,%%eax\n"                            \
+                       "mov  %%eax,36(%%ebp)\n"                       \
+                       "mov  %%gs,%%eax\n"                            \
+                       "mov  %%eax, 40(%%ebp)\n"                      \
+                       "lea  0f,%%eax\n"                              \
+                       "mov  %%eax,48(%%ebp)\n"                       \
+                       "mov  %%cs,%%eax\n"                            \
+                       "mov  %%eax,52(%%ebp)\n"                       \
+                       "pushf\n"                                      \
+                       "pop  %%eax\n"                                 \
+                       "mov  %%eax,56(%%ebp)\n"                       \
+                       "mov  %%esp,%%eax\n"                           \
+                       "add  $8,%%eax\n"                              \
+                       "mov  %%eax,60(%%ebp)\n"                       \
+                       "mov  %%ss,%%eax\n"                            \
+                       "mov  %%eax,64(%%ebp)\n"                       \
+                       "pop  %%ebp\n"                                 \
+                       "pop  %%eax\n"                                 \
+                     "0:"                                             \
+                       : : "m" (f) : "memory");                       \
+                     } while (0)
+  #define SET_FRAME(f,r)                                              \
+                     do {                                             \
+                       errno = (f).errno_;                            \
+                       (r)   = (f).regs;                              \
+                     } while (0)
+#else
+  /* If we do not have a hand-optimized assembly version of the FRAME()
+   * macro, we fall back to a generic version, which works across different
+   * platforms. This code has been tested on x86_32 and x86_64 with both
+   * gcc and icc.
+   */
+  #ifdef HAVE_BUILTIN_STACK_POINTER
+    #define BUILTIN_STACK_POINTER() __builtin_stack_pointer()
+  #else
+    #define BUILTIN_STACK_POINTER() __builtin_frame_address(0)
+  #endif
+  typedef struct Frame {
+    void *frame_address;
+    void *stack_pointer;
+    void *instruction_pointer;
+    int  errno_;
+  } Frame;
+  #define FRAME(f) Frame f = { __builtin_frame_address(0),            \
+                               BUILTIN_STACK_POINTER(),               \
+                               &&label };                             \
+                   /* Prevent the compiler from moving the label */   \
+                   do {                                               \
+                     f.errno_ = errno;                                \
+                     label: if (!f.instruction_pointer) goto label;   \
+                   } while (!f.stack_pointer)
+  #define SET_FRAME(f,r)                                              \
+                   do {                                               \
+                     errno  = (f).errno_;                             \
+                     (r).BP = (unsigned long)(f).frame_address;       \
+                     (r).SP = (unsigned long)(f).stack_pointer;       \
+                     (r).IP = (unsigned long)(f).instruction_pointer; \
+                   } while (0)
+#endif
+
+
+/* Internal function for generating a core file. This API can change without
+ * notice and is only supposed to be used internally by the core dumper.
+ *
+ * This function works for both single- and multi-threaded core
+ * dumps. If called as
+ *
+ *   FRAME(frame);
+ *   InternalGetCoreDump(&frame, 0, NULL);
+ *
+ * it creates a core file that only contains information about the
+ * calling thread.
+ *
+ * Optionally, the caller can provide information about other threads
+ * by passing their process ids in "thread_pids". The process id of
+ * the caller should not be included in this array. All of the threads
+ * must have been attached to with ptrace(), prior to calling this
+ * function. They will be detached when "InternalGetCoreDump()" returns.
+ *
+ * This function either returns a file handle that can be read for obtaining
+ * a core dump, or "-1" in case of an error. In the latter case, "errno"
+ * will be set appropriately.
+ *
+ * While "InternalGetCoreDump()" is not technically async signal safe, you
+ * might be tempted to invoke it from a signal handler. The code goes to
+ * great lengths to make a best effort that this will actually work. But in
+ * any case, you must make sure that you preserve the value of "errno"
+ * yourself. It is guaranteed to be clobbered otherwise.
+ *
+ * Also, "InternalGetCoreDump" is not strictly speaking re-entrant. Again,
+ * it makes a best effort to behave reasonably when called in a multi-
+ * threaded environment, but it is ultimately the caller's responsibility
+ * to provide locking.
+ */
+int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids);
+
+#endif
+
+#endif /* _ELFCORE_H */
diff --git a/src/base/linuxthreads.c b/src/base/linuxthreads.c
new file mode 100644
index 0000000..adc1e8e
--- /dev/null
+++ b/src/base/linuxthreads.c
@@ -0,0 +1,347 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#include "base/linuxthreads.h"
+
+#ifdef THREADS
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "base/thread_lister.h"
+
+#ifndef O_DIRECTORY
+#define O_DIRECTORY 0200000
+#endif
+
+#if __BOUNDED_POINTERS__
+  #error "Need to port invocations of syscalls for bounded ptrs"
+#else
+  /* (Most of) the code in this file gets executed after threads have been
+   * suspended. As a consequence, we cannot call any functions that acquire
+   * locks. Unfortunately, libc wraps most system calls (e.g. in order to
+   * implement pthread_atfork, and to make calls cancellable), which means
+   * we cannot call these functions. Instead, we have to call syscall()
+   * directly.
+   */
+  #include <asm/stat.h>
+  #include <asm/posix_types.h>
+  #include <asm/types.h>
+  #include <linux/dirent.h>
+  #include <stdarg.h>
+  #include <syscall.h>
+  #ifdef __x86_64__
+    #define sys_socket(d,t,p)  syscall(SYS_socket,   (d), (t), (p))
+    #define sys_waitpid(p,s,o) syscall(SYS_wait4,    (p), (s), (o), (void *)0)
+  #else
+    static int sys_socketcall(int op, ...) {
+      int rc;
+      va_list ap;
+      va_start(ap, op);
+      rc = syscall(SYS_socketcall, op, ap);
+      va_end(ap);
+      return rc;
+    }
+    #define sys_socket(d,t,p)  sys_socketcall(1,     (d), (t), (p))
+    #define sys_waitpid(p,s,o) syscall(SYS_waitpid,  (p), (s), (o))
+  #endif
+
+  #define sys_close(f)         syscall(SYS_close,    (f))
+  #define sys_fcntl(f,c,a)     syscall(SYS_fcntl,    (f), (c), (a))
+  #define sys_fstat(f,b)       syscall(SYS_fstat,    (f), (b))
+  #define sys_getdents(f,d,c)  syscall(SYS_getdents, (f), (d), (c))
+  #define sys_getpid()         syscall(SYS_getpid)
+  #define sys_lseek(f,o,w)     syscall(SYS_lseek,    (f), (o), (w))
+  #define sys_open(f,p,m)      syscall(SYS_open,     (f), (p), (m))
+  #define sys_prctl(o,a)       syscall(SYS_prctl,    (o), (a))
+  #define sys_ptrace(r,p,a,d)  syscall(SYS_ptrace,   (r), (p), (a), (d))
+  #define sys_stat(f,b)        syscall(SYS_stat,     (f), (b))
+#endif
+
+
+/* itoa() is not a standard function, and we cannot safely call printf()
+ * after suspending threads. So, we just implement our own copy. A
+ * recursive approach is the easiest here.
+ */
+static char *local_itoa(char *buf, int i) {
+  if (i < 0) {
+    *buf++ = '-';
+    return local_itoa(buf, -i);
+  } else {
+    if (i >= 10)
+      buf = local_itoa(buf, i/10);
+    *buf++ = (i%10) + '0';
+    *buf   = '\000';
+    return buf;
+  }
+}
+
+
+/* Local substitute for the atoi() function, which is not necessarily safe
+ * to call once threads are suspended (depending on whether libc looks up
+ * locale information,  when executing atoi()).
+ */
+static int local_atoi(const char *s) {
+  int n   = 0;
+  int neg = *s == '-';
+  if (neg)
+    s++;
+  while (*s >= '0' && *s <= '9')
+    n = 10*n + (*s++ - '0');
+  return neg ? -n : n;
+}
+
+/* Re-runs fn until it doesn't cause EINTR
+ */
+#define NO_INTR(fn)   do {} while ((fn) < 0 && errno == EINTR)
+
+/* Wrapper for open() which is guaranteed to never return EINTR.
+ */
+static int c_open(const char *fname, int flags, int mode) {
+  ssize_t rc;
+  NO_INTR(rc = sys_open(fname, flags, mode));
+  return rc;
+}
+
+/* This function gets the list of all linux threads of the current process
+ * but this one and passes them to the 'callback' along with the 'parameter'
+ * pointer; at the call back call time all the threads are paused via
+ * PTRACE_ATTACH.
+ * 'callback' is supposed to do or arrange for ResumeAllProcessThreads.
+ * We return -1 on error and the return value of 'callback' on success.
+ */
+int GetAllProcessThreads(void *parameter,
+                         GetAllProcessThreadsCallBack callback) {
+  int              marker = -1, proc = -1, dumpable = 1;
+  int              num_threads = 0, max_threads = 0;
+  char             marker_name[48], *marker_path;
+  struct stat      proc_sb, marker_sb;
+  pid_t            my_pid = sys_getpid();
+
+  /* Create "marker" that we can use to detect threads sharing the same
+   * address space and the same file handles. By setting the FD_CLOEXEC flag
+   * we minimize the risk of misidentifying child processes as threads;
+   * and since there is still a race condition,  we will filter those out
+   * later, anyway.
+   */
+  if ((marker = sys_socket(PF_LOCAL, SOCK_DGRAM, 0)) < 0 ||
+      sys_fcntl(marker, F_SETFD, FD_CLOEXEC) < 0)
+    goto failure;
+
+  local_itoa(strrchr(strcpy(marker_name, "/proc/self/fd/"), '\000'), marker);
+  marker_path = marker_name + 10; /* Skip "/proc/self"                       */
+  if (sys_stat(marker_name, &marker_sb) < 0)
+    goto failure;
+
+  /* Make this process "dumpable". This is necessary in order to ptrace()
+   * after having called setuid().
+   */
+  dumpable = sys_prctl(PR_GET_DUMPABLE, 0);
+  if (!dumpable)
+    sys_prctl(PR_SET_DUMPABLE, 1);
+
+  /* Read process directories in /proc/...                                   */
+  for (;;) {
+    /* Some kernels know about threads, and hide them in "/proc" (although they
+     * are still there, if you know the process id). Threads are moved into
+     * a separate "task" directory. We check there first, and then fall back
+     * on the older naming convention if necessary.
+     */
+    if (((proc = c_open("/proc/self/task/", O_RDONLY|O_DIRECTORY, 0)) < 0 &&
+         (proc = c_open("/proc/", O_RDONLY|O_DIRECTORY, 0)) < 0) ||
+        sys_fstat(proc, &proc_sb) < 0)
+      goto failure;
+
+    /* Since we are suspending threads, we cannot call any libc functions that
+     * might acquire locks. Most notably, we cannot call malloc(). So, we have
+     * to allocate memory on the stack, instead. Since we do not know how
+     * much memory we need, we make a best guess. And if we guessed incorrectly
+     * we retry on a second iteration (by jumping to "detach_threads").
+     *
+     * Unless the number of threads is increasing very rapidly, we should
+     * never need to do so, though, as our guestimate is very conservative.
+     */
+    if (max_threads < proc_sb.st_nlink + 100)
+      max_threads = proc_sb.st_nlink + 100;
+
+    /* scope */ {
+      pid_t pids[max_threads];
+      int   result, added_entries = 0;
+      for (;;) {
+        struct dirent *entry;
+        char buf[proc_sb.st_blksize];
+        ssize_t nbytes = sys_getdents(proc, (struct dirent *)buf, sizeof(buf));
+        if (nbytes < 0)
+          goto failure;
+        else if (nbytes == 0) {
+          if (added_entries) {
+            /* Need to keep iterating over "/proc" in multiple passes until
+             * we no longer find any more threads. This algorithm eventually
+             * completes, when all threads have been suspended.
+             */
+            added_entries = 0;
+            sys_lseek(proc, 0, SEEK_SET);
+            continue;
+          }
+          break;
+        }
+        for (entry = (struct dirent *)buf;
+             entry < (struct dirent *)&buf[nbytes];
+             entry = (struct dirent *)((char *)entry + entry->d_reclen)) {
+          if (entry->d_ino != 0) {
+            const char *ptr = entry->d_name;
+            pid_t pid;
+
+            /* Some kernels hide threads by preceding the pid with a '.'     */
+            if (*ptr == '.')
+              ptr++;
+
+            /* If the directory is not numeric, it cannot be a process/thread*/
+            if (*ptr < '0' || *ptr > '9')
+              continue;
+            pid = local_atoi(ptr);
+
+            /* Attach (and suspend) all threads other than the current one   */
+            if (pid && pid != my_pid) {
+              struct stat tmp_sb;
+              char fname[entry->d_reclen + 48];
+              strcat(strcat(strcpy(fname, "/proc/"),
+                            entry->d_name), marker_path);
+
+              /* Check if the marker is identical to the one in our thread   */
+              if (sys_stat(fname, &tmp_sb) >= 0 &&
+                  marker_sb.st_dev == tmp_sb.st_dev &&
+                  marker_sb.st_ino == tmp_sb.st_ino) {
+                int i, j;
+
+                /* Found one of our threads, make sure it is no duplicate    */
+                for (i = 0; i < num_threads; i++) {
+                  /* Linear search is slow, but should not matter much for
+                   * the typically small number of threads.
+                   */
+                  if (pids[i] == pid) {
+                    /* Found a duplicate; most likely on second pass of scan */
+                    goto next_entry;
+                  }
+                }
+
+                /* Check whether data structure needs growing                */
+                if (num_threads >= max_threads) {
+                  /* Back to square one, this time with more memory allocated*/
+                  NO_INTR(sys_close(proc));
+                  goto detach_threads;
+                }
+
+                /* Attaching to thread suspends it                           */
+                if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0, (void *)0) < 0) {
+                  /* If operation failed, ignore thread. Maybe it just died?
+                   * There might also be a race condition with a concurrent
+                   * core dumper or with a debugger. In that case, we will
+                   * just make a best effort, rather than failing entirely.
+                   */
+                  goto next_entry;
+                }
+                while (sys_waitpid(pid, (void *)0, __WALL) < 0) {
+                  if (errno != EINTR) {
+                    sys_ptrace(PTRACE_DETACH, pid, (void *)0, (void *)0);
+                    goto next_entry;
+                  }
+                }
+
+                if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j ||
+                    sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i   != j) {
+                  /* Address spaces are distinct, even though both processes
+                   * show the "marker". This is probably a forked child
+                   * process rather than a thread.
+                   */
+                  sys_ptrace(PTRACE_DETACH, pid, (void *)0, (void *)0);
+                } else {
+                  pids[num_threads++] = pid;
+                  added_entries++;
+                }
+              }
+            }
+          }
+       next_entry:;
+        }
+      }
+      NO_INTR(sys_close(marker));
+      NO_INTR(sys_close(proc));
+
+      /* Now we are ready to call the callback,
+       * which takes care of resuming the threads for us.
+       */
+      result = callback(parameter, num_threads, pids);
+
+      /* Restore the "dumpable" state of the process                         */
+      if (!dumpable)
+        sys_prctl(PR_SET_DUMPABLE, dumpable);
+      return result;
+
+   detach_threads:
+      /* Resume all threads prior to retrying the operation                  */
+      ResumeAllProcessThreads(num_threads, pids);
+      num_threads = 0;
+      max_threads += 100;
+    }
+  }
+
+failure:
+  if (!dumpable)
+    sys_prctl(PR_SET_DUMPABLE, dumpable);
+  if (proc >= 0)
+    NO_INTR(sys_close(proc));
+  if (marker >= 0)
+    NO_INTR(sys_close(marker));
+  return -1;
+}
+
+/* This function resumes the list of all linux threads that
+ * GetAllProcessThreads pauses before giving to its callback.
+ */
+void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+  while (num_threads-- > 0) {
+    sys_ptrace(PTRACE_DETACH, thread_pids[num_threads], (void *)0, (void *)0);
+  }
+}
+
+#endif
diff --git a/src/base/linuxthreads.h b/src/base/linuxthreads.h
new file mode 100644
index 0000000..636fd6c
--- /dev/null
+++ b/src/base/linuxthreads.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#ifndef _LINUXTHREADS_H
+#define _LINUXTHREADS_H
+
+/* Include thread_lister.h to get the interface that we implement for linux.
+ */
+
+/* We currently only support x86-32 and x86-64 on Linux. Porting to other
+ * related platforms should not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__linux)
+
+/* Define the THREADS symbol to make sure that there is exactly one core dumper
+ * built into the library.
+ */
+#define THREADS "Linux /proc"
+
+#endif
+
+#endif  /* _LINUXTHREADS_H */
diff --git a/src/base/thread_lister.c b/src/base/thread_lister.c
new file mode 100644
index 0000000..8de404d
--- /dev/null
+++ b/src/base/thread_lister.c
@@ -0,0 +1,54 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#include "base/thread_lister.h"
+#include "base/linuxthreads.h"
+/* Include other thread listers here that define THREADS macro
+ * only when they can provide a good implementation.
+ */
+
+#ifndef THREADS
+
+/* Default trivial thread lister for single-threaded applications,
+ * or if the multi-threading code has not been ported, yet.
+ */
+
+int GetAllProcessThreads(void *parameter,
+                         GetAllProcessThreadsCallBack callback) {
+  return callback(parameter, 0, NULL);
+}
+
+void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+}
+
+#endif
diff --git a/src/base/thread_lister.h b/src/base/thread_lister.h
new file mode 100644
index 0000000..6bae064
--- /dev/null
+++ b/src/base/thread_lister.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#ifndef _THREAD_LISTER_H
+#define _THREAD_LISTER_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*GetAllProcessThreadsCallBack)(void *parameter,
+                                            int num_threads,
+                                            pid_t *thread_pids);
+
+/* This function gets the list of all linux threads of the current process
+ * but this one and passes them to the 'callback' along with the 'parameter'
+ * pointer; at the call back call time all the threads are paused via
+ * PTRACE_ATTACH.
+ * 'callback' is supposed to do or arrange for ResumeAllProcessThreads.
+ * We return -1 on error and the return value of 'callback' on success.
+ */
+int GetAllProcessThreads(void *parameter,
+                         GetAllProcessThreadsCallBack callback);
+
+/* This function resumes the list of all linux threads that
+ * GetAllProcessThreads pauses before giving to its callback.
+ */
+void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids);
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif  /* _THREAD_LISTER_H */
diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h
index 323da5d..ef6c343 100644
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@@ -32,22 +32,16 @@
 //
 // Heap memory leak checker (utilizes heap-profiler and pprof).
 //
-
-#ifndef BASE_HEAP_CHECKER_H__
-#define BASE_HEAP_CHECKER_H__
-
-#include <google/perftools/basictypes.h>
-#include <vector>
-
 // TODO(jandrews): rewrite this documentation
-// HeapLeakChecker, a memory leak checking class.
+// HeapLeakChecker, a heap memory leak checking class.
 //
 // Verifies that there are no memory leaks between its
 // construction and call to its *NoLeaks() or *SameHeap() member.
 //
 // It will dump two profiles at these two events
 // (named <prefix>.<name>-beg.heap and <prefix>.<name>-end.heap
-//  where <prefix> is given by --heap_profile= and <name> by our costructor)
+//  where <prefix> is determined automatically to some temporary location
+//  and <name> is given in the HeapLeakChecker's constructor)
 // and will return false in case the amount of in-use memory
 // is more at the time of *NoLeaks() call than
 // (or respectively differs at the time of *SameHeap() from)
@@ -56,116 +50,160 @@
 // profiles to locate leaks.
 //
 // GUIDELINE: In addition to the local heap leak checking between two arbitrary
-// points in program's execution, we provide a way for overall
-// whole-program heap leak checking, which is WHAT ONE SHOULD NORMALLY USE.
-//
-// In order to enable the recommended whole-program heap leak checking
-// in the BUILD rule for your binary, just depend on "//base:heapcheck"
-// Alternatively you can call your binary with e.g. "--heap_check=normal"
-// as one of the *early* command line arguments.
-//
-// CAVEAT: Doing the following alone will not work in many cases
-//   int main(int argc, char** argv) {
-//     FLAGS_heap_check = "normal";
-//     InitGoogle(argv[0], &argc, &argv, true);
-//     <do things>
-//   }
-// The reason is that the program must know that it's going to be
-// heap leak checking itself before construction of
-// its global variables happens and before main() is executed.
-// NOTE: Once "--heap_check=<smth>" is in the command line or //base:heapcheck
-// is linked in, you can change the value of FLAGS_heap_check in your program
-// any way you wish but before InitGoogle() exits
-// (which includes any REGISTER_MODULE_INITIALIZER).
-//
-// GUIDELINE CONT.: Depending on the value of the FLAGS_heap_check
-// -- as well as other flags of this module --
-// different modifications of leak checking between different points in
-// program's execution take place.
-// Currently supported values from less strict to more strict are:
-// "minimal", "normal", "strict", "draconian".
-// The "as-is" value leaves control to the other flags of this module.
-// The "local" value does not start whole-program heap leak checking
-// but activates all our Disable*() methods
-// for the benefit of local heap leak checking via HeapLeakChecker objects.
-//
-// For the case of FLAGS_heap_check == "normal"
-// everything from before execution of all global variable constructors
-// to normal program exit
-// (namely after main() returns and after all REGISTER_HEAPCHECK_CLEANUP's
-//  are executed, but before any global variable destructors are executed)
-// is checked for absense of heap memory leaks.
+// points in program's execution via an explicit HeapLeakChecker object,
+// we provide a way for overall whole-program heap leak checking,
+// which is WHAT ONE SHOULD NORMALLY USE.
+//
+// Currently supported heap-check types, from less strict to more
+// strict, are:
+//     "minimal", "normal", "strict", "draconian"
+//
+// There are also two more types: "as-is" and "local".
+//
+// GUIDELINE CONT.: Depending on the value of the HEAPCHECK variable
+// -- as well as other flags of this module -- different modifications
+// of leak checking between different points in program's execution
+// take place.  The "as-is" value leaves control to the other flags of
+// this module.  The "local" value does not start whole-program heap
+// leak checking but activates all the machinery needed for local heap
+// leak checking via explicitly created HeapLeakChecker objects.
+//
+// For the case of "normal" everything from before execution of all
+// global variable constructors to normal program exit (namely after
+// main() returns and after all REGISTER_HEAPCHECK_CLEANUP's are
+// executed, but before any global variable destructors are executed)
+// is checked for the absence of heap memory leaks.
 //
 // NOTE: For all but "draconian" whole-program leak check we also
 // ignore all heap objects reachable (a the time of the check)
 // from any global variable or any live thread stack variable
 // or from any object identified by a HeapLeakChecker::IgnoreObject() call.
-// The liveness check we do is not very portable and is not 100% exact
-// (it might ignore real leaks occasionally
-//  -- it might potentially not find some global data region to start from
-//     but we consider such cases to be our bugs to fix),
-// but it works in most cases and saves us from
-// writing a lot of explicit clean up code.
-//
-// THREADS and heap leak checking: At the beginning of HeapLeakChecker's
+//
+// CAVEAT: We do a liveness flood by traversing pointers to heap objects
+// starting from some initial memory regions we know to potentially
+// contain live pointer data.
+// -- It might potentially not find some (global)
+//    live data region to start the flood from,
+//    but we consider such cases to be our bugs to fix.
+// The liveness flood approach although not being very portable
+// and 100% exact works in most cases (see below)
+// and saves us from writing a lot of explicit clean up code
+// and other hassles when dealing with thread data.
+//
+// The liveness flood simply attempts to treat any properly aligned
+// byte sequences as pointers to heap objects and thinks that
+// it found a good pointer simply when the current heap memory map
+// contains an object with the address whose byte representation we found.
+// As a result of this simple approach, it's unlikely but very possible
+// for the flood to be inexact and occasionally result in leaked objects
+// being erroneously determined to be live.
+// Numerous reasons can lead to this, e.g.:
+// - Random bit patters can happen to look
+//   like pointers to leaked heap objects.
+// - Stale pointer data not corresponding to any live program variables
+//   can be still present in memory regions (e.g. thread stacks --see below)
+//   that we consider to be live.
+// - Stale pointer data that we did not clear can point
+//   to a now leaked heap object simply because the heap object
+//   address got reused by the memory allocator, e.g.
+//     char* p = new char[1];
+//     delete p;
+//     new char[1];  // this is leaked but p might be pointing to it
+//
+// The implications of these imprecisions of the liveness flood
+// are as follows:
+// - For any heap leak check we might miss some memory leaks.
+// - For a whole-program leak check, a leak report *does* always
+//   correspond to a real leak (unless of course the heap-checker has a bug).
+//   This is because in this case we start with an empty heap profile,
+//   so there's never an issue of it saying that some heap objects
+//   are live when they are not.
+// - For local leak checks, a leak report can be a partial false positive
+//   in the sense that the reported leaks might have actually occurred
+//   before this local leak check was started.
+//   Here's an example scenario: When we start a local check
+//   heap profile snapshot mistakenly says that some previously
+//   leaked objects are live.
+//   When we end the local check the heap profile snapshot now correctly
+//   determines that those objects are unreachable and reports them as leaks
+//   for this leak check, whereas they had been already leaked before it.
+//
+// THREADS and heap leak checking: At the time of HeapLeakChecker's
 // construction and during *NoLeaks()/*SameHeap() calls we grab a lock so that
 // heap activity in other threads is paused for the time
 // we are recording or analyzing the state of the heap.
-// To make non whole-program heap leak check meaningful there should be
-// no heap activity in other threads at the these times.
-//
-// For the whole-program heap leak check it is possible to have
-// other threads active and working with the heap when the program exits.
+// For any heap leak check it is possible to have
+// other threads active and working with the heap
+// when we make the HeapLeakChecker object or do its leak checking
+// provided all these threads are discoverable with the implemetation
+// of thread_lister.h (e.g. are linux pthreads).
+// In this case leak checking should deterministically work fine.
+//
+// CAVEAT: Thread stack data ignoring (via thread_lister.h)
+// does not work if the program is running under gdb, probably becauce the
+// ptrace functionality needed for thread_lister is already hooked to by gdb.
+//
+// As mentioned above thread stack liveness determination
+// might miss-classify objects that very recently became unreachable (leaked)
+// as reachable in the cases when the values of the pointers
+// to the now unreachable objects are still present in the active stack frames,
+// while the pointers actually no longer correspond to any live program
+// variables.
+// For this reason trivial code like the following
+// might not produce the expected leak checking outcome
+// depending on how the compiled code works with the stack:
+//
+//   int* foo = new int [20];
+//   HeapLeakChecker check("a_check");
+//   foo = NULL;
+//   CHECK(check.NoLeaks());  // this might succeed
 //
 // HINT: If you are debugging detected leaks, you can try different
-// (e.g. less strict) values for FLAGS_heap_check
-// to determine the cause of the reported leaks
-// (see the code of HeapLeakChecker::InternalInitStart for details).
+// (e.g. less strict) values for HEAPCHECK to determine the cause of
+// the reported leaks (see the code of
+// HeapLeakChecker::InternalInitStart for details).
 //
-// GUIDELINE: Below are the preferred ways of making your (test) binary
-// pass the above recommended overall heap leak check
-// in the order of decreasing preference:
+// GUIDELINE: Below are the preferred ways of making your (test)
+// binary pass the above recommended overall heap leak check in the
+// order of decreasing preference:
 //
 // 1. Fix the leaks if they are real leaks.
 //
 // 2. If you are sure that the reported leaks are not dangerous
 //    and there is no good way to fix them, then you can use
-//    HeapLeakChecker::DisableChecks(Up|In|At) calls (see below)
-//    in the relevant modules to disable certain stack traces
-//    for the purpose of leak checking.
-//    You can also use HeapLeakChecker::IgnoreObject() call
-//    to ignore certain leaked heap objects and everythign reachable from them.
-//
-// 3. If the leaks are due to some initialization in a third-party package,
-//    you might be able to force that initialization before the
-//    heap checking starts.
-//
-//    I.e. if FLAGS_heap_check == "minimal" or less strict, it is before
-//    calling InitGoogle or within some REGISTER_MODULE_INITIALIZER.
-//    If FLAGS_heap_check == "normal" or stricter, only
-//    HeapLeakChecker::LibCPreallocate() happens before heap checking starts.
-//
-// CAVEAT: Most Google (test) binaries are expected to pass heap leak check
-// at the FLAGS_heap_check == "normal" level.
-// In certain cases reverting to FLAGS_heap_check == "minimal" level is also
-// fine (provided there's no easy way to make it pass at the "normal" level).
-// Making a binary pass at "strict" or "draconian" level is not necessary
-// or even desirable in the numerous cases when it requires adding
-// a lot of (otherwise unused) heap cleanup code to various core libraries.
+//    HeapLeakChecker::DisableChecks(Up|In|At) calls (see below) in
+//    the relevant modules to disable certain stack traces for the
+//    purpose of leak checking.  You can also use
+//    HeapLeakChecker::IgnoreObject() call to ignore certain leaked
+//    heap objects and everythign reachable from them.
+//
+// 3. If the leaks are due to some initialization in a third-party
+//    package, you might be able to force that initialization before
+//    the heap checking starts.
+//
+//    I.e. if HEAPCHECK == "minimal" or less strict, if you put the
+//    initialization in a global constructor the heap-checker will
+//    ignore it.  If HEAPCHECK == "normal" or stricter, only
+//    HeapLeakChecker::LibCPreallocate() happens before heap checking
+//    starts.
+//
+// Making a binary pass at "strict" or "draconian" level is not
+// necessary or even desirable in the numerous cases when it requires
+// adding a lot of (otherwise unused) heap cleanup code to various
+// core libraries.
 //
 // NOTE: the following should apply only if
-//       FLAGS_heap_check == "strict" or stricter
+//       HEAPCHECK == "strict" or stricter
 //
-// 4. If the found leaks are due to incomplete cleanup
-//    in destructors of global variables,
-//    extend or add those destructors
-//    or use a REGISTER_HEAPCHECK_CLEANUP to do the deallocations instead
-//    to avoid cleanup overhead during normal execution.
-//    This type of leaks get reported when one goes
-//    from "normal" to "strict" checking.
+// 4. If the found leaks are due to incomplete cleanup in destructors
+//    of global variables, extend or add those destructors or use a
+//    REGISTER_HEAPCHECK_CLEANUP to do the deallocations instead to
+//    avoid cleanup overhead during normal execution.  This type of
+//    leaks get reported when one goes from "normal" to "strict"
+//    checking.
 //
 // NOTE: the following should apply only if
-//       FLAGS_heap_check == "draconian" or stricter
+//       HEAPCHECK == "draconian" or stricter
 //
 // 5. If the found leaks are for global static pointers whose values are
 //    allocated/grown (e.g on-demand) and never deallocated,
@@ -173,8 +211,7 @@
 //    or appropriate destructors into these modules
 //    to free those objects.
 //
-//
-// Example of local usage (anywhere in the program) -- but read caveat below:
+// Example of local usage (anywhere in the program):
 //
 //   HeapLeakChecker heap_checker("test_foo");
 //
@@ -183,30 +220,15 @@
 //
 //   CHECK(heap_checker.SameHeap());
 //
-// NOTE: One should set FLAGS_heap_check to a non-empty value e.g. "local"
+// NOTE: One should set HEAPCHECK to a non-empty value e.g. "local"
 // to help suppress some false leaks for these local checks.
-// CAVEAT: The problem with the above example of local checking
-// is that you can easily get false leak reports if the checked code
-// (indirectly) causes initialization or growth of some global structures
-// like caches or reused global temporaries.
-// In such cases you should either
-// switch to the above *preferred* whole-program checking,
-// or somehow *reliably* ensure that false leaks do not happen
-// in the portion of the code you are checking.
-//
-// IMPORTANT: One way people have been using in unit-tests
-// is to run some test functionality once
-// and then run it again under a HeapLeakChecker object.
-// While this helped in many cases, it is not guaranteed to always work
-// -- read it will someday break for some hard to debug reason.
-// These tricks are no longer needed and are now DEPRECATED
-// in favor of using the whole-program checking by just
-// adding a dependency on //base:heapcheck.
-//
-// CONCLUSION: Use the preferred checking via //base:heapcheck
-// in your tests even when it means fixing (or bugging someone to fix)
-// the leaks in the libraries the test depends on.
-//
+
+
+#ifndef BASE_HEAP_CHECKER_H__
+#define BASE_HEAP_CHECKER_H__
+
+#include <sys/types.h>    // for size_t
+#include <vector>
 
 // A macro to declare module heap check cleanup tasks
 // (they run only if we are doing heap leak checking.)
@@ -274,6 +296,17 @@ class HeapLeakChecker {
   bool QuickSameHeap() { return DoNoLeaks(true, false, true); }
   bool BriefSameHeap() { return DoNoLeaks(true, false, false); }
 
+  // Detailed information about the number of leaked bytes and objects
+  // (both of these can be negative as well).
+  // These are available only after a *SameHeap or *NoLeaks
+  // method has been called.
+  // Note that it's possible for both of these to be zero
+  // while SameHeap() or NoLeaks() returned false in case
+  // of a heap state change that is significant
+  // but preserves the byte and object counts.
+  ssize_t BytesLeaked() const;
+  ssize_t ObjectsLeaked() const;
+
   // Destructor (verifies that some *NoLeaks method has been called).
   ~HeapLeakChecker();
 
@@ -294,14 +327,14 @@ class HeapLeakChecker {
  private:  // data
 
   char* name_;  // our remembered name
-  size_t name_length_;  // length of the base part of name_
-  int64 start_inuse_bytes_;  // bytes in use at our construction
-  int64 start_inuse_allocs_;  // allocations in use at our construction
+  size_t start_inuse_bytes_;  // bytes in use at our construction
+  size_t start_inuse_allocs_;  // allocations in use at our construction
+  bool has_checked_;  // if we have done the leak check, so these are ready:
+  ssize_t inuse_bytes_increase_;  // bytes-in-use increase for this checker
+  ssize_t inuse_allocs_increase_;  // allocations-in-use increase for this checker
 
   static pid_t main_thread_pid_;  // For naming output files
-  static const char* invocation_name_; // For naming output files
-  static const char* invocation_path_; // For running 'pprof'
-  static std::string dump_directory_; // Location to write profile dumps
+  static std::string* dump_directory_; // Location to write profile dumps
 
  public:  // Static helpers to make us ignore certain leaks.
 
@@ -310,11 +343,14 @@ class HeapLeakChecker {
   // They do nothing when heap leak checking is turned off.
 
   // CAVEAT: Disabling via all the DisableChecks* functions happens only
-  // up to kMaxStackTrace (see heap-profiler.cc)
-  // stack frames down from the stack frame identified by the function.
+  // up to kMaxStackTrace stack frames (see heap-profiler.cc)
+  // down from the stack frame identified by the function.
   // Hence, this disabling will stop working for very deep call stacks
   // and you might see quite wierd leak profile dumps in such cases.
 
+  // CAVEAT: Disabling via DisableChecksIn works only with non-strip'ped
+  // binaries.  It's better not to use this function if at all possible.
+  //
   // Register 'pattern' as another variant of a regular expression to match
   // function_name, file_name:line_number, or function_address
   // of function call/return points for which allocations below them should be
@@ -322,8 +358,6 @@ class HeapLeakChecker {
   // (This becomes a part of pprof's '--ignore' argument.)
   // Usually this should be caled from a REGISTER_HEAPCHECK_CLEANUP
   // in the source file that is causing the leaks being ignored.
-  // CAVEAT: Disabling via DisableChecksIn works only with non-strip'ped
-  // binaries, but Google's automated unit tests currently run strip'ped.
   static void DisableChecksIn(const char* pattern);
 
   // A pair of functions to disable heap checking between them.
@@ -334,15 +368,22 @@ class HeapLeakChecker {
   //    HeapLeakChecker::DisableChecksToHereFrom(start_address);
   //    ...
   // will disable heap leak checking for everything that happens
-  // during any execution of <do things> (including any calls from it).
+  // during any execution of <do things> (including any calls from it),
+  // i.e. all objects allocated from there
+  // and everything reachable from them will not be considered a leak.
   // Each such pair of function calls must be from the same function,
   // because this disabling works by remembering the range of
-  // return addresses for the two calls.
+  // return program counter addresses for the two calls.
   static void* GetDisableChecksStart();
   static void DisableChecksToHereFrom(void* start_address);
 
-  // Register the function call point (address) 'stack_frames' above us for
-  // which allocations below it should be ignored during heap leak checking.
+  // ADVICE: Use GetDisableChecksStart, DisableChecksToHereFrom
+  //         instead of DisableChecksUp|At whenever possible
+  //         to make the code less fragile under different degrees of inlining.
+  // Register the function call point (return program counter address)
+  // 'stack_frames' above us for which allocations
+  // (everything reachable from them) below it should be
+  // ignored during heap leak checking.
   // 'stack_frames' must be >= 1 (in most cases one would use the value of 1).
   // For example
   //    void Foo() {  // Foo() should not get inlined
@@ -350,47 +391,53 @@ class HeapLeakChecker {
   //      <do things>
   //    }
   // will disable heap leak checking for everything that happens
-  // during any execution of <do things> (including any calls from it).
+  // during any execution of <do things> (including any calls from it),
+  // i.e. all objects allocated from there
+  // and everything reachable from them will not be considered a leak.
   // CAVEAT: If Foo() is inlined this will disable heap leak checking
   // under all processing of all functions Foo() is inlined into.
   // Hence, for potentially inlined functions, use the GetDisableChecksStart,
   // DisableChecksToHereFrom calls instead.
-  // (In the above example we store and use the return addresses
-  //  from Foo to do the disabling.)
+  // (In the above example we store and use the return program counter
+  //  addresses from Foo to do the disabling.)
   static void DisableChecksUp(int stack_frames);
 
   // Same as DisableChecksUp,
-  // but the function return address is given explicitly.
+  // but the function return program counter address is given explicitly.
   static void DisableChecksAt(void* address);
 
-  // Ignore an object at 'ptr'
+  // Tests for checking that DisableChecksUp and DisableChecksAt
+  // behaved as expected, for example
+  //    void Foo() {
+  //      HeapLeakChecker::DisableChecksUp(1);
+  //      <do things>
+  //    }
+  //    void Bar() {
+  //      Foo();
+  //      CHECK(!HeapLeakChecker::HaveDisabledChecksUp(1));
+  //        // This will fail if Foo() got inlined into Bar()
+  //        // (due to more aggressive optimization in the (new) compiler)
+  //        // which breaks the intended behavior of DisableChecksUp(1) in it.
+  //      <do things>
+  //    }
+  // These return false when heap leak checking is turned off.
+  static bool HaveDisabledChecksUp(int stack_frames);
+  static bool HaveDisabledChecksAt(void* address);
+
+  // Ignore an object located at 'ptr'
   // (as well as all heap objects (transitively) referenced from it)
   // for the purposes of heap leak checking.
   // If 'ptr' does not point to an active allocated object
   // at the time of this call, it is ignored;
   // but if it does, the object must not get deleted from the heap later on;
   // it must also be not already ignored at the time of this call.
-  // CAVEAT: Use one of the DisableChecks* calls instead of this if possible
-  // if you want somewhat easier future heap leak check portability.
   static void IgnoreObject(void* ptr);
 
-  // CAVEAT: DisableChecks* calls will not help you in such cases
-  // when you disable only e.g. "new vector<int>", but later grow
-  // this vector forcing it to allocate more memory.
-
-  // NOTE: All calls to *IgnoreObject affect only
-  // the overall whole-program heap leak check, not local checks with
-  // explicit HeapLeakChecker objects.
-  // They do nothing when heap leak checking is turned off.
-
   // Undo what an earlier IgnoreObject() call promised and asked to do.
   // At the time of this call 'ptr' must point to an active allocated object
-  // that was previously registered with IgnoreObject().
+  // which was previously registered with IgnoreObject().
   static void UnIgnoreObject(void* ptr);
 
-  // NOTE: One of the standard uses of IgnoreObject() and UnIgnoreObject()
-  //       is to ignore thread-specific objects allocated on heap.
-
  public:  // Initializations; to be called from main() only.
 
   // Full starting of recommended whole-program checking.  This runs after
@@ -402,14 +449,21 @@ class HeapLeakChecker {
   //  - "strict"
   //  - "draconian"
   //  - "local"
-  static void StartFromMain(const std::string& heap_check_type);
+  static void InternalInitStart(const std::string& heap_check_type);
+
+  struct RangeValue;
+  struct StackExtent;
 
  private:  // Various helpers
 
+  // Helper for dumping start/end heap leak checking profiles.
+  void DumpProfileLocked(bool start, const StackExtent& self_stack);
   // Helper for constructors
   void Create(const char *name);
   // Helper for *NoLeaks and *SameHeap
   bool DoNoLeaks(bool same_heap, bool do_full, bool do_report);
+  // Helper for IgnoreObject
+  static void IgnoreObjectLocked(void* ptr, bool profiler_locked);
   // Helper for DisableChecksAt
   static void DisableChecksAtLocked(void* address);
   // Helper for DisableChecksIn
@@ -419,33 +473,26 @@ class HeapLeakChecker {
                                   void* end_address,
                                   int max_depth);
   // Helper for DoNoLeaks to ignore all objects reachable from all live data
-  static void IgnoreAllLiveObjectsLocked();
+  static void IgnoreAllLiveObjectsLocked(const StackExtent& self_stack);
   // Helper for IgnoreAllLiveObjectsLocked to ignore all heap objects
   // reachable from currently considered live objects
   static void IgnoreLiveObjectsLocked(const char* name, const char* name2);
-  // Preallocates some libc data
-  static void LibCPreallocate();
   // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially
   // calls DoMainHeapCheck
-  static void RunHeapCleanups(void);
+  static void RunHeapCleanups();
   // Do the overall whole-program heap leak check
   static void DoMainHeapCheck();
 
   // Type of task for UseProcMaps
-  enum ProcMapsTask { IGNORE_GLOBAL_DATA_LOCKED, DISABLE_LIBRARY_ALLOCS };
+  enum ProcMapsTask { RECORD_GLOBAL_DATA_LOCKED, DISABLE_LIBRARY_ALLOCS };
   // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line.
   static void UseProcMaps(ProcMapsTask proc_maps_task);
   // A ProcMapsTask to disable allocations from 'library'
   // that is mapped to [start_address..end_address)
   // (only if library is a certain system library).
   static void DisableLibraryAllocs(const char* library,
-                                   uint64 start_address,
-                                   uint64 end_address);
-  // A ProcMapsTask to ignore global data belonging to 'library'
-  // mapped at 'start_address' with 'file_offset'.
-  static void IgnoreGlobalDataLocked(const char* library,
-                                     uint64 start_address,
-                                     uint64 file_offset);
+                                   void* start_address,
+                                   void* end_address);
 
  private:
 
@@ -455,21 +502,13 @@ class HeapLeakChecker {
   // This gets to execute after destructors for all global objects
   friend void HeapLeakChecker_AfterDestructors();
 
- public: // TODO(maxim): make this private and remove 'Kind'
-         //              when all old clients are retired
-
-  // Kind of checker we want to create
-  enum Kind { MAIN, MAIN_DEBUG };
-
-  // Start whole-executable checking
-  // (this is public to support existing deprecated usage).
-  // This starts heap profiler with a good unique name for the dumped profiles.
-  // If kind == MAIN_DEBUG the checking and profiling
-  // happen only in the debug compilation mode.
-  explicit HeapLeakChecker(Kind kind);  // DEPRECATED
-
  private:
-  DISALLOW_EVIL_CONSTRUCTORS(HeapLeakChecker);
+  // Start whole-executable checking.
+  HeapLeakChecker();
+
+  // Don't allow copy constructors -- these are declared but not defined
+  HeapLeakChecker(const HeapLeakChecker&);
+  void operator=(const HeapLeakChecker&);
 };
 
 #endif  // BASE_HEAP_CHECKER_H__
diff --git a/src/google/heap-profiler.h b/src/google/heap-profiler.h
index 565428d..b62056e 100644
--- a/src/google/heap-profiler.h
+++ b/src/google/heap-profiler.h
@@ -32,38 +32,24 @@
 //
 // Module for heap-profiling.
 //
-// This module is safe to link into any program you may wish to profile at some
-// point.  It will not cause any noticeable slowdowns unless you activate it at
-// some point in your program.  So, for instance, you can do something like
-// this (using GNU getopt-long extensions):
+// This module is safe to link into any program you may wish to
+// profile at some point.  It will not cause any noticeable slowdowns
+// unless you activate by setting the environment variable
+// HEAPPROFILE, e.g.:
+// $ export HEAPPROFILE=/tmp/my_program_profile ; ./my_program
+// $ ls /tmp/my_program_profile.*
+//    /tmp/my_program_profile.0000.heap
+//    /tmp/my_program_profile.0001.heap
+//    /tmp/my_program_profile.0002.heap
+//    ...
 //
-// int main (int argc, char **argv) {
-//   static struct option long_options[] = {
-//     {"heap-profile", 1, 0, 0},
-//   };
-//   int option_index = 0;
-//   int c = getopt_long (argc, argv, "", long_options, &option_index);
-//
-//   if (c == 0 && !strcmp(long_options[option_index].name, "heap-profile")) {
-//     HeapProfilerStart(optarg);
-//   }
-//
-//   /* ... */
-// }
-//
-// This allows you to easily profile your program at any time without having to
-// recompile, and doesn't slow things down if you are not profiling.
-//
-// Heap profiles will be written to a sequence of files whose name
-// starts with the supplied prefix.
-//
-// Example:
-//   % bin/programname --heap_profile=foo ...
-//   % ls foo.*
-//      foo.0000.heap
-//      foo.0001.heap
-//      foo.0002.heap
-//      ...
+// This allows you to easily profile your program at any time without
+// having to recompile, and doesn't slow things down if HEAPPROFILE is
+// unset.  We refuse to do profiling if uid != euid, to avoid
+// environment-based security issues if your program is accidentally
+// setuid.  Note that this library should generally not be linked into
+// setuid programs.  It has not been reviewed or tested for security
+// under setuid conditions.
 //
 // If heap-profiling is turned on, a profile file is dumped every GB
 // of allocated data.  You can override this behavior by calling
@@ -77,24 +63,25 @@
 // high-water-mark.  This number can be changed by calling
 // HeapProfilerSetInuseInterval() with a different byte-value.
 //
-// STL WARNING: The HeapProfiler does not accurately track allocations in
-// many STL implementations.  This is because it is common for the default STL
-// allocator to keep an internal pool of memory and nevery return it to the
-// system.  This means that large allocations may be attributed to an object
-// that you know was destroyed.  For a simple example, see
-// TestHeapLeakCheckerSTL in src/tests/heap-checker_unittest.cc.
+// STL WARNING: The HeapProfiler does not accurately track allocations
+// in many STL implementations.  This is because it is common for the
+// default STL allocator to keep an internal pool of memory and nevery
+// return it to the system.  This means that large allocations may be
+// attributed to an object that you know was destroyed.  For a simple
+// example, see TestHeapLeakCheckerSTL in
+// src/tests/heap-checker_unittest.cc.
 //
-// This issue is resolved for GCC 3.3 and 3.4 by setting the environment
-// variable GLIBCXX_FORCE_NEW, which forces the STL allocator to call `new' and
-// `delete' explicitly for every allocation and deallocation.  For GCC 3.2 and
-// previous you will need to compile your source with -D__USE_MALLOC.  For
-// other compilers / STL libraries, there may be a similar solution;  See your
-// implementation's documentation for information.
+// This issue is resolved for GCC 3.3 and 3.4 by setting the
+// environment variable GLIBCXX_FORCE_NEW, which forces the STL
+// allocator to call `new' and `delete' explicitly for every
+// allocation and deallocation.  For GCC 3.2 and previous you will
+// need to compile your source with -D__USE_MALLOC.  For other
+// compilers / STL libraries, there may be a similar solution; See
+// your implementation's documentation for information.
 
 #ifndef _HEAP_PROFILER_H
 #define _HEAP_PROFILER_H
 
-#include <google/perftools/basictypes.h> // For int64 definition
 #include <stddef.h>
 
 // Start profiling and arrange to write profile data to file names
@@ -117,22 +104,17 @@ extern char* GetHeapProfile();
 
 // ---- Configuration accessors ----
 
-// Prefix to which we dump heap profiles.  If empty, we do not dump.  This
-// must be set to your desired value before HeapProfiler::Init() is called.
-// Default: empty
-extern void HeapProfilerSetDumpPath(const char* path);
-
 // Level of logging used by the heap profiler and heap checker (if applicable)
 // Default: 0
 extern void HeapProfilerSetLogLevel(int level);
 
 // Dump heap profiling information once every specified number of bytes
 // allocated by the program.  Default: 1GB
-extern void HeapProfilerSetAllocationInterval(int64 interval);
+extern void HeapProfilerSetAllocationInterval(size_t interval);
 
 // Dump heap profiling information whenever the high-water 
 // memory usage mark increases by the specified number of
 // bytes.  Default: 100MB
-extern void HeapProfilerSetInuseInterval(int64 interval);
+extern void HeapProfilerSetInuseInterval(size_t interval);
 
 #endif /* _HEAP_PROFILER_H */
diff --git a/src/google/malloc_interface.h b/src/google/malloc_extension.h
index a6cfe17..3de0955 100644
--- a/src/google/malloc_interface.h
+++ b/src/google/malloc_extension.h
@@ -35,19 +35,26 @@
 // application can link against a malloc that does not implement these
 // interfaces, and it will get default versions that do nothing.
 
-#ifndef _GOOGLE_MALLOC_INTERFACE_H__
-#define _GOOGLE_MALLOC_INTERFACE_H__
+#ifndef _GOOGLE_MALLOC_EXTENSION_H__
+#define _GOOGLE_MALLOC_EXTENSION_H__
 
-#include <google/perftools/config.h>
 #include <stddef.h>
 #include <string>
 
 static const int kMallocHistogramSize = 64;
 
 // The default implementations of the following routines do nothing.
-class MallocInterface {
+class MallocExtension {
  public:
-  virtual ~MallocInterface();
+  virtual ~MallocExtension();
+
+  // Call this very early in the program execution -- say, in a global
+  // constructor -- to set up parameters and state needed by all
+  // instrumented malloc implemenatations.  One example: this routine
+  // sets environemnt variables to tell STL to use libc's malloc()
+  // instead of doing its own memory management.  This is safe to call
+  // multiple times, as long as each time is before threads start up.
+  static void Initialize();
 
   // See "verify_memory.h" to see what these routines do
   virtual bool VerifyAllMemory();
@@ -70,7 +77,7 @@ class MallocInterface {
   //
   // The generated data is *appended* to "*result".  I.e., the old
   // contents of "*result" are preserved.
-  virtual void GetHeapSample(STL_NAMESPACE::string* result);
+  virtual void GetHeapSample(std::string* result);
 
   // -------------------------------------------------------------------
   // Control operations for getting and setting malloc implementation
@@ -122,11 +129,11 @@ class MallocInterface {
   virtual bool SetNumericProperty(const char* property, size_t value);
 
   // The current malloc implementation.  Always non-NULL.
-  static MallocInterface* instance();
+  static MallocExtension* instance();
 
   // Change the malloc implementation.  Typically called by the
   // malloc implementation during initialization.
-  static void Register(MallocInterface* implementation);
+  static void Register(MallocExtension* implementation);
 
  protected:
   // Get a list of stack traces of sampled allocation points.
@@ -145,9 +152,9 @@ class MallocInterface {
   //
   // May return NULL to indicate no results.
   //
-  // This is an internal interface.  Callers should use the more
+  // This is an internal extension.  Callers should use the more
   // convenient "GetHeapSample(string*)" method defined above.
   virtual void** ReadStackTraces();
 };
 
-#endif  // _GOOGLE_MALLOC_INTERFACE_H__
+#endif  // _GOOGLE_MALLOC_EXTENSION_H__
diff --git a/src/google/malloc_hook.h b/src/google/malloc_hook.h
index 4da4593..799658e 100644
--- a/src/google/malloc_hook.h
+++ b/src/google/malloc_hook.h
@@ -39,13 +39,7 @@
 #ifndef _GOOGLE_MALLOC_HOOK_H
 #define _GOOGLE_MALLOC_HOOK_H
 
-#include <google/perftools/config.h>
 #include <stddef.h>
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
 #include <sys/types.h>
 
 class MallocHook {
diff --git a/src/google/perftools/config.h.in b/src/google/perftools/config.h.in
deleted file mode 100644
index 1ba97f8..0000000
--- a/src/google/perftools/config.h.in
+++ /dev/null
@@ -1,136 +0,0 @@
-/* src/google/perftools/config.h.in.  Generated from configure.ac by autoheader.  */
-
-/* the namespace of hash_map */
-#undef HASH_NAMESPACE
-
-/* Define to 1 if you have the <conflict-signal.h> header file. */
-#undef HAVE_CONFLICT_SIGNAL_H
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#undef HAVE_DLFCN_H
-
-/* Define to 1 if you have the <execinfo.h> header file. */
-#undef HAVE_EXECINFO_H
-
-/* define if the compiler has hash_map */
-#undef HAVE_EXT_HASH_MAP
-
-/* define if the compiler has hash_set */
-#undef HAVE_EXT_HASH_SET
-
-/* Define to 1 if you have the `getpagesize' function. */
-#undef HAVE_GETPAGESIZE
-
-/* define if the compiler has hash_map */
-#undef HAVE_HASH_MAP
-
-/* define if the compiler has hash_set */
-#undef HAVE_HASH_SET
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Define to 1 if you have a working `mmap' system call. */
-#undef HAVE_MMAP
-
-/* Define to 1 if you have the `munmap' function. */
-#undef HAVE_MUNMAP
-
-/* define if the compiler implements namespaces */
-#undef HAVE_NAMESPACES
-
-/* define if libc has program_invocation_name */
-#undef HAVE_PROGRAM_INVOCATION_NAME
-
-/* Define if you have POSIX threads libraries and header files. */
-#undef HAVE_PTHREAD
-
-/* Define to 1 if you have the `sbrk' function. */
-#undef HAVE_SBRK
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if `eip' is member of `struct sigcontext'. */
-#undef HAVE_STRUCT_SIGCONTEXT_EIP
-
-/* Define to 1 if `sc_eip' is member of `struct sigcontext'. */
-#undef HAVE_STRUCT_SIGCONTEXT_SC_EIP
-
-/* Define to 1 if `sc_ip' is member of `struct sigcontext'. */
-#undef HAVE_STRUCT_SIGCONTEXT_SC_IP
-
-/* Define to 1 if `si_faddr' is member of `struct siginfo'. */
-#undef HAVE_STRUCT_SIGINFO_SI_FADDR
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* define if your compiler has __attribute__ */
-#undef HAVE___ATTRIBUTE__
-
-/* Define to 1 if `uc_mcontext' is member of `# for the cpu-profiler struct
-   ucontext'. */
-#undef HAVE___FOR_THE_CPU_PROFILER___________________STRUCT_UCONTEXT_UC_MCONTEXT
-
-/* Define to 1 if the system has the type `__int64'. */
-#undef HAVE___INT64
-
-/* prefix where we look for installed files */
-#undef INSTALL_PREFIX
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* printf format code for printing a size_t */
-#undef PRIuS
-
-/* Define to necessary symbol if this constant uses a non-standard name on
-   your system. */
-#undef PTHREAD_CREATE_JOINABLE
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* the namespace where STL code like vector<> is defined */
-#undef STL_NAMESPACE
-
-/* Version number of package */
-#undef VERSION
-
-/* Define as `__inline' if that's what the C compiler calls it, or to nothing
-   if it is not supported. */
-#undef inline
diff --git a/src/google/stacktrace.h b/src/google/stacktrace.h
index 6132d55..a70ade2 100644
--- a/src/google/stacktrace.h
+++ b/src/google/stacktrace.h
@@ -35,7 +35,6 @@
 #ifndef _GOOGLE_STACKTRACE_H
 #define _GOOGLE_STACKTRACE_H
 
-extern int GetStackTrace(void** result, int max_depth, int skip_count);
 // Skip the most recent "skip_count" stack frames (also skips the
 // frame generated for the "GetStackTrace" routine itself), and then
 // record the pc values for upto the next "max_depth" frames in
@@ -59,5 +58,24 @@ extern int GetStackTrace(void** result, int max_depth, int skip_count);
 //
 // This routine currently produces non-empty stack traces only for
 // Linux/x86 machines.
+extern int GetStackTrace(void** result, int max_depth, int skip_count);
+
+// Compute the extent of the function call stack by traversing it up.
+// Input: "sp" is either NULL, or is a stack pointer
+// (e.g., a value of the ebp register for x86).
+// If "sp == NULL", the stack pointer for the current thread is implied.
+//
+// Stores the range of addresses covered by the specified stack
+// in *stack_top and *stack_bottom.  Returns true if successful,
+// false on failure (e.g., an inability to walk the stack).
+//
+// If it returns true, *stack_top and *stack_bottom respectively correspond
+// to the most-recetly pushed frame of the call stack
+// and the intial frame that started the call stack.
+// Their relative ordering as integers though
+// depends on the underlying machine's architecture.
+extern bool GetStackExtent(void* sp,
+                           void** stack_top,
+                           void** stack_bottom);
 
 #endif /* _GOOGLE_STACKTRACE_H */
diff --git a/src/heap-checker-bcad.cc b/src/heap-checker-bcad.cc
index 878bc8f..64cfbb1 100644
--- a/src/heap-checker-bcad.cc
+++ b/src/heap-checker-bcad.cc
@@ -32,14 +32,17 @@
 //
 // Author: Maxim Lifantsev
 //
-// A file to ensure that components of heap leak checker run
-// before all global object constructors
-// and after all global object destructors.
-//
-// This file must be the last google library any google binary links against
-// (we achieve this by making //base:base depend
-//  on //base:heap-checker-bcad, the library containing this .cc)
+// A file to ensure that components of heap leak checker run before
+// all global object constructors and after all global object
+// destructors.
 //
+// This file must be the last library any binary links against.
+// Otherwise, the heap checker may not be able to run early enough to
+// catalog all the global objects in your program.  If this happens,
+// and later in the program you allocate memory and have one of these
+// "uncataloged" global objects point to it, the heap checker will
+// consider that allocation to be a leak, even though it's not (since
+// the allocated object is reachable from global data and hence "live").
 
 #include <stdlib.h>      // for abort()
 
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index 60701d3..c0ea994 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -37,7 +37,7 @@
 //       because we might be running before/after the logging susbystem
 //       is set up correctly.
 
-#include "google/perftools/config.h"
+#include "config.h"
 
 #include <string>
 #include <vector>
@@ -49,33 +49,49 @@
 #include <unistd.h>
 #include <string.h>
 #include <sys/stat.h>
-#include <netinet/in.h>         // inet_ntoa
-#include <arpa/inet.h>          // inet_ntoa
-#include <execinfo.h>           // backtrace
 #include <sys/poll.h>
 #include <sys/types.h>
 #include <fcntl.h>
 #include <assert.h>
 
+#ifdef HAVE_LINUX_PTRACE_H
+#include <linux/ptrace.h>
+#endif
+#ifdef HAVE_SYSCALL_H
+#include <syscall.h>
+#endif
+
 #include <google/stacktrace.h>
 #include <google/heap-profiler.h>
 #include <google/heap-checker.h>
 #include "heap-profiler-inl.h"
+#include "addressmap-inl.h"
 
+#include "base/basictypes.h"
 #include "base/commandlineflags.h"
 #include "base/logging.h"
+#include "base/elfcore.h"              // for i386_regs
+#include "base/thread_lister.h"
 
 #ifdef HAVE_INTTYPES_H
 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
 // TODO: have both SCNd64 and PRId64.  We don't bother since they're the same
-#define LLX    "%"SCNx64               // how to read 64-bit hex
-#define LLD    "%"SCNd64               // how to read 64-bit deciman
+#define LLX    "%"SCNx64             // how to read 64-bit hex
+#define LLD    "%"SCNd64             // how to read 64-bit deciman
 #else
-#define LLX    "%llx"                  // hope for the best
+#define LLX    "%llx"                // hope for the best
 #define LLD    "%lld"
 #endif
 
+#ifndef	PATH_MAX
+#ifdef MAXPATHLEN
+#define	PATH_MAX	MAXPATHLEN
+#else
+#define	PATH_MAX	4096         // seems conservative for max filename len!
+#endif
+#endif
+
 using std::string;
 using std::map;
 using std::vector;
@@ -90,7 +106,7 @@ using HASH_NAMESPACE::hash_set;
 //----------------------------------------------------------------------
 
 DEFINE_bool(heap_check_report, true,
-            "If overall heap check reports the found leaks via pprof");
+            "If overall heap check should report the found leaks via pprof");
 
 // These are not so much flags as internal configuration parameters that
 // are set based on the argument to StartFromMain().
@@ -108,45 +124,57 @@ DEFINE_bool(heap_check_strict_check, true,
             // heap_check_strict_check == false
             // is useful only when heap_check_before_constructors == false
 
-DEFINE_bool(heap_check_ignore_told_live, true,
-            "If overall heap check is to ignore heap objects reachable "
-            "from what was given to HeapLeakChecker::IgnoreObject");
-
 DEFINE_bool(heap_check_ignore_global_live, true,
             "If overall heap check is to ignore heap objects reachable "
             "from the global data");
 
 DEFINE_bool(heap_check_ignore_thread_live, true,
             "If set to true, objects reachable from thread stacks "
-            "are not reported as leaks");
+            "and registers are not reported as leaks");
 
-DEFINE_string(heap_profile_pprof, INSTALL_PREFIX "/bin/pprof",
-              "Path to pprof to call for full leaks checking.");
+// Normally we'd make this a flag, but we can't do that in this case
+// because it may need to be accessed after global destructors have
+// started to run, which would delete flags.  Instead we make it a pointer,
+// which will never get destroyed.
+static string* flags_heap_profile_pprof = NULL;
 
 // External accessors for the above
 void HeapLeakChecker::set_heap_check_report(bool b) {
   FLAGS_heap_check_report = b;
 }
 void HeapLeakChecker::set_pprof_path(const char* s) {
-  FLAGS_heap_profile_pprof = s;
+  if (flags_heap_profile_pprof == NULL) {
+    flags_heap_profile_pprof = new string(s);
+  } else {
+    flags_heap_profile_pprof->assign(s);
+  }
 }
+
 void HeapLeakChecker::set_dump_directory(const char* s) {
-  dump_directory_ = s;
+  if (dump_directory_ == NULL)  dump_directory_ = new string;
+  dump_directory_->assign(s);
 }
 
 bool HeapLeakChecker::heap_check_report() {
   return FLAGS_heap_check_report;
 }
 const char* HeapLeakChecker::pprof_path() {
-  return FLAGS_heap_profile_pprof.c_str();
+  if (flags_heap_profile_pprof == NULL) {
+    return INSTALL_PREFIX "/bin/pprof";  // our default value
+  } else {
+    return flags_heap_profile_pprof->c_str();
+  }
 }
 const char* HeapLeakChecker::dump_directory() {
-  return dump_directory_.c_str();
+  if (dump_directory_ == NULL) {
+    return "/tmp";  // our default value
+  } else {
+    return dump_directory_->c_str();
+  }
 }
 
 //----------------------------------------------------------------------
 
-DECLARE_string(heap_profile);    // in heap-profiler.cc
 DECLARE_int32(heap_profile_log); // in heap-profiler.cc
 
 //----------------------------------------------------------------------
@@ -154,7 +182,7 @@ DECLARE_int32(heap_profile_log); // in heap-profiler.cc
 //----------------------------------------------------------------------
 
 // Global lock for the global data of this module
-static pthread_mutex_t hc_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t heap_checker_lock = PTHREAD_MUTEX_INITIALIZER;
 
 // the disabled regexp accumulated
 // via HeapLeakChecker::DisableChecksIn
@@ -162,8 +190,12 @@ static string* disabled_regexp = NULL;
 
 //----------------------------------------------------------------------
 
+// Heap profile prefix for leak checking profiles,
+static string* profile_prefix = NULL;
+
 // whole-program heap leak checker
 static HeapLeakChecker* main_heap_checker = NULL;
+
 // if we are doing (or going to do) any kind of heap-checking
 // heap_checker_on == true implies HeapProfiler::is_on_ == true
 static bool heap_checker_on = false;
@@ -181,8 +213,7 @@ static bool constructor_heap_profiling = false;
 enum ObjectPlacement {
   MUST_BE_ON_HEAP,  // Must point to a live object of the matching size in the
                     // map of the heap in HeapProfiler when we get to it.
-  WAS_ON_HEAP,      // Is a live object on heap, but now deleted from
-                    // the map of the heap objects in HeapProfiler.
+  IGNORED_ON_HEAP,  // Is a live (ignored) object on heap.
   IN_GLOBAL_DATA,   // Is part of global data region of the executable.
   THREAD_STACK,     // Part of a thread stack
 };
@@ -214,28 +245,144 @@ static IgnoredObjectsMap* ignored_objects = NULL;
 typedef vector<AllocObject> LiveObjectsStack;
 static LiveObjectsStack* live_objects = NULL;
 
-// This variable is set to non-NULL by thread/thread.cc if it has
-// threads whose stacks have to be scanned.
-typedef void (*StackRangeIterator)(void*, void*);
-int (*heap_checker_thread_stack_extractor)(StackRangeIterator) = NULL;
-
-
-// This routine is called by thread code for every thread stack it knows about
-static void RegisterStackRange(void* base, void* top) {
-  char* p1 = min(reinterpret_cast<char*>(base), reinterpret_cast<char*>(top));
-  char* p2 = max(reinterpret_cast<char*>(base), reinterpret_cast<char*>(top));
-  HeapProfiler::MESSAGE(1, "HeapChecker: Thread stack %p..%p (%d bytes)\n",
-                        p1, p2, int(p2-p1));
+// A placeholder to fill-in the starting values for live_objects
+// for each library so we can keep the library-name association for logging.
+typedef map<string, LiveObjectsStack> LibraryLiveObjectsStacks;
+static LibraryLiveObjectsStacks* library_live_objects = NULL;
+
+// The disabled program counter addresses for profile dumping
+// that are registered with HeapLeakChecker::DisableChecksUp
+typedef hash_set<uintptr_t> DisabledAddressSet;
+static DisabledAddressSet* disabled_addresses = NULL;
+
+// Value stored in the map of disabled address ranges;
+// its key is the end of the address range.
+// We'll ignore allocations with a return address in a disabled range
+// if the address occurs at 'max_depth' or less in the stack trace.
+struct HeapLeakChecker::RangeValue {
+  uintptr_t start_address;  // the start of the range
+  int       max_depth;      // the maximal stack depth to disable at
+};
+typedef map<uintptr_t, HeapLeakChecker::RangeValue> DisabledRangeMap;
+// The disabled program counter address ranges for profile dumping
+// that are registered with HeapLeakChecker::DisableChecksFromTo.
+static DisabledRangeMap* disabled_ranges = NULL;
+
+// Stack range map: maps from the start address to the end address.
+// These are used to not disable all allocated memory areas
+// that are used for stacks so that we do treat stack pointers
+// from dead stack frmes as live.
+typedef map<uintptr_t, uintptr_t> StackRangeMap;
+static StackRangeMap* stack_ranges = NULL;
+
+// We put the registers from other threads here
+// to make pointers stored in them live.
+static vector<void*>* thread_registers = NULL;
+
+// This routine is called for every thread stack we know about.
+static void RegisterStackRange(void* top, void* bottom) {
+  char* p1 = min(reinterpret_cast<char*>(top),
+                 reinterpret_cast<char*>(bottom));
+  char* p2 = max(reinterpret_cast<char*>(top),
+                 reinterpret_cast<char*>(bottom));
+  if (HeapProfiler::kMaxLogging) {
+    HeapProfiler::MESSAGE(1, "HeapChecker: Thread stack %p..%p (%d bytes)\n",
+                          p1, p2, int(p2-p1));
+  }
   live_objects->push_back(AllocObject(p1, uintptr_t(p2-p1), THREAD_STACK));
+  stack_ranges->insert(make_pair(reinterpret_cast<uintptr_t>(p1),
+                                 reinterpret_cast<uintptr_t>(p2)));
+}
+
+// Iterator for HeapProfiler::allocation_ to make objects allocated from
+// disabled code regions live.
+static void MakeDisabledLiveCallback(void* ptr, HeapProfiler::AllocValue v) {
+  bool stack_disable = false;
+  bool range_disable = false;
+  for (int depth = 0; depth < v.bucket->depth_; depth++) {
+    uintptr_t addr = reinterpret_cast<uintptr_t>(v.bucket->stack_[depth]);
+    if (disabled_addresses  &&
+        disabled_addresses->find(addr) != disabled_addresses->end()) {
+      stack_disable = true;  // found; dropping
+      break;
+    }
+    if (disabled_ranges) {
+      DisabledRangeMap::const_iterator iter
+        = disabled_ranges->upper_bound(addr);
+      if (iter != disabled_ranges->end()) {
+        assert(iter->first > addr);
+        if (iter->second.start_address < addr  &&
+            iter->second.max_depth > depth) {
+          range_disable = true;  // in range; dropping
+          break;
+        }
+      }
+    }
+  }
+  if (stack_disable || range_disable) {
+    uintptr_t start_address = reinterpret_cast<uintptr_t>(ptr);
+    uintptr_t end_address = start_address + v.bytes;
+    StackRangeMap::const_iterator iter
+      = stack_ranges->lower_bound(start_address);
+    if (iter != stack_ranges->end()) {
+      assert(iter->first >= start_address);
+      if (iter->second <= end_address) {
+        // We do not disable (treat as live) whole allocated regions
+        // if they are used to hold thread call stacks
+        // (i.e. when we find a stack inside).
+        // The reason is that we'll treat as live the currently used
+        // stack portions anyway (see RegisterStackRange),
+        // and the rest of the region where the stack lives can well
+        // contain outdated stack variables which are not live anymore,
+        // hence should not be treated as such.
+        HeapProfiler::MESSAGE(2, "HeapChecker: "
+                                 "Not %s-disabling %"PRIuS" bytes at %p"
+                                 ": have stack inside: %p-%p\n",
+                                 (stack_disable ? "stack" : "range"),
+                                 v.bytes, ptr,
+                                 (void*)iter->first, (void*)iter->second);
+        return;
+      }
+    }
+    if (HeapProfiler::kMaxLogging) {
+      HeapProfiler::MESSAGE(2, "HeapChecker: "
+                               "%s-disabling %"PRIuS" bytes at %p\n",
+                               (stack_disable ? "stack" : "range"),
+                               v.bytes, ptr);
+    }
+    live_objects->push_back(AllocObject(ptr, v.bytes, MUST_BE_ON_HEAP));
+  }
 }
 
 static int GetStatusOutput(const char*  command, string* output) {
+  // We don't want the heapchecker to run in the child helper
+  // processes that we fork() as part of this process' heap check.
+
+  // setenv() can call realloc(), so we don't want to call it while
+  // the heap profiling is disabled. Instead just overwrite the final
+  // char of the env var name, so it has a different name and gets
+  // ignored in the child.  We assume the env looks like 'VAR=VALUE\0VAR=VALUE'
+  char *env_heapcheck = getenv("HEAPCHECK");
+  char *env_ldpreload = getenv("LD_PRELOAD");
+
+  if (env_heapcheck) {
+    assert(env_heapcheck[-1] == '=');
+    env_heapcheck[-2] = '?';
+  }
+  if (env_ldpreload) {
+    assert(env_ldpreload[-1] == '=');
+    env_ldpreload[-2] = '?';
+  }
+
   FILE* f = popen(command, "r");
   if (f == NULL) {
     fprintf(stderr, "popen returned NULL!!!\n");  // This shouldn't happen
     exit(1);
   }
 
+  if (env_heapcheck) env_heapcheck[-2] = 'K';
+  if (env_ldpreload) env_heapcheck[-2] = 'D';
+
   const int kMaxOutputLine = 10000;
   char line[kMaxOutputLine];
   while (fgets(line, sizeof(line), f) != NULL) {
@@ -246,9 +393,11 @@ static int GetStatusOutput(const char*  command, string* output) {
   return pclose(f);
 }
 
-void HeapLeakChecker::IgnoreGlobalDataLocked(const char* library,
-                                             uint64 start_address,
-                                             uint64 file_offset) {
+// A ProcMapsTask to record global data to ignore later
+// that belongs to 'library' mapped at 'start_address' with 'file_offset'.
+static void RecordGlobalDataLocked(const char* library,
+                                   uint64 start_address,
+                                   uint64 file_offset) {
   HeapProfiler::MESSAGE(2, "HeapChecker: Looking into %s\n", library);
   string command("/usr/bin/objdump -hw ");
   command.append(library);
@@ -279,8 +428,9 @@ void HeapLeakChecker::IgnoreGlobalDataLocked(const char* library,
                               sec_name,
                               reinterpret_cast<void*>(real_start),
                               sec_size);
-        live_objects->push_back(AllocObject(reinterpret_cast<void*>(real_start),
-                                            sec_size, IN_GLOBAL_DATA));
+        (*library_live_objects)[library].
+          push_back(AllocObject(reinterpret_cast<void*>(real_start),
+                                sec_size, IN_GLOBAL_DATA));
       }
     }
     // skip to the next line
@@ -288,7 +438,6 @@ void HeapLeakChecker::IgnoreGlobalDataLocked(const char* library,
     if (next == NULL) break;
     output_start = next + 1;
   }
-  IgnoreLiveObjectsLocked("in globals of\n  ", library);
 }
 
 // See if 'library' from /proc/self/maps has base name 'library_base'
@@ -300,20 +449,25 @@ static bool IsLibraryNamed(const char* library, const char* library_base) {
 }
 
 void HeapLeakChecker::DisableLibraryAllocs(const char* library,
-                                           uint64 start_address,
-                                           uint64 end_address) {
+                                           void* start_address,
+                                           void* end_address) {
   // TODO(maxim): maybe this should be extended to also use objdump
   //              and pick the text portion of the library more precisely.
   if (IsLibraryNamed(library, "/libpthread")  ||
         // pthread has a lot of small "system" leaks we don't care about
       IsLibraryNamed(library, "/libdl")  ||
+      IsLibraryNamed(library, "/ld")  ||
         // library loaders leak some "system" heap that we don't care about
-      IsLibraryNamed(library, "/ld")) {
+      IsLibraryNamed(library, "/libcrypto")
+      // Sometimes libcrypto of OpenSSH is compiled with -fomit-frame-pointer
+      // (any library can be, of course, but this one often is because speed
+      // is so important for making crypto usable).  We ignore all its
+      // allocations because we can't see the call stacks.
+     ) {
     HeapProfiler::MESSAGE(1, "HeapChecker: "
                           "Disabling direct allocations from %s :\n",
                           library);
-    DisableChecksFromTo(reinterpret_cast<void*>(start_address),
-                        reinterpret_cast<void*>(end_address),
+    DisableChecksFromTo(start_address, end_address,
                         1);  // only disable allocation calls directly
                              // from the library code
   }
@@ -321,6 +475,14 @@ void HeapLeakChecker::DisableLibraryAllocs(const char* library,
 
 void HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
   FILE* const fp = fopen("/proc/self/maps", "r");
+  if (!fp) {
+    int errsv = errno;
+    HeapProfiler::MESSAGE(-1, "HeapChecker:  "
+                          "Could not open /proc/self/maps: errno=%d.  "
+                          "Libraries will not be handled correctly.\n",
+                          errsv);
+    return;
+  }
   char proc_map_line[1024];
   while (fgets(proc_map_line, sizeof(proc_map_line), fp) != NULL) {
     // All lines starting like
@@ -340,16 +502,17 @@ void HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
         strncmp(permissions, "r-xp", 4) == 0  &&  inode != 0) {
       if (start_address >= end_address)  abort();
       DisableLibraryAllocs(proc_map_line + size,
-                           start_address, end_address);
+                           reinterpret_cast<void*>(start_address),
+                           reinterpret_cast<void*>(end_address));
     }
-    if (proc_maps_task == IGNORE_GLOBAL_DATA_LOCKED  && 
+    if (proc_maps_task == RECORD_GLOBAL_DATA_LOCKED  &&
         // grhat based on Red Hat Linux 9
-        (strncmp(permissions, "rw-p", 4) == 0 ||
+        (strncmp(permissions, "rw-p", 4) == 0  ||
          // Fedora Core 3
-         strncmp(permissions, "rwxp", 4) == 0) &&
+         strncmp(permissions, "rwxp", 4) == 0)  &&
         inode != 0) {
       if (start_address >= end_address)  abort();
-      IgnoreGlobalDataLocked(proc_map_line + size, start_address, file_offset);
+      RecordGlobalDataLocked(proc_map_line + size, start_address, file_offset);
     }
   }
   fclose(fp);
@@ -359,26 +522,123 @@ void HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
 static int64 live_objects_total = 0;
 static int64 live_bytes_total = 0;
 
-// This pointer needs to be outside, rather than inside, the function
-// HeapLeakChecker::IgnoreAllLiveObjectsLocked() so that the compiler, in
-// this case gcc 3.4.1, does not complain that it is an unused variable.
-// Nevertheless, the value's not actually used elsewhere, just retained.
-static IgnoredObjectsMap* reach_ignored_objects = NULL;
+// Arguments from the last call to IgnoreLiveThreads,
+// so we can resume the threads later.
+static int last_num_threads = 0;
+static pid_t* last_thread_pids = NULL;
+
+// Callback for GetAllProcessThreads to ignore
+// thread stacks and registers for all our threads.
+static int IgnoreLiveThreads(void* parameter,
+                             int num_threads,
+                             pid_t* thread_pids) {
+  last_num_threads = num_threads;
+  assert(last_thread_pids == NULL);
+  last_thread_pids = new pid_t[num_threads];
+  memcpy(last_thread_pids, thread_pids, num_threads * sizeof(pid_t));
+
+  int failures = 0;
+  for (int i = 0; i < num_threads; ++i) {
+    if (HeapProfiler::kMaxLogging) {
+      HeapProfiler::MESSAGE(2, "HeapChecker: Handling thread with pid %d\n",
+                            thread_pids[i]);
+    }
+#if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYSCALL_H) && defined(DUMPER)
+    i386_regs thread_regs;
+#define sys_ptrace(r,p,a,d)  syscall(SYS_ptrace, (r), (p), (a), (d))
+    // We use sys_ptrace to avoid thread locking
+    // because this is called from GetAllProcessThreads
+    // when all but this thread are suspended.
+    // (This does not seem to matter much though: allocations and
+    //  logging with HeapProfiler::MESSAGE seem to work just fine.)
+    if (sys_ptrace(PTRACE_GETREGS, thread_pids[i], NULL, &thread_regs) == 0) {
+      void* stack_top;
+      void* stack_bottom;
+      if (GetStackExtent((void*) thread_regs.BP, &stack_top, &stack_bottom)) {
+        // Need to use SP, not BP here to also get the data
+        // from the very last stack frame:
+        RegisterStackRange((void*) thread_regs.SP, stack_bottom);
+      } else {
+        failures += 1;
+      }
+      // Make registers live (just in case PTRACE_ATTACH resulted in some
+      // register pointers still being in the registers and not on the stack):
+      for (void** p = (void**)&thread_regs;
+           p < (void**)(&thread_regs + 1); ++p) {
+        if (HeapProfiler::kMaxLogging) {
+          HeapProfiler::MESSAGE(3, "HeapChecker: Thread register %p\n", *p);
+        }
+        thread_registers->push_back(*p);
+      }
+    } else {
+      failures += 1;
+    }
+#else
+    failures += 1;
+#endif
+  }
+  return failures;
+}
+
+// Info about the self thread stack extent
+struct HeapLeakChecker::StackExtent {
+  bool have;
+  void* top;
+  void* bottom;
+};
 
-void HeapLeakChecker::IgnoreAllLiveObjectsLocked() {
-  // the leaks of building live_objects below are ignored in our caller
-  CHECK(live_objects == NULL);
+// For this call we are free to call new/delete from this thread:
+// heap profiler will ignore them without acquiring its lock:
+void HeapLeakChecker::
+IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
+  if (live_objects)  abort();
   live_objects = new LiveObjectsStack;
-  if (FLAGS_heap_check_ignore_thread_live &&
-      (heap_checker_thread_stack_extractor != NULL)) {
-    int drop = (*heap_checker_thread_stack_extractor)(&RegisterStackRange);
-    if (drop > 0) {
+  thread_registers = new vector<void*>;
+  IgnoreObjectLocked(thread_registers, true);
+    // in case we are not ignoring global data
+  stack_ranges = new StackRangeMap;
+  if (HeapProfiler::ignored_objects_)  abort();
+  HeapProfiler::ignored_objects_ = new HeapProfiler::IgnoredObjectSet;
+  // Record global data as live:
+  // We need to do it before we stop the threads in GetAllProcessThreads below;
+  // otherwise deadlocks are possible
+  // when we try to fork to execute objdump in UseProcMaps.
+  if (FLAGS_heap_check_ignore_global_live) {
+    library_live_objects = new LibraryLiveObjectsStacks;
+    UseProcMaps(RECORD_GLOBAL_DATA_LOCKED);
+  }
+  // Ignore all thread stacks:
+  if (FLAGS_heap_check_ignore_thread_live) {
+    // We fully suspend the threads right here before any liveness checking
+    // and keep them suspended for the whole time of liveness checking
+    // (they can't (de)allocate due to profiler's lock but they could still
+    //  mess with the pointer graph while we walk it).
+    int r = GetAllProcessThreads(NULL, IgnoreLiveThreads);
+    if (r == -1) {
+      HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; "
+                               "may get false leak reports\n");
+    } else if (r != 0) {
       HeapProfiler::MESSAGE(0, "HeapChecker: Thread stacks not found "
-                            "for %d threads; may get false leak reports\n",
-                            drop);
+                               "for %d threads; may get false leak reports\n",
+                            r);
     }
+    IgnoreLiveObjectsLocked("thread (stack) data", "");
+  }
+  // Register our own stack:
+  if (HeapProfiler::kMaxLogging) {
+    HeapProfiler::MESSAGE(2, "HeapChecker: Handling self thread with pid %d\n",
+                          getpid());
+  }
+  if (self_stack.have) {
+    RegisterStackRange(self_stack.top, self_stack.bottom);
+      // DoNoLeaks sets these
+    IgnoreLiveObjectsLocked("stack data", "");
+  } else {
+    HeapProfiler::MESSAGE(0, "HeapChecker: Stack not found "
+                             "for this thread; may get false leak reports\n");
   }
-  if (FLAGS_heap_check_ignore_told_live && ignored_objects) {
+  // Make objects we were told to ignore live:
+  if (ignored_objects) {
     HeapProfiler::AllocValue alloc_value;
     for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin();
          object != ignored_objects->end(); ++object) {
@@ -397,30 +657,51 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked() {
         abort();
       }
     }
-    IgnoreLiveObjectsLocked("ignored", "");
+    IgnoreLiveObjectsLocked("ignored objects", "");
   }
-  // Just a pointer for reachability of ignored_objects;
-  // we can't delete them here because the deletions won't be recorded
-  // by profiler, whereas the allocations might have been.
-  reach_ignored_objects = ignored_objects;
-  ignored_objects = NULL;
+  // Make code-address-disabled objects live and ignored:
+  // This in particular makes all thread-specific data live
+  // because the basic data structure to hold pointers to thread-specific data
+  // is allocated from libpthreads and we have range-disabled that
+  // library code with UseProcMaps(DISABLE_LIBRARY_ALLOCS);
+  // so now we declare all thread-specific data reachable from there as live.
+  HeapProfiler::allocation_->Iterate(MakeDisabledLiveCallback);
+  IgnoreLiveObjectsLocked("disabled code", "");
+  // Actually make global data live:
   if (FLAGS_heap_check_ignore_global_live) {
-    UseProcMaps(IGNORE_GLOBAL_DATA_LOCKED);
+    for (LibraryLiveObjectsStacks::iterator l = library_live_objects->begin();
+         l != library_live_objects->end(); ++l) {
+      if (live_objects->size()) abort();
+      live_objects->swap(l->second);
+      IgnoreLiveObjectsLocked("in globals of\n  ", l->first.c_str());
+    }
+    delete library_live_objects;
+  }
+  // Can now resume the threads:
+  if (FLAGS_heap_check_ignore_thread_live) {
+    ResumeAllProcessThreads(last_num_threads, last_thread_pids);
+    delete [] last_thread_pids;
+    last_thread_pids = NULL;
   }
   if (live_objects_total) {
     HeapProfiler::MESSAGE(0, "HeapChecker: "
-                          "Not reporting "LLD" reachable "
+                          "Ignoring "LLD" reachable "
                           "objects of "LLD" bytes\n",
                           live_objects_total, live_bytes_total);
   }
   // Free these: we made them here and heap profiler never saw them
   delete live_objects;
   live_objects = NULL;
+  ignored_objects->erase(reinterpret_cast<uintptr_t>(thread_registers));
+  delete thread_registers;
+  thread_registers = NULL;
+  delete stack_ranges;
+  stack_ranges = NULL;
 }
 
-// This function irreparably changes HeapProfiler's state by dropping from it
-// the objects we consider live here.
-// But we don't care, since it is called only at program exit.
+// This function does not change HeapProfiler's state:
+// we record ignored live objects in HeapProfiler::ignored_objects_
+// instead of modifying the heap profile.
 void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
                                               const char* name2) {
   int64 live_object_count = 0;
@@ -432,8 +713,9 @@ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
     live_objects->pop_back();
     HeapProfiler::AllocValue alloc_value;
     if (place == MUST_BE_ON_HEAP  &&
-        HeapProfiler::HaveOnHeapLocked(&object, &alloc_value)) {
-      HeapProfiler::RecordFreeLocked(object);  // drop it from the profile
+        HeapProfiler::HaveOnHeapLocked(&object, &alloc_value)  &&
+        HeapProfiler::ignored_objects_
+          ->insert(reinterpret_cast<uintptr_t>(object)).second) {
       live_object_count += 1;
       live_byte_count += size;
     }
@@ -460,13 +742,17 @@ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
       void* current_object = object;
       reinterpret_cast<char*&>(object) += alignment;
       size -= alignment;
-      HeapProfiler::MESSAGE(6, "HeapChecker: "
+      if (ptr == NULL)  continue;
+      HeapProfiler::MESSAGE(8, "HeapChecker: "
                             "Trying pointer to %p at %p\n",
                             ptr, current_object);
-      // Do not need the following since the data for live_objects
+      // Do not need the following since the data for these
       // is not recorded by heap-profiler:
-      // if (ptr == live_objects)  continue;
-      if (HeapProfiler::HaveOnHeapLocked(&ptr, &alloc_value)) {
+      // if (ptr == live_objects  ||
+      //     ptr == HeapProfiler::ignored_objects_)  continue;
+      if (HeapProfiler::HaveOnHeapLocked(&ptr, &alloc_value)  &&
+          HeapProfiler::ignored_objects_
+            ->insert(reinterpret_cast<uintptr_t>(ptr)).second) {
         // We take the (hopefully low) risk here of encountering by accident
         // a byte sequence in memory that matches an address of
         // a heap object which is in fact leaked.
@@ -476,11 +762,10 @@ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
                               "Found pointer to %p"
                               " of %"PRIuS" bytes at %p\n",
                               ptr, alloc_value.bytes, current_object);
-        HeapProfiler::RecordFreeLocked(ptr);  // drop it from the profile
         live_object_count += 1;
         live_byte_count += alloc_value.bytes;
         live_objects->push_back(AllocObject(ptr, alloc_value.bytes,
-                                            WAS_ON_HEAP));
+                                            IGNORED_ON_HEAP));
       }
     }
   }
@@ -508,16 +793,35 @@ void HeapLeakChecker::DisableChecksUp(int stack_frames) {
 
 void HeapLeakChecker::DisableChecksAt(void* address) {
   if (!heap_checker_on) return;
-  if (pthread_mutex_lock(&hc_lock) != 0)  abort();
+  if (pthread_mutex_lock(&heap_checker_lock) != 0)  abort();
   DisableChecksAtLocked(address);
-  if (pthread_mutex_unlock(&hc_lock) != 0)  abort();
+  if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
+}
+
+bool HeapLeakChecker::HaveDisabledChecksUp(int stack_frames) {
+  if (!heap_checker_on) return false;
+  if (stack_frames < 1)  abort();
+  void* stack[1];
+  if (GetStackTrace(stack, 1, stack_frames) != 1)  abort();
+  return HaveDisabledChecksAt(stack[0]);
+}
+
+bool HeapLeakChecker::HaveDisabledChecksAt(void* address) {
+  if (!heap_checker_on) return false;
+  if (pthread_mutex_lock(&heap_checker_lock) != 0)  abort();
+  bool result = disabled_addresses != NULL  &&
+                disabled_addresses->
+                  find(reinterpret_cast<uintptr_t>(address)) !=
+                disabled_addresses->end();
+  if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
+  return result;
 }
 
 void HeapLeakChecker::DisableChecksIn(const char* pattern) {
   if (!heap_checker_on) return;
-  if (pthread_mutex_lock(&hc_lock) != 0)  abort();
+  if (pthread_mutex_lock(&heap_checker_lock) != 0)  abort();
   DisableChecksInLocked(pattern);
-  if (pthread_mutex_unlock(&hc_lock) != 0)  abort();
+  if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
 }
 
 void* HeapLeakChecker::GetDisableChecksStart() {
@@ -539,14 +843,24 @@ void HeapLeakChecker::DisableChecksToHereFrom(void* start_address) {
 
 void HeapLeakChecker::IgnoreObject(void* ptr) {
   if (!heap_checker_on) return;
+  if (pthread_mutex_lock(&heap_checker_lock) != 0)  abort();
+  IgnoreObjectLocked(ptr, false);
+  if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
+}
+
+void HeapLeakChecker::IgnoreObjectLocked(void* ptr, bool profiler_locked) {
   HeapProfiler::AllocValue alloc_value;
-  if (pthread_mutex_lock(&hc_lock) != 0)  abort();
-  if (HeapProfiler::HaveOnHeap(&ptr, &alloc_value)) {
+  if (profiler_locked ? HeapProfiler::HaveOnHeapLocked(&ptr, &alloc_value)
+                      : HeapProfiler::HaveOnHeap(&ptr, &alloc_value)) {
     HeapProfiler::MESSAGE(1, "HeapChecker: "
                           "Going to ignore live object "
                           "at %p of %"PRIuS" bytes\n",
                           ptr, alloc_value.bytes);
-    if (ignored_objects == NULL)  ignored_objects = new IgnoredObjectsMap;
+    if (ignored_objects == NULL)  {
+      ignored_objects = new IgnoredObjectsMap;
+      IgnoreObjectLocked(ignored_objects, profiler_locked);
+        // ignore self in case we are not ignoring global data
+    }
     if (!ignored_objects->insert(make_pair(reinterpret_cast<uintptr_t>(ptr),
                                            alloc_value.bytes)).second) {
       HeapProfiler::MESSAGE(-1, "HeapChecker: "
@@ -554,13 +868,12 @@ void HeapLeakChecker::IgnoreObject(void* ptr) {
       abort();
     }
   }
-  if (pthread_mutex_unlock(&hc_lock) != 0)  abort();
 }
 
 void HeapLeakChecker::UnIgnoreObject(void* ptr) {
   if (!heap_checker_on) return;
   HeapProfiler::AllocValue alloc_value;
-  if (pthread_mutex_lock(&hc_lock) != 0)  abort();
+  if (pthread_mutex_lock(&heap_checker_lock) != 0)  abort();
   bool ok = HeapProfiler::HaveOnHeap(&ptr, &alloc_value);
   if (ok) {
     ok = false;
@@ -578,7 +891,7 @@ void HeapLeakChecker::UnIgnoreObject(void* ptr) {
       }
     }
   }
-  if (pthread_mutex_unlock(&hc_lock) != 0)  abort();
+  if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
   if (!ok) {
     HeapProfiler::MESSAGE(-1, "HeapChecker: "
                           "%p has not been ignored\n", ptr);
@@ -590,46 +903,88 @@ void HeapLeakChecker::UnIgnoreObject(void* ptr) {
 // HeapLeakChecker non-static functions
 //----------------------------------------------------------------------
 
+void HeapLeakChecker::DumpProfileLocked(bool start,
+                                        const StackExtent& self_stack) {
+  assert(!HeapProfiler::dumping_);  // not called from dumping code
+  HeapProfiler::MESSAGE(0, "HeapChecker: %s check \"%s\"\n",
+                        (start ? "Starting" : "Ending"), name_);
+  // Make the heap profile while letting our thread work with the heap
+  // without profiling this activity into the regular heap profile,
+  // while at the same time we hold the lock
+  // and do not let other threads work with the heap:
+  assert(!HeapProfiler::self_disable_);
+  HeapProfiler::self_disabled_tid_ = pthread_self();
+  // stop normal heap profiling in our thread:
+  HeapProfiler::self_disable_ = true;
+  { // scope
+    IgnoreAllLiveObjectsLocked(self_stack);
+    HeapProfiler::dump_for_leaks_ = true;
+    string* file_name = new string(*profile_prefix + "." + name_ +
+                                   (start ? "-beg.heap" : "-end.heap"));
+    HeapProfiler::DumpLocked("leak check", file_name->c_str());
+    delete file_name;  // want explicit control of the destruction point
+    HeapProfiler::dump_for_leaks_ = false;
+    delete HeapProfiler::ignored_objects_;
+    HeapProfiler::ignored_objects_ = NULL;
+  }
+  // resume normal heap profiling in our thread:
+  HeapProfiler::self_disable_ = false;
+  // Check that we made no heap changes ourselves
+  // while normal heap profiling was paused:
+  int64 self_disabled_bytes = HeapProfiler::self_disabled_.alloc_size_ -
+                              HeapProfiler::self_disabled_.free_size_;
+  int64 self_disabled_allocs = HeapProfiler::self_disabled_.allocs_ -
+                               HeapProfiler::self_disabled_.frees_;
+  if (self_disabled_bytes != 0  ||  self_disabled_allocs != 0) {
+    HeapProfiler::MESSAGE(-1, "HeapChecker: "
+                          "internal HeapChecker leak of "LLD" objects "
+                          "and/or "LLD" bytes\n",
+                          self_disabled_allocs, self_disabled_bytes);
+    abort();
+  }
+}
+
 void HeapLeakChecker::Create(const char *name) {
   name_ = NULL;
-  if (!HeapProfiler::is_on_) return;  // fast escape
-  name_length_ = strlen(name);
-  char* n = new char[name_length_ + 4 + 1];
+  has_checked_ = false;
+  char* n = new char[strlen(name) + 1];   // do this before we lock
+  IgnoreObject(n);  // otherwise it might be treated as live due to our stack
+  if (pthread_mutex_lock(&heap_checker_lock) != 0)  abort();
   // Heap activity in other threads is paused for this whole function.
   HeapProfiler::Lock();
-  if (HeapProfiler::is_on_  &&  HeapProfiler::filename_prefix_) {
-    if (!heap_checker_on) {
-      HeapProfiler::MESSAGE(0, "HeapChecker: "
-                            "Checking was not activated via "
-                            "the heap_check command line flag. "
-                            "You might hence get more false leak reports!\n");
-      heap_checker_on = true;
-    }
-    assert(!HeapProfiler::dumping_);  // not called from dumping code
+  if (heap_checker_on) {
     assert(strchr(name, '/') == NULL);  // must be a simple name
+    assert(name_ == NULL);  // so this is not a memory leak
     name_ = n;
-    memcpy(name_, name, name_length_);
-    memcpy(name_ + name_length_, "-beg", 4 + 1);
-    // To make the profile let our thread work with the heap
-    // without profiling this while we hold the lock.
-    assert(!HeapProfiler::temp_disable_);
-    HeapProfiler::temp_disabled_tid_ = pthread_self();
-    HeapProfiler::temp_disable_ = true;
-    HeapProfiler::dump_for_leaks_ = true;
-    HeapProfiler::DumpLocked("leak check start", name_);
-    HeapProfiler::dump_for_leaks_ = false;
-    HeapProfiler::temp_disable_ = false;
-    start_inuse_bytes_ = HeapProfiler::profile_.alloc_size_ -
-                         HeapProfiler::profile_.free_size_;
-    start_inuse_allocs_ = HeapProfiler::profile_.allocs_ -
-                          HeapProfiler::profile_.frees_;
+    memcpy(name_, name, strlen(name) + 1);
+    // get our stack range to make its proper portion live
+    StackExtent self_stack;
+    self_stack.have = GetStackExtent(NULL, &self_stack.top, &self_stack.bottom);
+    DumpProfileLocked(true, self_stack);  // start
+    start_inuse_bytes_ = static_cast<size_t>(HeapProfiler::profile_.alloc_size_ -
+                                             HeapProfiler::profile_.free_size_);
+    start_inuse_allocs_ = static_cast<size_t>(HeapProfiler::profile_.allocs_ -
+                                              HeapProfiler::profile_.frees_);
+    if (HeapProfiler::kMaxLogging) {
+      HeapProfiler::MESSAGE(1, "HeapChecker: "
+                               "Start check \"%s\" profile: "
+                               "%"PRIuS"d bytes in %"PRIuS"d objects\n",
+                               name_, start_inuse_bytes_, start_inuse_allocs_);
+    }
   } else {
     HeapProfiler::MESSAGE(-1, "HeapChecker: "
-                          "Heap profiler is not active, "
+                          "Heap checker is not active, "
                           "hence checker \"%s\" will do nothing!\n", name);
+    HeapProfiler::MESSAGE(-1, "HeapChecker: "
+                          "To activate set the HEAPCHECK environment "
+                          "variable.\n");
   }
   HeapProfiler::Unlock();
-  if (name_ == NULL)  delete[] n;
+  if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
+  if (name_ == NULL) {
+    UnIgnoreObject(n);
+    delete[] n;  // must be done after we unlock
+  }
 }
 
 HeapLeakChecker::HeapLeakChecker(const char *name) {
@@ -643,84 +998,47 @@ DECLARE_int64(heap_profile_inuse_interval);
 // Save pid of main thread for using in naming dump files
 int32 HeapLeakChecker::main_thread_pid_ = getpid();
 // Directory in which to dump profiles
-string HeapLeakChecker::dump_directory_ = "/tmp";
+string* HeapLeakChecker::dump_directory_ = NULL;
 #ifdef HAVE_PROGRAM_INVOCATION_NAME
 extern char* program_invocation_name;
 extern char* program_invocation_short_name;
-const char* HeapLeakChecker::invocation_name_ = program_invocation_short_name;
-const char* HeapLeakChecker::invocation_path_ = program_invocation_name;
+static const char* invocation_name() { return program_invocation_short_name; }
+static const char* invocation_path() { return program_invocation_name; }
 #else
-const char* HeapLeakChecker::invocation_name_ = "heap-checker";
-const char* HeapLeakChecker::invocation_path_ = "heap-checker";  // I guess?
+static const char* invocation_name() { return "heap_checker"; }
+static const char* invocation_path() { return "heap_checker"; }  // I guess?
 #endif
 
-HeapLeakChecker::HeapLeakChecker(Kind kind) {
-  if (!(kind == MAIN  ||  kind == MAIN_DEBUG))  abort();
-  bool start = true;
-  if (kind == MAIN_DEBUG)  start = false;
-  if (start) {
-    if (FLAGS_heap_profile.empty()) {
-      // doing just leaks checking: no periodic dumps
-      FLAGS_heap_profile_allocation_interval = kint64max;
-      FLAGS_heap_profile_inuse_interval = kint64max;
-    }
-    char pid_buf[15];
-    snprintf(pid_buf, sizeof(pid_buf), ".%d", main_thread_pid_);
-    HeapProfilerStart((dump_directory_ + "/" +
-                       invocation_name_ +
-                       pid_buf).c_str());
-  }
+HeapLeakChecker::HeapLeakChecker() {
   Create("_main_");
 }
 
-// Copy of FLAGS_heap_profile_pprof.
-// Need this since DoNoLeaks can happen
-// after FLAGS_heap_profile_pprof is destroyed.
-static string* flags_heap_profile_pprof = &FLAGS_heap_profile_pprof;
-
-// CAVEAT: Threads, liveness, and heap leak check:
-// It might be possible for to have a race leak condition
-// for a whole-program leak check due to heap activity in other threads
-// when HeapLeakChecker::DoNoLeaks is called at program's exit.
-// It can occur if after allocating a heap object a thread does not
-// quickly make the object reachable from some global/static variable
-// or from the thread's own stack variable.
-// Good news is that the only way to achieve this for a thread seems to be
-// to keep the only pointer to an allocated object in a CPU register
-// (i.e. in particular not call any other functions).
-// Probably thread context switching and thread stack boundary
-// acquisition via heap_checker_thread_stack_extractor
-// do not make the above in-CPU-pointer scenario possible.
+ssize_t HeapLeakChecker::BytesLeaked() const {
+  if (!has_checked_) {
+    HeapProfiler::MESSAGE(-1, "HeapChecker: "
+                          "*NoLeaks|SameHeap must execute before this call\n");
+    abort();
+  }
+  return inuse_bytes_increase_;
+}
+
+ssize_t HeapLeakChecker::ObjectsLeaked() const {
+  if (!has_checked_) {
+    HeapProfiler::MESSAGE(-1, "HeapChecker: "
+                          "*NoLeaks|SameHeap must execute before this call\n");
+    abort();
+  }
+  return inuse_allocs_increase_;
+}
 
 bool HeapLeakChecker::DoNoLeaks(bool same_heap,
                                 bool do_full,
                                 bool do_report) {
   // Heap activity in other threads is paused for this function
   // until we got all profile difference info.
+  if (pthread_mutex_lock(&heap_checker_lock) != 0)  abort();
   HeapProfiler::Lock();
-  if (HeapProfiler::is_on_  &&  this == main_heap_checker) {
-    // We do this only for the main atexit check
-    // not to distort the heap profile in the other cases.
-    if (FLAGS_heap_check_ignore_told_live  ||
-        FLAGS_heap_check_ignore_thread_live  ||
-        FLAGS_heap_check_ignore_global_live) {
-      // Give other threads some time (just in case)
-      // to make live-reachable the objects that they just allocated
-      // before we got the HeapProfiler's lock:
-      poll(NULL, 0, 100);
-      if (pthread_mutex_lock(&hc_lock) != 0)  abort();
-      assert(!HeapProfiler::temp_disable_);
-      HeapProfiler::temp_disabled_tid_ = pthread_self();
-      HeapProfiler::temp_disable_ = true;
-      // For this call we are free to call new/delete from this thread:
-      // heap profiler will ignore them without acquiring its lock:
-      IgnoreAllLiveObjectsLocked();
-      HeapProfiler::temp_disable_ = false;
-      if (pthread_mutex_unlock(&hc_lock) != 0)  abort();
-    }
-  }
-  assert(!HeapProfiler::dumping_);  // not called from dumping code
-  if (HeapProfiler::is_on_  &&  HeapProfiler::filename_prefix_) {
+  if (heap_checker_on) {
     if (name_ == NULL) {
       HeapProfiler::MESSAGE(-1, "HeapChecker: "
                             "*NoLeaks|SameHeap must be called only once"
@@ -728,107 +1046,101 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
                             "after construction of a HeapLeakChecker\n");
       abort();
     }
-    memcpy(name_ + name_length_, "-end", 4 + 1);
-    // To make the profile let our thread work with the heap
-    // without profiling this while we hold the lock.
-    assert(!HeapProfiler::temp_disable_);
-    HeapProfiler::temp_disabled_tid_ = pthread_self();
-    HeapProfiler::temp_disable_ = true;
-    HeapProfiler::dump_for_leaks_ = true;
-    HeapProfiler::DumpLocked("leak check end", name_);
-    HeapProfiler::dump_for_leaks_ = false;
-    HeapProfiler::temp_disable_ = false;
-    int64 disabled_bytes = HeapProfiler::disabled_.alloc_size_ -
-                           HeapProfiler::disabled_.free_size_;
-    int64 disabled_allocs = HeapProfiler::disabled_.allocs_ -
-                            HeapProfiler::disabled_.frees_;
-    if (disabled_bytes) {
-      HeapProfiler::MESSAGE(0, "HeapChecker: "
-                            "Not reporting "LLD" disabled objects"
-                            " of "LLD" bytes\n",
-                            disabled_allocs, disabled_bytes);
-    }
-    if (FLAGS_heap_check_before_constructors  &&  this == main_heap_checker) {
-      // compare against empty initial profile
+    // get our stack range to make its proper portion live
+    StackExtent self_stack;
+    self_stack.have = GetStackExtent(NULL, &self_stack.top, &self_stack.bottom);
+    DumpProfileLocked(false, self_stack);  // end
+    const bool use_initial_profile =
+      !(FLAGS_heap_check_before_constructors  &&  this == main_heap_checker);
+    if (!use_initial_profile) {  // compare against empty initial profile
       start_inuse_bytes_ = 0;
       start_inuse_allocs_ = 0;
     }
-    int64 increase_bytes =
-      (HeapProfiler::profile_.alloc_size_ -
-       HeapProfiler::profile_.free_size_) - start_inuse_bytes_;
-    int64 increase_allocs =
-      (HeapProfiler::profile_.allocs_ -
-       HeapProfiler::profile_.frees_) - start_inuse_allocs_;
+    int64 end_inuse_bytes = HeapProfiler::profile_.alloc_size_ -
+                            HeapProfiler::profile_.free_size_;
+    int64 end_inuse_allocs = HeapProfiler::profile_.allocs_ -
+                             HeapProfiler::profile_.frees_;
+    if (HeapProfiler::kMaxLogging) {
+      HeapProfiler::MESSAGE(1, "HeapChecker: "
+                               "End check \"%s\" profile: "
+                               ""LLD" bytes in "LLD" objects\n",
+                               name_, end_inuse_bytes, end_inuse_allocs);
+    }
+    inuse_bytes_increase_ = (ssize_t)(end_inuse_bytes - start_inuse_bytes_);
+    inuse_allocs_increase_ = (ssize_t)(end_inuse_allocs - start_inuse_allocs_);
+    has_checked_ = true;
     HeapProfiler::Unlock();
+    if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
     bool see_leaks =
-      (same_heap ? (increase_bytes != 0 || increase_allocs != 0)
-                 : (increase_bytes > 0 || increase_allocs > 0));
+      (same_heap ? (inuse_bytes_increase_ != 0 || inuse_allocs_increase_ != 0)
+                 : (inuse_bytes_increase_ > 0 || inuse_allocs_increase_ > 0));
     if (see_leaks || do_full) {
-      name_[name_length_] = '\0';
       const char* gv_command_tail
-        = " --edgefraction=1e-10 --nodefraction=1e-10 --gv";
+        = " --edgefraction=1e-10 --nodefraction=1e-10 --gv 2>/dev/null";
       string ignore_re;
       if (disabled_regexp) {
-        ignore_re += " --ignore=\"^";
+        ignore_re += " --ignore='^";
         ignore_re += *disabled_regexp;
-        ignore_re += "$\"";
+        ignore_re += "$'";
       }
-      // XXX(jandrews): This fix masks a bug where we detect STL leaks
-      // spuriously because the STL allocator allocates memory and never gives
-      // it back.  This did not occur before because we overrode the STL
-      // allocator to use tcmalloc, which called our hooks appropriately.
-      // The solution is probably to find a way to ignore memory held by the
-      // STL allocator, which may cause leaks in local variables to be ignored.
+      // It would be easier to use a string here than a static buffer, but
+      // some STLs can give us spurious leak alerts (since the STL tries to
+      // do its own memory pooling), so we avoid it by using STL as little
+      // as possible for "big" objects that might require "lots" of memory.
       char command[6 * PATH_MAX + 200];
-      const char* drop_negative = same_heap ? "" : " --drop_negative";
-      if (this != main_heap_checker  ||
-          !FLAGS_heap_check_before_constructors) {
+      if (use_initial_profile) {
         // compare against initial profile only if need to
+        const char* drop_negative = same_heap ? "" : " --drop_negative";
         snprintf(command, sizeof(command), "%s --base=\"%s.%s-beg.heap\" %s ",
-                 flags_heap_profile_pprof->c_str(),
-                 HeapProfiler::filename_prefix_,
-                 name_, drop_negative);
+                 pprof_path(), profile_prefix->c_str(), name_,
+                 drop_negative);
       } else {
         snprintf(command, sizeof(command), "%s",
-                 flags_heap_profile_pprof->c_str());
+                 pprof_path());
       }
       snprintf(command + strlen(command), sizeof(command) - strlen(command),
                " %s \"%s.%s-end.heap\" %s --inuse_objects --lines",
-               invocation_path_, HeapProfiler::filename_prefix_,
+               invocation_path(), profile_prefix->c_str(),
                name_, ignore_re.c_str());
                    // --lines is important here to catch leaks when !see_leaks
       char cwd[PATH_MAX+1];
-      if (getcwd(cwd, PATH_MAX) != cwd)  abort();
+      if (getcwd(cwd, sizeof(cwd)) != cwd)  abort();
       if (see_leaks) {
         HeapProfiler::MESSAGE(-1, "HeapChecker: "
                               "Heap memory leaks of "LLD" bytes and/or "
-                              ""LLD" allocations detected by check \"%s\".\n\n"
+                              ""LLD" allocations detected by check \"%s\".\n\n",
+                              (int64)inuse_bytes_increase_,
+                              (int64)inuse_allocs_increase_,
+                              name_);
+        HeapProfiler::MESSAGE(-1, "HeapChecker: "			      
                               "To investigate leaks manually use e.g.\n"
                               "cd %s; "  // for proper symbol resolution
                               "%s%s\n\n",
-                              increase_bytes, increase_allocs, name_,
                               cwd, command, gv_command_tail);
       }
       string output;
       int checked_leaks = 0;
       if ((see_leaks && do_report) || do_full) {
-        if (access(flags_heap_profile_pprof->c_str(), X_OK|R_OK) != 0) {
+        if (access(pprof_path(), X_OK|R_OK) != 0) {
           HeapProfiler::MESSAGE(-1, "HeapChecker: "
                                 "WARNING: Skipping pprof check:"
                                 " could not run it at %s\n",
-                                flags_heap_profile_pprof->c_str());
+                                pprof_path());
         } else {
+          // We don't care about pprof's stderr as long as it
+          // succeeds with empty report:
           checked_leaks = GetStatusOutput(command, &output);
           if (checked_leaks != 0) {
             HeapProfiler::MESSAGE(-1, "ERROR: Could not run pprof at %s\n",
-                                  flags_heap_profile_pprof->c_str());
+                                  pprof_path());
             abort();
           }
         }
         if (see_leaks && output.empty() && checked_leaks == 0) {
           HeapProfiler::MESSAGE(-1, "HeapChecker: "
                                 "These must be leaks that we disabled"
-                                " (pprof succeded)!\n");
+                                " (pprof succeeded)! This check WILL FAIL"
+                                " if the binary is strip'ped!\n");
           see_leaks = false;
         }
         // do not fail the check just due to us being a stripped binary
@@ -836,16 +1148,29 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
             strstr(output.c_str(), ": no symbols") != NULL)  output.resize(0);
         if (!(see_leaks || checked_leaks == 0))  abort();
       }
+      if (see_leaks  &&  use_initial_profile) {
+        HeapProfiler::MESSAGE(-1, "HeapChecker: "
+                              "CAVEAT: Some of the reported leaks might have "
+                              "occurred before check \"%s\" was started!\n",
+                              name_);
+      }
       bool tricky_leaks = !output.empty();
       if (!see_leaks && tricky_leaks) {
         HeapProfiler::MESSAGE(-1, "HeapChecker: "
                               "Tricky heap memory leaks of"
                               " no bytes and no allocations "
-                              "detected by check \"%s\".\n"
+                              "detected by check \"%s\".\n", name_);
+        HeapProfiler::MESSAGE(-1, "HeapChecker: "
                               "To investigate leaks manually uge e.g.\n"
                               "cd %s; "  // for proper symbol resolution
                               "%s%s\n\n",
                               name_, cwd, command, gv_command_tail);
+        if (use_initial_profile) {
+          HeapProfiler::MESSAGE(-1, "HeapChecker: "
+                                "CAVEAT: Some of the reported leaks might have "
+                                "occurred before check \"%s\" was started!\n",
+                                name_);
+        }
         see_leaks = true;
       }
       if (see_leaks && do_report) {
@@ -862,6 +1187,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
     } else {
       HeapProfiler::MESSAGE(0, "HeapChecker: No leaks found\n");
     }
+    UnIgnoreObject(name_);
     delete [] name_;
     name_ = NULL;
     return !see_leaks;
@@ -873,6 +1199,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
       abort();
     }
     HeapProfiler::Unlock();
+    if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
     return true;
   }
 }
@@ -914,45 +1241,32 @@ void HeapCleaner::RunHeapCleanups() {
 
 // Program exit heap cleanup registered with atexit().
 // Will not get executed when we crash on a signal.
-void HeapLeakChecker::RunHeapCleanups(void) {
+void HeapLeakChecker::RunHeapCleanups() {
   if (heap_checker_pid == getpid()) {  // can get here (via forks?)
                                        // with other pids
     HeapCleaner::RunHeapCleanups();
     if (!FLAGS_heap_check_after_destructors) {
       DoMainHeapCheck();
-      // Disable further dumping
-      if (HeapProfiler::is_on_)
-        HeapProfilerStop();
     }
   }
 }
 
-void HeapLeakChecker::LibCPreallocate() {
-  // force various C library static allocations before we start leak-checking
-  strerror(errno);
-  struct in_addr addr;
-  addr.s_addr = INADDR_ANY;
-  inet_ntoa(addr);
-  const time_t now = time(NULL);
-  ctime(&now);
-  void *stack[1];
-  backtrace(stack, 0);
-}
-
 // Called from main() immediately after setting any requisite parameters
 // from HeapChecker and HeapProfiler.
-void HeapLeakChecker::StartFromMain(const string& heap_check_type) {
-  if (heap_check_type != "") {
+void HeapLeakChecker::InternalInitStart(const string& heap_check_type) {
+  if (heap_check_type.empty()) {
+    heap_checker_on = false;
+  } else {
+    if (main_heap_checker) {
+      // means this method was called twice.  We'll just ignore the 2nd call
+      return;
+    }
     if (!constructor_heap_profiling) {
       HeapProfiler::MESSAGE(-1, "HeapChecker: Can not start so late. "
-                            "You have to enable heap checking with\n"
-                            "             --heapcheck=..."
-                            " or a dependency on //base:heapcheck\n");
+                            "You have to enable heap checking by\n"
+                            "setting the environment variable HEAPCHECK.\n");
       abort();
     }
-    // make an indestructible copy for heap leak checking
-    // happening after global variable destruction
-    flags_heap_profile_pprof = new string(FLAGS_heap_profile_pprof);
     // Set all flags
     if (heap_check_type == "minimal") {
       // The least we can check.
@@ -960,7 +1274,6 @@ void HeapLeakChecker::StartFromMain(const string& heap_check_type) {
       FLAGS_heap_check_after_destructors = false;  // to after cleanup
                                                    // (most data is live)
       FLAGS_heap_check_strict_check = false;  // < profile check (ignore more)
-      FLAGS_heap_check_ignore_told_live = true;  // ignore all live
       FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
       FLAGS_heap_check_ignore_global_live = true;  // ignore all live
     } else if (heap_check_type == "normal") {
@@ -969,7 +1282,6 @@ void HeapLeakChecker::StartFromMain(const string& heap_check_type) {
       FLAGS_heap_check_after_destructors = false;  // to after cleanup
                                                    // (most data is live)
       FLAGS_heap_check_strict_check = true;  // == profile check (fast)
-      FLAGS_heap_check_ignore_told_live = true;  // ignore all live
       FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
       FLAGS_heap_check_ignore_global_live = true;  // ignore all live
     } else if (heap_check_type == "strict") {
@@ -979,7 +1291,6 @@ void HeapLeakChecker::StartFromMain(const string& heap_check_type) {
       FLAGS_heap_check_after_destructors = true;  // to after destructors
                                                   // (less data live)
       FLAGS_heap_check_strict_check = true;  // == profile check (fast)
-      FLAGS_heap_check_ignore_told_live = true;  // ignore all live
       FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
       FLAGS_heap_check_ignore_global_live = true;  // ignore all live
     } else if (heap_check_type == "draconian") {
@@ -988,7 +1299,6 @@ void HeapLeakChecker::StartFromMain(const string& heap_check_type) {
       FLAGS_heap_check_after_destructors = true;  // to after destructors
                                                   // (need them)
       FLAGS_heap_check_strict_check = true;  // == profile check (fast)
-      FLAGS_heap_check_ignore_told_live = false;  // no live flood (stricter)
       FLAGS_heap_check_ignore_thread_live = false;  // no live flood (stricter)
       FLAGS_heap_check_ignore_global_live = false;  // no live flood (stricter)
     } else if (heap_check_type == "as-is") {
@@ -1001,30 +1311,53 @@ void HeapLeakChecker::StartFromMain(const string& heap_check_type) {
     }
     assert(heap_checker_pid == getpid());
     heap_checker_on = true;
-    assert(HeapProfiler::is_on_);
+    if (!HeapProfiler::is_on_)  abort();
     UseProcMaps(DISABLE_LIBRARY_ALLOCS);
+      // might neeed to do this more than once
+      // if one later dynamically loads libraries that we want disabled
+
+    // make a good place and name for heap profile leak dumps
+    profile_prefix = new string(dump_directory());
+    *profile_prefix += "/";
+    *profile_prefix += invocation_name();
+    HeapProfiler::CleanupProfiles(profile_prefix->c_str());
+
+    // Finalize prefix for dumping leak checking profiles.
+    char pid_buf[15];
+    if (main_thread_pid_ == 0)  // possible if we're called before constructors
+      main_thread_pid_ = getpid();
+    snprintf(pid_buf, sizeof(pid_buf), ".%d", main_thread_pid_);
+    *profile_prefix += pid_buf;
+    assert(HeapProfiler::need_for_leaks_);
+
+    // Make sure new/delete hooks are installed properly
+    // and heap profiler is indeed able to keep track
+    // of the objects being allocated.
+    // We test this to make sure we are indeed checking for leaks.
+    HeapProfiler::AllocValue alloc_value;
+    char* test_str = new char[5];
+    void* ptr = test_str;
+    if (!HeapProfiler::HaveOnHeap(&ptr, &alloc_value))  abort();
+    ptr = test_str;
+    delete [] test_str;
+    if (HeapProfiler::HaveOnHeap(&ptr, &alloc_value))  abort();
+    // If we crash in the above code, it probably means that
+    // "nm <this_binary> | grep new" will show that tcmalloc's new/delete
+    // implementation did not get linked-in into this binary
+    // (i.e. nm will list __builtin_new and __builtin_vec_new as undefined).
+    // This is probably impossible.
+
     if (heap_check_type != "local") {
       // Schedule registered heap cleanup
       atexit(RunHeapCleanups);
       assert(main_heap_checker == NULL);
-      main_heap_checker = new HeapLeakChecker(MAIN);
-      // make sure new/delete hooks are installed properly:
-      IgnoreObject(main_heap_checker);
-      UnIgnoreObject(main_heap_checker);
-      // **
-      // ** If we crash here, it's probably because the binary is not
-      // ** linked with an instrumented malloc, such as tcmalloc.
-      // ** "nm <this_binary> | grep new" to verify.  An instrumented
-      // ** malloc is necessary for using heap-checker.
-      // **
+      main_heap_checker = new HeapLeakChecker();
     }
-  } else {
-    heap_checker_on = false;
   }
   if (!heap_checker_on  &&  constructor_heap_profiling) {
-    // turns out do not need checking in the end; stop profiling
+    // turns out do not need checking in the end; can stop profiling
     HeapProfiler::MESSAGE(0, "HeapChecker: Turning itself off\n");
-    HeapProfilerStop();
+    HeapProfiler::StopForLeaks();
   }
 }
 
@@ -1037,7 +1370,8 @@ void HeapLeakChecker::DoMainHeapCheck() {
       // (we don't use the starting profile anyway)
     bool do_full = !same_heap;  // do it if it can help ignore false leaks
     bool do_report = FLAGS_heap_check_report;
-    HeapProfiler::MESSAGE(0, "HeapChecker: Checking for memory leaks\n");
+    HeapProfiler::MESSAGE(0, "HeapChecker: "
+                             "Checking for whole-program memory leaks\n");
     if (!main_heap_checker->DoNoLeaks(same_heap, do_full, do_report)) {
       HeapProfiler::MESSAGE(-1, "ERROR: Leaks found in main heap check, aborting\n");
       abort();
@@ -1052,14 +1386,44 @@ void HeapLeakChecker::DoMainHeapCheck() {
 //----------------------------------------------------------------------
 
 void HeapLeakChecker::BeforeConstructors() {
+  // The user indicates a desire for heap-checking via the HEAPCHECK
+  // environment variable.  If it's not set, there's no way to do
+  // heap-checking.
+  if (!getenv("HEAPCHECK")) {
+    return;
+  }
+
+  // heap-checker writes out files.  Thus, for security reasons, we don't
+  // recognize the env. var. to turn on heap-checking if we're setuid.
+  if (getuid() != geteuid()) {
+    HeapProfiler::MESSAGE(0, ("HeapChecker: ignoring HEAPCHECK because "
+                              "program seems to be setuid\n"));
+    return;
+  }
+
   if (constructor_heap_profiling)  abort();
   constructor_heap_profiling = true;
-  LibCPreallocate();
-  HeapProfiler::Lock();
-  HeapProfiler::EarlyStartLocked();  // fire-up HeapProfiler hooks
+  HeapProfiler::Init();  // only necessary if our constructor runs before theirs
+  // If the user has HEAPPROFILE set, Init() will have turned on profiling.
+  // If not, we need to do it manually here.
+  HeapProfiler::StartForLeaks();
   heap_checker_on = true;
-  assert(HeapProfiler::is_on_);
-  HeapProfiler::Unlock();
+
+  // The value of HEAPCHECK is the mode they want.  If we don't
+  // recognize it, we default to "normal".
+  const char* heap_check_type = getenv("HEAPCHECK");
+  assert(heap_check_type);  // we checked that in the if above
+  if ( heap_check_type[0] == '\0') {
+    // don't turn on heap checking for missing or empty env. var.
+  } else if ( !strcmp(heap_check_type, "minimal") ||
+              !strcmp(heap_check_type, "normal") ||
+              !strcmp(heap_check_type, "strict") ||
+              !strcmp(heap_check_type, "draconian") ||
+              !strcmp(heap_check_type, "local") ) {
+    HeapLeakChecker::InternalInitStart(heap_check_type);
+  } else {
+    HeapLeakChecker::InternalInitStart("normal");         // the default
+  }
 }
 
 extern bool heap_leak_checker_bcad_variable;  // in heap-checker-bcad.cc
@@ -1081,11 +1445,9 @@ void HeapLeakChecker_AfterDestructors() {
     if (FLAGS_heap_check_after_destructors && main_heap_checker) {
       HeapLeakChecker::DoMainHeapCheck();
       poll(NULL, 0, 500);
-        // Need this hack to wait for other pthreads to exit.
-        // Otherwise tcmalloc or debugallocation find errors
-        // on a free() call from pthreads.
+      // Need this hack to wait for other pthreads to exit.
+      // Otherwise tcmalloc finds errors on a free() call from pthreads.
     }
-    if (main_heap_checker)  abort();
   }
 }
 
@@ -1096,12 +1458,12 @@ void HeapLeakChecker_AfterDestructors() {
 // These functions are at the end of the file to prevent their inlining:
 
 void HeapLeakChecker::DisableChecksInLocked(const char* pattern) {
-  // disable our leaks below for growing disabled_regexp
-  void* stack[1];
-  if (GetStackTrace(stack, 1, 1) != 1)  abort();
-  DisableChecksAtLocked(stack[0]);
   // make disabled_regexp
-  if (disabled_regexp == NULL)  disabled_regexp = new string;
+  if (disabled_regexp == NULL) {
+    disabled_regexp = new string;
+    IgnoreObjectLocked(disabled_regexp, false);
+      // in case we are not ignoring global data
+  }
   HeapProfiler::MESSAGE(1, "HeapChecker: "
                         "Disabling leaks checking in stack traces "
                         "under frames maching \"%s\"\n", pattern);
@@ -1113,16 +1475,16 @@ void HeapLeakChecker::DisableChecksFromTo(void* start_address,
                                           void* end_address,
                                           int max_depth) {
   assert(start_address < end_address);
-  // disable our leaks for constructing disabled_ranges_
-  DisableChecksUp(1);
-  if (pthread_mutex_lock(&hc_lock) != 0)  abort();
-  if (HeapProfiler::disabled_ranges_ == NULL) {
-    HeapProfiler::disabled_ranges_ = new HeapProfiler::DisabledRangeMap;
+  if (pthread_mutex_lock(&heap_checker_lock) != 0)  abort();
+  if (disabled_ranges == NULL) {
+    disabled_ranges = new DisabledRangeMap;
+    IgnoreObjectLocked(disabled_ranges, false);
+      // in case we are not ignoring global data
   }
-  HeapProfiler::RangeValue value;
+  RangeValue value;
   value.start_address = reinterpret_cast<uintptr_t>(start_address);
   value.max_depth = max_depth;
-  if (HeapProfiler::disabled_ranges_->
+  if (disabled_ranges->
         insert(make_pair(reinterpret_cast<uintptr_t>(end_address),
                          value)).second) {
     HeapProfiler::MESSAGE(1, "HeapChecker: "
@@ -1130,21 +1492,17 @@ void HeapLeakChecker::DisableChecksFromTo(void* start_address,
                           "under frame addresses between %p..%p\n",
                           start_address, end_address);
   }
-  if (pthread_mutex_unlock(&hc_lock) != 0)  abort();
+  if (pthread_mutex_unlock(&heap_checker_lock) != 0)  abort();
 }
 
 void HeapLeakChecker::DisableChecksAtLocked(void* address) {
-  if (HeapProfiler::disabled_addresses_ == NULL) {
-    HeapProfiler::disabled_addresses_ = new HeapProfiler::DisabledAddressesSet;
+  if (disabled_addresses == NULL) {
+    disabled_addresses = new DisabledAddressSet;
+    IgnoreObjectLocked(disabled_addresses, false);
+      // in case we are not ignoring global data
   }
-  // disable our leaks for constructing disabled_addresses_
-  void* stack[1];
-  if (GetStackTrace(stack, 1, 1) != 1)  abort();
-  HeapProfiler::disabled_addresses_->
-    insert(reinterpret_cast<uintptr_t>(stack[0]));
   // disable the requested address
-  if (HeapProfiler::disabled_addresses_->
-      insert(reinterpret_cast<uintptr_t>(address)).second) {
+  if (disabled_addresses->insert(reinterpret_cast<uintptr_t>(address)).second) {
     HeapProfiler::MESSAGE(1, "HeapChecker: "
                           "Disabling leaks checking in stack traces "
                           "under frame address %p\n",
diff --git a/src/heap-profiler-inl.h b/src/heap-profiler-inl.h
index c42eaba..a731cfa 100644
--- a/src/heap-profiler-inl.h
+++ b/src/heap-profiler-inl.h
@@ -39,7 +39,7 @@
 #ifndef BASE_HEAP_PROFILER_INL_H__
 #define BASE_HEAP_PROFILER_INL_H__
 
-#include <google/perftools/config.h>
+#include "config.h"
 
 #if defined HAVE_STDINT_H
 #include <stdint.h>             // to get uint16_t (ISO naming madness)
@@ -49,7 +49,7 @@
 #include <sys/types.h>          // our last best hope
 #endif
 #include <pthread.h>
-#include <google/perftools/basictypes.h>
+#include "base/basictypes.h"
 #include <google/heap-profiler.h>
 #include <map>
 #include <google/perftools/hash_set.h>
@@ -80,16 +80,7 @@ class HeapProfiler {
   };
   typedef AddressMap<AllocValue> AllocationMap;
 
-  // Value stored in the map of disabled address ranges;
-  // its key is the end of the address range.
-  // We'll ignore allocations with a return address in a disabled range
-  // if the address occurs at 'max_depth' or less in the stack trace.
-  struct RangeValue {
-    uintptr_t start_address;  // the start of the range
-    int       max_depth;      // the maximal stack depth to disable at
-  };
-  typedef STL_NAMESPACE::map<uintptr_t, RangeValue> DisabledRangeMap;
-  typedef HASH_NAMESPACE::hash_set<uintptr_t> DisabledAddressesSet;
+  typedef HASH_NAMESPACE::hash_set<uintptr_t> IgnoredObjectSet;
 
  private:  // state variables
            // NOTE: None of these have destructors that change their state.
@@ -97,19 +88,21 @@ class HeapProfiler {
 
   // Is heap-profiling on as a subsytem
   static bool is_on_;
+  // Is heap-profiling needed for heap leak checking.
+  static bool need_for_leaks_;
+  // Has Init() been called?  Used by heap-profiler to avoid initting
+  // more than once (since heap-checker may call Init() manually.)
+  static bool init_has_been_called_;
   // If we are disabling heap-profiling recording for incoming
-  // (de)allocation calls from the thread specified by temp_disabled_tid_.
+  // (de)allocation calls from the thread specified by self_disabled_tid_.
   // This is done for (de)allocations that are internal
   // to heap profiler or heap checker, so that we can hold the global
-  // profiler's lock and pause heap activity from other threads.
-  static bool temp_disable_;
-  static pthread_t temp_disabled_tid_;
-  // The disabled addresses registered
-  // with HeapLeakChecker::DisableChecksUp
-  static DisabledAddressesSet* disabled_addresses_;
-  // The disabled address ranges registered
-  // with HeapLeakChecker::DisableChecksFromTo.
-  static DisabledRangeMap* disabled_ranges_;
+  // profiler's lock and pause heap activity from other threads
+  // while working freely in our thread.
+  static bool self_disable_;
+  static pthread_t self_disabled_tid_;
+  // The ignored live object addresses for profile dumping.
+  static IgnoredObjectSet* ignored_objects_;
   // Flag if we are doing heap dump for leaks checking vs.
   // for general memory profiling
   static bool dump_for_leaks_;
@@ -119,9 +112,9 @@ class HeapProfiler {
   static Bucket total_;
   // Last dumped profile stats
   static Bucket profile_;
-  // Stats for the disabled part of the last dumped profile
-  static Bucket disabled_;
-  // Prefix used for profile file names (NULL if not ready for dumping yet)
+  // Stats for the (de)allocs disabled with the use of self_disable_
+  static Bucket self_disabled_;
+  // Prefix used for profile file names (NULL if no need for dumping yet)
   static char* filename_prefix_;
   // Map of all currently allocated object we know about
   static AllocationMap* allocation_;
@@ -147,9 +140,9 @@ class HeapProfiler {
   static void* Malloc(size_t bytes);
   static void Free(void* p);
   // Helper for HeapProfilerDump:
-  // second_prefix is not NULL when the dumped profile
-  // is to be named differently for leaks checking
-  static void DumpLocked(const char *reason, const char* second_prefix);
+  // If file_name is not NULL when it gives the name for the dumped profile,
+  // else we use the standard sequential name.
+  static void DumpLocked(const char *reason, const char* file_name);
 
  private:  // helpers of heap-checker.cc
 
@@ -165,15 +158,25 @@ class HeapProfiler {
 
   // Get bucket for current stack trace (skip "skip_count" most recent frames)
   static Bucket* GetBucket(int skip_count);
-  static int UnparseBucket(char* buf, int buflen, int bufsize, Bucket* b);
+  // Unparse bucket b and print its portion of profile dump into buf.
+  // We return the amount of space in buf that we use.  We start printing
+  // at buf + buflen, and promise not to go beyond buf + bufsize.
+  static int UnparseBucket(char* buf, int buflen, int bufsize, const Bucket* b);
+  // Add ignored_objects_ 'adjust' times (ususally -1 or 1)
+  // to the profile bucket data.
+  static void AdjustByIgnoredObjects(int adjust);
   static void RecordAlloc(void* ptr, size_t bytes, int skip_count);
   static void RecordFree(void* ptr);
-  static void RecordFreeLocked(void* ptr);
   // Activates profile collection before profile dumping.
   // Can be called before global object constructors.
   static void EarlyStartLocked();
+  // Cleanup any old profile files
+  static void CleanupProfiles(const char* prefix);
+  // Profiling subsystem starting and stopping.
   static void StartLocked(const char* prefix);
   static void StopLocked();
+  static void StartForLeaks();
+  static void StopForLeaks();
   static void NewHook(void* ptr, size_t size);
   static void DeleteHook(void* ptr);
   static void MmapHook(void* result,
@@ -203,6 +206,12 @@ class HeapProfiler {
 #endif
 ;
 
+  // Set this to true when you want maximal logging for
+  // debugging problems in heap profiler or checker themselves.
+  // We use this constant instead of logging_level in MESSAGE()
+  // to completely compile-out this extra logging in all normal cases.
+  static const bool kMaxLogging = false;
+
   // Module initialization
   static void Init();
 
diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc
index 2f476a8..bfee34d 100644
--- a/src/heap-profiler.cc
+++ b/src/heap-profiler.cc
@@ -32,7 +32,7 @@
 //
 // TODO: Log large allocations
 
-#include <google/perftools/config.h>
+#include "config.h"
 
 #include <malloc.h>
 #include <unistd.h>
@@ -52,13 +52,14 @@
 
 #include <google/heap-profiler.h>
 #include <google/stacktrace.h>
+#include <google/malloc_extension.h>
 #include <google/malloc_hook.h>
-#include <google/perftools/basictypes.h>
 
 #include "heap-profiler-inl.h"
 #include "internal_spinlock.h"
 #include "addressmap-inl.h"
 
+#include "base/basictypes.h"
 #include "base/logging.h"
 #include "base/googleinit.h"
 #include "base/commandlineflags.h"
@@ -71,20 +72,24 @@
 #define LLD    "lld"                // hope for the best
 #endif
 
+#ifndef	PATH_MAX
+#ifdef MAXPATHLEN
+#define	PATH_MAX	MAXPATHLEN
+#else
+#define	PATH_MAX	4096         // seems conservative for max filename len!
+#endif
+#endif
+
 #define LOGF  STL_NAMESPACE::cout   // where we log to; LOGF is a historical name
 
 using HASH_NAMESPACE::hash_set;
-using std::string;
-using std::sort;
+using STL_NAMESPACE::string;
+using STL_NAMESPACE::sort;
 
 //----------------------------------------------------------------------
 // Flags that control heap-profiling
 //----------------------------------------------------------------------
 
-DEFINE_string(heap_profile, "",
-              "If non-empty, turn heap-profiling on, and dump heap "
-              "profiles to a sequence of files prefixed with the "
-              "specified --heap_profile string.");
 DEFINE_int64(heap_profile_allocation_interval, 1 << 30 /*1GB*/,
              "Dump heap profiling information once every specified "
              "number of bytes allocated by the program.");
@@ -97,18 +102,6 @@ DEFINE_bool(mmap_profile, false, "If heap-profiling on, also profile mmaps");
 DEFINE_int32(heap_profile_log, 0,
              "Logging level for heap profiler/checker messages");
 
-// Prefix to which we dump heap profiles.  If empty, we do not dump.
-// Default: empty
-void HeapProfilerSetDumpPath(const char* path) {
-  if (HeapProfiler::IsOn()) {
-    HeapProfiler::MESSAGE(-1,
-      "Cannot set dump path to %s, heap profiler is already running!\n",
-      path);
-  } else {
-    FLAGS_heap_profile = path;
-  }
-}
-
 // Level of logging used by the heap profiler and heap checker (if applicable)
 // Default: 0
 void HeapProfilerSetLogLevel(int level) {
@@ -117,14 +110,14 @@ void HeapProfilerSetLogLevel(int level) {
 
 // Dump heap profiling information once every specified number of bytes
 // allocated by the program.  Default: 1GB
-void HeapProfilerSetAllocationInterval(int64 interval) {
+void HeapProfilerSetAllocationInterval(size_t interval) {
   FLAGS_heap_profile_allocation_interval = interval;
 }
 
 // Dump heap profiling information whenever the high-water 
 // memory usage mark increases by the specified number of
 // bytes.  Default: 100MB
-void HeapProfilerSetInuseInterval(int64 interval) {
+void HeapProfilerSetInuseInterval(size_t interval) {
   FLAGS_heap_profile_inuse_interval = interval;
 }
 
@@ -139,7 +132,7 @@ void HeapProfiler::MESSAGE(int level, const char* format, ...) {
   // buffering because that may invoke malloc()
   va_list ap;
   va_start(ap, format);
-  char buf[500];
+  char buf[600];
   vsnprintf(buf, sizeof(buf), format, ap);
   write(STDERR_FILENO, buf, strlen(buf));
 }
@@ -177,7 +170,7 @@ class HeapProfilerMemory {
     const size_t pagesize = getpagesize();
     size = ((size + pagesize -1 ) / pagesize) * pagesize;
 
-    HeapProfiler::MESSAGE(0, "HeapProfiler: allocating %"PRIuS
+    HeapProfiler::MESSAGE(1, "HeapProfiler: allocating %"PRIuS
                           " bytes for internal use\n", size);
     if (nblocks_ == kMaxBlocks) {
       HeapProfiler::MESSAGE(-1, "HeapProfilerMemory: Alloc out of memory\n");
@@ -262,13 +255,23 @@ void HeapProfiler::Free(void* p) {
 // So we use a simple spinlock (just like the spinlocks used in tcmalloc)
 
 static TCMalloc_SpinLock heap_lock;
-static struct timespec delay = { 0, 5000000 };  // Five milliseconds
 
 void HeapProfiler::Lock() {
+  if (kMaxLogging) {
+    // for debugging deadlocks
+    HeapProfiler::MESSAGE(10, "HeapProfiler: Lock from %d\n",
+                          int(pthread_self()));
+  }
+
   heap_lock.Lock();
 }
 
 void HeapProfiler::Unlock() {
+  if (kMaxLogging) {
+    HeapProfiler::MESSAGE(10, "HeapProfiler: Unlock from %d\n",
+                          int(pthread_self()));
+  }
+
   heap_lock.Unlock();
 }
 
@@ -280,14 +283,15 @@ void HeapProfiler::Unlock() {
 typedef HeapProfiler::Bucket Bucket;
 
 bool HeapProfiler::is_on_ = false;
-bool HeapProfiler::temp_disable_ = false;
-pthread_t HeapProfiler::temp_disabled_tid_;
-HeapProfiler::DisabledAddressesSet* HeapProfiler::disabled_addresses_ = NULL;
-HeapProfiler::DisabledRangeMap* HeapProfiler::disabled_ranges_ = NULL;
+bool HeapProfiler::init_has_been_called_ = false;
+bool HeapProfiler::need_for_leaks_ = false;
+bool HeapProfiler::self_disable_ = false;
+pthread_t HeapProfiler::self_disabled_tid_;
+HeapProfiler::IgnoredObjectSet* HeapProfiler::ignored_objects_ = NULL;
 bool HeapProfiler::dump_for_leaks_ = false;
 bool HeapProfiler::dumping_ = false;
 Bucket HeapProfiler::total_;
-Bucket HeapProfiler::disabled_;
+Bucket HeapProfiler::self_disabled_;
 Bucket HeapProfiler::profile_;
 char* HeapProfiler::filename_prefix_ = NULL;
 
@@ -314,43 +318,18 @@ static bool ByAllocatedSpace(Bucket* a, Bucket* b) {
   return (a->alloc_size_ - a->free_size_) > (b->alloc_size_ - b->free_size_);
 }
 
-// We return the amount of space in buf that we use.  We start printing
-// at buf + buflen, and promise not to go beyond buf + bufsize.
-int HeapProfiler::UnparseBucket(char* buf, int buflen, int bufsize, Bucket* b) {
-  // do not dump the address-disabled allocations
-  if (dump_for_leaks_  &&  (disabled_addresses_ || disabled_ranges_)) {
-    bool disable = false;
-    for (int depth = 0; !disable && depth < b->depth_; depth++) {
-      uintptr_t addr = reinterpret_cast<uintptr_t>(b->stack_[depth]);
-      if (disabled_addresses_  &&
-          disabled_addresses_->find(addr) != disabled_addresses_->end()) {
-        disable = true;  // found; dropping
-      }
-      if (disabled_ranges_) {
-        DisabledRangeMap::const_iterator iter
-          = disabled_ranges_->lower_bound(addr);
-        if (iter != disabled_ranges_->end()) {
-          assert(iter->first > addr);
-          if (iter->second.start_address < addr  &&
-              iter->second.max_depth > depth) {
-            disable = true;  // in range; dropping
-          }
-        }
-      }
-    }
-    if (disable) {
-      disabled_.allocs_ += b->allocs_;
-      disabled_.alloc_size_ += b->alloc_size_;
-      disabled_.frees_ += b->frees_;
-      disabled_.free_size_ += b->free_size_;
-      return buflen;
-    }
-  }
-  // count non-disabled allocations for leaks checking
+int HeapProfiler::UnparseBucket(char* buf, int buflen, int bufsize,
+                                const Bucket* b) {
   profile_.allocs_ += b->allocs_;
   profile_.alloc_size_ += b->alloc_size_;
   profile_.frees_ += b->frees_;
   profile_.free_size_ += b->free_size_;
+  if (dump_for_leaks_  &&
+      b->allocs_ - b->frees_ == 0  &&
+      b->alloc_size_ - b->free_size_ == 0) {
+    // don't waste the profile space on buckets that do not matter
+    return buflen;
+  }
   int printed =
     snprintf(buf + buflen, bufsize - buflen, "%6d: %8"LLD" [%6d: %8"LLD"] @",
              b->allocs_ - b->frees_,
@@ -372,23 +351,73 @@ int HeapProfiler::UnparseBucket(char* buf, int buflen, int bufsize, Bucket* b) {
   return buflen;
 }
 
+void HeapProfiler::AdjustByIgnoredObjects(int adjust) {
+  if (ignored_objects_) {
+    assert(dump_for_leaks_);
+    for (IgnoredObjectSet::const_iterator i = ignored_objects_->begin();
+         i != ignored_objects_->end(); ++i) {
+      AllocValue v;
+      if (!allocation_->Find(reinterpret_cast<void*>(*i), &v))  abort();
+         // must be in
+      v.bucket->allocs_ += adjust;
+      v.bucket->alloc_size_ += adjust * int64(v.bytes);
+        // need explicit size_t to int64 conversion before multiplication
+        // in case size_t is unsigned and adjust is negative
+      assert(v.bucket->allocs_ >= 0  &&  v.bucket->alloc_size_ >= 0);
+      if (kMaxLogging  &&  adjust < 0) {
+        HeapProfiler::MESSAGE(4, "HeapChecker: "
+                              "Ignoring object of %"PRIuS" bytes\n", v.bytes);
+      }
+    }
+  }
+}
+
 char* GetHeapProfile() {
   // We used to be smarter about estimating the required memory and
   // then capping it to 1MB and generating the profile into that.
   // However it should not cost us much to allocate 1MB every time.
   static const int size = 1 << 20;
+  int buflen = 0;
   char* buf = reinterpret_cast<char*>(malloc(size));
   if (buf == NULL) {
     return NULL;
   }
 
-  // Grab the lock and generate the profile
-  // (for leak checking the lock is acquired higher up).
-  if (!HeapProfiler::dump_for_leaks_)  HeapProfiler::Lock();
-  if (HeapProfiler::is_on_) {
+  Bucket **list = NULL;
+
+  // We can't allocate list on the stack, as this would overflow on threads
+  // running with a small stack size.  We can't allocate it under the lock
+  // either, as this would cause a deadlock.  But num_buckets is only valid
+  // while holding the lock- new buckets can be created at any time otherwise.
+  // So we'll read num_buckets dirtily, allocate room for all the current
+  // buckets + a few more, and check the count when we get the lock; if we
+  // don't have enough, we release the lock and try again.
+  while (true) {
+    int nb = num_buckets + num_buckets / 16 + 8;
+
+    if (list)
+      delete[] list;
+
+    list = new Bucket *[nb];
+
+    // Grab the lock and generate the profile
+    // (for leak checking the lock is acquired higher up).
+    if (!HeapProfiler::dump_for_leaks_)  HeapProfiler::Lock();
+    if (!HeapProfiler::is_on_) {
+      if (!HeapProfiler::dump_for_leaks_)  HeapProfiler::Unlock();
+      break;
+    }
+
     // Get all buckets and sort
     assert(table != NULL);
-    Bucket* list[num_buckets];
+
+    // If we have allocated some extra buckets while waiting for the lock, we
+    // may have to reallocate list
+    if (num_buckets > nb) {
+      if (!HeapProfiler::dump_for_leaks_) HeapProfiler::Unlock();
+      continue;
+    }
+
     int n = 0;
     for (int b = 0; b < kHashTableSize; b++) {
       for (Bucket* x = table[b]; x != 0; x = x->next_) {
@@ -398,19 +427,23 @@ char* GetHeapProfile() {
     assert(n == num_buckets);
     sort(list, list + num_buckets, ByAllocatedSpace);
 
-    int buflen = snprintf(buf, size-1, "heap profile: ");
-    buflen =
-      HeapProfiler::UnparseBucket(buf, buflen, size-1, &HeapProfiler::total_);
+    buflen = snprintf(buf, size-1, "heap profile: ");
+    buflen = HeapProfiler::UnparseBucket(buf, buflen, size-1,
+                                         &HeapProfiler::total_);
     memset(&HeapProfiler::profile_, 0, sizeof(HeapProfiler::profile_));
-    memset(&HeapProfiler::disabled_, 0, sizeof(HeapProfiler::disabled_));
+    HeapProfiler::AdjustByIgnoredObjects(-1);  // drop from profile
     for (int i = 0; i < num_buckets; i++) {
       Bucket* b = list[i];
       buflen = HeapProfiler::UnparseBucket(buf, buflen, size-1, b);
     }
+    HeapProfiler::AdjustByIgnoredObjects(1);  // add back to profile
     assert(buflen < size);
-    buf[buflen] = '\0';
+    if (!HeapProfiler::dump_for_leaks_)  HeapProfiler::Unlock();
+    break;
   }
-  if (!HeapProfiler::dump_for_leaks_)  HeapProfiler::Unlock();
+
+  buf[buflen] = '\0';
+  delete[] list;
 
   return buf;
 }
@@ -421,25 +454,21 @@ extern char* HeapProfile() {
   return GetHeapProfile();
 }
 
-// second_prefix is not NULL when the dumped profile
-// is to be named differently for leaks checking
-void HeapProfiler::DumpLocked(const char *reason, const char* second_prefix) {
+void HeapProfiler::DumpLocked(const char *reason, const char* file_name) {
   assert(is_on_);
 
-  if (filename_prefix_ == NULL)  return;
-    // we are not yet ready for dumping
+  if (filename_prefix_ == NULL  &&  file_name == NULL)  return;
+    // we do not yet need dumping
 
   dumping_ = true;
 
   // Make file name
   char fname[1000];
-  if (second_prefix == NULL) {
+  if (file_name == NULL) {
     dump_count++;
     snprintf(fname, sizeof(fname), "%s.%04d.heap",
              filename_prefix_, dump_count);
-  } else {
-    snprintf(fname, sizeof(fname), "%s.%s.heap",
-             filename_prefix_, second_prefix);
+    file_name = fname;
   }
 
   // Release allocation lock around the meat of this routine
@@ -451,8 +480,8 @@ void HeapProfiler::DumpLocked(const char *reason, const char* second_prefix) {
     HeapProfiler::MESSAGE(dump_for_leaks_ ? 1 : 0,
                           "HeapProfiler: "
                           "Dumping heap profile to %s (%s)\n",
-                          fname, reason);
-    FILE* f = fopen(fname, "w");
+                          file_name, reason);
+    FILE* f = fopen(file_name, "w");
     if (f != NULL) {
       const char* profile = HeapProfile();
       fputs(profile, f);
@@ -475,7 +504,7 @@ void HeapProfiler::DumpLocked(const char *reason, const char* second_prefix) {
     } else {
       HeapProfiler::MESSAGE(0, "HeapProfiler: "
                             "FAILED Dumping heap profile to %s (%s)\n",
-                            fname, reason);
+                            file_name, reason);
       if (dump_for_leaks_)  abort();  // no sense to continue
     }
   }
@@ -489,13 +518,20 @@ void HeapProfilerDump(const char *reason) {
   if (HeapProfiler::is_on_ && (num_buckets > 0)) {
 
     HeapProfiler::Lock();
-    if(!HeapProfiler::dumping_) {
+    if (!HeapProfiler::dumping_) {
       HeapProfiler::DumpLocked(reason, NULL);
     }
     HeapProfiler::Unlock();
   }
 }
 
+// Allocation map for heap objects (de)allocated
+// while HeapProfiler::self_disable_ is true.
+// We use it to test if heap leak checking itself changed the heap state.
+// An own map seems cleaner than trying to keep everything
+// in HeapProfiler::allocation_.
+HeapProfiler::AllocationMap* self_disabled_allocation = NULL;
+
 // This is the number of bytes allocated by the first call to malloc() after
 // registering this handler.  We want to sanity check that our first call is
 // actually for this number of bytes.
@@ -519,7 +555,7 @@ void HeapProfiler::RecordAlloc(void* ptr, size_t bytes, int skip_count) {
     int i;
     for (i = 0; i < depth; i++) {
       if (stack[i] == recordalloc_reference_stack_position_) {
-        MESSAGE(-1, "Determined strip_frames_ to be %d\n", i - 1);
+        MESSAGE(1, "Determined strip_frames_ to be %d\n", i - 1);
         // Subtract one to offset the fact that
         // recordalloc_reference_stack_position_ actually records the stack
         // position one frame above the spot in EarlyStartLocked where we are
@@ -542,11 +578,22 @@ void HeapProfiler::RecordAlloc(void* ptr, size_t bytes, int skip_count) {
   // is not an overhead because with profiling off
   // this hook is not called at all.
 
-  // Uncomment for debugging:
-  // HeapProfiler::MESSAGE(7, "HeapProfiler: Alloc %p : %"PRIuS"\n",
-  //                       ptr, bytes);
+  if (kMaxLogging) {
+    HeapProfiler::MESSAGE(7, "HeapProfiler: Alloc: %p of %"PRIuS" from %d\n",
+                          ptr, bytes, int(pthread_self()));
+  }
+
+  if (self_disable_  &&  self_disabled_tid_ == pthread_self()) {
+    self_disabled_.allocs_++;
+    self_disabled_.alloc_size_ += bytes;
+    AllocValue v;
+    v.bucket = NULL;  // initialize just to make smart tools happy
+                      // (no one will read it)
+    v.bytes = bytes;
+    self_disabled_allocation->Insert(ptr, v);
+    return;
+  }
 
-  if (temp_disable_  &&  temp_disabled_tid_ == pthread_self())  return;
   HeapProfiler::Lock();
   if (is_on_) {
     Bucket* b = GetBucket(skip_count+1);
@@ -560,12 +607,17 @@ void HeapProfiler::RecordAlloc(void* ptr, size_t bytes, int skip_count) {
     v.bytes = bytes;
     allocation_->Insert(ptr, v);
 
+    if (kMaxLogging) {
+      HeapProfiler::MESSAGE(8, "HeapProfiler: Alloc Recorded: %p of %"PRIuS"\n",
+                            ptr, bytes);
+    }
+
     const int64 inuse_bytes = total_.alloc_size_ - total_.free_size_;
     if (!dumping_) {
       bool need_dump = false;
       char buf[128];
-      if(total_.alloc_size_ >=
-         last_dump + FLAGS_heap_profile_allocation_interval) {
+      if (total_.alloc_size_ >=
+          last_dump + FLAGS_heap_profile_allocation_interval) {
         snprintf(buf, sizeof(buf), "%"LLD" MB allocated",
                  total_.alloc_size_ >> 20);
         // Track that we made a "total allocation size" dump
@@ -588,29 +640,50 @@ void HeapProfiler::RecordAlloc(void* ptr, size_t bytes, int skip_count) {
   HeapProfiler::Unlock();
 }
 
-void HeapProfiler::RecordFreeLocked(void* ptr) {
-  assert(is_on_);
-  AllocValue v;
-  if (allocation_->FindAndRemove(ptr, &v)) {
-    Bucket* b = v.bucket;
-    b->frees_++;
-    b->free_size_ += v.bytes;
-    total_.frees_++;
-    total_.free_size_ += v.bytes;
-  }
-}
-
 void HeapProfiler::RecordFree(void* ptr) {
   // All activity before if (is_on_)
   // is not an overhead because with profiling turned off this hook
   // is not called at all.
 
-  // Uncomment for debugging:
-  // HeapProfiler::MESSAGE(7, "HeapProfiler: Free %p\n", ptr);
+  if (kMaxLogging) {
+    HeapProfiler::MESSAGE(7, "HeapProfiler: Free %p from %d\n",
+                          ptr, int(pthread_self()));
+  }
+
+  if (self_disable_  &&  self_disabled_tid_ == pthread_self()) {
+    AllocValue v;
+    if (self_disabled_allocation->FindAndRemove(ptr, &v)) {
+      self_disabled_.free_size_ += v.bytes;
+      self_disabled_.frees_++;
+    } else {
+      // Try to mess the counters up and fail later in
+      // HeapLeakChecker::DumpProfileLocked instead of failing right now:
+      // presently execution gets here only from within the guts
+      // of pthread library and only when being in an address space
+      // that is about to disappear completely.
+      // I.e. failing right here is wrong, but failing later if
+      // this happens in the course of normal execution is needed.
+      self_disabled_.free_size_ += 100000000;
+      self_disabled_.frees_ += 100000000;
+    }
+    return;
+  }
 
-  if (temp_disable_  &&  temp_disabled_tid_ == pthread_self())  return;
   HeapProfiler::Lock();
-  if (is_on_)  RecordFreeLocked(ptr);
+  if (is_on_) {
+    AllocValue v;
+    if (allocation_->FindAndRemove(ptr, &v)) {
+      Bucket* b = v.bucket;
+      b->frees_++;
+      b->free_size_ += v.bytes;
+      total_.frees_++;
+      total_.free_size_ += v.bytes;
+
+      if (kMaxLogging) {
+        HeapProfiler::MESSAGE(8, "HeapProfiler: Free Recorded: %p\n", ptr);
+      }
+    }
+  }
   HeapProfiler::Unlock();
 }
 
@@ -639,6 +712,10 @@ bool HeapProfiler::HaveOnHeapLocked(void** ptr, AllocValue* alloc_value) {
     // this case is to account for the array size stored inside of
     // the memory allocated by new FooClass[size] for classes with destructors
     *ptr = reinterpret_cast<char*>(*ptr) - kArraySizeOffset;
+    if (kMaxLogging) {
+      HeapProfiler::MESSAGE(7, "HeapProfiler: Got poiter into %p at +%d\n",
+                            ptr, kArraySizeOffset);
+    }
   } else if (allocation_->Find(reinterpret_cast<char*>(*ptr)
                                - kStringOffset,
                                alloc_value)  &&
@@ -647,6 +724,10 @@ bool HeapProfiler::HaveOnHeapLocked(void** ptr, AllocValue* alloc_value) {
     // newer C++ library versions when the kept pointer points to inside of
     // the allocated region
     *ptr = reinterpret_cast<char*>(*ptr) - kStringOffset;
+    if (kMaxLogging) {
+      HeapProfiler::MESSAGE(7, "HeapProfiler: Got poiter into %p at +%d\n",
+                            ptr, kStringOffset);
+    }
   } else {
     result = false;
   }
@@ -756,22 +837,13 @@ Bucket* HeapProfiler::GetBucket(int skip_count) {
 void HeapProfiler::EarlyStartLocked() {
   assert(!is_on_);
 
-  // GNU libc++ versions 3.3 and 3.4 obey the environment variables
-  // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively.  Setting one of
-  // these variables forces the STL default allocator to call new() or delete()
-  // for each allocation or deletion.  Otherwise the STL allocator tries to
-  // avoid the high cost of doing allocations by pooling memory internally.
-  // This STL pool makes it impossible to get an accurate heap profile.
-  // Luckily, our tcmalloc implementation gives us similar performance
-  // characteristics *and* allows to to profile accurately.
-  setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/);
-  setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/);
-
   heap_profiler_memory.Init();
 
   is_on_ = true;
-  if (temp_disable_) abort();
-  filename_prefix_ = NULL;
+  // we should be really turned off:
+  if (need_for_leaks_)  abort();
+  if (self_disable_)  abort();
+  if (filename_prefix_ != NULL)  abort();
 
   // Make the table
   const int table_bytes = kHashTableSize * sizeof(Bucket*);
@@ -807,13 +879,15 @@ void HeapProfiler::EarlyStartLocked() {
 
   MallocHook::SetDeleteHook(DeleteHook);
 
-  HeapProfiler::MESSAGE(0, "HeapProfiler: Starting heap tracking\n");
+  HeapProfiler::MESSAGE(1, "HeapProfiler: Starting heap tracking\n");
 }
 
 void HeapProfiler::StartLocked(const char* prefix) {
-  assert(filename_prefix_ == NULL);
+  if (filename_prefix_ != NULL) return;
 
-  if (!is_on_) EarlyStartLocked();
+  if (!is_on_) {
+    EarlyStartLocked();
+  }
 
   // Copy filename prefix
   const int prefix_length = strlen(prefix);
@@ -823,7 +897,14 @@ void HeapProfiler::StartLocked(const char* prefix) {
 }
 
 void HeapProfiler::StopLocked() {
-  assert(is_on_);
+  if (!is_on_) return;
+
+  filename_prefix_ = NULL;
+
+  if (need_for_leaks_)  return;
+
+  // Turn us off completely:
+
   MallocHook::SetNewHook(NULL);
   MallocHook::SetDeleteHook(NULL);
 
@@ -831,22 +912,43 @@ void HeapProfiler::StopLocked() {
   heap_profiler_memory.Clear();
 
   table             = NULL;
-  filename_prefix_  = NULL;
   allocation_       = NULL;
   is_on_            = false;
 }
 
+void HeapProfiler::StartForLeaks() {
+  Lock();
+
+  if (!is_on_) {
+    EarlyStartLocked();  // fire-up HeapProfiler hooks
+  }
+  need_for_leaks_ = true;
+
+  memset(&self_disabled_, 0, sizeof(self_disabled_));  // zero the counters
+
+  // Make allocation map for self-disabled allocations
+  void* aptr = Malloc(sizeof(AllocationMap));
+  self_disabled_allocation = new (aptr) AllocationMap(Malloc, Free);
+
+  Unlock();
+}
+
+void HeapProfiler::StopForLeaks() {
+  Lock();
+  need_for_leaks_ = false;
+  if (filename_prefix_ == NULL) StopLocked();
+  Unlock();
+}
+
 void HeapProfilerStart(const char* prefix) {
   HeapProfiler::Lock();
-  if (HeapProfiler::filename_prefix_ == NULL) {
-    HeapProfiler::StartLocked(prefix);
-  }
+  HeapProfiler::StartLocked(prefix);
   HeapProfiler::Unlock();
 }
 
 void HeapProfilerStop() {
   HeapProfiler::Lock();
-  if (HeapProfiler::is_on_) HeapProfiler::StopLocked();
+  HeapProfiler::StopLocked();
   HeapProfiler::Unlock();
 }
 
@@ -854,34 +956,78 @@ void HeapProfilerStop() {
 // Initialization/finalization code
 //----------------------------------------------------------------------
 
-// helper function for HeapProfiler::Init()
-inline static bool GlobOk(int r) {
-  return r == 0 || r == GLOB_NOMATCH;
-}
-
 // Initialization code
 void HeapProfiler::Init() {
+  // depending on the ordering of the global constructors (undefined
+  // according to the C++ spec, HeapProfiler::Init() can either be
+  // called from this file directly, or from heap-checker.cc's global
+  // constructor if it gets run first.  Either way is fine by us; we
+  // just want to be sure not to run twice.
+  if (init_has_been_called_)  return;  // we were already run, I guess
+  init_has_been_called_ = true;
+
+  // We want to make sure tcmalloc is set up properly, in order to
+  // profile as much as we can.
+  MallocExtension::Initialize();
+
   if (FLAGS_mmap_profile || FLAGS_mmap_log) {
     MallocHook::SetMmapHook(MmapHook);
     MallocHook::SetMunmapHook(MunmapHook);
   }
 
-  if (FLAGS_heap_profile.empty()) return;
+  // Everything after this point is for setting up the profiler based on envvar
 
-  // Cleanup any old profile files
-  string pattern = FLAGS_heap_profile + ".[0-9][0-9][0-9][0-9].heap";
+  char* heapprofile = getenv("HEAPPROFILE");
+  if (!heapprofile || heapprofile[0] == '\0') {
+    return;
+  }
+  // We do a uid check so we don't write out files in a setuid executable.
+  if (getuid() != geteuid()) {
+    HeapProfiler::MESSAGE(0, ("HeapProfiler: ignoring HEAPPROFILE because "
+                              "program seems to be setuid\n"));
+    return;
+  }
+
+  // If we're a child process of the 'main' process, we can't just use
+  // the name HEAPPROFILE -- the parent process will be using that.
+  // Instead we append our pid to the name.  How do we tell if we're a
+  // child process?  Ideally we'd set an environment variable that all
+  // our children would inherit.  But -- and perhaps this is a bug in
+  // gcc -- if you do a setenv() in a shared libarary in a global
+  // constructor, the environment setting is lost by the time main()
+  // is called.  The only safe thing we can do in such a situation is
+  // to modify the existing envvar.  So we do a hack: in the parent,
+  // we set the high bit of the 1st char of HEAPPROFILE.  In the child,
+  // we notice the high bit is set and append the pid().  This works
+  // assuming cpuprofile filenames don't normally have the high bit
+  // set in their first character!  If that assumption is violated,
+  // we'll still get a profile, but one with an unexpected name.
+  // TODO(csilvers): set an envvar instead when we can do it reliably.
+  char fname[PATH_MAX];
+  if (heapprofile[0] & 128) {                   // high bit is set
+    snprintf(fname, sizeof(fname), "%c%s_%u",   // add pid and clear high bit
+             heapprofile[0] & 127, heapprofile+1, (unsigned int)(getpid()));
+  } else {
+    snprintf(fname, sizeof(fname), "%s", heapprofile);
+    heapprofile[0] |= 128;                      // set high bit for kids to see
+  }
+
+  CleanupProfiles(fname);
+
+  HeapProfilerStart(fname);
+}
+
+void HeapProfiler::CleanupProfiles(const char* prefix) {
+  string pattern(prefix);
+  pattern += ".*.heap";
   glob_t g;
   const int r = glob(pattern.c_str(), GLOB_ERR, NULL, &g);
-  pattern = FLAGS_heap_profile + ".*-beg.heap";
-  const int r2 = glob(pattern.c_str(), GLOB_ERR|GLOB_APPEND, NULL, &g);
-  pattern = FLAGS_heap_profile + ".*-end.heap";
-  const int r3 = glob(pattern.c_str(), GLOB_ERR|GLOB_APPEND, NULL, &g);
-  if (GlobOk(r) && GlobOk(r2) && GlobOk(r3)) {
-    const int prefix_length = FLAGS_heap_profile.size();
+  if (r == 0 || r == GLOB_NOMATCH) {
+    const int prefix_length = strlen(prefix);
     for (int i = 0; i < g.gl_pathc; i++) {
       const char* fname = g.gl_pathv[i];
       if ((strlen(fname) >= prefix_length) &&
-          (memcmp(fname, FLAGS_heap_profile.data(), prefix_length) == 0)) {
+          (memcmp(fname, prefix, prefix_length) == 0)) {
         HeapProfiler::MESSAGE(0, "HeapProfiler: "
                               "Removing old profile %s\n", fname);
         unlink(fname);
@@ -889,8 +1035,6 @@ void HeapProfiler::Init() {
     }
   }
   globfree(&g);
-
-  HeapProfilerStart(FLAGS_heap_profile.c_str());
 }
 
 // class used for finalization -- dumps the heap-profile at program exit
diff --git a/src/internal_logging.h b/src/internal_logging.h
index b5a721e..10d8502 100644
--- a/src/internal_logging.h
+++ b/src/internal_logging.h
@@ -35,7 +35,7 @@
 #ifndef TCMALLOC_INTERNAL_LOGGING_H__
 #define TCMALLOC_INTERNAL_LOGGING_H__
 
-#include "google/perftools/config.h"
+#include "config.h"
 #include <stdarg.h>
 #include <stdlib.h>
 #include <unistd.h>
diff --git a/src/internal_spinlock.h b/src/internal_spinlock.h
index 75e3bba..2015763 100644
--- a/src/internal_spinlock.h
+++ b/src/internal_spinlock.h
@@ -33,7 +33,7 @@
 #ifndef TCMALLOC_INTERNAL_SPINLOCK_H__
 #define TCMALLOC_INTERNAL_SPINLOCK_H__
 
-#include "google/perftools/config.h"
+#include "config.h"
 #include <time.h>       /* For nanosleep() */
 #include <sched.h>      /* For sched_yield() */
 #if defined HAVE_STDINT_H
diff --git a/src/malloc_interface.cc b/src/malloc_extension.cc
index eca3459..8ca58a7 100644
--- a/src/malloc_interface.cc
+++ b/src/malloc_extension.cc
@@ -30,7 +30,7 @@
 // ---
 // Author: Sanjay Ghemawat <opensource@google.com>
 
-#include "google/perftools/config.h"
+#include "config.h"
 #include <assert.h>
 #include <string.h>
 #include <pthread.h>
@@ -44,31 +44,59 @@
 #endif
 #include <string>
 #include "google/perftools/hash_set.h"
-#include "google/malloc_interface.h"
+#include "google/malloc_extension.h"
+#include "maybe_threads.h"
 
 using STL_NAMESPACE::string;
 
+// Note: this routine is meant to be called before threads are spawned.
+void MallocExtension::Initialize() {
+  static bool initialize_called = false;
+
+  if (initialize_called) return;
+  initialize_called = true;
+
+  // GNU libc++ versions 3.3 and 3.4 obey the environment variables
+  // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively.  Setting
+  // one of these variables forces the STL default allocator to call
+  // new() or delete() for each allocation or deletion.  Otherwise
+  // the STL allocator tries to avoid the high cost of doing
+  // allocations by pooling memory internally.  However, tcmalloc
+  // does allocations really fast, especially for the types of small
+  // items one sees in STL, so it's better off just using us.
+  // TODO: control whether we do this via an environment variable?
+  setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/);
+  setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/);
+
+  // Now we need to make the setenv 'stick', which it may not do since
+  // the env is flakey before main() is called.  But luckily stl only
+  // looks at this env var the first time it tries to do an alloc, and
+  // caches what it finds.  So we just cause an stl alloc here.
+  string dummy("I need to be allocated");
+  dummy += "!";         // so the definition of dummy isn't optimized out
+}
+
 // Default implementation -- does nothing
-MallocInterface::~MallocInterface() { }
-bool MallocInterface::VerifyAllMemory() { return true; }
-bool MallocInterface::VerifyNewMemory(void* p) { return true; }
-bool MallocInterface::VerifyArrayNewMemory(void* p) { return true; }
-bool MallocInterface::VerifyMallocMemory(void* p) { return true; }
+MallocExtension::~MallocExtension() { }
+bool MallocExtension::VerifyAllMemory() { return true; }
+bool MallocExtension::VerifyNewMemory(void* p) { return true; }
+bool MallocExtension::VerifyArrayNewMemory(void* p) { return true; }
+bool MallocExtension::VerifyMallocMemory(void* p) { return true; }
 
-bool MallocInterface::GetNumericProperty(const char* property, size_t* value) {
+bool MallocExtension::GetNumericProperty(const char* property, size_t* value) {
   return false;
 }
 
-bool MallocInterface::SetNumericProperty(const char* property, size_t value) {
+bool MallocExtension::SetNumericProperty(const char* property, size_t value) {
   return false;
 }
 
-void MallocInterface::GetStats(char* buffer, int length) {
+void MallocExtension::GetStats(char* buffer, int length) {
   assert(length > 0);
   buffer[0] = '\0';
 }
 
-bool MallocInterface::MallocMemoryStats(int* blocks, size_t* total,
+bool MallocExtension::MallocMemoryStats(int* blocks, size_t* total,
                                        int histogram[kMallocHistogramSize]) {
   *blocks = 0;
   *total = 0;
@@ -76,30 +104,30 @@ bool MallocInterface::MallocMemoryStats(int* blocks, size_t* total,
   return true;
 }
 
-void** MallocInterface::ReadStackTraces() {
+void** MallocExtension::ReadStackTraces() {
   return NULL;
 }
 
-// The current malloc interface object.  We also keep a pointer to
+// The current malloc extension object.  We also keep a pointer to
 // the default implementation so that the heap-leak checker does not
 // complain about a memory leak.
 
 static pthread_once_t module_init = PTHREAD_ONCE_INIT;
-static MallocInterface* default_instance = NULL;
-static MallocInterface* current_instance = NULL;
+static MallocExtension* default_instance = NULL;
+static MallocExtension* current_instance = NULL;
 
 static void InitModule() {
-  default_instance = new MallocInterface;
+  default_instance = new MallocExtension;
   current_instance = default_instance;
 }
 
-MallocInterface* MallocInterface::instance() {
-  pthread_once(&module_init, InitModule);
+MallocExtension* MallocExtension::instance() {
+  perftools_pthread_once(&module_init, InitModule);
   return current_instance;
 }
 
-void MallocInterface::Register(MallocInterface* implementation) {
-  pthread_once(&module_init, InitModule);
+void MallocExtension::Register(MallocExtension* implementation) {
+  perftools_pthread_once(&module_init, InitModule);
   current_instance = implementation;
 }
 
@@ -127,7 +155,7 @@ void* PC(void** entry, int i) {
 struct StackTraceHash {
   size_t operator()(void** entry) const {
     uintptr_t h = 0;
-    for (int i = 0; i < Depth(entry); i++) {
+    for (unsigned int i = 0; i < Depth(entry); i++) {
       uintptr_t pc = reinterpret_cast<uintptr_t>(PC(entry, i));
       h = (h << 8) | (h >> (8*(sizeof(h)-1)));
       h += (pc * 31) + (pc * 7) + (pc * 3);
@@ -157,7 +185,7 @@ void DebugStringWriter(const char* str, void* arg) {
 
 }
 
-void MallocInterface::GetHeapSample(string* result) {
+void MallocExtension::GetHeapSample(string* result) {
   void** entries = ReadStackTraces();
   if (entries == NULL) {
     *result += "this malloc implementation does not support sampling\n";
diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc
index 16710ee..a238fd1 100644
--- a/src/malloc_hook.cc
+++ b/src/malloc_hook.cc
@@ -31,7 +31,7 @@
 // Author: Sanjay Ghemawat <opensource@google.com>
 
 #include <google/malloc_hook.h>
-#include <google/perftools/basictypes.h>
+#include "base/basictypes.h"
 
 MallocHook::NewHook    MallocHook::new_hook_ = NULL;
 MallocHook::DeleteHook MallocHook::delete_hook_ = NULL;
@@ -47,21 +47,62 @@ MallocHook::MunmapHook MallocHook::munmap_hook_ = NULL;
 #include <sys/mman.h>
 #include <errno.h>
 
-extern "C" void* mmap(void *start, size_t length,
-                      int prot, int flags, 
-                      int fd, off_t offset) __THROW {
-  // Old syscall interface cannot handle six args, so pass in an array
-  int32 args[6] = { (int32) start, length, prot, flags, fd, (off_t) offset };
-  void* result = (void *)syscall(SYS_mmap, args);
-  MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
-  return result;
-}
-  
+// This somewhat reimplements libc's mmap syscall stubs. Unfortunately
+// libc only exports the stubs via weak symbols (which we're
+// overriding with our mmap64() and mmap() wrappers) so we can't just
+// call through to them.
 extern "C" void* mmap64(void *start, size_t length,
                         int prot, int flags, 
                         int fd, __off64_t offset) __THROW {
-  // TODO: Use 64 bit mmap2 system call if kernel is new enough
-  return mmap(start, length, prot, flags, fd, static_cast<off_t>(offset));
+
+  void *result;
+
+  // Try mmap2() unless it's not supported
+  static bool have_mmap2 = true;
+  if (have_mmap2) {
+    static int pagesize = 0;
+    if (!pagesize) pagesize = getpagesize();
+
+    // Check that the offset is page aligned
+    if (offset & (pagesize - 1)) {
+      result = MAP_FAILED;
+      errno = EINVAL;
+      goto out;
+    }
+
+    result = (void *)syscall(SYS_mmap2, 
+                             start, length, prot, flags, fd, offset / pagesize);
+    if (result != MAP_FAILED || errno != ENOSYS)  goto out;
+
+    // We don't have mmap2() after all - don't bother trying it in future
+    have_mmap2 = false;
+  }
+
+  if (((off_t)offset) != offset) {
+    // If we're trying to map a 64-bit offset, fail now since we don't
+    // have 64-bit mmap() support.
+    result = MAP_FAILED;
+    errno = EINVAL;
+    goto out;
+  }
+
+  {
+    // Fall back to old 32-bit offset mmap() call
+    // Old syscall interface cannot handle six args, so pass in an array
+    int32 args[6] = { (int32) start, length, prot, flags, fd, (off_t) offset };
+    result = (void *)syscall(SYS_mmap, args);
+  }
+ out:
+  MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
+  return result;
+
+}
+
+extern "C" void* mmap(void *start, size_t length,
+                      int prot, int flags, 
+                      int fd, off_t offset) __THROW {
+  return mmap64(start, length, prot, flags, fd, 
+                static_cast<size_t>(offset)); // avoid sign extension
 }
 
 extern "C" int munmap(void* start, size_t length) __THROW {
diff --git a/src/maybe_threads.cc b/src/maybe_threads.cc
new file mode 100644
index 0000000..38fd25f
--- /dev/null
+++ b/src/maybe_threads.cc
@@ -0,0 +1,94 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Menage <opensource@google.com>
+
+//-------------------------------------------------------------------
+// Some wrappers for pthread functions so that we can be LD_PRELOADed
+// against non-pthreads apps.
+//-------------------------------------------------------------------
+
+#include "config.h"
+#include <assert.h>
+#include <pthread.h>
+// We don't actually need strings. But including this header seems to
+// stop the compiler trying to short-circuit our pthreads existence
+// tests and claiming that the address of a function is always
+// non-zero. I have no idea why ...
+#include <string>
+#include "maybe_threads.h"
+
+#define MAX_PERTHREAD_VALS 16
+static void *perftools_pthread_specific_vals[MAX_PERTHREAD_VALS];
+static pthread_key_t next_key;
+
+// This module will behave very strangely if some pthreads functions
+// exist and others don't
+
+int perftools_pthread_key_create(pthread_key_t *key,  
+                                 void (*destr_function) (void *)) {
+  if (pthread_key_create) {
+    return pthread_key_create(key, destr_function);
+  } else {
+    assert(next_key < MAX_PERTHREAD_VALS);
+    *key = next_key++;
+    return 0;
+  }
+}
+
+void *perftools_pthread_getspecific(pthread_key_t key) { 
+  if (pthread_getspecific) {
+    return pthread_getspecific(key);
+  } else {
+    return perftools_pthread_specific_vals[key];
+  }
+}
+
+int perftools_pthread_setspecific(pthread_key_t key, void *val) {
+  if (pthread_setspecific) {
+    return pthread_setspecific(key, val);
+  } else {
+    perftools_pthread_specific_vals[key] = val;
+    return 0;
+  }
+}
+
+int perftools_pthread_once(pthread_once_t *ctl,  
+                          void  (*init_routine) (void)) {
+  if (pthread_once) {
+    return pthread_once(ctl, init_routine);
+  } else {
+    if (*ctl == PTHREAD_ONCE_INIT) {
+      init_routine();
+      *ctl = 1;
+    }
+    return 0;
+  }
+}
diff --git a/src/maybe_threads.h b/src/maybe_threads.h
new file mode 100644
index 0000000..76b6a65
--- /dev/null
+++ b/src/maybe_threads.h
@@ -0,0 +1,45 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Menage <opensource@google.com>
+
+//-------------------------------------------------------------------
+// Some wrappers for pthread functions so that we can be LD_PRELOADed
+// against non-pthreads apps.
+//-------------------------------------------------------------------
+
+#include <pthread.h>
+
+int perftools_pthread_key_create(pthread_key_t *key,  
+                                 void (*destr_function) (void *));
+void *perftools_pthread_getspecific(pthread_key_t key);
+int perftools_pthread_setspecific(pthread_key_t key, void *val);
+int perftools_pthread_once(pthread_once_t *ctl,  
+                           void  (*init_routine) (void));
diff --git a/src/pagemap.h b/src/pagemap.h
index 6fcddd7..326d970 100644
--- a/src/pagemap.h
+++ b/src/pagemap.h
@@ -45,7 +45,7 @@
 #ifndef TCMALLOC_PAGEMAP_H__
 #define TCMALLOC_PAGEMAP_H__
 
-#include "google/perftools/config.h"
+#include "config.h"
 #if defined HAVE_STDINT_H
 #include <stdint.h>
 #elif defined HAVE_INTTYPES_H
diff --git a/src/pprof b/src/pprof
index 1aa5544..494235d 100755
--- a/src/pprof
+++ b/src/pprof
@@ -75,6 +75,7 @@ my $OBJDUMP = "/usr/bin/objdump";
 my $NM = "/usr/bin/nm";
 my $ADDR2LINE = "/usr/bin/addr2line";
 my $DOT = "dot";          # leave non-absolute, since it may be in /usr/local
+my $GV = "gv";
 
 ##### Argument parsing #####
 
@@ -138,9 +139,14 @@ EOF
 
 sub version_string {
   return <<'EOF'
-pprof (part of google-perftools)
+pprof (part of google-perftools 0.1)
 
-Copyright (c) 2005 Google Inc.
+Copyright 1998-2005 Google Inc.
+
+This is BSD licensed software; see the source for copying conditions
+and license information.
+There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.
 EOF
 }
 
@@ -350,7 +356,11 @@ if ($main::opt_disasm) {
 } else {
   PrintDot($prog, $symbols, $profile, $flat, $cumulative, $total);
   if ($main::opt_gv) {
-    system("gv -scale $main::opt_scale $main::tmpfile_ps");
+    # Some versions of gv use -scale, and some use --scale.  *sigh*
+    # We use --help to determine if gv expects one dash or two.
+    system("$GV --help >/dev/null 2>&1 " .
+	   "&& $GV --scale=$main::opt_scale $main::tmpfile_ps " .
+	   "|| $GV -scale $main::opt_scale $main::tmpfile_ps")
   }
 }
 
@@ -1068,7 +1078,7 @@ sub ReadCPUProfile {
 
   # Read entire profile into a string
   my $str;
-  my $nbytes = read(PROFILE, $str, 100000000);
+  my $nbytes = read(PROFILE, $str, (stat PROFILE)[7]);   # read entire file
   close(PROFILE);
 
   # Parse string into array of slots.
@@ -1208,7 +1218,7 @@ sub ParseLibraries {
     my $finish;
     my $offset;
     my $lib;
-    if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.so(\.\d+)*)/) {
+    if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.so(\.\d+)*\w*)/) {
       # Full line from /proc/self/maps.  Example:
       #   40000000-40015000 r-xp 00000000 03:01 12845071   /lib/ld-2.3.2.so
       $start = hex($1);
diff --git a/src/profiler.cc b/src/profiler.cc
index 1622795..2597917 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -32,7 +32,7 @@
 //
 // Profile current program by sampling stack-trace every so often
 
-#include "google/perftools/config.h"
+#include "config.h"
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -59,6 +59,14 @@
 #endif
 #include "base/logging.h"
 
+#ifndef	PATH_MAX
+#ifdef MAXPATHLEN
+#define	PATH_MAX	MAXPATHLEN
+#else
+#define	PATH_MAX	4096         // seems conservative for max filename len!
+#endif
+#endif
+
 #if HAVE_PTHREAD
 #  include <pthread.h>
 #  define LOCK(m) pthread_mutex_lock(m)
@@ -97,6 +105,12 @@ inline void* GetPC(const SigStructure& sig_structure ) {
   return (void*)sig_structure.eip;
 }
 
+#elif defined HAVE_STRUCT_SIGCONTEXT_RIP
+typedef struct sigcontext SigStructure;
+inline void* GetPC(const SigStructure& sig_structure ) {
+  return (void*)sig_structure.rip;
+}
+
 #elif defined HAVE_STRUCT_SIGCONTEXT_SC_IP
 typedef struct sigcontext SigStructure;
 inline void* GetPC(const SigStructure& sig_structure ) {
@@ -228,7 +242,7 @@ ProfileData::ProfileData() :
 
   // Get frequency of interrupts (if specified)
   char junk;
-  const char* fr = getenv("FREQUENCY");
+  const char* fr = getenv("PROFILEFREQUENCY");
   if (fr != NULL && (sscanf(fr, "%d%c", &frequency_, &junk) == 1) &&
       (frequency_ > 0)) {
     // Limit to kMaxFrequency
@@ -238,14 +252,40 @@ ProfileData::ProfileData() :
   }
 
   // Should profiling be enabled?
-  const char* fname = getenv("CPUPROFILE");
-  if (fname == 0) {
+  char* cpuprofile = getenv("CPUPROFILE");
+  if (!cpuprofile || cpuprofile[0] == '\0') {
     return;
   }
   // We don't enable profiling if setuid -- it's a security risk
   if (getuid() != geteuid())
     return;
 
+  // If we're a child process of the 'main' process, we can't just use
+  // the name CPUPROFILE -- the parent process will be using that.
+  // Instead we append our pid to the name.  How do we tell if we're a
+  // child process?  Ideally we'd set an environment variable that all
+  // our children would inherit.  But -- and perhaps this is a bug in
+  // gcc -- if you do a setenv() in a shared libarary in a global
+  // constructor, the environment setting is lost by the time main()
+  // is called.  The only safe thing we can do in such a situation is
+  // to modify the existing envvar.  So we do a hack: in the parent,
+  // we set the high bit of the 1st char of CPUPROFILE.  In the child,
+  // we notice the high bit is set and append the pid().  This works
+  // assuming cpuprofile filenames don't normally have the high bit
+  // set in their first character!  If that assumption is violated,
+  // we'll still get a profile, but one with an unexpected name.
+  // TODO(csilvers): set an envvar instead when we can do it reliably.
+  char fname[PATH_MAX];
+  if (cpuprofile[0] & 128) {                    // high bit is set
+    snprintf(fname, sizeof(fname), "%c%s_%u",   // add pid and clear high bit
+             cpuprofile[0] & 127, cpuprofile+1, (unsigned int)(getpid()));
+  } else {
+    snprintf(fname, sizeof(fname), "%s", cpuprofile);
+    cpuprofile[0] |= 128;                       // set high bit for kids to see
+  }
+
+  // process being profiled.  CPU profiles are messed up in that case.
+
   if (!Start(fname)) {
     fprintf(stderr, "Can't turn on cpu profiling: ");
     perror(fname); 
diff --git a/src/stacktrace.cc b/src/stacktrace.cc
index 66a47a5..07fd321 100644
--- a/src/stacktrace.cc
+++ b/src/stacktrace.cc
@@ -32,7 +32,8 @@
 //
 // Produce stack trace
 
-#include "google/perftools/config.h"
+#include "config.h"
+#include <stdlib.h>
 #include "google/stacktrace.h"
 
 #undef IMPLEMENTED_STACK_TRACE
@@ -43,6 +44,10 @@
     defined(__linux) && !defined(NO_FRAME_POINTER) && !defined(_LP64)
 #define IMPLEMENTED_STACK_TRACE
 
+#include <stdint.h>   // for uintptr_t
+
+// Note: the code for GetStackExtent below is pretty similar to this one;
+//       change both if chaning one.
 int GetStackTrace(void** result, int max_depth, int skip_count) {
   void **sp;
 #ifdef __i386__
@@ -71,13 +76,56 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
     void** new_sp = (void**) *sp;
 
     // A little bit of sanity checking to avoid crashes
-    if (new_sp < sp || new_sp > sp + 100000) {
+    if (new_sp < sp ||
+        (uintptr_t)new_sp - (uintptr_t)sp > 100000) {
       break;
     }
     sp = new_sp;
   }
   return n;
 }
+
+// Note: the code is pretty similar to GetStackTrace above;
+//       change both if chaning one.
+bool GetStackExtent(void* sp,  void** stack_top, void** stack_bottom) {
+  void** cur_sp;
+
+  if (sp != NULL) {
+    cur_sp = (void**)sp;
+    *stack_top = sp;
+  } else {
+#ifdef __i386__
+    // Stack frame format:
+    //    sp[0]   pointer to previous frame
+    //    sp[1]   caller address
+    //    sp[2]   first argument
+    //    ...
+    cur_sp = (void**)&sp - 2;
+#endif
+
+#ifdef __x86_64__
+    // Arguments are passed in registers on x86-64, so we can't just
+    // offset from &result
+    cur_sp = (void**)__builtin_frame_address(0);
+#endif
+    *stack_top = NULL;
+  }
+
+  while (cur_sp) {
+    void** new_sp = (void**)*cur_sp;
+    // A little bit of sanity checking to avoid crashes
+    if (new_sp < cur_sp ||
+        (uintptr_t)new_sp - (uintptr_t)cur_sp > 100000) {
+      *stack_bottom = (void*)cur_sp;
+      return true;
+    }
+    cur_sp = new_sp;
+    if (*stack_top == NULL)  *stack_top = (void*)cur_sp;
+      // get out of the stack frame for this call
+  }
+  return false;
+}
+
 #endif
 
 // Portable implementation - just use glibc
@@ -89,7 +137,7 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
   static const int kStackLength = 64;
   void * stack[kStackLength];
   int size;
-  
+
   size = backtrace(stack, kStackLength);
   skip_count++;  // we want to skip the current frame as well
   int result_count = size - skip_count;
@@ -103,6 +151,11 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
 
   return result_count;
 }
+
+bool GetStackExtent(void* sp,  void** stack_bottom, void** stack_top) {
+  return false;  // can't climb up
+}
+
 #endif
 
 #if !defined(IMPLEMENTED_STACK_TRACE) && !defined(HAVE_EXECINFO_H)
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
index 2476d83..40f0046 100644
--- a/src/system-alloc.cc
+++ b/src/system-alloc.cc
@@ -30,7 +30,7 @@
 // ---
 // Author: Sanjay Ghemawat
 
-#include "google/perftools/config.h"
+#include "config.h"
 #if defined HAVE_STDINT_H
 #include <stdint.h>
 #elif defined HAVE_INTTYPES_H
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index d34d477..80bb6d9 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -62,7 +62,7 @@
 // * allocation of a reasonably complicated struct
 //   goes from about 1100 ns to about 300 ns.
 
-#include "google/perftools/config.h"
+#include "config.h"
 #include <new>
 #include <stdio.h>
 #include <stddef.h>
@@ -80,12 +80,13 @@
 #include <errno.h>
 #include <stdarg.h>
 #include "google/malloc_hook.h"
-#include "google/malloc_interface.h"
+#include "google/malloc_extension.h"
 #include "google/stacktrace.h"
 #include "internal_logging.h"
 #include "internal_spinlock.h"
 #include "pagemap.h"
 #include "system-alloc.h"
+#include "maybe_threads.h"
 
 #if defined HAVE_INTTYPES_H
 #define __STDC_FORMAT_MACROS
@@ -1240,11 +1241,11 @@ void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl) {
   SpinLockHolder h(&src->lock_);
   for (int i = 0; i < kNumObjectsToMove; i++) {
     void* object = src->Remove();
-    if (object == NULL) {
+   if (object == NULL) {
       if (i == 0) {
         src->Populate();        // Temporarily releases src->lock_
         object = src->Remove();
-      }
+     }
       if (object == NULL) {
         break;
       }
@@ -1297,7 +1298,7 @@ inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
   if (!tsd_inited) {
     InitModule();
   } else {
-    ptr = pthread_getspecific(heap_key);
+    ptr = perftools_pthread_getspecific(heap_key);
   }
   if (ptr == NULL) ptr = CreateCacheIfNecessary();
   return reinterpret_cast<TCMalloc_ThreadCache*>(ptr);
@@ -1308,7 +1309,8 @@ inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
 // already cleaned up the cache for this thread.
 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
   if (!tsd_inited) return NULL;
-  return reinterpret_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
+  return reinterpret_cast<TCMalloc_ThreadCache*>
+    (perftools_pthread_getspecific(heap_key));
 }
 
 void TCMalloc_ThreadCache::PickNextSample() {
@@ -1344,11 +1346,9 @@ void TCMalloc_ThreadCache::InitModule() {
   }
 }
 
-
-
 void TCMalloc_ThreadCache::InitTSD() {
   ASSERT(!tsd_inited);
-  pthread_key_create(&heap_key, DeleteCache);
+  perftools_pthread_key_create(&heap_key, DeleteCache);
   tsd_inited = true;
     
   // We may have used a fake pthread_t for the main thread.  Fix it.
@@ -1405,7 +1405,7 @@ void* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
   // linked list of heaps.
   if (!heap->setspecific_ && tsd_inited) {
     heap->setspecific_ = true;
-    pthread_setspecific(heap_key, heap);
+    perftools_pthread_setspecific(heap_key, heap);
   }
   return heap;
 }
@@ -1592,7 +1592,7 @@ static void** DumpStackTraces() {
 }
 
 // TCMalloc's support for extra malloc interfaces
-class TCMallocImplementation : public MallocInterface {
+class TCMallocImplementation : public MallocExtension {
  public:
   virtual void GetStats(char* buffer, int buffer_length) {
     ASSERT(buffer_length > 0);
@@ -1668,40 +1668,6 @@ class TCMallocImplementation : public MallocInterface {
 };
 
 
-
-// The constructor allocates an object to ensure that initialization
-// runs before main(), and therefore we do not have a chance to become
-// multi-threaded before initialization.  We also create the TSD key
-// here.  Presumably by the time this constructor runs, glibc is in
-// good enough shape to handle pthread_key_create().
-//
-// The destructor prints stats when the program exits.
-
-class TCMallocGuard {
- public:
-  TCMallocGuard() {
-    char *envval;
-    if ((envval = getenv("TCMALLOC_DEBUG"))) {
-      TCMallocDebug::level = atoi(envval);
-      MESSAGE("Set tcmalloc debugging level to %d\n", TCMallocDebug::level);
-    }
-    free(malloc(1));
-    TCMalloc_ThreadCache::InitTSD();
-    free(malloc(1));
-    MallocInterface::Register(new TCMallocImplementation);
-  }
-
-  ~TCMallocGuard() {
-    const char* env = getenv("MALLOCSTATS");
-    if (env != NULL) {
-      int level = atoi(env);
-      if (level < 1) level = 1;
-      PrintStats(level);
-    }
-  }
-};
-static TCMallocGuard module_enter_exit_hook;
-
 //-------------------------------------------------------------------
 // Helpers for the exported routines below
 //-------------------------------------------------------------------
@@ -1857,6 +1823,48 @@ static void* do_memalign(size_t align, size_t size) {
   return reinterpret_cast<void*>(span->start << kPageShift);
 }
 
+
+
+// The constructor allocates an object to ensure that initialization
+// runs before main(), and therefore we do not have a chance to become
+// multi-threaded before initialization.  We also create the TSD key
+// here.  Presumably by the time this constructor runs, glibc is in
+// good enough shape to handle pthread_key_create().
+//
+// The constructor also takes the opportunity to tell STL to use
+// tcmalloc.  We want to do this early, before construct time, so
+// all user STL allocations go through tcmalloc (which works really
+// well for STL).
+//
+// The destructor prints stats when the program exits.
+
+class TCMallocGuard {
+ public:
+  TCMallocGuard() {
+    char *envval;
+    if ((envval = getenv("TCMALLOC_DEBUG"))) {
+      TCMallocDebug::level = atoi(envval);
+      MESSAGE("Set tcmalloc debugging level to %d\n", TCMallocDebug::level);
+    }
+    do_free(do_malloc(1));
+    TCMalloc_ThreadCache::InitTSD();
+    do_free(do_malloc(1));
+    MallocExtension::Register(new TCMallocImplementation);
+  }
+
+  ~TCMallocGuard() {
+    const char* env = getenv("MALLOCSTATS");
+    if (env != NULL) {
+      int level = atoi(env);
+      if (level < 1) level = 1;
+      PrintStats(level);
+    }
+  }
+};
+
+static TCMallocGuard module_enter_exit_hook;
+
+
 //-------------------------------------------------------------------
 // Exported routines
 //-------------------------------------------------------------------
diff --git a/src/tests/addressmap_unittest.cc b/src/tests/addressmap_unittest.cc
index 555ce9c..0636dc3 100644
--- a/src/tests/addressmap_unittest.cc
+++ b/src/tests/addressmap_unittest.cc
@@ -31,6 +31,7 @@
 // Author: Sanjay Ghemawat
 
 #include <vector>
+#include <set>
 #include <algorithm>
 #include "addressmap-inl.h"
 #include "base/logging.h"
@@ -39,9 +40,17 @@
 DEFINE_int32(iters, 20, "Number of test iterations");
 DEFINE_int32(N, 100000,  "Number of elements to test per iteration");
 
+using std::pair;
+using std::make_pair;
 using std::vector;
 using std::random_shuffle;
 
+static std::set<pair<void*, int> > check_set;
+
+static void SetCheckCallback(void* ptr, int val) {
+  check_set.insert(make_pair(ptr, val));
+}
+
 int main(int argc, char** argv) {
   // Get a bunch of pointers
   const int N = FLAGS_N;
@@ -96,11 +105,15 @@ int main(int argc, char** argv) {
     }
 
     // Check all entries
+    map.Iterate(SetCheckCallback);
+    CHECK_EQ(check_set.size(), N);
     for (int i = 0; i < N; ++i) {
       void* p = ptrs[i];
+      check_set.erase(make_pair(p, i + 2*N));
       CHECK(map.Find(p, &result));
       CHECK_EQ(result, i + 2*N);
     }
+    CHECK_EQ(check_set.size(), 0);
 
   }
 
diff --git a/src/tests/heap-checker-death_unittest.sh b/src/tests/heap-checker-death_unittest.sh
index 1f58173..c4f76d6 100755
--- a/src/tests/heap-checker-death_unittest.sh
+++ b/src/tests/heap-checker-death_unittest.sh
@@ -33,6 +33,8 @@
 # Run the heap checker unittest in a mode where it is supposed to crash and
 # return an error if it doesn't
 
+export HEAPCHECK=strict
+
 # When the environment variable HEAP_CHECKER_TEST_LEAK is set,
 # heap-checker_unittest should leak some memory and then crash on exit.
 HEAPCHECK_TEST_LEAK=1 ./heap-checker_unittest
diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc
index b8893f4..b13d617 100644
--- a/src/tests/heap-checker_unittest.cc
+++ b/src/tests/heap-checker_unittest.cc
@@ -42,100 +42,395 @@
 //
 // Note: Both of the above commands *should* abort with an error message.
 
-#include "google/perftools/config.h"
+// CAVEAT: Do not use vector<>s and string-s in this test,
+// otherwise the test can sometimes fail for tricky leak checks
+// when we want some allocated object not to be found live by the heap checker.
+// This can happen with memory allocators like tcmalloc that can allocate
+// heap objects back to back without any book-keeping data in between.
+// What happens is that end-of-storage pointers of a live vector
+// (or a string depending on the STL implementation used)
+// can happen to point to that other heap-allocated
+// object that is not reachable otherwise and that
+// we don't want to be reachable.
+//
+// The implication of this for real leak checking
+// is just one more chance for the liveness flood to be inexact
+// (see the comment in our .h file).
+
+#include "config.h"
 #include "base/logging.h"
 #include "base/googleinit.h"
-
+#include <google/malloc_extension.h>
 #include <google/heap-profiler.h>
 #include <google/heap-checker.h>
 
 #include <stdlib.h>
+#include <sys/poll.h>
+#if defined HAVE_STDINT_H
+#include <stdint.h>             // to get uint16_t (ISO naming madness)
+#elif defined HAVE_INTTYPES_H
+#include <inttypes.h>           // another place uint16_t might be defined
+#else
+#include <sys/types.h>          // our last best hope
+#endif
+#include <iostream>             // for cout
 #include <vector>
+#include <set>
 #include <string>
 
+#include <netinet/in.h>         // inet_ntoa
+#include <arpa/inet.h>          // inet_ntoa
+#ifdef HAVE_EXECINFO_H
+#include <execinfo.h>           // backtrace
+#endif
+#ifdef HAVE_GRP_H
+#include <grp.h>                // getgrent, getgrnam
+#endif
+
+class Closure {
+ public:
+  virtual ~Closure() { }
+  virtual void Run() = 0;
+};
+
+class Callback0 : public Closure {
+ public:
+  typedef void (*FunctionSignature)();
+
+  inline Callback0(FunctionSignature f) : f_(f) {}
+  virtual void Run() { (*f_)(); delete this; }
+
+ private:
+  FunctionSignature f_;
+};
+
+template <class P1> class Callback1 : public Closure {
+ public:
+  typedef void (*FunctionSignature)(P1);
+
+  inline Callback1<P1>(FunctionSignature f, P1 p1) : f_(f), p1_(p1) {}
+  virtual void Run() { (*f_)(p1_); delete this; }
+
+ private:
+  FunctionSignature f_;
+  P1 p1_;
+};
+
+template <class P1, class P2> class Callback2 : public Closure {
+ public:
+  typedef void (*FunctionSignature)(P1,P2);
+
+  inline Callback2<P1,P2>(FunctionSignature f, P1 p1, P2 p2) : f_(f), p1_(p1), p2_(p2) {}
+  virtual void Run() { (*f_)(p1_, p2_); delete this; }
+
+ private:
+  FunctionSignature f_;
+  P1 p1_;
+  P2 p2_;
+};
+
+inline Callback0* NewCallback(void (*function)()) {
+  return new Callback0(function);
+}
+
+template <class P1>
+inline Callback1<P1>* NewCallback(void (*function)(P1), P1 p1) {
+  return new Callback1<P1>(function, p1);
+}
+
+template <class P1, class P2>
+inline Callback2<P1,P2>* NewCallback(void (*function)(P1,P2), P1 p1, P2 p2) {
+  return new Callback2<P1,P2>(function, p1, p2);
+}
+
+
 using namespace std;
 
-// Use an int* variable so that the compiler does not complain.
-static void Use(int* foo) { CHECK(foo == foo); }
+static bool FLAGS_maybe_stripped = false;   // TODO(csilvers): use this?
+static bool FLAGS_interfering_threads = true;
+
+// Set to true at end of main, so threads know.  Not entirely thread-safe!,
+// but probably good enough.
+static bool g_have_exited_main = false;
+
+// If our allocator guarantees that heap object addresses are never reused.
+// We need this property so that stale uncleared pointer data
+// does not accidentaly lead to heap-checker wrongly believing that
+// some data is live.
+static bool unique_heap_addresses = false;
+
+// We use a simple allocation wrapper
+// to make sure we wipe out the newly allocated objects
+// in case they still happened to contain some pointer data
+// accidently left by the memory allocator.
+struct Initialized { };
+static Initialized initialized;
+void* operator new(size_t size, const Initialized&) {
+  // Below we use "p = new (initialized) Foo[1];" and  "delete[] p;"
+  // instead of "p = new (initialized) Foo;"
+  // when we need to delete an allocated object.
+  void* p = malloc(size);
+  memset(p, 0, size);
+  return p;
+}
+void* operator new[](size_t size, const Initialized&) {
+  char* p = new char[size];
+  memset(p, 0, size);
+  return p;
+}
+
+static void CheckLeak(HeapLeakChecker* check, 
+                      size_t bytes_leaked, size_t objects_leaked) {
+  if (unique_heap_addresses) {
+    if (getenv("HEAPCHECK")) {
+      // these might still fail occasionally, but it should be very rare
+      CHECK_EQ(check->BriefNoLeaks(), false);
+      CHECK_EQ(check->BytesLeaked(), bytes_leaked);
+      CHECK_EQ(check->ObjectsLeaked(), objects_leaked);
+    }
+  } else if (check->BriefNoLeaks() != false) {
+    cout << "Some liveness flood must be too optimistic\n";
+  }
+}
+
+static void Pause() {
+  poll(NULL, 0, 77);  // time for thread activity in HeapBusyThreadBody
+
+  // Indirectly test debugallocation.* and malloc_interface.*:
+
+  CHECK(MallocExtension::instance()->VerifyAllMemory());
+  // Comment the printing of malloc-stats out for now.  It seems a bit broken
+#if 0
+  int blocks;
+  size_t total;
+  int histogram[kMallocHistogramSize];
+  if (MallocExtension::instance()
+       ->MallocMemoryStats(&blocks, &total, histogram)  &&  total != 0) {
+    cout << "Malloc stats: " << blocks << " blocks of "
+         << total << " bytes\n";
+    for (int i = 0; i < kMallocHistogramSize; ++i) {
+      if (histogram[i]) {
+        cout << "  Malloc histogram at " << i << " : " << histogram[i] << "\n";
+      }
+    }
+  }
+#endif
+}
+
+static bool noleak() {   // compare to this if you expect no leak
+  return true;
+}
+
+static bool leak() {   // compare to this if you expect a leak
+  // When we're not doing heap-checking, we can't tell if there's a leak
+  if ( !getenv("HEAPCHECK") ) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Make gcc think a pointer is "used"
+template <class T>
+static void Use(T** foo) {
+}
+
+// Arbitrary value, but not such that xor'ing with it is likely
+// to map one valid pointer to another valid pointer:
+static const uintptr_t kHideMask = 0xF03A5F7B;
+
+// Helpers to hide a pointer from live data traversal.
+// We just xor the pointer so that (with high probability)
+// it's not a valid address of a heap object anymore.
+// Both Hide and UnHide must be executed within RunHidden() below
+// to prevent leaving stale data on active stack that can be a pointer
+// to a heap object that is not actually reachable via live variables.
+// (UnHide might leave heap pointer value for an object
+//  that will be deallocated but later another object
+//  can be allocated at the same heap address.)
+template <class T>
+static void Hide(T** ptr) {
+  reinterpret_cast<uintptr_t&>(*ptr) =
+    (reinterpret_cast<uintptr_t&>(*ptr) ^ kHideMask);
+}
+
+template <class T>
+static void UnHide(T** ptr) {
+  reinterpret_cast<uintptr_t&>(*ptr) =
+    (reinterpret_cast<uintptr_t&>(*ptr) ^ kHideMask);
+}
+
+// non-static to fool the compiler against inlining
+extern void (*run_hidden_ptr)(Closure* c, int n);
+void (*run_hidden_ptr)(Closure* c, int n);
+extern void (*wipe_stack_ptr)(int n);
+void (*wipe_stack_ptr)(int n);
+
+static void DoRunHidden(Closure* c, int n) {
+  if (n) {
+    run_hidden_ptr(c, n-1);
+    wipe_stack_ptr(n);
+  } else {
+    c->Run();
+  }
+}
+
+static void DoWipeStack(int n) {
+  if (n) {
+    const int sz = 30;
+    volatile int arr[sz];
+    for (int i = 0; i < sz; ++i)  arr[i] = 0;
+    wipe_stack_ptr(n-1);
+  }
+}
+
+// This executes closure c several stack frames down from the current one
+// and then makes an effort to also wipe out the stack data that was used by
+// the closure.
+// This way we prevent leak checker from finding any temporary pointers
+// of the closure execution on the stack and deciding that
+// these pointers (and the pointed objects) are still live.
+static void RunHidden(Closure* c) {
+  DoRunHidden(c, 15);
+  DoWipeStack(20);
+}
+
+static void DoAllocHidden(size_t size, void** ptr) {
+  void* p = new (initialized) char[size];
+  Hide(&p);
+  Use(&p);  // use only hidden versions
+  *ptr = p;  // assign the hidden versions
+}
+
+static void* AllocHidden(size_t size) {
+  void* r;
+  RunHidden(NewCallback(DoAllocHidden, size, &r));
+  return r;
+}
+
+static void DoDeAllocHidden(void** ptr) {
+  Use(ptr);  // use only hidden versions
+  void* p = *ptr;
+  UnHide(&p);
+  delete [] (char*)p;
+}
+
+static void DeAllocHidden(void** ptr) {
+  RunHidden(NewCallback(DoDeAllocHidden, ptr));
+  *ptr = NULL;
+  Use(ptr);
+}
 
 // not deallocates
 static void TestHeapLeakCheckerDeathSimple() {
   HeapLeakChecker check("death_simple");
-  int* foo = new int[100];
-  void* bar = malloc(300);
-  Use(foo);
-  CHECK_EQ(check.BriefSameHeap(), false);
-  delete [] foo;
-  free(bar);
+  void* foo = AllocHidden(100 * sizeof(int));
+  Use(&foo);
+  void* bar = AllocHidden(300);
+  Use(&bar);
+  CheckLeak(&check, 300 + 100 * sizeof(int), 2);
+  DeAllocHidden(&foo);
+  DeAllocHidden(&bar);
+}
+
+static void MakeDeathLoop(void** arr1, void** arr2) {
+  void** a1 = new (initialized) void*[2];
+  void** a2 = new (initialized) void*[2];
+  a1[1] = (void*)a2;
+  a2[1] = (void*)a1;
+  Hide(&a1);
+  Hide(&a2);
+  Use(&a1);
+  Use(&a2);
+  *arr1 = a1;
+  *arr2 = a2;
+}
+
+// not deallocates two objects linked together
+static void TestHeapLeakCheckerDeathLoop() {
+  HeapLeakChecker check("death_loop");
+  void* arr1;
+  void* arr2;
+  RunHidden(NewCallback(MakeDeathLoop, &arr1, &arr2));
+  Use(&arr1);
+  Use(&arr2);
+  CheckLeak(&check, 4 * sizeof(void*), 2);
+  DeAllocHidden(&arr1);
+  DeAllocHidden(&arr2);
 }
 
 // deallocates more than allocates
 static void TestHeapLeakCheckerDeathInverse() {
-  int* bar = new int[250];
-  Use(bar);
+  void* bar = AllocHidden(250 * sizeof(int));
+  Use(&bar);
   HeapLeakChecker check("death_inverse");
-  int* foo = new int[100];
-  Use(foo);
-  delete [] bar;
-  CHECK_EQ(check.BriefSameHeap(), false);
-  delete [] foo;
+  void* foo = AllocHidden(100 * sizeof(int));
+  Use(&foo);
+  DeAllocHidden(&bar);
+  CheckLeak(&check, (size_t)(-150 * size_t(sizeof(int))), 0);
+  DeAllocHidden(&foo);
 }
 
 // deallocates more than allocates
 static void TestHeapLeakCheckerDeathNoLeaks() {
-  int* foo = new int[100];
-  int* bar = new int[250];
-  Use(foo);
-  Use(bar);
+  void* foo = AllocHidden(100 * sizeof(int));
+  Use(&foo);
+  void* bar = AllocHidden(250 * sizeof(int));
+  Use(&bar);
   HeapLeakChecker check("death_noleaks");
-  delete [] bar;
-  CHECK_EQ(check.NoLeaks(), true);
-  delete [] foo;
+  DeAllocHidden(&bar);
+  CHECK_EQ(check.BriefNoLeaks(), noleak());
+  DeAllocHidden(&foo);
 }
 
 // have less objecs
 static void TestHeapLeakCheckerDeathCountLess() {
-  int* bar1 = new int[50];
-  int* bar2 = new int[50];
-  Use(bar1);
-  Use(bar2);
+  void* bar1 = AllocHidden(50 * sizeof(int));
+  Use(&bar1);
+  void* bar2 = AllocHidden(50 * sizeof(int));
+  Use(&bar2);
   HeapLeakChecker check("death_count_less");
-  int* foo = new int[100];
-  Use(foo);
-  delete [] bar1;
-  delete [] bar2;
-  CHECK_EQ(check.BriefSameHeap(), false);
-  delete [] foo;
+  void* foo = AllocHidden(100 * sizeof(int));
+  Use(&foo);
+  DeAllocHidden(&bar1);
+  DeAllocHidden(&bar2);
+  CheckLeak(&check, 0, (size_t)-1);
+  DeAllocHidden(&foo);
 }
 
 // have more objecs
 static void TestHeapLeakCheckerDeathCountMore() {
-  int* foo = new int[100];
-  Use(foo);
+  void* foo = AllocHidden(100 * sizeof(int));
+  Use(&foo);
   HeapLeakChecker check("death_count_more");
-  int* bar1 = new int[50];
-  int* bar2 = new int[50];
-  Use(bar1);
-  Use(bar2);
-  delete [] foo;
-  CHECK_EQ(check.BriefSameHeap(), false);
-  delete [] bar1;
-  delete [] bar2;
+  void* bar1 = AllocHidden(50 * sizeof(int));
+  Use(&bar1);
+  void* bar2 = AllocHidden(50 * sizeof(int));
+  Use(&bar2);
+  DeAllocHidden(&foo);
+  CheckLeak(&check, 0, 1);
+  DeAllocHidden(&bar1);
+  DeAllocHidden(&bar2);
 }
 
+// simple tests that deallocate what they allocated
 static void TestHeapLeakChecker() {
   { HeapLeakChecker check("trivial");
     int foo = 5;
-    Use(&foo);
+    int* p = &foo;
+    Use(&p);
+    Pause();
     CHECK(check.BriefSameHeap());
   }
+  Pause();
   { HeapLeakChecker check("simple");
-    int* foo = new int[100];
-    int* bar = new int[200];
-    Use(foo);
-    Use(bar);
-    delete [] foo;
-    delete [] bar;
+    void* foo = AllocHidden(100 * sizeof(int));
+    Use(&foo);
+    void* bar = AllocHidden(200 * sizeof(int));
+    Use(&bar);
+    DeAllocHidden(&foo);
+    DeAllocHidden(&bar);
+    Pause();
     CHECK(check.BriefSameHeap());
   }
 }
@@ -144,79 +439,102 @@ static void TestHeapLeakChecker() {
 static void TestHeapLeakCheckerPProf() {
   { HeapLeakChecker check("trivial_p");
     int foo = 5;
-    Use(&foo);
-    CHECK(check.SameHeap());
+    int* p = &foo;
+    Use(&p);
+    Pause();
+    CHECK(check.BriefSameHeap());
   }
+  Pause();
   { HeapLeakChecker check("simple_p");
-    int* foo = new int[100];
-    int* bar = new int[200];
-    Use(foo);
-    Use(bar);
-    delete [] foo;
-    delete [] bar;
+    void* foo = AllocHidden(100 * sizeof(int));
+    Use(&foo);
+    void* bar = AllocHidden(200 * sizeof(int));
+    Use(&bar);
+    DeAllocHidden(&foo);
+    DeAllocHidden(&bar);
+    Pause();
     CHECK(check.SameHeap());
   }
 }
 
+// trick heap change: same total # of bytes and objects, but
+// different individual object sizes
 static void TestHeapLeakCheckerTrick() {
-  int* bar1 = new int[60];
-  int* bar2 = new int[40];
-  Use(bar1);
-  Use(bar2);
+  void* bar1 = AllocHidden(60 * sizeof(int));
+  Use(&bar1);
+  void* bar2 = AllocHidden(40 * sizeof(int));
+  Use(&bar2);
   HeapLeakChecker check("trick");
-  int* foo1 = new int[70];
-  int* foo2 = new int[30];
-  Use(foo1);
-  Use(foo2);
-  delete [] bar1;
-  delete [] bar2;
+  void* foo1 = AllocHidden(70 * sizeof(int));
+  Use(&foo1);
+  void* foo2 = AllocHidden(30 * sizeof(int));
+  Use(&foo2);
+  DeAllocHidden(&bar1);
+  DeAllocHidden(&bar2);
+  Pause();
   CHECK(check.BriefSameHeap());
-  delete [] foo1;
-  delete [] foo2;
+  DeAllocHidden(&foo1);
+  DeAllocHidden(&foo2);
 }
 
 // no false negatives from pprof
 static void TestHeapLeakCheckerDeathTrick() {
-  int* bar1 = new int[60];
-  int* bar2 = new int[40];
-  Use(bar1);
-  Use(bar2);
+  void* bar1 = AllocHidden(60 * sizeof(int));
+  Use(&bar1);
+  void* bar2 = AllocHidden(40 * sizeof(int));
+  Use(&bar2);
   HeapLeakChecker check("death_trick");
-  int* foo1 = new int[70];
-  int* foo2 = new int[30];
-  Use(foo1);
-  Use(foo2);
-  delete [] bar1;
-  delete [] bar2;
-  // If this check fails, you are probably running a stripped binary
-  CHECK_EQ(check.SameHeap(), false);  // pprof checking should catch it
-  delete [] foo1;
-  delete [] foo2;
+  DeAllocHidden(&bar1);
+  DeAllocHidden(&bar2);
+  void* foo1 = AllocHidden(70 * sizeof(int));
+  Use(&foo1);
+  void* foo2 = AllocHidden(30 * sizeof(int));
+  Use(&foo2);
+  // TODO(maxim): use the above if we make pprof work in automated test runs
+  if (!FLAGS_maybe_stripped) {
+    CHECK_EQ(check.SameHeap(), leak());  // pprof checking should catch it
+  } else if (check.SameHeap()) {
+    cout << "death_trick leak is not caught; we must be using stripped binary\n";
+  }
+  DeAllocHidden(&foo1);
+  DeAllocHidden(&foo2);
 }
 
+// simple leak
 static void TransLeaks() {
-  new char;
+  AllocHidden(1 * sizeof(char));
 }
 
+// have leaks but disable them
 static void DisabledLeaks() {
   HeapLeakChecker::DisableChecksUp(1);
+  AllocHidden(3 * sizeof(int));
   TransLeaks();
-  new int[3];
 }
 
+// have leaks but range-disable them
 static void RangeDisabledLeaks() {
   void* start_address = HeapLeakChecker::GetDisableChecksStart();
-  new int[3];
+  AllocHidden(3 * sizeof(int));
   TransLeaks();
   HeapLeakChecker::DisableChecksToHereFrom(start_address);
 }
 
+// We need this function pointer trickery to fool an aggressive
+// optimizing compiler such as icc into not inlining DisabledLeaks().
+// Otherwise the stack-frame-address-based disabling in it
+// will wrongly disable allocation tracking in
+// the functions into which it's inlined.
+static void (*disabled_leaks_addr)() = &DisabledLeaks;
+
+// have different disabled leaks
 static void* RunDisabledLeaks(void* a) {
-  DisabledLeaks();
+  disabled_leaks_addr();
   RangeDisabledLeaks();
   return a;
 }
 
+// have different disabled leaks inside of a thread
 static void ThreadDisabledLeaks() {
   pthread_t tid;
   pthread_attr_t attr;
@@ -226,6 +544,7 @@ static void ThreadDisabledLeaks() {
   CHECK(pthread_join(tid, &res) == 0);
 }
 
+// different disabled leaks (some in threads)
 static void TestHeapLeakCheckerDisabling() {
   HeapLeakChecker check("disabling");
 
@@ -236,41 +555,222 @@ static void TestHeapLeakCheckerDisabling() {
   ThreadDisabledLeaks();
   ThreadDisabledLeaks();
 
-  CHECK_EQ(check.SameHeap(), true);
+  // if this fails, some code with DisableChecksUp() got inlined into here;
+  // need to add more tricks to prevent this inlining.
+  CHECK(!HeapLeakChecker::HaveDisabledChecksUp(1));
+
+  Pause();
+
+  CHECK(check.SameHeap());
+}
+
+typedef set<int> IntSet;
+
+static int some_ints[] = { 1, 2, 3, 21, 22, 23, 24, 25 };
+
+static void DoTestSTLAlloc() {
+  IntSet* x = new (initialized) IntSet[1];
+  *x  = IntSet(some_ints, some_ints + 6);
+  for (int i = 0; i < 1000; i++) {
+    x->insert(i*3);
+  }
+  delete [] x;
+}
+
+// Check that normal STL usage does not result in a leak report.
+// (In particular we test that there's no complex STL's own allocator
+// running on top of our allocator with hooks to heap profiler
+// that can result in false leak report in this case.)
+static void TestSTLAlloc() {
+  HeapLeakChecker check("stl");
+  RunHidden(NewCallback(DoTestSTLAlloc));
+  CHECK_EQ(check.BriefSameHeap(), true);
+}
+
+static void DoTestSTLAllocInverse(IntSet** setx) {
+  IntSet* x = new (initialized) IntSet[1];
+  *x = IntSet(some_ints, some_ints + 3);
+  for (int i = 0; i < 100; i++) {
+    x->insert(i*2);
+  }
+  Hide(&x);
+  *setx = x;
+}
+
+static void FreeTestSTLAllocInverse(IntSet** setx) {
+  IntSet* x = *setx;
+  UnHide(&x);
+  delete [] x;
 }
 
+// Check that normal leaked STL usage *does* result in a leak report.
+// (In particular we test that there's no complex STL's own allocator
+// running on top of our allocator with hooks to heap profiler
+// that can result in false absence of leak report in this case.)
+static void TestSTLAllocInverse() {
+  HeapLeakChecker check("inverse_stl");
+  IntSet* x;
+  RunHidden(NewCallback(DoTestSTLAllocInverse, &x));
+  if (unique_heap_addresses) {
+    if (getenv("HEAPCHECK")) {
+      // these might still fail occasionally, but it should be very rare
+      CHECK_EQ(check.BriefNoLeaks(), false);
+      CHECK_GE(check.BytesLeaked(), 100 * sizeof(int));
+      CHECK_GE(check.ObjectsLeaked(), 100);
+      // assumes set<>s are represented by some kind of binary tree
+      // or something else allocating >=1 heap object per set object
+    }
+  } else if (check.BriefNoLeaks() != false) {
+    cout << "Some liveness flood must be too optimistic";
+  }
+  RunHidden(NewCallback(FreeTestSTLAllocInverse, &x));
+}
+
+template<class Alloc>
+static void DirectTestSTLAlloc(Alloc allocator, const char* name) {
+  HeapLeakChecker check((string("direct_stl-") + name).c_str());
+  const int size = 1000;
+  char* ptrs[size];
+  for (int i = 0; i < size; ++i) {
+    char* p = allocator.allocate(i*3+1);
+    HeapLeakChecker::IgnoreObject(p);
+    // This will crash if p is not known to heap profiler:
+    // (i.e. STL's "allocator" does not have a direct hook to heap profiler)
+    HeapLeakChecker::UnIgnoreObject(p);
+    ptrs[i] = p;
+  }
+  for (int i = 0; i < size; ++i) {
+    allocator.deallocate(ptrs[i], i*3+1);
+    ptrs[i] = NULL;
+  }
+  CHECK(check.BriefSameHeap());  // just in case
+}
+
+static struct group* grp = NULL;
+static pthread_once_t key_once = PTHREAD_ONCE_INIT;
+static const int kKeys = 50;
+static pthread_key_t key[kKeys];
 
+static void KeyFree(void* ptr) {
+  delete [] (char*)ptr;
+}
+
+static void KeyInit() {
+  for (int i = 0; i < kKeys; ++i) {
+    CHECK_EQ(pthread_key_create(&key[i], KeyFree), 0);
+  }
+}
+
+// force various C library static and thread-specific allocations
+static void TestLibCAllocate() {
+  pthread_once(&key_once, KeyInit);
+  for (int i = 0; i < kKeys; ++i) {
+    void* p = pthread_getspecific(key[i]);
+    if (NULL == p) {
+      p = new (initialized) char[77 + i];
+      pthread_setspecific(key[i], p);
+    }
+  }
+
+  strerror(errno);
+  struct in_addr addr;
+  addr.s_addr = INADDR_ANY;
+  inet_ntoa(addr);
+  const time_t now = time(NULL);
+  ctime(&now);
+#ifdef HAVE_EXECINFO_H
+  void *stack[1];
+  backtrace(stack, 1);
+#endif
+#ifdef HAVE_GRP_H
+  if (grp == NULL)  grp = getgrent();  // a race condition here is okay
+  getgrnam(grp->gr_name);
+#endif
+}
+
+// Continuous random heap memory activity to try to disrupt heap checking.
+static void* HeapBusyThreadBody(void* a) {
+  TestLibCAllocate();
+
+  int user = 0;
+  register int** ptr = NULL;
+  typedef set<int> Set;
+  Set s1;
+  while (1) {
+    // TestLibCAllocate() calls libc functions that don't work so well
+    // after main() has exited.  So we just don't do the test then.
+    if (!g_have_exited_main)
+      TestLibCAllocate();
+
+    if (ptr == NULL) {
+      ptr = new (initialized) int*[1];
+      *ptr = new (initialized) int[1];
+    }
+    set<int>* s2 = new (initialized) set<int>[1];
+    s1.insert(random());
+    s2->insert(*s1.begin());
+    user += *s2->begin();
+    **ptr += user;
+    if (random() % 51 == 0) {
+      s1.clear();
+      if (random() % 2 == 0) {
+        s1.~Set();
+        new (&s1) Set;
+      }
+    }
+    poll(NULL, 0, random() % 100);
+      // try to hide ptr from heap checker in a CPU register
+    if (random() % 3 == 0) {
+      delete [] *ptr;
+      delete [] ptr;
+      ptr = NULL;
+    }
+    delete [] s2;
+  }
+  return a;
+}
+
+static void RunHeapBusyThreads() {
+  const int n = 17;  // make many threads
+
+  pthread_t tid;
+  pthread_attr_t attr;
+  CHECK(pthread_attr_init(&attr) == 0);
+  // make them and let them run
+  for (int i = 0; i < n; ++i) {
+    CHECK(pthread_create(&tid, &attr, HeapBusyThreadBody, NULL) == 0);
+  }
+
+  Pause();
+  Pause();
+}
+
+// tests disabling via function name
 REGISTER_MODULE_INITIALIZER(heap_checker_unittest, {
   HeapLeakChecker::DisableChecksIn("NamedDisabledLeaks");
 });
 
+// have leaks that we disable via our function name in MODULE_INITIALIZER
 static void NamedDisabledLeaks() {
-  // We are testing two cases in this function: calling new[] directly and
-  // calling it at one level deep (inside TransLeaks).  We want to always call
-  // TransLeaks() first, because otherwise the compiler may turn this into a
-  // tail recursion when compiling in optimized mode.  This messes up the stack
-  // trace.
-  // TODO: Is there any way to prevent this from happening in the general case
-  // (i.e. user code)?
-  TransLeaks();
-  new float[5];
+  AllocHidden(5 * sizeof(float));
 }
 
+// have leaks that we disable via our function name ourselves
 static void NamedTwoDisabledLeaks() {
   static bool first = true;
   if (first) {
     HeapLeakChecker::DisableChecksIn("NamedTwoDisabledLeaks");
     first = false;
   }
-  TransLeaks();
-  new double[5];
+  AllocHidden(5 * sizeof(double));
 }
 
+// have leaks that we disable via our function name in our caller
 static void NamedThreeDisabledLeaks() {
-  TransLeaks();
-  new float[5];
+  AllocHidden(5 * sizeof(float));
 }
 
+// have leaks that we disable via function names
 static void* RunNamedDisabledLeaks(void* a) {
   void* start_address = NULL;
   if (a)  start_address = HeapLeakChecker::GetDisableChecksStart();
@@ -285,6 +785,7 @@ static void* RunNamedDisabledLeaks(void* a) {
   return a;
 }
 
+// have leaks inside of threads that we disable via function names
 static void ThreadNamedDisabledLeaks(void* a = NULL) {
   pthread_t tid;
   pthread_attr_t attr;
@@ -294,6 +795,7 @@ static void ThreadNamedDisabledLeaks(void* a = NULL) {
   CHECK(pthread_join(tid, &res) == 0);
 }
 
+// test leak disabling via function names
 static void TestHeapLeakCheckerNamedDisabling() {
   HeapLeakChecker::DisableChecksIn("NamedThreeDisabledLeaks");
 
@@ -306,8 +808,14 @@ static void TestHeapLeakCheckerNamedDisabling() {
   ThreadNamedDisabledLeaks();
   ThreadNamedDisabledLeaks();
 
-  // If this check fails, you are probably be running a stripped binary.
-  CHECK_EQ(check.SameHeap(), true);  // pprof checking should allow it
+  Pause();
+
+  if (!FLAGS_maybe_stripped) {
+    CHECK_EQ(check.SameHeap(), true);
+      // pprof checking should allow it
+  } else if (!check.SameHeap()) {
+    cout << "named_disabling leaks are caught; we must be using stripped binary\n";
+  }
 }
 
 // The code from here to main()
@@ -315,13 +823,54 @@ static void TestHeapLeakCheckerNamedDisabling() {
 // variables are not reported as leaks,
 // with the few exceptions like multiple-inherited objects.
 
-string* live_leak = NULL;
-string* live_leak2 = new string("ss");
-vector<int>* live_leak3 = new vector<int>(10,10);
-const char* live_leak4 = new char[5];
-vector<int> live_leak5(20,10);
-const vector<int> live_leak6(30,10);
-const string* live_leak_arr1 = new string[5];
+// A dummy class to mimic allocation behavior of string-s.
+template<class T>
+struct Array {
+  Array() {
+    size = 3 + random() % 30;
+    ptr = new (initialized) T[size];
+  }
+  ~Array() { delete [] ptr; }
+  Array(const Array& x) {
+    size = x.size;
+    ptr = new (initialized) T[size];
+    for (size_t i = 0; i < size; ++i) {
+      ptr[i] = x.ptr[i];
+    }
+  }
+  void operator=(const Array& x) {
+    delete [] ptr;
+    size = x.size;
+    ptr = new (initialized) T[size];
+    for (size_t i = 0; i < size; ++i) {
+      ptr[i] = x.ptr[i];
+    }
+  }
+  void append(const Array& x) {
+    T* p = new (initialized) T[size + x.size];
+    for (size_t i = 0; i < size; ++i) {
+      p[i] = ptr[i];
+    }
+    for (size_t i = 0; i < x.size; ++i) {
+      p[size+i] = x.ptr[i];
+    }
+    size += x.size;
+    delete [] ptr;
+    ptr = p;
+  }
+ private:
+  size_t size;
+  T* ptr;
+};
+
+static Array<char>* live_leak = NULL;
+static Array<char>* live_leak2 = new (initialized) Array<char>();
+static int* live_leak3 = new (initialized) int[10];
+static const char* live_leak4 = new (initialized) char[5];
+static int data[] = { 1, 2, 3, 4, 5, 6, 7, 21, 22, 23, 24, 25, 26, 27 };
+static set<int> live_leak5(data, data+7);
+static const set<int> live_leak6(data, data+14);
+static const Array<char>* live_leak_arr1 = new (initialized) Array<char>[5];
 
 class ClassA {
  public:
@@ -329,7 +878,7 @@ class ClassA {
   mutable char* ptr;
 };
 
-const ClassA live_leak7(1);
+static const ClassA live_leak7(1);
 
 template<class C>
 class TClass {
@@ -339,7 +888,7 @@ class TClass {
   mutable C* ptr;
 };
 
-const TClass<string> live_leak8(1);
+static const TClass<Array<char> > live_leak8(1);
 
 class ClassB {
  public:
@@ -373,154 +922,163 @@ class ClassD : public ClassD1, public ClassD2 {
   virtual void f2() { }
 };
 
-ClassB* live_leak_b;
-ClassD1* live_leak_d1;
-ClassD2* live_leak_d2;
-ClassD* live_leak_d;
+static ClassB* live_leak_b;
+static ClassD1* live_leak_d1;
+static ClassD2* live_leak_d2;
+static ClassD* live_leak_d;
 
-ClassB* live_leak_b_d1;
-ClassB2* live_leak_b2_d2;
-ClassB* live_leak_b_d;
-ClassB2* live_leak_b2_d;
+static ClassB* live_leak_b_d1;
+static ClassB2* live_leak_b2_d2;
+static ClassB* live_leak_b_d;
+static ClassB2* live_leak_b2_d;
 
-ClassD1* live_leak_d1_d;
-ClassD2* live_leak_d2_d;
+static ClassD1* live_leak_d1_d;
+static ClassD2* live_leak_d2_d;
 
+// have leaks but ignore the leaked objects
 static void IgnoredLeaks() {
-  int* p = new int;
+  int* p = new (initialized) int[1];
   HeapLeakChecker::IgnoreObject(p);
-  int** leak = new int*;
+  int** leak = new (initialized) int*;
   HeapLeakChecker::IgnoreObject(leak);
-  *leak = new int;
+  *leak = new (initialized) int;
   HeapLeakChecker::UnIgnoreObject(p);
-  delete p;
+  delete [] p;
 }
 
+// allocate many objects reachable from global data
 static void TestHeapLeakCheckerLiveness() {
-  live_leak_b = new ClassB;
-  live_leak_d1 = new ClassD1;
-  live_leak_d2 = new ClassD2;
-  live_leak_d = new ClassD;
+  live_leak_b = new (initialized) ClassB;
+  live_leak_d1 = new (initialized) ClassD1;
+  live_leak_d2 = new (initialized) ClassD2;
+  live_leak_d = new (initialized) ClassD;
 
-  live_leak_b_d1 = new ClassD1;
-  live_leak_b2_d2 = new ClassD2;
-  live_leak_b_d = new ClassD;
-  live_leak_b2_d = new ClassD;
+  live_leak_b_d1 = new (initialized) ClassD1;
+  live_leak_b2_d2 = new (initialized) ClassD2;
+  live_leak_b_d = new (initialized) ClassD;
+  live_leak_b2_d = new (initialized) ClassD;
 
-  live_leak_d1_d = new ClassD;
-  live_leak_d2_d = new ClassD;
+  live_leak_d1_d = new (initialized) ClassD;
+  live_leak_d2_d = new (initialized) ClassD;
 
-
-#ifndef NDEBUG
   HeapLeakChecker::IgnoreObject((ClassD*)live_leak_b2_d);
   HeapLeakChecker::IgnoreObject((ClassD*)live_leak_d2_d);
     // These two do not get deleted with liveness flood
     // because the base class pointer points inside of the objects
     // in such cases of multiple inheritance.
     // Luckily google code does not use multiple inheritance almost at all.
-    // Somehow this does not happen in optimized mode.
-#endif
 
-  live_leak = new string("live_leak");
-  live_leak3->insert(live_leak3->begin(), 20, 20);
+  live_leak = new (initialized) Array<char>();
+  delete [] live_leak3;
+  live_leak3 = new (initialized) int[33];
   live_leak2->append(*live_leak);
-  live_leak7.ptr = new char [77];
-  live_leak8.ptr = new string("aaa");
-  live_leak8.val = string("bbbbbb");
+  live_leak7.ptr = new (initialized) char[77];
+  live_leak8.ptr = new (initialized) Array<char>();
+  live_leak8.val = Array<char>();
 
   IgnoredLeaks();
   IgnoredLeaks();
   IgnoredLeaks();
 }
 
-// Check that we don't give false negatives or positives on leaks from the STL
-// allocator.
-void TestHeapLeakCheckerSTL() {
-  HeapLeakChecker stl_check("stl");
-  {
-    string x = "banana";
-    for (int i = 0; i < 10000; i++)
-      x += "na";
-  }
-  CHECK(stl_check.SameHeap());
-}
-
-void TestHeapLeakCheckerSTLInverse() {
-  HeapLeakChecker inverse_stl_checker("inverse_stl");
-  string x = "queue";
-  for (int i = 0; i < 1000; i++)
-    x += "ue";
-  CHECK_EQ(inverse_stl_checker.SameHeap(), false);
-}
-
 int main(int argc, char** argv) {
   // This needs to be set before InternalInitStart(), which makes a local copy
   if (getenv("PPROF_PATH"))
     HeapLeakChecker::set_pprof_path(getenv("PPROF_PATH"));
 
-  // This needs to be set early because it determines the behaviour of
-  // InternalInitStart().
-  string heap_check_type;
-  if (getenv("HEAPCHECK_MODE"))
-    heap_check_type = getenv("HEAPCHECK_MODE");
-  else
-    heap_check_type = "strict";
+  run_hidden_ptr = DoRunHidden;
+  wipe_stack_ptr = DoWipeStack;
 
-  HeapLeakChecker::StartFromMain(heap_check_type);
+  if (FLAGS_interfering_threads) {
+    RunHeapBusyThreads();  // add interference early
+  }
+  TestLibCAllocate();
 
   LogPrintf(INFO, "In main()");
 
   // The following two modes test whether the whole-program leak checker
   // appropriately detects leaks on exit.
   if (getenv("HEAPCHECK_TEST_LEAK")) {
-    void* arr = new vector<int>(10, 10);
+    void* arr = new (initialized) set<int>(data, data+10);
     LogPrintf(INFO, "Leaking %p", arr);
-    fprintf(stdout, "PASS\n");
-    return 0;
+    return 0;  // whole-program leak check should catch it
   }
 
   if (getenv("HEAPCHECK_TEST_LOOP_LEAK")) {
-    void** arr1 = new void*[2];
-    void** arr2 = new void*[2];
+    void** arr1 = new (initialized) void*[2];
+    void** arr2 = new (initialized) void*[2];
     arr1[1] = (void*)arr2;
     arr2[1] = (void*)arr1;
     LogPrintf(INFO, "Loop leaking %p and %p", arr1, arr2);
-    fprintf(stdout, "PASS\n");
-    return 0;
+    return 0;  // whole-program leak check should catch it
   }
 
   TestHeapLeakCheckerLiveness();
 
-  HeapProfilerStart("/tmp/leaks");
   HeapLeakChecker heap_check("all");
 
   TestHeapLeakChecker();
+  Pause();
   TestHeapLeakCheckerTrick();
+  Pause();
 
   TestHeapLeakCheckerDeathSimple();
+  Pause();
+  TestHeapLeakCheckerDeathLoop();
+  Pause();
   TestHeapLeakCheckerDeathInverse();
+  Pause();
   TestHeapLeakCheckerDeathNoLeaks();
+  Pause();
   TestHeapLeakCheckerDeathCountLess();
+  Pause();
   TestHeapLeakCheckerDeathCountMore();
+  Pause();
 
   TestHeapLeakCheckerDeathTrick();
+  Pause();
+
   TestHeapLeakCheckerPProf();
+  Pause();
 
   TestHeapLeakCheckerDisabling();
+  Pause();
+
+  TestSTLAlloc();
+  Pause();
+  TestSTLAllocInverse();
+  Pause();
+  DirectTestSTLAlloc(set<char>().get_allocator(), "alloc");
+    // default STL allocator
+  Pause();
+  // TODO: re-enable this test once we've change configure.ac to include
+  // the right header file that defines pthread_allocator.
+  //DirectTestSTLAlloc(pthread_allocator<char>(), "pthread_alloc");
+  Pause();
+
+  TestLibCAllocate();
+  Pause();
+
+  void* start_address = HeapLeakChecker::GetDisableChecksStart();
+
   TestHeapLeakCheckerNamedDisabling();
+  Pause();
 
-  TestHeapLeakCheckerSTL();
-  TestHeapLeakCheckerSTLInverse();
+  if (!FLAGS_maybe_stripped) {
+    CHECK(heap_check.SameHeap());
+  } else if (!heap_check.SameHeap()) {
+    cout << "overall leaks are caught; we must be using stripped binary\n";
+  }
 
   int a;
   ThreadNamedDisabledLeaks(&a);
 
-  CHECK(heap_check.SameHeap());
-
-  HeapLeakChecker::IgnoreObject(new vector<int>(10, 10));
+  HeapLeakChecker::IgnoreObject(new (initialized) set<int>(data, data + 13));
     // This checks both that IgnoreObject works, and
     // and the fact that we don't drop such leaks as live for some reason.
 
   fprintf(stdout, "PASS\n");
+
+  g_have_exited_main = true;
   return 0;
 }
diff --git a/src/tests/heap-checker_unittest.sh b/src/tests/heap-checker_unittest.sh
new file mode 100755
index 0000000..22bde9e
--- /dev/null
+++ b/src/tests/heap-checker_unittest.sh
@@ -0,0 +1,87 @@
+#!/bin/sh -e
+
+# Copyright (c) 2005, Google Inc.
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# 
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Author: Craig Silverstein
+#
+# Runs the heap-checker unittest with various environment variables.
+# This is necessary because we turn on features like the heap profiler
+# and heap checker via environment variables.  This test makes sure
+# they all play well together.
+#
+# Notice that we run this script with -e, so *any* error is fatal.
+
+if [ -z "$2" ]
+then
+    echo "USAGE: $0 <unittest dir> <pprof dir>"
+    exit 1
+fi
+
+UNITTEST_DIR=$1
+PPROF=$2/pprof
+
+HEAP_CHECKER="$1/heap-checker_unittest"
+
+TMPDIR=/tmp/heap_check_info
+
+rm -rf $TMPDIR
+mkdir $TMPDIR
+
+# $1: value of heap-profile env. var.  $2: value of heap-check env. var.
+run_check() {
+    export PPROF_PATH="$PPROF"
+    [ -n "$1" ] && export HEAPPROFILE="$1" || unset HEAPPROFILE
+    [ -n "$2" ] && export HEAPCHECK="$2" || unset HEAPCHECK
+
+    echo ""
+    echo ">>> TESTING $HEAP_CHECKER with HEAPPROFILE=$1 and HEAPCHECK=$2"
+    $HEAP_CHECKER
+    echo ">>> DONE testing $HEAP_CHECKER with HEAPPROFILE=$1 and HEAPCHECK=$2"
+
+    # If we set HEAPPROFILE, then we expect it to actually have emitted
+    # a profile.  Check that it did.
+    if [ -n "$HEAPPROFILE" ]; then
+      [ -e "$HEAPPROFILE.0001.heap" ] || exit 1
+    fi
+}
+
+run_check "" ""
+run_check "" "local"
+run_check "" "normal"
+run_check "" "strict"
+run_check "$TMPDIR/profile" ""
+run_check "$TMPDIR/profile" "local"
+run_check "$TMPDIR/profile" "normal"
+run_check "$TMPDIR/profile" "strict"
+
+rm -rf $TMPDIR      # clean up
+
+echo "ALL TESTS PASSED"
diff --git a/src/tests/heap-profiler_unittest.cc b/src/tests/heap-profiler_unittest.cc
new file mode 100644
index 0000000..332dbbc
--- /dev/null
+++ b/src/tests/heap-profiler_unittest.cc
@@ -0,0 +1,110 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Craig Silverstein
+//
+// A small program that just exercises our heap profiler by allocating
+// memory and letting the heap-profiler emit a profile.  We don't test
+// threads (TODO).  By itself, this unittest tests that the heap-profiler
+// doesn't crash on simple programs, but its output can be analyzed by
+// another testing script to actually verify correctness.  See, eg,
+// heap-profiler_unittest.sh.
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>                 // for fork()
+#include <sys/wait.h>               // for wait()
+#include <google/heap-profiler.h>
+
+const static int kMaxCount = 100000;
+int* g_array[kMaxCount];              // an array of int-vectors
+
+int Allocate(int start, int end, int size) {
+  for (int i = start; i < end; ++i) {
+    if (i < kMaxCount)
+      g_array[i] = new int[size];
+  }
+}
+
+int Allocate2(int start, int end, int size) {
+  for (int i = start; i < end; ++i) {
+    if (i < kMaxCount)
+      g_array[i] = new int[size];
+  }
+}
+
+int Deallocate(int start, int end) {
+  for (int i = start; i < end; ++i) {
+    delete g_array[i];
+    g_array[i] = 0;
+  }
+}
+
+int main(int argc, char** argv) {
+  if (argc > 2 || (argc == 2 && argv[1][0] == '-')) {
+    printf("USAGE: %s [number of children to fork]\n", argv[0]);
+    exit(0);
+  }
+  int num_forks = 0;
+  if (argc == 2) {
+    num_forks = atoi(argv[1]);
+  }
+
+  HeapProfilerSetInuseInterval(10 << 10);   // set inuse interval at 10MB
+
+  Allocate(0, 40, 100);
+  Deallocate(0, 40);
+
+  Allocate(0, 40, 100);
+  Allocate(0, 40, 100);
+  Allocate2(40, 400, 1000);
+  Allocate2(400, 1000, 10000);
+  Deallocate(0, 1000);
+
+  Allocate(0, 100, 100000);
+  Deallocate(0, 10);
+  Deallocate(10, 20);
+  Deallocate(90, 100);
+  Deallocate(20, 90);
+
+  while (num_forks-- > 0) {
+    switch (fork()) {
+      case -1: 
+        printf("FORK failed!\n"); 
+        return 1;
+      case 0:             // child
+        return execl(argv[0], argv[0], NULL);   // run child with no args
+      default:
+        wait(NULL);       // we'll let the kids run one at a time
+    }
+  }
+
+  return 0;
+}
diff --git a/src/tests/heap-profiler_unittest.sh b/src/tests/heap-profiler_unittest.sh
new file mode 100755
index 0000000..dd7e247
--- /dev/null
+++ b/src/tests/heap-profiler_unittest.sh
@@ -0,0 +1,109 @@
+#!/bin/sh
+
+# Copyright (c) 2005, Google Inc.
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# 
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Author: Craig Silverstein
+#
+# Runs the heap-profiler unittest and makes sure the profile looks appropriate.
+#
+# We run under the assumption that if $HEAP_PROFILER is run with --help,
+# it prints a usage line of the form
+#   USAGE: <actual executable being run> [...]
+#
+# This is because libtool sometimes turns the 'executable' into a
+# shell script which runs an actual binary somewhere else.
+
+if [ -z "$2" ]
+then
+    echo "USAGE: $0 <unittest dir> <pprof dir>"
+    exit 1
+fi
+
+UNITTEST_DIR=$1
+PPROF=$2/pprof
+
+HEAP_PROFILER=$UNITTEST_DIR/heap-profiler_unittest
+
+TMPDIR=/tmp/heap_profile_info
+
+# It's meaningful to the profiler, so make sure we know its state
+unset HEAPPROFILE
+
+rm -rf $TMPDIR
+mkdir $TMPDIR || exit 2
+
+num_failures=0
+
+# Given one profile (to check the contents of that profile) or two
+# profiles (to check the diff between the profiles), and a function
+# name, verify that the function name takes up at least 90% of the
+# allocated memory.  The function name is actually specified first.
+VerifyMemFunction() {
+    function=$1
+    shift
+
+    # Getting the 'real' name is annoying, since running HEAP_PROFILER
+    # at all tends to destroy the old profiles if we're not careful
+    HEAPPROFILE_SAVED="$HEAPPROFILE"
+    unset HEAPPROFILE
+    exec=`$HEAP_PROFILER --help | awk '{print $2; exit;}'` # get program name
+    export HEAPPROFILE="$HEAPPROFILE_SAVED"
+
+    if [ $# = 2 ]; then
+	[ -e "$1" -a -e "$2" ] || { echo "Profile not found: $1 or $2"; exit 1; }
+	$PPROF --base="$1" $exec "$2"
+    else
+	[ -e "$1" ] || { echo "Profile not found: $1"; exit 1; }
+	$PPROF $exec "$1"
+    fi | tr -d % | awk '$6 ~ /^'$function'$/ && $2 > 90 {exit 1;}'
+
+    if [ $? != 1 ]; then
+	echo
+	echo ">>> Test failed for $function: didn't use 90% of cpu"
+	echo
+	num_failures=`expr $num_failures + 1`
+    fi
+}
+
+export HEAPPROFILE=$TMPDIR/test
+$HEAP_PROFILER 1              # actually run the program, with a child process
+
+VerifyMemFunction Allocate2 $HEAPPROFILE.0723.heap
+VerifyMemFunction Allocate $HEAPPROFILE.0700.heap $HEAPPROFILE.0760.heap
+
+# Check the child process too
+VerifyMemFunction Allocate2 ${HEAPPROFILE}_*.0723.heap
+VerifyMemFunction Allocate ${HEAPPROFILE}_*.0700.heap ${HEAPPROFILE}_*.0760.heap
+
+rm -rf $TMPDIR      # clean up
+
+echo "Tests finished with $num_failures failures"
+exit $num_failures
diff --git a/src/tests/profiler_unittest.cc b/src/tests/profiler_unittest.cc
index 9a8aac8..0bf7bce 100644
--- a/src/tests/profiler_unittest.cc
+++ b/src/tests/profiler_unittest.cc
@@ -34,9 +34,11 @@
 // Define WITH_THREADS to add pthread functionality as well (otherwise, btw,
 // the num_threads argument to this program is ingored).
 
-#include "google/perftools/config.h"
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>                 // for fork()
+#include <sys/wait.h>               // for wait()
 #include "google/profiler.h"
 
 static int result = 0;
@@ -97,7 +99,8 @@ int main(int argc, char** argv) {
     fprintf(stderr, "USAGE: %s <iters> [num_threads] [filename]\n", argv[0]);
     fprintf(stderr, "   iters: How many million times to run the XOR test.\n");
     fprintf(stderr, "   num_threads: how many concurrent threads.\n");
-    fprintf(stderr, "                0 or 1 for single-threaded mode.\n");
+    fprintf(stderr, "                0 or 1 for single-threaded mode,\n");
+    fprintf(stderr, "                -# to fork instead of thread.\n");
     fprintf(stderr, "   filename: The name of the output profile.\n");
     fprintf(stderr, ("             If you don't specify, set CPUPROFILE "
                      "in the environment instead!\n"));
@@ -131,6 +134,24 @@ int main(int argc, char** argv) {
   }
 #endif
 
+  // Or maybe they asked to fork.  The fork test is only interesting
+  // when we use CPUPROFILE to name, so check for that
+  for (; num_threads < 0; ++num_threads) {   // -<num_threads> to fork
+    if (filename) {
+      printf("FORK test only makes sense when no filename is specified.\n");
+      return 2;
+    }
+    switch (fork()) {
+      case -1: 
+        printf("FORK failed!\n"); 
+        return 1;
+      case 0:             // child
+        return execl(argv[0], argv[0], argv[1], NULL);
+      default:
+        wait(NULL);       // we'll let the kids run one at a time
+    }
+  }
+
   int r = test_main_thread(iters);
   printf("The XOR test returns %d\n", r);
 
diff --git a/src/tests/profiler_unittest.sh b/src/tests/profiler_unittest.sh
index 91869c9..cc6c52f 100755
--- a/src/tests/profiler_unittest.sh
+++ b/src/tests/profiler_unittest.sh
@@ -33,15 +33,14 @@
 # Author: Craig Silverstein
 #
 # Runs the 4 profiler unittests and makes sure their profiles look
-# appropriate.  Takes three arguments: directory holding profilerX_unittest
-# scripts, directory holding profilerX_unittest executables, and directory
-# holding pprof.
-#
-# We expect two commandline args, as described below.
+# appropriate.  We expect two commandline args, as described below.
 #
 # We run under the assumption that if $PROFILER1 is run with no
 # arguments, it prints a usage line of the form
 #   USAGE: <actual executable being run> [...]
+#
+# This is because libtool sometimes turns the 'executable' into a
+# shell script which runs an actual binary somewhere else.
 
 if [ -z "$2" ]
 then
@@ -70,8 +69,9 @@ num_failures=0
 # Takes two filenames representing profiles, with their executable scripts,
 # and a multiplier, and verifies that the 'contentful' functions in
 # each profile take the same time (possibly scaled by the given
-# multiplier).  "Same" means within 50%, after adding an noise-reducing
-# X units to each value -- we're pretty forgiving.
+# multiplier).  It used to be "same" meant within 50%, after adding an 
+# noise-reducing X units to each value.  But even that would often
+# spuriously fail, so now it's "both non-zero".  We're pretty forgiving.
 VerifySimilar() {
     prof1=$TMPDIR/$1
     # We need to run the script with no args to get the actual exe name
@@ -85,8 +85,9 @@ VerifySimilar() {
     mthread1_plus=`expr $mthread1 + 5`
     mthread2_plus=`expr $mthread2 + 5`
     if [ -z "$mthread1" ] || [ -z "$mthread2" ] || \
-       [ `expr $mthread1_plus \* $mult` -gt `expr $mthread2_plus \* 2` -o \
-         `expr $mthread1_plus \* $mult \* 2` -lt `expr $mthread2_plus` ]
+       [ "$mthread1" -le 0 -o "$mthread2" -le 0 ]
+#    || [ `expr $mthread1_plus \* $mult` -gt `expr $mthread2_plus \* 2` -o \
+#         `expr $mthread1_plus \* $mult \* 2` -lt `expr $mthread2_plus` ]
     then
 	echo
 	echo ">>> profile on $exec1 vs $exec2 with multiplier $mult failed:"
@@ -101,8 +102,9 @@ VerifySimilar() {
 # the same amount of time as the other-threads function (possibly scaled
 # by the given multiplier).  Figuring out the multiplier can be tricky,
 # since by design the main thread runs twice as long as each of the
-# 'other' threads!  In any case, "same" means within 70% -- we're pretty
-# forgiving.
+# 'other' threads!  It used to be "same" meant within 50%, after adding an 
+# noise-reducing X units to each value.  But even that would often
+# spuriously fail, so now it's "both non-zero".  We're pretty forgiving.
 VerifyAcrossThreads() {
     prof1=$TMPDIR/$1
     # We need to run the script with no args to get the actual exe name
@@ -112,8 +114,9 @@ VerifyAcrossThreads() {
     mthread=`$PPROF $exec1 $prof1 | grep test_main_thread | awk '{print $1}'`
     othread=`$PPROF $exec2 $prof2 | grep test_other_thread | awk '{print $1}'`
     if [ -z "$mthread" ] || [ -z "$othread" ] || \
-       [ `expr $mthread \* $mult \* 3` -gt `expr $othread \* 10` -o \
-         `expr $mthread \* $mult \* 10` -lt `expr $othread \* 3` ]
+       [ "$mthread" -le 0 -o "$othread" -le 0 ]
+#    || [ `expr $mthread \* $mult \* 3` -gt `expr $othread \* 10` -o \
+#         `expr $mthread \* $mult \* 10` -lt `expr $othread \* 3` ]
     then
 	echo
 	echo ">>> profile on $exec1 vs $exec2 with multiplier $mult failed:"
@@ -169,6 +172,14 @@ VerifySimilar p9 $PROFILER4 p10 $PROFILER4 2
 $PROFILER4 2 4 $TMPDIR/p11
 VerifyAcrossThreads p11 $PROFILER4 2
 
+# Make sure that when we have a process with a fork, the profiles don't
+# clobber each other
+CPUPROFILE=$TMPDIR/p6 $PROFILER1 1 -2
+n=`ls $TMPDIR/p6* | wc -l`
+if [ $n != 3 ]; then
+  echo "FORK test FAILED: expected 3 profiles (for main + 2 children), found $n"
+  num_failures=`expr $num_failures + 1`
+fi
 
 rm -rf $TMPDIR      # clean up
 
diff --git a/src/tests/stacktrace_unittest.cc b/src/tests/stacktrace_unittest.cc
index ee2f126..afca1ff 100644
--- a/src/tests/stacktrace_unittest.cc
+++ b/src/tests/stacktrace_unittest.cc
@@ -27,7 +27,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "google/perftools/config.h"
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include "base/commandlineflags.h"
diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc
index d823c13..91da8b3 100644
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@@ -35,7 +35,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
-#include "google/malloc_interface.h"
+#include "google/malloc_extension.h"
 
 #define BUFSIZE (100 << 10)
 
@@ -54,7 +54,7 @@ int main(int argc, char **argv) {
   delete[] buf2;
 
   char buffer[10 << 10];
-  MallocInterface::instance()->GetStats(buffer, sizeof(buffer));
+  MallocExtension::instance()->GetStats(buffer, sizeof(buffer));
   printf("Malloc stats:\n%s\n", buffer);
 
   return 0;
author	csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>	2007-03-22 03:28:56 +0000
committer	csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>	2007-03-22 03:28:56 +0000
commit	91fad389784766782263133c5510976a8f76d89e (patch)
tree	4058058dc6bd6eb12bf72efc06c1d1ac11cd170b
parent	51b4875f8ade3e0930eed2dc2a842ec607a94a2c (diff)
download	gperftools-91fad389784766782263133c5510976a8f76d89e.tar.gz