summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> 2010-01-14 16:26:05 +0000
committer csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> 2010-01-14 16:26:05 +0000
commit 63b8d63beb7d771713774f9a5d57381cbd29bf19 (patch)
tree d09417a7fd0710c1e728216656b9e0f002452adc
parent eeeacd5ec4fa36256091f45e5b3af81cee2a4d86 (diff)
download gperftools-63b8d63beb7d771713774f9a5d57381cbd29bf19.tar.gz
* PORTING: Revised patch_functions to avoid deadlock (csilvers, andrey)
* PORTING: Revised patch_functions to speed up .dll loads (csilvers)
* PORTING: Build and run sampling_test for windows (csilvers)
* Correctly init tc structs even when libc isn't patched (csilvers)
* Make low-level allocs async-signal-safe (saito)

git-svn-id: http://gperftools.googlecode.com/svn/trunk@83 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
-rw-r--r--Makefile.am10
-rw-r--r--Makefile.in202
-rw-r--r--README.windows3
-rw-r--r--src/base/low_level_alloc.cc89
-rw-r--r--src/base/low_level_alloc.h22
-rw-r--r--src/google/malloc_hook.h6
-rw-r--r--src/malloc_hook.cc28
-rwxr-xr-xsrc/pprof7
-rw-r--r--src/stacktrace_x86-inl.h5
-rw-r--r--src/tcmalloc.cc4
-rwxr-xr-xsrc/tests/sampling_test.sh16
-rw-r--r--src/thread_cache.cc6
-rw-r--r--src/windows/patch_functions.cc264
-rwxr-xr-xvsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj2
-rwxr-xr-xvsprojects/tmu-static/tmu-static.vcproj2
15 files changed, 452 insertions, 214 deletions
diff --git a/Makefile.am b/Makefile.am
index c81d8f3..3de910e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -181,6 +181,8 @@ libwindows_la_SOURCES = $(WINDOWS_INCLUDES) \
src/windows/patch_functions.cc \
src/windows/preamble_patcher.cc \
src/windows/preamble_patcher_with_stub.cc
+# patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us.
+libwindows_la_LIBADD = -lPsapi
SPINLOCK_INCLUDES = src/base/spinlock.h \
src/base/spinlock_win32-inl.h \
@@ -529,6 +531,12 @@ malloc_extension_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
malloc_extension_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
malloc_extension_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
+# This doesn't work with static linkage, because libtcmalloc.a isn't
+# happy with C linkage (it misses the stdc++ library). Likewise with
+# mingw, which links foo.a even though it doesn't set ENABLE_STATIC.
+# TODO(csilvers): set enable_static=true in configure.ac:36?
+if !MINGW
+if !ENABLE_STATIC
TESTS += malloc_extension_c_test
malloc_extension_c_test_SOURCES = src/tests/malloc_extension_c_test.c \
src/google/malloc_extension.h \
@@ -540,6 +548,8 @@ malloc_extension_c_test_CFLAGS += -ansi
endif GCC
malloc_extension_c_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
malloc_extension_c_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
+endif !ENABLE_STATIC
+endif !MINGW
if !MINGW
TESTS += memalign_unittest
diff --git a/Makefile.in b/Makefile.in
index dadedbd..0db79a2 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -58,7 +58,7 @@ host_triplet = @host@
@MINGW_TRUE@am__append_5 = -Wl,-u__tcmalloc
noinst_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
$(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6) \
- $(am__EXEEXT_7) $(am__EXEEXT_8) $(am__EXEEXT_22)
+ $(am__EXEEXT_7) $(am__EXEEXT_8) $(am__EXEEXT_23)
bin_PROGRAMS =
@MINGW_TRUE@am__append_6 = libwindows.la libspinlock.la
@@ -98,28 +98,34 @@ bin_PROGRAMS =
@ENABLE_STATIC_FALSE@@MINGW_FALSE@am__append_16 = $(maybe_threads_unittest_sh_SOURCES)
@MINGW_TRUE@am__append_17 = src/windows/port.h src/windows/port.cc
@MINGW_FALSE@am__append_18 = system_alloc_unittest
+
+# This doesn't work with static linkage, because libtcmalloc.a isn't
+# happy with C linkage (it misses the stdc++ library). Likewise with
+# mingw, which links foo.a even though it doesn't set ENABLE_STATIC.
+# TODO(csilvers): set enable_static=true in configure.ac:36?
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@am__append_19 = malloc_extension_c_test
# -ansi here is just to help ensure the code is bog-standard C.
-@GCC_TRUE@am__append_19 = -ansi
-@MINGW_FALSE@am__append_20 = memalign_unittest
+@ENABLE_STATIC_FALSE@@GCC_TRUE@@MINGW_FALSE@am__append_20 = -ansi
+@MINGW_FALSE@am__append_21 = memalign_unittest
### ------- tcmalloc_minimal_debug (thread-caching malloc with debugallocation)
# Like tcmalloc.cc, debugallocation.cc needs exceptions to fulfill its
# API. Luckily, we can reuse everything else from tcmalloc_minimal.
-@WITH_DEBUGALLOC_TRUE@am__append_21 = libtcmalloc_minimal_debug.la
@WITH_DEBUGALLOC_TRUE@am__append_22 = libtcmalloc_minimal_debug.la
+@WITH_DEBUGALLOC_TRUE@am__append_23 = libtcmalloc_minimal_debug.la
### Unittests
-@WITH_DEBUGALLOC_TRUE@am__append_23 = tcmalloc_minimal_debug_unittest \
+@WITH_DEBUGALLOC_TRUE@am__append_24 = tcmalloc_minimal_debug_unittest \
@WITH_DEBUGALLOC_TRUE@ malloc_extension_debug_test \
@WITH_DEBUGALLOC_TRUE@ memalign_debug_unittest \
@WITH_DEBUGALLOC_TRUE@ realloc_debug_unittest \
@WITH_DEBUGALLOC_TRUE@ debugallocation_test.sh$(EXEEXT)
-@WITH_DEBUGALLOC_TRUE@am__append_24 = $(debugallocation_test_sh_SOURCES)
+@WITH_DEBUGALLOC_TRUE@am__append_25 = $(debugallocation_test_sh_SOURCES)
# This is the sub-program used by debugallocation_test.sh
-@WITH_DEBUGALLOC_TRUE@am__append_25 = debugallocation_test
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_26 = $(SG_TCMALLOC_INCLUDES)
+@WITH_DEBUGALLOC_TRUE@am__append_26 = debugallocation_test
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_27 = $(SG_TCMALLOC_INCLUDES)
### Making the library
@@ -127,12 +133,12 @@ bin_PROGRAMS =
# for all files in this library -- except tcmalloc.cc which needs them
# to fulfill its API. Automake doesn't allow per-file CXXFLAGS, so we need
# to separate into two libraries.
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_27 = libtcmalloc_internal.la
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_28 = libtcmalloc.la
-@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_29 = $(HEAP_CHECKER_SOURCES)
-@WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_30 = -DNO_HEAP_CHECK
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_28 = libtcmalloc_internal.la
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_29 = libtcmalloc.la
+@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_30 = $(HEAP_CHECKER_SOURCES)
@WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_31 = -DNO_HEAP_CHECK
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_32 = libtcmalloc.la
+@WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_32 = -DNO_HEAP_CHECK
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_33 = libtcmalloc.la
### Unittests
@@ -140,81 +146,81 @@ bin_PROGRAMS =
# tcmalloc_minimal. (One would never do this on purpose, but perhaps
# by accident...) When we can compile libprofiler, we also link it in
# to make sure that works too.
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_33 = tcmalloc_unittest \
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_34 = tcmalloc_unittest \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_both_unittest \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_large_unittest \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ raw_printer_test \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_test \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_test.sh$(EXEEXT)
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_34 = vsprojects/sampler_test/sampler_test.vcproj
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_35 = vsprojects/sampler_test/sampler_test.vcproj
# These unittests often need to run binaries. They're in the current dir
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_35 = BINDIR=. \
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_36 = BINDIR=. \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ TMPDIR=/tmp/perftools
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_36 = $(sampling_test_sh_SOURCES)
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_37 = $(sampling_test_sh_SOURCES)
# This is the sub-program used by sampling_test.sh
# The -g is so pprof can get symbol information.
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_37 = sampling_test
-@WITH_HEAP_PROFILER_TRUE@am__append_38 = heap-profiler_unittest.sh$(EXEEXT)
-@WITH_HEAP_PROFILER_TRUE@am__append_39 = $(heap_profiler_unittest_sh_SOURCES)
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_38 = sampling_test
+@WITH_HEAP_PROFILER_TRUE@am__append_39 = heap-profiler_unittest.sh$(EXEEXT)
+@WITH_HEAP_PROFILER_TRUE@am__append_40 = $(heap_profiler_unittest_sh_SOURCES)
# These are sub-programs used by heap-profiler_unittest.sh
-@WITH_HEAP_PROFILER_TRUE@am__append_40 = heap-profiler_unittest
-@WITH_HEAP_CHECKER_TRUE@am__append_41 = \
+@WITH_HEAP_PROFILER_TRUE@am__append_41 = heap-profiler_unittest
+@WITH_HEAP_CHECKER_TRUE@am__append_42 = \
@WITH_HEAP_CHECKER_TRUE@ heap-checker_unittest.sh$(EXEEXT) \
@WITH_HEAP_CHECKER_TRUE@ heap-checker-death_unittest.sh$(EXEEXT)
-@WITH_HEAP_CHECKER_TRUE@am__append_42 = \
+@WITH_HEAP_CHECKER_TRUE@am__append_43 = \
@WITH_HEAP_CHECKER_TRUE@ $(heap_checker_unittest_sh_SOURCES) \
@WITH_HEAP_CHECKER_TRUE@ $(top_srcdir)/$(heap_checker_death_unittest_sh_SOURCES)
# These are sub-programs used by heap-checker_unittest.sh
-@WITH_HEAP_CHECKER_TRUE@am__append_43 = heap-checker_unittest
+@WITH_HEAP_CHECKER_TRUE@am__append_44 = heap-checker_unittest
### Documentation (above and beyond tcmalloc_minimal documentation)
-@WITH_HEAP_PROFILER_TRUE@am__append_44 = doc/heapprofile.html doc/heap-example1.png
-@WITH_HEAP_CHECKER_TRUE@am__append_45 = doc/heap_checker.html
+@WITH_HEAP_PROFILER_TRUE@am__append_45 = doc/heapprofile.html doc/heap-example1.png
+@WITH_HEAP_CHECKER_TRUE@am__append_46 = doc/heap_checker.html
### ------- tcmalloc with debugallocation
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_46 = libtcmalloc_debug.la
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_47 = libtcmalloc_debug.la
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_48 = libtcmalloc_debug.la
### Unittests
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_48 = tcmalloc_debug_unittest \
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_49 = tcmalloc_debug_unittest \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_debug_test \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_debug_test.sh$(EXEEXT)
# This is the sub-program using by sampling_debug_test.sh
# The -g is so pprof can get symbol information.
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_49 = sampling_debug_test
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__append_50 = heap-profiler_debug_unittest.sh$(EXEEXT)
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_50 = sampling_debug_test
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__append_51 = heap-profiler_debug_unittest.sh$(EXEEXT)
# These are sub-programs used by heap-profiler_debug_unittest.sh
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__append_51 = heap-profiler_debug_unittest
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__append_52 = heap-checker_debug_unittest.sh$(EXEEXT)
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__append_52 = heap-profiler_debug_unittest
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__append_53 = heap-checker_debug_unittest.sh$(EXEEXT)
# These are sub-programs used by heap-checker_debug_unittest.sh
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__append_53 = heap-checker_debug_unittest
-@WITH_CPU_PROFILER_TRUE@am__append_54 = $(SG_CPU_PROFILER_INCLUDES)
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__append_54 = heap-checker_debug_unittest
+@WITH_CPU_PROFILER_TRUE@am__append_55 = $(SG_CPU_PROFILER_INCLUDES)
### Making the library
-@WITH_CPU_PROFILER_TRUE@am__append_55 = libprofiler.la
+@WITH_CPU_PROFILER_TRUE@am__append_56 = libprofiler.la
### Unittests
-@WITH_CPU_PROFILER_TRUE@am__append_56 = getpc_test \
+@WITH_CPU_PROFILER_TRUE@am__append_57 = getpc_test \
@WITH_CPU_PROFILER_TRUE@ profiledata_unittest \
@WITH_CPU_PROFILER_TRUE@ profile_handler_unittest \
@WITH_CPU_PROFILER_TRUE@ profiler_unittest.sh$(EXEEXT)
-@WITH_CPU_PROFILER_TRUE@am__append_57 = $(profiler_unittest_sh_SOURCES)
+@WITH_CPU_PROFILER_TRUE@am__append_58 = $(profiler_unittest_sh_SOURCES)
# These are sub-programs used by profiler_unittest.sh
-@WITH_CPU_PROFILER_TRUE@am__append_58 = profiler1_unittest profiler2_unittest profiler3_unittest \
+@WITH_CPU_PROFILER_TRUE@am__append_59 = profiler1_unittest profiler2_unittest profiler3_unittest \
@WITH_CPU_PROFILER_TRUE@ profiler4_unittest
@WITH_CPU_PROFILER_FALSE@profiler2_unittest_DEPENDENCIES =
### Documentation
-@WITH_CPU_PROFILER_TRUE@am__append_59 = doc/cpuprofile.html \
+@WITH_CPU_PROFILER_TRUE@am__append_60 = doc/cpuprofile.html \
@WITH_CPU_PROFILER_TRUE@ doc/cpuprofile-fileformat.html \
@WITH_CPU_PROFILER_TRUE@ doc/pprof-test-big.gif \
@WITH_CPU_PROFILER_TRUE@ doc/pprof-test.gif \
@@ -229,9 +235,9 @@ bin_PROGRAMS =
# works fine for .so files, it does not for .a files. The easiest way
# around this -- and I've tried a bunch of the hard ways -- is to just
# to create another set of libraries that has both functionality in it.
-@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_60 = libtcmalloc_and_profiler.la
-@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_61 = tcmalloc_and_profiler_unittest
-@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_62 = libtcmalloc_and_profiler.la
+@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_61 = libtcmalloc_and_profiler.la
+@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_62 = tcmalloc_and_profiler_unittest
+@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_63 = libtcmalloc_and_profiler.la
DIST_COMMON = README $(am__configure_deps) $(am__dist_doc_DATA_DIST) \
$(am__googleinclude_HEADERS_DIST) $(dist_man_MANS) \
$(noinst_HEADERS) $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
@@ -643,7 +649,7 @@ am_libtcmalloc_minimal_internal_la_OBJECTS = \
$(am__objects_24) $(am__objects_20)
libtcmalloc_minimal_internal_la_OBJECTS = \
$(am_libtcmalloc_minimal_internal_la_OBJECTS)
-libwindows_la_LIBADD =
+libwindows_la_DEPENDENCIES =
am__libwindows_la_SOURCES_DIST = src/windows/port.h \
src/windows/mingw.h src/windows/mini_disassembler.h \
src/windows/mini_disassembler_types.h \
@@ -675,47 +681,48 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
@WITH_STACK_TRACE_TRUE@am__EXEEXT_9 = stacktrace_unittest$(EXEEXT)
@ENABLE_STATIC_FALSE@@MINGW_FALSE@am__EXEEXT_10 = maybe_threads_unittest.sh$(EXEEXT)
@MINGW_FALSE@am__EXEEXT_11 = system_alloc_unittest$(EXEEXT)
-@MINGW_FALSE@am__EXEEXT_12 = memalign_unittest$(EXEEXT)
-@WITH_DEBUGALLOC_TRUE@am__EXEEXT_13 = tcmalloc_minimal_debug_unittest$(EXEEXT) \
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@am__EXEEXT_12 = malloc_extension_c_test$(EXEEXT)
+@MINGW_FALSE@am__EXEEXT_13 = memalign_unittest$(EXEEXT)
+@WITH_DEBUGALLOC_TRUE@am__EXEEXT_14 = tcmalloc_minimal_debug_unittest$(EXEEXT) \
@WITH_DEBUGALLOC_TRUE@ malloc_extension_debug_test$(EXEEXT) \
@WITH_DEBUGALLOC_TRUE@ memalign_debug_unittest$(EXEEXT) \
@WITH_DEBUGALLOC_TRUE@ realloc_debug_unittest$(EXEEXT) \
@WITH_DEBUGALLOC_TRUE@ debugallocation_test.sh$(EXEEXT)
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_14 = tcmalloc_unittest$(EXEEXT) \
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_15 = tcmalloc_unittest$(EXEEXT) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_both_unittest$(EXEEXT) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_large_unittest$(EXEEXT) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ raw_printer_test$(EXEEXT) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_test$(EXEEXT) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_test.sh$(EXEEXT)
-@WITH_HEAP_PROFILER_TRUE@am__EXEEXT_15 = \
+@WITH_HEAP_PROFILER_TRUE@am__EXEEXT_16 = \
@WITH_HEAP_PROFILER_TRUE@ heap-profiler_unittest.sh$(EXEEXT)
-@WITH_HEAP_CHECKER_TRUE@am__EXEEXT_16 = \
+@WITH_HEAP_CHECKER_TRUE@am__EXEEXT_17 = \
@WITH_HEAP_CHECKER_TRUE@ heap-checker_unittest.sh$(EXEEXT) \
@WITH_HEAP_CHECKER_TRUE@ heap-checker-death_unittest.sh$(EXEEXT)
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_17 = tcmalloc_debug_unittest$(EXEEXT) \
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_18 = tcmalloc_debug_unittest$(EXEEXT) \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_debug_test$(EXEEXT) \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_debug_test.sh$(EXEEXT)
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__EXEEXT_18 = heap-profiler_debug_unittest.sh$(EXEEXT)
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__EXEEXT_19 = heap-checker_debug_unittest.sh$(EXEEXT)
-@WITH_CPU_PROFILER_TRUE@am__EXEEXT_20 = getpc_test$(EXEEXT) \
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__EXEEXT_19 = heap-profiler_debug_unittest.sh$(EXEEXT)
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__EXEEXT_20 = heap-checker_debug_unittest.sh$(EXEEXT)
+@WITH_CPU_PROFILER_TRUE@am__EXEEXT_21 = getpc_test$(EXEEXT) \
@WITH_CPU_PROFILER_TRUE@ profiledata_unittest$(EXEEXT) \
@WITH_CPU_PROFILER_TRUE@ profile_handler_unittest$(EXEEXT) \
@WITH_CPU_PROFILER_TRUE@ profiler_unittest.sh$(EXEEXT)
-@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_21 = tcmalloc_and_profiler_unittest$(EXEEXT)
-am__EXEEXT_22 = low_level_alloc_unittest$(EXEEXT) \
+@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_22 = tcmalloc_and_profiler_unittest$(EXEEXT)
+am__EXEEXT_23 = low_level_alloc_unittest$(EXEEXT) \
atomicops_unittest$(EXEEXT) $(am__EXEEXT_9) \
tcmalloc_minimal_unittest$(EXEEXT) \
tcmalloc_minimal_large_unittest$(EXEEXT) $(am__EXEEXT_10) \
addressmap_unittest$(EXEEXT) $(am__EXEEXT_11) \
packed_cache_test$(EXEEXT) frag_unittest$(EXEEXT) \
markidle_unittest$(EXEEXT) malloc_extension_test$(EXEEXT) \
- malloc_extension_c_test$(EXEEXT) $(am__EXEEXT_12) \
- page_heap_test$(EXEEXT) pagemap_unittest$(EXEEXT) \
- realloc_unittest$(EXEEXT) stack_trace_table_test$(EXEEXT) \
- thread_dealloc_unittest$(EXEEXT) $(am__EXEEXT_13) \
- $(am__EXEEXT_14) $(am__EXEEXT_15) $(am__EXEEXT_16) \
- $(am__EXEEXT_17) $(am__EXEEXT_18) $(am__EXEEXT_19) \
- $(am__EXEEXT_20) $(am__EXEEXT_21)
+ $(am__EXEEXT_12) $(am__EXEEXT_13) page_heap_test$(EXEEXT) \
+ pagemap_unittest$(EXEEXT) realloc_unittest$(EXEEXT) \
+ stack_trace_table_test$(EXEEXT) \
+ thread_dealloc_unittest$(EXEEXT) $(am__EXEEXT_14) \
+ $(am__EXEEXT_15) $(am__EXEEXT_16) $(am__EXEEXT_17) \
+ $(am__EXEEXT_18) $(am__EXEEXT_19) $(am__EXEEXT_20) \
+ $(am__EXEEXT_21) $(am__EXEEXT_22)
PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
am__addressmap_unittest_SOURCES_DIST = \
src/tests/addressmap_unittest.cc src/addressmap-inl.h \
@@ -856,12 +863,15 @@ am_low_level_alloc_unittest_OBJECTS = \
low_level_alloc_unittest_OBJECTS = \
$(am_low_level_alloc_unittest_OBJECTS)
low_level_alloc_unittest_DEPENDENCIES = $(am__DEPENDENCIES_2)
-am_malloc_extension_c_test_OBJECTS = \
- malloc_extension_c_test-malloc_extension_c_test.$(OBJEXT)
+am__malloc_extension_c_test_SOURCES_DIST = \
+ src/tests/malloc_extension_c_test.c \
+ src/google/malloc_extension.h src/google/malloc_extension_c.h
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@am_malloc_extension_c_test_OBJECTS = malloc_extension_c_test-malloc_extension_c_test.$(OBJEXT)
malloc_extension_c_test_OBJECTS = \
$(am_malloc_extension_c_test_OBJECTS)
-malloc_extension_c_test_DEPENDENCIES = $(am__DEPENDENCIES_5) \
- $(am__DEPENDENCIES_1)
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_DEPENDENCIES = \
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(am__DEPENDENCIES_5) \
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(am__DEPENDENCIES_1)
am__malloc_extension_debug_test_SOURCES_DIST = \
src/tests/malloc_extension_test.cc src/config_for_unittests.h \
src/base/logging.h src/google/malloc_extension.h \
@@ -1277,7 +1287,7 @@ DIST_SOURCES = $(liblogging_la_SOURCES) \
$(am__heap_profiler_unittest_SOURCES_DIST) \
$(am__heap_profiler_unittest_sh_SOURCES_DIST) \
$(am__low_level_alloc_unittest_SOURCES_DIST) \
- $(malloc_extension_c_test_SOURCES) \
+ $(am__malloc_extension_c_test_SOURCES_DIST) \
$(am__malloc_extension_debug_test_SOURCES_DIST) \
$(malloc_extension_test_SOURCES) $(markidle_unittest_SOURCES) \
$(am__maybe_threads_unittest_sh_SOURCES_DIST) \
@@ -1561,15 +1571,15 @@ TCMALLOC_FLAGS = $(am__append_5)
@HAVE_OBJCOPY_WEAKEN_TRUE@ -W __Znwm -W __ZnwmRKSt9nothrow_t -W __Znam -W __ZnamRKSt9nothrow_t \
@HAVE_OBJCOPY_WEAKEN_TRUE@ -W __ZdlPv -W __ZdaPv
-LIBS_TO_WEAKEN = libtcmalloc_minimal.la $(am__append_22) \
- $(am__append_32) $(am__append_47) $(am__append_62)
+LIBS_TO_WEAKEN = libtcmalloc_minimal.la $(am__append_23) \
+ $(am__append_33) $(am__append_48) $(am__append_63)
googleincludedir = $(includedir)/google
# The .h files you want to install (that is, .h files that people
# who install this package can include in their own applications.)
# We'll add to this later, on a library-by-library basis
googleinclude_HEADERS = $(am__append_9) \
- $(SG_TCMALLOC_MINIMAL_INCLUDES) $(am__append_26) \
- $(am__append_54)
+ $(SG_TCMALLOC_MINIMAL_INCLUDES) $(am__append_27) \
+ $(am__append_55)
# tcmalloc.h is a special case, because it's a .h.in file
nodist_googleinclude_HEADERS = src/google/tcmalloc.h
noinst_HEADERS = src/google/tcmalloc.h.in
@@ -1611,13 +1621,13 @@ dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README \
doc/tcmalloc-opspersec.vs.size.5.threads.png \
doc/tcmalloc-opspersec.vs.size.8.threads.png doc/overview.dot \
doc/pageheap.dot doc/spanmap.dot doc/threadheap.dot \
- $(am__append_44) $(am__append_45) $(am__append_59)
+ $(am__append_45) $(am__append_46) $(am__append_60)
# The libraries (.so's) you want to install
# We'll add to this later, on a library-by-library basis
-lib_LTLIBRARIES = libtcmalloc_minimal.la $(am__append_21) \
- $(am__append_28) $(am__append_46) $(am__append_55) \
- $(am__append_60)
+lib_LTLIBRARIES = libtcmalloc_minimal.la $(am__append_22) \
+ $(am__append_29) $(am__append_47) $(am__append_56) \
+ $(am__append_61)
# This is for 'convenience libraries' -- basically just a container for sources
### Making the library
@@ -1628,7 +1638,7 @@ lib_LTLIBRARIES = libtcmalloc_minimal.la $(am__append_21) \
# to separate into two libraries.
noinst_LTLIBRARIES = liblogging.la libsysinfo.la $(am__append_6) \
$(am__append_8) $(am__append_10) \
- libtcmalloc_minimal_internal.la $(am__append_27)
+ libtcmalloc_minimal_internal.la $(am__append_28)
WINDOWS_PROJECTS = google-perftools.sln \
vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj \
$(am__append_14) \
@@ -1646,7 +1656,7 @@ WINDOWS_PROJECTS = google-perftools.sln \
vsprojects/realloc_unittest/realloc_unittest.vcproj \
vsprojects/stack_trace_table_test/stack_trace_table_test.vcproj \
vsprojects/thread_dealloc_unittest/thread_dealloc_unittest.vcproj \
- $(am__append_34)
+ $(am__append_35)
# unittests you want to run when people type 'make check'.
# Note: tests cannot take any arguments!
@@ -1669,19 +1679,19 @@ TESTS = low_level_alloc_unittest atomicops_unittest $(am__append_11) \
tcmalloc_minimal_unittest tcmalloc_minimal_large_unittest \
$(am__append_15) addressmap_unittest $(am__append_18) \
packed_cache_test frag_unittest markidle_unittest \
- malloc_extension_test malloc_extension_c_test $(am__append_20) \
+ malloc_extension_test $(am__append_19) $(am__append_21) \
page_heap_test pagemap_unittest realloc_unittest \
stack_trace_table_test thread_dealloc_unittest \
- $(am__append_23) $(am__append_33) $(am__append_38) \
- $(am__append_41) $(am__append_48) $(am__append_50) \
- $(am__append_52) $(am__append_56) $(am__append_61)
+ $(am__append_24) $(am__append_34) $(am__append_39) \
+ $(am__append_42) $(am__append_49) $(am__append_51) \
+ $(am__append_53) $(am__append_57) $(am__append_62)
# TESTS_ENVIRONMENT sets environment variables for when you run unittest.
# We always get "srcdir" set for free.
# We'll add to this later, on a library-by-library basis.
-TESTS_ENVIRONMENT = $(am__append_13) $(am__append_35)
+TESTS_ENVIRONMENT = $(am__append_13) $(am__append_36)
# All script tests should be added here
-noinst_SCRIPTS = $(am__append_16) $(am__append_24) $(am__append_36) \
- $(am__append_39) $(am__append_42) $(am__append_57)
+noinst_SCRIPTS = $(am__append_16) $(am__append_25) $(am__append_37) \
+ $(am__append_40) $(am__append_43) $(am__append_58)
# This is my own var, used for extra libraries I make that I need installed
EXTRA_INSTALL =
@@ -1731,6 +1741,8 @@ libsysinfo_la_LIBADD = $(NANOSLEEP_LIBS) $(am__append_7)
@MINGW_TRUE@ src/windows/preamble_patcher.cc \
@MINGW_TRUE@ src/windows/preamble_patcher_with_stub.cc
+# patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us.
+@MINGW_TRUE@libwindows_la_LIBADD = -lPsapi
# spinlock is the only code that uses atomicops.
@MINGW_FALSE@SPINLOCK_INCLUDES = src/base/spinlock.h \
@MINGW_FALSE@ src/base/atomicops.h \
@@ -1962,14 +1974,16 @@ malloc_extension_test_SOURCES = src/tests/malloc_extension_test.cc \
malloc_extension_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
malloc_extension_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
malloc_extension_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
-malloc_extension_c_test_SOURCES = src/tests/malloc_extension_c_test.c \
- src/google/malloc_extension.h \
- src/google/malloc_extension_c.h
-
-malloc_extension_c_test_CFLAGS = $(PTHREAD_CFLAGS) $(AM_CFLAGS) \
- $(am__append_19)
-malloc_extension_c_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
-malloc_extension_c_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_SOURCES = src/tests/malloc_extension_c_test.c \
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@ src/google/malloc_extension.h \
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@ src/google/malloc_extension_c.h
+
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_CFLAGS = \
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(PTHREAD_CFLAGS) \
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(AM_CFLAGS) \
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(am__append_20)
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
@MINGW_FALSE@memalign_unittest_SOURCES = src/tests/memalign_unittest.cc \
@MINGW_FALSE@ src/tcmalloc.h \
@MINGW_FALSE@ src/config_for_unittests.h \
@@ -2080,17 +2094,17 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(PTHREAD_CFLAGS) -DNDEBUG \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(AM_CXXFLAGS) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(NO_EXCEPTIONS) \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_30)
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_31)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_SOURCES = \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(TCMALLOC_CC) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(TCMALLOC_INCLUDES) \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_29)
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_30)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_CXXFLAGS = \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(PTHREAD_CFLAGS) -DNDEBUG \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(AM_CXXFLAGS) \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_31)
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_32)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_internal.la
@WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@HEAP_CHECKER_SOURCES =
diff --git a/README.windows b/README.windows
index 0b82d04..750aa51 100644
--- a/README.windows
+++ b/README.windows
@@ -43,8 +43,7 @@ project tcmalloc_minimal_unittest-static, which does this. For this
to work, you'll need to add "/D PERFTOOLS_DLL_DECL=" to the compile
line of every perftools .cc file. You do not need to depend on the
tcmalloc symbol in this case (that is, you don't need to do either
-step 1 or step 2 from above). However, you will need to add a
-dependency to Psapi.lib, which tcmalloc uses.
+step 1 or step 2 from above).
The heap-profiler has had a preliminary port to Windows. It has not
been well tested, and probably does not work at all when Frame Pointer
diff --git a/src/base/low_level_alloc.cc b/src/base/low_level_alloc.cc
index 900ae4e..2bbce54 100644
--- a/src/base/low_level_alloc.cc
+++ b/src/base/low_level_alloc.cc
@@ -67,7 +67,7 @@ namespace {
// first. Valid in both allocated and unallocated blocks
intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this
LowLevelAlloc::Arena *arena; // pointer to parent arena
- void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*)
+ void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*)
} header;
// Next two fields: in unallocated blocks: freelist skiplist data
@@ -197,15 +197,57 @@ struct LowLevelAlloc::Arena {
// pointer.
static struct LowLevelAlloc::Arena default_arena;
-// A non-malloc-hooked arena: used only to allocate metadata for arenas that
+// Non-malloc-hooked arenas: used only to allocate metadata for arenas that
// do not want malloc hook reporting, so that for them there's no malloc hook
// reporting even during arena creation.
static struct LowLevelAlloc::Arena unhooked_arena;
+static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena;
// magic numbers to identify allocated and unallocated blocks
static const intptr_t kMagicAllocated = 0x4c833e95;
static const intptr_t kMagicUnallocated = ~kMagicAllocated;
+namespace {
+ class ArenaLock {
+ public:
+ explicit ArenaLock(LowLevelAlloc::Arena *arena) :
+ left_(false), mask_valid_(false), arena_(arena) {
+ if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+ // We've decided not to support async-signal-safe arena use until
+ // there is a demonstrated need. Here's how one could do it though
+ // (would need to be made more portable).
+#if 0
+ sigset_t all;
+ sigfillset(&all);
+ this->mask_valid_ =
+ (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0);
+#else
+ RAW_CHECK(false, "We do not yet support async-signal-safe arena.");
+#endif
+ }
+ this->arena_->mu.Lock();
+ }
+ ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); }
+ void Leave() {
+ this->arena_->mu.Unlock();
+#if 0
+ if (this->mask_valid_) {
+ pthread_sigmask(SIG_SETMASK, &this->mask_, 0);
+ }
+#endif
+ this->left_ = true;
+ }
+ private:
+ bool left_; // whether left region
+ bool mask_valid_;
+#if 0
+ sigset_t mask_; // old mask of blocked signals
+#endif
+ LowLevelAlloc::Arena *arena_;
+ DISALLOW_COPY_AND_ASSIGN(ArenaLock);
+ };
+} // anonymous namespace
+
// create an appropriate magic number for an object at "ptr"
// "magic" should be kMagicAllocated or kMagicUnallocated
inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) {
@@ -235,6 +277,8 @@ static void ArenaInit(LowLevelAlloc::Arena *arena) {
// Default arena should be hooked, e.g. for heap-checker to trace
// pointer chains through objects in the default arena.
arena->flags = LowLevelAlloc::kCallMallocHook;
+ } else if (arena == &unhooked_async_sig_safe_arena) {
+ arena->flags = LowLevelAlloc::kAsyncSignalSafe;
} else {
arena->flags = 0; // other arenas' flags may be overridden by client,
// but unhooked_arena will have 0 in 'flags'.
@@ -246,9 +290,12 @@ static void ArenaInit(LowLevelAlloc::Arena *arena) {
LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags,
Arena *meta_data_arena) {
RAW_CHECK(meta_data_arena != 0, "must pass a valid arena");
- if (meta_data_arena == &default_arena &&
- (flags & LowLevelAlloc::kCallMallocHook) == 0) {
- meta_data_arena = &unhooked_arena;
+ if (meta_data_arena == &default_arena) {
+ if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+ meta_data_arena = &unhooked_async_sig_safe_arena;
+ } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) {
+ meta_data_arena = &unhooked_arena;
+ }
}
// Arena(0) uses the constructor for non-static contexts
Arena *result =
@@ -262,9 +309,9 @@ LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags,
bool LowLevelAlloc::DeleteArena(Arena *arena) {
RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena,
"may not delete default arena");
- arena->mu.Lock();
+ ArenaLock section(arena);
bool empty = (arena->allocation_count == 0);
- arena->mu.Unlock();
+ section.Leave();
if (empty) {
while (arena->freelist.next[0] != 0) {
AllocList *region = arena->freelist.next[0];
@@ -279,7 +326,13 @@ bool LowLevelAlloc::DeleteArena(Arena *arena) {
"empty arena has non-page-aligned block size");
RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0,
"empty arena has non-page-aligned block");
- RAW_CHECK(munmap(region, size) == 0,
+ int munmap_result;
+ if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) {
+ munmap_result = munmap(region, size);
+ } else {
+ munmap_result = MallocHook::UnhookedMUnmap(region, size);
+ }
+ RAW_CHECK(munmap_result == 0,
"LowLevelAlloc::DeleteArena: munmap failed address");
}
Free(arena);
@@ -363,21 +416,21 @@ void LowLevelAlloc::Free(void *v) {
if ((arena->flags & kCallMallocHook) != 0) {
MallocHook::InvokeDeleteHook(v);
}
- arena->mu.Lock();
+ ArenaLock section(arena);
AddToFreelist(v, arena);
RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free");
arena->allocation_count--;
- arena->mu.Unlock();
+ section.Leave();
}
}
// allocates and returns a block of size bytes, to be freed with Free()
// L < arena->mu
-void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
+static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
void *result = 0;
if (request != 0) {
AllocList *s; // will point to region that satisfies request
- arena->mu.Lock();
+ ArenaLock section(arena);
ArenaInit(arena);
// round up with header
size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup);
@@ -399,8 +452,14 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
// mmap generous 64K chunks to decrease
// the chances/impact of fragmentation:
size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16);
- void *new_pages = mmap(0, new_pages_size,
- PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ void *new_pages;
+ if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+ new_pages = MallocHook::UnhookedMMap(0, new_pages_size,
+ PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ } else {
+ new_pages = mmap(0, new_pages_size,
+ PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ }
RAW_CHECK(new_pages != MAP_FAILED, "mmap error");
arena->mu.Lock();
s = reinterpret_cast<AllocList *>(new_pages);
@@ -425,7 +484,7 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
s->header.magic = Magic(kMagicAllocated, &s->header);
RAW_CHECK(s->header.arena == arena, "");
arena->allocation_count++;
- arena->mu.Unlock();
+ section.Leave();
result = &s->levels;
}
ANNOTATE_NEW_MEMORY(result, request);
diff --git a/src/base/low_level_alloc.h b/src/base/low_level_alloc.h
index 0df1298..393b3d2 100644
--- a/src/base/low_level_alloc.h
+++ b/src/base/low_level_alloc.h
@@ -72,14 +72,20 @@ class LowLevelAlloc {
// meta_data_arena; the DefaultArena() can be passed for meta_data_arena.
// These values may be ored into flags:
enum {
- // Calls to Alloc() and Free() will be reported
- // via the MallocHook interface.
- // The DefaultArena() has this flag on.
- // NewArena(flags, DefaultArena()) w/o this bit in 'flags',
- // on the other hand, will not cause any MallocHook
- // calls even during the NewArena call itself
- // (it will in fact use meta_data_arena different from DefaultArena()).
- kCallMallocHook = 0x0001
+ // Report calls to Alloc() and Free() via the MallocHook interface.
+ // Set in the DefaultArena.
+ kCallMallocHook = 0x0001,
+
+ // Make calls to Alloc(), Free() be async-signal-safe. Not set in
+ // DefaultArena().
+ kAsyncSignalSafe = 0x0002,
+
+ // When used with DefaultArena(), the NewArena() and DeleteArena() calls
+ // obey the flags given explicitly in the NewArena() call, even if those
+ // flags differ from the settings in DefaultArena(). So the call
+ // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe,
+ // as well as generating an arena that provides async-signal-safe
+ // Alloc/Free.
};
static Arena *NewArena(int32 flags, Arena *meta_data_arena);
diff --git a/src/google/malloc_hook.h b/src/google/malloc_hook.h
index f2713e9..48d92da 100644
--- a/src/google/malloc_hook.h
+++ b/src/google/malloc_hook.h
@@ -191,6 +191,12 @@ class PERFTOOLS_DLL_DECL MallocHook {
int skip_count) {
return MallocHook_GetCallerStackTrace(result, max_depth, skip_count);
}
+
+ // Unhooked versions of mmap() and munmap(). These should be used
+ // only by experts, since they bypass heapchecking, etc.
+ static void* UnhookedMMap(void *start, size_t length, int prot, int flags,
+ int fd, off_t offset);
+ static int UnhookedMUnmap(void *start, size_t length);
};
#endif /* _MALLOC_HOOK_H_ */
diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc
index d1ad12a..2a7f542 100644
--- a/src/malloc_hook.cc
+++ b/src/malloc_hook.cc
@@ -423,7 +423,7 @@ static inline void* do_mmap64(void *start, size_t length,
return result;
}
-# endif
+# endif // defined(__x86_64__)
// We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook
// calls right into mmap and mmap64, so that the stack frames in the caller's
@@ -472,7 +472,7 @@ extern "C" void* mmap(void *start, size_t length, int prot, int flags,
return result;
}
-#endif
+#endif // !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH)
extern "C" int munmap(void* start, size_t length) __THROW {
MallocHook::InvokeMunmapHook(start, length);
@@ -501,4 +501,26 @@ extern "C" void* sbrk(ptrdiff_t increment) __THROW {
return result;
}
-#endif
+/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot,
+ int flags, int fd, off_t offset) {
+ return do_mmap64(start, length, prot, flags, fd, offset); // uses do_mmap64 directly instead of the mmap() wrapper that invokes the mmap hooks
+}
+
+/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) {
+ return sys_munmap(start, length); // skips the munmap() wrapper above, which calls InvokeMunmapHook first
+}
+
+#else // defined(__linux) &&
+ // (defined(__i386__) || defined(__x86_64__) || defined(__PPC__))
+
+/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot,
+ int flags, int fd, off_t offset) {
+ return mmap(start, length, prot, flags, fd, offset); // #else branch: simply forwards to the regular mmap()
+}
+
+/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) {
+ return munmap(start, length); // #else branch: simply forwards to the regular munmap()
+}
+
+#endif // defined(__linux) &&
+ // (defined(__i386__) || defined(__x86_64__) || defined(__PPC__))
diff --git a/src/pprof b/src/pprof
index b7c7d07..62e19c7 100755
--- a/src/pprof
+++ b/src/pprof
@@ -1977,7 +1977,12 @@ sub RemoveUninterestingFrames {
'__builtin_vec_delete',
'__builtin_vec_new',
'operator new',
- 'operator new[]') {
+ 'operator new[]',
+ # These mark the beginning/end of our custom sections
+ '__start_google_malloc',
+ '__stop_google_malloc',
+ '__start_malloc_hook',
+ '__stop_malloc_hook') {
$skip{$name} = 1;
$skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything
}
diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h
index 5650c86..05701e7 100644
--- a/src/stacktrace_x86-inl.h
+++ b/src/stacktrace_x86-inl.h
@@ -49,6 +49,11 @@
#elif defined(HAVE_UCONTEXT_H)
#include <ucontext.h> // for ucontext_t
#elif defined(HAVE_CYGWIN_SIGNAL_H)
+// cygwin/signal.h has a buglet where it uses pthread_attr_t without
+// #including <pthread.h> itself. So we have to do it.
+# ifdef HAVE_PTHREAD
+# include <pthread.h>
+# endif
#include <cygwin/signal.h>
typedef ucontext ucontext_t;
#endif
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 70dd8c6..625301e 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -762,9 +762,9 @@ TCMallocGuard::TCMallocGuard() {
// patch the windows VirtualAlloc, etc.
PatchWindowsFunctions(); // defined in windows/patch_functions.cc
#endif
- free(malloc(1));
+ tc_free(tc_malloc(1));
ThreadCache::InitTSD();
- free(malloc(1));
+ tc_free(tc_malloc(1));
MallocExtension::Register(new TCMallocImplementation);
}
}
diff --git a/src/tests/sampling_test.sh b/src/tests/sampling_test.sh
index 149d27b..8c96bc1 100755
--- a/src/tests/sampling_test.sh
+++ b/src/tests/sampling_test.sh
@@ -62,10 +62,10 @@ if ! cat "$SAMPLING_TEST_BINARY" >/dev/null 2>&1; then
SAMPLING_TEST_BINARY="$SAMPLING_TEST_BINARY".exe
fi
-die() {
- echo "FAILED"
- echo "reason:"
- echo "$@"
+die() { # prints a failure banner, runs "$@" as a command to show its output, then exits 1 (pass a command, not a message).
+ echo "FAILED. Output from $@"
+ echo "----"
+ "$@"
echo "----"
exit 1
}
@@ -79,16 +79,16 @@ mkdir "$OUTDIR" || die "Unable to create $OUTDIR"
# 50M to 99M.
"$SAMPLING_TEST" "$OUTDIR/out"
-echo -n "Testing heap output..."
+echo "Testing heap output..."
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \
| grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
- || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"`
+ || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"
echo "OK"
-echo -n "Testing growth output..."
+echo "Testing growth output..."
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \
| grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
- || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"`
+ || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"
echo "OK"
echo "PASS"
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index fd44a70..64f4deb 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -299,12 +299,6 @@ int ThreadCache::GetSamplePeriod() {
}
void ThreadCache::InitModule() {
- // There is a slight potential race here because of double-checked
- // locking idiom. However, as long as the program does a small
- // allocation before switching to multi-threaded mode, we will be
- // fine. We increase the chances of doing such a small allocation
- // by doing one in the constructor of the module_enter_exit_hook
- // object declared below.
SpinLockHolder h(Static::pageheap_lock());
if (!phinited) {
Static::InitStaticVars();
diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc
index 992161f..d13b7b8 100644
--- a/src/windows/patch_functions.cc
+++ b/src/windows/patch_functions.cc
@@ -72,6 +72,11 @@
#error This file is intended for patching allocators - use override_functions.cc instead.
#endif
+// We use psapi. Non-MSVC systems will have to link this in themselves.
+#ifdef _MSC_VER
+#pragma comment(lib, "Psapi.lib")
+#endif
+
// Make sure we always use the 'old' names of the psapi functions.
#ifndef PSAPI_VERSION
#define PSAPI_VERSION 1
@@ -80,6 +85,8 @@
#include <windows.h>
#include <malloc.h> // for _msize and _expand
#include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
+#include <set>
+#include <map>
#include <vector>
#include <base/logging.h>
#include "base/spinlock.h"
@@ -122,29 +129,7 @@ typedef void (*GenericFnPtr)();
using sidestep::PreamblePatcher;
-// This is a subset of MODDULEENTRY32, that we need for patching
-struct ModuleEntryCopy {
- LPVOID modBaseAddr;
- DWORD modBaseSize;
- HMODULE hModule;
- TCHAR szModule[kMaxModuleNameSize];
-
- ModuleEntryCopy() {
- modBaseAddr = NULL;
- modBaseSize = 0;
- hModule = NULL;
- strcpy(szModule, "<executable>");
- }
- ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) {
- this->modBaseAddr = mi.lpBaseOfDll;
- this->modBaseSize = mi.SizeOfImage;
- this->hModule = hmodule;
- // TODO(csilvers): we could make more efficient by calling this
- // lazily (not until this->szModule is needed, which is often never).
- GetModuleBaseNameA(hprocess, hmodule,
- this->szModule, sizeof(this->szModule));
- }
-};
+struct ModuleEntryCopy; // defined below
// These functions are how we override the memory allocation
// functions, just like tcmalloc.cc and malloc_hook.cc do.
@@ -160,33 +145,19 @@ class LibcInfo {
LibcInfo() {
memset(this, 0, sizeof(*this)); // easiest way to initialize the array
}
- bool SameAs(const LibcInfo& that) const {
- return (is_valid() &&
- module_base_address_ == that.module_base_address_ &&
- module_base_size_ == that.module_base_size_);
- }
- bool SameAsME32(const ModuleEntryCopy& me32) const {
- return (is_valid() &&
- module_base_address_ == me32.modBaseAddr &&
- module_base_size_ == me32.modBaseSize);
- }
+ bool SameAs(const LibcInfo& that) const;
+ bool SameAsModuleEntry(const ModuleEntryCopy& module_entry) const;
+
bool patched() const { return is_valid() && module_name_[0] != '\0'; }
const char* module_name() const { return is_valid() ? module_name_ : ""; }
void set_is_valid(bool b) { is_valid_ = b; }
- // These shouldn't have to be public, since only subclasses of
- // LibcInfo need it, but they do. Maybe something to do with
- // templates. Shrug.
- bool is_valid() const { return is_valid_; }
- GenericFnPtr windows_fn(int ifunction) const {
- return windows_fn_[ifunction];
- }
-
// Populates all the windows_fn_[] vars based on our module info.
// Returns false if windows_fn_ is all NULL's, because there's
- // nothing to patch. Also populates the me32 info.
- bool PopulateWindowsFn(const ModuleEntryCopy& me32);
+ // nothing to patch. Also populates the rest of the module_entry
+ // info, such as the module's name.
+ bool PopulateWindowsFn(const ModuleEntryCopy& module_entry);
protected:
void CopyFrom(const LibcInfo& that) {
@@ -237,6 +208,24 @@ class LibcInfo {
const void *module_base_address_;
size_t module_base_size_;
char module_name_[kMaxModuleNameSize];
+
+ public:
+ // These shouldn't have to be public, since only subclasses of
+ // LibcInfo need it, but they do. Maybe something to do with
+ // templates. Shrug. I hide them down here so users won't see
+ // them. :-) (OK, I also need to define ctrgProcAddress late.)
+ bool is_valid() const { return is_valid_; }
+ GenericFnPtr windows_fn(int ifunction) const {
+ return windows_fn_[ifunction];
+ }
+ // These three are needed by ModuleEntryCopy.
+ static const int ctrgProcAddress = kNumFunctions;
+ static GenericFnPtr static_fn(int ifunction) {
+ return static_fn_[ifunction];
+ }
+ static const char* const function_name(int ifunction) {
+ return function_name_[ifunction];
+ }
};
// Template trickiness: logically, a LibcInfo would include
@@ -294,6 +283,43 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
// But they seem pretty obscure, and I'm fine not overriding them for now.
};
+// This is a subset of MODULEENTRY32 that we need for patching.
+struct ModuleEntryCopy {
+ LPVOID modBaseAddr;
+ DWORD modBaseSize;
+ HMODULE hModule;
+ TCHAR szModule[kMaxModuleNameSize];
+ // This is not part of MODULEENTRY32, but is needed to avoid making
+ // windows syscalls while we're holding patch_all_modules_lock (see
+ // lock-inversion comments at patch_all_modules_lock definition, below).
+ GenericFnPtr rgProcAddresses[LibcInfo::ctrgProcAddress];
+
+ ModuleEntryCopy() { // "fake" entry: no module handle, addresses taken from LibcInfo::static_fn
+ modBaseAddr = NULL;
+ modBaseSize = 0;
+ hModule = NULL;
+ strcpy(szModule, "<executable>");
+ for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
+ rgProcAddresses[i] = LibcInfo::static_fn(i);
+ }
+ ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) { // entry for a loaded module
+ this->modBaseAddr = mi.lpBaseOfDll;
+ this->modBaseSize = mi.SizeOfImage;
+ this->hModule = hmodule;
+ // TODO(csilvers): we could make more efficient by calling these
+ // lazily (not until the vars are needed, which is often never).
+ // However, there's tricky business with calling windows functions
+ // inside the patch_all_modules_lock (see the lock inversion
+ // comments with the patch_all_modules_lock definition, below), so
+ // it's safest to do it all here, where no lock is needed.
+ ::GetModuleBaseNameA(hprocess, hmodule,
+ this->szModule, sizeof(this->szModule)); // NOTE(review): result unchecked; szModule is not set elsewhere in this ctor -- confirm failure is harmless
+ for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
+ rgProcAddresses[i] =
+ (GenericFnPtr)::GetProcAddress(hModule, LibcInfo::function_name(i));
+ }
+};
+
// This class is easier because there's only one of them.
class WindowsInfo {
public:
@@ -347,6 +373,11 @@ class WindowsInfo {
// If you run out, just add a few more to the array. You'll also need
// to update the switch statement in PatchOneModule(), and the list in
// UnpatchWindowsFunctions().
+// main_executable and main_executable_windows are two windows into
+// the same executable. One is responsible for patching the libc
+// routines that live in the main executable (if any) to use tcmalloc;
+// the other is responsible for patching the windows routines like
+// HeapAlloc/etc to use tcmalloc.
static LibcInfoWithPatchFunctions<0> main_executable;
static LibcInfoWithPatchFunctions<1> libc1;
static LibcInfoWithPatchFunctions<2> libc2;
@@ -448,20 +479,28 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
{ "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary },
};
-bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& me32) {
+bool LibcInfo::SameAs(const LibcInfo& that) const { // true iff *this is valid and has the same module base address and size as 'that'
+ return (is_valid() &&
+ module_base_address_ == that.module_base_address_ &&
+ module_base_size_ == that.module_base_size_);
+}
+
+bool LibcInfo::SameAsModuleEntry(const ModuleEntryCopy& module_entry) const { // true iff *this is valid and matches module_entry's base address and size
+ return (is_valid() &&
+ module_base_address_ == module_entry.modBaseAddr &&
+ module_base_size_ == module_entry.modBaseSize);
+}
+
+bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
// First, store the location of the function to patch before
// patching it. If none of these functions are found in the module,
// then this module has no libc in it, and we just return false.
for (int i = 0; i < kNumFunctions; i++) {
if (!function_name_[i]) // we can turn off patching by unsetting name
continue;
- GenericFnPtr fn = NULL;
- if (me32.hModule == NULL) { // used for the main executable
- // This is used only for a statically-linked-in libc.
- fn = static_fn_[i];
- } else {
- fn = (GenericFnPtr)::GetProcAddress(me32.hModule, function_name_[i]);
- }
+ // The ::GetProcAddress calls were done in the ModuleEntryCopy
+ // constructor, so we don't have to make any windows calls here.
+ const GenericFnPtr fn = module_entry.rgProcAddresses[i];
if (fn) {
windows_fn_[i] = PreamblePatcher::ResolveTarget(fn);
}
@@ -514,15 +553,15 @@ bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& me32) {
CHECK(windows_fn_[kRealloc]);
// OK, we successfully patched. Let's store our member information.
- module_base_address_ = me32.modBaseAddr;
- module_base_size_ = me32.modBaseSize;
- strcpy(module_name_, me32.szModule);
+ module_base_address_ = module_entry.modBaseAddr;
+ module_base_size_ = module_entry.modBaseSize;
+ strcpy(module_name_, module_entry.szModule);
return true;
}
template<int T>
bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) {
- CopyFrom(me_info); // copies the me32 and the windows_fn_ array
+ CopyFrom(me_info); // copies the module_entry and the windows_fn_ array
for (int i = 0; i < kNumFunctions; i++) {
if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) {
// if origstub_fn_ is not NULL, it's left around from a previous
@@ -530,7 +569,10 @@ bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) {
// Since we've patched Unpatch() not to delete origstub_fn_ (it
// causes problems in some contexts, though obviously not this
// one), we should delete it now, before setting it to NULL.
- delete[] reinterpret_cast<char*>(origstub_fn_[i]);
+ // NOTE: casting from a function to a pointer is contra the C++
+ // spec. It's not safe on IA64, but is on i386. We use
+ // a C-style cast here to emphasize this is not legal C++.
+ delete[] (char*)(origstub_fn_[i]);
origstub_fn_[i] = NULL; // Patch() will fill this in
CHECK_EQ(sidestep::SIDESTEP_SUCCESS,
PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i],
@@ -569,7 +611,10 @@ void WindowsInfo::Patch() {
// Since we've patched Unpatch() not to delete origstub_fn_ (it
// causes problems in some contexts, though obviously not this
// one), we should delete it now, before setting it to NULL.
- delete[] reinterpret_cast<char*>(function_info_[i].origstub_fn);
+ // NOTE: casting from a function to a pointer is contra the C++
+ // spec. It's not safe on IA64, but is on i386. We use
+ // a C-style cast here to emphasize this is not legal C++.
+ delete[] (char*)(function_info_[i].origstub_fn);
function_info_[i].origstub_fn = NULL; // Patch() will fill this in
CHECK_EQ(sidestep::SIDESTEP_SUCCESS,
PreamblePatcher::Patch(function_info_[i].windows_fn,
@@ -594,7 +639,7 @@ void PatchOneModuleLocked(const LibcInfo& me_info) {
// Double-check we haven't seen this module before.
for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) {
if (g_module_libcs[i]->SameAs(me_info)) {
- fprintf(stderr, "%s:%d: FATAL ERROR: %s double-patched somehow.\n",
+ fprintf(stderr, "%s:%d: FATAL PERFTOOLS ERROR: %s double-patched somehow.\n",
__FILE__, __LINE__, g_module_libcs[i]->module_name());
CHECK(false);
}
@@ -617,34 +662,53 @@ void PatchOneModuleLocked(const LibcInfo& me_info) {
}
}
}
- printf("ERROR: Too many modules containing libc in this executable\n");
+ printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n");
}
void PatchMainExecutableLocked() {
if (main_executable.patched())
return; // main executable has already been patched
-ModuleEntryCopy fake_me32; // we make a fake one to pass into Patch()
-main_executable.PopulateWindowsFn(fake_me32);
+ ModuleEntryCopy fake_module_entry; // fake entry to pass into PopulateWindowsFn()
+ // The default ModuleEntryCopy constructor fills rgProcAddresses from static_fn.
+ main_executable.PopulateWindowsFn(fake_module_entry);
main_executable.Patch(main_executable);
}
+// This lock is subject to a subtle and annoying lock inversion
+// problem: it may interact badly with unknown internal windows locks.
+// In particular, windows may be holding a lock when it calls
+// LoadLibraryExW and FreeLibrary, which we've patched. We have those
+// routines call PatchAllModules, which acquires this lock. If we
+// make windows system calls while holding this lock, those system
+// calls may need the internal windows locks that are being held in
+// the call to LoadLibraryExW, resulting in deadlock. The solution is
+// to be very careful not to call *any* windows routines while holding
+// patch_all_modules_lock, inside PatchAllModules().
static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED);
// Iterates over all the modules currently loaded by the executable,
// and makes sure they're all patched. For ones that aren't, we patch
// them in. We also check that every module we had patched in the
// past is still loaded, and update internal data structures if so.
-void PatchAllModules() {
+// We return true if this PatchAllModules did any work, false otherwise.
+bool PatchAllModules() {
std::vector<ModuleEntryCopy> modules;
+ bool made_changes = false;
const HANDLE hCurrentProcess = GetCurrentProcess();
MODULEINFO mi;
DWORD cbNeeded = 0;
HMODULE hModules[kMaxModules]; // max # of modules we support in one process
- if (EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules),
- &cbNeeded)) {
+ if (::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules),
+ &cbNeeded)) {
for (int i = 0; i < cbNeeded / sizeof(*hModules); ++i) {
- if (GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi)))
+ if (i >= kMaxModules) {
+ printf("PERFTOOLS ERROR: Too many modules in this executable to try"
+ " to patch them all (if you need to, raise kMaxModules in"
+ " patch_functions.cc).\n");
+ break;
+ }
+ if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi)))
modules.push_back(ModuleEntryCopy(hCurrentProcess, hModules[i], mi));
}
}
@@ -658,7 +722,7 @@ void PatchAllModules() {
bool still_loaded = false;
for (std::vector<ModuleEntryCopy>::iterator it = modules.begin();
it != modules.end(); ++it) {
- if (g_module_libcs[i]->SameAsME32(*it)) {
+ if (g_module_libcs[i]->SameAsModuleEntry(*it)) {
// Both g_module_libcs[i] and it are still valid. Mark it by
// removing it from the vector; mark g_module_libcs[i] by
// setting a bool.
@@ -672,23 +736,30 @@ void PatchAllModules() {
// We could call Unpatch() here, but why bother? The module
// has gone away, so nobody is going to call into it anyway.
g_module_libcs[i]->set_is_valid(false);
+ made_changes = true;
}
}
// We've handled all the g_module_libcs. Now let's handle the rest
- // of the me32's: those that haven't already been loaded.
+ // of the module-entries: those that haven't already been loaded.
for (std::vector<ModuleEntryCopy>::const_iterator it = modules.begin();
it != modules.end(); ++it) {
LibcInfo libc_info;
- if (libc_info.PopulateWindowsFn(*it)) // true==module has libc routines
- PatchOneModuleLocked(libc_info); // updates num_patched_modules
+ if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines
+ PatchOneModuleLocked(libc_info); // updates num_patched_modules
+ made_changes = true;
+ }
}
// Now that we've dealt with the modules (dlls), update the main
// executable. We do this last because PatchMainExecutableLocked
// wants to look at how other modules were patched.
- PatchMainExecutableLocked();
+ if (!main_executable.patched()) {
+ PatchMainExecutableLocked();
+ made_changes = true;
+ }
}
+ return made_changes;
}
@@ -697,6 +768,8 @@ void PatchAllModules() {
// ---------------------------------------------------------------------
// PatchWindowsFunctions()
// This is the function that is exposed to the outside world.
+// It should be called before the program becomes multi-threaded,
+// since main_executable_windows.Patch() is not thread-safe.
// ---------------------------------------------------------------------
void PatchWindowsFunctions() {
@@ -956,19 +1029,68 @@ BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) {
lpBaseAddress);
}
+// * nopatching_set holds modules (specified by full file-path) that
+// don't contain any libc routines, nor depend on any DLLs that do:
+// that is, files where PatchAllModules ended up being a no-op. We
+// know we don't need to patch those files if we see them again in the
+// future (which can happen in windows apps, which can load+unload a
+// given module many times during a single program run).
+// * patching_map holds modules that *do* require us to do patching
+// work when we load them. (This map is keyed by handle, so we need
+// to be careful to clear each entry in FreeLibrary; the map data is
+// the necessary refcount.) This map is used as an optimization in
+// FreeLibrary to tell if we had done any patching in LoadLibraryExW.
+// If we didn't, FreeLibrary doesn't need to clear up the
+// patching-work, but if we did, it does.
+static std::map<HMODULE, int>* patching_map = NULL;
+static std::set<std::string>* nopatching_set = NULL;
+
HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName,
HANDLE hFile,
DWORD dwFlags) {
HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD))
function_info_[kLoadLibraryExW].origstub_fn)(
lpFileName, hFile, dwFlags);
-PatchAllModules(); // this will patch any newly loaded libraries
+ // Patch any newly loaded libraries. PatchAllModules() returns true if it
+ // did any work; if it did none (no libc in this library), we remember the
+ // library's full path in nopatching_set so later loads can skip the work.
+ char szFullPath[PATH_MAX];
+ const bool have_path = ::GetModuleFileNameA(rv, szFullPath, sizeof(szFullPath)) != 0;
+ if (have_path) { // szFullPath is only initialized when the lookup succeeded
+ SpinLockHolder h(&patch_all_modules_lock);
+ if (nopatching_set && nopatching_set->count(szFullPath) > 0)
+ return rv;
+ }
+ const bool made_changes = PatchAllModules();
+ {
+ SpinLockHolder h(&patch_all_modules_lock);
+ if (made_changes) {
+ if (!patching_map) patching_map = new std::map<HMODULE, int>;
+ ++(*patching_map)[rv];
+ } else if (have_path) { // never record an uninitialized path buffer
+ if (!nopatching_set) nopatching_set = new std::set<std::string>;
+ nopatching_set->insert(szFullPath);
+ }
+ }
return rv;
}
BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) {
BOOL rv = ((BOOL (WINAPI *)(HMODULE))
function_info_[kFreeLibrary].origstub_fn)(hLibModule); // forward to the saved origstub_fn first
+ {
+ // This is an optimization: if we know this module never did any work
+ // in LoadLibraryEx (because it didn't get put into patching_map), we
+ // know we don't need to revisit the patching here.
+ SpinLockHolder h(&patch_all_modules_lock);
+ if (patching_map) { // if the map was never created, fall through and re-check all modules
+ std::map<HMODULE, int>::iterator it = patching_map->find(hLibModule);
+ if (it == patching_map->end())
+ return rv;
+ if (--it->second <= 0) // decrement the refcount
+ patching_map->erase(it); // last recorded load released: drop the entry for this handle
+ }
+ }
PatchAllModules(); // this will fix up the list of patched libraries
return rv;
}
diff --git a/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj b/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj
index 77e8391..3755fb0 100755
--- a/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj
+++ b/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj
@@ -30,7 +30,6 @@
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="Psapi.lib"
OutputFile="$(OutDir)/libtcmalloc_minimal-debug.dll"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
@@ -76,7 +75,6 @@
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="Psapi.lib"
OutputFile="$(OutDir)/libtcmalloc_minimal.dll"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
diff --git a/vsprojects/tmu-static/tmu-static.vcproj b/vsprojects/tmu-static/tmu-static.vcproj
index a76ffa7..a5d6402 100755
--- a/vsprojects/tmu-static/tmu-static.vcproj
+++ b/vsprojects/tmu-static/tmu-static.vcproj
@@ -30,7 +30,6 @@
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="Psapi.lib"
OutputFile="$(OutDir)/tcmalloc_minimal_unittest-static.exe"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
@@ -75,7 +74,6 @@
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="Psapi.lib"
OutputFile="$(OutDir)/tcmalloc_minimal_unittest-static.exe"
LinkIncremental="1"
GenerateDebugInformation="TRUE"