diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2010-01-14 16:26:05 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2010-01-14 16:26:05 +0000 |
commit | 63b8d63beb7d771713774f9a5d57381cbd29bf19 (patch) | |
tree | d09417a7fd0710c1e728216656b9e0f002452adc | |
parent | eeeacd5ec4fa36256091f45e5b3af81cee2a4d86 (diff) | |
download | gperftools-63b8d63beb7d771713774f9a5d57381cbd29bf19.tar.gz |
* PORTING: Revised patch_functions to avoid deadlock (csilvers, andrey)
* PORTING: Revised patch_functions to speed up .dll loads (csilvers)
* PORTING: Build and run sampling_test for Windows (csilvers)
* Correctly init tc structs even when libc isn't patched (csilvers)
* Make low-level allocs async-signal-safe (saito)
git-svn-id: http://gperftools.googlecode.com/svn/trunk@83 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
-rw-r--r-- | Makefile.am | 10 | ||||
-rw-r--r-- | Makefile.in | 202 | ||||
-rw-r--r-- | README.windows | 3 | ||||
-rw-r--r-- | src/base/low_level_alloc.cc | 89 | ||||
-rw-r--r-- | src/base/low_level_alloc.h | 22 | ||||
-rw-r--r-- | src/google/malloc_hook.h | 6 | ||||
-rw-r--r-- | src/malloc_hook.cc | 28 | ||||
-rwxr-xr-x | src/pprof | 7 | ||||
-rw-r--r-- | src/stacktrace_x86-inl.h | 5 | ||||
-rw-r--r-- | src/tcmalloc.cc | 4 | ||||
-rwxr-xr-x | src/tests/sampling_test.sh | 16 | ||||
-rw-r--r-- | src/thread_cache.cc | 6 | ||||
-rw-r--r-- | src/windows/patch_functions.cc | 264 | ||||
-rwxr-xr-x | vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj | 2 | ||||
-rwxr-xr-x | vsprojects/tmu-static/tmu-static.vcproj | 2 |
15 files changed, 452 insertions, 214 deletions
diff --git a/Makefile.am b/Makefile.am index c81d8f3..3de910e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -181,6 +181,8 @@ libwindows_la_SOURCES = $(WINDOWS_INCLUDES) \ src/windows/patch_functions.cc \ src/windows/preamble_patcher.cc \ src/windows/preamble_patcher_with_stub.cc +# patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us. +libwindows_la_LIBADD = -lPsapi SPINLOCK_INCLUDES = src/base/spinlock.h \ src/base/spinlock_win32-inl.h \ @@ -529,6 +531,12 @@ malloc_extension_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) malloc_extension_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) malloc_extension_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) +# This doesn't work with static linkage, because libtcmalloc.a isn't +# happy with C linkage (it misses the stdc++ library). Likewise with +# mingw, which links foo.a even though it doesn't set ENABLE_STATIC. +# TODO(csilvers): set enable_static=true in configure.ac:36? +if !MINGW +if !ENABLE_STATIC TESTS += malloc_extension_c_test malloc_extension_c_test_SOURCES = src/tests/malloc_extension_c_test.c \ src/google/malloc_extension.h \ @@ -540,6 +548,8 @@ malloc_extension_c_test_CFLAGS += -ansi endif GCC malloc_extension_c_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) malloc_extension_c_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) +endif !ENABLE_STATIC +endif !MINGW if !MINGW TESTS += memalign_unittest diff --git a/Makefile.in b/Makefile.in index dadedbd..0db79a2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -58,7 +58,7 @@ host_triplet = @host@ @MINGW_TRUE@am__append_5 = -Wl,-u__tcmalloc noinst_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ $(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6) \ - $(am__EXEEXT_7) $(am__EXEEXT_8) $(am__EXEEXT_22) + $(am__EXEEXT_7) $(am__EXEEXT_8) $(am__EXEEXT_23) bin_PROGRAMS = @MINGW_TRUE@am__append_6 = libwindows.la libspinlock.la @@ -98,28 +98,34 @@ bin_PROGRAMS = @ENABLE_STATIC_FALSE@@MINGW_FALSE@am__append_16 = 
$(maybe_threads_unittest_sh_SOURCES) @MINGW_TRUE@am__append_17 = src/windows/port.h src/windows/port.cc @MINGW_FALSE@am__append_18 = system_alloc_unittest + +# This doesn't work with static linkage, because libtcmalloc.a isn't +# happy with C linkage (it misses the stdc++ library). Likewise with +# mingw, which links foo.a even though it doesn't set ENABLE_STATIC. +# TODO(csilvers): set enable_static=true in configure.ac:36? +@ENABLE_STATIC_FALSE@@MINGW_FALSE@am__append_19 = malloc_extension_c_test # -ansi here is just to help ensure the code is bog-standard C. -@GCC_TRUE@am__append_19 = -ansi -@MINGW_FALSE@am__append_20 = memalign_unittest +@ENABLE_STATIC_FALSE@@GCC_TRUE@@MINGW_FALSE@am__append_20 = -ansi +@MINGW_FALSE@am__append_21 = memalign_unittest ### ------- tcmalloc_minimal_debug (thread-caching malloc with debugallocation) # Like tcmalloc.cc, debugallocation.cc needs exceptions to fulfill its # API. Luckily, we can reuse everything else from tcmalloc_minimal. -@WITH_DEBUGALLOC_TRUE@am__append_21 = libtcmalloc_minimal_debug.la @WITH_DEBUGALLOC_TRUE@am__append_22 = libtcmalloc_minimal_debug.la +@WITH_DEBUGALLOC_TRUE@am__append_23 = libtcmalloc_minimal_debug.la ### Unittests -@WITH_DEBUGALLOC_TRUE@am__append_23 = tcmalloc_minimal_debug_unittest \ +@WITH_DEBUGALLOC_TRUE@am__append_24 = tcmalloc_minimal_debug_unittest \ @WITH_DEBUGALLOC_TRUE@ malloc_extension_debug_test \ @WITH_DEBUGALLOC_TRUE@ memalign_debug_unittest \ @WITH_DEBUGALLOC_TRUE@ realloc_debug_unittest \ @WITH_DEBUGALLOC_TRUE@ debugallocation_test.sh$(EXEEXT) -@WITH_DEBUGALLOC_TRUE@am__append_24 = $(debugallocation_test_sh_SOURCES) +@WITH_DEBUGALLOC_TRUE@am__append_25 = $(debugallocation_test_sh_SOURCES) # This is the sub-program used by debugallocation_test.sh -@WITH_DEBUGALLOC_TRUE@am__append_25 = debugallocation_test -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_26 = $(SG_TCMALLOC_INCLUDES) +@WITH_DEBUGALLOC_TRUE@am__append_26 = debugallocation_test 
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_27 = $(SG_TCMALLOC_INCLUDES) ### Making the library @@ -127,12 +133,12 @@ bin_PROGRAMS = # for all files in this library -- except tcmalloc.cc which needs them # to fulfill its API. Automake doesn't allow per-file CXXFLAGS, so we need # to separate into two libraries. -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_27 = libtcmalloc_internal.la -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_28 = libtcmalloc.la -@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_29 = $(HEAP_CHECKER_SOURCES) -@WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_30 = -DNO_HEAP_CHECK +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_28 = libtcmalloc_internal.la +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_29 = libtcmalloc.la +@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_30 = $(HEAP_CHECKER_SOURCES) @WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_31 = -DNO_HEAP_CHECK -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_32 = libtcmalloc.la +@WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_32 = -DNO_HEAP_CHECK +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_33 = libtcmalloc.la ### Unittests @@ -140,81 +146,81 @@ bin_PROGRAMS = # tcmalloc_minimal. (One would never do this on purpose, but perhaps # by accident...) When we can compile libprofiler, we also link it in # to make sure that works too. 
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_33 = tcmalloc_unittest \ +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_34 = tcmalloc_unittest \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_both_unittest \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_large_unittest \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ raw_printer_test \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_test \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_test.sh$(EXEEXT) -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_34 = vsprojects/sampler_test/sampler_test.vcproj +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_35 = vsprojects/sampler_test/sampler_test.vcproj # These unittests often need to run binaries. They're in the current dir -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_35 = BINDIR=. \ +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_36 = BINDIR=. \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ TMPDIR=/tmp/perftools -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_36 = $(sampling_test_sh_SOURCES) +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_37 = $(sampling_test_sh_SOURCES) # This is the sub-program used by sampling_test.sh # The -g is so pprof can get symbol information. 
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_37 = sampling_test -@WITH_HEAP_PROFILER_TRUE@am__append_38 = heap-profiler_unittest.sh$(EXEEXT) -@WITH_HEAP_PROFILER_TRUE@am__append_39 = $(heap_profiler_unittest_sh_SOURCES) +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_38 = sampling_test +@WITH_HEAP_PROFILER_TRUE@am__append_39 = heap-profiler_unittest.sh$(EXEEXT) +@WITH_HEAP_PROFILER_TRUE@am__append_40 = $(heap_profiler_unittest_sh_SOURCES) # These are sub-programs used by heap-profiler_unittest.sh -@WITH_HEAP_PROFILER_TRUE@am__append_40 = heap-profiler_unittest -@WITH_HEAP_CHECKER_TRUE@am__append_41 = \ +@WITH_HEAP_PROFILER_TRUE@am__append_41 = heap-profiler_unittest +@WITH_HEAP_CHECKER_TRUE@am__append_42 = \ @WITH_HEAP_CHECKER_TRUE@ heap-checker_unittest.sh$(EXEEXT) \ @WITH_HEAP_CHECKER_TRUE@ heap-checker-death_unittest.sh$(EXEEXT) -@WITH_HEAP_CHECKER_TRUE@am__append_42 = \ +@WITH_HEAP_CHECKER_TRUE@am__append_43 = \ @WITH_HEAP_CHECKER_TRUE@ $(heap_checker_unittest_sh_SOURCES) \ @WITH_HEAP_CHECKER_TRUE@ $(top_srcdir)/$(heap_checker_death_unittest_sh_SOURCES) # These are sub-programs used by heap-checker_unittest.sh -@WITH_HEAP_CHECKER_TRUE@am__append_43 = heap-checker_unittest +@WITH_HEAP_CHECKER_TRUE@am__append_44 = heap-checker_unittest ### Documentation (above and beyond tcmalloc_minimal documentation) -@WITH_HEAP_PROFILER_TRUE@am__append_44 = doc/heapprofile.html doc/heap-example1.png -@WITH_HEAP_CHECKER_TRUE@am__append_45 = doc/heap_checker.html +@WITH_HEAP_PROFILER_TRUE@am__append_45 = doc/heapprofile.html doc/heap-example1.png +@WITH_HEAP_CHECKER_TRUE@am__append_46 = doc/heap_checker.html ### ------- tcmalloc with debugallocation -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_46 = libtcmalloc_debug.la @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_47 = libtcmalloc_debug.la +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_48 = libtcmalloc_debug.la ### Unittests 
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_48 = tcmalloc_debug_unittest \ +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_49 = tcmalloc_debug_unittest \ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_debug_test \ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_debug_test.sh$(EXEEXT) # This is the sub-program using by sampling_debug_test.sh # The -g is so pprof can get symbol information. -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_49 = sampling_debug_test -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__append_50 = heap-profiler_debug_unittest.sh$(EXEEXT) +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_50 = sampling_debug_test +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__append_51 = heap-profiler_debug_unittest.sh$(EXEEXT) # These are sub-programs used by heap-profiler_debug_unittest.sh -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__append_51 = heap-profiler_debug_unittest -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__append_52 = heap-checker_debug_unittest.sh$(EXEEXT) +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__append_52 = heap-profiler_debug_unittest +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__append_53 = heap-checker_debug_unittest.sh$(EXEEXT) # These are sub-programs used by heap-checker_debug_unittest.sh -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__append_53 = heap-checker_debug_unittest -@WITH_CPU_PROFILER_TRUE@am__append_54 = $(SG_CPU_PROFILER_INCLUDES) +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__append_54 = heap-checker_debug_unittest +@WITH_CPU_PROFILER_TRUE@am__append_55 = $(SG_CPU_PROFILER_INCLUDES) ### Making the library -@WITH_CPU_PROFILER_TRUE@am__append_55 = libprofiler.la +@WITH_CPU_PROFILER_TRUE@am__append_56 = libprofiler.la ### Unittests -@WITH_CPU_PROFILER_TRUE@am__append_56 = getpc_test \ +@WITH_CPU_PROFILER_TRUE@am__append_57 = getpc_test \ 
@WITH_CPU_PROFILER_TRUE@ profiledata_unittest \ @WITH_CPU_PROFILER_TRUE@ profile_handler_unittest \ @WITH_CPU_PROFILER_TRUE@ profiler_unittest.sh$(EXEEXT) -@WITH_CPU_PROFILER_TRUE@am__append_57 = $(profiler_unittest_sh_SOURCES) +@WITH_CPU_PROFILER_TRUE@am__append_58 = $(profiler_unittest_sh_SOURCES) # These are sub-programs used by profiler_unittest.sh -@WITH_CPU_PROFILER_TRUE@am__append_58 = profiler1_unittest profiler2_unittest profiler3_unittest \ +@WITH_CPU_PROFILER_TRUE@am__append_59 = profiler1_unittest profiler2_unittest profiler3_unittest \ @WITH_CPU_PROFILER_TRUE@ profiler4_unittest @WITH_CPU_PROFILER_FALSE@profiler2_unittest_DEPENDENCIES = ### Documentation -@WITH_CPU_PROFILER_TRUE@am__append_59 = doc/cpuprofile.html \ +@WITH_CPU_PROFILER_TRUE@am__append_60 = doc/cpuprofile.html \ @WITH_CPU_PROFILER_TRUE@ doc/cpuprofile-fileformat.html \ @WITH_CPU_PROFILER_TRUE@ doc/pprof-test-big.gif \ @WITH_CPU_PROFILER_TRUE@ doc/pprof-test.gif \ @@ -229,9 +235,9 @@ bin_PROGRAMS = # works fine for .so files, it does not for .a files. The easiest way # around this -- and I've tried a bunch of the hard ways -- is to just # to create another set of libraries that has both functionality in it. 
-@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_60 = libtcmalloc_and_profiler.la -@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_61 = tcmalloc_and_profiler_unittest -@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_62 = libtcmalloc_and_profiler.la +@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_61 = libtcmalloc_and_profiler.la +@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_62 = tcmalloc_and_profiler_unittest +@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_63 = libtcmalloc_and_profiler.la DIST_COMMON = README $(am__configure_deps) $(am__dist_doc_DATA_DIST) \ $(am__googleinclude_HEADERS_DIST) $(dist_man_MANS) \ $(noinst_HEADERS) $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ @@ -643,7 +649,7 @@ am_libtcmalloc_minimal_internal_la_OBJECTS = \ $(am__objects_24) $(am__objects_20) libtcmalloc_minimal_internal_la_OBJECTS = \ $(am_libtcmalloc_minimal_internal_la_OBJECTS) -libwindows_la_LIBADD = +libwindows_la_DEPENDENCIES = am__libwindows_la_SOURCES_DIST = src/windows/port.h \ src/windows/mingw.h src/windows/mini_disassembler.h \ src/windows/mini_disassembler_types.h \ @@ -675,47 +681,48 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) @WITH_STACK_TRACE_TRUE@am__EXEEXT_9 = stacktrace_unittest$(EXEEXT) @ENABLE_STATIC_FALSE@@MINGW_FALSE@am__EXEEXT_10 = maybe_threads_unittest.sh$(EXEEXT) @MINGW_FALSE@am__EXEEXT_11 = system_alloc_unittest$(EXEEXT) -@MINGW_FALSE@am__EXEEXT_12 = memalign_unittest$(EXEEXT) -@WITH_DEBUGALLOC_TRUE@am__EXEEXT_13 = tcmalloc_minimal_debug_unittest$(EXEEXT) \ +@ENABLE_STATIC_FALSE@@MINGW_FALSE@am__EXEEXT_12 = malloc_extension_c_test$(EXEEXT) +@MINGW_FALSE@am__EXEEXT_13 = memalign_unittest$(EXEEXT) +@WITH_DEBUGALLOC_TRUE@am__EXEEXT_14 = tcmalloc_minimal_debug_unittest$(EXEEXT) \ @WITH_DEBUGALLOC_TRUE@ malloc_extension_debug_test$(EXEEXT) \ @WITH_DEBUGALLOC_TRUE@ memalign_debug_unittest$(EXEEXT) \ @WITH_DEBUGALLOC_TRUE@ 
realloc_debug_unittest$(EXEEXT) \ @WITH_DEBUGALLOC_TRUE@ debugallocation_test.sh$(EXEEXT) -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_14 = tcmalloc_unittest$(EXEEXT) \ +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_15 = tcmalloc_unittest$(EXEEXT) \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_both_unittest$(EXEEXT) \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_large_unittest$(EXEEXT) \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ raw_printer_test$(EXEEXT) \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_test$(EXEEXT) \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_test.sh$(EXEEXT) -@WITH_HEAP_PROFILER_TRUE@am__EXEEXT_15 = \ +@WITH_HEAP_PROFILER_TRUE@am__EXEEXT_16 = \ @WITH_HEAP_PROFILER_TRUE@ heap-profiler_unittest.sh$(EXEEXT) -@WITH_HEAP_CHECKER_TRUE@am__EXEEXT_16 = \ +@WITH_HEAP_CHECKER_TRUE@am__EXEEXT_17 = \ @WITH_HEAP_CHECKER_TRUE@ heap-checker_unittest.sh$(EXEEXT) \ @WITH_HEAP_CHECKER_TRUE@ heap-checker-death_unittest.sh$(EXEEXT) -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_17 = tcmalloc_debug_unittest$(EXEEXT) \ +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_18 = tcmalloc_debug_unittest$(EXEEXT) \ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_debug_test$(EXEEXT) \ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_debug_test.sh$(EXEEXT) -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__EXEEXT_18 = heap-profiler_debug_unittest.sh$(EXEEXT) -@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__EXEEXT_19 = heap-checker_debug_unittest.sh$(EXEEXT) -@WITH_CPU_PROFILER_TRUE@am__EXEEXT_20 = getpc_test$(EXEEXT) \ +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am__EXEEXT_19 = heap-profiler_debug_unittest.sh$(EXEEXT) +@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am__EXEEXT_20 = heap-checker_debug_unittest.sh$(EXEEXT) +@WITH_CPU_PROFILER_TRUE@am__EXEEXT_21 = getpc_test$(EXEEXT) \ @WITH_CPU_PROFILER_TRUE@ profiledata_unittest$(EXEEXT) \ @WITH_CPU_PROFILER_TRUE@ 
profile_handler_unittest$(EXEEXT) \ @WITH_CPU_PROFILER_TRUE@ profiler_unittest.sh$(EXEEXT) -@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_21 = tcmalloc_and_profiler_unittest$(EXEEXT) -am__EXEEXT_22 = low_level_alloc_unittest$(EXEEXT) \ +@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__EXEEXT_22 = tcmalloc_and_profiler_unittest$(EXEEXT) +am__EXEEXT_23 = low_level_alloc_unittest$(EXEEXT) \ atomicops_unittest$(EXEEXT) $(am__EXEEXT_9) \ tcmalloc_minimal_unittest$(EXEEXT) \ tcmalloc_minimal_large_unittest$(EXEEXT) $(am__EXEEXT_10) \ addressmap_unittest$(EXEEXT) $(am__EXEEXT_11) \ packed_cache_test$(EXEEXT) frag_unittest$(EXEEXT) \ markidle_unittest$(EXEEXT) malloc_extension_test$(EXEEXT) \ - malloc_extension_c_test$(EXEEXT) $(am__EXEEXT_12) \ - page_heap_test$(EXEEXT) pagemap_unittest$(EXEEXT) \ - realloc_unittest$(EXEEXT) stack_trace_table_test$(EXEEXT) \ - thread_dealloc_unittest$(EXEEXT) $(am__EXEEXT_13) \ - $(am__EXEEXT_14) $(am__EXEEXT_15) $(am__EXEEXT_16) \ - $(am__EXEEXT_17) $(am__EXEEXT_18) $(am__EXEEXT_19) \ - $(am__EXEEXT_20) $(am__EXEEXT_21) + $(am__EXEEXT_12) $(am__EXEEXT_13) page_heap_test$(EXEEXT) \ + pagemap_unittest$(EXEEXT) realloc_unittest$(EXEEXT) \ + stack_trace_table_test$(EXEEXT) \ + thread_dealloc_unittest$(EXEEXT) $(am__EXEEXT_14) \ + $(am__EXEEXT_15) $(am__EXEEXT_16) $(am__EXEEXT_17) \ + $(am__EXEEXT_18) $(am__EXEEXT_19) $(am__EXEEXT_20) \ + $(am__EXEEXT_21) $(am__EXEEXT_22) PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) am__addressmap_unittest_SOURCES_DIST = \ src/tests/addressmap_unittest.cc src/addressmap-inl.h \ @@ -856,12 +863,15 @@ am_low_level_alloc_unittest_OBJECTS = \ low_level_alloc_unittest_OBJECTS = \ $(am_low_level_alloc_unittest_OBJECTS) low_level_alloc_unittest_DEPENDENCIES = $(am__DEPENDENCIES_2) -am_malloc_extension_c_test_OBJECTS = \ - malloc_extension_c_test-malloc_extension_c_test.$(OBJEXT) +am__malloc_extension_c_test_SOURCES_DIST = \ + src/tests/malloc_extension_c_test.c \ + 
src/google/malloc_extension.h src/google/malloc_extension_c.h +@ENABLE_STATIC_FALSE@@MINGW_FALSE@am_malloc_extension_c_test_OBJECTS = malloc_extension_c_test-malloc_extension_c_test.$(OBJEXT) malloc_extension_c_test_OBJECTS = \ $(am_malloc_extension_c_test_OBJECTS) -malloc_extension_c_test_DEPENDENCIES = $(am__DEPENDENCIES_5) \ - $(am__DEPENDENCIES_1) +@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_DEPENDENCIES = \ +@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(am__DEPENDENCIES_5) \ +@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(am__DEPENDENCIES_1) am__malloc_extension_debug_test_SOURCES_DIST = \ src/tests/malloc_extension_test.cc src/config_for_unittests.h \ src/base/logging.h src/google/malloc_extension.h \ @@ -1277,7 +1287,7 @@ DIST_SOURCES = $(liblogging_la_SOURCES) \ $(am__heap_profiler_unittest_SOURCES_DIST) \ $(am__heap_profiler_unittest_sh_SOURCES_DIST) \ $(am__low_level_alloc_unittest_SOURCES_DIST) \ - $(malloc_extension_c_test_SOURCES) \ + $(am__malloc_extension_c_test_SOURCES_DIST) \ $(am__malloc_extension_debug_test_SOURCES_DIST) \ $(malloc_extension_test_SOURCES) $(markidle_unittest_SOURCES) \ $(am__maybe_threads_unittest_sh_SOURCES_DIST) \ @@ -1561,15 +1571,15 @@ TCMALLOC_FLAGS = $(am__append_5) @HAVE_OBJCOPY_WEAKEN_TRUE@ -W __Znwm -W __ZnwmRKSt9nothrow_t -W __Znam -W __ZnamRKSt9nothrow_t \ @HAVE_OBJCOPY_WEAKEN_TRUE@ -W __ZdlPv -W __ZdaPv -LIBS_TO_WEAKEN = libtcmalloc_minimal.la $(am__append_22) \ - $(am__append_32) $(am__append_47) $(am__append_62) +LIBS_TO_WEAKEN = libtcmalloc_minimal.la $(am__append_23) \ + $(am__append_33) $(am__append_48) $(am__append_63) googleincludedir = $(includedir)/google # The .h files you want to install (that is, .h files that people # who install this package can include in their own applications.) 
# We'll add to this later, on a library-by-library basis googleinclude_HEADERS = $(am__append_9) \ - $(SG_TCMALLOC_MINIMAL_INCLUDES) $(am__append_26) \ - $(am__append_54) + $(SG_TCMALLOC_MINIMAL_INCLUDES) $(am__append_27) \ + $(am__append_55) # tcmalloc.h is a special case, because it's a .h.in file nodist_googleinclude_HEADERS = src/google/tcmalloc.h noinst_HEADERS = src/google/tcmalloc.h.in @@ -1611,13 +1621,13 @@ dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README \ doc/tcmalloc-opspersec.vs.size.5.threads.png \ doc/tcmalloc-opspersec.vs.size.8.threads.png doc/overview.dot \ doc/pageheap.dot doc/spanmap.dot doc/threadheap.dot \ - $(am__append_44) $(am__append_45) $(am__append_59) + $(am__append_45) $(am__append_46) $(am__append_60) # The libraries (.so's) you want to install # We'll add to this later, on a library-by-library basis -lib_LTLIBRARIES = libtcmalloc_minimal.la $(am__append_21) \ - $(am__append_28) $(am__append_46) $(am__append_55) \ - $(am__append_60) +lib_LTLIBRARIES = libtcmalloc_minimal.la $(am__append_22) \ + $(am__append_29) $(am__append_47) $(am__append_56) \ + $(am__append_61) # This is for 'convenience libraries' -- basically just a container for sources ### Making the library @@ -1628,7 +1638,7 @@ lib_LTLIBRARIES = libtcmalloc_minimal.la $(am__append_21) \ # to separate into two libraries. 
noinst_LTLIBRARIES = liblogging.la libsysinfo.la $(am__append_6) \ $(am__append_8) $(am__append_10) \ - libtcmalloc_minimal_internal.la $(am__append_27) + libtcmalloc_minimal_internal.la $(am__append_28) WINDOWS_PROJECTS = google-perftools.sln \ vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj \ $(am__append_14) \ @@ -1646,7 +1656,7 @@ WINDOWS_PROJECTS = google-perftools.sln \ vsprojects/realloc_unittest/realloc_unittest.vcproj \ vsprojects/stack_trace_table_test/stack_trace_table_test.vcproj \ vsprojects/thread_dealloc_unittest/thread_dealloc_unittest.vcproj \ - $(am__append_34) + $(am__append_35) # unittests you want to run when people type 'make check'. # Note: tests cannot take any arguments! @@ -1669,19 +1679,19 @@ TESTS = low_level_alloc_unittest atomicops_unittest $(am__append_11) \ tcmalloc_minimal_unittest tcmalloc_minimal_large_unittest \ $(am__append_15) addressmap_unittest $(am__append_18) \ packed_cache_test frag_unittest markidle_unittest \ - malloc_extension_test malloc_extension_c_test $(am__append_20) \ + malloc_extension_test $(am__append_19) $(am__append_21) \ page_heap_test pagemap_unittest realloc_unittest \ stack_trace_table_test thread_dealloc_unittest \ - $(am__append_23) $(am__append_33) $(am__append_38) \ - $(am__append_41) $(am__append_48) $(am__append_50) \ - $(am__append_52) $(am__append_56) $(am__append_61) + $(am__append_24) $(am__append_34) $(am__append_39) \ + $(am__append_42) $(am__append_49) $(am__append_51) \ + $(am__append_53) $(am__append_57) $(am__append_62) # TESTS_ENVIRONMENT sets environment variables for when you run unittest. # We always get "srcdir" set for free. # We'll add to this later, on a library-by-library basis. 
-TESTS_ENVIRONMENT = $(am__append_13) $(am__append_35) +TESTS_ENVIRONMENT = $(am__append_13) $(am__append_36) # All script tests should be added here -noinst_SCRIPTS = $(am__append_16) $(am__append_24) $(am__append_36) \ - $(am__append_39) $(am__append_42) $(am__append_57) +noinst_SCRIPTS = $(am__append_16) $(am__append_25) $(am__append_37) \ + $(am__append_40) $(am__append_43) $(am__append_58) # This is my own var, used for extra libraries I make that I need installed EXTRA_INSTALL = @@ -1731,6 +1741,8 @@ libsysinfo_la_LIBADD = $(NANOSLEEP_LIBS) $(am__append_7) @MINGW_TRUE@ src/windows/preamble_patcher.cc \ @MINGW_TRUE@ src/windows/preamble_patcher_with_stub.cc +# patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us. +@MINGW_TRUE@libwindows_la_LIBADD = -lPsapi # spinlock is the only code that uses atomicops. @MINGW_FALSE@SPINLOCK_INCLUDES = src/base/spinlock.h \ @MINGW_FALSE@ src/base/atomicops.h \ @@ -1962,14 +1974,16 @@ malloc_extension_test_SOURCES = src/tests/malloc_extension_test.cc \ malloc_extension_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) malloc_extension_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) malloc_extension_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) -malloc_extension_c_test_SOURCES = src/tests/malloc_extension_c_test.c \ - src/google/malloc_extension.h \ - src/google/malloc_extension_c.h - -malloc_extension_c_test_CFLAGS = $(PTHREAD_CFLAGS) $(AM_CFLAGS) \ - $(am__append_19) -malloc_extension_c_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) -malloc_extension_c_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) +@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_SOURCES = src/tests/malloc_extension_c_test.c \ +@ENABLE_STATIC_FALSE@@MINGW_FALSE@ src/google/malloc_extension.h \ +@ENABLE_STATIC_FALSE@@MINGW_FALSE@ src/google/malloc_extension_c.h + +@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_CFLAGS = \ +@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(PTHREAD_CFLAGS) \ 
+@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(AM_CFLAGS) \ +@ENABLE_STATIC_FALSE@@MINGW_FALSE@ $(am__append_20) +@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) +@ENABLE_STATIC_FALSE@@MINGW_FALSE@malloc_extension_c_test_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) @MINGW_FALSE@memalign_unittest_SOURCES = src/tests/memalign_unittest.cc \ @MINGW_FALSE@ src/tcmalloc.h \ @MINGW_FALSE@ src/config_for_unittests.h \ @@ -2080,17 +2094,17 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(PTHREAD_CFLAGS) -DNDEBUG \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(AM_CXXFLAGS) \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(NO_EXCEPTIONS) \ -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_30) +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_31) @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_SOURCES = \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(TCMALLOC_CC) \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(TCMALLOC_INCLUDES) \ -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_29) +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_30) @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_CXXFLAGS = \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(PTHREAD_CFLAGS) -DNDEBUG \ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(AM_CXXFLAGS) \ -@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_31) +@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_32) @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_internal.la @WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@HEAP_CHECKER_SOURCES = diff --git a/README.windows b/README.windows index 0b82d04..750aa51 100644 --- a/README.windows +++ b/README.windows @@ 
-43,8 +43,7 @@ project tcmalloc_minimal_unittest-static, which does this. For this to work, you'll need to add "/D PERFTOOLS_DLL_DECL=" to the compile line of every perftools .cc file. You do not need to depend on the tcmalloc symbol in this case (that is, you don't need to do either -step 1 or step 2 from above). However, you will need to add a -dependency to Psapi.lib, which tcmalloc uses. +step 1 or step 2 from above). The heap-profiler has had a preliminary port to Windows. It has not been well tested, and probably does not work at all when Frame Pointer diff --git a/src/base/low_level_alloc.cc b/src/base/low_level_alloc.cc index 900ae4e..2bbce54 100644 --- a/src/base/low_level_alloc.cc +++ b/src/base/low_level_alloc.cc @@ -67,7 +67,7 @@ namespace { // first. Valid in both allocated and unallocated blocks intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this LowLevelAlloc::Arena *arena; // pointer to parent arena - void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*) + void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*) } header; // Next two fields: in unallocated blocks: freelist skiplist data @@ -197,15 +197,57 @@ struct LowLevelAlloc::Arena { // pointer. static struct LowLevelAlloc::Arena default_arena; -// A non-malloc-hooked arena: used only to allocate metadata for arenas that +// Non-malloc-hooked arenas: used only to allocate metadata for arenas that // do not want malloc hook reporting, so that for them there's no malloc hook // reporting even during arena creation. 
static struct LowLevelAlloc::Arena unhooked_arena; +static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena; // magic numbers to identify allocated and unallocated blocks static const intptr_t kMagicAllocated = 0x4c833e95; static const intptr_t kMagicUnallocated = ~kMagicAllocated; +namespace { + class ArenaLock { + public: + explicit ArenaLock(LowLevelAlloc::Arena *arena) : + left_(false), mask_valid_(false), arena_(arena) { + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + // We've decided not to support async-signal-safe arena use until + // there a demonstrated need. Here's how one could do it though + // (would need to be made more portable). +#if 0 + sigset_t all; + sigfillset(&all); + this->mask_valid_ = + (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0); +#else + RAW_CHECK(false, "We do not yet support async-signal-safe arena."); +#endif + } + this->arena_->mu.Lock(); + } + ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); } + void Leave() { + this->arena_->mu.Unlock(); +#if 0 + if (this->mask_valid_) { + pthread_sigmask(SIG_SETMASK, &this->mask_, 0); + } +#endif + this->left_ = true; + } + private: + bool left_; // whether left region + bool mask_valid_; +#if 0 + sigset_t mask_; // old mask of blocked signals +#endif + LowLevelAlloc::Arena *arena_; + DISALLOW_COPY_AND_ASSIGN(ArenaLock); + }; +} // anonymous namespace + // create an appropriate magic number for an object at "ptr" // "magic" should be kMagicAllocated or kMagicUnallocated inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) { @@ -235,6 +277,8 @@ static void ArenaInit(LowLevelAlloc::Arena *arena) { // Default arena should be hooked, e.g. for heap-checker to trace // pointer chains through objects in the default arena. 
arena->flags = LowLevelAlloc::kCallMallocHook; + } else if (arena == &unhooked_async_sig_safe_arena) { + arena->flags = LowLevelAlloc::kAsyncSignalSafe; } else { arena->flags = 0; // other arenas' flags may be overridden by client, // but unhooked_arena will have 0 in 'flags'. @@ -246,9 +290,12 @@ static void ArenaInit(LowLevelAlloc::Arena *arena) { LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags, Arena *meta_data_arena) { RAW_CHECK(meta_data_arena != 0, "must pass a valid arena"); - if (meta_data_arena == &default_arena && - (flags & LowLevelAlloc::kCallMallocHook) == 0) { - meta_data_arena = &unhooked_arena; + if (meta_data_arena == &default_arena) { + if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + meta_data_arena = &unhooked_async_sig_safe_arena; + } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) { + meta_data_arena = &unhooked_arena; + } } // Arena(0) uses the constructor for non-static contexts Arena *result = @@ -262,9 +309,9 @@ LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags, bool LowLevelAlloc::DeleteArena(Arena *arena) { RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena, "may not delete default arena"); - arena->mu.Lock(); + ArenaLock section(arena); bool empty = (arena->allocation_count == 0); - arena->mu.Unlock(); + section.Leave(); if (empty) { while (arena->freelist.next[0] != 0) { AllocList *region = arena->freelist.next[0]; @@ -279,7 +326,13 @@ bool LowLevelAlloc::DeleteArena(Arena *arena) { "empty arena has non-page-aligned block size"); RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0, "empty arena has non-page-aligned block"); - RAW_CHECK(munmap(region, size) == 0, + int munmap_result; + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) { + munmap_result = munmap(region, size); + } else { + munmap_result = MallocHook::UnhookedMUnmap(region, size); + } + RAW_CHECK(munmap_result == 0, "LowLevelAlloc::DeleteArena: munmap failed address"); } Free(arena); @@ 
-363,21 +416,21 @@ void LowLevelAlloc::Free(void *v) { if ((arena->flags & kCallMallocHook) != 0) { MallocHook::InvokeDeleteHook(v); } - arena->mu.Lock(); + ArenaLock section(arena); AddToFreelist(v, arena); RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free"); arena->allocation_count--; - arena->mu.Unlock(); + section.Leave(); } } // allocates and returns a block of size bytes, to be freed with Free() // L < arena->mu -void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { +static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { void *result = 0; if (request != 0) { AllocList *s; // will point to region that satisfies request - arena->mu.Lock(); + ArenaLock section(arena); ArenaInit(arena); // round up with header size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup); @@ -399,8 +452,14 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { // mmap generous 64K chunks to decrease // the chances/impact of fragmentation: size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16); - void *new_pages = mmap(0, new_pages_size, - PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + void *new_pages; + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + new_pages = MallocHook::UnhookedMMap(0, new_pages_size, + PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } else { + new_pages = mmap(0, new_pages_size, + PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } RAW_CHECK(new_pages != MAP_FAILED, "mmap error"); arena->mu.Lock(); s = reinterpret_cast<AllocList *>(new_pages); @@ -425,7 +484,7 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { s->header.magic = Magic(kMagicAllocated, &s->header); RAW_CHECK(s->header.arena == arena, ""); arena->allocation_count++; - arena->mu.Unlock(); + section.Leave(); result = &s->levels; } ANNOTATE_NEW_MEMORY(result, request); diff --git a/src/base/low_level_alloc.h b/src/base/low_level_alloc.h index 
0df1298..393b3d2 100644 --- a/src/base/low_level_alloc.h +++ b/src/base/low_level_alloc.h @@ -72,14 +72,20 @@ class LowLevelAlloc { // meta_data_arena; the DefaultArena() can be passed for meta_data_arena. // These values may be ored into flags: enum { - // Calls to Alloc() and Free() will be reported - // via the MallocHook interface. - // The DefaultArena() has this flag on. - // NewArena(flags, DefaultArena()) w/o this bit in 'flags', - // on the other hand, will not cause any MallocHook - // calls even during the NewArena call itself - // (it will in fact use meta_data_arena different from DefaultArena()). - kCallMallocHook = 0x0001 + // Report calls to Alloc() and Free() via the MallocHook interface. + // Set in the DefaultArena. + kCallMallocHook = 0x0001, + + // Make calls to Alloc(), Free() be async-signal-safe. Not set in + // DefaultArena(). + kAsyncSignalSafe = 0x0002, + + // When used with DefaultArena(), the NewArena() and DeleteArena() calls + // obey the flags given explicitly in the NewArena() call, even if those + // flags differ from the settings in DefaultArena(). So the call + // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe, + // as well as generatating an arena that provides async-signal-safe + // Alloc/Free. }; static Arena *NewArena(int32 flags, Arena *meta_data_arena); diff --git a/src/google/malloc_hook.h b/src/google/malloc_hook.h index f2713e9..48d92da 100644 --- a/src/google/malloc_hook.h +++ b/src/google/malloc_hook.h @@ -191,6 +191,12 @@ class PERFTOOLS_DLL_DECL MallocHook { int skip_count) { return MallocHook_GetCallerStackTrace(result, max_depth, skip_count); } + + // Unhooked versions of mmap() and munmap(). These should be used + // only by experts, since they bypass heapchecking, etc. 
+ static void* UnhookedMMap(void *start, size_t length, int prot, int flags, + int fd, off_t offset); + static int UnhookedMUnmap(void *start, size_t length); }; #endif /* _MALLOC_HOOK_H_ */ diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc index d1ad12a..2a7f542 100644 --- a/src/malloc_hook.cc +++ b/src/malloc_hook.cc @@ -423,7 +423,7 @@ static inline void* do_mmap64(void *start, size_t length, return result; } -# endif +# endif // defined(__x86_64__) // We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook // calls right into mmap and mmap64, so that the stack frames in the caller's @@ -472,7 +472,7 @@ extern "C" void* mmap(void *start, size_t length, int prot, int flags, return result; } -#endif +#endif // !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) extern "C" int munmap(void* start, size_t length) __THROW { MallocHook::InvokeMunmapHook(start, length); @@ -501,4 +501,26 @@ extern "C" void* sbrk(ptrdiff_t increment) __THROW { return result; } -#endif +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + return do_mmap64(start, length, prot, flags, fd, offset); +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + return sys_munmap(start, length); +} + +#else // defined(__linux) && + // (defined(__i386__) || defined(__x86_64__) || defined(__PPC__)) + +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + return mmap(start, length, prot, flags, fd, offset); +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + return munmap(start, length); +} + +#endif // defined(__linux) && + // (defined(__i386__) || defined(__x86_64__) || defined(__PPC__)) @@ -1977,7 +1977,12 @@ sub RemoveUninterestingFrames { '__builtin_vec_delete', '__builtin_vec_new', 'operator new', - 'operator new[]') { + 'operator new[]', + # These mark the beginning/end of our custom sections + 
'__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { $skip{$name} = 1; $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything } diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h index 5650c86..05701e7 100644 --- a/src/stacktrace_x86-inl.h +++ b/src/stacktrace_x86-inl.h @@ -49,6 +49,11 @@ #elif defined(HAVE_UCONTEXT_H) #include <ucontext.h> // for ucontext_t #elif defined(HAVE_CYGWIN_SIGNAL_H) +// cygwin/signal.h has a buglet where it uses pthread_attr_t without +// #including <pthread.h> itself. So we have to do it. +# ifdef HAVE_PTHREAD +# include <pthread.h> +# endif #include <cygwin/signal.h> typedef ucontext ucontext_t; #endif diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index 70dd8c6..625301e 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -762,9 +762,9 @@ TCMallocGuard::TCMallocGuard() { // patch the windows VirtualAlloc, etc. PatchWindowsFunctions(); // defined in windows/patch_functions.cc #endif - free(malloc(1)); + tc_free(tc_malloc(1)); ThreadCache::InitTSD(); - free(malloc(1)); + tc_free(tc_malloc(1)); MallocExtension::Register(new TCMallocImplementation); } } diff --git a/src/tests/sampling_test.sh b/src/tests/sampling_test.sh index 149d27b..8c96bc1 100755 --- a/src/tests/sampling_test.sh +++ b/src/tests/sampling_test.sh @@ -62,10 +62,10 @@ if ! cat "$SAMPLING_TEST_BINARY" >/dev/null 2>&1; then SAMPLING_TEST_BINARY="$SAMPLING_TEST_BINARY".exe fi -die() { - echo "FAILED" - echo "reason:" - echo "$@" +die() { # runs the command given as arguments, and then dies. + echo "FAILED. Output from $@" + echo "----" + "$@" echo "----" exit 1 } @@ -79,16 +79,16 @@ mkdir "$OUTDIR" || die "Unable to create $OUTDIR" # 50M to 99M. "$SAMPLING_TEST" "$OUTDIR/out" -echo -n "Testing heap output..." +echo "Testing heap output..." 
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \ | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ - || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"` + || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" echo "OK" -echo -n "Testing growth output..." +echo "Testing growth output..." "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \ | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ - || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"` + || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" echo "OK" echo "PASS" diff --git a/src/thread_cache.cc b/src/thread_cache.cc index fd44a70..64f4deb 100644 --- a/src/thread_cache.cc +++ b/src/thread_cache.cc @@ -299,12 +299,6 @@ int ThreadCache::GetSamplePeriod() { } void ThreadCache::InitModule() { - // There is a slight potential race here because of double-checked - // locking idiom. However, as long as the program does a small - // allocation before switching to multi-threaded mode, we will be - // fine. We increase the chances of doing such a small allocation - // by doing one in the constructor of the module_enter_exit_hook - // object declared below. SpinLockHolder h(Static::pageheap_lock()); if (!phinited) { Static::InitStaticVars(); diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc index 992161f..d13b7b8 100644 --- a/src/windows/patch_functions.cc +++ b/src/windows/patch_functions.cc @@ -72,6 +72,11 @@ #error This file is intended for patching allocators - use override_functions.cc instead. #endif +// We use psapi. Non-MSVC systems will have to link this in themselves. +#ifdef _MSC_VER +#pragma comment(lib, "Psapi.lib") +#endif + // Make sure we always use the 'old' names of the psapi functions. 
#ifndef PSAPI_VERSION #define PSAPI_VERSION 1 @@ -80,6 +85,8 @@ #include <windows.h> #include <malloc.h> // for _msize and _expand #include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc. +#include <set> +#include <map> #include <vector> #include <base/logging.h> #include "base/spinlock.h" @@ -122,29 +129,7 @@ typedef void (*GenericFnPtr)(); using sidestep::PreamblePatcher; -// This is a subset of MODDULEENTRY32, that we need for patching -struct ModuleEntryCopy { - LPVOID modBaseAddr; - DWORD modBaseSize; - HMODULE hModule; - TCHAR szModule[kMaxModuleNameSize]; - - ModuleEntryCopy() { - modBaseAddr = NULL; - modBaseSize = 0; - hModule = NULL; - strcpy(szModule, "<executable>"); - } - ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) { - this->modBaseAddr = mi.lpBaseOfDll; - this->modBaseSize = mi.SizeOfImage; - this->hModule = hmodule; - // TODO(csilvers): we could make more efficient by calling this - // lazily (not until this->szModule is needed, which is often never). - GetModuleBaseNameA(hprocess, hmodule, - this->szModule, sizeof(this->szModule)); - } -}; +struct ModuleEntryCopy; // defined below // These functions are how we override the memory allocation // functions, just like tcmalloc.cc and malloc_hook.cc do. @@ -160,33 +145,19 @@ class LibcInfo { LibcInfo() { memset(this, 0, sizeof(*this)); // easiest way to initialize the array } - bool SameAs(const LibcInfo& that) const { - return (is_valid() && - module_base_address_ == that.module_base_address_ && - module_base_size_ == that.module_base_size_); - } - bool SameAsME32(const ModuleEntryCopy& me32) const { - return (is_valid() && - module_base_address_ == me32.modBaseAddr && - module_base_size_ == me32.modBaseSize); - } + bool SameAs(const LibcInfo& that) const; + bool SameAsModuleEntry(const ModuleEntryCopy& module_entry) const; + bool patched() const { return is_valid() && module_name_[0] != '\0'; } const char* module_name() const { return is_valid() ? 
module_name_ : ""; } void set_is_valid(bool b) { is_valid_ = b; } - // These shouldn't have to be public, since only subclasses of - // LibcInfo need it, but they do. Maybe something to do with - // templates. Shrug. - bool is_valid() const { return is_valid_; } - GenericFnPtr windows_fn(int ifunction) const { - return windows_fn_[ifunction]; - } - // Populates all the windows_fn_[] vars based on our module info. // Returns false if windows_fn_ is all NULL's, because there's - // nothing to patch. Also populates the me32 info. - bool PopulateWindowsFn(const ModuleEntryCopy& me32); + // nothing to patch. Also populates the rest of the module_entry + // info, such as the module's name. + bool PopulateWindowsFn(const ModuleEntryCopy& module_entry); protected: void CopyFrom(const LibcInfo& that) { @@ -237,6 +208,24 @@ class LibcInfo { const void *module_base_address_; size_t module_base_size_; char module_name_[kMaxModuleNameSize]; + + public: + // These shouldn't have to be public, since only subclasses of + // LibcInfo need it, but they do. Maybe something to do with + // templates. Shrug. I hide them down here so users won't see + // them. :-) (OK, I also need to define ctrgProcAddress late.) + bool is_valid() const { return is_valid_; } + GenericFnPtr windows_fn(int ifunction) const { + return windows_fn_[ifunction]; + } + // These three are needed by ModuleEntryCopy. + static const int ctrgProcAddress = kNumFunctions; + static GenericFnPtr static_fn(int ifunction) { + return static_fn_[ifunction]; + } + static const char* const function_name(int ifunction) { + return function_name_[ifunction]; + } }; // Template trickiness: logically, a LibcInfo would include @@ -294,6 +283,43 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo { // But they seem pretty obscure, and I'm fine not overriding them for now. }; +// This is a subset of MODDULEENTRY32, that we need for patching. 
+struct ModuleEntryCopy { + LPVOID modBaseAddr; + DWORD modBaseSize; + HMODULE hModule; + TCHAR szModule[kMaxModuleNameSize]; + // This is not part of MODDULEENTRY32, but is needed to avoid making + // windows syscalls while we're holding patch_all_modules_lock (see + // lock-inversion comments at patch_all_modules_lock definition, below). + GenericFnPtr rgProcAddresses[LibcInfo::ctrgProcAddress]; + + ModuleEntryCopy() { + modBaseAddr = NULL; + modBaseSize = 0; + hModule = NULL; + strcpy(szModule, "<executable>"); + for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) + rgProcAddresses[i] = LibcInfo::static_fn(i); + } + ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) { + this->modBaseAddr = mi.lpBaseOfDll; + this->modBaseSize = mi.SizeOfImage; + this->hModule = hmodule; + // TODO(csilvers): we could make more efficient by calling these + // lazily (not until the vars are needed, which is often never). + // However, there's tricky business with calling windows functions + // inside the patch_all_modules_lock (see the lock inversion + // comments with the patch_all_modules_lock definition, below), so + // it's safest to do it all here, where no lock is needed. + ::GetModuleBaseNameA(hprocess, hmodule, + this->szModule, sizeof(this->szModule)); + for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) + rgProcAddresses[i] = + (GenericFnPtr)::GetProcAddress(hModule, LibcInfo::function_name(i)); + } +}; + // This class is easier because there's only one of them. class WindowsInfo { public: @@ -347,6 +373,11 @@ class WindowsInfo { // If you run out, just add a few more to the array. You'll also need // to update the switch statement in PatchOneModule(), and the list in // UnpatchWindowsFunctions(). +// main_executable and main_executable_windows are two windows into +// the same executable. 
One is responsible for patching the libc +// routines that live in the main executable (if any) to use tcmalloc; +// the other is responsible for patching the windows routines like +// HeapAlloc/etc to use tcmalloc. static LibcInfoWithPatchFunctions<0> main_executable; static LibcInfoWithPatchFunctions<1> libc1; static LibcInfoWithPatchFunctions<2> libc2; @@ -448,20 +479,28 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = { { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary }, }; -bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& me32) { +bool LibcInfo::SameAs(const LibcInfo& that) const { + return (is_valid() && + module_base_address_ == that.module_base_address_ && + module_base_size_ == that.module_base_size_); +} + +bool LibcInfo::SameAsModuleEntry(const ModuleEntryCopy& module_entry) const { + return (is_valid() && + module_base_address_ == module_entry.modBaseAddr && + module_base_size_ == module_entry.modBaseSize); +} + +bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) { // First, store the location of the function to patch before // patching it. If none of these functions are found in the module, // then this module has no libc in it, and we just return false. for (int i = 0; i < kNumFunctions; i++) { if (!function_name_[i]) // we can turn off patching by unsetting name continue; - GenericFnPtr fn = NULL; - if (me32.hModule == NULL) { // used for the main executable - // This is used only for a statically-linked-in libc. - fn = static_fn_[i]; - } else { - fn = (GenericFnPtr)::GetProcAddress(me32.hModule, function_name_[i]); - } + // The ::GetProcAddress calls were done in the ModuleEntryCopy + // constructor, so we don't have to make any windows calls here. 
+ const GenericFnPtr fn = module_entry.rgProcAddresses[i]; if (fn) { windows_fn_[i] = PreamblePatcher::ResolveTarget(fn); } @@ -514,15 +553,15 @@ bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& me32) { CHECK(windows_fn_[kRealloc]); // OK, we successfully patched. Let's store our member information. - module_base_address_ = me32.modBaseAddr; - module_base_size_ = me32.modBaseSize; - strcpy(module_name_, me32.szModule); + module_base_address_ = module_entry.modBaseAddr; + module_base_size_ = module_entry.modBaseSize; + strcpy(module_name_, module_entry.szModule); return true; } template<int T> bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) { - CopyFrom(me_info); // copies the me32 and the windows_fn_ array + CopyFrom(me_info); // copies the module_entry and the windows_fn_ array for (int i = 0; i < kNumFunctions; i++) { if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) { // if origstub_fn_ is not NULL, it's left around from a previous @@ -530,7 +569,10 @@ bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) { // Since we've patched Unpatch() not to delete origstub_fn_ (it // causes problems in some contexts, though obviously not this // one), we should delete it now, before setting it to NULL. - delete[] reinterpret_cast<char*>(origstub_fn_[i]); + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + delete[] (char*)(origstub_fn_[i]); origstub_fn_[i] = NULL; // Patch() will fill this in CHECK_EQ(sidestep::SIDESTEP_SUCCESS, PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i], @@ -569,7 +611,10 @@ void WindowsInfo::Patch() { // Since we've patched Unpatch() not to delete origstub_fn_ (it // causes problems in some contexts, though obviously not this // one), we should delete it now, before setting it to NULL. 
- delete[] reinterpret_cast<char*>(function_info_[i].origstub_fn); + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + delete[] (char*)(function_info_[i].origstub_fn); function_info_[i].origstub_fn = NULL; // Patch() will fill this in CHECK_EQ(sidestep::SIDESTEP_SUCCESS, PreamblePatcher::Patch(function_info_[i].windows_fn, @@ -594,7 +639,7 @@ void PatchOneModuleLocked(const LibcInfo& me_info) { // Double-check we haven't seen this module before. for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { if (g_module_libcs[i]->SameAs(me_info)) { - fprintf(stderr, "%s:%d: FATAL ERROR: %s double-patched somehow.\n", + fprintf(stderr, "%s:%d: FATAL PERFTOOLS ERROR: %s double-patched somehow.\n", __FILE__, __LINE__, g_module_libcs[i]->module_name()); CHECK(false); } @@ -617,34 +662,53 @@ void PatchOneModuleLocked(const LibcInfo& me_info) { } } } - printf("ERROR: Too many modules containing libc in this executable\n"); + printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n"); } void PatchMainExecutableLocked() { if (main_executable.patched()) return; // main executable has already been patched - ModuleEntryCopy fake_me32; // we make a fake one to pass into Patch() - main_executable.PopulateWindowsFn(fake_me32); + ModuleEntryCopy fake_module_entry; // make a fake one to pass into Patch() + // No need to call PopulateModuleEntryProcAddresses on the main executable. + main_executable.PopulateWindowsFn(fake_module_entry); main_executable.Patch(main_executable); } +// This lock is subject to a subtle and annoying lock inversion +// problem: it may interact badly with unknown internal windows locks. +// In particular, windows may be holding a lock when it calls +// LoadLibraryExW and FreeLibrary, which we've patched. We have those +// routines call PatchAllModules, which acquires this lock. 
If we +// make windows system calls while holding this lock, those system +// calls may need the internal windows locks that are being held in +// the call to LoadLibraryExW, resulting in deadlock. The solution is +// to be very careful not to call *any* windows routines while holding +// patch_all_modules_lock, inside PatchAllModules(). static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); // Iterates over all the modules currently loaded by the executable, // and makes sure they're all patched. For ones that aren't, we patch // them in. We also check that every module we had patched in the // past is still loaded, and update internal data structures if so. -void PatchAllModules() { +// We return true if this PatchAllModules did any work, false else. +bool PatchAllModules() { std::vector<ModuleEntryCopy> modules; + bool made_changes = false; const HANDLE hCurrentProcess = GetCurrentProcess(); MODULEINFO mi; DWORD cbNeeded = 0; HMODULE hModules[kMaxModules]; // max # of modules we support in one process - if (EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), - &cbNeeded)) { + if (::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), + &cbNeeded)) { for (int i = 0; i < cbNeeded / sizeof(*hModules); ++i) { - if (GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) + if (i >= kMaxModules) { + printf("PERFTOOLS ERROR: Too many modules in this executable to try" + " to patch them all (if you need to, raise kMaxModules in" + " patch_functions.cc).\n"); + break; + } + if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) modules.push_back(ModuleEntryCopy(hCurrentProcess, hModules[i], mi)); } } @@ -658,7 +722,7 @@ void PatchAllModules() { bool still_loaded = false; for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); it != modules.end(); ++it) { - if (g_module_libcs[i]->SameAsME32(*it)) { + if (g_module_libcs[i]->SameAsModuleEntry(*it)) { // Both g_module_libcs[i] and it are still 
valid. Mark it by // removing it from the vector; mark g_module_libcs[i] by // setting a bool. @@ -672,23 +736,30 @@ void PatchAllModules() { // We could call Unpatch() here, but why bother? The module // has gone away, so nobody is going to call into it anyway. g_module_libcs[i]->set_is_valid(false); + made_changes = true; } } // We've handled all the g_module_libcs. Now let's handle the rest - // of the me32's: those that haven't already been loaded. + // of the module-entries: those that haven't already been loaded. for (std::vector<ModuleEntryCopy>::const_iterator it = modules.begin(); it != modules.end(); ++it) { LibcInfo libc_info; - if (libc_info.PopulateWindowsFn(*it)) // true==module has libc routines - PatchOneModuleLocked(libc_info); // updates num_patched_modules + if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines + PatchOneModuleLocked(libc_info); // updates num_patched_modules + made_changes = true; + } } // Now that we've dealt with the modules (dlls), update the main // executable. We do this last because PatchMainExecutableLocked // wants to look at how other modules were patched. - PatchMainExecutableLocked(); + if (!main_executable.patched()) { + PatchMainExecutableLocked(); + made_changes = true; + } } + return made_changes; } @@ -697,6 +768,8 @@ void PatchAllModules() { // --------------------------------------------------------------------- // PatchWindowsFunctions() // This is the function that is exposed to the outside world. +// It should be called before the program becomes multi-threaded, +// since main_executable_windows.Patch() is not thread-safe. 
// --------------------------------------------------------------------- void PatchWindowsFunctions() { @@ -956,19 +1029,68 @@ BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) { lpBaseAddress); } +// * nopatching_set holds modules (specified by full file-path) that +// don't contain any libc routines, nor depend on any DLLs that do: +// that is, files where PatchAllModules ended up being a no-op. We +// know we don't need to patch those files if we see them again in the +// future (which can happen in windows apps, which can load+unload a +// given module many times during a single program run). +// * patching_set holds modules that *do* require us to do patching +// work when we load them. (This map is keyed by handle, so we need +// to be careful to clear each entry in FreeLibrary; the map data is +// the necessary refcount.) This set is used as an optimization in +// FreeLibrary to tell if we had done any patching in LoadLibraryExW. +// If we didn't, FreeLibrary doesn't need to clear up the +// patching-work, but if we did, it does. +static std::map<HMODULE, int>* patching_map = NULL; +static std::set<std::string>* nopatching_set = NULL; + HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, HANDLE hFile, DWORD dwFlags) { HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) function_info_[kLoadLibraryExW].origstub_fn)( lpFileName, hFile, dwFlags); - PatchAllModules(); // this will patch any newly loaded libraries + // This will patch any newly loaded libraries, if patching needs to + // be done. If so, PatchAllModules() will return true. If no + // patching needs to be done (this library didn't contain a libc), + // we'll mark it in a set, so we can skip patching next time. 
+ char szFullPath[PATH_MAX]; + if (::GetModuleFileNameA(rv, szFullPath, sizeof(szFullPath))) { + SpinLockHolder h(&patch_all_modules_lock); + if (nopatching_set && nopatching_set->count(szFullPath) > 0) + return rv; + } + const bool made_changes = PatchAllModules(); + { + SpinLockHolder h(&patch_all_modules_lock); + if (made_changes) { + if (!patching_map) patching_map = new std::map<HMODULE, int>; + ++(*patching_map)[rv]; + } else { + if (!nopatching_set) nopatching_set = new std::set<std::string>; + nopatching_set->insert(szFullPath); + } + } return rv; } BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) { BOOL rv = ((BOOL (WINAPI *)(HMODULE)) function_info_[kFreeLibrary].origstub_fn)(hLibModule); + { + // This is an optimization: if we know this module never did any work + // in LoadLibraryEx (because it didn't get put into patching_map), we + // know we don't need to revisit the patching here. + SpinLockHolder h(&patch_all_modules_lock); + if (patching_map) { + std::map<HMODULE, int>::iterator it = patching_map->find(hLibModule); + if (it == patching_map->end()) + return rv; + if (--it->second <= 0) // decrement the refcount + patching_map->erase(it); // hLibModule is now a dangling handle + } + } PatchAllModules(); // this will fix up the list of patched libraries return rv; } diff --git a/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj b/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj index 77e8391..3755fb0 100755 --- a/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +++ b/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj @@ -30,7 +30,6 @@ Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="Psapi.lib"
OutputFile="$(OutDir)/libtcmalloc_minimal-debug.dll"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
@@ -76,7 +75,6 @@ Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="Psapi.lib"
OutputFile="$(OutDir)/libtcmalloc_minimal.dll"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
diff --git a/vsprojects/tmu-static/tmu-static.vcproj b/vsprojects/tmu-static/tmu-static.vcproj index a76ffa7..a5d6402 100755 --- a/vsprojects/tmu-static/tmu-static.vcproj +++ b/vsprojects/tmu-static/tmu-static.vcproj @@ -30,7 +30,6 @@ Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="Psapi.lib"
OutputFile="$(OutDir)/tcmalloc_minimal_unittest-static.exe"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
@@ -75,7 +74,6 @@ Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="Psapi.lib"
OutputFile="$(OutDir)/tcmalloc_minimal_unittest-static.exe"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
|