diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2010-03-23 20:39:55 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2010-03-23 20:39:55 +0000 |
commit | 92beff88437b31f4a618640b88487e0f8dfb7017 (patch) | |
tree | d15e670fdc74a690d012c25e16a2d6efa4ab7d26 | |
parent | 23dd124970bc11636feaa240394063ba5889ca54 (diff) | |
download | gperftools-92beff88437b31f4a618640b88487e0f8dfb7017.tar.gz |
* Add new (std::nothrow) to debugallocation (corrado)
* Add a flag to ignore unaligned-ptr leaks (archanakannan)
* PORTING: Add get-pc capabilities for a new OS (csilvers)
* Don't register malloc extension under valgrind (csilvers)
* Fix throw specs for our global operator new (chandlerc)
* PORTING: link to instructions on windows static overrides (mbelshe)
* Fix prototype differences in debugalloc (chandlerc, csilvers, wan)
* Change pprof to handle big-endian input files (csilvers)
* Properly align allocation sizes on Windows (antonm)
* Improve IsRunningOnValgrind, using valgrind.h (csilvers, kcc)
* Improve the accuracy of system_alloc actual_size (csilvers)
* Add interactive callgrind support to pprof (weidenri...)
* Fix off-by-one problems when symbolizing in pprof (dpeng)
* Be more permissive in allowed library names, in pprof (csilvers)
* PORTING: Fix pc_from_ucontext to handle cygwin and redhat7 (csilvers)
* Fix stacktrace to avoid inlining (ppluzhnikov)
git-svn-id: http://gperftools.googlecode.com/svn/trunk@91 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
38 files changed, 5372 insertions, 1431 deletions
diff --git a/Makefile.am b/Makefile.am index 3de910e..73635db 100644 --- a/Makefile.am +++ b/Makefile.am @@ -143,10 +143,11 @@ dist_doc_DATA += doc/index.html doc/designstyle.css LOGGING_INCLUDES = src/base/logging.h \ src/base/commandlineflags.h \ src/base/basictypes.h \ - src/base/dynamic_annotations.h + src/base/dynamic_annotations.h \ + src/third_party/valgrind.h noinst_LTLIBRARIES += liblogging.la liblogging_la_SOURCES = src/base/logging.cc \ - src/base/dynamic_annotations.cc \ + src/base/dynamic_annotations.c \ $(LOGGING_INCLUDES) SYSINFO_INCLUDES = src/base/sysinfo.h \ @@ -279,7 +280,6 @@ googleinclude_HEADERS += $(SG_STACKTRACE_INCLUDES) ### Making the library noinst_LTLIBRARIES += libstacktrace.la libstacktrace_la_SOURCES = src/stacktrace.cc \ - src/stacktrace_with_context.cc \ src/base/vdso_support.cc \ $(STACKTRACE_INCLUDES) libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK) diff --git a/Makefile.in b/Makefile.in index 0db79a2..a717bed 100644 --- a/Makefile.in +++ b/Makefile.in @@ -294,7 +294,8 @@ am__libprofiler_la_SOURCES_DIST = src/profiler.cc \ src/base/atomicops-internals-x86.h \ src/base/spinlock_win32-inl.h src/base/spinlock_linux-inl.h \ src/base/spinlock_posix-inl.h src/base/dynamic_annotations.h \ - src/google/profiler.h src/google/stacktrace.h + src/third_party/valgrind.h src/google/profiler.h \ + src/google/stacktrace.h @WITH_CPU_PROFILER_TRUE@am__objects_2 = $(am__objects_1) \ @WITH_CPU_PROFILER_TRUE@ $(am__objects_1) @WITH_CPU_PROFILER_TRUE@am__objects_3 = $(am__objects_2) \ @@ -328,16 +329,14 @@ libspinlock_la_OBJECTS = $(am_libspinlock_la_OBJECTS) @WITH_STACK_TRACE_TRUE@ $(am__DEPENDENCIES_1) \ @WITH_STACK_TRACE_TRUE@ $(am__DEPENDENCIES_2) am__libstacktrace_la_SOURCES_DIST = src/stacktrace.cc \ - src/stacktrace_with_context.cc src/base/vdso_support.cc \ - src/stacktrace_config.h src/stacktrace_generic-inl.h \ - src/stacktrace_libunwind-inl.h src/stacktrace_powerpc-inl.h \ - src/stacktrace_x86_64-inl.h src/stacktrace_x86-inl.h \ - 
src/stacktrace_win32-inl.h src/base/vdso_support.h \ - src/google/stacktrace.h + src/base/vdso_support.cc src/stacktrace_config.h \ + src/stacktrace_generic-inl.h src/stacktrace_libunwind-inl.h \ + src/stacktrace_powerpc-inl.h src/stacktrace_x86_64-inl.h \ + src/stacktrace_x86-inl.h src/stacktrace_win32-inl.h \ + src/base/vdso_support.h src/google/stacktrace.h @WITH_STACK_TRACE_TRUE@am__objects_4 = $(am__objects_1) \ @WITH_STACK_TRACE_TRUE@ $(am__objects_1) @WITH_STACK_TRACE_TRUE@am_libstacktrace_la_OBJECTS = stacktrace.lo \ -@WITH_STACK_TRACE_TRUE@ stacktrace_with_context.lo \ @WITH_STACK_TRACE_TRUE@ vdso_support.lo $(am__objects_4) libstacktrace_la_OBJECTS = $(am_libstacktrace_la_OBJECTS) @WITH_STACK_TRACE_TRUE@am_libstacktrace_la_rpath = @@ -364,17 +363,17 @@ am__libtcmalloc_la_SOURCES_DIST = src/tcmalloc.cc src/common.h \ src/thread_cache.h src/stack_trace_table.h \ src/base/thread_annotations.h src/malloc_hook-inl.h \ src/maybe_threads.h src/base/logging.h \ - src/base/dynamic_annotations.h src/addressmap-inl.h \ - src/raw_printer.h src/base/elfcore.h src/base/googleinit.h \ - src/base/linux_syscall_support.h src/base/linuxthreads.h \ - src/base/stl_allocator.h src/base/sysinfo.h \ - src/base/thread_lister.h src/heap-profile-table.h \ - src/google/malloc_hook.h src/google/malloc_hook_c.h \ - src/google/malloc_extension.h src/google/malloc_extension_c.h \ - src/google/stacktrace.h src/google/heap-profiler.h \ - src/google/heap-checker.h src/base/thread_lister.c \ - src/base/linuxthreads.cc src/heap-checker.cc \ - src/heap-checker-bcad.cc + src/base/dynamic_annotations.h src/third_party/valgrind.h \ + src/addressmap-inl.h src/raw_printer.h src/base/elfcore.h \ + src/base/googleinit.h src/base/linux_syscall_support.h \ + src/base/linuxthreads.h src/base/stl_allocator.h \ + src/base/sysinfo.h src/base/thread_lister.h \ + src/heap-profile-table.h src/google/malloc_hook.h \ + src/google/malloc_hook_c.h src/google/malloc_extension.h \ + 
src/google/malloc_extension_c.h src/google/stacktrace.h \ + src/google/heap-profiler.h src/google/heap-checker.h \ + src/base/thread_lister.c src/base/linuxthreads.cc \ + src/heap-checker.cc src/heap-checker-bcad.cc @MINGW_FALSE@am__objects_5 = libtcmalloc_la-tcmalloc.lo am__objects_6 = $(am__objects_1) @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_7 = $(am__objects_6) \ @@ -414,17 +413,17 @@ am__libtcmalloc_and_profiler_la_SOURCES_DIST = src/tcmalloc.cc \ src/thread_cache.h src/stack_trace_table.h \ src/base/thread_annotations.h src/malloc_hook-inl.h \ src/maybe_threads.h src/base/logging.h \ - src/base/dynamic_annotations.h src/addressmap-inl.h \ - src/raw_printer.h src/base/elfcore.h src/base/googleinit.h \ - src/base/linux_syscall_support.h src/base/linuxthreads.h \ - src/base/stl_allocator.h src/base/sysinfo.h \ - src/base/thread_lister.h src/heap-profile-table.h \ - src/google/malloc_hook.h src/google/malloc_hook_c.h \ - src/google/malloc_extension.h src/google/malloc_extension_c.h \ - src/google/stacktrace.h src/google/heap-profiler.h \ - src/google/heap-checker.h src/base/thread_lister.c \ - src/base/linuxthreads.cc src/heap-checker.cc \ - src/heap-checker-bcad.cc src/profiler.cc \ + src/base/dynamic_annotations.h src/third_party/valgrind.h \ + src/addressmap-inl.h src/raw_printer.h src/base/elfcore.h \ + src/base/googleinit.h src/base/linux_syscall_support.h \ + src/base/linuxthreads.h src/base/stl_allocator.h \ + src/base/sysinfo.h src/base/thread_lister.h \ + src/heap-profile-table.h src/google/malloc_hook.h \ + src/google/malloc_hook_c.h src/google/malloc_extension.h \ + src/google/malloc_extension_c.h src/google/stacktrace.h \ + src/google/heap-profiler.h src/google/heap-checker.h \ + src/base/thread_lister.c src/base/linuxthreads.cc \ + src/heap-checker.cc src/heap-checker-bcad.cc src/profiler.cc \ src/profile-handler.cc src/profiledata.cc src/profiledata.h \ src/profile-handler.h src/getpc.h src/base/simple_mutex.h \ src/google/profiler.h @@ 
-467,15 +466,15 @@ am__libtcmalloc_debug_la_SOURCES_DIST = src/debugallocation.cc \ src/thread_cache.h src/stack_trace_table.h \ src/base/thread_annotations.h src/malloc_hook-inl.h \ src/maybe_threads.h src/base/logging.h \ - src/base/dynamic_annotations.h src/addressmap-inl.h \ - src/raw_printer.h src/base/elfcore.h src/base/googleinit.h \ - src/base/linux_syscall_support.h src/base/linuxthreads.h \ - src/base/stl_allocator.h src/base/sysinfo.h \ - src/base/thread_lister.h src/heap-profile-table.h \ - src/google/malloc_hook.h src/google/malloc_hook_c.h \ - src/google/malloc_extension.h src/google/malloc_extension_c.h \ - src/google/stacktrace.h src/google/heap-profiler.h \ - src/google/heap-checker.h + src/base/dynamic_annotations.h src/third_party/valgrind.h \ + src/addressmap-inl.h src/raw_printer.h src/base/elfcore.h \ + src/base/googleinit.h src/base/linux_syscall_support.h \ + src/base/linuxthreads.h src/base/stl_allocator.h \ + src/base/sysinfo.h src/base/thread_lister.h \ + src/heap-profile-table.h src/google/malloc_hook.h \ + src/google/malloc_hook_c.h src/google/malloc_extension.h \ + src/google/malloc_extension_c.h src/google/stacktrace.h \ + src/google/heap-profiler.h src/google/heap-checker.h @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_17 = thread_lister.lo \ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_debug_la-linuxthreads.lo \ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_debug_la-heap-checker.lo \ @@ -513,14 +512,15 @@ am__libtcmalloc_internal_la_SOURCES_DIST = src/common.cc \ src/google/malloc_hook_c.h src/google/malloc_extension.h \ src/google/malloc_extension_c.h src/google/stacktrace.h \ src/base/logging.h src/base/dynamic_annotations.h \ - src/addressmap-inl.h src/raw_printer.h src/base/elfcore.h \ - src/base/googleinit.h src/base/linux_syscall_support.h \ - src/base/linuxthreads.h src/base/stl_allocator.h \ - src/base/sysinfo.h src/base/thread_lister.h \ - 
src/heap-profile-table.h src/google/heap-profiler.h \ - src/google/heap-checker.h src/base/low_level_alloc.cc \ - src/heap-profile-table.cc src/heap-profiler.cc \ - src/raw_printer.cc src/memory_region_map.cc + src/third_party/valgrind.h src/addressmap-inl.h \ + src/raw_printer.h src/base/elfcore.h src/base/googleinit.h \ + src/base/linux_syscall_support.h src/base/linuxthreads.h \ + src/base/stl_allocator.h src/base/sysinfo.h \ + src/base/thread_lister.h src/heap-profile-table.h \ + src/google/heap-profiler.h src/google/heap-checker.h \ + src/base/low_level_alloc.cc src/heap-profile-table.cc \ + src/heap-profiler.cc src/raw_printer.cc \ + src/memory_region_map.cc @MINGW_FALSE@am__objects_18 = libtcmalloc_internal_la-system-alloc.lo @MINGW_FALSE@am__objects_19 = \ @MINGW_FALSE@ libtcmalloc_internal_la-maybe_threads.lo @@ -728,7 +728,8 @@ am__addressmap_unittest_SOURCES_DIST = \ src/tests/addressmap_unittest.cc src/addressmap-inl.h \ src/base/commandlineflags.h src/base/logging.h \ src/base/basictypes.h src/base/dynamic_annotations.h \ - src/windows/port.h src/windows/port.cc + src/third_party/valgrind.h src/windows/port.h \ + src/windows/port.cc @MINGW_TRUE@am__objects_25 = addressmap_unittest-port.$(OBJEXT) am_addressmap_unittest_OBJECTS = \ addressmap_unittest-addressmap_unittest.$(OBJEXT) \ @@ -774,7 +775,7 @@ am__heap_checker_debug_unittest_SOURCES_DIST = \ src/memory_region_map.h src/base/commandlineflags.h \ src/base/googleinit.h src/google/heap-checker.h \ src/base/logging.h src/base/basictypes.h \ - src/base/dynamic_annotations.h + src/base/dynamic_annotations.h src/third_party/valgrind.h @WITH_HEAP_CHECKER_TRUE@am__objects_26 = $(am__objects_1) @WITH_HEAP_CHECKER_TRUE@am__objects_27 = heap_checker_debug_unittest-heap-checker_unittest.$(OBJEXT) \ @WITH_HEAP_CHECKER_TRUE@ $(am__objects_26) @@ -795,7 +796,7 @@ am__heap_checker_unittest_SOURCES_DIST = \ src/memory_region_map.h src/base/commandlineflags.h \ src/base/googleinit.h src/google/heap-checker.h \ 
src/base/logging.h src/base/basictypes.h \ - src/base/dynamic_annotations.h + src/base/dynamic_annotations.h src/third_party/valgrind.h @WITH_HEAP_CHECKER_TRUE@am_heap_checker_unittest_OBJECTS = heap_checker_unittest-heap-checker_unittest.$(OBJEXT) \ @WITH_HEAP_CHECKER_TRUE@ $(am__objects_26) heap_checker_unittest_OBJECTS = $(am_heap_checker_unittest_OBJECTS) @@ -853,7 +854,8 @@ am__low_level_alloc_unittest_SOURCES_DIST = \ src/base/atomicops-internals-x86.h \ src/base/spinlock_win32-inl.h src/base/spinlock_linux-inl.h \ src/base/spinlock_posix-inl.h src/base/logging.h \ - src/base/commandlineflags.h src/base/dynamic_annotations.h + src/base/commandlineflags.h src/base/dynamic_annotations.h \ + src/third_party/valgrind.h am__objects_29 = $(am__objects_1) $(am__objects_1) am_low_level_alloc_unittest_OBJECTS = \ low_level_alloc_unittest-low_level_alloc.$(OBJEXT) \ @@ -1077,7 +1079,7 @@ am__stacktrace_unittest_SOURCES_DIST = \ src/stacktrace_x86-inl.h src/stacktrace_win32-inl.h \ src/base/vdso_support.h src/google/stacktrace.h \ src/base/logging.h src/base/basictypes.h \ - src/base/dynamic_annotations.h + src/base/dynamic_annotations.h src/third_party/valgrind.h @WITH_STACK_TRACE_TRUE@am__objects_39 = $(am__objects_4) \ @WITH_STACK_TRACE_TRUE@ $(am__objects_1) @WITH_STACK_TRACE_TRUE@am_stacktrace_unittest_OBJECTS = \ @@ -1702,10 +1704,11 @@ EXTRA_INSTALL = LOGGING_INCLUDES = src/base/logging.h \ src/base/commandlineflags.h \ src/base/basictypes.h \ - src/base/dynamic_annotations.h + src/base/dynamic_annotations.h \ + src/third_party/valgrind.h liblogging_la_SOURCES = src/base/logging.cc \ - src/base/dynamic_annotations.cc \ + src/base/dynamic_annotations.c \ $(LOGGING_INCLUDES) SYSINFO_INCLUDES = src/base/sysinfo.h \ @@ -1823,7 +1826,6 @@ atomicops_unittest_LDADD = $(LIBSPINLOCK) @WITH_STACK_TRACE_TRUE@SG_STACKTRACE_INCLUDES = src/google/stacktrace.h @WITH_STACK_TRACE_TRUE@STACKTRACE_INCLUDES = $(S_STACKTRACE_INCLUDES) $(SG_STACKTRACE_INCLUDES) 
@WITH_STACK_TRACE_TRUE@libstacktrace_la_SOURCES = src/stacktrace.cc \ -@WITH_STACK_TRACE_TRUE@ src/stacktrace_with_context.cc \ @WITH_STACK_TRACE_TRUE@ src/base/vdso_support.cc \ @WITH_STACK_TRACE_TRUE@ $(STACKTRACE_INCLUDES) @@ -2787,7 +2789,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stack_trace_table_test-stack_trace_table_test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stacktrace.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stacktrace_unittest.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stacktrace_with_context.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sysinfo.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/system_alloc_unittest-system-alloc_unittest.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tcmalloc_and_profiler_unittest-tcmalloc_unittest.Po@am__quote@ @@ -2830,6 +2831,13 @@ distclean-compile: @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< +dynamic_annotations.lo: src/base/dynamic_annotations.c +@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dynamic_annotations.lo -MD -MP -MF "$(DEPDIR)/dynamic_annotations.Tpo" -c -o dynamic_annotations.lo `test -f 'src/base/dynamic_annotations.c' || echo '$(srcdir)/'`src/base/dynamic_annotations.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dynamic_annotations.Tpo" "$(DEPDIR)/dynamic_annotations.Plo"; else rm -f "$(DEPDIR)/dynamic_annotations.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/base/dynamic_annotations.c' object='dynamic_annotations.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dynamic_annotations.lo `test -f 'src/base/dynamic_annotations.c' || echo '$(srcdir)/'`src/base/dynamic_annotations.c + thread_lister.lo: src/base/thread_lister.c @am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT thread_lister.lo -MD -MP -MF "$(DEPDIR)/thread_lister.Tpo" -c -o thread_lister.lo `test -f 'src/base/thread_lister.c' || echo '$(srcdir)/'`src/base/thread_lister.c; \ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/thread_lister.Tpo" "$(DEPDIR)/thread_lister.Plo"; else rm -f "$(DEPDIR)/thread_lister.Tpo"; exit 1; fi @@ -2879,13 +2887,6 @@ logging.lo: src/base/logging.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o logging.lo `test -f 'src/base/logging.cc' || echo '$(srcdir)/'`src/base/logging.cc -dynamic_annotations.lo: src/base/dynamic_annotations.cc -@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT dynamic_annotations.lo -MD -MP -MF "$(DEPDIR)/dynamic_annotations.Tpo" -c -o dynamic_annotations.lo `test -f 'src/base/dynamic_annotations.cc' || echo '$(srcdir)/'`src/base/dynamic_annotations.cc; \ -@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/dynamic_annotations.Tpo" "$(DEPDIR)/dynamic_annotations.Plo"; else rm -f "$(DEPDIR)/dynamic_annotations.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/dynamic_annotations.cc' object='dynamic_annotations.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o dynamic_annotations.lo `test -f 'src/base/dynamic_annotations.cc' || echo '$(srcdir)/'`src/base/dynamic_annotations.cc - profiler.lo: src/profiler.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT profiler.lo -MD -MP -MF "$(DEPDIR)/profiler.Tpo" -c -o profiler.lo `test -f 'src/profiler.cc' || echo '$(srcdir)/'`src/profiler.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/profiler.Tpo" "$(DEPDIR)/profiler.Plo"; else rm -f "$(DEPDIR)/profiler.Tpo"; exit 1; fi @@ -2928,13 +2929,6 @@ stacktrace.lo: src/stacktrace.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o stacktrace.lo `test -f 'src/stacktrace.cc' || echo '$(srcdir)/'`src/stacktrace.cc -stacktrace_with_context.lo: src/stacktrace_with_context.cc -@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT stacktrace_with_context.lo -MD -MP -MF "$(DEPDIR)/stacktrace_with_context.Tpo" -c -o stacktrace_with_context.lo `test -f 'src/stacktrace_with_context.cc' || echo '$(srcdir)/'`src/stacktrace_with_context.cc; \ -@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/stacktrace_with_context.Tpo" "$(DEPDIR)/stacktrace_with_context.Plo"; else rm -f "$(DEPDIR)/stacktrace_with_context.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/stacktrace_with_context.cc' object='stacktrace_with_context.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o stacktrace_with_context.lo `test -f 'src/stacktrace_with_context.cc' || echo '$(srcdir)/'`src/stacktrace_with_context.cc - vdso_support.lo: src/base/vdso_support.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT vdso_support.lo -MD -MP -MF "$(DEPDIR)/vdso_support.Tpo" -c -o vdso_support.lo `test -f 'src/base/vdso_support.cc' || echo '$(srcdir)/'`src/base/vdso_support.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/vdso_support.Tpo" "$(DEPDIR)/vdso_support.Plo"; else rm -f "$(DEPDIR)/vdso_support.Tpo"; exit 1; fi diff --git a/README.windows b/README.windows index 750aa51..9844087 100644 --- a/README.windows +++ b/README.windows @@ -1,3 +1,5 @@ +--- COMPILING + This project has begun being ported to Windows. A working solution file exists in this directory: google-perftools.sln @@ -45,6 +47,17 @@ line of every perftools .cc file. You do not need to depend on the tcmalloc symbol in this case (that is, you don't need to do either step 1 or step 2 from above). +An alternative to all the above is to statically link your application +with libc, and then replace its malloc with tcmalloc. This allows you +to just build and link your program normally; the tcmalloc support +comes in a post-processing step. This is more reliable than the above +technique (which depends on run-time patching, which is inherently +fragile), though more work to set up. For details, see +https://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + + +--- THE HEAP-PROFILER + The heap-profiler has had a preliminary port to Windows. It has not been well tested, and probably does not work at all when Frame Pointer Optimization (FPO) is enabled -- that is, in release mode. 
The other @@ -52,6 +65,8 @@ features of perftools, such as the cpu-profiler and leak-checker, have not yet been ported to Windows at all. +--- ISSUES + NOTE FOR WIN2K USERS: According to reports (http://code.google.com/p/google-perftools/issues/detail?id=127) the stack-tracing necessary for the heap-profiler does not work on @@ -60,7 +75,6 @@ is to add "/D NO_TCMALLOC_SAMPLES=" to your build, to turn off the stack-tracing. You will not be able to use the heap-profiler if you do this. - NOTE ON _MSIZE and _RECALLOC: The tcmalloc version of _msize returns the size of the region tcmalloc allocated for you -- which is at least as many bytes you asked for, but may be more. (btw, these *are* bytes @@ -82,4 +96,4 @@ them on the google-perftools Google Code site: -- craig -Last modified: 18 November 2008 +Last modified: 3 February 2010 @@ -19797,42 +19797,6 @@ fi done # for being nice in our spinlock code -for ac_header in ucontext.h -do : - ac_fn_c_check_header_mongrel "$LINENO" "ucontext.h" "ac_cv_header_ucontext_h" "$ac_includes_default" -if test "x$ac_cv_header_ucontext_h" = x""yes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_UCONTEXT_H 1 -_ACEOF - -fi - -done - # for profiler.cc (cpu profiler) -for ac_header in sys/ucontext.h -do : - ac_fn_c_check_header_mongrel "$LINENO" "sys/ucontext.h" "ac_cv_header_sys_ucontext_h" "$ac_includes_default" -if test "x$ac_cv_header_sys_ucontext_h" = x""yes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_SYS_UCONTEXT_H 1 -_ACEOF - -fi - -done - # ucontext on OS X 10.6 (at least) -for ac_header in cygwin/signal.h -do : - ac_fn_c_check_header_mongrel "$LINENO" "cygwin/signal.h" "ac_cv_header_cygwin_signal_h" "$ac_includes_default" -if test "x$ac_cv_header_cygwin_signal_h" = x""yes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_CYGWIN_SIGNAL_H 1 -_ACEOF - -fi - -done - # ucontext on cywgin for ac_header in conflict-signal.h do : ac_fn_c_check_header_mongrel "$LINENO" "conflict-signal.h" "ac_cv_header_conflict_signal_h" 
"$ac_includes_default" @@ -19965,6 +19929,21 @@ fi done # for memalign_unittest.cc +for ac_header in valgrind.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "valgrind.h" "ac_cv_header_valgrind_h" "$ac_includes_default" +if test "x$ac_cv_header_valgrind_h" = x""yes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_VALGRIND_H 1 +_ACEOF + +fi + +done + # we have a local copy if this isn't found +# We also need <ucontext.h>/<sys/ucontext.h>, but we get those from +# AC_PC_FROM_UCONTEXT, below. + # We override a lot of memory allocation routines, not all of which are # standard. For those the system doesn't declare, we'll declare ourselves. ac_fn_c_check_decl "$LINENO" "cfree" "ac_cv_have_decl_cfree" "#define _XOPEN_SOURCE 600 @@ -20283,7 +20262,15 @@ fi done - for ac_header in sys/ucontext.h + # Redhat 7 has <sys/ucontext.h>, but it barfs if we #include it directly + # (this was fixed in later redhats). <ucontext.h> works fine, so use that. + if grep "Red Hat Linux release 7" /etc/redhat-release >/dev/null 2>&1; then + +$as_echo "#define HAVE_SYS_UCONTEXT_H 0" >>confdefs.h + + ac_cv_header_sys_ucontext_h=no + else + for ac_header in sys/ucontext.h do : ac_fn_c_check_header_mongrel "$LINENO" "sys/ucontext.h" "ac_cv_header_sys_ucontext_h" "$ac_includes_default" if test "x$ac_cv_header_sys_ucontext_h" = x""yes; then : @@ -20295,6 +20282,19 @@ fi done # ucontext on OS X 10.6 (at least) + fi + for ac_header in cygwin/signal.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "cygwin/signal.h" "ac_cv_header_cygwin_signal_h" "$ac_includes_default" +if test "x$ac_cv_header_cygwin_signal_h" = x""yes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_CYGWIN_SIGNAL_H 1 +_ACEOF + +fi + +done + # ucontext on cywgin { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to access the program counter from a struct ucontext" >&5 $as_echo_n "checking how to access the program counter from a struct ucontext... 
" >&6; } pc_fields=" uc_mcontext.gregs[REG_PC]" # Solaris x86 (32 + 64 bit) @@ -20304,6 +20304,7 @@ $as_echo_n "checking how to access the program counter from a struct ucontext... pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[PT_NIP]" # Linux (ppc) pc_fields="$pc_fields uc_mcontext.gregs[R15]" # Linux (arm old [untested]) pc_fields="$pc_fields uc_mcontext.arm_pc" # Linux (arm new [untested]) + pc_fields="$pc_fields uc_mcontext.gp_regs[PT_NIP]" # Suse SLES 11 (ppc64) pc_fields="$pc_fields uc_mcontext.mc_eip" # FreeBSD (i386) pc_fields="$pc_fields uc_mcontext.mc_rip" # FreeBSD (x86_64 [untested]) pc_fields="$pc_fields uc_mcontext.__gregs[_REG_EIP]" # NetBSD (i386) @@ -20317,7 +20318,32 @@ $as_echo_n "checking how to access the program counter from a struct ucontext... pc_field_found=false for pc_field in $pc_fields; do if ! $pc_field_found; then - if test "x$ac_cv_header_sys_ucontext_h" = xyes; then + # Prefer sys/ucontext.h to ucontext.h, for OS X's sake. + if test "x$ac_cv_header_cygwin_signal_h" = xyes; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define _GNU_SOURCE 1 + #include <cygwin/signal.h> +int +main () +{ +ucontext_t u; return u.$pc_field == 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +cat >>confdefs.h <<_ACEOF +#define PC_FROM_UCONTEXT $pc_field +_ACEOF + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pc_field" >&5 +$as_echo "$pc_field" >&6; } + pc_field_found=true +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + elif test "x$ac_cv_header_sys_ucontext_h" = xyes; then cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #define _GNU_SOURCE 1 @@ -20341,7 +20367,7 @@ $as_echo "$pc_field" >&6; } pc_field_found=true fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - else + elif test "x$ac_cv_header_ucontext_h" = xyes; then cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #define _GNU_SOURCE 1 @@ -20365,6 +20391,29 @@ $as_echo "$pc_field" >&6; } pc_field_found=true fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + else # hope some standard header gives it to us + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +ucontext_t u; return u.$pc_field == 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +cat >>confdefs.h <<_ACEOF +#define PC_FROM_UCONTEXT $pc_field +_ACEOF + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pc_field" >&5 +$as_echo "$pc_field" >&6; } + pc_field_found=true +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi fi done @@ -21500,6 +21549,15 @@ else fi +# Redhat 7 (and below?) has sys/ucontext.h, but if you try to #include +# it directly, the compiler gets upset. So we pretend we don't have +# it. +if cat /etc/redhat-release 2>/dev/null | grep "Red Hat Linux release 7" >/dev/null 2>&1; then + +$as_echo "#define HAVE_SYS_UCONTEXT_H 0" >>confdefs.h + +fi + # Export the --enable flags we set above. We do this at the end so # other configure rules can enable or disable targets based on what # they find. diff --git a/configure.ac b/configure.ac index 8252ff5..e93cdc4 100644 --- a/configure.ac +++ b/configure.ac @@ -119,9 +119,6 @@ AC_CHECK_HEADERS(execinfo.h) # for stacktrace? and heapchecker_unittest AC_CHECK_HEADERS(libunwind.h) # for stacktrace AC_CHECK_HEADERS(unwind.h) # for stacktrace AC_CHECK_HEADERS(sched.h) # for being nice in our spinlock code -AC_CHECK_HEADERS(ucontext.h) # for profiler.cc (cpu profiler) -AC_CHECK_HEADERS(sys/ucontext.h) # ucontext on OS X 10.6 (at least) -AC_CHECK_HEADERS(cygwin/signal.h) # ucontext on cywgin AC_CHECK_HEADERS(conflict-signal.h) # defined on some windows platforms? 
AC_CHECK_HEADERS(sys/prctl.h) # for thread_lister (needed by leak-checker) AC_CHECK_HEADERS(linux/ptrace.h)# also needed by leak-checker @@ -133,6 +130,10 @@ AC_CHECK_HEADERS(fcntl.h) # for tcmalloc_unittest AC_CHECK_HEADERS(grp.h) # for heapchecker_unittest AC_CHECK_HEADERS(pwd.h) # for heapchecker_unittest AC_CHECK_HEADERS(sys/resource.h) # for memalign_unittest.cc +AC_CHECK_HEADERS(valgrind.h) # we have a local copy if this isn't found +# We also need <ucontext.h>/<sys/ucontext.h>, but we get those from +# AC_PC_FROM_UCONTEXT, below. + # We override a lot of memory allocation routines, not all of which are # standard. For those the system doesn't declare, we'll declare ourselves. AC_CHECK_DECLS([cfree, @@ -311,6 +312,13 @@ AH_BOTTOM([ ]) AM_CONDITIONAL(MINGW, expr $host : '.*-mingw' >/dev/null 2>&1) +# Redhat 7 (and below?) has sys/ucontext.h, but if you try to #include +# it directly, the compiler gets upset. So we pretend we don't have +# it. +if cat /etc/redhat-release 2>/dev/null | grep "Red Hat Linux release 7" >/dev/null 2>&1; then +AC_DEFINE(HAVE_SYS_UCONTEXT_H, 0, [<sys/ucontext.h> is broken on redhat 7]) +fi + # Export the --enable flags we set above. We do this at the end so # other configure rules can enable or disable targets based on what # they find. diff --git a/doc/heap_checker.html b/doc/heap_checker.html index 87e497a..caf46ef 100644 --- a/doc/heap_checker.html +++ b/doc/heap_checker.html @@ -351,6 +351,15 @@ checking.</p> </tr> <tr valign=top> + <td><code>HEAP_CHECK_POINTER_SOURCE_ALIGNMENT</code></td> + <td>Default: sizeof(void*)</td> + <td> + Alignment at which all pointers in memory are supposed to be located. + Use 1 if any alignment is ok. 
+ </td> +</tr> + +<tr valign=top> <td><code>PPROF_PATH</code></td> <td>Default: pprof</td> <td> diff --git a/m4/pc_from_ucontext.m4 b/m4/pc_from_ucontext.m4 index daffddb..19ec347 100644 --- a/m4/pc_from_ucontext.m4 +++ b/m4/pc_from_ucontext.m4 @@ -11,7 +11,15 @@ AC_DEFUN([AC_PC_FROM_UCONTEXT], [AC_CHECK_HEADERS(ucontext.h) - AC_CHECK_HEADERS(sys/ucontext.h) # ucontext on OS X 10.6 (at least) + # Redhat 7 has <sys/ucontext.h>, but it barfs if we #include it directly + # (this was fixed in later redhats). <ucontext.h> works fine, so use that. + if grep "Red Hat Linux release 7" /etc/redhat-release >/dev/null 2>&1; then + AC_DEFINE(HAVE_SYS_UCONTEXT_H, 0, [<sys/ucontext.h> is broken on redhat 7]) + ac_cv_header_sys_ucontext_h=no + else + AC_CHECK_HEADERS(sys/ucontext.h) # ucontext on OS X 10.6 (at least) + fi + AC_CHECK_HEADERS(cygwin/signal.h) # ucontext on cywgin AC_MSG_CHECKING([how to access the program counter from a struct ucontext]) pc_fields=" uc_mcontext.gregs[[REG_PC]]" # Solaris x86 (32 + 64 bit) pc_fields="$pc_fields uc_mcontext.gregs[[REG_EIP]]" # Linux (i386) @@ -20,6 +28,7 @@ AC_DEFUN([AC_PC_FROM_UCONTEXT], pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[[PT_NIP]]" # Linux (ppc) pc_fields="$pc_fields uc_mcontext.gregs[[R15]]" # Linux (arm old [untested]) pc_fields="$pc_fields uc_mcontext.arm_pc" # Linux (arm new [untested]) + pc_fields="$pc_fields uc_mcontext.gp_regs[[PT_NIP]]" # Suse SLES 11 (ppc64) pc_fields="$pc_fields uc_mcontext.mc_eip" # FreeBSD (i386) pc_fields="$pc_fields uc_mcontext.mc_rip" # FreeBSD (x86_64 [untested]) pc_fields="$pc_fields uc_mcontext.__gregs[[_REG_EIP]]" # NetBSD (i386) @@ -33,7 +42,16 @@ AC_DEFUN([AC_PC_FROM_UCONTEXT], pc_field_found=false for pc_field in $pc_fields; do if ! $pc_field_found; then - if test "x$ac_cv_header_sys_ucontext_h" = xyes; then + # Prefer sys/ucontext.h to ucontext.h, for OS X's sake. 
+ if test "x$ac_cv_header_cygwin_signal_h" = xyes; then + AC_TRY_COMPILE([#define _GNU_SOURCE 1 + #include <cygwin/signal.h>], + [ucontext_t u; return u.$pc_field == 0;], + AC_DEFINE_UNQUOTED(PC_FROM_UCONTEXT, $pc_field, + How to access the PC from a struct ucontext) + AC_MSG_RESULT([$pc_field]) + pc_field_found=true) + elif test "x$ac_cv_header_sys_ucontext_h" = xyes; then AC_TRY_COMPILE([#define _GNU_SOURCE 1 #include <sys/ucontext.h>], [ucontext_t u; return u.$pc_field == 0;], @@ -41,7 +59,7 @@ AC_DEFUN([AC_PC_FROM_UCONTEXT], How to access the PC from a struct ucontext) AC_MSG_RESULT([$pc_field]) pc_field_found=true) - else + elif test "x$ac_cv_header_ucontext_h" = xyes; then AC_TRY_COMPILE([#define _GNU_SOURCE 1 #include <ucontext.h>], [ucontext_t u; return u.$pc_field == 0;], @@ -49,6 +67,13 @@ AC_DEFUN([AC_PC_FROM_UCONTEXT], How to access the PC from a struct ucontext) AC_MSG_RESULT([$pc_field]) pc_field_found=true) + else # hope some standard header gives it to us + AC_TRY_COMPILE([], + [ucontext_t u; return u.$pc_field == 0;], + AC_DEFINE_UNQUOTED(PC_FROM_UCONTEXT, $pc_field, + How to access the PC from a struct ucontext) + AC_MSG_RESULT([$pc_field]) + pc_field_found=true) fi fi done diff --git a/src/base/dynamic_annotations.c b/src/base/dynamic_annotations.c new file mode 100644 index 0000000..65c4158 --- /dev/null +++ b/src/base/dynamic_annotations.c @@ -0,0 +1,152 @@ +/* Copyright (c) 2008-2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Kostya Serebryany + */ + +#ifdef __cplusplus +# error "This file should be built as pure C to avoid name mangling" +#endif + +#include <stdlib.h> +#include <string.h> + +#include "base/dynamic_annotations.h" + +#ifdef __GNUC__ +/* valgrind.h uses gcc extensions so it won't build with other compilers */ +# ifdef HAVE_VALGRIND_H /* prefer the user's copy if they have it */ +# include <valgrind.h> +# else /* otherwise just use the copy that we have */ +# include "third_party/valgrind.h" +# endif +#endif + +/* Each function is empty and called (via a macro) only in debug mode. + The arguments are captured by dynamic tools at runtime. 
*/ + +#if DYNAMIC_ANNOTATIONS_ENABLED == 1 + +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed) {} +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier) {} + +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock){} +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv){} +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv){} +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq){} +void AnnotateNewMemory(const char *file, int line, + const volatile void *mem, + long size){} +void AnnotateExpectRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void AnnotateBenignRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void 
AnnotateBenignRaceSized(const char *file, int line, + const volatile void *mem, + long size, + const char *description) { + long i; + for (i = 0; i < size; i++) { + AnnotateBenignRace(file, line, (char*)(mem) + i, description); + } +} +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu){} +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg){} +void AnnotateThreadName(const char *file, int line, + const char *name){} +void AnnotateIgnoreReadsBegin(const char *file, int line){} +void AnnotateIgnoreReadsEnd(const char *file, int line){} +void AnnotateIgnoreWritesBegin(const char *file, int line){} +void AnnotateIgnoreWritesEnd(const char *file, int line){} +void AnnotateNoOp(const char *file, int line, + const volatile void *arg){} +void AnnotateFlushState(const char *file, int line){} + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 1 */ + +static int GetRunningOnValgrind(void) { +#ifdef RUNNING_ON_VALGRIND + if (RUNNING_ON_VALGRIND) return 1; +#endif + // TODO(csilvers): use GetenvBeforeMain() instead? Will need to + // change it to be extern "C". + char *running_on_valgrind_str = getenv("RUNNING_ON_VALGRIND"); + if (running_on_valgrind_str) { + return strcmp(running_on_valgrind_str, "0") != 0; + } + return 0; +} + +/* See the comments in dynamic_annotations.h */ +int RunningOnValgrind(void) { + static volatile int running_on_valgrind = -1; + /* C doesn't have thread-safe initialization of statics, and we + don't want to depend on pthread_once here, so hack it. */ + int local_running_on_valgrind = running_on_valgrind; + if (local_running_on_valgrind == -1) + running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind(); + return local_running_on_valgrind; +} diff --git a/src/base/dynamic_annotations.cc b/src/base/dynamic_annotations.cc deleted file mode 100644 index c8bbcd7..0000000 --- a/src/base/dynamic_annotations.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2008, Google Inc. 
- * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Kostya Serebryany - */ - -#include <config.h> -#include <stdlib.h> -#include <string.h> - -#include "base/dynamic_annotations.h" -#include "base/sysinfo.h" - -// Each function is empty and called (via a macro) only in debug mode. -// The arguments are captured by dynamic tools at runtime. 
- -extern "C" void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock){} -extern "C" void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock){} -extern "C" void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w){} -extern "C" void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w){} -extern "C" void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock){} -extern "C" void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv){} -extern "C" void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv){} -extern "C" void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, - long size){} -extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line, - const volatile void *address, - long size){} -extern "C" void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq){} -extern "C" void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq){} -extern "C" void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq){} -extern "C" void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq){} -extern "C" void AnnotateNewMemory(const char *file, int line, - const volatile void *mem, - long size){} -extern "C" void AnnotateExpectRace(const char *file, int line, - const volatile void *mem, - const char *description){} -extern "C" void AnnotateBenignRace(const char *file, int line, - const volatile void *mem, - const char *description){} -extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu){} -extern "C" void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg){} -extern "C" void AnnotateThreadName(const char *file, int line, - const char *name){} -extern "C" void 
AnnotateIgnoreReadsBegin(const char *file, int line){} -extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line){} -extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line){} -extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line){} -extern "C" void AnnotateNoOp(const char *file, int line, - const volatile void *arg){} - -static int GetRunningOnValgrind() { - const char *running_on_valgrind_str = GetenvBeforeMain("RUNNING_ON_VALGRIND"); - if (running_on_valgrind_str) { - return strcmp(running_on_valgrind_str, "0") != 0; - } - return 0; -} - -// When running under valgrind, this function will be intercepted -// and a non-zero value will be returned. -// Some valgrind-based tools (e.g. callgrind) do not intercept functions, -// so we also read environment variable. -extern "C" int RunningOnValgrind() { - static int running_on_valgrind = GetRunningOnValgrind(); - return running_on_valgrind; -} diff --git a/src/base/dynamic_annotations.h b/src/base/dynamic_annotations.h index a2a268f..3980b24 100644 --- a/src/base/dynamic_annotations.h +++ b/src/base/dynamic_annotations.h @@ -1,10 +1,10 @@ /* Copyright (c) 2008, Google Inc. * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * + * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above @@ -14,7 +14,7 @@ * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. 
- * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -31,445 +31,463 @@ * Author: Kostya Serebryany */ -// This file defines dynamic annotations for use with dynamic analysis -// tool such as valgrind, PIN, etc. -// -// Dynamic annotation is a source code annotation that affects -// the generated code (that is, the annotation is not a comment). -// Each such annotation is attached to a particular -// instruction and/or to a particular object (address) in the program. -// -// The annotations that should be used by users are macros in all upper-case -// (e.g., ANNOTATE_NEW_MEMORY). -// -// Actual implementation of these macros may differ depending on the -// dynamic analysis tool being used. -// -// This file supports the following dynamic analysis tools: -// - None (NDEBUG is defined). -// Macros are defined empty. -// - Helgrind (NDEBUG is not defined). -// Macros are defined as calls to non-inlinable empty functions -// that are intercepted by helgrind. -// +/* This file defines dynamic annotations for use with dynamic analysis + tool such as valgrind, PIN, etc. + + Dynamic annotation is a source code annotation that affects + the generated code (that is, the annotation is not a comment). + Each such annotation is attached to a particular + instruction and/or to a particular object (address) in the program. + + The annotations that should be used by users are macros in all upper-case + (e.g., ANNOTATE_NEW_MEMORY). + + Actual implementation of these macros may differ depending on the + dynamic analysis tool being used. + + See http://code.google.com/p/data-race-test/ for more information. + + This file supports the following dynamic analysis tools: + - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero). + Macros are defined empty. 
+ - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1). + Macros are defined as calls to non-inlinable empty functions + that are intercepted by Valgrind. */ + #ifndef BASE_DYNAMIC_ANNOTATIONS_H_ #define BASE_DYNAMIC_ANNOTATIONS_H_ -#include "base/thread_annotations.h" - -// All the annotation macros are in effect only in debug mode. -#ifndef NDEBUG - - // ------------------------------------------------------------- - // Annotations useful when implementing condition variables such as CondVar, - // using conditional critical sections (Await/LockWhen) and when constructing - // user-defined synchronization mechanisms. - // - // The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can - // be used to define happens-before arcs in user-defined synchronization - // mechanisms: the race detector will infer an arc from the former to the - // latter when they share the same argument pointer. - // - // Example 1 (reference counting): - // - // void Unref() { - // ANNOTATE_HAPPENS_BEFORE(&refcount_); - // if (AtomicDecrementByOne(&refcount_) == 0) { - // ANNOTATE_HAPPENS_AFTER(&refcount_); - // delete this; - // } - // } - // - // Example 2 (message queue): - // - // void MyQueue::Put(Type *e) { - // MutexLock lock(&mu_); - // ANNOTATE_HAPPENS_BEFORE(e); - // PutElementIntoMyQueue(e); - // } - // - // Type *MyQueue::Get() { - // MutexLock lock(&mu_); - // Type *e = GetElementFromMyQueue(); - // ANNOTATE_HAPPENS_AFTER(e); - // return e; - // } - // - // Note: when possible, please use the existing reference counting and message - // queue implementations instead of inventing new ones. - - // Report that wait on the condition variable at address "cv" has succeeded - // and the lock at address "lock" is held. 
+#ifndef DYNAMIC_ANNOTATIONS_ENABLED +# define DYNAMIC_ANNOTATIONS_ENABLED 0 +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 + + /* ------------------------------------------------------------- + Annotations useful when implementing condition variables such as CondVar, + using conditional critical sections (Await/LockWhen) and when constructing + user-defined synchronization mechanisms. + + The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can + be used to define happens-before arcs in user-defined synchronization + mechanisms: the race detector will infer an arc from the former to the + latter when they share the same argument pointer. + + Example 1 (reference counting): + + void Unref() { + ANNOTATE_HAPPENS_BEFORE(&refcount_); + if (AtomicDecrementByOne(&refcount_) == 0) { + ANNOTATE_HAPPENS_AFTER(&refcount_); + delete this; + } + } + + Example 2 (message queue): + + void MyQueue::Put(Type *e) { + MutexLock lock(&mu_); + ANNOTATE_HAPPENS_BEFORE(e); + PutElementIntoMyQueue(e); + } + + Type *MyQueue::Get() { + MutexLock lock(&mu_); + Type *e = GetElementFromMyQueue(); + ANNOTATE_HAPPENS_AFTER(e); + return e; + } + + Note: when possible, please use the existing reference counting and message + queue implementations instead of inventing new ones. */ + + /* Report that wait on the condition variable at address "cv" has succeeded + and the lock at address "lock" is held. */ #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) - // Report that wait on the condition variable at "cv" has succeeded. Variant - // w/o lock. + /* Report that wait on the condition variable at "cv" has succeeded. Variant + w/o lock. */ #define ANNOTATE_CONDVAR_WAIT(cv) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) - // Report that we are about to signal on the condition variable at address - // "cv". + /* Report that we are about to signal on the condition variable at address + "cv". 
*/ #define ANNOTATE_CONDVAR_SIGNAL(cv) \ AnnotateCondVarSignal(__FILE__, __LINE__, cv) - // Report that we are about to signal_all on the condition variable at "cv". + /* Report that we are about to signal_all on the condition variable at "cv". */ #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) - // Annotations for user-defined synchronization mechanisms. + /* Annotations for user-defined synchronization mechanisms. */ #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) - // Report that the bytes in the range [pointer, pointer+size) are about - // to be published safely. The race checker will create a happens-before - // arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to - // subsequent accesses to this memory. + /* Report that the bytes in the range [pointer, pointer+size) are about + to be published safely. The race checker will create a happens-before + arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + subsequent accesses to this memory. + Note: this annotation may not work properly if the race detector uses + sampling, i.e. does not observe all memory accesses. + */ #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) - // Report that the bytes in the range [pointer, pointer+size) are not shared - // between threads any more and can be safely used by the current thread w/o - // synchronization. The race checker will create a happens-before arc from - // all previous accesses to this memory to this call. - // - // This annotation could be applied to complex objects, such as STL - // containers, with one condition: the accesses to the object itself - // and its internal data should not be separated with any synchronization. 
- // - // Example that works: - // - // map<int, int> the_map; - // void Thread1() { - // MutexLock lock(&mu); - // // Ok: accesses to the_map and its internal data is not separated by - // // synchronization. - // the_map[1]++; - // } - // void Thread2() { - // { - // MutexLock lock(&mu); - // ... - // // because of some reason we know that the_map will not be used by - // // other threads any more - // ANNOTATE_UNPUBLISH_MEMORY_RANGE(&the_map, sizeof(the_map)); - // } - // the_map->DoSomething(); - // } - // - // Example that does not work (due to the way happens-before arcs are - // represented in some race detectors): - // - // void Thread1() { - // MutexLock lock(&mu); - // int *guts_of_the_map = &(*the_map)[1]; - // // we have some synchronization between access to 'c' and its guts. - // // This will make ANNOTATE_UNPUBLISH_MEMORY_RANGE in Thread2 useless. - // some_other_lock_or_other_synchronization_utility.Lock(); - // (*guts_of_the_map)++; - // ... - // } - // - // void Thread1() { // same as above... + /* DEPRECATED. Don't use it. */ #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) - // This annotation should be used to annotate thread-safe swapping of - // containers. Required only when using hybrid (i.e. not pure happens-before) - // detectors. - // - // This annotation has the same limitation as ANNOTATE_UNPUBLISH_MEMORY_RANGE - // (see above). - // - // Example: - // map<int, int> the_map; - // void Thread1() { - // MutexLock lock(&mu); - // the_map[1]++; - // } - // void Thread2() { - // map<int,int> tmp; - // { - // MutexLock lock(&mu); - // the_map.swap(tmp); - // ANNOTATE_SWAP_MEMORY_RANGE(&the_map, sizeof(the_map)); - // } - // tmp->DoSomething(); - // } + /* DEPRECATED. Don't use it. 
*/ #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ do { \ ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ } while (0) - // Instruct the tool to create a happens-before arc between mu->Unlock() and - // mu->Lock(). This annotation may slow down the race detector and hide real - // races. Normally it is used only when it would be difficult to annotate each - // of the mutex's critical sections individually using the annotations above. - // This annotation makes sense only for hybrid race detectors. For pure - // happens-before detectors this is a no-op. For more details see - // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . + /* Instruct the tool to create a happens-before arc between mu->Unlock() and + mu->Lock(). This annotation may slow down the race detector and hide real + races. Normally it is used only when it would be difficult to annotate each + of the mutex's critical sections individually using the annotations above. + This annotation makes sense only for hybrid race detectors. For pure + happens-before detectors this is a no-op. For more details see + http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */ #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. + /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */ #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - // ------------------------------------------------------------- - // Annotations useful when defining memory allocators, or when memory that - // was protected in one way starts to be protected in another. + /* ------------------------------------------------------------- + Annotations useful when defining memory allocators, or when memory that + was protected in one way starts to be protected in another. 
*/ - // Report that a new memory at "address" of size "size" has been allocated. - // This might be used when the memory has been retrieved from a free list and - // is about to be reused, or when a the locking discipline for a variable - // changes. + /* Report that a new memory at "address" of size "size" has been allocated. + This might be used when the memory has been retrieved from a free list and + is about to be reused, or when a the locking discipline for a variable + changes. */ #define ANNOTATE_NEW_MEMORY(address, size) \ AnnotateNewMemory(__FILE__, __LINE__, address, size) - // ------------------------------------------------------------- - // Annotations useful when defining FIFO queues that transfer data between - // threads. + /* ------------------------------------------------------------- + Annotations useful when defining FIFO queues that transfer data between + threads. */ - // Report that the producer-consumer queue (such as ProducerConsumerQueue) at - // address "pcq" has been created. The ANNOTATE_PCQ_* annotations - // should be used only for FIFO queues. For non-FIFO queues use - // ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). + /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at + address "pcq" has been created. The ANNOTATE_PCQ_* annotations + should be used only for FIFO queues. For non-FIFO queues use + ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */ #define ANNOTATE_PCQ_CREATE(pcq) \ AnnotatePCQCreate(__FILE__, __LINE__, pcq) - // Report that the queue at address "pcq" is about to be destroyed. + /* Report that the queue at address "pcq" is about to be destroyed. */ #define ANNOTATE_PCQ_DESTROY(pcq) \ AnnotatePCQDestroy(__FILE__, __LINE__, pcq) - // Report that we are about to put an element into a FIFO queue at address - // "pcq". + /* Report that we are about to put an element into a FIFO queue at address + "pcq". 
*/ #define ANNOTATE_PCQ_PUT(pcq) \ AnnotatePCQPut(__FILE__, __LINE__, pcq) - // Report that we've just got an element from a FIFO queue at address "pcq". + /* Report that we've just got an element from a FIFO queue at address "pcq". */ #define ANNOTATE_PCQ_GET(pcq) \ AnnotatePCQGet(__FILE__, __LINE__, pcq) - // ------------------------------------------------------------- - // Annotations that suppress errors. It is usually better to express the - // program's synchronization using the other annotations, but these can - // be used when all else fails. - - // Report that we may have a benign race on at "address". - // Insert at the point where "address" has been allocated, preferably close - // to the point where the race happens. - // See also ANNOTATE_BENIGN_RACE_STATIC. - #define ANNOTATE_BENIGN_RACE(address, description) \ - AnnotateBenignRace(__FILE__, __LINE__, address, description) - - // Request the analysis tool to ignore all reads in the current thread - // until ANNOTATE_IGNORE_READS_END is called. - // Useful to ignore intentional racey reads, while still checking - // other reads and all writes. - // See also ANNOTATE_UNPROTECTED_READ. + /* ------------------------------------------------------------- + Annotations that suppress errors. It is usually better to express the + program's synchronization using the other annotations, but these can + be used when all else fails. */ + + /* Report that we may have a benign race at "pointer", with size + "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the + point where "pointer" has been allocated, preferably close to the point + where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */ + #define ANNOTATE_BENIGN_RACE(pointer, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ + sizeof(*(pointer)), description) + + /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to + the memory range [address, address+size). 
*/ + #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) + + /* Request the analysis tool to ignore all reads in the current thread + until ANNOTATE_IGNORE_READS_END is called. + Useful to ignore intentional racey reads, while still checking + other reads and all writes. + See also ANNOTATE_UNPROTECTED_READ. */ #define ANNOTATE_IGNORE_READS_BEGIN() \ AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - // Stop ignoring reads. + /* Stop ignoring reads. */ #define ANNOTATE_IGNORE_READS_END() \ AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - // Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. + /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ #define ANNOTATE_IGNORE_WRITES_BEGIN() \ AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - // Stop ignoring writes. + /* Stop ignoring writes. */ #define ANNOTATE_IGNORE_WRITES_END() \ AnnotateIgnoreWritesEnd(__FILE__, __LINE__) - // Start ignoring all memory accesses (reads and writes). + /* Start ignoring all memory accesses (reads and writes). */ #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ do {\ ANNOTATE_IGNORE_READS_BEGIN();\ ANNOTATE_IGNORE_WRITES_BEGIN();\ }while(0)\ - // Stop ignoring all memory accesses. + /* Stop ignoring all memory accesses. */ #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ do {\ ANNOTATE_IGNORE_WRITES_END();\ ANNOTATE_IGNORE_READS_END();\ }while(0)\ - // ------------------------------------------------------------- - // Annotations useful for debugging. + /* ------------------------------------------------------------- + Annotations useful for debugging. */ - // Request to trace every access to "address". + /* Request to trace every access to "address". */ #define ANNOTATE_TRACE_MEMORY(address) \ AnnotateTraceMemory(__FILE__, __LINE__, address) - // Report the current thread name to a race detector. + /* Report the current thread name to a race detector. 
*/ #define ANNOTATE_THREAD_NAME(name) \ AnnotateThreadName(__FILE__, __LINE__, name) - // ------------------------------------------------------------- - // Annotations useful when implementing locks. They are not - // normally needed by modules that merely use locks. - // The "lock" argument is a pointer to the lock object. + /* ------------------------------------------------------------- + Annotations useful when implementing locks. They are not + normally needed by modules that merely use locks. + The "lock" argument is a pointer to the lock object. */ - // Report that a lock has been created at address "lock". + /* Report that a lock has been created at address "lock". */ #define ANNOTATE_RWLOCK_CREATE(lock) \ AnnotateRWLockCreate(__FILE__, __LINE__, lock) - // Report that the lock at address "lock" is about to be destroyed. + /* Report that the lock at address "lock" is about to be destroyed. */ #define ANNOTATE_RWLOCK_DESTROY(lock) \ AnnotateRWLockDestroy(__FILE__, __LINE__, lock) - // Report that the lock at address "lock" has been acquired. - // is_w=1 for writer lock, is_w=0 for reader lock. + /* Report that the lock at address "lock" has been acquired. + is_w=1 for writer lock, is_w=0 for reader lock. */ #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) - // Report that the lock at address "lock" is about to be released. + /* Report that the lock at address "lock" is about to be released. */ #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) - // ------------------------------------------------------------- - // Annotations useful for testing race detectors. + /* ------------------------------------------------------------- + Annotations useful when implementing barriers. They are not + normally needed by modules that merely use barriers. + The "barrier" argument is a pointer to the barrier object. 
*/ + + /* Report that the "barrier" has been initialized with initial "count". + If 'reinitialization_allowed' is true, initialization is allowed to happen + multiple times w/o calling barrier_destroy() */ + #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ + AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ + reinitialization_allowed) + + /* Report that we are about to enter barrier_wait("barrier"). */ + #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ + AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) + + /* Report that we just exited barrier_wait("barrier"). */ + #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ + AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) + + /* Report that the "barrier" has been destroyed. */ + #define ANNOTATE_BARRIER_DESTROY(barrier) \ + AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) + + /* ------------------------------------------------------------- + Annotations useful for testing race detectors. */ - // Report that we expect a race on the variable at "address". - // Use only in unit tests for a race detector. + /* Report that we expect a race on the variable at "address". + Use only in unit tests for a race detector. */ #define ANNOTATE_EXPECT_RACE(address, description) \ AnnotateExpectRace(__FILE__, __LINE__, address, description) - // A no-op. Insert where you like to test the interceptors. + /* A no-op. Insert where you like to test the interceptors. 
*/ #define ANNOTATE_NO_OP(arg) \ AnnotateNoOp(__FILE__, __LINE__, arg) -#else // NDEBUG is defined - - #define ANNOTATE_RWLOCK_CREATE(lock) // empty - #define ANNOTATE_RWLOCK_DESTROY(lock) // empty - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) // empty - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) // empty - #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) // empty - #define ANNOTATE_CONDVAR_WAIT(cv) // empty - #define ANNOTATE_CONDVAR_SIGNAL(cv) // empty - #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) // empty - #define ANNOTATE_HAPPENS_BEFORE(obj) // empty - #define ANNOTATE_HAPPENS_AFTER(obj) // empty - #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) // empty - #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) // empty - #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) // empty - #define ANNOTATE_PCQ_CREATE(pcq) // empty - #define ANNOTATE_PCQ_DESTROY(pcq) // empty - #define ANNOTATE_PCQ_PUT(pcq) // empty - #define ANNOTATE_PCQ_GET(pcq) // empty - #define ANNOTATE_NEW_MEMORY(address, size) // empty - #define ANNOTATE_EXPECT_RACE(address, description) // empty - #define ANNOTATE_BENIGN_RACE(address, description) // empty - #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty - #define ANNOTATE_TRACE_MEMORY(arg) // empty - #define ANNOTATE_THREAD_NAME(name) // empty - #define ANNOTATE_IGNORE_READS_BEGIN() // empty - #define ANNOTATE_IGNORE_READS_END() // empty - #define ANNOTATE_IGNORE_WRITES_BEGIN() // empty - #define ANNOTATE_IGNORE_WRITES_END() // empty - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() // empty - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() // empty - #define ANNOTATE_NO_OP(arg) // empty - -#endif // NDEBUG - -// Use the macros above rather than using these functions directly. 
-extern "C" void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock); -extern "C" void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock); -extern "C" void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w); -extern "C" void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w); -extern "C" void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock); -extern "C" void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv); -extern "C" void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv); -extern "C" void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, - long size); -extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line, - const volatile void *address, - long size); -extern "C" void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq); -extern "C" void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq); -extern "C" void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq); -extern "C" void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq); -extern "C" void AnnotateNewMemory(const char *file, int line, + /* Force the race detector to flush its state. The actual effect depends on + * the implementation of the detector. 
*/ + #define ANNOTATE_FLUSH_STATE() \ + AnnotateFlushState(__FILE__, __LINE__) + + +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ + + #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ + #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ + #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ + #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ + #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ + #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ + #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ + #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ + #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ + #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ + #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_PCQ_CREATE(pcq) /* empty */ + #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ + #define ANNOTATE_PCQ_PUT(pcq) /* empty */ + #define ANNOTATE_PCQ_GET(pcq) /* empty */ + #define ANNOTATE_NEW_MEMORY(address, size) /* empty */ + #define ANNOTATE_EXPECT_RACE(address, description) /* empty */ + #define ANNOTATE_BENIGN_RACE(address, description) /* empty */ + #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ + #define ANNOTATE_TRACE_MEMORY(arg) /* empty */ + #define ANNOTATE_THREAD_NAME(name) /* empty */ + #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_READS_END() /* empty */ + #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_WRITES_END() /* empty */ + 
#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ + #define ANNOTATE_NO_OP(arg) /* empty */ + #define ANNOTATE_FLUSH_STATE() /* empty */ + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ + +/* Use the macros above rather than using these functions directly. */ +#ifdef __cplusplus +extern "C" { +#endif +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed); +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier); +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock); +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv); +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv); +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +void AnnotateUnpublishMemoryRange(const char *file, int line, const volatile void *address, long size); -extern "C" void AnnotateExpectRace(const char *file, int line, - const volatile void *address, - const char *description); -extern "C" void AnnotateBenignRace(const char *file, int line, - const volatile void *address, - const char *description); -extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu); -extern "C" 
void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg); -extern "C" void AnnotateThreadName(const char *file, int line, - const char *name); -extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line); -extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line); -extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line); -extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line); -extern "C" void AnnotateNoOp(const char *file, int line, - const volatile void *arg); - -#ifndef NDEBUG - - // ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. - // - // Instead of doing - // ANNOTATE_IGNORE_READS_BEGIN(); - // ... = x; - // ANNOTATE_IGNORE_READS_END(); - // one can use - // ... = ANNOTATE_UNPROTECTED_READ(x); +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq); +void AnnotateNewMemory(const char *file, int line, + const volatile void *address, + long size); +void AnnotateExpectRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRaceSized(const char *file, int line, + const volatile void *address, + long size, + const char *description); +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu); +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg); +void AnnotateThreadName(const char *file, int line, + const char *name); +void AnnotateIgnoreReadsBegin(const char *file, int line); +void AnnotateIgnoreReadsEnd(const char *file, int line); +void AnnotateIgnoreWritesBegin(const char *file, int 
line); +void AnnotateIgnoreWritesEnd(const char *file, int line); +void AnnotateNoOp(const char *file, int line, + const volatile void *arg); +void AnnotateFlushState(const char *file, int line); + +/* Return non-zero value if running under valgrind. + + If "valgrind.h" is included into dynamic_annotations.c, + the regular valgrind mechanism will be used. + See http://valgrind.org/docs/manual/manual-core-adv.html about + RUNNING_ON_VALGRIND and other valgrind "client requests". + The file "valgrind.h" may be obtained by doing + svn co svn://svn.valgrind.org/valgrind/trunk/include + + If for some reason you can't use "valgrind.h" or want to fake valgrind, + there are two ways to make this function return non-zero: + - Use environment variable: export RUNNING_ON_VALGRIND=1 + - Make your tool intercept the function RunningOnValgrind() and + change its return value. + */ +int RunningOnValgrind(void); + +#ifdef __cplusplus +} +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus) + + /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. + + Instead of doing + ANNOTATE_IGNORE_READS_BEGIN(); + ... = x; + ANNOTATE_IGNORE_READS_END(); + one can use + ... = ANNOTATE_UNPROTECTED_READ(x); */ template <class T> - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) - NO_THREAD_SAFETY_ANALYSIS { + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) { ANNOTATE_IGNORE_READS_BEGIN(); T res = x; ANNOTATE_IGNORE_READS_END(); return res; } - - // Apply ANNOTATE_BENIGN_RACE to a static variable. + /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. 
*/ #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ namespace { \ class static_var ## _annotator { \ public: \ static_var ## _annotator() { \ - ANNOTATE_BENIGN_RACE(&static_var, \ + ANNOTATE_BENIGN_RACE_SIZED(&static_var, \ + sizeof(static_var), \ # static_var ": " description); \ } \ }; \ static static_var ## _annotator the ## static_var ## _annotator;\ } -#else // !NDEBUG +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ #define ANNOTATE_UNPROTECTED_READ(x) (x) - #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) // empty - -#endif // !NDEBUG - -// Return non-zero value if running under valgrind. -extern "C" int RunningOnValgrind(); + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ -#endif // BASE_DYNAMIC_ANNOTATIONS_H_ +#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */ diff --git a/src/base/low_level_alloc.cc b/src/base/low_level_alloc.cc index 2bbce54..7ca3953 100644 --- a/src/base/low_level_alloc.cc +++ b/src/base/low_level_alloc.cc @@ -210,8 +210,9 @@ static const intptr_t kMagicUnallocated = ~kMagicAllocated; namespace { class ArenaLock { public: - explicit ArenaLock(LowLevelAlloc::Arena *arena) : - left_(false), mask_valid_(false), arena_(arena) { + explicit ArenaLock(LowLevelAlloc::Arena *arena) + EXCLUSIVE_LOCK_FUNCTION(arena->mu) + : left_(false), mask_valid_(false), arena_(arena) { if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { // We've decided not to support async-signal-safe arena use until // there a demonstrated need. 
Here's how one could do it though @@ -228,7 +229,7 @@ namespace { this->arena_->mu.Lock(); } ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); } - void Leave() { + void Leave() UNLOCK_FUNCTION(arena_->mu) { this->arena_->mu.Unlock(); #if 0 if (this->mask_valid_) { diff --git a/src/config.h.in b/src/config.h.in index 1ad2642..49bbf0d 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -132,7 +132,7 @@ /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H -/* Define to 1 if you have the <sys/ucontext.h> header file. */ +/* <sys/ucontext.h> is broken on redhat 7 */ #undef HAVE_SYS_UCONTEXT_H /* Define to 1 if you have the <sys/wait.h> header file. */ @@ -150,6 +150,9 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H +/* Define to 1 if you have the <valgrind.h> header file. */ +#undef HAVE_VALGRIND_H + /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ diff --git a/src/debugallocation.cc b/src/debugallocation.cc index 1a9ddcb..949fbe9 100644 --- a/src/debugallocation.cc +++ b/src/debugallocation.cc @@ -1010,7 +1010,7 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) __THROW ATTRIBUTE_SECTION(google_malloc); -void* operator new(size_t size) +void* operator new(size_t size) throw (std::bad_alloc) ATTRIBUTE_SECTION(google_malloc); void* operator new(size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1018,7 +1018,7 @@ void operator delete(void* p) __THROW ATTRIBUTE_SECTION(google_malloc); void operator delete(void* p, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); -void* operator new[](size_t size) +void* operator new[](size_t size) throw (std::bad_alloc) ATTRIBUTE_SECTION(google_malloc); void* operator new[](size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1176,12 +1176,12 @@ extern "C" void* pvalloc(size_t size) __THROW { return p; } -extern "C" int 
mallopt(int cmd, int value) { +extern "C" int mallopt(int cmd, int value) __THROW { return BASE_MALLOPT(cmd, value); } #ifdef HAVE_STRUCT_MALLINFO -extern "C" struct mallinfo mallinfo(void) { +extern "C" struct mallinfo mallinfo(void) __THROW { return BASE_MALLINFO(); } #endif @@ -1239,7 +1239,7 @@ inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) { } } -void* operator new(size_t size) { +void* operator new(size_t size) throw (std::bad_alloc) { void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, false); MallocHook::InvokeNewHook(ptr, size); if (ptr == NULL) { @@ -1259,7 +1259,8 @@ void operator delete(void* ptr) __THROW { DebugDeallocate(ptr, MallocBlock::kNewType); } -// Compilers use this, though I can't see how it differs from normal delete. +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). void operator delete(void* ptr, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kNewType); @@ -1269,7 +1270,7 @@ void operator delete(void* ptr, const std::nothrow_t&) __THROW { // Alloc/free stuff for debug operator new[] & friends -void* operator new[](size_t size) { +void* operator new[](size_t size) throw (std::bad_alloc) { void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, false); MallocHook::InvokeNewHook(ptr, size); if (ptr == NULL) { @@ -1289,7 +1290,8 @@ void operator delete[](void* ptr) __THROW { DebugDeallocate(ptr, MallocBlock::kArrayNewType); } -// Compilers use this, though I can't see how it differs from normal delete. +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). 
void operator delete[](void* ptr, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kArrayNewType); @@ -1359,17 +1361,22 @@ class DebugMallocImplementation : public ParentImplementation { static DebugMallocImplementation debug_malloc_implementation; REGISTER_MODULE_INITIALIZER(debugallocation, { - MallocExtension::Register(&debug_malloc_implementation); - - // When the program exits, check all blocks still in the free queue for - // corruption. - atexit(DanglingWriteChecker); + // Either we or valgrind will control memory management. We + // register our extension if we're the winner. + if (RunningOnValgrind()) { + // Let Valgrind uses its own malloc (so don't register our extension). + } else { + MallocExtension::Register(&debug_malloc_implementation); + // When the program exits, check all blocks still in the free + // queue for corruption. + atexit(DanglingWriteChecker); + } }); #ifdef TCMALLOC_FOR_DEBUGALLOCATION // Redefine malloc_stats to use tcmalloc's implementation: -extern "C" void malloc_stats(void) { +extern "C" void malloc_stats(void) __THROW { do_malloc_stats(); } diff --git a/src/google/stacktrace.h b/src/google/stacktrace.h index 8188ce3..fd186d6 100644 --- a/src/google/stacktrace.h +++ b/src/google/stacktrace.h @@ -49,23 +49,23 @@ // Skips the most recent "skip_count" stack frames (also skips the // frame generated for the "GetStackFrames" routine itself), and then // records the pc values for up to the next "max_depth" frames in -// "pcs", and the corresponding stack frame sizes in "sizes". Returns -// the number of values recorded in "pcs"/"sizes". +// "result", and the corresponding stack frame sizes in "sizes". +// Returns the number of values recorded in "result"/"sizes". 
// // Example: // main() { foo(); } // foo() { bar(); } // bar() { -// void* pcs[10]; +// void* result[10]; // int sizes[10]; -// int depth = GetStackFrames(pcs, sizes, 10, 1); +// int depth = GetStackFrames(result, sizes, 10, 1); // } // // The GetStackFrames call will skip the frame for "bar". It will // return 2 and will produce pc values that map to the following // procedures: -// pcs[0] foo -// pcs[1] main +// result[0] foo +// result[1] main // (Actually, there may be a few more entries after "main" to account for // startup procedures.) // And corresponding stack frame sizes will also be recorded: @@ -76,15 +76,15 @@ // be identified. // // This routine may return fewer stack frame entries than are -// available. Also note that "pcs" and "sizes" must both be non-NULL. -extern PERFTOOLS_DLL_DECL int GetStackFrames(void** pcs, int* sizes, int max_depth, +// available. Also note that "result" and "sizes" must both be non-NULL. +extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, int skip_count); // Same as above, but to be used from a signal handler. The "uc" parameter // should be the pointer to ucontext_t which was passed as the 3rd parameter // to sa_sigaction signal handler. It may help the unwinder to get a // better stack trace under certain conditions. The "uc" may safely be NULL. -extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** pcs, int* sizes, int max_depth, +extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, int skip_count, const void *uc); // This is similar to the GetStackFrames routine, except that it returns diff --git a/src/google/tcmalloc.h.in b/src/google/tcmalloc.h.in index e5c873d..fbb70ab 100644 --- a/src/google/tcmalloc.h.in +++ b/src/google/tcmalloc.h.in @@ -60,7 +60,8 @@ #endif #ifdef __cplusplus -#include <new> // for nothrow_t +#include <new> // for std::nothrow_t + extern "C" { #endif // Returns a human-readable version string. 
If major, minor, @@ -91,16 +92,15 @@ extern "C" { #ifdef __cplusplus PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); - PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; - PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); - PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; - PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW; - PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, - const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW; } diff --git a/src/heap-checker.cc b/src/heap-checker.cc index 82a7adb..b539ed8 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -159,6 +159,23 @@ DEFINE_bool(heap_check_test_pointer_alignment, "Set to true to check if the found leak can be due to " "use of unaligned pointers"); +// Alignment at which all pointers in memory are supposed to be located; +// use 1 if any alignment is ok. +// heap_check_test_pointer_alignment flag guides if we try the value of 1. +// The larger it can be, the lesser is the chance of missing real leaks. +// +// sizeof(void)* is correct. However gold (the new linker) has a bug where it +// sometimes places global pointers on 4-byte boundaries, even when pointers +// are 8 bytes long. While we are fixing the linker, degrade to 4-byte +// alignment on all targets. 
http://b/1226481 +// +static const size_t kPointerSourceAlignment = sizeof(void*); +DEFINE_int32(heap_check_pointer_source_alignment, + EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT", + kPointerSourceAlignment), + "Alignment at which all pointers in memory are supposed to be " + "located. Use 1 if any alignment is ok."); + // A reasonable default to handle pointers inside of typical class objects: // Too low and we won't be able to traverse pointers to normally-used // nested objects and base parts of multiple-inherited objects. @@ -245,13 +262,6 @@ static bool constructor_heap_profiling = false; static const int heap_checker_info_level = 0; //---------------------------------------------------------------------- - -// Alignment at which all pointers in memory are supposed to be located; -// use 1 if any alignment is ok. -// heap_check_test_pointer_alignment flag guides if we try the value of 1. -// The larger it can be, the lesser is the chance of missing real leaks. -static const size_t kPointerSourceAlignment = sizeof(void*); - // Cancel our InitialMallocHook_* if present. static void CancelInitialMallocHooks(); // defined below @@ -484,7 +494,7 @@ HeapLeakChecker::Disabler::Disabler() { // in a thread-safe manner. 
int counter = get_thread_disable_counter(); set_thread_disable_counter(counter + 1); - RAW_VLOG(1, "Increasing thread disable counter to %d", counter + 1); + RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1); } HeapLeakChecker::Disabler::~Disabler() { @@ -492,7 +502,7 @@ HeapLeakChecker::Disabler::~Disabler() { RAW_DCHECK(counter > 0, ""); if (counter > 0) { set_thread_disable_counter(counter - 1); - RAW_VLOG(1, "Decreasing thread disable counter to %d", counter); + RAW_VLOG(10, "Decreasing thread disable counter to %d", counter); } else { RAW_VLOG(0, "Thread disable counter underflow : %d", counter); } @@ -525,7 +535,7 @@ static void NewHook(const void* ptr, size_t size) { if (ptr != NULL) { const int counter = get_thread_disable_counter(); const bool ignore = (counter > 0); - RAW_VLOG(7, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, + RAW_VLOG(16, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, int(counter)); { SpinLockHolder l(&heap_checker_lock); if (size > max_heap_object_size) max_heap_object_size = size; @@ -540,17 +550,17 @@ static void NewHook(const void* ptr, size_t size) { } } } - RAW_VLOG(8, "Alloc Recorded: %p of %"PRIuS"", ptr, size); + RAW_VLOG(17, "Alloc Recorded: %p of %"PRIuS"", ptr, size); } } static void DeleteHook(const void* ptr) { if (ptr != NULL) { - RAW_VLOG(7, "Recording Free %p", ptr); + RAW_VLOG(16, "Recording Free %p", ptr); { SpinLockHolder l(&heap_checker_lock); if (heap_checker_on) heap_profile->RecordFree(ptr); } - RAW_VLOG(8, "Free Recorded: %p", ptr); + RAW_VLOG(17, "Free Recorded: %p", ptr); } } @@ -584,7 +594,7 @@ static StackDirection stack_direction = UNKNOWN_DIRECTION; static void RegisterStackLocked(const void* top_ptr) { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - RAW_VLOG(1, "Thread stack at %p", top_ptr); + RAW_VLOG(10, "Thread stack at %p", top_ptr); uintptr_t top = AsInt(top_ptr); stack_tops->insert(top); // add for later use @@ -598,12 +608,12 
@@ static void RegisterStackLocked(const void* top_ptr) { if (MemoryRegionMap::FindAndMarkStackRegion(top, &region)) { // Make the proper portion of the stack live: if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { - RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", top_ptr, region.end_addr - top); live_objects->push_back(AllocObject(top_ptr, region.end_addr - top, THREAD_DATA)); } else { // GROWS_TOWARDS_HIGH_ADDRESSES - RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", AsPtr(region.start_addr), top - region.start_addr); live_objects->push_back(AllocObject(AsPtr(region.start_addr), @@ -619,7 +629,7 @@ static void RegisterStackLocked(const void* top_ptr) { uintptr_t start = AsInt(span->ptr); uintptr_t end = start + span->size; if (start <= top && top < end) { - RAW_VLOG(2, "Stack at %p is inside /proc/self/maps chunk %p..%p", + RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p", top_ptr, AsPtr(start), AsPtr(end)); // Shrink start..end region by chopping away the memory regions in // MemoryRegionMap that land in it to undo merging of regions @@ -640,17 +650,17 @@ static void RegisterStackLocked(const void* top_ptr) { } } if (stack_start != start || stack_end != end) { - RAW_VLOG(2, "Stack at %p is actually inside memory chunk %p..%p", + RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p", top_ptr, AsPtr(stack_start), AsPtr(stack_end)); } // Make the proper portion of the stack live: if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { - RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", top_ptr, stack_end - top); live_objects->push_back( AllocObject(top_ptr, stack_end - top, THREAD_DATA)); } else { // GROWS_TOWARDS_HIGH_ADDRESSES - RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", AsPtr(stack_start), top -
stack_start); live_objects->push_back( AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA)); @@ -723,14 +733,14 @@ static void MakeDisabledLiveCallbackLocked( // and the rest of the region where the stack lives can well // contain outdated stack variables which are not live anymore, // hence should not be treated as such. - RAW_VLOG(2, "Not %s-disabling %"PRIuS" bytes at %p" + RAW_VLOG(11, "Not %s-disabling %"PRIuS" bytes at %p" ": have stack inside: %p", (stack_disable ? "stack" : "range"), info.object_size, ptr, AsPtr(*iter)); return; } } - RAW_VLOG(2, "%s-disabling %"PRIuS" bytes at %p", + RAW_VLOG(11, "%s-disabling %"PRIuS" bytes at %p", (stack_disable ? "Stack" : "Range"), info.object_size, ptr); live_objects->push_back(AllocObject(ptr, info.object_size, MUST_BE_ON_HEAP)); @@ -755,7 +765,7 @@ static void RecordGlobalDataLocked(uintptr_t start_address, // Ignore non-writeable regions. if (strchr(permissions, 'w') == NULL) return; if (filename == NULL || *filename == '\0') filename = "UNNAMED"; - RAW_VLOG(2, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, + RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, filename, start_address, end_address); (*library_live_objects)[filename]. push_back(AllocObject(AsPtr(start_address), @@ -814,12 +824,12 @@ void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, // does not call user code. 
} if (depth) { - RAW_VLOG(1, "Disabling allocations from %s at depth %d:", library, depth); + RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth); DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth); if (IsLibraryNamed(library, "/libpthread") || IsLibraryNamed(library, "/libdl") || IsLibraryNamed(library, "/ld")) { - RAW_VLOG(1, "Global memory regions made by %s will be live data", + RAW_VLOG(10, "Global memory regions made by %s will be live data", library); if (global_region_caller_ranges == NULL) { global_region_caller_ranges = @@ -936,7 +946,7 @@ static enum { va_list /*ap*/) { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); thread_listing_status = CALLBACK_STARTED; - RAW_VLOG(2, "Found %d threads (from pid %d)", num_threads, getpid()); + RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid()); if (FLAGS_heap_check_ignore_global_live) { UseProcMapsLocked(RECORD_GLOBAL_DATA); @@ -951,7 +961,7 @@ static enum { // the leak checking thread itself is handled // specially via self_thread_stack, not here: if (thread_pids[i] == self_thread_pid) continue; - RAW_VLOG(2, "Handling thread with pid %d", thread_pids[i]); + RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]); #if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER) i386_regs thread_regs; #define sys_ptrace(r, p, a, d) syscall(SYS_ptrace, (r), (p), (a), (d)) @@ -967,7 +977,7 @@ static enum { // register pointers still being in the registers and not on the stack): for (void** p = reinterpret_cast<void**>(&thread_regs); p < reinterpret_cast<void**>(&thread_regs + 1); ++p) { - RAW_VLOG(3, "Thread register %p", *p); + RAW_VLOG(12, "Thread register %p", *p); thread_registers.push_back(*p); } } else { @@ -982,7 +992,7 @@ static enum { if (thread_registers.size()) { // Make thread registers be live heap data sources. 
// we rely here on the fact that vector is in one memory chunk: - RAW_VLOG(2, "Live registers at %p of %"PRIuS" bytes", + RAW_VLOG(11, "Live registers at %p of %"PRIuS" bytes", &thread_registers[0], thread_registers.size() * sizeof(void*)); live_objects->push_back(AllocObject(&thread_registers[0], thread_registers.size() * sizeof(void*), @@ -1005,7 +1015,7 @@ static const void* self_thread_stack_top; void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - RAW_VLOG(2, "Handling self thread with pid %d", self_thread_pid); + RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid); // Register our own stack: // Important that all stack ranges (including the one here) @@ -1019,7 +1029,7 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin(); object != ignored_objects->end(); ++object) { const void* ptr = AsPtr(object->first); - RAW_VLOG(2, "Ignored live object at %p of %"PRIuS" bytes", + RAW_VLOG(11, "Ignored live object at %p of %"PRIuS" bytes", ptr, object->second); live_objects-> push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP)); @@ -1132,10 +1142,10 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { } } // Now get and use live_objects from the final version of l->second: - if (VLOG_IS_ON(2)) { + if (VLOG_IS_ON(11)) { for (LiveObjectsStack::const_iterator i = l->second.begin(); i != l->second.end(); ++i) { - RAW_VLOG(2, "Library live region at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Library live region at %p of %"PRIuPTR" bytes", i->ptr, i->size); } } @@ -1240,7 +1250,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { RAW_LOG(ERROR, "Thread stacks not found for %d threads. 
" "Will likely report false leak positives.", r); } else { - RAW_VLOG(2, "Thread stacks appear to be found for all threads"); + RAW_VLOG(11, "Thread stacks appear to be found for all threads"); } } else { RAW_LOG(WARNING, "Not looking for thread stacks; " @@ -1256,7 +1266,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { IgnoreNonThreadLiveObjectsLocked(); } if (live_objects_total) { - RAW_VLOG(1, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", + RAW_VLOG(10, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", live_objects_total, live_bytes_total); } // Free these: we made them here and heap_profile never saw them @@ -1266,7 +1276,8 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { } // Alignment at which we should consider pointer positions -// in IgnoreLiveObjectsLocked. Use 1 if any alignment is ok. +// in IgnoreLiveObjectsLocked. Will normally use the value of +// FLAGS_heap_check_pointer_source_alignment. static size_t pointer_source_alignment = kPointerSourceAlignment; // Global lock for HeapLeakChecker::DoNoLeaks // to protect pointer_source_alignment. 
@@ -1314,7 +1325,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); live_object_count += 1; live_byte_count += size; } - RAW_VLOG(4, "Looking for heap pointers in %p of %"PRIuS" bytes", + RAW_VLOG(13, "Looking for heap pointers in %p of %"PRIuS" bytes", object, size); const char* const whole_object = object; size_t const whole_size = size; @@ -1351,7 +1362,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); if (can_be_on_heap) { const void* ptr = reinterpret_cast<const void*>(addr); // Too expensive (inner loop): manually uncomment when debugging: - // RAW_VLOG(8, "Trying pointer to %p at %p", ptr, object); + // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object); size_t object_size; if (HaveOnHeapLocked(&ptr, &object_size) && heap_profile->MarkAsLive(ptr)) { @@ -1360,10 +1371,10 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); // a heap object which is in fact leaked. // I.e. in very rare and probably not repeatable/lasting cases // we might miss some real heap memory leaks. 
- RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p " + RAW_VLOG(14, "Found pointer to %p of %"PRIuS" bytes at %p " "inside %p of size %"PRIuS"", ptr, object_size, object, whole_object, whole_size); - if (VLOG_IS_ON(6)) { + if (VLOG_IS_ON(15)) { // log call stacks to help debug how come something is not a leak HeapProfileTable::AllocInfo alloc; bool r = heap_profile->FindAllocDetails(ptr, &alloc); @@ -1386,7 +1397,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); live_objects_total += live_object_count; live_bytes_total += live_byte_count; if (live_object_count) { - RAW_VLOG(1, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", + RAW_VLOG(10, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", live_object_count, live_byte_count, name, name2); } } @@ -1408,7 +1419,7 @@ void HeapLeakChecker::IgnoreObject(const void* ptr) { if (!HaveOnHeapLocked(&ptr, &object_size)) { RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr); } else { - RAW_VLOG(1, "Going to ignore live object at %p of %"PRIuS" bytes", + RAW_VLOG(10, "Going to ignore live object at %p of %"PRIuS" bytes", ptr, object_size); if (ignored_objects == NULL) { ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap))) @@ -1434,7 +1445,7 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) { if (object != ignored_objects->end() && object_size == object->second) { ignored_objects->erase(object); found = true; - RAW_VLOG(1, "Now not going to ignore live object " + RAW_VLOG(10, "Now not going to ignore live object " "at %p of %"PRIuS" bytes", ptr, object_size); } } @@ -1483,7 +1494,7 @@ void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) { const HeapProfileTable::Stats& t = heap_profile->total(); const size_t start_inuse_bytes = t.alloc_size - t.free_size; const size_t start_inuse_allocs = t.allocs - t.frees; - RAW_VLOG(1, "Start check \"%s\" profile: %"PRIuS" bytes " + RAW_VLOG(10, "Start check \"%s\" profile: %"PRIuS" 
bytes " "in %"PRIuS" objects", name_, start_inuse_bytes, start_inuse_allocs); } else { @@ -1649,6 +1660,8 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { // Make the heap profile, other threads are locked out. HeapProfileTable::Snapshot* base = reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_); + RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); + pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; IgnoreAllLiveObjectsLocked(&a_local_var); leaks = heap_profile->NonLiveSnapshot(base); @@ -1668,23 +1681,28 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { initial_allocs, Allocator::alloc_count()); } } else if (FLAGS_heap_check_test_pointer_alignment) { - // Try with reduced pointer aligment - pointer_source_alignment = 1; - IgnoreAllLiveObjectsLocked(&a_local_var); - HeapProfileTable::Snapshot* leaks_wo_align = - heap_profile->NonLiveSnapshot(base); - pointer_source_alignment = kPointerSourceAlignment; - if (leaks_wo_align->Empty()) { - RAW_LOG(WARNING, "Found no leaks without pointer alignment: " - "something might be placing pointers at " - "unaligned addresses! 
This needs to be fixed."); + if (pointer_source_alignment == 1) { + RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: " + "--heap_check_pointer_source_alignment was already set to 1"); } else { - RAW_LOG(INFO, "Found leaks without pointer alignment as well: " - "unaligned pointers must not be the cause of leaks."); - RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " - "to diagnose the leaks."); + // Try with reduced pointer aligment + pointer_source_alignment = 1; + IgnoreAllLiveObjectsLocked(&a_local_var); + HeapProfileTable::Snapshot* leaks_wo_align = + heap_profile->NonLiveSnapshot(base); + pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; + if (leaks_wo_align->Empty()) { + RAW_LOG(WARNING, "Found no leaks without pointer alignment: " + "something might be placing pointers at " + "unaligned addresses! This needs to be fixed."); + } else { + RAW_LOG(INFO, "Found leaks without pointer alignment as well: " + "unaligned pointers must not be the cause of leaks."); + RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " + "to diagnose the leaks."); + } + heap_profile->ReleaseSnapshot(leaks_wo_align); } - heap_profile->ReleaseSnapshot(leaks_wo_align); } if (leaks != NULL) { @@ -1874,6 +1892,7 @@ static bool internal_init_start_has_run = false; } // Set all flags + RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); if (FLAGS_heap_check == "minimal") { // The least we can check. FLAGS_heap_check_before_constructors = false; // from after main @@ -2043,7 +2062,7 @@ bool HeapLeakChecker::NoGlobalLeaks() { // we never delete or change main_heap_checker once it's set: HeapLeakChecker* main_hc = GlobalChecker(); if (main_hc) { - RAW_VLOG(1, "Checking for whole-program memory leaks"); + RAW_VLOG(10, "Checking for whole-program memory leaks"); // The program is over, so it's safe to symbolize addresses (which // requires a fork) because no serious work is expected to be done // after this. 
Symbolizing is really useful -- knowing what @@ -2165,7 +2184,7 @@ void HeapLeakChecker::BeforeConstructorsLocked() { RAW_CHECK(heap_profile == NULL, ""); heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable))) HeapProfileTable(&Allocator::Allocate, &Allocator::Free); - RAW_VLOG(1, "Starting tracking the heap"); + RAW_VLOG(10, "Starting tracking the heap"); heap_checker_on = true; } @@ -2329,7 +2348,7 @@ void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address, value.start_address = AsInt(start_address); value.max_depth = max_depth; if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) { - RAW_VLOG(1, "Disabling leak checking in stack traces " + RAW_VLOG(10, "Disabling leak checking in stack traces " "under frame addresses between %p..%p", start_address, end_address); } else { // check that this is just a verbatim repetition @@ -2352,7 +2371,7 @@ inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr, const uintptr_t addr = AsInt(*ptr); if (heap_profile->FindInsideAlloc( *ptr, max_heap_object_size, ptr, object_size)) { - RAW_VLOG(7, "Got pointer into %p at +%"PRIuPTR" offset", + RAW_VLOG(16, "Got pointer into %p at +%"PRIuPTR" offset", *ptr, addr - AsInt(*ptr)); return true; } diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc index 66e4f20..ecaf75f 100644 --- a/src/heap-profile-table.cc +++ b/src/heap-profile-table.cc @@ -99,7 +99,7 @@ const char HeapProfileTable::kFileExt[] = ".heap"; //---------------------------------------------------------------------- static const int kHashTableSize = 179999; // Size for table_. 
-/*static*/ const int HeapProfileTable::kMaxStackDepth = 32; +/*static*/ const int HeapProfileTable::kMaxStackDepth; //---------------------------------------------------------------------- diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h index 5403257..c9bee15 100644 --- a/src/heap-profile-table.h +++ b/src/heap-profile-table.h @@ -52,8 +52,8 @@ class HeapProfileTable { // Extension to be used for heap pforile files. static const char kFileExt[]; - // Longest stack trace we record. Defined in the .cc file. - static const int kMaxStackDepth; + // Longest stack trace we record. + static const int kMaxStackDepth = 32; // data types ---------------------------- diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index 4ce262f..c2f8b54 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -187,7 +187,10 @@ MallocExtension* MallocExtension::instance() { void MallocExtension::Register(MallocExtension* implementation) { perftools_pthread_once(&module_init, InitModule); // When running under valgrind, our custom malloc is replaced with - // valgrind's one and malloc extensions will not work. + // valgrind's one and malloc extensions will not work. (Note: + // callers should be responsible for checking that they are the + // malloc that is really being run, before calling Register. This + // is just here as an extra sanity check.) 
if (!RunningOnValgrind()) { current_instance = implementation; } diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc index 2a7f542..4315b86 100644 --- a/src/malloc_hook.cc +++ b/src/malloc_hook.cc @@ -326,8 +326,8 @@ extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth, return 0; for (int i = 0; i < depth; ++i) { // stack[0] is our immediate caller if (InHookCaller(stack[i])) { - RAW_VLOG(4, "Found hooked allocator at %d: %p <- %p", - i, stack[i], stack[i+1]); + RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p", + i, stack[i], stack[i+1]); i += 1; // skip hook caller frame depth -= i; // correct depth if (depth > max_depth) depth = max_depth; diff --git a/src/memory_region_map.cc b/src/memory_region_map.cc index 05fdc06..f6bed45 100644 --- a/src/memory_region_map.cc +++ b/src/memory_region_map.cc @@ -181,7 +181,7 @@ static MemoryRegionMap::RegionSetRep regions_rep; static bool recursive_insert = false; void MemoryRegionMap::Init(int max_stack_depth) { - RAW_VLOG(2, "MemoryRegionMap Init"); + RAW_VLOG(10, "MemoryRegionMap Init"); RAW_CHECK(max_stack_depth >= 0, ""); // Make sure we don't overflow the memory in region stacks: RAW_CHECK(max_stack_depth <= kMaxStackDepth, @@ -192,7 +192,7 @@ void MemoryRegionMap::Init(int max_stack_depth) { if (client_count_ > 1) { // not first client: already did initialization-proper Unlock(); - RAW_VLOG(2, "MemoryRegionMap Init increment done"); + RAW_VLOG(10, "MemoryRegionMap Init increment done"); return; } // Set our hooks and make sure no other hooks existed: @@ -217,17 +217,17 @@ void MemoryRegionMap::Init(int max_stack_depth) { // recursive_insert = false; as InsertRegionLocked will also construct // regions_ on demand for us. 
Unlock(); - RAW_VLOG(2, "MemoryRegionMap Init done"); + RAW_VLOG(10, "MemoryRegionMap Init done"); } bool MemoryRegionMap::Shutdown() { - RAW_VLOG(2, "MemoryRegionMap Shutdown"); + RAW_VLOG(10, "MemoryRegionMap Shutdown"); Lock(); RAW_CHECK(client_count_ > 0, ""); client_count_ -= 1; if (client_count_ != 0) { // not last client; need not really shutdown Unlock(); - RAW_VLOG(2, "MemoryRegionMap Shutdown decrement done"); + RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done"); return true; } CheckMallocHooks(); // we assume no other hooks @@ -244,7 +244,7 @@ bool MemoryRegionMap::Shutdown() { RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used"); } Unlock(); - RAW_VLOG(2, "MemoryRegionMap Shutdown done"); + RAW_VLOG(10, "MemoryRegionMap Shutdown done"); return deleted_arena; } @@ -336,7 +336,7 @@ bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top, Lock(); const Region* region = DoFindRegionLocked(stack_top); if (region != NULL) { - RAW_VLOG(2, "Stack at %p is inside region %p..%p", + RAW_VLOG(10, "Stack at %p is inside region %p..%p", reinterpret_cast<void*>(stack_top), reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); @@ -361,7 +361,7 @@ MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { } inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { - RAW_VLOG(4, "Inserting region %p..%p from %p", + RAW_VLOG(12, "Inserting region %p..%p from %p", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr), reinterpret_cast<void*>(region.caller())); @@ -385,10 +385,10 @@ inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { // This inserts and allocates permanent storage for region // and its call stack data: it's safe to do it now: regions_->insert(region); - RAW_VLOG(4, "Inserted region %p..%p :", + RAW_VLOG(12, "Inserted region %p..%p :", reinterpret_cast<void*>(region.start_addr), 
reinterpret_cast<void*>(region.end_addr)); - if (VLOG_IS_ON(4)) LogAllLocked(); + if (VLOG_IS_ON(12)) LogAllLocked(); } // These variables are local to MemoryRegionMap::InsertRegionLocked() @@ -425,7 +425,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { // and taken into account when the recursion unwinds. // Do the insert: if (recursive_insert) { // recursion: save in saved_regions - RAW_VLOG(4, "Saving recursive insert of region %p..%p from %p", + RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr), reinterpret_cast<void*>(region.caller())); @@ -436,7 +436,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { saved_regions[saved_regions_count++] = region; } else { // not a recusrive call if (regions_ == NULL) { // init regions_ - RAW_VLOG(4, "Initializing region set"); + RAW_VLOG(12, "Initializing region set"); regions_ = regions_rep.region_set(); recursive_insert = true; new(regions_) RegionSet(); @@ -470,7 +470,7 @@ void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) { max_stack_depth_, kStripFrames + 1) : 0; region.set_call_stack_depth(depth); // record stack info fully - RAW_VLOG(2, "New global region %p..%p from %p", + RAW_VLOG(10, "New global region %p..%p from %p", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr), reinterpret_cast<void*>(region.caller())); @@ -499,7 +499,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { // An exact match, so it's safe to remove. 
--saved_regions_count; --put_pos; - RAW_VLOG(2, ("Insta-Removing saved region %p..%p; " + RAW_VLOG(10, ("Insta-Removing saved region %p..%p; " "now have %d saved regions"), reinterpret_cast<void*>(start_addr), reinterpret_cast<void*>(end_addr), @@ -523,7 +523,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { uintptr_t start_addr = reinterpret_cast<uintptr_t>(start); uintptr_t end_addr = start_addr + size; // subtract start_addr, end_addr from all the regions - RAW_VLOG(2, "Removing global region %p..%p; have %"PRIuS" regions", + RAW_VLOG(10, "Removing global region %p..%p; have %"PRIuS" regions", reinterpret_cast<void*>(start_addr), reinterpret_cast<void*>(end_addr), regions_->size()); @@ -533,12 +533,12 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { for (RegionSet::iterator region = regions_->lower_bound(sample); region != regions_->end() && region->start_addr < end_addr; /*noop*/) { - RAW_VLOG(5, "Looking at region %p..%p", + RAW_VLOG(13, "Looking at region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); if (start_addr <= region->start_addr && region->end_addr <= end_addr) { // full deletion - RAW_VLOG(4, "Deleting region %p..%p", + RAW_VLOG(12, "Deleting region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); RegionSet::iterator d = region; @@ -547,7 +547,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { continue; } else if (region->start_addr < start_addr && end_addr < region->end_addr) { // cutting-out split - RAW_VLOG(4, "Splitting region %p..%p in two", + RAW_VLOG(12, "Splitting region %p..%p in two", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); // Make another region for the start portion: @@ -560,13 +560,13 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { 
const_cast<Region&>(*region).set_start_addr(end_addr); } else if (end_addr > region->start_addr && start_addr <= region->start_addr) { // cut from start - RAW_VLOG(4, "Start-chopping region %p..%p", + RAW_VLOG(12, "Start-chopping region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); const_cast<Region&>(*region).set_start_addr(end_addr); } else if (start_addr > region->start_addr && start_addr < region->end_addr) { // cut from end - RAW_VLOG(4, "End-chopping region %p..%p", + RAW_VLOG(12, "End-chopping region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); // Can't just modify region->end_addr (it's the sorting key): @@ -582,11 +582,11 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { } ++region; } - RAW_VLOG(4, "Removed region %p..%p; have %"PRIuS" regions", + RAW_VLOG(12, "Removed region %p..%p; have %"PRIuS" regions", reinterpret_cast<void*>(start_addr), reinterpret_cast<void*>(end_addr), regions_->size()); - if (VLOG_IS_ON(4)) LogAllLocked(); + if (VLOG_IS_ON(12)) LogAllLocked(); Unlock(); } @@ -596,7 +596,7 @@ void MemoryRegionMap::MmapHook(const void* result, int fd, off_t offset) { // TODO(maxim): replace all 0x%"PRIxS" by %p when RAW_VLOG uses a safe // snprintf reimplementation that does not malloc to pretty-print NULL - RAW_VLOG(2, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " + RAW_VLOG(10, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " "prot %d flags %d fd %d offs %lld", reinterpret_cast<uintptr_t>(result), size, reinterpret_cast<uint64>(start), prot, flags, fd, @@ -607,7 +607,7 @@ void MemoryRegionMap::MmapHook(const void* result, } void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) { - RAW_VLOG(2, "MUnmap of %p %"PRIuS"", ptr, size); + RAW_VLOG(10, "MUnmap of %p %"PRIuS"", ptr, size); if (size != 0) { RecordRegionRemoval(ptr, size); } @@ -617,7 +617,7 @@ void MemoryRegionMap::MremapHook(const void* result, const void* 
old_addr, size_t old_size, size_t new_size, int flags, const void* new_addr) { - RAW_VLOG(2, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " + RAW_VLOG(10, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " "to %"PRIuS" flags %d new_addr=0x%"PRIxPTR, (uintptr_t)result, (uintptr_t)old_addr, old_size, new_size, flags, @@ -631,7 +631,7 @@ void MemoryRegionMap::MremapHook(const void* result, extern "C" void* __sbrk(ptrdiff_t increment); // defined in libc void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) { - RAW_VLOG(2, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); + RAW_VLOG(10, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); if (result != reinterpret_cast<void*>(-1)) { if (increment > 0) { void* new_end = sbrk(0); @@ -89,6 +89,7 @@ my %obj_tool_map = ( ); my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local my $GV = "gv"; +my $KCACHEGRIND = "kcachegrind"; my $PS2PDF = "ps2pdf"; # These are used for dynamic profiles my $WGET = "wget"; @@ -332,11 +333,11 @@ sub Init() { # Type of profile we are dealing with # Supported types: - # cpu - # heap - # growth - # contention - $main::profile_type = ''; # Empty type means "unknown" + # cpu + # heap + # growth + # contention + $main::profile_type = ''; # Empty type means "unknown" GetOptions("help!" => \$main::opt_help, "version!" => \$main::opt_version, @@ -380,8 +381,8 @@ sub Init() { "tools=s" => \$main::opt_tools, "test!" => \$main::opt_test, "debug!" 
=> \$main::opt_debug, - # Undocumented flags used only by unittests: - "test_stride=i" => \$main::opt_test_stride, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, ) || usage("Invalid option(s)"); # Deal with the standard --help and --version @@ -634,7 +635,7 @@ sub Main() { } else { if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { if ($main::opt_gv) { - RunGV(PsTempName($main::next_tmpfile), ""); + RunGV(PsTempName($main::next_tmpfile), ""); } } else { exit(1); @@ -667,7 +668,7 @@ sub ReadlineMightFail { sub RunGV { my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background + my $bg = shift; # "" or " &" if we should run in background if (!system("$GV --version >/dev/null 2>&1")) { # Options using double dash are supported by this gv version. # Also, turn on noantialias to better handle bug in gv for @@ -682,6 +683,13 @@ sub RunGV { } } +sub RunKcachegrind { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n"; + system("$KCACHEGRIND " . $fname . $bg); +} + ##### Interactive helper routines ##### @@ -689,10 +697,11 @@ sub InteractiveMode { $| = 1; # Make output unbuffered for interactive mode my ($orig_profile, $symbols, $libs, $total) = @_; - print "Welcome to pprof! For help, type 'help'.\n"; + print STDERR "Welcome to pprof! For help, type 'help'.\n"; - # Use ReadLine if it's installed. - if ( !ReadlineMightFail() && + # Use ReadLine if it's installed and input comes from a console. + if ( -t STDIN && + !ReadlineMightFail() && defined(eval {require Term::ReadLine}) ) { my $term = new Term::ReadLine 'pprof'; while ( defined ($_ = $term->readline('(pprof) '))) { @@ -703,7 +712,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print "(pprof) "; + print STDERR "(pprof) "; $_ = <STDIN>; last if ! 
defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -727,7 +736,7 @@ sub InteractiveCommand { my($orig_profile, $symbols, $libs, $total, $command) = @_; $_ = $command; # just to make future m//'s easier if (!defined($_)) { - print "\n"; + print STDERR "\n"; return 0; } if (m/^ *quit/) { @@ -764,6 +773,23 @@ sub InteractiveCommand { PrintText($symbols, $flat, $cumulative, $total, $line_limit); return 1; } + if (m/^ *callgrind *([^ \n]*)/) { + $main::opt_callgrind = 1; + + # Get derived profiles + my $calls = ExtractCalls($symbols, $orig_profile); + my $filename = $1; + if ( $1 eq '' ) { + $filename = CallgrindTempName($main::next_tmpfile); + } + PrintCallgrind($calls, $filename); + if ( $1 eq '' ) { + RunKcachegrind($filename, " & "); + $main::next_tmpfile++; + } + + return 1; + } if (m/^ *list *(.+)/) { $main::opt_list = 1; @@ -856,7 +882,7 @@ sub ProcessProfile { } sub InteractiveHelpMessage { - print <<ENDOFHELP; + print STDERR <<ENDOFHELP; Interactive pprof mode Commands: @@ -882,6 +908,10 @@ Commands: Show disassembly of routines whose names match "routine_regexp", annotated with sample counts. + callgrind + callgrind [filename] + Generates callgrind file. If no filename is given, kcachegrind is called. + help - This listing quit or ^D - End pprof @@ -913,7 +943,7 @@ sub ParseInteractiveArgs { } } if ($ignore ne "") { - print "Ignoring samples in call stacks that match '$ignore'\n"; + print STDERR "Ignoring samples in call stacks that match '$ignore'\n"; } return ($focus, $ignore); } @@ -925,6 +955,11 @@ sub PsTempName { return "$main::tmpfile_ps" . "." . "$fnum" . ".ps"; } +sub CallgrindTempName { + my $fnum = shift; + return "$main::tmpfile_ps" . "." . "$fnum" . ".callgrind"; +} + # Print profile data in packed binary format (64-bit) to standard out sub PrintProfileData { my $profile = shift; @@ -1045,7 +1080,15 @@ sub PrintText { # Print the call graph in a way that's suiteable for callgrind. 
sub PrintCallgrind { my $calls = shift; - printf("events: Hits\n\n"); + my $filename; + if ($main::opt_interactive) { + $filename = shift; + print STDERR "Writing callgrind file to '$filename'.\n" + } else { + $filename = "&STDOUT"; + } + open(CG, ">".$filename ); + printf CG ("events: Hits\n\n"); foreach my $call ( map { $_->[0] } sort { $a->[1] cmp $b ->[1] || $a->[2] <=> $b->[2] } @@ -1057,13 +1100,15 @@ sub PrintCallgrind { my ( $caller_file, $caller_line, $caller_function, $callee_file, $callee_line, $callee_function ) = ( $1, $2, $3, $5, $6, $7 ); - printf("fl=$caller_file\nfn=$caller_function\n"); + + + printf CG ("fl=$caller_file\nfn=$caller_function\n"); if (defined $6) { - printf("cfl=$callee_file\n"); - printf("cfn=$callee_function\n"); - printf("calls=$count $callee_line\n"); + printf CG ("cfl=$callee_file\n"); + printf CG ("cfn=$callee_function\n"); + printf CG ("calls=$count $callee_line\n"); } - printf("$caller_line $count\n\n"); + printf CG ("$caller_line $count\n\n"); } } @@ -1385,7 +1430,7 @@ sub SourceLine { return undef; } my $lines = []; - push(@{$lines}, ""); # So we can use 1-based line numbers as indices + push(@{$lines}, ""); # So we can use 1-based line numbers as indices while (<FILE>) { push(@{$lines}, $_); } @@ -1477,8 +1522,8 @@ sub PrintDisassembledFunction { # Find run of instructions for this range of source lines my $first_inst = $i; while (($i <= $#instructions) && - ($instructions[$i]->[2] >= $first_line) && - ($instructions[$i]->[2] <= $last_line)) { + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { $e = $instructions[$i]; $flat_sum{$e->[2]} += $flat_count[$i]; $cum_sum{$e->[2]} += $cum_count[$i]; @@ -1490,16 +1535,16 @@ sub PrintDisassembledFunction { for (my $l = $first_line; $l <= $last_line; $l++) { my $line = SourceLine($current_file, $l); if (!defined($line)) { - $line = "?\n"; + $line = "?\n"; next; } else { $line =~ s/^\s+//; } printf("%6s %6s %5d: %s", - UnparseAlt($flat_sum{$l}), 
- UnparseAlt($cum_sum{$l}), - $l, - $line); + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); } # Print disassembly @@ -1516,9 +1561,9 @@ sub PrintDisassembledFunction { while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments printf("%6s %6s %8s: %6s\n", - UnparseAlt($flat_count[$x]), - UnparseAlt($cum_count[$x]), - $address, + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + $address, $d); } } @@ -1542,7 +1587,7 @@ sub PrintDot { # Find nodes to include my @list = (sort { abs(GetEntry($cumulative, $b)) <=> abs(GetEntry($cumulative, $a)) - || $a cmp $b } + || $a cmp $b } keys(%{$cumulative})); my $last = $nodecount - 1; if ($last > $#list) { @@ -1806,7 +1851,7 @@ sub Unparse { } } } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds } else { return sprintf("%d", $num); } @@ -1947,42 +1992,42 @@ sub RemoveUninterestingFrames { 'malloc', 'free', 'memalign', - 'posix_memalign', + 'posix_memalign', 'pvalloc', 'valloc', 'realloc', - 'tc_calloc', + 'tc_calloc', 'tc_cfree', 'tc_malloc', 'tc_free', 'tc_memalign', - 'tc_posix_memalign', + 'tc_posix_memalign', 'tc_pvalloc', 'tc_valloc', 'tc_realloc', - 'tc_new', - 'tc_delete', - 'tc_newarray', - 'tc_deletearray', - 'tc_new_nothrow', - 'tc_newarray_nothrow', - 'do_malloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', '::do_malloc', # new name -- got moved to an unnamed ns '::do_malloc_or_cpp_alloc', 'DoSampledAllocation', - 'simple_alloc::allocate', - '__malloc_alloc_template::allocate', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', '__builtin_delete', '__builtin_new', '__builtin_vec_delete', '__builtin_vec_new', 'operator new', 'operator new[]', - # These mark the beginning/end of our custom sections - 
'__start_google_malloc', - '__stop_google_malloc', - '__start_malloc_hook', - '__stop_malloc_hook') { + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { $skip{$name} = 1; $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything } @@ -1999,11 +2044,11 @@ sub RemoveUninterestingFrames { # TODO(dpeng): this should not be necessary; it's taken # care of by the general 2nd-pc mechanism below. foreach my $name ('ProfileData::Add', # historical - 'ProfileData::prof_handler', # historical - 'CpuProfiler::prof_handler', + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', '__FRAME_END__', - '__pthread_sighandler', - '__restore') { + '__pthread_sighandler', + '__restore') { $skip{$name} = 1; } } else { @@ -2042,10 +2087,10 @@ sub RemoveUninterestingFrames { my @path = (); foreach my $a (@addrs) { if (exists($symbols->{$a})) { - my $func = $symbols->{$a}->[0]; - if ($skip{$func} || ($func =~ m/$skip_regexp/)) { - next; - } + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + next; + } } push(@path, $a); } @@ -2070,8 +2115,8 @@ sub ReduceProfile { # To avoid double-counting due to recursion, skip a stack-trace # entry if it has already been seen if (!$seen{$e}) { - $seen{$e} = 1; - push(@path, $e); + $seen{$e} = 1; + push(@path, $e); } } my $reduced_path = join("\n", @path); @@ -2404,7 +2449,6 @@ sub FetchSymbols { my $pcset = shift; my $symbol_map = shift; - my %seen = (); my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq @@ -2414,7 +2458,7 @@ sub FetchSymbols { open(POSTFILE, ">$main::tmpfile_sym"); print POSTFILE $post_data; close(POSTFILE); - + my $url = SymbolPageURL(); # Here we use curl for sending data via POST since old # wget doesn't have --post-file option. @@ -2517,7 +2561,7 @@ sub FetchDynamicProfile { my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . 
"/pprof"); if (!(-d $profile_dir)) { mkdir($profile_dir) - || die("Unable to create profile directory $profile_dir: $!\n"); + || die("Unable to create profile directory $profile_dir: $!\n"); } my $tmp_profile = "$profile_dir/.tmp.$profile_file"; my $real_profile = "$profile_dir/$profile_file"; @@ -2603,22 +2647,69 @@ sub TryCollectProfile { # Provide a small streaming-read module to handle very large # cpu-profile files. Stream in chunks along a sliding window. +# Provides an interface to get one 'slot', correctly handling +# endian-ness differences. A slot is one 32-bit or 64-bit word +# (depending on the input profile). We tell endianness and bit-size +# for the profile by looking at the first 8 bytes: in cpu profiles, +# the second slot is always 3 (we'll accept anything that's not 0). BEGIN { package CpuProfileStream; sub new { - my ($class, $file) = @_; - my $self = { file => $file, - base => 0, - stride => 512 * 1024, # must be a multiple of |long| - slots => [] + my ($class, $file, $fname) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of bitsize/8 + slots => [], + unpack_code => "", # N for big-endian, V for little }; bless $self, $class; # Let unittests adjust the stride if ($main::opt_test_stride > 0) { $self->{stride} = $main::opt_test_stride; } - $self->overflow(); + # Read the first two slots to figure out bitsize and endianness. + my $slots = $self->{slots}; + my $str; + read($self->{file}, $str, 8); + # Set the global $address_length based on what we see here. + # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). + $address_length = ($str eq (chr(0)x8)) ? 16 : 8; + if ($address_length == 8) { + if (substr($str, 6, 2) eq chr(0)x2) { + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 4, 2) eq chr(0)x2) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**16\n"); + } + @$slots = unpack($self->{unpack_code} . 
"*", $str); + } else { + # If we're a 64-bit profile, make sure we're a 64-bit-capable + # perl. Otherwise, each slot will be represented as a float + # instead of an int64, losing precision and making all the + # 64-bit addresses right. We *could* try to handle this with + # software emulation of 64-bit ints, but that's added complexity + # for no clear benefit (yet). We use 'Q' to test for 64-bit-ness; + # perl docs say it's only available on 64-bit perl systems. + my $has_q = 0; + eval { $has_q = pack("Q", "1") ? 1 : 1; }; + if (!$has_q) { + ::error("$fname: need a 64-bit perl to process this 64-bit profile.\n"); + } + read($self->{file}, $str, 8); + if (substr($str, 4, 4) eq chr(0)x4) { + # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 0, 4) eq chr(0)x4) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**32\n"); + } + my @pair = unpack($self->{unpack_code} . "*", $str); + # Since we know one of the pair is 0, it's fine to just add them. + @$slots = (0, $pair[0] + $pair[1]); + } return $self; } @@ -2629,7 +2720,25 @@ BEGIN { $self->{base} += $#$slots + 1; # skip over data we're replacing my $str; read($self->{file}, $str, $self->{stride}); - @$slots = unpack("L*", $str); + if ($address_length == 8) { # the 32-bit case + # This is the easy case: unpack provides 32-bit unpacking primitives. + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # We need to unpack 32 bits at a time and combine. + my @b32_values = unpack($self->{unpack_code} . "*", $str); + my @b64_values = (); + for (my $i = 0; $i < $#b32_values; $i += 2) { + # TODO(csilvers): if this is a 32-bit perl, the math below + # could end up in a too-large int, which perl will promote + # to a double, losing necessary precision. Deal with that. 
+ if ($self->{unpack_code} eq 'V') { # little-endian + push(@b64_values, $b32_values[$i] + $b32_values[$i+1] * (2**32)); + } else { + push(@b64_values, $b32_values[$i] * (2**32) + $b32_values[$i+1]); + } + } + @$slots = @b64_values; + } } # Access the i-th long in the file (logically), or -1 at EOF. @@ -2638,16 +2747,16 @@ BEGIN { my $slots = $self->{slots}; while ($#$slots >= 0) { if ($idx < $self->{base}) { - # The only time we expect a reference to $slots[$i - something] - # after referencing $slots[$i] is reading the very first header. - # Since $stride > |header|, that shouldn't cause any lookback - # errors. And everything after the header is sequential. - print STDERR "Unexpected look-back reading CPU profile"; - return -1; # shrug, don't know what better to return + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return } elsif ($idx > $self->{base} + $#$slots) { - $self->overflow(); + $self->overflow(); } else { - return $slots->[$idx - $self->{base}]; + return $slots->[$idx - $self->{base}]; } } # If we get here, $slots is [], which means we've reached EOF @@ -2752,6 +2861,33 @@ sub ReadProfile { return $result; } +# Subtract one from caller pc so we map back to call instr. +# However, don't do this if we're reading a symbolized profile +# file, in which case the subtract-one was done when the file +# was written. +# +# We apply the same logic to all readers, though ReadCPUProfile uses an +# independent implementation. 
+sub FixCallerAddresses { + my $stack = shift; + if ($main::use_symbolized_profile) { + return $stack; + } else { + $stack =~ /(\s)/; + my $delimiter = $1; + my @addrs = split(' ', $stack); + my @fixedaddrs; + $#fixedaddrs = $#addrs; + if ($#addrs >= 0) { + $fixedaddrs[0] = $addrs[0]; + } + for (my $i = 1; $i <= $#addrs; $i++) { + $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); + } + return join $delimiter, @fixedaddrs; + } +} + # CPU profile reader sub ReadCPUProfile { my $prog = shift; @@ -2763,10 +2899,7 @@ sub ReadCPUProfile { my $pcs = {}; # Parse string into array of slots. - # L! cannot be used because with a native 64-bit build, it will cause - # 1) a valid 64-bit profile to use the 32-bit codepath, and - # 2) a valid 32-bit profile to be unrecognized. - my $slots = CpuProfileStream->new(*PROFILE); + my $slots = CpuProfileStream->new(*PROFILE, $fname); # Read header. The current header version is a 5-element structure # containing: @@ -2775,108 +2908,50 @@ sub ReadCPUProfile { # 2: format version (0) # 3: sampling period (usec) # 4: unused padding (always 0) - # The header words are 32-bit or 64-bit depending on the ABI of the program - # that generated the profile. In the 64-bit case, since our x86-architecture - # machines are little-endian, the actual value of each of these elements is - # in the first 32-bit word, and the second is always zero. The @slots array - # above was read as a sequence of 32-bit words in both cases, so we need to - # explicitly check for both cases. 
A typical slot sequence for each is: - # 32-bit: 0 3 0 100 0 - # 64-bit: 0 0 3 0 0 0 100 0 0 0 - # if ($slots->get(0) != 0 ) { error("$fname: not a profile file, or old format profile file\n"); } - if ($slots->get(1) >= 3) { - # Normal 32-bit header: - $version = $slots->get(2); - $period = $slots->get(3); - $i = 2 + $slots->get(1); - $address_length = 8; - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $d = $slots->get($i++); - if ($slots->get($i) == 0) { - # End of profile data marker - $i += $d; - last; - } - - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pc = sprintf("%08x", $slots->get($i+$j)); - $pcs->{$pc} = 1; - push @k, $pc; - } - - AddEntry($profile, (join "\n", @k), $n); + $i = 2 + $slots->get(1); + $version = $slots->get(2); + $period = $slots->get(3); + # Do some sanity checking on these header values. + if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { + error("$fname: not a profile file, or corrupted profile file\n"); + } + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? + my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); + print STDERR "At index $i (address $addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); + } + if ($slots->get($i) == 0) { + # End of profile data marker $i += $d; + last; } - # Normal 64-bit header: All entries are doubled in size. The first - # word (little-endian) should contain the real value, the second should - # be zero. 
- } elsif ($slots->get(1) != 0 || - $slots->get(2) < 3 || - $slots->get(3) != 0 || - $slots->get(5) != 0 || - $slots->get(7) != 0) { - error("$fname: not a profile file, or old format profile file\n"); - } else { - $version = $slots->get(4); - $period = $slots->get(6); - $i = 4 + 2 * $slots->get(2); - $address_length = 16; - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $nhi = $slots->get($i++); - # Huge counts may coerce to floating point, keeping scale, not precision - if ($nhi != 0) { $n += $nhi*(2**32); } - my $d = $slots->get($i++); - if ($slots->get($i++) != 0) { - my $addr = sprintf("%o", 4 * $i); - print STDERR "At index $i ($addr):\n"; - error("$fname: stack trace depth >= 2**32\n"); - } - if ($slots->get($i) == 0 && $slots->get($i+1) == 0) { - # End of profile data marker - $i += 2 * $d; - last; - } - - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pclo = $slots->get($i++); - my $pchi = $slots->get($i++); - if ($pclo == -1 || $pchi == -1) { - error("$fname: Unexpected EOF when reading stack of depth $d\n"); - } - - # Subtract one from caller pc so we map back to call instr. - # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. - if ($j > 0 && !$main::use_symbolized_profile) { - if ($pclo == 0) { - $pchi--; - $pclo = 0xffffffff; - } else { - $pclo--; - } - } - - my $pc = sprintf("%08x%08x", $pchi, $pclo); - $pcs->{$pc} = 1; - push @k, $pc; + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = $slots->get($i+$j); + # Subtract one from caller pc so we map back to call instr. + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. 
+ if ($j > 0 && !$main::use_symbolized_profile) { + $pc--; } - AddEntry($profile, (join "\n", @k), $n); + $pc = sprintf("%0*x", $address_length, $pc); + $pcs->{$pc} = 1; + push @k, $pc; } + + AddEntry($profile, (join "\n", @k), $n); + $i += $d; } # Parse map @@ -2947,18 +3022,18 @@ sub ReadHeapProfile { # found for profiles generated locally, and the others for # remote profiles. if (($type eq "heapprofile") || ($type !~ /heap/) ) { - # No need to adjust for the sampling rate with heap-profiler-derived data - $sampling_algorithm = 0; + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; } elsif ($type =~ /_v2/) { - $sampling_algorithm = 2; # version 2 sampling + $sampling_algorithm = 2; # version 2 sampling if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period); - } + $sample_adjustment = int($sample_period); + } } else { - $sampling_algorithm = 1; # version 1 sampling + $sampling_algorithm = 1; # version 1 sampling if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period)/2; - } + $sample_adjustment = int($sample_period)/2; + } } } else { # We detect whether or not this is a remote-heap profile by checking @@ -2970,7 +3045,7 @@ sub ReadHeapProfile { my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); if (($n1 == $n2) && ($s1 == $s2)) { # This is likely to be a remote-heap based sample profile - $sampling_algorithm = 1; + $sampling_algorithm = 1; } } } @@ -2984,7 +3059,7 @@ sub ReadHeapProfile { print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; } else { printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", - $sample_adjustment); + $sample_adjustment); } if ($sampling_algorithm > 1) { # We don't bother printing anything for the original version (version 1) @@ -3001,7 +3076,7 @@ sub ReadHeapProfile { if (/^MAPPED_LIBRARIES:/) { # Read the /proc/self/maps data while (<PROFILE>) { - s/\r//g; # turn 
windows-looking lines into unix-looking lines + s/\r//g; # turn windows-looking lines into unix-looking lines $map .= $_; } last; @@ -3011,7 +3086,7 @@ sub ReadHeapProfile { # Read /proc/self/maps data as formatted by DumpAddressMap() my $buildvar = ""; while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines + s/\r//g; # turn windows-looking lines into unix-looking lines # Parse "build=<dir>" specification if supplied if (m/^\s*build=(.*)\n/) { $buildvar = $1; @@ -3066,7 +3141,7 @@ sub ReadHeapProfile { } my @counts = ($n1, $s1, $n2, $s2); - AddEntries($profile, $pcs, $stack, $counts[$index]); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); } } @@ -3086,7 +3161,7 @@ sub ReadSynchProfile { my $profile = {}; my $pcs = {}; my $sampling_period = 1; - my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $cyclespernanosec = 2.8; # Default assumption for old binaries my $seen_clockrate = 0; my $line; @@ -3112,7 +3187,7 @@ sub ReadSynchProfile { $count *= $sampling_period; my @values = ($cycles, $count, $cycles / $count); - AddEntries($profile, $pcs, $stack, $values[$index]); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { @@ -3127,7 +3202,7 @@ sub ReadSynchProfile { # Adjust for sampling done by application $cycles *= $sampling_period; - AddEntries($profile, $pcs, $stack, $cycles); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { my ($variable, $value) = ($1,$2); @@ -3308,8 +3383,8 @@ sub ParseTextSectionHeaderFromOtool { } elsif ($line =~ /segname (\w+)/) { $segname = $1; } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && - $sectname eq "__text" && - $segname eq "__TEXT")) { + $sectname eq "__text" && + $segname eq "__TEXT")) { next; } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { $vma = $1; 
@@ -3369,7 +3444,7 @@ sub ParseLibraries { my $finish; my $offset; my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*)?)$/i) { + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { # Full line from /proc/self/maps. Example: # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so $start = HexExtend($1); @@ -3675,7 +3750,7 @@ sub MapToSymbols { if ($debug) { print("---- $image ---\n"); } for (my $i = 0; $i <= $#{$pclist}; $i++) { # addr2line always reads hex addresses, and does not need '0x' prefix. - if ($debug) { printf("%s\n", $pclist->[$i]); } + if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); if (defined($sep_address)) { printf ADDRESSES ("%s\n", $sep_address); @@ -3727,7 +3802,7 @@ sub MapToSymbols { $symbols->{$pcstr} = $sym; } unshift(@{$sym}, $function, $filelinenum, $fullfunction); - if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } if (!defined($sep_address)) { # Inlining is off, se this entry ends immediately $count++; @@ -3783,7 +3858,7 @@ sub MapSymbolsWithNM { } return 1; } - + sub ShortFunctionName { my $function = shift; while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types @@ -3942,12 +4017,12 @@ sub GetProcedureBoundariesViaNm { # we'll just go ahead and process the first entry (which never # got touched in the queue), and ignore the others. if ($start_val eq $last_start && $type =~ /t/i) { - # We are the 'T' symbol at this address, replace previous symbol. - $routine = $this_routine; - next; + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; } elsif ($start_val eq $last_start) { - # We're not the 'T' symbol at this address, so ignore us. 
- next; + # We're not the 'T' symbol at this address, so ignore us. + next; } if ($this_routine eq $sep_symbol) { @@ -3962,7 +4037,7 @@ sub GetProcedureBoundariesViaNm { if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($start_val)]; + HexExtend($start_val)]; } $last_start = $start_val; $routine = $this_routine; @@ -3981,7 +4056,7 @@ sub GetProcedureBoundariesViaNm { # TODO(csilvers): do better here. if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($last_start)]; + HexExtend($last_start)]; } return $symbol_table; @@ -4029,9 +4104,9 @@ sub GetProcedureBoundaries { # -D to at least get *exported* symbols. If we can't use --demangle, # we use c++filt instead, if it exists on this system. my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - "$nm -D -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag"); + " $image 2>/dev/null $cppfilt_flag", + "$nm -D -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag"); # If the executable is an MS Windows PDB-format executable, we'll # have set up obj_tool_map("nm_pdb"). 
In this case, we actually # want to use both unix nm and windows-specific nm_pdb, since @@ -4263,4 +4338,3 @@ sub RunUnitTests { } exit ($error_count); } - diff --git a/src/stacktrace.cc b/src/stacktrace.cc index d158eea..68cb865 100644 --- a/src/stacktrace.cc +++ b/src/stacktrace.cc @@ -57,7 +57,45 @@ #include "stacktrace_config.h" #if defined(STACKTRACE_INL_HEADER) -# include STACKTRACE_INL_HEADER + +#define IS_STACK_FRAMES 0 +#define IS_WITH_CONTEXT 0 +#define GET_STACK_TRACE_OR_FRAMES \ + GetStackTrace(void **result, int max_depth, int skip_count) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 1 +#define IS_WITH_CONTEXT 0 +#define GET_STACK_TRACE_OR_FRAMES \ + GetStackFrames(void **result, int *sizes, int max_depth, int skip_count) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 0 +#define IS_WITH_CONTEXT 1 +#define GET_STACK_TRACE_OR_FRAMES \ + GetStackTraceWithContext(void **result, int max_depth, \ + int skip_count, const void *ucp) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 1 +#define IS_WITH_CONTEXT 1 +#define GET_STACK_TRACE_OR_FRAMES \ + GetStackFramesWithContext(void **result, int *sizes, int max_depth, \ + int skip_count, const void *ucp) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + #elif 0 // This is for the benefit of code analysis tools that may have // trouble with the computed #include above. diff --git a/src/stacktrace_generic-inl.h b/src/stacktrace_generic-inl.h index 490cd9d..0e72ee7 100644 --- a/src/stacktrace_generic-inl.h +++ b/src/stacktrace_generic-inl.h @@ -34,57 +34,32 @@ // // Note: The glibc implementation may cause a call to malloc. // This can cause a deadlock in HeapProfiler. 
+ +#ifndef BASE_STACKTRACE_GENERIC_INL_H_ +#define BASE_STACKTRACE_GENERIC_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + #include <execinfo.h> #include <string.h> #include "google/stacktrace.h" +#endif // BASE_STACKTRACE_GENERIC_INL_H_ -// If you change this function, also change GetStackFrames below. -int GetStackTrace(void** result, int max_depth, int skip_count) { - static const int kStackLength = 64; - void * stack[kStackLength]; - int size; - - size = backtrace(stack, kStackLength); - skip_count++; // we want to skip the current frame as well - int result_count = size - skip_count; - if (result_count < 0) - result_count = 0; - if (result_count > max_depth) - result_count = max_depth; - for (int i = 0; i < result_count; i++) - result[i] = stack[i + skip_count]; - - return result_count; -} +// Note: this part of the file is included several times. +// Do not put globals below. -// If you change this function, also change GetStackTrace above: -// -// This GetStackFrames routine shares a lot of code with GetStackTrace -// above. This code could have been refactored into a common routine, -// and then both GetStackTrace/GetStackFrames could call that routine. -// There are two problems with that: -// -// (1) The performance of the refactored-code suffers substantially - the -// refactored needs to be able to record the stack trace when called -// from GetStackTrace, and both the stack trace and stack frame sizes, -// when called from GetStackFrames - this introduces enough new -// conditionals that GetStackTrace performance can degrade by as much -// as 50%. 
+// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() // -// (2) Whether the refactored routine gets inlined into GetStackTrace and -// GetStackFrames depends on the compiler, and we can't guarantee the -// behavior either-way, even with "__attribute__ ((always_inline))" -// or "__attribute__ ((noinline))". But we need this guarantee or the -// frame counts may be off by one. -// -// Both (1) and (2) can be addressed without this code duplication, by -// clever use of template functions, and by defining GetStackTrace and -// GetStackFrames as macros that expand to these template functions. -// However, this approach comes with its own set of problems - namely, -// macros and preprocessor trouble - for example, if GetStackTrace -// and/or GetStackFrames is ever defined as a member functions in some -// class, we are in trouble. -int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +int GET_STACK_TRACE_OR_FRAMES { static const int kStackLength = 64; void * stack[kStackLength]; int size; @@ -97,10 +72,12 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { if (result_count > max_depth) result_count = max_depth; for (int i = 0; i < result_count; i++) - pcs[i] = stack[i + skip_count]; + result[i] = stack[i + skip_count]; +#if IS_STACK_FRAMES // No implementation for finding out the stack frame sizes yet. 
memset(sizes, 0, sizeof(*sizes) * result_count); +#endif return result_count; } diff --git a/src/stacktrace_libunwind-inl.h b/src/stacktrace_libunwind-inl.h index d9d829a..a1d5249 100644 --- a/src/stacktrace_libunwind-inl.h +++ b/src/stacktrace_libunwind-inl.h @@ -32,6 +32,11 @@ // // Produce stack trace using libunwind +#ifndef BASE_STACKTRACE_LIBINWIND_INL_H_ +#define BASE_STACKTRACE_LIBINWIND_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + // We only need local unwinder. #define UNW_LOCAL_ONLY @@ -52,73 +57,30 @@ extern "C" { // cases, we return 0 to indicate the situation. static __thread int recursive; -// If you change this function, also change GetStackFrames below. -int GetStackTrace(void** result, int max_depth, int skip_count) { - void *ip; - int n = 0; - unw_cursor_t cursor; - unw_context_t uc; +#endif // BASE_STACKTRACE_LIBINWIND_INL_H_ - if (recursive) { - return 0; - } - ++recursive; - - unw_getcontext(&uc); - int ret = unw_init_local(&cursor, &uc); - assert(ret >= 0); - skip_count++; // Do not include the "GetStackTrace" frame - - while (n < max_depth) { - if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { - break; - } - if (skip_count > 0) { - skip_count--; - } else { - result[n++] = ip; - } - if (unw_step(&cursor) <= 0) { - break; - } - } - --recursive; - return n; -} +// Note: this part of the file is included several times. +// Do not put globals below. -// If you change this function, also change GetStackTrace above: -// -// This GetStackFrames routine shares a lot of code with GetStackTrace -// above. This code could have been refactored into a common routine, -// and then both GetStackTrace/GetStackFrames could call that routine. 
-// There are two problems with that: +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() // -// (1) The performance of the refactored-code suffers substantially - the -// refactored needs to be able to record the stack trace when called -// from GetStackTrace, and both the stack trace and stack frame sizes, -// when called from GetStackFrames - this introduces enough new -// conditionals that GetStackTrace performance can degrade by as much -// as 50%. -// -// (2) Whether the refactored routine gets inlined into GetStackTrace and -// GetStackFrames depends on the compiler, and we can't guarantee the -// behavior either-way, even with "__attribute__ ((always_inline))" -// or "__attribute__ ((noinline))". But we need this guarantee or the -// frame counts may be off by one. -// -// Both (1) and (2) can be addressed without this code duplication, by -// clever use of template functions, and by defining GetStackTrace and -// GetStackFrames as macros that expand to these template functions. -// However, this approach comes with its own set of problems - namely, -// macros and preprocessor trouble - for example, if GetStackTrace -// and/or GetStackFrames is ever defined as a member functions in some -// class, we are in trouble. 
-int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +int GET_STACK_TRACE_OR_FRAMES { void *ip; int n = 0; unw_cursor_t cursor; unw_context_t uc; +#if IS_STACK_FRAMES unw_word_t sp = 0, next_sp = 0; +#endif if (recursive) { return 0; @@ -126,31 +88,41 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { ++recursive; unw_getcontext(&uc); - RAW_CHECK(unw_init_local(&cursor, &uc) >= 0, "unw_init_local failed"); - skip_count++; // Do not include the "GetStackFrames" frame + int ret = unw_init_local(&cursor, &uc); + assert(ret >= 0); + skip_count++; // Do not include current frame while (skip_count--) { - if (unw_step(&cursor) <= 0 || - unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) { + if (unw_step(&cursor) <= 0) { goto out; } +#if IS_STACK_FRAMES + if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { + goto out; + } +#endif } + while (n < max_depth) { - sp = next_sp; - if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) + if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { break; - if (unw_step(&cursor) <= 0 || - unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { - // We couldn't step any further (possibly because we reached _start). - // Provide the last good PC we've got, and get out. 
- sizes[n] = 0; - pcs[n++] = ip; + } +#if IS_STACK_FRAMES + sizes[n] = 0; +#endif + result[n++] = ip; + if (unw_step(&cursor) <= 0) { + break; + } +#if IS_STACK_FRAMES + sp = next_sp; + if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) , 0) { break; } - sizes[n] = next_sp - sp; - pcs[n++] = ip; + sizes[n - 1] = next_sp - sp; +#endif } - out: +out: --recursive; return n; } diff --git a/src/stacktrace_powerpc-inl.h b/src/stacktrace_powerpc-inl.h index 5631e49..9a07eea 100644 --- a/src/stacktrace_powerpc-inl.h +++ b/src/stacktrace_powerpc-inl.h @@ -36,6 +36,11 @@ // http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK // Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882 +#ifndef BASE_STACKTRACE_POWERPC_INL_H_ +#define BASE_STACKTRACE_POWERPC_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + #include <stdint.h> // for uintptr_t #include <stdlib.h> // for NULL #include <google/stacktrace.h> @@ -71,9 +76,23 @@ static void **NextStackFrame(void **old_sp) { // This ensures that GetStackTrace stes up the Link Register properly. void StacktracePowerPCDummyFunction() __attribute__((noinline)); void StacktracePowerPCDummyFunction() { __asm__ volatile(""); } +#endif // BASE_STACKTRACE_POWERPC_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. -// If you change this function, also change GetStackFrames below. 
-int GetStackTrace(void** result, int max_depth, int skip_count) { +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +int GET_STACK_TRACE_OR_FRAMES { void **sp; // Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther) // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a @@ -95,11 +114,29 @@ int GetStackTrace(void** result, int max_depth, int skip_count) { // This routine forces the compiler (at least gcc) to push it anyway. StacktracePowerPCDummyFunction(); +#if IS_STACK_FRAMES + // Note we do *not* increment skip_count here for the SYSV ABI. If + // we did, the list of stack frames wouldn't properly match up with + // the list of return addresses. Note this means the top pc entry + // is probably bogus for linux/ppc (and other SYSV-ABI systems). +#else // The LR save area is used by the callee, so the top entry is bogus. skip_count++; +#endif int n = 0; while (sp && n < max_depth) { +#if IS_STACK_FRAMES + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few bogus + // entries in some rare cases). 
+ void **next_sp = NextStackFrame<false>(sp); +#else + void **next_sp = NextStackFrame<true>(sp); +#endif + if (skip_count > 0) { skip_count--; } else { @@ -120,85 +157,15 @@ int GetStackTrace(void** result, int max_depth, int skip_count) { #else #error Need to specify the PPC ABI for your archiecture. #endif - } - // Use strict unwinding rules. - sp = NextStackFrame<true>(sp); - } - return n; -} - -// If you change this function, also change GetStackTrace above: -// -// This GetStackFrames routine shares a lot of code with GetStackTrace -// above. This code could have been refactored into a common routine, -// and then both GetStackTrace/GetStackFrames could call that routine. -// There are two problems with that: -// -// (1) The performance of the refactored-code suffers substantially - the -// refactored needs to be able to record the stack trace when called -// from GetStackTrace, and both the stack trace and stack frame sizes, -// when called from GetStackFrames - this introduces enough new -// conditionals that GetStackTrace performance can degrade by as much -// as 50%. -// -// (2) Whether the refactored routine gets inlined into GetStackTrace and -// GetStackFrames depends on the compiler, and we can't guarantee the -// behavior either-way, even with "__attribute__ ((always_inline))" -// or "__attribute__ ((noinline))". But we need this guarantee or the -// frame counts may be off by one. -// -// Both (1) and (2) can be addressed without this code duplication, by -// clever use of template functions, and by defining GetStackTrace and -// GetStackFrames as macros that expand to these template functions. -// However, this approach comes with its own set of problems - namely, -// macros and preprocessor trouble - for example, if GetStackTrace -// and/or GetStackFrames is ever defined as a member functions in some -// class, we are in trouble. 
-int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) { - void **sp; -#ifdef __APPLE__ - __asm__ volatile ("mr %0,r1" : "=r" (sp)); -#else - __asm__ volatile ("mr %0,1" : "=r" (sp)); -#endif - StacktracePowerPCDummyFunction(); - // Note we do *not* increment skip_count here for the SYSV ABI. If - // we did, the list of stack frames wouldn't properly match up with - // the list of return addresses. Note this means the top pc entry - // is probably bogus for linux/ppc (and other SYSV-ABI systems). - - int n = 0; - while (sp && n < max_depth) { - // The GetStackFrames routine is called when we are in some - // informational context (the failure signal handler for example). - // Use the non-strict unwinding rules to produce a stack trace - // that is as complete as possible (even if it contains a few bogus - // entries in some rare cases). - void **next_sp = NextStackFrame<false>(sp); - if (skip_count > 0) { - skip_count--; - } else { -#if defined(_CALL_AIX) || defined(_CALL_DARWIN) - pcs[n++] = *(sp+2); -#elif defined(_CALL_SYSV) - pcs[n++] = *(sp+1); -#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__)) - // This check is in case the compiler doesn't define _CALL_AIX/etc. - pcs[n++] = *(sp+2); -#elif defined(__linux) - // This check is in case the compiler doesn't define _CALL_SYSV. - pcs[n++] = *(sp+1); -#else -#error Need to specify the PPC ABI for your archiecture. -#endif +#if IS_STACK_FRAME if (next_sp > sp) { sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; } else { // A frame-size of 0 is used to indicate unknown frame size. sizes[n] = 0; } - n++; +#endif } sp = next_sp; } diff --git a/src/stacktrace_with_context.cc b/src/stacktrace_with_context.cc deleted file mode 100644 index ed7bfe3..0000000 --- a/src/stacktrace_with_context.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2009, Google Inc. -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Paul Pluzhnikov -// -// This code logically belongs in stacktrace.cc, but -// it is moved into (this) separate file in order to -// prevent inlining of routines defined here. -// -// Inlining causes skip_count to be incorrect, and there -// is no portable way to prevent it. -// -// Eventually LTO (link-time optimization) and/or LLVM -// may inline this code anyway. Let's hope they respect -// ATTRIBUTE_NOINLINE. 
- -#include <config.h> -#include <google/stacktrace.h> -#include "stacktrace_config.h" -#include "base/basictypes.h" - -#if !defined(STACKTRACE_SKIP_CONTEXT_ROUTINES) -ATTRIBUTE_NOINLINE PERFTOOLS_DLL_DECL -int GetStackFramesWithContext(void** pcs, int* sizes, int max_depth, - int skip_count, const void * /* uc */) { - return GetStackFrames(pcs, sizes, max_depth, skip_count + 1); -} - -ATTRIBUTE_NOINLINE PERFTOOLS_DLL_DECL -int GetStackTraceWithContext(void** result, int max_depth, - int skip_count, const void * /* uc */) { - return GetStackTrace(result, max_depth, skip_count + 1); -} -#endif diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h index 05701e7..6753fdb 100644 --- a/src/stacktrace_x86-inl.h +++ b/src/stacktrace_x86-inl.h @@ -31,17 +31,13 @@ // Author: Sanjay Ghemawat // // Produce stack trace -// -// NOTE: there is code duplication between -// GetStackTrace, GetStackTraceWithContext, GetStackFrames and -// GetStackFramesWithContext. If you update one, update them all. -// -// There is no easy way to avoid this, because inlining -// interferes with skip_count, and there is no portable -// way to turn inlining off, or force it always on. -#include "config.h" +#ifndef BASE_STACKTRACE_X86_INL_H_ +#define BASE_STACKTRACE_X86_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: +#include "config.h" #include <stdlib.h> // for NULL #include <assert.h> #if defined(HAVE_SYS_UCONTEXT_H) @@ -190,8 +186,8 @@ static void **NextStackFrame(void **old_sp, const void *uc) { const ucontext_t *ucv = static_cast<const ucontext_t *>(uc); // This kernel does not use frame pointer in its VDSO code, // and so %ebp is not suitable for unwinding. 
- const void **const reg_ebp = - reinterpret_cast<const void **>(ucv->uc_mcontext.gregs[REG_EBP]); + void **const reg_ebp = + reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]); const unsigned char *const reg_eip = reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]); if (new_sp == reg_ebp && @@ -269,209 +265,24 @@ static void **NextStackFrame(void **old_sp, const void *uc) { return new_sp; } -// If you change this function, see NOTE at the top of file. -// Same as above, but with signal ucontext_t pointer. -int GetStackTraceWithContext(void** result, - int max_depth, - int skip_count, - const void *uc) { - void **sp; -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ - // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. - // It's always correct on llvm, and the techniques below aren't (in - // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), - // so we also prefer __builtin_frame_address when running under llvm. - sp = reinterpret_cast<void**>(__builtin_frame_address(0)); -#elif defined(__i386__) - // Stack frame format: - // sp[0] pointer to previous frame - // sp[1] caller address - // sp[2] first argument - // ... - // NOTE: This will break under llvm, since result is a copy and not in sp[2] - sp = (void **)&result - 2; -#elif defined(__x86_64__) - unsigned long rbp; - // Move the value of the register %rbp into the local variable rbp. - // We need 'volatile' to prevent this instruction from getting moved - // around during optimization to before function prologue is done. 
- // An alternative way to achieve this - // would be (before this __asm__ instruction) to call Noop() defined as - // static void Noop() __attribute__ ((noinline)); // prevent inlining - // static void Noop() { asm(""); } // prevent optimizing-away - __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); - // Arguments are passed in registers on x86-64, so we can't just - // offset from &result - sp = (void **) rbp; -#else -# error Using stacktrace_x86-inl.h on a non x86 architecture! -#endif - - int n = 0; - while (sp && n < max_depth) { - if (*(sp+1) == reinterpret_cast<void *>(0)) { - // In 64-bit code, we often see a frame that - // points to itself and has a return address of 0. - break; - } - if (skip_count > 0) { - skip_count--; - } else { - result[n++] = *(sp+1); - } - // Use strict unwinding rules. - sp = NextStackFrame<true, true>(sp, uc); - } - return n; -} - -int GetStackTrace(void** result, int max_depth, int skip_count) { - void **sp; -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ - // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. - // It's always correct on llvm, and the techniques below aren't (in - // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), - // so we also prefer __builtin_frame_address when running under llvm. - sp = reinterpret_cast<void**>(__builtin_frame_address(0)); -#elif defined(__i386__) - // Stack frame format: - // sp[0] pointer to previous frame - // sp[1] caller address - // sp[2] first argument - // ... - // NOTE: This will break under llvm, since result is a copy and not in sp[2] - sp = (void **)&result - 2; -#elif defined(__x86_64__) - unsigned long rbp; - // Move the value of the register %rbp into the local variable rbp. - // We need 'volatile' to prevent this instruction from getting moved - // around during optimization to before function prologue is done. 
- // An alternative way to achieve this - // would be (before this __asm__ instruction) to call Noop() defined as - // static void Noop() __attribute__ ((noinline)); // prevent inlining - // static void Noop() { asm(""); } // prevent optimizing-away - __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); - // Arguments are passed in registers on x86-64, so we can't just - // offset from &result - sp = (void **) rbp; -#else -# error Using stacktrace_x86-inl.h on a non x86 architecture! -#endif +#endif // BASE_STACKTRACE_X86_INL_H_ - int n = 0; - while (sp && n < max_depth) { - if (*(sp+1) == reinterpret_cast<void *>(0)) { - // In 64-bit code, we often see a frame that - // points to itself and has a return address of 0. - break; - } - if (skip_count > 0) { - skip_count--; - } else { - result[n++] = *(sp+1); - } - // Use strict unwinding rules. - sp = NextStackFrame<true, false>(sp, NULL); - } - return n; -} +// Note: this part of the file is included several times. +// Do not put globals below. -// If you change this function, see NOTE at the top of file. -// -// This GetStackFrames routine shares a lot of code with GetStackTrace -// above. This code could have been refactored into a common routine, -// and then both GetStackTrace/GetStackFrames could call that routine. -// There are two problems with that: +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() // -// (1) The performance of the refactored-code suffers substantially - the -// refactored needs to be able to record the stack trace when called -// from GetStackTrace, and both the stack trace and stack frame sizes, -// when called from GetStackFrames - this introduces enough new -// conditionals that GetStackTrace performance can degrade by as much -// as 50%. 
-// -// (2) Whether the refactored routine gets inlined into GetStackTrace and -// GetStackFrames depends on the compiler, and we can't guarantee the -// behavior either-way, even with "__attribute__ ((always_inline))" -// or "__attribute__ ((noinline))". But we need this guarantee or the -// frame counts may be off by one. -// -// Both (1) and (2) can be addressed without this code duplication, by -// clever use of template functions, and by defining GetStackTrace and -// GetStackFrames as macros that expand to these template functions. -// However, this approach comes with its own set of problems - namely, -// macros and preprocessor trouble - for example, if GetStackTrace -// and/or GetStackFrames is ever defined as a member functions in some -// class, we are in trouble. -int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { - void **sp; -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ - // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. - // It's always correct on llvm, and the techniques below aren't (in - // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), - // so we also prefer __builtin_frame_address when running under llvm. - sp = reinterpret_cast<void**>(__builtin_frame_address(0)); -#elif defined(__i386__) - // Stack frame format: - // sp[0] pointer to previous frame - // sp[1] caller address - // sp[2] first argument - // ... - sp = (void **)&pcs - 2; -#elif defined(__x86_64__) - unsigned long rbp; - // Move the value of the register %rbp into the local variable rbp. - // We need 'volatile' to prevent this instruction from getting moved - // around during optimization to before function prologue is done. 
- // An alternative way to achieve this - // would be (before this __asm__ instruction) to call Noop() defined as - // static void Noop() __attribute__ ((noinline)); // prevent inlining - // static void Noop() { asm(""); } // prevent optimizing-away - __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); - // Arguments are passed in registers on x86-64, so we can't just - // offset from &result - sp = (void **) rbp; -#else -# error Using stacktrace_x86-inl.h on a non x86 architecture! -#endif - - int n = 0; - while (sp && n < max_depth) { - if (*(sp+1) == reinterpret_cast<void *>(0)) { - // In 64-bit code, we often see a frame that - // points to itself and has a return address of 0. - break; - } - // The GetStackFrames routine is called when we are in some - // informational context (the failure signal handler for example). - // Use the non-strict unwinding rules to produce a stack trace - // that is as complete as possible (even if it contains a few bogus - // entries in some rare cases). - void **next_sp = NextStackFrame<false, false>(sp, NULL); - if (skip_count > 0) { - skip_count--; - } else { - pcs[n] = *(sp+1); - if (next_sp > sp) { - sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; - } else { - // A frame-size of 0 is used to indicate unknown frame size. - sizes[n] = 0; - } - n++; - } - sp = next_sp; - } - return n; -} +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -// If you change this function, see NOTE at the top of file. -// Same as above, but with signal ucontext_t pointer. 
-int GetStackFramesWithContext(void** pcs, - int* sizes, - int max_depth, - int skip_count, - const void *uc) { +int GET_STACK_TRACE_OR_FRAMES { void **sp; #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. @@ -511,22 +322,22 @@ int GetStackFramesWithContext(void** pcs, // points to itself and has a return address of 0. break; } - // The GetStackFrames routine is called when we are in some - // informational context (the failure signal handler for example). - // Use the non-strict unwinding rules to produce a stack trace - // that is as complete as possible (even if it contains a few bogus - // entries in some rare cases). - void **next_sp = NextStackFrame<false, true>(sp, uc); +#if !IS_WITH_CONTEXT + const void *const ucp = NULL; +#endif + void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp); if (skip_count > 0) { skip_count--; } else { - pcs[n] = *(sp+1); + result[n] = *(sp+1); +#if IS_STACK_FRAMES if (next_sp > sp) { sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; } else { // A frame-size of 0 is used to indicate unknown frame size. sizes[n] = 0; } +#endif n++; } sp = next_sp; diff --git a/src/system-alloc.cc b/src/system-alloc.cc index 3341f17..6d2e1c6 100644 --- a/src/system-alloc.cc +++ b/src/system-alloc.cc @@ -168,16 +168,16 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, // a strict check here if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL; - // could theoretically return the "extra" bytes here, but this - // is simple and correct. - if (actual_size) { - *actual_size = size; - } - // This doesn't overflow because TCMalloc_SystemAlloc has already // tested for overflow at the alignment boundary. size = ((size + alignment - 1) / alignment) * alignment; + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. 
+ if (actual_size) { + *actual_size = size; + } + // Check that we we're not asking for so much more memory that we'd // wrap around the end of the virtual address space. (This seems // like something sbrk() should check for us, and indeed opensolaris @@ -243,12 +243,6 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, return NULL; } - // could theoretically return the "extra" bytes here, but this - // is simple and correct. - if (actual_size) { - *actual_size = size; - } - // Enforce page alignment if (pagesize == 0) pagesize = getpagesize(); if (alignment < pagesize) alignment = pagesize; @@ -258,6 +252,12 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, } size = aligned_size; + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. + if (actual_size) { + *actual_size = size; + } + // Ask for extra memory if alignment > pagesize size_t extra = 0; if (alignment > pagesize) { @@ -333,12 +333,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, initialized = true; } - // could theoretically return the "extra" bytes here, but this - // is simple and correct. - if (actual_size) { - *actual_size = size; - } - // Enforce page alignment if (pagesize == 0) pagesize = getpagesize(); if (alignment < pagesize) alignment = pagesize; @@ -348,6 +342,12 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, } size = aligned_size; + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. 
+ if (actual_size) { + *actual_size = size; + } + // Ask for extra memory if alignment > pagesize size_t extra = 0; if (alignment > pagesize) { diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index 625301e..48cf328 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -231,8 +231,9 @@ extern "C" { ATTRIBUTE_SECTION(google_malloc); void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); - // Surprisingly, compilers use a nothrow-delete internally. See, eg: - // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html + // Surprisingly, standard C++ library implementations use a + // nothrow-delete internally. See, eg: + // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW @@ -253,9 +254,9 @@ extern "C" { // NOTE: we make many of these symbols weak, but do so in the makefile // (via objcopy -W) and not here. That ends up being more portable. 
# define ALIAS(x) __attribute__ ((alias (x))) -void* operator new(size_t size) ALIAS("tc_new"); +void* operator new(size_t size) throw (std::bad_alloc) ALIAS("tc_new"); void operator delete(void* p) __THROW ALIAS("tc_delete"); -void* operator new[](size_t size) ALIAS("tc_newarray"); +void* operator new[](size_t size) throw (std::bad_alloc) ALIAS("tc_newarray"); void operator delete[](void* p) __THROW ALIAS("tc_deletearray"); void* operator new(size_t size, const std::nothrow_t&) __THROW ALIAS("tc_new_nothrow"); @@ -264,7 +265,7 @@ void* operator new[](size_t size, const std::nothrow_t&) __THROW void operator delete(void* size, const std::nothrow_t&) __THROW ALIAS("tc_delete_nothrow"); void operator delete[](void* size, const std::nothrow_t&) __THROW - ALIAS("tc_deletearray_nothrow"); + ALIAS("tc_deletearray_nothrow"); extern "C" { void* malloc(size_t size) __THROW ALIAS("tc_malloc"); void free(void* ptr) __THROW ALIAS("tc_free"); @@ -765,7 +766,17 @@ TCMallocGuard::TCMallocGuard() { tc_free(tc_malloc(1)); ThreadCache::InitTSD(); tc_free(tc_malloc(1)); - MallocExtension::Register(new TCMallocImplementation); + // Either we, or debugallocation.cc, or valgrind will control memory + // management. We register our extension if we're the winner. +#ifdef TCMALLOC_FOR_DEBUGALLOCATION + // Let debugallocation register its extension. +#else + if (RunningOnValgrind()) { + // Let Valgrind uses its own malloc (so don't register our extension). 
+ } else { + MallocExtension::Register(new TCMallocImplementation); + } +#endif } } @@ -1353,8 +1364,7 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { return p; } -extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow( - size_t size, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW { void* p = cpp_alloc(size, true); MallocHook::InvokeNewHook(p, size); return p; @@ -1365,10 +1375,10 @@ extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW { do_free(p); } -// Compilers define and use this (via ::operator delete(ptr, nothrow)). +// Standard C++ library implementations define and use this +// (via ::operator delete(ptr, nothrow)). // But it's really the same as normal delete, so we just do the same thing. -extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow( - void* p, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(p); do_free(p); } @@ -1384,8 +1394,8 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { return p; } -extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow( - size_t size, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) + __THROW { void* p = cpp_alloc(size, true); MallocHook::InvokeNewHook(p, size); return p; @@ -1396,8 +1406,7 @@ extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW { do_free(p); } -extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow( - void* p, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(p); do_free(p); } diff --git a/src/tests/debugallocation_test.cc b/src/tests/debugallocation_test.cc index ca00e36..c482187 100644 --- a/src/tests/debugallocation_test.cc +++ b/src/tests/debugallocation_test.cc @@ -75,7 
+75,14 @@ static int test_counter = 0; // incremented every time the macro is called // This flag won't be compiled in in opt mode. DECLARE_int32(max_free_queue_size); +// Test match as well as mismatch rules: TEST(DebugAllocationTest, DeallocMismatch) { + // malloc can be matched only by free + // new can be matched only by delete and delete(nothrow) + // new[] can be matched only by delete[] and delete[](nothrow) + // new(nothrow) can be matched only by delete and delete(nothrow) + // new(nothrow)[] can be matched only by delete[] and delete[](nothrow) + // Allocate with malloc. { int* x = static_cast<int*>(malloc(sizeof(*x))); @@ -88,17 +95,41 @@ TEST(DebugAllocationTest, DeallocMismatch) { // Allocate with new. { int* x = new int; + int* y = new int; IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]"); delete x; + ::operator delete(y, std::nothrow); } // Allocate with new[]. { int* x = new int[1]; + int* y = new int[1]; + IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); + IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete"); + delete [] x; + ::operator delete[](y, std::nothrow); + } + + // Allocate with new(nothrow). + { + int* x = new(std::nothrow) int; + int* y = new(std::nothrow) int; + IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); + IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]"); + delete x; + ::operator delete(y, std::nothrow); + } + + // Allocate with new(nothrow)[]. 
+ { + int* x = new(std::nothrow) int[1]; + int* y = new(std::nothrow) int[1]; IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete"); delete [] x; + ::operator delete[](y, std::nothrow); } } diff --git a/src/tests/heap-checker-death_unittest.sh b/src/tests/heap-checker-death_unittest.sh index 9f0c08c..4a83fc2 100755 --- a/src/tests/heap-checker-death_unittest.sh +++ b/src/tests/heap-checker-death_unittest.sh @@ -139,13 +139,13 @@ EARLY_MSG="Starting tracking the heap$" Test 60 0 "$EARLY_MSG" "" \ HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ - PERFTOOLS_VERBOSE=1 || exit 5 + PERFTOOLS_VERBOSE=10 || exit 5 Test 60 0 "MemoryRegionMap Init$" "" \ HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ - PERFTOOLS_VERBOSE=2 || exit 6 + PERFTOOLS_VERBOSE=11 || exit 6 Test 60 0 "" "$EARLY_MSG" \ HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ - PERFTOOLS_VERBOSE=-2 || exit 7 + PERFTOOLS_VERBOSE=-11 || exit 7 # These invocations should fail with very high probability, # rather than return 0 or hang (1 == exit(1), 134 == abort(), 139 = SIGSEGV): @@ -162,10 +162,10 @@ Test 60 1 "MakeALeak" "" \ # Test that very early log messages are present and controllable: Test 60 1 "Starting tracking the heap$" "" \ - HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=1 \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=10 \ || exit 11 Test 60 1 "" "Starting tracking the heap" \ - HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-1 \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-10 \ || exit 12 cd / # so we're not in TMPDIR when we delete it diff --git a/src/tests/profiler_unittest.cc b/src/tests/profiler_unittest.cc index 1908b03..19371b7 100644 --- a/src/tests/profiler_unittest.cc +++ 
b/src/tests/profiler_unittest.cc @@ -56,12 +56,11 @@ static void test_other_thread() { int i, m; char b[128]; + MutexLock ml(&mutex); for (m = 0; m < 1000000; ++m) { // run millions of times for (i = 0; i < g_iters; ++i ) { - MutexLock ml(&mutex); result ^= i; } - MutexLock ml(&mutex); snprintf(b, sizeof(b), "%d", result); // get some libc action } #endif @@ -70,12 +69,11 @@ static void test_other_thread() { static void test_main_thread() { int i, m; char b[128]; + MutexLock ml(&mutex); for (m = 0; m < 1000000; ++m) { // run millions of times for (i = 0; i < g_iters; ++i ) { - MutexLock ml(&mutex); result ^= i; } - MutexLock ml(&mutex); snprintf(b, sizeof(b), "%d", result); // get some libc action } } diff --git a/src/tests/profiler_unittest.sh b/src/tests/profiler_unittest.sh index 5766f2e..4668fa7 100755 --- a/src/tests/profiler_unittest.sh +++ b/src/tests/profiler_unittest.sh @@ -206,28 +206,27 @@ CPUPROFILE="$TMPDIR/p5" "$PROFILER2" 50 || RegisterFailure CPUPROFILE="$TMPDIR/p6" "$PROFILER2" 100 || RegisterFailure VerifySimilar p5 "$PROFILER2_REALNAME" p6 "$PROFILER2_REALNAME" 2 -# When we compile with threads, things take a lot longer even when we only use 1 -CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 10 || RegisterFailure -CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 20 || RegisterFailure +CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 30 || RegisterFailure +CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 60 || RegisterFailure VerifySimilar p5b "$PROFILER3_REALNAME" p5c "$PROFILER3_REALNAME" 2 # Now try what happens when we use threads -"$PROFILER3" 5 2 "$TMPDIR/p7" || RegisterFailure -"$PROFILER3" 10 2 "$TMPDIR/p8" || RegisterFailure +"$PROFILER3" 30 2 "$TMPDIR/p7" || RegisterFailure +"$PROFILER3" 60 2 "$TMPDIR/p8" || RegisterFailure VerifySimilar p7 "$PROFILER3_REALNAME" p8 "$PROFILER3_REALNAME" 2 -"$PROFILER4" 5 2 "$TMPDIR/p9" || RegisterFailure -"$PROFILER4" 10 2 "$TMPDIR/p10" || RegisterFailure +"$PROFILER4" 30 2 "$TMPDIR/p9" || RegisterFailure +"$PROFILER4" 60 2 "$TMPDIR/p10" || 
RegisterFailure VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2 # More threads! -"$PROFILER4" 2 3 "$TMPDIR/p9" || RegisterFailure -"$PROFILER4" 4 3 "$TMPDIR/p10" || RegisterFailure +"$PROFILER4" 25 3 "$TMPDIR/p9" || RegisterFailure +"$PROFILER4" 50 3 "$TMPDIR/p10" || RegisterFailure VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2 # Compare how much time the main thread takes compared to the other threads # Recall the main thread runs twice as long as the other threads, by design. -"$PROFILER4" 2 4 "$TMPDIR/p11" || RegisterFailure +"$PROFILER4" 20 4 "$TMPDIR/p11" || RegisterFailure VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 # Test symbol save and restore @@ -236,14 +235,14 @@ VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 >"$TMPDIR/p13" 2>/dev/null || RegisterFailure VerifyIdentical p12 "$PROFILER1_REALNAME" p13 "" || RegisterFailure -"$PROFILER3" 5 2 "$TMPDIR/p14" || RegisterFailure +"$PROFILER3" 30 2 "$TMPDIR/p14" || RegisterFailure "$PPROF" $PPROF_FLAGS "$PROFILER3_REALNAME" "$TMPDIR/p14" --raw \ >"$TMPDIR/p15" 2>/dev/null || RegisterFailure VerifyIdentical p14 "$PROFILER3_REALNAME" p15 "" || RegisterFailure # Test using ITIMER_REAL instead of ITIMER_PROF. -env CPUPROFILE_REALTIME=1 "$PROFILER3" 5 2 "$TMPDIR/p16" || RegisterFailure -env CPUPROFILE_REALTIME=1 "$PROFILER3" 10 2 "$TMPDIR/p17" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 30 2 "$TMPDIR/p16" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 60 2 "$TMPDIR/p17" || RegisterFailure VerifySimilar p16 "$PROFILER3_REALNAME" p17 "$PROFILER3_REALNAME" 2 diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc index 25bfd6a..6b2ec26 100644 --- a/src/tests/tcmalloc_unittest.cc +++ b/src/tests/tcmalloc_unittest.cc @@ -977,7 +977,7 @@ static int RunAllTests(int argc, char** argv) { } // This code stresses some of the memory allocation via STL. - // In particular, it calls operator delete(void*, nothrow_t). 
+ // It may call operator delete(void*, nothrow_t). fprintf(LOGSTREAM, "Testing STL use\n"); { std::vector<int> v; diff --git a/src/third_party/valgrind.h b/src/third_party/valgrind.h new file mode 100644 index 0000000..577c59a --- /dev/null +++ b/src/third_party/valgrind.h @@ -0,0 +1,3924 @@ +/* -*- c -*- + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (valgrind.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2008 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (valgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query Valgrind's + execution inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. When not running on valgrind, each client request + consumes very few (eg. 7) instructions, so the resulting performance + loss is negligible unless you plan to execute client requests + millions of times per second. Nevertheless, if that is still a + problem, you can compile with the NVALGRIND symbol defined (gcc + -DNVALGRIND) so that client requests are not even compiled in. */ + +#ifndef __VALGRIND_H +#define __VALGRIND_H + +#include <stdarg.h> + +/* Nb: this file might be included in a file compiled with -ansi. So + we can't use C++ style "//" comments nor the "asm" keyword (instead + use "__asm__"). */ + +/* Derive some tags indicating what the target platform is. 
Note + that in this file we're using the compiler's CPP symbols for + identifying architectures, which are different to the ones we use + within the rest of Valgrind. Note, __powerpc__ is active for both + 32 and 64-bit PPC, whereas __powerpc64__ is only active for the + latter (on Linux, that is). */ +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_ppc32_aix5 +#undef PLAT_ppc64_aix5 + +#if !defined(_AIX) && defined(__i386__) +# define PLAT_x86_linux 1 +#elif !defined(_AIX) && defined(__x86_64__) +# define PLAT_amd64_linux 1 +#elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__) +# define PLAT_ppc32_linux 1 +#elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__) +# define PLAT_ppc64_linux 1 +#elif defined(_AIX) && defined(__64BIT__) +# define PLAT_ppc64_aix5 1 +#elif defined(_AIX) && !defined(__64BIT__) +# define PLAT_ppc32_aix5 1 +#endif + + +/* If we're not compiling for our target platform, don't generate + any inline asms. */ +#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \ + && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \ + && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5) +# if !defined(NVALGRIND) +# define NVALGRIND 1 +# endif +#endif + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */ +/* in here of use to end-users -- skip to the next section. */ +/* ------------------------------------------------------------------ */ + +#if defined(NVALGRIND) + +/* Define NVALGRIND to completely remove the Valgrind magic sequence + from the compiled code (analogous to NDEBUG's effects on + assert()) */ +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { \ + (_zzq_rlval) = (_zzq_default); \ + } + +#else /* ! 
NVALGRIND */ + +/* The following defines the magic code sequences which the JITter + spots and handles magically. Don't look too closely at them as + they will rot your brain. + + The assembly code sequences for all architectures are in this one + file. This is because this file must be stand-alone, and we don't + want to have multiple files. + + For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default + value gets put in the return slot, so that everything works when + this is executed not under Valgrind. Args are passed in a memory + block, and so there's no intrinsic limit to the number that could + be passed, but it's currently five. + + The macro args are: + _zzq_rlval result lvalue + _zzq_default default value (result returned when running on real CPU) + _zzq_request request code + _zzq_arg1..5 request params + + The other two macros are used to support function wrapping, and are + a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the + guest's NRADDR pseudo-register and whatever other information is + needed to safely call the original from the wrapper: on + ppc64-linux, the R2 value at the divert point is also needed. This + information is abstracted into a user-visible type, OrigFn. + + VALGRIND_CALL_NOREDIR_* behaves the same as the following on the + guest, but guarantees that the branch instruction will not be + redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: + branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a + complete inline asm, since it needs to be combined with more magic + inline asm stuff to be useful. +*/ + +/* ------------------------- x86-linux ------------------------- */ + +#if defined(PLAT_x86_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" +#endif /* PLAT_x86_linux */ + +/* ------------------------ amd64-linux ------------------------ */ + +#if defined(PLAT_amd64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned long long int _zzq_args[6]; \ + volatile unsigned long long int _zzq_result; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" +#endif /* PLAT_amd64_linux */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned long long int _zzq_args[6]; \ + register unsigned long long int _zzq_result __asm__("r3"); \ + register unsigned long long int* _zzq_ptr __asm__("r4"); \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1" \ + : "=r" (_zzq_result) \ + : "0" (_zzq_default), "r" (_zzq_ptr) \ + : "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr __asm__("r3"); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + unsigned int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[7]; \ + register unsigned int _zzq_result; \ + register unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 4,%1\n\t" \ + "lwz 3, 24(4)\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b" (_zzq_result) \ + : "b" (_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? 
*/ + unsigned long long int r2; /* what tocptr do we need? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned long long int _zzq_args[7]; \ + register unsigned long long int _zzq_result; \ + register unsigned long long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int long long)(_zzq_request); \ + _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int long long)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 4,%1\n\t" \ + "ld 3, 48(4)\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b" (_zzq_result) \ + : "b" (_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_aix5 */ + +/* Insert assembly code for other platforms here... 
*/ + +#endif /* NVALGRIND */ + + +/* ------------------------------------------------------------------ */ +/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */ +/* ugly. It's the least-worst tradeoff I can think of. */ +/* ------------------------------------------------------------------ */ + +/* This section defines magic (a.k.a. appalling-hack) macros for doing + guaranteed-no-redirection calls, so as to get from function + wrappers to the functions they are wrapping. The whole point is to + construct standard call sequences, but to do the call itself with a + special no-redirect call pseudo-instruction that the JIT + understands and handles specially. This section is long and + repetitious, and I can't see a way to make it shorter. + + The naming scheme is as follows: + + CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc} + + 'W' stands for "word" and 'v' for "void". Hence there are + different macros for calling arity 0, 1, 2, 3, 4, etc, functions, + and for each, the possibility of returning a word-typed result, or + no result. +*/ + +/* Use these to write the name of your wrapper. NOTE: duplicates + VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */ + +#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ + _vgwZU_##soname##_##fnname + +#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ + _vgwZZ_##soname##_##fnname + +/* Use this macro from within a wrapper function to collect the + context (address and possibly other info) of the original function. + Once you have that you can then use it in one of the CALL_FN_ + macros. The type of the argument _lval is OrigFn. */ +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) + +/* Derivatives of the main macros below, for calling functions + returning void. 
*/ + +#define CALL_FN_v_v(fnptr) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_v(_junk,fnptr); } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_W(_junk,fnptr,arg1); } while (0) + +#define CALL_FN_v_WW(fnptr, arg1,arg2) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) + +/* ------------------------- x86-linux ------------------------- */ + +#if defined(PLAT_x86_linux) + +/* These regs are trashed by the hidden call. No need to mention eax + as gcc can already see that, plus causes gcc to bomb. */ +#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" + +/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned + long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $4, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = 
(orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $8, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $12, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = 
(__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $20, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $24, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ 
+ _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $28, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + 
volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $36, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + 
VALGRIND_CALL_NOREDIR_EAX \ + "addl $40, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $44, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = 
(unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "pushl 48(%%eax)\n\t" \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $48, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_x86_linux */ + +/* ------------------------ amd64-linux ------------------------ */ + +#if defined(PLAT_amd64_linux) + +/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ + "rdi", "r8", "r9", "r10", "r11" + +/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned + long) == 8. */ + +/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_ + macros. In order not to trash the stack redzone, we need to drop + %rsp by 128 before the hidden call, and restore afterwards. The + nastyness is that it is only by luck that the stack still appears + to be unwindable during the hidden call - since then the behaviour + of any routine using this macro does not match what the CFI data + says. Sigh. + + Why is this important? Imagine that a wrapper has a stack + allocated local, and passes to the hidden call, a pointer to it. + Because gcc does not know about the hidden call, it may allocate + that local in the redzone. 
Unfortunately the hidden call may then + trash it before it comes to use it. So we must step clear of the + redzone, for the duration of the hidden call, to make it safe. + + Probably the same problem afflicts the other redzone-style ABIs too + (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is + self describing (none of this CFI nonsense) so at least messing + with the stack pointer doesn't give a danger of non-unwindable + stack. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), 
%%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile 
unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CALL_NOREDIR_RAX \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned 
long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $8, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $16, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, 
arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $24, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" 
\ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $32, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $40, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig 
= (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $48, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_amd64_linux */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +/* This is useful for finding out about the on-stack stuff: + + extern int f9 ( int,int,int,int,int,int,int,int,int ); + extern int f10 ( int,int,int,int,int,int,int,int,int,int ); + extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); + extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); + + int g9 ( void ) { + return f9(11,22,33,44,55,66,77,88,99); + } + int g10 ( void ) { + return f10(11,22,33,44,55,66,77,88,99,110); + } + int g11 ( void ) { + return f11(11,22,33,44,55,66,77,88,99,110,121); + } + int g12 ( void ) { + 
return f12(11,22,33,44,55,66,77,88,99,110,121,132); + } +*/ + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc32-linux, + sizeof(unsigned long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + 
VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned 
long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = 
(unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned 
long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* 
target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; 
\ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ 
volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = 
(unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* 
_argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 
*/ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long 
_argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned 
long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = 
(unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ 
+ "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. */ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "lwz 3," #_n_fr "(1)\n\t" \ + "stw 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned + long) == 4. 
*/ + /* CALL_FN_W_v (ppc32-aix5): call the function described by orig with no arguments and assign the result, cast to the type of lval, to lval. _argvec[1] and _argvec[2] receive orig.r2 and orig.nraddr. The asm points r11 at &_argvec[2], expands the frame by 512 bytes, stores the current r2 at -8(r11), loads r2 from -4(r11) and the target from 0(r11), then runs VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 (defined outside this span; presumably the call through r11 - confirm). Afterwards r3 is copied into _res, r2 is reloaded from -8(r11) and the frame is contracted by 512 bytes. */ +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_W: same sequence as CALL_FN_W_v, with arg1 stored in _argvec[2+1] and loaded into r3 before the call. */ +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_WW: same sequence as CALL_FN_W_v, with arg1 and arg2 stored in _argvec[2+1] and _argvec[2+2] and loaded into r3 and r4 before the call. */ +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned
long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_WWW: same sequence as CALL_FN_W_v with three arguments; arg1..arg3 are stored in _argvec[2+1].._argvec[2+3] and loaded into r3..r5 before the call. */ +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_WWWW: same sequence as CALL_FN_W_v with four arguments; arg1..arg4 are stored in _argvec[2+1].._argvec[2+4] and loaded into r3..r6 before the call. */ +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile
unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_5W: same sequence as CALL_FN_W_v with five arguments; arg1..arg5 are stored in _argvec[2+1].._argvec[2+5] and loaded into r3..r7 before the call. */ +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr
11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_6W: same sequence as CALL_FN_W_v with six arguments; arg1..arg6 are stored in _argvec[2+1].._argvec[2+6] and loaded into r3..r8 before the call. */ +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_7W: same sequence as CALL_FN_W_v with seven arguments; arg1..arg7 are stored in _argvec[2+1].._argvec[2+7] and loaded into r3..r9 before the call. */ +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ +
_argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_8W: same sequence as CALL_FN_W_v with eight arguments; arg1..arg8 are stored in _argvec[2+1].._argvec[2+8] and loaded into r3..r10 before the call. */ +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz
6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_9W: nine arguments. After the 512-byte expansion and the r2 switch, the frame is expanded by a further 64 bytes and arg9 is copied through r3 to 56(r1); arg1..arg8 are then loaded into r3..r10 as in CALL_FN_W_8W. After the call the 64-byte expansion is contracted before the 512-byte one. */ +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz
2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_10W: ten arguments. Same layout as CALL_FN_W_9W: a further 64-byte frame expansion, then arg10 is copied through r3 to 60(r1) and arg9 to 56(r1) before arg1..arg8 are loaded into r3..r10. Both expansions are contracted after the call, the 64-byte one first. */ +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory",
__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + /* CALL_FN_W_11W: eleven arguments. The second frame expansion is 72 bytes here; arg11, arg10 and arg9 are copied through r3 to 64(r1), 60(r1) and 56(r1) before arg1..arg8 are loaded into r3..r10. After the call the 72-byte expansion is contracted first, then the 512-byte one. */ +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0)
+ +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,68(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = 
(__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. */ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "ld 3," #_n_fr "(1)\n\t" \ + "std 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned + long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned 
long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = 
(unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile 
unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" 
/* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); 
\ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned 
long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's 
tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 
3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" 
\ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_aix5 */ + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ +/* */ +/* ------------------------------------------------------------------ */ + +/* Some request codes. There are many more of these, but most are not + exposed to end-user view. These are the public ones, all of the + form 0x1000 + small_number. + + Core ones are in the range 0x00000000--0x0000ffff. The non-public + ones start at 0x2000. +*/ + +/* These macros are used by tools -- they must be public, but don't + embed them into other programs. */ +#define VG_USERREQ_TOOL_BASE(a,b) \ + ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) +#define VG_IS_TOOL_USERREQ(a, b, v) \ + (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. 
*/ +typedef + enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, + VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, + + /* These allow any function to be called from the simulated + CPU but run on the real CPU. Note: the first arg passed to + the function is always the ThreadId of the running + thread! So CLIENT_CALL0 actually requires a 1-arg + function, CLIENT_CALL1 a 2-arg function, etc. */ + VG_USERREQ__CLIENT_CALL0 = 0x1101, + VG_USERREQ__CLIENT_CALL1 = 0x1102, + VG_USERREQ__CLIENT_CALL2 = 0x1103, + VG_USERREQ__CLIENT_CALL3 = 0x1104, + + /* Can be useful in regression testing suites -- e.g. you can + send Valgrind's output to /dev/null and still count + errors. */ + VG_USERREQ__COUNT_ERRORS = 0x1201, + + /* These are useful and can be interpreted by any tool that + tracks malloc() et al, by using vg_replace_malloc.c. */ + VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, + VG_USERREQ__FREELIKE_BLOCK = 0x1302, + /* Memory pool support; continues the 0x13xx numbering of the + malloc-like requests above. */ + VG_USERREQ__CREATE_MEMPOOL = 0x1303, + VG_USERREQ__DESTROY_MEMPOOL = 0x1304, + VG_USERREQ__MEMPOOL_ALLOC = 0x1305, + VG_USERREQ__MEMPOOL_FREE = 0x1306, + VG_USERREQ__MEMPOOL_TRIM = 0x1307, + VG_USERREQ__MOVE_MEMPOOL = 0x1308, + VG_USERREQ__MEMPOOL_CHANGE = 0x1309, + VG_USERREQ__MEMPOOL_EXISTS = 0x130a, + + /* Allow printf-style output to the Valgrind log. */ + VG_USERREQ__PRINTF = 0x1401, + VG_USERREQ__PRINTF_BACKTRACE = 0x1402, + + /* Stack support. */ + VG_USERREQ__STACK_REGISTER = 0x1501, + VG_USERREQ__STACK_DEREGISTER = 0x1502, + VG_USERREQ__STACK_CHANGE = 0x1503 + } Vg_ClientRequest; + +#if !defined(__GNUC__) +# define __extension__ /* expands to nothing on non-GNU compilers */ +#endif + +/* Returns the number of Valgrinds this code is running under. That + is, 0 if running natively, 1 if running under Valgrind, 2 if + running under Valgrind which is running under another Valgrind, + etc.
*/ +#define RUNNING_ON_VALGRIND __extension__ \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* result if not under Valgrind */, \ + VG_USERREQ__RUNNING_ON_VALGRIND, \ + 0, 0, 0, 0, 0); \ + _qzz_res; \ + }) + + +/* Discard translations of code in the range [_qzz_addr .. _qzz_addr + + _qzz_len - 1]. Useful if you are debugging a JITter or some such, + since it provides a way to make sure valgrind will retranslate the + invalidated area. Returns no value. */ +#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__DISCARD_TRANSLATIONS, \ + _qzz_addr, _qzz_len, 0, 0, 0); \ + } + + +/* These requests are for getting Valgrind itself to print something. + Possibly with a backtrace. This is a really ugly hack. + When NVALGRIND is defined both names expand to nothing, so callers + must not rely on their return value. */ + +#if defined(NVALGRIND) + +# define VALGRIND_PRINTF(...) +# define VALGRIND_PRINTF_BACKTRACE(...) + +#else /* NVALGRIND */ + +/* Modern GCC will optimize the static routine out if unused, + and the unused attribute will silence warnings about it. + NOTE(review): va_list is cast straight to unsigned long below; + presumably this relies on va_list being pointer-like on the + supported platforms -- confirm before porting. */ +static int VALGRIND_PRINTF(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +static int +VALGRIND_PRINTF(const char *format, ...) +{ + unsigned long _qzz_res; + va_list vargs; + va_start(vargs, format); + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF, + (unsigned long)format, (unsigned long)vargs, + 0, 0, 0); + va_end(vargs); + return (int)_qzz_res; +} + +static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +static int +VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+{ + unsigned long _qzz_res; + va_list vargs; + va_start(vargs, format); + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE, + (unsigned long)format, (unsigned long)vargs, + 0, 0, 0); + va_end(vargs); + return (int)_qzz_res; +} + +#endif /* NVALGRIND */ + + +/* These requests allow control to move from the simulated CPU to the + real CPU, calling an arbitrary function. + + Note that the current ThreadId is inserted as the first argument. + So this call: + + VALGRIND_NON_SIMD_CALL2(f, arg1, arg2) + + requires f to have this signature: + + Word f(Word tid, Word arg1, Word arg2) + + where "Word" is a word-sized type. + + Note that these client requests are not entirely reliable. For example, + if you call a function with them that subsequently calls printf(), + there's a high chance Valgrind will crash. Generally, your prospects of + these working are made higher if the called function does not refer to + any global variables, and does not refer to any libc or other functions + (printf et al). Any kind of entanglement with libc or dynamic linking is + likely to have a bad outcome, for tricky reasons which we've grappled + with a lot in the past.
+*/ +#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL0, \ + _qyy_fn, \ + 0, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL1, \ + _qyy_fn, \ + _qyy_arg1, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL2, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL3, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, \ + _qyy_arg3, 0); \ + _qyy_res; \ + }) + + +/* Counts the number of errors that have been recorded by a tool. Note: + the tool must record the errors with VG_(maybe_record_error)() or + VG_(unique_error)() for them to be counted. */ +#define VALGRIND_COUNT_ERRORS \ + __extension__ \ + ({unsigned int _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__COUNT_ERRORS, \ + 0, 0, 0, 0, 0); \ + _qyy_res; \ + }) + +/* Mark a block of memory as having been allocated by a malloc()-like + function. `addr' is the start of the usable block (i.e. after any + redzone); `rzB' is the redzone size if the allocator can apply redzones; + use '0' if not. Adding redzones makes it more likely Valgrind will spot + block overruns. `is_zeroed' indicates if the memory is zeroed, as it is + for calloc(). Put it immediately after the point where a block is + allocated.
+ + If you're using Memcheck: If you're allocating memory via superblocks, + and then handing out small chunks of each superblock, if you don't have + redzones on your small blocks, it's worth marking the superblock with + VALGRIND_MAKE_MEM_NOACCESS when it's created, so that block overruns are + detected. But if you can put redzones on, it's probably better to not do + this, so that messages for small overruns are described in terms of the + small block rather than the superblock (but if you have a big overrun + that skips over a redzone, you could miss an error this way). See + memcheck/tests/custom_alloc.c for an example. + + WARNING: if your allocator uses malloc() or 'new' to allocate + superblocks, rather than mmap() or brk(), this will not work properly -- + you'll likely get assertion failures during leak detection. This is + because Valgrind doesn't like seeing overlapping heap blocks. Sorry. + + Nb: block must be freed via a free()-like function specified + with VALGRIND_FREELIKE_BLOCK or mismatch errors will occur. */ +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MALLOCLIKE_BLOCK, \ + addr, sizeB, rzB, is_zeroed, 0); \ + } + +/* Mark a block of memory as having been freed by a free()-like function. + `rzB' is the redzone size; it must match that given to + VALGRIND_MALLOCLIKE_BLOCK. Memory not freed will be detected by the leak + checker. Put it immediately after the point where the block is freed. + NOTE(review): like VALGRIND_MALLOCLIKE_BLOCK above, this expands to a + bare { } block rather than do { } while (0), so following it with + `; else' inside an unbraced if will not compile. */ +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__FREELIKE_BLOCK, \ + addr, rzB, 0, 0, 0); \ + } + +/* Create a memory pool identified by `pool'; `rzB' and `is_zeroed' + are passed along as request arguments. */ +#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__CREATE_MEMPOOL, \ + pool, rzB, is_zeroed, 0, 0); \ + } + +/* Destroy a memory pool.
*/ +#define VALGRIND_DESTROY_MEMPOOL(pool) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__DESTROY_MEMPOOL, \ + pool, 0, 0, 0, 0); \ + } + +/* Associate a piece of memory (`addr', `size') with a memory pool. */ +#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_ALLOC, \ + pool, addr, size, 0, 0); \ + } + +/* Disassociate a piece of memory from a memory pool. */ +#define VALGRIND_MEMPOOL_FREE(pool, addr) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_FREE, \ + pool, addr, 0, 0, 0); \ + } + +/* Disassociate any pieces outside a particular range. */ +#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_TRIM, \ + pool, addr, size, 0, 0); \ + } + +/* Move a memory pool: the pool previously anchored at `poolA' is now + anchored at `poolB'. */ +#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MOVE_MEMPOOL, \ + poolA, poolB, 0, 0, 0); \ + } + +/* Resize and/or move a piece associated with a memory pool. */ +#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_CHANGE, \ + pool, addrA, addrB, size, 0); \ + } + +/* Return 1 if a mempool exists, else 0. This is a GNU statement + expression, so it carries __extension__ just like + RUNNING_ON_VALGRIND and VALGRIND_COUNT_ERRORS (keeps -pedantic + builds quiet; a no-op on non-GNU compilers). */ +#define VALGRIND_MEMPOOL_EXISTS(pool) __extension__ \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_EXISTS, \ + pool, 0, 0, 0, 0); \ + _qzz_res; \ + }) + +/* Mark a piece of memory as being a stack. Returns a stack id.
*/ +#define VALGRIND_STACK_REGISTER(start, end) __extension__ /* GNU statement expression follows, as in RUNNING_ON_VALGRIND */ \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_REGISTER, \ + start, end, 0, 0, 0); \ + _qzz_res; \ + }) + +/* Unmark the piece of memory associated with a stack id as being a + stack. */ +#define VALGRIND_STACK_DEREGISTER(id) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_DEREGISTER, \ + id, 0, 0, 0, 0); \ + } + +/* Change the start and end address of the stack identified by `id'. */ +#define VALGRIND_STACK_CHANGE(id, start, end) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_CHANGE, \ + id, start, end, 0, 0); \ + } + + +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_ppc32_aix5 +#undef PLAT_ppc64_aix5 + +#endif /* __VALGRIND_H */ diff --git a/src/windows/google/tcmalloc.h b/src/windows/google/tcmalloc.h index 4b97b15..663b7f9 100644 --- a/src/windows/google/tcmalloc.h +++ b/src/windows/google/tcmalloc.h @@ -61,7 +61,8 @@ #endif #ifdef __cplusplus -#include <new> // for nothrow_t +#include <new> // for std::nothrow_t + extern "C" { #endif // Returns a human-readable version string.
If major, minor, @@ -92,16 +93,15 @@ extern "C" { #ifdef __cplusplus PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); - PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; - PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); - PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; - PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW; - PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, - const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW; } diff --git a/src/windows/port.cc b/src/windows/port.cc index 76a9e38..bf3b106 100644 --- a/src/windows/port.cc +++ b/src/windows/port.cc @@ -71,7 +71,8 @@ int getpagesize() { if (pagesize == 0) { SYSTEM_INFO system_info; GetSystemInfo(&system_info); - pagesize = system_info.dwPageSize; + pagesize = std::max(system_info.dwPageSize, + system_info.dwAllocationGranularity); } return pagesize; } @@ -186,16 +187,16 @@ pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)) { // munmap's in the middle of the page, which is forbidden in windows. extern void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, size_t alignment) { - // Safest is to make actual_size same as input-size. - if (actual_size) { - *actual_size = size; - } - // Align on the pagesize boundary const int pagesize = getpagesize(); if (alignment < pagesize) alignment = pagesize; size = ((size + alignment - 1) / alignment) * alignment; + // Safest is to make actual_size same as input-size. 
+ if (actual_size) { + *actual_size = size; + } + // Ask for extra memory if alignment > pagesize size_t extra = 0; if (alignment > pagesize) { |