diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2008-02-13 00:55:09 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2008-02-13 00:55:09 +0000 |
commit | 8a0a3101bc6a7d56ac04b278f28bdf3f95b00a3c (patch) | |
tree | 46f871a3160a4023201d72b1b04a9a88e3d88b78 | |
parent | b43ba444fcd74fa7c3260f6b2494dcbaa3fdb296 (diff) | |
download | gperftools-8a0a3101bc6a7d56ac04b278f28bdf3f95b00a3c.tar.gz |
Tue Feb 12 12:28:32 2008 Google Inc. <opensource@google.com>
* google-perftools: version 0.95 release
* Better -- not perfect -- support for linux-ppc (csilvers)
* Fix race condition in libunwind stacktrace (aruns)
* Speed up x86 spinlock locking (m3b)
* Improve heap-checker performance (maxim)
* Heap checker traverses more ptrs inside heap-alloced objects (maxim)
* Remove deprecated ProfilerThreadState function (cgd)
* Update libunwind documentation for statically linked binaries (aruns)
git-svn-id: http://gperftools.googlecode.com/svn/trunk@44 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
51 files changed, 3088 insertions, 1054 deletions
@@ -1,3 +1,14 @@ +Tue Feb 12 12:28:32 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.95 release + * Better -- not perfect -- support for linux-ppc (csilvers) + * Fix race condition in libunwind stacktrace (aruns) + * Speed up x86 spinlock locking (m3b) + * Improve heap-checker performance (maxim) + * Heap checker traverses more ptrs inside heap-alloced objects (maxim) + * Remove deprecated ProfilerThreadState function (cgd) + * Update libunwind documentation for statically linked binaries (aruns) + Mon Dec 3 23:51:54 2007 Google Inc. <opensource@google.com> * google-perftools: version 0.94.1 release (bugfix release) @@ -32,6 +32,12 @@ perftools: that is, if you link with 'gcc -static -lgcc_eh ...'. This is because both libunwind and libgcc implement the same C++ exception handling APIs, but they implement them differently on some platforms. This is not likely to be a problem on ia64, but may be on x86-64. + +Also, if you link binaries statically, make sure that you add +-Wl,--eh-frame-hdr to your linker options. This is required so that +libunwind can find the information generated by the compiler required +for stack unwinding. + Using -static is rare, though, so unless you know this will affect you it probably won't. @@ -63,6 +69,7 @@ Perftools has been tested on the following systems: Linux Ubuntu 6.06.1 (x86) Linux Ubuntu 6.06.1 (x86_64) Linux RedHat 9 (x86) + Linux Debian 4.0 (PPC) Mac OS X 10.3.9 (Panther) (PowerPC) Mac OS X 10.4.8 (Tiger) (PowerPC) Mac OS X 10.4.8 (Tiger) (x86) @@ -72,9 +79,9 @@ Perftools has been tested on the following systems: Windows XP, Visual Studio 2005 (VC++ 8) (x86) Windows XP, MinGW 5.1.3 (x86) -It works in its full generality on all the Linux systems tested, both -x86 and x86_64 (though see 64-bit notes above). Portions of perftools -work on the other systems. The basic memory-allocation library, +It works in its full generality on the Linux x86 and x86_64 systems +tested (though see 64-bit notes above). 
Portions of perftools work on +the other systems. The basic memory-allocation library, tcmalloc_minimal, works on all systems. The cpu-profiler also works fairly widely. However, the heap-profiler and heap-checker are not yet as widely supported. @@ -107,23 +114,33 @@ above, by linking in libtcmalloc_minimal. is working fine. This only affects programs that call fork(); for most programs, the cpu profiler is entirely safe to use. - libtcmalloc.so does not successfully build, so neither do these - unittests that depend on it: - % make -k heap-profiler_unittest.sh heap-checker_unittest.sh \ - heap-checker-death_unittest.sh maybe_threads_unittest.sh \ - tcmalloc_unittest tcmalloc_both_unittest \ - tcmalloc_large_unittest + libtcmalloc.so successfully builds, and the "advanced" tcmalloc + functionality all works except for the leak-checker, which has + Linux-specific code: + % make heap-profiler_unittest.sh maybe_threads_unittest.sh \ + tcmalloc_unittest tcmalloc_both_unittest \ + tcmalloc_large_unittest # THESE WORK + % make -k heap-checker_unittest.sh \ + heap-checker-death_unittest.sh # THESE DO NOT I have not tested other *BSD systems, but they are probably similar. +** Linux/PPC: + + I've tested on a PowerPC Linux box using qemu against Debian Etch + (4.0). Most of the tests pass. The heap-checker unittest does not + pass for reasons which are not yet clear but seem to be related to + the clone() system call. Heap checking may work properly for + single-threaded programs, though I haven't tested that. + ** Mac OS X: - I've tested OS X 10.4 [Tiger] and OS X 10.3 [Panther] on both intel - (x86) and PowerPC systems. For Panther/ppc systems, perftools does - not work at all: it depends on a header file, OSAtomic.h, which is - new in 10.4. + I've tested OS X 10.5 [Leopard], OS X 10.4 [Tiger] and OS X 10.3 + [Panther] on both intel (x86) and PowerPC systems. 
For Panther/ppc + systems, perftools does not work at all: it depends on a header + file, OSAtomic.h, which is new in 10.4. - For the other three systems, the binaries and libraries that + For the other seven systems, the binaries and libraries that successfully build are exactly the same as for FreeBSD. See that section for a list of binaries and instructions on building them. diff --git a/Makefile.am b/Makefile.am index 9dd93f1..6ac4748 100644 --- a/Makefile.am +++ b/Makefile.am @@ -20,7 +20,7 @@ endif # These are x86-specific, having to do with frame-pointers if X86_64 -if ENABLE_FRAME_POINTER +if ENABLE_FRAME_POINTERS AM_CXXFLAGS += -fno-omit-frame-pointer else # TODO(csilvers): check if -fomit-frame-pointer might be in $(CXXFLAGS), @@ -86,6 +86,15 @@ noinst_LTLIBRARIES += liblogging.la liblogging_la_SOURCES = src/base/logging.cc \ $(LOGGING_INCLUDES) +SYSINFO_INCLUDES = src/base/sysinfo.h \ + src/base/logging.h \ + src/base/commandlineflags.h \ + src/base/cycleclock.h \ + src/base/basictypes.h +noinst_LTLIBRARIES += libsysinfo.la +libsysinfo_la_SOURCES = src/base/sysinfo.cc \ + $(SYSINFO_INCLUDES) + # For MinGW, we use libwindows and not libspinlock. For every other # unix system, we use libspinlock and don't need libwindows. 
Luckily, # we need the windows.a library in exactly the same place we need @@ -108,7 +117,7 @@ libwindows_la_SOURCES = $(WINDOWS_INCLUDES) \ src/windows/patch_functions.cc \ src/windows/preamble_patcher.cc \ src/windows/preamble_patcher_with_stub.cc -LIBSPINLOCK = libwindows.la +LIBSPINLOCK = libwindows.la libsysinfo.la liblogging.la MAYBE_THREADS_CC = SYSTEM_ALLOC_CC = @@ -117,6 +126,7 @@ else SPINLOCK_INCLUDES = src/base/spinlock.h \ src/base/atomicops.h \ src/base/atomicops-internals-macosx.h \ + src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h @@ -124,7 +134,8 @@ noinst_LTLIBRARIES += libspinlock.la libspinlock_la_SOURCES = src/base/spinlock.cc \ src/base/atomicops-internals-x86.cc \ $(SPINLOCK_INCLUDES) -LIBSPINLOCK = libspinlock.la +# spinlock also needs NumCPUs, from libsysinfo, which in turn needs liblogging +LIBSPINLOCK = libspinlock.la libsysinfo.la liblogging.la MAYBE_THREADS_CC = src/maybe_threads.cc SYSTEM_ALLOC_CC = src/system-alloc.cc @@ -142,7 +153,7 @@ low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \ src/malloc_hook.cc \ src/tests/low_level_alloc_unittest.cc \ $(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES) -low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la +low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libstacktrace.la if !MINGW TESTS += atomicops_unittest @@ -153,7 +164,7 @@ ATOMICOPS_UNITTEST_INCLUDES = src/base/atomicops.h \ $(LOGGING_INCLUDES) atomicops_unittest_SOURCES = src/tests/atomicops_unittest.cc \ $(ATOMICOPS_UNITTEST_INCLUDES) -atomicops_unittest_LDADD = $(LIBSPINLOCK) liblogging.la +atomicops_unittest_LDADD = $(LIBSPINLOCK) endif !MINGW @@ -246,7 +257,7 @@ libtcmalloc_minimal_la_SOURCES = src/internal_logging.cc \ libtcmalloc_minimal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) \ - libstacktrace.la $(LIBSPINLOCK) 
liblogging.la + libstacktrace.la $(LIBSPINLOCK) # Whenever we link in tcmalloc_minimal, we also need to link in # libstacktrace.so (we also need libspinlock and liblogging, but those @@ -478,14 +489,13 @@ libtcmalloc_la_SOURCES = src/internal_logging.cc \ src/heap-checker.cc \ src/base/linuxthreads.c \ src/base/thread_lister.c \ - src/base/sysinfo.cc \ src/base/low_level_alloc.cc \ $(TCMALLOC_INCLUDES) \ src/heap-checker-bcad.cc libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) \ - libstacktrace.la $(LIBSPINLOCK) liblogging.la + libstacktrace.la $(LIBSPINLOCK) # See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this. # Basically it's to work around systems where --rpath doesn't work right. @@ -595,7 +605,7 @@ S_CPU_PROFILER_INCLUDES = src/profiledata.h \ src/base/commandlineflags.h \ src/base/googleinit.h \ src/base/logging.h \ - src/base/mutex.h \ + src/base/simple_mutex.h \ src/base/sysinfo.h \ $(SPINLOCK_INCLUDES) \ $(LOGGING_INCLUDES) @@ -608,11 +618,10 @@ googleinclude_HEADERS += $(SG_CPU_PROFILER_INCLUDES) lib_LTLIBRARIES += libprofiler.la libprofiler_la_SOURCES = src/profiler.cc \ src/profiledata.cc \ - src/base/sysinfo.cc \ $(CPU_PROFILER_INCLUDES) -libprofiler_la_LIBADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la +libprofiler_la_LIBADD = $(LIBSPINLOCK) libstacktrace.la # We have to include ProfileData for profiledata_unittest -CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfilerThreadState|ProfileData)' +CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfileData)' libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS) # See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this. 
@@ -631,7 +640,7 @@ profiledata_unittest_SOURCES = src/tests/profiledata_unittest.cc \ src/base/commandlineflags.h \ src/base/logging.h \ src/base/basictypes.h -profiledata_unittest_LDADD = -lprofiler +profiledata_unittest_LDADD = $(LIBPROFILER) TESTS += profiler_unittest.sh profiler_unittest_sh_SOURCES = src/tests/profiler_unittest.sh @@ -672,6 +681,7 @@ profiler4_unittest_DEPENDENCIES = $(LIBPROFILER) ### Documentation dist_doc_DATA += doc/cpuprofile.html \ + doc/cpuprofile-fileformat.html \ doc/pprof-test-big.gif \ doc/pprof-test.gif \ doc/pprof-vsnprintf-big.gif \ diff --git a/Makefile.in b/Makefile.in index 0c0e850..f47672a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -50,8 +50,8 @@ target_triplet = @target@ @GCC_TRUE@am__append_1 = -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare # These are x86-specific, having to do with frame-pointers -@ENABLE_FRAME_POINTER_TRUE@@X86_64_TRUE@am__append_2 = -fno-omit-frame-pointer -@ENABLE_FRAME_POINTER_FALSE@@X86_64_TRUE@am__append_3 = -DNO_FRAME_POINTER +@ENABLE_FRAME_POINTERS_TRUE@@X86_64_TRUE@am__append_2 = -fno-omit-frame-pointer +@ENABLE_FRAME_POINTERS_FALSE@@X86_64_TRUE@am__append_3 = -DNO_FRAME_POINTER noinst_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_5) @MINGW_TRUE@am__append_4 = libwindows.la @MINGW_FALSE@am__append_5 = libspinlock.la @@ -105,8 +105,10 @@ EXTRA_PROGRAMS = ptmalloc_unittest1$(EXEEXT) \ ### Documentation @MINGW_FALSE@am__append_15 = doc/heapprofile.html \ @MINGW_FALSE@ doc/heap-example1.png doc/heap_checker.html \ -@MINGW_FALSE@ doc/cpuprofile.html doc/pprof-test-big.gif \ -@MINGW_FALSE@ doc/pprof-test.gif doc/pprof-vsnprintf-big.gif \ +@MINGW_FALSE@ doc/cpuprofile.html \ +@MINGW_FALSE@ doc/cpuprofile-fileformat.html \ +@MINGW_FALSE@ doc/pprof-test-big.gif doc/pprof-test.gif \ +@MINGW_FALSE@ doc/pprof-vsnprintf-big.gif \ @MINGW_FALSE@ doc/pprof-vsnprintf.gif @MINGW_TRUE@profiler2_unittest_DEPENDENCIES = DIST_COMMON = README $(am__configure_deps) $(am__dist_doc_DATA_DIST) \ @@ -147,29 
+149,33 @@ liblogging_la_LIBADD = am__objects_1 = am_liblogging_la_OBJECTS = logging.lo $(am__objects_1) liblogging_la_OBJECTS = $(am_liblogging_la_OBJECTS) -@MINGW_FALSE@am__DEPENDENCIES_1 = libspinlock.la -@MINGW_TRUE@am__DEPENDENCIES_1 = libwindows.la +@MINGW_FALSE@am__DEPENDENCIES_1 = libspinlock.la libsysinfo.la \ +@MINGW_FALSE@ liblogging.la +@MINGW_TRUE@am__DEPENDENCIES_1 = libwindows.la libsysinfo.la \ +@MINGW_TRUE@ liblogging.la @MINGW_FALSE@libprofiler_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ -@MINGW_FALSE@ liblogging.la libstacktrace.la +@MINGW_FALSE@ libstacktrace.la am__libprofiler_la_SOURCES_DIST = src/profiler.cc src/profiledata.cc \ - src/base/sysinfo.cc src/profiledata.h src/getpc.h \ - src/base/basictypes.h src/base/commandlineflags.h \ - src/base/googleinit.h src/base/logging.h src/base/mutex.h \ - src/base/sysinfo.h src/base/spinlock.h src/base/atomicops.h \ + src/profiledata.h src/getpc.h src/base/basictypes.h \ + src/base/commandlineflags.h src/base/googleinit.h \ + src/base/logging.h src/base/simple_mutex.h src/base/sysinfo.h \ + src/base/spinlock.h src/base/atomicops.h \ src/base/atomicops-internals-macosx.h \ + src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h src/google/profiler.h \ src/google/stacktrace.h @MINGW_FALSE@am__objects_2 = $(am__objects_1) $(am__objects_1) @MINGW_FALSE@am__objects_3 = $(am__objects_2) $(am__objects_1) @MINGW_FALSE@am_libprofiler_la_OBJECTS = profiler.lo profiledata.lo \ -@MINGW_FALSE@ sysinfo.lo $(am__objects_3) +@MINGW_FALSE@ $(am__objects_3) libprofiler_la_OBJECTS = $(am_libprofiler_la_OBJECTS) @MINGW_FALSE@am_libprofiler_la_rpath = -rpath $(libdir) libspinlock_la_LIBADD = am__libspinlock_la_SOURCES_DIST = src/base/spinlock.cc \ src/base/atomicops-internals-x86.cc src/base/spinlock.h \ src/base/atomicops.h src/base/atomicops-internals-macosx.h \ + src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ 
src/base/atomicops-internals-x86.h @MINGW_FALSE@am_libspinlock_la_OBJECTS = spinlock.lo \ @@ -182,26 +188,28 @@ libstacktrace_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \ am__objects_4 = $(am__objects_1) $(am__objects_1) am_libstacktrace_la_OBJECTS = stacktrace.lo $(am__objects_4) libstacktrace_la_OBJECTS = $(am_libstacktrace_la_OBJECTS) +libsysinfo_la_LIBADD = +am_libsysinfo_la_OBJECTS = sysinfo.lo $(am__objects_1) +libsysinfo_la_OBJECTS = $(am_libsysinfo_la_OBJECTS) @MINGW_FALSE@libtcmalloc_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \ -@MINGW_FALSE@ libstacktrace.la $(am__DEPENDENCIES_1) \ -@MINGW_FALSE@ liblogging.la +@MINGW_FALSE@ libstacktrace.la $(am__DEPENDENCIES_1) am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \ src/system-alloc.cc src/memfs_malloc.cc src/tcmalloc.cc \ src/malloc_hook.cc src/malloc_extension.cc \ src/maybe_threads.cc src/memory_region_map.cc \ src/heap-profiler.cc src/heap-profile-table.cc \ src/heap-checker.cc src/base/linuxthreads.c \ - src/base/thread_lister.c src/base/sysinfo.cc \ - src/base/low_level_alloc.cc src/internal_logging.h \ - src/system-alloc.h src/pagemap.h src/addressmap-inl.h \ - src/packed-cache-inl.h src/heap-profile-table.h \ - src/base/basictypes.h src/base/commandlineflags.h \ - src/base/googleinit.h src/base/elfcore.h \ - src/base/linux_syscall_support.h src/base/linuxthreads.h \ - src/base/thread_lister.h src/base/sysinfo.h \ - src/base/stl_allocator.h src/maybe_threads.h \ - src/base/spinlock.h src/base/atomicops.h \ + src/base/thread_lister.c src/base/low_level_alloc.cc \ + src/internal_logging.h src/system-alloc.h src/pagemap.h \ + src/addressmap-inl.h src/packed-cache-inl.h \ + src/heap-profile-table.h src/base/basictypes.h \ + src/base/commandlineflags.h src/base/googleinit.h \ + src/base/elfcore.h src/base/linux_syscall_support.h \ + src/base/linuxthreads.h src/base/thread_lister.h \ + src/base/sysinfo.h src/base/stl_allocator.h \ + src/maybe_threads.h src/base/spinlock.h src/base/atomicops.h \ 
src/base/atomicops-internals-macosx.h \ + src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h src/base/logging.h \ src/google/malloc_hook.h src/google/malloc_extension.h \ @@ -220,20 +228,21 @@ am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \ @MINGW_FALSE@ libtcmalloc_la-heap-profiler.lo \ @MINGW_FALSE@ libtcmalloc_la-heap-profile-table.lo \ @MINGW_FALSE@ libtcmalloc_la-heap-checker.lo linuxthreads.lo \ -@MINGW_FALSE@ thread_lister.lo libtcmalloc_la-sysinfo.lo \ +@MINGW_FALSE@ thread_lister.lo \ @MINGW_FALSE@ libtcmalloc_la-low_level_alloc.lo \ @MINGW_FALSE@ $(am__objects_3) \ @MINGW_FALSE@ libtcmalloc_la-heap-checker-bcad.lo libtcmalloc_la_OBJECTS = $(am_libtcmalloc_la_OBJECTS) @MINGW_FALSE@am_libtcmalloc_la_rpath = -rpath $(libdir) libtcmalloc_minimal_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \ - libstacktrace.la $(am__DEPENDENCIES_1) liblogging.la + libstacktrace.la $(am__DEPENDENCIES_1) am__libtcmalloc_minimal_la_SOURCES_DIST = src/internal_logging.cc \ src/system-alloc.cc src/memfs_malloc.cc src/tcmalloc.cc \ src/malloc_hook.cc src/malloc_extension.cc \ src/maybe_threads.cc src/internal_logging.h src/system-alloc.h \ src/packed-cache-inl.h src/base/spinlock.h \ src/base/atomicops.h src/base/atomicops-internals-macosx.h \ + src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h src/base/commandlineflags.h \ src/base/basictypes.h src/pagemap.h src/maybe_threads.h \ @@ -306,8 +315,7 @@ am__atomicops_unittest_SOURCES_DIST = src/tests/atomicops_unittest.cc \ @MINGW_FALSE@am_atomicops_unittest_OBJECTS = \ @MINGW_FALSE@ atomicops_unittest.$(OBJEXT) $(am__objects_11) atomicops_unittest_OBJECTS = $(am_atomicops_unittest_OBJECTS) -@MINGW_FALSE@atomicops_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) \ -@MINGW_FALSE@ liblogging.la +@MINGW_FALSE@atomicops_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) am_frag_unittest_OBJECTS = 
frag_unittest-frag_unittest.$(OBJEXT) frag_unittest_OBJECTS = $(am_frag_unittest_OBJECTS) am__DEPENDENCIES_3 = libstacktrace.la libtcmalloc_minimal.la @@ -361,6 +369,7 @@ am__low_level_alloc_unittest_SOURCES_DIST = \ src/base/low_level_alloc.h src/base/basictypes.h \ src/google/malloc_hook.h src/base/spinlock.h \ src/base/atomicops.h src/base/atomicops-internals-macosx.h \ + src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h src/base/logging.h \ src/base/commandlineflags.h @@ -370,7 +379,7 @@ am_low_level_alloc_unittest_OBJECTS = low_level_alloc.$(OBJEXT) \ low_level_alloc_unittest_OBJECTS = \ $(am_low_level_alloc_unittest_OBJECTS) low_level_alloc_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) \ - liblogging.la libstacktrace.la + libstacktrace.la am_markidle_unittest_OBJECTS = \ markidle_unittest-markidle_unittest.$(OBJEXT) \ markidle_unittest-testutil.$(OBJEXT) @@ -396,7 +405,9 @@ am__profiledata_unittest_SOURCES_DIST = \ @MINGW_FALSE@am_profiledata_unittest_OBJECTS = \ @MINGW_FALSE@ profiledata_unittest.$(OBJEXT) profiledata_unittest_OBJECTS = $(am_profiledata_unittest_OBJECTS) -profiledata_unittest_DEPENDENCIES = +@MINGW_FALSE@am__DEPENDENCIES_5 = libstacktrace.la libprofiler.la +@MINGW_FALSE@profiledata_unittest_DEPENDENCIES = \ +@MINGW_FALSE@ $(am__DEPENDENCIES_5) am__profiler1_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \ src/tests/testutil.h src/tests/testutil.cc \ src/config_for_unittests.h src/google/profiler.h @@ -406,7 +417,6 @@ am__profiler1_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \ @MINGW_FALSE@ $(am__objects_1) @MINGW_FALSE@am_profiler1_unittest_OBJECTS = $(am__objects_12) profiler1_unittest_OBJECTS = $(am_profiler1_unittest_OBJECTS) -@MINGW_FALSE@am__DEPENDENCIES_5 = libstacktrace.la libprofiler.la @MINGW_FALSE@profiler1_unittest_DEPENDENCIES = $(am__DEPENDENCIES_5) am__profiler2_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \ src/tests/testutil.h 
src/tests/testutil.cc \ @@ -535,10 +545,10 @@ CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXXLD) $(AM_CXXFLAGS) \ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ SOURCES = $(liblogging_la_SOURCES) $(libprofiler_la_SOURCES) \ $(libspinlock_la_SOURCES) $(libstacktrace_la_SOURCES) \ - $(libtcmalloc_la_SOURCES) $(libtcmalloc_minimal_la_SOURCES) \ - $(libwindows_la_SOURCES) $(addressmap_unittest_SOURCES) \ - $(atomicops_unittest_SOURCES) $(frag_unittest_SOURCES) \ - $(getpc_test_SOURCES) \ + $(libsysinfo_la_SOURCES) $(libtcmalloc_la_SOURCES) \ + $(libtcmalloc_minimal_la_SOURCES) $(libwindows_la_SOURCES) \ + $(addressmap_unittest_SOURCES) $(atomicops_unittest_SOURCES) \ + $(frag_unittest_SOURCES) $(getpc_test_SOURCES) \ $(heap_checker_death_unittest_sh_SOURCES) \ $(heap_checker_unittest_SOURCES) \ $(heap_checker_unittest_sh_SOURCES) \ @@ -563,7 +573,7 @@ SOURCES = $(liblogging_la_SOURCES) $(libprofiler_la_SOURCES) \ DIST_SOURCES = $(liblogging_la_SOURCES) \ $(am__libprofiler_la_SOURCES_DIST) \ $(am__libspinlock_la_SOURCES_DIST) $(libstacktrace_la_SOURCES) \ - $(am__libtcmalloc_la_SOURCES_DIST) \ + $(libsysinfo_la_SOURCES) $(am__libtcmalloc_la_SOURCES_DIST) \ $(am__libtcmalloc_minimal_la_SOURCES_DIST) \ $(am__libwindows_la_SOURCES_DIST) \ $(addressmap_unittest_SOURCES) \ @@ -625,8 +635,9 @@ am__dist_doc_DATA_DIST = AUTHORS COPYING ChangeLog INSTALL NEWS README \ doc/pageheap.dot doc/spanmap.dot doc/threadheap.dot \ doc/heapprofile.html doc/heap-example1.png \ doc/heap_checker.html doc/cpuprofile.html \ - doc/pprof-test-big.gif doc/pprof-test.gif \ - doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif + doc/cpuprofile-fileformat.html doc/pprof-test-big.gif \ + doc/pprof-test.gif doc/pprof-vsnprintf-big.gif \ + doc/pprof-vsnprintf.gif dist_docDATA_INSTALL = $(INSTALL_DATA) DATA = $(dist_doc_DATA) am__googleinclude_HEADERS_DIST = src/google/stacktrace.h \ @@ -674,8 +685,8 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ -ENABLE_FRAME_POINTER_FALSE = 
@ENABLE_FRAME_POINTER_FALSE@ -ENABLE_FRAME_POINTER_TRUE = @ENABLE_FRAME_POINTER_TRUE@ +ENABLE_FRAME_POINTERS_FALSE = @ENABLE_FRAME_POINTERS_FALSE@ +ENABLE_FRAME_POINTERS_TRUE = @ENABLE_FRAME_POINTERS_TRUE@ EXEEXT = @EXEEXT@ F77 = @F77@ FFLAGS = @FFLAGS@ @@ -830,7 +841,8 @@ dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README \ lib_LTLIBRARIES = libstacktrace.la libtcmalloc_minimal.la \ $(am__append_11) # This is for 'convenience libraries' -- basically just a container for sources -noinst_LTLIBRARIES = liblogging.la $(am__append_4) $(am__append_5) +noinst_LTLIBRARIES = liblogging.la libsysinfo.la $(am__append_4) \ + $(am__append_5) WINDOWS_PROJECTS = google-perftools.sln \ vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj \ vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj \ @@ -889,6 +901,15 @@ LOGGING_INCLUDES = src/base/logging.h \ liblogging_la_SOURCES = src/base/logging.cc \ $(LOGGING_INCLUDES) +SYSINFO_INCLUDES = src/base/sysinfo.h \ + src/base/logging.h \ + src/base/commandlineflags.h \ + src/base/cycleclock.h \ + src/base/basictypes.h + +libsysinfo_la_SOURCES = src/base/sysinfo.cc \ + $(SYSINFO_INCLUDES) + # For MinGW, we use libwindows and not libspinlock. For every other # unix system, we use libspinlock and don't need libwindows. 
Luckily, @@ -912,8 +933,9 @@ liblogging_la_SOURCES = src/base/logging.cc \ @MINGW_TRUE@ src/windows/preamble_patcher.cc \ @MINGW_TRUE@ src/windows/preamble_patcher_with_stub.cc -@MINGW_FALSE@LIBSPINLOCK = libspinlock.la -@MINGW_TRUE@LIBSPINLOCK = libwindows.la +# spinlock also needs NumCPUs, from libsysinfo, which in turn needs liblogging +@MINGW_FALSE@LIBSPINLOCK = libspinlock.la libsysinfo.la liblogging.la +@MINGW_TRUE@LIBSPINLOCK = libwindows.la libsysinfo.la liblogging.la @MINGW_FALSE@MAYBE_THREADS_CC = src/maybe_threads.cc @MINGW_TRUE@MAYBE_THREADS_CC = @MINGW_FALSE@SYSTEM_ALLOC_CC = src/system-alloc.cc @@ -922,6 +944,7 @@ liblogging_la_SOURCES = src/base/logging.cc \ @MINGW_FALSE@SPINLOCK_INCLUDES = src/base/spinlock.h \ @MINGW_FALSE@ src/base/atomicops.h \ @MINGW_FALSE@ src/base/atomicops-internals-macosx.h \ +@MINGW_FALSE@ src/base/atomicops-internals-linuxppc.h \ @MINGW_FALSE@ src/base/atomicops-internals-x86-msvc.h \ @MINGW_FALSE@ src/base/atomicops-internals-x86.h @@ -940,7 +963,7 @@ low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \ src/tests/low_level_alloc_unittest.cc \ $(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES) -low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la +low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libstacktrace.la @MINGW_FALSE@ATOMICOPS_UNITTEST_INCLUDES = src/base/atomicops.h \ @MINGW_FALSE@ src/base/atomicops-internals-macosx.h \ @MINGW_FALSE@ src/base/atomicops-internals-x86-msvc.h \ @@ -950,7 +973,7 @@ low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la @MINGW_FALSE@atomicops_unittest_SOURCES = src/tests/atomicops_unittest.cc \ @MINGW_FALSE@ $(ATOMICOPS_UNITTEST_INCLUDES) -@MINGW_FALSE@atomicops_unittest_LDADD = $(LIBSPINLOCK) liblogging.la +@MINGW_FALSE@atomicops_unittest_LDADD = $(LIBSPINLOCK) ### ------- stack trace @@ -1019,7 +1042,7 @@ libtcmalloc_minimal_la_SOURCES = src/internal_logging.cc \ libtcmalloc_minimal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG 
$(AM_CXXFLAGS) libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) \ - libstacktrace.la $(LIBSPINLOCK) liblogging.la + libstacktrace.la $(LIBSPINLOCK) # Whenever we link in tcmalloc_minimal, we also need to link in @@ -1155,7 +1178,6 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@ src/heap-checker.cc \ @MINGW_FALSE@ src/base/linuxthreads.c \ @MINGW_FALSE@ src/base/thread_lister.c \ -@MINGW_FALSE@ src/base/sysinfo.cc \ @MINGW_FALSE@ src/base/low_level_alloc.cc \ @MINGW_FALSE@ $(TCMALLOC_INCLUDES) \ @MINGW_FALSE@ src/heap-checker-bcad.cc @@ -1163,7 +1185,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) @MINGW_FALSE@libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) @MINGW_FALSE@libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) \ -@MINGW_FALSE@ libstacktrace.la $(LIBSPINLOCK) liblogging.la +@MINGW_FALSE@ libstacktrace.la $(LIBSPINLOCK) # See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this. 
@@ -1232,7 +1254,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@ src/base/commandlineflags.h \ @MINGW_FALSE@ src/base/googleinit.h \ @MINGW_FALSE@ src/base/logging.h \ -@MINGW_FALSE@ src/base/mutex.h \ +@MINGW_FALSE@ src/base/simple_mutex.h \ @MINGW_FALSE@ src/base/sysinfo.h \ @MINGW_FALSE@ $(SPINLOCK_INCLUDES) \ @MINGW_FALSE@ $(LOGGING_INCLUDES) @@ -1243,12 +1265,11 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@CPU_PROFILER_INCLUDES = $(S_CPU_PROFILER_INCLUDES) $(SG_CPU_PROFILER_INCLUDES) @MINGW_FALSE@libprofiler_la_SOURCES = src/profiler.cc \ @MINGW_FALSE@ src/profiledata.cc \ -@MINGW_FALSE@ src/base/sysinfo.cc \ @MINGW_FALSE@ $(CPU_PROFILER_INCLUDES) -@MINGW_FALSE@libprofiler_la_LIBADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la +@MINGW_FALSE@libprofiler_la_LIBADD = $(LIBSPINLOCK) libstacktrace.la # We have to include ProfileData for profiledata_unittest -@MINGW_FALSE@CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfilerThreadState|ProfileData)' +@MINGW_FALSE@CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfileData)' @MINGW_FALSE@libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS) # See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this. 
@@ -1263,7 +1284,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@ src/base/logging.h \ @MINGW_FALSE@ src/base/basictypes.h -@MINGW_FALSE@profiledata_unittest_LDADD = -lprofiler +@MINGW_FALSE@profiledata_unittest_LDADD = $(LIBPROFILER) @MINGW_FALSE@profiler_unittest_sh_SOURCES = src/tests/profiler_unittest.sh @MINGW_FALSE@PROFILER_UNITTEST_INCLUDES = src/config_for_unittests.h \ @MINGW_FALSE@ src/google/profiler.h @@ -1393,6 +1414,8 @@ libspinlock.la: $(libspinlock_la_OBJECTS) $(libspinlock_la_DEPENDENCIES) $(CXXLINK) $(am_libspinlock_la_rpath) $(libspinlock_la_LDFLAGS) $(libspinlock_la_OBJECTS) $(libspinlock_la_LIBADD) $(LIBS) libstacktrace.la: $(libstacktrace_la_OBJECTS) $(libstacktrace_la_DEPENDENCIES) $(CXXLINK) -rpath $(libdir) $(libstacktrace_la_LDFLAGS) $(libstacktrace_la_OBJECTS) $(libstacktrace_la_LIBADD) $(LIBS) +libsysinfo.la: $(libsysinfo_la_OBJECTS) $(libsysinfo_la_DEPENDENCIES) + $(CXXLINK) $(libsysinfo_la_LDFLAGS) $(libsysinfo_la_OBJECTS) $(libsysinfo_la_LIBADD) $(LIBS) libtcmalloc.la: $(libtcmalloc_la_OBJECTS) $(libtcmalloc_la_DEPENDENCIES) $(CXXLINK) $(am_libtcmalloc_la_rpath) $(libtcmalloc_la_LDFLAGS) $(libtcmalloc_la_OBJECTS) $(libtcmalloc_la_LIBADD) $(LIBS) libtcmalloc_minimal.la: $(libtcmalloc_minimal_la_OBJECTS) $(libtcmalloc_minimal_la_DEPENDENCIES) @@ -1539,7 +1562,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-maybe_threads.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-memfs_malloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-memory_region_map.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-sysinfo.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-system-alloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-tcmalloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_minimal_la-internal_logging.Plo@am__quote@ 
@@ -1698,13 +1720,6 @@ profiledata.lo: src/profiledata.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o profiledata.lo `test -f 'src/profiledata.cc' || echo '$(srcdir)/'`src/profiledata.cc -sysinfo.lo: src/base/sysinfo.cc -@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT sysinfo.lo -MD -MP -MF "$(DEPDIR)/sysinfo.Tpo" -c -o sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc; \ -@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/sysinfo.Tpo" "$(DEPDIR)/sysinfo.Plo"; else rm -f "$(DEPDIR)/sysinfo.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/sysinfo.cc' object='sysinfo.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc - spinlock.lo: src/base/spinlock.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT spinlock.lo -MD -MP -MF "$(DEPDIR)/spinlock.Tpo" -c -o spinlock.lo `test -f 'src/base/spinlock.cc' || echo '$(srcdir)/'`src/base/spinlock.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/spinlock.Tpo" "$(DEPDIR)/spinlock.Plo"; else rm -f "$(DEPDIR)/spinlock.Tpo"; exit 1; fi @@ -1726,6 +1741,13 @@ stacktrace.lo: src/stacktrace.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) 
--tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o stacktrace.lo `test -f 'src/stacktrace.cc' || echo '$(srcdir)/'`src/stacktrace.cc +sysinfo.lo: src/base/sysinfo.cc +@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT sysinfo.lo -MD -MP -MF "$(DEPDIR)/sysinfo.Tpo" -c -o sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/sysinfo.Tpo" "$(DEPDIR)/sysinfo.Plo"; else rm -f "$(DEPDIR)/sysinfo.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/sysinfo.cc' object='sysinfo.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc + libtcmalloc_la-internal_logging.lo: src/internal_logging.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -MT libtcmalloc_la-internal_logging.lo -MD -MP -MF "$(DEPDIR)/libtcmalloc_la-internal_logging.Tpo" -c -o libtcmalloc_la-internal_logging.lo `test -f 'src/internal_logging.cc' || echo '$(srcdir)/'`src/internal_logging.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libtcmalloc_la-internal_logging.Tpo" "$(DEPDIR)/libtcmalloc_la-internal_logging.Plo"; else rm -f "$(DEPDIR)/libtcmalloc_la-internal_logging.Tpo"; exit 1; fi @@ -1803,13 +1825,6 @@ libtcmalloc_la-heap-checker.lo: src/heap-checker.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ 
$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -c -o libtcmalloc_la-heap-checker.lo `test -f 'src/heap-checker.cc' || echo '$(srcdir)/'`src/heap-checker.cc -libtcmalloc_la-sysinfo.lo: src/base/sysinfo.cc -@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -MT libtcmalloc_la-sysinfo.lo -MD -MP -MF "$(DEPDIR)/libtcmalloc_la-sysinfo.Tpo" -c -o libtcmalloc_la-sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc; \ -@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libtcmalloc_la-sysinfo.Tpo" "$(DEPDIR)/libtcmalloc_la-sysinfo.Plo"; else rm -f "$(DEPDIR)/libtcmalloc_la-sysinfo.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/sysinfo.cc' object='libtcmalloc_la-sysinfo.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -c -o libtcmalloc_la-sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc - libtcmalloc_la-low_level_alloc.lo: src/base/low_level_alloc.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -MT libtcmalloc_la-low_level_alloc.lo -MD -MP -MF "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo" -c -o libtcmalloc_la-low_level_alloc.lo `test -f 'src/base/low_level_alloc.cc' || echo '$(srcdir)/'`src/base/low_level_alloc.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo" "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Plo"; else rm -f "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo"; 
exit 1; fi @@ -2852,8 +2867,8 @@ uninstall-man: uninstall-man1 uninstall-info-am uninstall-libLTLIBRARIES uninstall-man \ uninstall-man1 -@ENABLE_FRAME_POINTER_FALSE@@X86_64_TRUE@ # TODO(csilvers): check if -fomit-frame-pointer might be in $(CXXFLAGS), -@ENABLE_FRAME_POINTER_FALSE@@X86_64_TRUE@ # before setting this. +@ENABLE_FRAME_POINTERS_FALSE@@X86_64_TRUE@ # TODO(csilvers): check if -fomit-frame-pointer might be in $(CXXFLAGS), +@ENABLE_FRAME_POINTERS_FALSE@@X86_64_TRUE@ # before setting this. @MINGW_FALSE@pprof_unittest: $(top_srcdir)/src/pprof @MINGW_FALSE@ $(top_srcdir)/src/pprof -test # This script preloads libtcmalloc, and calls two other binaries as well @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.59 for google-perftools 0.94.1. +# Generated by GNU Autoconf 2.59 for google-perftools 0.95. # # Report bugs to <opensource@google.com>. # @@ -423,8 +423,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. 
PACKAGE_NAME='google-perftools' PACKAGE_TARNAME='google-perftools' -PACKAGE_VERSION='0.94.1' -PACKAGE_STRING='google-perftools 0.94.1' +PACKAGE_VERSION='0.95' +PACKAGE_STRING='google-perftools 0.95' PACKAGE_BUGREPORT='opensource@google.com' ac_unique_file="README" @@ -465,7 +465,7 @@ ac_includes_default="\ # include <unistd.h> #endif" -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CPP CXX CXXFLAGS ac_ct_CXX CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE GCC_TRUE GCC_FALSE EGREP LN_S ECHO AR ac_ct_AR RANLIB ac_ct_RANLIB CXXCPP F77 FFLAGS ac_ct_F77 LIBTOOL LIBTOOL_DEPS UNWIND_LIBS ENABLE_FRAME_POINTER_TRUE ENABLE_FRAME_POINTER_FALSE X86_64_TRUE X86_64_FALSE acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS MINGW_TRUE MINGW_FALSE LIBOBJS LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target 
target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CPP CXX CXXFLAGS ac_ct_CXX CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE GCC_TRUE GCC_FALSE EGREP LN_S ECHO AR ac_ct_AR RANLIB ac_ct_RANLIB CXXCPP F77 FFLAGS ac_ct_F77 LIBTOOL LIBTOOL_DEPS UNWIND_LIBS ENABLE_FRAME_POINTERS_TRUE ENABLE_FRAME_POINTERS_FALSE X86_64_TRUE X86_64_FALSE acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS MINGW_TRUE MINGW_FALSE LIBOBJS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. @@ -954,7 +954,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures google-perftools 0.94.1 to adapt to many kinds of systems. +\`configure' configures google-perftools 0.95 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1021,7 +1021,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of google-perftools 0.94.1:";; + short | recursive ) echo "Configuration of google-perftools 0.95:";; esac cat <<\_ACEOF @@ -1037,7 +1037,7 @@ Optional Features: --enable-fast-install[=PKGS] optimize for fast installation [default=yes] --disable-libtool-lock avoid locking (might break parallel builds) - --enable-frame-pointer On x86_64 systems, compile with + --enable-frame-pointers On x86_64 systems, compile with -fno-omit-frame-pointer (see INSTALL) Optional Packages: @@ -1162,7 +1162,7 @@ fi test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF -google-perftools configure 0.94.1 +google-perftools configure 0.95 generated by GNU Autoconf 2.59 Copyright (C) 2003 Free Software Foundation, Inc. @@ -1176,7 +1176,7 @@ cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by google-perftools $as_me 0.94.1, which was +It was created by google-perftools $as_me 0.95, which was generated by GNU Autoconf 2.59. Invocation command line was $ $0 $@ @@ -1904,7 +1904,7 @@ fi # Define the identity of the package. 
PACKAGE='google-perftools' - VERSION='0.94.1' + VERSION='0.95' cat >>confdefs.h <<_ACEOF @@ -22042,6 +22042,7 @@ pc_fields=" uc_mcontext.gregs[REG_PC]" # Solaris x86 (32 + 64 bit) pc_fields="$pc_fields uc_mcontext.gregs[REG_EIP]" # Linux (i386) pc_fields="$pc_fields uc_mcontext.gregs[REG_RIP]" # Linux (x86_64) pc_fields="$pc_fields uc_mcontext.sc_ip" # Linux (ia64) +pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[PT_NIP]" # Linux (ppc) pc_fields="$pc_fields uc_mcontext.mc_eip" # FreeBSD (i386) pc_fields="$pc_fields uc_mcontext.mc_rip" # FreeBSD (x86_64 [untested]) pc_fields="$pc_fields uc_mcontext->ss.eip" # OS X (i386, <=10.4) @@ -22185,12 +22186,12 @@ fi # On x86_64, instead of libunwind, we can choose to compile with frame-pointers # (This isn't needed on i386, where -fno-omit-frame-pointer is the default). -# Check whether --enable-frame_pointer or --disable-frame_pointer was given. -if test "${enable_frame_pointer+set}" = set; then - enableval="$enable_frame_pointer" +# Check whether --enable-frame_pointers or --disable-frame_pointers was given. +if test "${enable_frame_pointers+set}" = set; then + enableval="$enable_frame_pointers" else - enable_frame_pointer=no + enable_frame_pointers=no fi; cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ @@ -22238,12 +22239,12 @@ fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -if test "$enable_frame_pointer" = yes; then - ENABLE_FRAME_POINTER_TRUE= - ENABLE_FRAME_POINTER_FALSE='#' +if test "$enable_frame_pointers" = yes; then + ENABLE_FRAME_POINTERS_TRUE= + ENABLE_FRAME_POINTERS_FALSE='#' else - ENABLE_FRAME_POINTER_TRUE='#' - ENABLE_FRAME_POINTER_FALSE= + ENABLE_FRAME_POINTERS_TRUE='#' + ENABLE_FRAME_POINTERS_FALSE= fi @@ -23799,10 +23800,10 @@ echo "$as_me: error: conditional \"GCC\" was never defined. Usually this means the macro was only invoked conditionally." 
>&2;} { (exit 1); exit 1; }; } fi -if test -z "${ENABLE_FRAME_POINTER_TRUE}" && test -z "${ENABLE_FRAME_POINTER_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"ENABLE_FRAME_POINTER\" was never defined. +if test -z "${ENABLE_FRAME_POINTERS_TRUE}" && test -z "${ENABLE_FRAME_POINTERS_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"ENABLE_FRAME_POINTERS\" was never defined. Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"ENABLE_FRAME_POINTER\" was never defined. +echo "$as_me: error: conditional \"ENABLE_FRAME_POINTERS\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi @@ -24091,7 +24092,7 @@ _ASBOX } >&5 cat >&5 <<_CSEOF -This file was extended by google-perftools $as_me 0.94.1, which was +This file was extended by google-perftools $as_me 0.95, which was generated by GNU Autoconf 2.59. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -24154,7 +24155,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -google-perftools config.status 0.94.1 +google-perftools config.status 0.95 configured by $0, generated by GNU Autoconf 2.59, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" @@ -24425,8 +24426,8 @@ s,@ac_ct_F77@,$ac_ct_F77,;t t s,@LIBTOOL@,$LIBTOOL,;t t s,@LIBTOOL_DEPS@,$LIBTOOL_DEPS,;t t s,@UNWIND_LIBS@,$UNWIND_LIBS,;t t -s,@ENABLE_FRAME_POINTER_TRUE@,$ENABLE_FRAME_POINTER_TRUE,;t t -s,@ENABLE_FRAME_POINTER_FALSE@,$ENABLE_FRAME_POINTER_FALSE,;t t +s,@ENABLE_FRAME_POINTERS_TRUE@,$ENABLE_FRAME_POINTERS_TRUE,;t t +s,@ENABLE_FRAME_POINTERS_FALSE@,$ENABLE_FRAME_POINTERS_FALSE,;t t s,@X86_64_TRUE@,$X86_64_TRUE,;t t s,@X86_64_FALSE@,$X86_64_FALSE,;t t s,@acx_pthread_config@,$acx_pthread_config,;t t diff --git a/configure.ac b/configure.ac index b008309..bcec57a 100644 --- a/configure.ac +++ b/configure.ac @@ -4,7 +4,7 @@ # make sure we're interpreted by some minimal autoconf 
AC_PREREQ(2.57) -AC_INIT(google-perftools, 0.94.1, opensource@google.com) +AC_INIT(google-perftools, 0.95, opensource@google.com) # The argument here is just something that should be in the current directory # (for sanity checking) AC_CONFIG_SRCDIR(README) @@ -75,6 +75,7 @@ pc_fields=" uc_mcontext.gregs[[REG_PC]]" # Solaris x86 (32 + 64 bit) pc_fields="$pc_fields uc_mcontext.gregs[[REG_EIP]]" # Linux (i386) pc_fields="$pc_fields uc_mcontext.gregs[[REG_RIP]]" # Linux (x86_64) pc_fields="$pc_fields uc_mcontext.sc_ip" # Linux (ia64) +pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[[PT_NIP]]" # Linux (ppc) pc_fields="$pc_fields uc_mcontext.mc_eip" # FreeBSD (i386) pc_fields="$pc_fields uc_mcontext.mc_rip" # FreeBSD (x86_64 [untested]) pc_fields="$pc_fields uc_mcontext->ss.eip" # OS X (i386, <=10.4) @@ -105,13 +106,13 @@ AC_SUBST(UNWIND_LIBS) # On x86_64, instead of libunwind, we can choose to compile with frame-pointers # (This isn't needed on i386, where -fno-omit-frame-pointer is the default). -AC_ARG_ENABLE(frame_pointer, - AS_HELP_STRING([--enable-frame-pointer], +AC_ARG_ENABLE(frame_pointers, + AS_HELP_STRING([--enable-frame-pointers], [On x86_64 systems, compile with -fno-omit-frame-pointer (see INSTALL)]), - , enable_frame_pointer=no) + , enable_frame_pointers=no) AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, [return __x86_64__ == 1 ? 
0 : 1])], [is_x86_64=yes], [is_x86_64=no]) -AM_CONDITIONAL(ENABLE_FRAME_POINTER, test "$enable_frame_pointer" = yes) +AM_CONDITIONAL(ENABLE_FRAME_POINTERS, test "$enable_frame_pointers" = yes) AM_CONDITIONAL(X86_64, test "$is_x86_64" = yes) # Defines PRIuS diff --git a/doc/heap_checker.html b/doc/heap_checker.html index 1ee2668..0b84c67 100644 --- a/doc/heap_checker.html +++ b/doc/heap_checker.html @@ -182,7 +182,7 @@ referred to in this file, are declared in code that exercises some foo functionality; this code should preserve memory allocation state; } - if (!heap_checker.SameHeap()) assert("" == "heap memory leak"); + if (!heap_checker.SameHeap()) assert(NULL == "heap memory leak"); </pre> <p>The various flavors of these functions -- <code>SameHeap()</code>, diff --git a/doc/tcmalloc.html b/doc/tcmalloc.html index c399188..fd2d3cd 100644 --- a/doc/tcmalloc.html +++ b/doc/tcmalloc.html @@ -57,10 +57,10 @@ and ends up using <code>16N</code> bytes.</p> <h2>Usage</h2> -<p>To use TCmalloc, just link tcmalloc into your application via the +<p>To use TCMalloc, just link TCMalloc into your application via the "-ltcmalloc" linker flag.</p> -<p>You can use tcmalloc in applications you didn't compile yourself, +<p>You can use TCMalloc in applications you didn't compile yourself, by using LD_PRELOAD:</p> <pre> $ LD_PRELOAD="/usr/lib/libtcmalloc.so" <binary> @@ -86,7 +86,7 @@ needed, and periodic garbage collections are used to migrate memory back from a thread-local cache into the central data structures.</p> <center><img src="overview.gif"></center> -<p>TCMalloc treates objects with size <= 32K ("small" objects) +<p>TCMalloc treats objects with size <= 32K ("small" objects) differently from larger objects. Large objects are allocated directly from the central heap using a page-level allocator (a page is a 4K aligned region of memory). I.e., a large object is always @@ -104,7 +104,7 @@ size-classes. 
For example, all allocations in the range 961 to 1024 bytes are rounded up to 1024. The size-classes are spaced so that small sizes are separated by 8 bytes, larger sizes by 16 bytes, even larger sizes by 32 bytes, and so forth. The maximal spacing (for -sizes >= ~2K) is 256 bytes.</p> +sizes &gt;= ~2K) is 256 bytes.</p> <p>A thread cache contains a singly linked list of free objects per size-class.</p> @@ -290,13 +290,13 @@ threads. The raw data used to generate these graphs (the output of the <ul> <li> TCMalloc is much more consistently scalable than PTMalloc2 - for - all thread counts >1 it achieves ~7-9 million ops/sec for small + all thread counts &gt;1 it achieves ~7-9 million ops/sec for small allocations, falling to ~2 million ops/sec for larger allocations. The single-thread case is an obvious outlier, since it is only able to keep a single processor busy and hence can achieve fewer ops/sec. PTMalloc2 has a much higher variance on operations/sec - peaking somewhere around 4 million ops/sec - for small allocations and falling to <1 million ops/sec for + for small allocations and falling to &lt;1 million ops/sec for larger allocations. <li> TCMalloc is faster than PTMalloc2 in the vast majority of @@ -309,10 +309,10 @@ threads. The raw data used to generate these graphs (the output of the 2MB). With larger allocation sizes, fewer objects can be stored in the cache before it is garbage-collected. - <li> There is a noticeably drop in the TCMalloc performance at ~32K + <li> There is a noticeable drop in TCMalloc's performance at ~32K maximum allocation size; at larger sizes performance drops less quickly. This is due to the 32K maximum size of objects in the - per-thread caches; for objects larger than this tcmalloc + per-thread caches; for objects larger than this TCMalloc allocates from the central page heap. 
</ul> diff --git a/packages/deb/changelog b/packages/deb/changelog index ee237af..167d6c0 100644 --- a/packages/deb/changelog +++ b/packages/deb/changelog @@ -1,3 +1,9 @@ +google-perftools (0.95-1) unstable; urgency=low + + * New upstream release. + + -- Google Inc. <opensource@google.com> Tue, 12 Feb 2008 12:28:32 -0800 + google-perftools (0.94-1) unstable; urgency=low * New upstream release. diff --git a/packages/deb/docs b/packages/deb/docs index d43c7dc..df891c9 100644 --- a/packages/deb/docs +++ b/packages/deb/docs @@ -6,6 +6,7 @@ NEWS README TODO doc/cpuprofile.html +doc/cpuprofile-fileformat.html doc/designstyle.css doc/heap-example1.png doc/heap_checker.html diff --git a/packages/rpm/rpm.spec b/packages/rpm/rpm.spec index fb513a7..15c7e63 100644 --- a/packages/rpm/rpm.spec +++ b/packages/rpm/rpm.spec @@ -51,7 +51,20 @@ rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root) -%doc AUTHORS COPYING ChangeLog INSTALL NEWS README TODO doc/cpuprofile.html doc/designstyle.css doc/heap-example1.png doc/heap_checker.html doc/heapprofile.html doc/index.html doc/overview.dot doc/overview.gif doc/pageheap.dot doc/pageheap.gif doc/pprof-test-big.gif doc/pprof-test.gif doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif doc/pprof.1 doc/pprof_remote_servers.html doc/spanmap.dot doc/spanmap.gif doc/t-test1.times.txt doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png doc/tcmalloc-opspersec.vs.size.1.threads.png 
doc/tcmalloc-opspersec.vs.size.12.threads.png doc/tcmalloc-opspersec.vs.size.16.threads.png doc/tcmalloc-opspersec.vs.size.2.threads.png doc/tcmalloc-opspersec.vs.size.20.threads.png doc/tcmalloc-opspersec.vs.size.3.threads.png doc/tcmalloc-opspersec.vs.size.4.threads.png doc/tcmalloc-opspersec.vs.size.5.threads.png doc/tcmalloc-opspersec.vs.size.8.threads.png doc/tcmalloc.html doc/threadheap.dot doc/threadheap.gif +%doc AUTHORS COPYING ChangeLog INSTALL NEWS README TODO +%doc doc/index.html doc/designstyle.css +%doc doc/pprof.1 doc/pprof_remote_servers.html +%doc doc/tcmalloc.html +%doc doc/overview.dot doc/overview.gif +%doc doc/pageheap.dot doc/pageheap.gif +%doc doc/spanmap.dot doc/spanmap.gif +%doc doc/threadheap.dot doc/threadheap.gif +%doc doc/t-test1.times.txt +%doc doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png +%doc doc/tcmalloc-opspersec.vs.size.1.threads.png doc/tcmalloc-opspersec.vs.size.12.threads.png doc/tcmalloc-opspersec.vs.size.16.threads.png doc/tcmalloc-opspersec.vs.size.2.threads.png doc/tcmalloc-opspersec.vs.size.20.threads.png doc/tcmalloc-opspersec.vs.size.3.threads.png doc/tcmalloc-opspersec.vs.size.4.threads.png doc/tcmalloc-opspersec.vs.size.5.threads.png doc/tcmalloc-opspersec.vs.size.8.threads.png +%doc doc/heapprofile.html doc/heap-example1.png doc/heap_checker.html +%doc doc/cpuprofile.html doc/cpuprofile-fileformat.html +%doc doc/pprof-test-big.gif doc/pprof-test.gif 
doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif %{prefix}/lib/libstacktrace.so.0 %{prefix}/lib/libstacktrace.so.0.0.0 diff --git a/src/addressmap-inl.h b/src/addressmap-inl.h index 7760710..a8cbb77 100644 --- a/src/addressmap-inl.h +++ b/src/addressmap-inl.h @@ -97,16 +97,16 @@ class AddressMap { typedef void* (*Allocator)(size_t); typedef void (*DeAllocator)(void*); typedef const void* Key; - + // Create an AddressMap that uses the specified allocator/deallocator. // The allocator/deallocator should behave like malloc/free. // For instance, the allocator does not need to return initialized memory. AddressMap(Allocator alloc, DeAllocator dealloc); ~AddressMap(); - // If the map contains an entry for "key", store the associated - // value in "*result" and return true. Else return false. - bool Find(Key key, Value* result); + // If the map contains an entry for "key", return it. Else return NULL. + inline const Value* Find(Key key) const; + inline Value* FindMutable(Key key); // Insert <key,value> into the map. Any old value associated // with key is forgotten. @@ -117,12 +117,24 @@ class AddressMap { // and returns true. Else returns false. bool FindAndRemove(Key key, Value* removed_value); + // Similar to Find but we assume that keys are addresses of non-overlapping + // memory ranges whose sizes are given by size_func. + // If the map contains a range into which "key" points + // (at its start or inside of it, but not at the end), + // return the address of the associated value + // and store its key in "*res_key". + // Else return NULL. + // max_size specifies largest range size possibly in existence now. + typedef size_t (*ValueSizeFunc)(const Value& v); + const Value* FindInside(ValueSizeFunc size_func, size_t max_size, + Key key, Key* res_key); + // Iterate over the address map calling 'callback' // for all stored key-value pairs and passing 'arg' to it. 
// We don't use full Closure/Callback machinery not to add // unnecessary dependencies to this class with low-level uses. template<class Type> - void Iterate(void (*callback)(Key, Value, Type), Type arg) const; + inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const; private: typedef uintptr_t Number; @@ -154,7 +166,7 @@ class AddressMap { // We use a simple chaining hash-table to represent the clusters. struct Cluster { - Cluster* next; // Next cluster in chain + Cluster* next; // Next cluster in hash table chain Number id; // Cluster ID Entry* blocks[kClusterBlocks]; // Per-block linked-lists }; @@ -169,8 +181,8 @@ class AddressMap { // Number of entry objects allocated at a time static const int ALLOC_COUNT = 64; - Cluster** hashtable_; // The hash-table - Entry* free_; // Free list of unused Entry objects + Cluster** hashtable_; // The hash-table + Entry* free_; // Free list of unused Entry objects // Multiplicative hash function: // The value "kHashMultiplier" is the bottom 32 bits of @@ -208,7 +220,7 @@ class AddressMap { } return NULL; } - + // Return the block ID for an address within its cluster static int BlockID(Number address) { return (address >> kBlockBits) & (kClusterBlocks - 1); @@ -264,18 +276,22 @@ AddressMap<Value>::~AddressMap() { } template <class Value> -bool AddressMap<Value>::Find(Key key, Value* result) { +inline const Value* AddressMap<Value>::Find(Key key) const { + return const_cast<AddressMap*>(this)->FindMutable(key); +} + +template <class Value> +inline Value* AddressMap<Value>::FindMutable(Key key) { const Number num = reinterpret_cast<Number>(key); const Cluster* const c = FindCluster(num, false/*do not create*/); if (c != NULL) { - for (const Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) { + for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) { if (e->key == key) { - *result = e->value; - return true; + return &e->value; } } } - return false; + return NULL; } template <class Value> @@ 
-330,14 +346,62 @@ bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) { } template <class Value> +const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func, + size_t max_size, + Key key, + Key* res_key) { + const Number key_num = reinterpret_cast<Number>(key); + Number num = key_num; // we'll move this to move back through the clusters + while (1) { + const Cluster* c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + while (1) { + const int block = BlockID(num); + bool had_smaller_key = false; + for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) { + const Number e_num = reinterpret_cast<Number>(e->key); + if (e_num <= key_num) { + if (e_num == key_num || // to handle 0-sized ranges + key_num < e_num + (*size_func)(e->value)) { + *res_key = e->key; + return &e->value; + } + had_smaller_key = true; + } + } + if (had_smaller_key) return NULL; // got a range before 'key' + // and it did not contain 'key' + if (block == 0) break; + // try address-wise previous block + num |= kBlockSize - 1; // start at the last addr of prev block + num -= kBlockSize; + if (key_num - num > max_size) return NULL; + } + } + if (num < kClusterSize) return NULL; // first cluster + // go to address-wise previous cluster to try + num |= kClusterSize - 1; // start at the last block of previous cluster + num -= kClusterSize; + if (key_num - num > max_size) return NULL; + // Having max_size to limit the search is crucial: else + // we have to traverse a lot of empty clusters (or blocks). + // We can avoid needing max_size if we put clusters into + // a search tree, but performance suffers considerably + // if we use this approach by using stl::set. 
+ } +} + +template <class Value> template <class Type> -void AddressMap<Value>::Iterate(void (*callback)(Key, Value, Type), - Type arg) const { +inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type), + Type arg) const { + // We could optimize this by traversing only non-empty clusters and/or blocks + // but it does not speed up heap-checker noticeably. for (int h = 0; h < kHashSize; ++h) { for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) { for (int b = 0; b < kClusterBlocks; ++b) { - for (const Entry* e = c->blocks[b]; e != NULL; e = e->next) { - callback(e->key, e->value, arg); + for (Entry* e = c->blocks[b]; e != NULL; e = e->next) { + callback(e->key, &e->value, arg); } } } diff --git a/src/base/atomicops-internals-linuxppc.h b/src/base/atomicops-internals-linuxppc.h new file mode 100644 index 0000000..6ddc5a8 --- /dev/null +++ b/src/base/atomicops-internals-linuxppc.h @@ -0,0 +1,193 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + */ + +// Implementation of atomic operations for ppc-linux. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". + +#ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__ +#define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__ + +#define LWSYNC_ON_SMP +#define PPC405_ERR77(a, b) +#define ISYNC_ON_SMP + + +/* Adapted from atomic_add in asm-powerpc/atomic.h */ +inline int32_t OSAtomicAdd32(int32_t amount, int32_t *value) { + int t; + __asm__ __volatile__( +"1: lwarx %0,0,%3 # atomic_add\n\ + add %0,%2,%0\n" + PPC405_ERR77(0,%3) +" stwcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (*value) + : "r" (amount), "r" (value) + : "cc"); + return *value; +} + +/* Adapted from __cmpxchg_u32 in asm-powerpc/atomic.h */ +inline bool OSAtomicCompareAndSwap32(int32_t old_value, int32_t new_value, + int32_t *value) { + unsigned int prev; + __asm__ __volatile__ ( + LWSYNC_ON_SMP +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n" + PPC405_ERR77(0,%2) +" stwcx. 
%4,0,%2\n\ + bne- 1b" + ISYNC_ON_SMP + "\n\ +2:" + : "=&r" (prev), "+m" (*value) + : "r" (value), "r" (old_value), "r" (new_value) + : "cc", "memory"); + return true; +} + +/* Adapted from __cmpxchg_u32 in asm-powerpc/atomic.h */ +inline int32_t OSAtomicCompareAndSwap32Barrier(int32_t old_value, + int32_t new_value, + int32_t *value) { + unsigned int prev; + __asm__ __volatile__ ( + LWSYNC_ON_SMP +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n" + PPC405_ERR77(0,%2) +" stwcx. %4,0,%2\n\ + bne- 1b" + ISYNC_ON_SMP + "\n\ +2:" + : "=&r" (prev), "+m" (*value) + : "r" (value), "r" (old_value), "r" (new_value) + : "cc", "memory"); + return true; +} + +inline void MemoryBarrier() { + // TODO +} + +// int32_t and intptr_t seems to be equal on ppc-linux +// therefore we have no extra Atomic32 function versions. +typedef int32_t Atomic32; +typedef intptr_t AtomicWord; + +#define OSAtomicCastIntPtr(p) \ + reinterpret_cast<int32_t *>(const_cast<AtomicWord *>(p)) +#define OSAtomicCompareAndSwapIntPtr OSAtomicCompareAndSwap32 +#define OSAtomicAddIntPtr OSAtomicAdd32 +#define OSAtomicCompareAndSwapIntPtrBarrier OSAtomicCompareAndSwap32Barrier + + +inline AtomicWord CompareAndSwap(volatile AtomicWord *ptr, + AtomicWord old_value, + AtomicWord new_value) { + AtomicWord prev_value; + do { + if (OSAtomicCompareAndSwapIntPtr(old_value, new_value, + OSAtomicCastIntPtr(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline AtomicWord AtomicExchange(volatile AtomicWord *ptr, + AtomicWord new_value) { + AtomicWord old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwapIntPtr(old_value, new_value, + OSAtomicCastIntPtr(ptr))); + return old_value; +} + + +inline AtomicWord AtomicIncrement(volatile AtomicWord *ptr, AtomicWord increment) { + return OSAtomicAddIntPtr(increment, OSAtomicCastIntPtr(ptr)); +} + +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord *ptr, + AtomicWord 
old_value, + AtomicWord new_value) { + AtomicWord prev_value; + do { + if (OSAtomicCompareAndSwapIntPtrBarrier(old_value, new_value, + OSAtomicCastIntPtr(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord *ptr, + AtomicWord old_value, + AtomicWord new_value) { + // The ppc interface does not distinguish between Acquire and + // Release memory barriers; they are equivalent. + return Acquire_CompareAndSwap(ptr, old_value, new_value); +} + + +inline void Acquire_Store(volatile AtomicWord *ptr, AtomicWord value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile AtomicWord *ptr, AtomicWord value) { + MemoryBarrier(); + *ptr = value; +} + +inline AtomicWord Acquire_Load(volatile const AtomicWord *ptr) { + AtomicWord value = *ptr; + MemoryBarrier(); + return value; +} + +inline AtomicWord Release_Load(volatile const AtomicWord *ptr) { + MemoryBarrier(); + return *ptr; +} + + +#endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__ diff --git a/src/base/atomicops-internals-x86-msvc.h b/src/base/atomicops-internals-x86-msvc.h index 8d10de4..cce120c 100644 --- a/src/base/atomicops-internals-x86-msvc.h +++ b/src/base/atomicops-internals-x86-msvc.h @@ -99,7 +99,7 @@ inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, } // In msvc8/vs2005, winnt.h already contains a definition for MemoryBarrier. 
-#if !(COMPILER_MSVC && _MSC_VER >= 1400) +#if !(defined(_MSC_VER) && _MSC_VER >= 1400) inline void MemoryBarrier() { AtomicWord value = 0; AtomicExchange(&value, 0); // acts as a barrier diff --git a/src/base/atomicops-internals-x86.h b/src/base/atomicops-internals-x86.h index 210e419..db3d4d2 100644 --- a/src/base/atomicops-internals-x86.h +++ b/src/base/atomicops-internals-x86.h @@ -147,7 +147,9 @@ inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { ATOMICOPS_COMPILER_BARRIER(); - *ptr = value; // works w/o barrier for current Intel chips as of June 2005 + *ptr = value; // An x86 store acts as a release barrier + // for current AMD/Intel chips as of Jan 2008. + // See also Acquire_Load(), below. // When new chips come out, check: // IA-32 Intel Architecture Software Developer's Manual, Volume 3: @@ -155,11 +157,19 @@ inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { // Section 7.2, Memory Ordering. // Last seen at: // http://developer.intel.com/design/pentium4/manuals/index_new.htm + // + // x86 stores/loads fail to act as barriers for a few instructions (clflush + // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are + // not generated by the compiler, and are rare. Users of these instructions + // need to know about cache behaviour in any case since all of these involve + // either flushing cache lines or non-temporal cache hints. } inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { - AtomicWord value = *ptr; - MemoryBarrier(); + AtomicWord value = *ptr; // An x86 load acts as a acquire barrier, + // for current AMD/Intel chips as of Jan 2008. + // See also Release_Store(), above. 
+ ATOMICOPS_COMPILER_BARRIER(); return value; } @@ -225,19 +235,14 @@ inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { ATOMICOPS_COMPILER_BARRIER(); - *ptr = value; // works w/o barrier for current Intel chips as of June 2005 - - // When new chips come out, check: - // IA-32 Intel Architecture Software Developer's Manual, Volume 3: - // System Programming Guide, Chatper 7: Multiple-processor management, - // Section 7.2, Memory Ordering. - // Last seen at: - // http://developer.intel.com/design/pentium4/manuals/index_new.htm + *ptr = value; // An x86 store acts as a release barrier. + // See comments in AtomicWord version of Release_Store(), above. } inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { - Atomic32 value = *ptr; - MemoryBarrier(); + Atomic32 value = *ptr; // An x86 load acts as a acquire barrier. + // See comments in AtomicWord version of Release_Store(), above. + ATOMICOPS_COMPILER_BARRIER(); return value; } diff --git a/src/base/atomicops.h b/src/base/atomicops.h index 0c8da9a..81c365f 100644 --- a/src/base/atomicops.h +++ b/src/base/atomicops.h @@ -56,6 +56,8 @@ #include "base/atomicops-internals-x86.h" #elif defined(__i386) && defined(MSVC) #include "base/atomicops-internals-x86-msvc.h" +#elif defined(__linux__) && defined(__PPC__) +#include "base/atomicops-internals-linuxppc.h" #else // Assume x86 for now. 
If you need to support a new architecture and // don't know how to implement atomic ops, you can probably get away diff --git a/src/base/commandlineflags.h b/src/base/commandlineflags.h index 782b7c7..9514458 100644 --- a/src/base/commandlineflags.h +++ b/src/base/commandlineflags.h @@ -51,6 +51,7 @@ #include "config.h" #include <string> #include <string.h> // for memchr +#include <stdlib.h> // for getenv #include "base/basictypes.h" #define DECLARE_VARIABLE(type, name) \ diff --git a/src/base/cycleclock.h b/src/base/cycleclock.h new file mode 100644 index 0000000..b5e012d --- /dev/null +++ b/src/base/cycleclock.h @@ -0,0 +1,95 @@ +// Copyright (c) 2004, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ---------------------------------------------------------------------- +// CycleClock +// A CycleClock tells you the current time in Cycles. The "time" +// is actually time since power-on. This is like time() but doesn't +// involve a system call and is much more precise. +// ---------------------------------------------------------------------- + +#ifndef GOOGLE_BASE_CYCLECLOCK_H__ +#define GOOGLE_BASE_CYCLECLOCK_H__ + +#include "base/basictypes.h" // make sure we get the def for int64 +#if defined(__MACH__) && defined(__APPLE__) +#include <mach/mach_time.h> +#endif + +// NOTE: only i386 and x86_64 have been well tested. +// PPC, sparc, alpha, and ia64 are based on +// http://peter.kuscsik.com/wordpress/?p=14 +// with modifications by m3b. cf +// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h +struct CycleClock { + // This should return the number of cycles since power-on + static inline int64 Now() { +#if defined(__i386__) + int64 ret; + __asm__ volatile ("rdtsc" + : "=A" (ret) ); + return ret; +#elif defined(__x86_64__) || defined(__amd64__) + int64 low, high; + __asm__ volatile ("rdtsc\n" + "shl $0x20, %%rdx" + : "=a" (low), "=d" (high)); + return high | low; +#elif defined(__powerpc__) || defined(__ppc__) + // This returns a time-base, which is not always precisely a cycle-count. 
+ int64 tbl, tbu0, tbu1; + asm("mftbu %0" : "=r" (tbu0)); + asm("mftb %0" : "=r" (tbl )); + asm("mftbu %0" : "=r" (tbu1)); + tbl &= -static_cast<int64>(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) + return (tbu1 << 32) | tbl; +#elif defined(__sparc__) + int64 tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r" (tick)); + return tick; +#elif defined(__ia64__) + int64 itc; + asm("mov %0 = ar.itc" : "=r" (itc)); + return itc; +#elif defined(_MSC_VER) && defined(_M_IX86) + _asm rdtsc +#elif defined(__MACH__) && defined(__APPLE__) + return mach_absolute_time(); +#else + // We could define __alpha here as well, but it only has a 32-bit + // timer (good for like 4 seconds), which isn't very useful. +#error You need to define CycleTimer for your O/S and CPU +#endif + } +}; + + +#endif // GOOGLE_BASE_CYCLECLOCK_H__ diff --git a/src/base/elfcore.h b/src/base/elfcore.h index eb93e05..9f7002a 100644 --- a/src/base/elfcore.h +++ b/src/base/elfcore.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2005-2007, Google Inc. +/* Copyright (c) 2005-2008, Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,7 +41,7 @@ extern "C" { * Porting to other related platforms should not be difficult. */ #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ - defined(mips)) && defined(__linux) + defined(__mips__)) && defined(__linux) #include <stdarg.h> #include <stdint.h> @@ -96,16 +96,16 @@ extern "C" { #define LR uregs[14] /* Link register */ long uregs[18]; } arm_regs; -#elif defined(mips) +#elif defined(__mips__) typedef struct mips_regs { unsigned long pad[6]; /* Unused padding to match kernel structures */ unsigned long uregs[32]; /* General purpose registers. */ - unsigned long cp0_status; - unsigned long lo; /* Used for multiplication and division. */ - unsigned long hi; + unsigned long hi; /* Used for multiplication and division. 
*/ + unsigned long lo; + unsigned long cp0_epc; /* Program counter. */ unsigned long cp0_badvaddr; + unsigned long cp0_status; unsigned long cp0_cause; - unsigned long cp0_epc; /* Program counter. */ unsigned long unused; } mips_regs; #endif @@ -263,7 +263,7 @@ extern "C" { (r) = (f).arm; \ (r).uregs[16] = fps; \ } while (0) -#elif defined(mips) && defined(__GNUC__) +#elif defined(__mips__) && defined(__GNUC__) typedef struct Frame { struct mips_regs mips_regs; int errno_; diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h index 0c11170..c41b712 100644 --- a/src/base/linux_syscall_support.h +++ b/src/base/linux_syscall_support.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2005-2007, Google Inc. +/* Copyright (c) 2005-2008, Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,11 +73,11 @@ #ifndef SYS_LINUX_SYSCALL_SUPPORT_H #define SYS_LINUX_SYSCALL_SUPPORT_H -/* We currently only support x86-32, x86-64, ARM, and MIPS on Linux. +/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux. * Porting to other related platforms should not be difficult. */ -#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ - defined(mips)) && defined(__linux) +#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + defined(__mips__) || defined(__PPC__)) && defined(__linux) #ifndef SYS_CPLUSPLUS #ifdef __cplusplus @@ -91,6 +91,7 @@ extern "C" { #include <errno.h> #include <signal.h> #include <stdarg.h> +#include <string.h> #include <sys/ptrace.h> #include <sys/resource.h> #include <sys/time.h> @@ -100,30 +101,420 @@ extern "C" { #include <linux/unistd.h> #include <endian.h> -/* libc defines versions of stat, dirent, dirent64, and statfs64 that are - * incompatible with the structures that the kernel API expects. 
If you wish - * to use sys_fstat(), sys_stat(), sys_getdents(), sys_getdents64(), - * sys_statfs64(), or sys_statfs() you will need to include the kernel - * headers in your code. +#ifdef __mips__ +/* Include definitions of the ABI currently in use. */ +#include <sgidefs.h> +#endif + +#endif + +/* As glibc often provides subtly incompatible data structures (and implicit + * wrapper functions that convert them), we provide our own kernel data + * structures for use by the system calls. + * These structures have been developed by using Linux 2.6.23 headers for + * reference. Note though, we do not care about exact API compatibility + * with the kernel, and in fact the kernel often does not have a single + * API that works across architectures. Instead, we try to mimic the glibc + * API where reasonable, and only guarantee ABI compatibility with the + * kernel headers. + * Most notably, here are a few changes that were made to the structures + * defined by kernel headers: * - * asm/posix_types.h - * asm/stat.h - * asm/statfs.h - * asm/types.h - * linux/dirent.h + * - we only define structures, but not symbolic names for kernel data + * types. For the latter, we directly use the native C datatype + * (i.e. "unsigned" instead of "mode_t"). + * - in a few cases, it is possible to define identical structures for + * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by + * standardizing on the 64bit version of the data types. In particular, + * this means that we use "unsigned" where the 32bit headers say + * "unsigned long". + * - overall, we try to minimize the number of cases where we need to + * conditionally define different structures. + * - the "struct kernel_sigaction" class of structures have been + * modified to more closely mimic glibc's API by introducing an + * anonymous union for the function pointer. + * - a small number of field names had to have an underscore appended to + * them, because glibc defines a global macro by the same name. 
*/ -struct dirent64; -struct dirent; -struct iovec; -struct msghdr; -struct pollfd; -struct rlimit; -struct sockaddr; -struct stat; -struct stat64; -struct statfs; -struct statfs64; +/* include/linux/dirent.h */ +struct kernel_dirent64 { + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[256]; +}; + +/* include/linux/dirent.h */ +struct kernel_dirent { + long d_ino; + long d_off; + unsigned short d_reclen; + char d_name[256]; +}; + +/* include/linux/uio.h */ +struct kernel_iovec { + void *iov_base; + unsigned long iov_len; +}; + +/* include/linux/socket.h */ +struct kernel_msghdr { + void *msg_name; + int msg_namelen; + struct kernel_iovec*msg_iov; + unsigned long msg_iovlen; + void *msg_control; + unsigned long msg_controllen; + unsigned msg_flags; +}; + +/* include/asm-generic/poll.h */ +struct kernel_pollfd { + int fd; + short events; + short revents; +}; + +/* include/linux/resource.h */ +struct kernel_rlimit { + unsigned long rlim_cur; + unsigned long rlim_max; +}; + +/* include/linux/time.h */ +struct kernel_timespec { + long tv_sec; + long tv_nsec; +}; + +/* include/linux/time.h */ +struct kernel_timeval { + long tv_sec; + long tv_usec; +}; + +/* include/linux/resource.h */ +struct kernel_rusage { + struct kernel_timeval ru_utime; + struct kernel_timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +struct siginfo; +#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__PPC__) + +/* include/asm-{arm,i386,mips,ppc}/signal.h */ +struct kernel_old_sigaction { + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + unsigned long sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +} __attribute__((packed,aligned(4))); 
+#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define kernel_old_sigaction kernel_sigaction +#endif + +/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the + * exactly match the size of the signal set, even though the API was + * intended to be extensible. We define our own KERNEL_NSIG to deal with + * this. + * Please note that glibc provides signals [1.._NSIG-1], whereas the + * kernel (and this header) provides the range [1..KERNEL_NSIG]. The + * actual number of signals is obviously the same, but the constants + * differ by one. + */ +#ifdef __mips__ +#define KERNEL_NSIG 128 +#else +#define KERNEL_NSIG 64 +#endif + +/* include/asm-{arm,i386,mips,x86_64}/signal.h */ +struct kernel_sigset_t { + unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ + (8*sizeof(unsigned long))]; +}; + +/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */ +struct kernel_sigaction { +#ifdef __mips__ + unsigned long sa_flags; + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + struct kernel_sigset_t sa_mask; +#else + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + unsigned long sa_flags; + void (*sa_restorer)(void); + struct kernel_sigset_t sa_mask; +#endif +}; + +/* include/linux/socket.h */ +struct kernel_sockaddr { + unsigned short sa_family; + char sa_data[14]; +}; + +/* include/asm-{arm,i386,mips,ppc}/stat.h */ +#ifdef __mips__ +#if _MIPS_SIM == _MIPS_SIM_ABI64 +struct kernel_stat { +#else +struct kernel_stat64 { +#endif + unsigned st_dev; + unsigned __pad0[3]; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + unsigned __pad1[3]; + long long st_size; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned st_blksize; + unsigned __pad2; + unsigned long long 
st_blocks; +}; +#elif defined __PPC__ +struct kernel_stat64 { + unsigned long long st_dev; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned short int __pad2; + long long st_size; + long st_blksize; + long long st_blocks; + long st_atime_; + unsigned long st_atime_nsec_; + long st_mtime_; + unsigned long st_mtime_nsec_; + long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#else +struct kernel_stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + unsigned __st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned char __pad3[4]; + long long st_size; + unsigned st_blksize; + unsigned long long st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned long long st_ino; +}; +#endif + +/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */ +#if defined(__i386__) || defined(__ARM_ARCH_3__) +struct kernel_stat { + /* The kernel headers suggest that st_dev and st_rdev should be 32bit + * quantities encoding 12bit major and 20bit minor numbers in an interleaved + * format. In reality, we do not see useful data in the top bits. So, + * we'll leave the padding in here, until we find a better solution. 
+ */ + unsigned short st_dev; + short pad1; + unsigned st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + short pad2; + unsigned st_size; + unsigned st_blksize; + unsigned st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned __unused4; + unsigned __unused5; +}; +#elif defined(__x86_64__) +struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + unsigned __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + long st_blocks; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + long __unused[3]; +}; +#elif defined(__PPC__) +struct kernel_stat { + unsigned st_dev; + unsigned long st_ino; // ino_t + unsigned long st_mode; // mode_t + unsigned short st_nlink; // nlink_t + unsigned st_uid; // uid_t + unsigned st_gid; // gid_t + unsigned st_rdev; + long st_size; // off_t + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) +struct kernel_stat { + unsigned st_dev; + int st_pad1[3]; + unsigned st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + int st_pad2[2]; + long st_size; + int st_pad3; + long st_atime_; + long st_atime_nsec_; + long st_mtime_; + long st_mtime_nsec_; + long st_ctime_; + long st_ctime_nsec_; + int st_blksize; + int st_blocks; + int st_pad4[14]; +}; +#endif + +/* 
include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h */ +#ifdef __mips__ +#if _MIPS_SIM != _MIPS_SIM_ABI64 +struct kernel_statfs64 { + unsigned long f_type; + unsigned long f_bsize; + unsigned long f_frsize; + unsigned long __pad; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_files; + unsigned long long f_ffree; + unsigned long long f_bavail; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_spare[6]; +}; +#endif +#elif !defined(__x86_64__) +struct kernel_statfs64 { + unsigned long f_type; + unsigned long f_bsize; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_bavail; + unsigned long long f_files; + unsigned long long f_ffree; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_frsize; + unsigned long f_spare[5]; +}; +#endif + +/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h */ +#ifdef __mips__ +struct kernel_statfs { + long f_type; + long f_bsize; + long f_frsize; + long f_blocks; + long f_bfree; + long f_files; + long f_ffree; + long f_bavail; + struct { int val[2]; } f_fsid; + long f_namelen; + long f_spare[6]; +}; +#else +struct kernel_statfs { + /* x86_64 actually defines all these fields as signed, whereas all other */ + /* platforms define them as unsigned. Leaving them at unsigned should not */ + /* cause any problems. 
*/ + unsigned long f_type; + unsigned long f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_frsize; + unsigned long f_spare[5]; +}; #endif @@ -159,15 +550,36 @@ struct statfs64; #ifndef MREMAP_FIXED #define MREMAP_FIXED 2 #endif +#ifndef SA_RESTORER +#define SA_RESTORER 0x04000000 +#endif #if defined(__i386__) #ifndef __NR_setresuid #define __NR_setresuid 164 #define __NR_setresgid 170 #endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigsuspend 179 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 180 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 181 +#endif #ifndef __NR_ugetrlimit #define __NR_ugetrlimit 191 #endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif #ifndef __NR_setresuid32 #define __NR_setresuid32 208 #define __NR_setresgid32 210 @@ -234,9 +646,27 @@ struct statfs64; #define __NR_setresuid (__NR_SYSCALL_BASE + 164) #define __NR_setresgid (__NR_SYSCALL_BASE + 170) #endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) +#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) +#endif +#ifndef __NR_pread64 +#define __NR_pread64 (__NR_SYSCALL_BASE + 180) +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) +#endif #ifndef __NR_ugetrlimit #define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) #endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#endif #ifndef __NR_setresuid32 #define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) #define __NR_setresgid32 (__NR_SYSCALL_BASE + 
210) @@ -338,12 +768,30 @@ struct statfs64; #define __NR_move_pages 279 #endif /* End of x86-64 definitions */ -#elif defined(mips) +#elif defined(__mips__) #if _MIPS_SIM == _MIPS_SIM_ABI32 #ifndef __NR_setresuid #define __NR_setresuid (__NR_Linux + 185) #define __NR_setresgid (__NR_Linux + 190) #endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) +#define __NR_rt_sigpending (__NR_Linux + 196) +#define __NR_rt_sigsuspend (__NR_Linux + 199) +#endif +#ifndef __NR_pread64 +#define __NR_pread64 (__NR_Linux + 200) +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 (__NR_Linux + 201) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_Linux + 213) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_Linux + 215) +#endif #ifndef __NR_getdents64 #define __NR_getdents64 (__NR_Linux + 219) #endif @@ -491,9 +939,92 @@ struct statfs64; #ifndef __NR_move_pages #define __NR_move_pages (__NR_Linux + 271) #endif -/* End of MIPS (new 32bit API) definitions */ +/* End of MIPS (new 32bit API) definitions */ +#endif +/* End of MIPS definitions */ +#elif defined(__PPC__) +#ifndef __NR_setfsuid +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 164 +#define __NR_setresgid 169 +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigsuspend 178 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 179 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 180 +#endif +#ifndef __NR_ugetrlimit +#define __NR_ugetrlimit 190 +#endif +#ifndef __NR_readahead +#define __NR_readahead 191 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 202 +#endif +#ifndef __NR_gettid +#define __NR_gettid 207 +#endif +#ifndef __NR_setxattr +#define __NR_setxattr 
209 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 210 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 212 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 213 +#endif +#ifndef __NR_futex +#define __NR_futex 221 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 232 +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 252 +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 253 +#endif +#ifndef __NR_fadvise64_64 +#define __NR_fadvise64_64 254 +#endif +#ifndef __NR_openat +#define __NR_openat 286 +#endif +#ifndef __NR_fstatat64 +#define __NR_fstatat64 291 #endif -/* End of MIPS definitions */ +#ifndef __NR_unlinkat +#define __NR_unlinkat 292 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 301 +#endif +/* End of powerpc definitions */ #endif @@ -556,7 +1087,7 @@ struct statfs64; #endif #undef LSS_RETURN - #ifndef mips + #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) /* Failing system calls return a negative result in the range of * -1..-4095. These are "errno" values with the sign inverted. */ @@ -568,7 +1099,7 @@ struct statfs64; } \ return (type) (res); \ } while (0) - #else + #elif defined(__mips__) /* On MIPS, failing system calls return -1, and set errno in a * separate CPU register. */ @@ -580,6 +1111,18 @@ struct statfs64; } \ return (type) (res); \ } while (0) + #elif defined(__PPC__) + /* On PPC, a failing system call sets bit 0x10000000 of the condition + * register and returns the errno value as its result. See linux/unistd.h. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err & 0x10000000 ) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) #endif #if defined(__i386__) /* In PIC mode (e.g.
when building shared libraries), gcc for i386 @@ -776,20 +1319,29 @@ struct statfs64; LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset, loff_t len, int advice) { return LSS_NAME(_fadvise64_64)(fd, - (unsigned)offset, (unsigned)(offset >> 32), + (unsigned)offset, (unsigned)(offset >>32), (unsigned)len, (unsigned)(len >> 32), advice); } - #define __NR__readahead32 __NR_readahead - LSS_INLINE _syscall4(int, _readahead32, int, fd, unsigned, offset_lo, - unsigned, offset_hi, unsigned, len); - LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t offset, int len) { - return LSS_NAME(_readahead32)(fd, - (unsigned)offset, (unsigned)(offset >> 32), - (unsigned)len); + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* For real-time signals, the kernel does not know how to return from + * a signal handler. Instead, it relies on user space to provide a + * restorer function that calls the rt_sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; } - #elif defined(__x86_64__) /* There are no known problems with any of the _syscallX() macros * currently shipping for x86_64, but we still need to be able to define @@ -935,8 +1487,25 @@ struct statfs64; } LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len, int, advice) - LSS_INLINE _syscall2(int, statfs, const char *, path, struct statfs *, buf) - LSS_INLINE _syscall2(int, fstatfs, int, fd, struct statfs *, buf) + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On x86-64, the kernel does not know how to return from + * a signal handler. Instead, it relies on user space to provide a + * restorer function that calls the rt_sigreturn() system call. 
+ * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movq %1,%%rax\n" + "syscall\n" + "2:popq %0\n" + "addq $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; + } #elif defined(__ARM_ARCH_3__) /* Most definitions of _syscallX() neglect to mark "memory" as being * clobbered. This causes problems with compilers, that do a better job @@ -1058,7 +1627,7 @@ struct statfs64; } LSS_RETURN(int, __res); } - #elif defined(mips) + #elif defined(__mips__) #undef LSS_REG #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ (unsigned long)(a) @@ -1166,14 +1735,14 @@ struct statfs64; ".set reorder\n" \ : "=&r"(__v0), "+r" (__r7) \ : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "m" ((unsigned long)arg5), \ - "m" ((unsigned long)arg6) \ + "r"(__r6), "r" ((unsigned long)arg5), \ + "r" ((unsigned long)arg6) \ : "$8", "$9", "$10", "$11", "$12", \ "$13", "$14", "$15", "$24", "memory"); \ LSS_RETURN(type, __v0, __r7); \ } #else - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ type5,arg5,type6,arg6) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ type5 arg5,type6 arg6) { \ @@ -1278,6 +1847,189 @@ struct statfs64; } LSS_RETURN(int, __v0, __r7); } + #elif defined (__PPC__) + #undef LSS_LOADARGS_0 + #define LSS_LOADARGS_0(name, dummy...) 
\ + __sc_0 = __NR_##name + #undef LSS_LOADARGS_1 + #define LSS_LOADARGS_1(name, arg1) \ + LSS_LOADARGS_0(name); \ + __sc_3 = (unsigned long) (arg1) + #undef LSS_LOADARGS_2 + #define LSS_LOADARGS_2(name, arg1, arg2) \ + LSS_LOADARGS_1(name, arg1); \ + __sc_4 = (unsigned long) (arg2) + #undef LSS_LOADARGS_3 + #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ + LSS_LOADARGS_2(name, arg1, arg2); \ + __sc_5 = (unsigned long) (arg3) + #undef LSS_LOADARGS_4 + #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ + LSS_LOADARGS_3(name, arg1, arg2, arg3); \ + __sc_6 = (unsigned long) (arg4) + #undef LSS_LOADARGS_5 + #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ + LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ + __sc_7 = (unsigned long) (arg5) + #undef LSS_LOADARGS_6 + #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ + __sc_8 = (unsigned long) (arg6) + #undef LSS_ASMINPUT_0 + #define LSS_ASMINPUT_0 "0" (__sc_0) + #undef LSS_ASMINPUT_1 + #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) + #undef LSS_ASMINPUT_2 + #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) + #undef LSS_ASMINPUT_3 + #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) + #undef LSS_ASMINPUT_4 + #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) + #undef LSS_ASMINPUT_5 + #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) + #undef LSS_ASMINPUT_6 + #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) 
\ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + register unsigned long __sc_8 __asm__ ("r8"); \ + \ + LSS_LOADARGS_##nr(name, args); \ + __asm__ __volatile__ \ + ("sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory", \ + "r9", "r10", "r11", "r12"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, arg1); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, arg1, arg2); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, arg2, arg3); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, 
arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ + } + /* clone function adapted from glibc 2.3.6 clone.S */ + /* TODO(csilvers): consider wrapping some args up in a struct, like we + * do for i386's _syscall6, so we can compile successfully on gcc 2.95 + */ + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __ret, __err; + { + register int (*__fn)(void *) __asm__ ("r8") = fn; + register void *__cstack __asm__ ("r4") = child_stack; + register int __flags __asm__ ("r3") = flags; + register void * __arg __asm__ ("r9") = arg; + register int * __ptidptr __asm__ ("r5") = parent_tidptr; + register void * __newtls __asm__ ("r6") = newtls; + register int * __ctidptr __asm__ ("r7") = child_tidptr; + __asm__ __volatile__( + /* check for fn == NULL + * and child_stack == NULL + */ + "cmpwi cr0, %6, 0\n\t" + "cmpwi cr1, %7, 0\n\t" + "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" + "beq- cr0, 1f\n\t" + + /* set up stack frame for child */ + "clrrwi %7, %7, 4\n\t" + "li 0, 0\n\t" + "stwu 0, -16(%7)\n\t" + + /* fn, child_stack, arg are saved across the syscall: r28, r29, r27 */ + "mr 28, %6\n\t" + "mr 29, %7\n\t" + "mr 27, %9\n\t" + + /* syscall */ + "li 0, %4\n\t" + /* flags already in r3 + * child_stack already in r4 + * ptidptr already in r5 + * newtls already in r6 + * ctidptr already in r7 + */ + "sc\n\t" + + /* Test if syscall was successful */ + "cmpwi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Do the function call */ + "mtctr 28\n\t" + "mr 3, 27\n\t" + "bctrl\n\t" + + /* Call _exit(r3) */ + "li 0, %5\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n" + "mfcr %1\n\t" + "mr %0, 3\n\t" + : "=r" (__ret), "=r" (__err) + : "0" (-1), "1" (EINVAL), +
"i" (__NR_clone), "i" (__NR_exit), + "r" (__fn), "r" (__cstack), "r" (__flags), + "r" (__arg), "r" (__ptidptr), "r" (__newtls), + "r" (__ctidptr) + : "cr0", "cr1", "memory", "ctr", + "r0", "r29", "r27", "r28"); + } + LSS_RETURN(int, __ret, __err); + } #endif #define __NR__exit __NR_exit #define __NR__gettid __NR_gettid @@ -1294,13 +2046,16 @@ struct statfs64; int, c, long, a) LSS_INLINE _syscall0(pid_t, fork) LSS_INLINE _syscall2(int, fstat, int, f, - struct stat*, b) + struct kernel_stat*, b) + LSS_INLINE _syscall2(int, fstatfs, int, f, + struct kernel_statfs*, b) LSS_INLINE _syscall4(int, futex, int*, a, - int, o, int, v, struct timespec *, t) + int, o, int, v, + struct kernel_timespec*, t) LSS_INLINE _syscall3(int, getdents, int, f, - struct dirent*, d, int, c) + struct kernel_dirent*, d, int, c) LSS_INLINE _syscall3(int, getdents64, int, f, - struct dirent64*, d, int, c) + struct kernel_dirent64*, d, int, c) LSS_INLINE _syscall0(gid_t, getegid) LSS_INLINE _syscall0(uid_t, geteuid) LSS_INLINE _syscall0(pid_t, getpgrp) @@ -1309,7 +2064,7 @@ struct statfs64; LSS_INLINE _syscall2(int, getpriority, int, a, int, b) LSS_INLINE _syscall2(int, getrlimit, int, r, - struct rlimit*, l) + struct kernel_rlimit*, l) LSS_INLINE _syscall1(pid_t, getsid, pid_t, p) LSS_INLINE _syscall0(pid_t, _gettid) LSS_INLINE _syscall5(int, setxattr, const char *,p, @@ -1336,7 +2091,7 @@ struct statfs64; unsigned long, f, void *, a) LSS_INLINE _syscall3(int, open, const char*, p, int, f, int, m) - LSS_INLINE _syscall3(int, poll, struct pollfd*, u, + LSS_INLINE _syscall3(int, poll, struct kernel_pollfd*, u, unsigned int, n, int, t) LSS_INLINE _syscall2(int, prctl, int, o, long, a) @@ -1346,6 +2101,16 @@ struct statfs64; void *, b, size_t, c) LSS_INLINE _syscall3(int, readlink, const char*, p, char*, b, size_t, s) + LSS_INLINE _syscall4(int, rt_sigaction, int, s, + const struct kernel_sigaction*, a, + struct kernel_sigaction*, o, size_t, c) + LSS_INLINE _syscall2(int, rt_sigpending, struct 
kernel_sigset_t *, s, + size_t, c) + LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, + const struct kernel_sigset_t*, s, + struct kernel_sigset_t*, o, size_t, c); + LSS_INLINE _syscall2(int, rt_sigsuspend, + const struct kernel_sigset_t*, s, size_t, c); LSS_INLINE _syscall3(int, sched_getaffinity,pid_t, p, unsigned int, l, unsigned long *, m) LSS_INLINE _syscall3(int, sched_setaffinity,pid_t, p, @@ -1363,26 +2128,28 @@ struct statfs64; LSS_INLINE _syscall3(int, setresuid, uid_t, r, uid_t, e, uid_t, s) LSS_INLINE _syscall2(int, setrlimit, int, r, - const struct rlimit*, l) + const struct kernel_rlimit*, l) LSS_INLINE _syscall0(pid_t, setsid) LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, const stack_t*, o) LSS_INLINE _syscall2(int, stat, const char*, f, - struct stat*, b) + struct kernel_stat*, b) + LSS_INLINE _syscall2(int, statfs, const char*, f, + struct kernel_statfs*, b) LSS_INLINE _syscall3(ssize_t, write, int, f, const void *, b, size_t, c) LSS_INLINE _syscall3(ssize_t, writev, int, f, - const struct iovec *, v, size_t, c) + const struct kernel_iovec*, v, size_t, c) #if defined(__x86_64__) || \ - (defined(mips) && _MIPS_SIM != _MIPS_SIM_ABI32) + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) LSS_INLINE _syscall3(int, recvmsg, int, s, - struct msghdr*, m, int, f) + struct kernel_msghdr*, m, int, f) LSS_INLINE _syscall3(int, sendmsg, int, s, - const struct msghdr*, m, int, f) + const struct kernel_msghdr*, m, int, f) LSS_INLINE _syscall6(int, sendto, int, s, const void*, m, size_t, l, int, f, - const struct sockaddr*, a, int, t) + const struct kernel_sockaddr*, a, int, t) LSS_INLINE _syscall2(int, shutdown, int, s, int, h) LSS_INLINE _syscall3(int, socket, int, d, @@ -1397,14 +2164,7 @@ struct statfs64; __off64_t, o) LSS_INLINE _syscall4(int, newfstatat, int, d, const char *, p, - struct stat *, b, int, f) - LSS_INLINE _syscall4(int, rt_sigaction, int, s, - const struct sigaction*, a, - struct sigaction*, o, int, c) - LSS_INLINE _syscall2(int, 
rt_sigpending, sigset_t*, s, - int, c) - LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, - const sigset_t*, s, sigset_t*, o, int, c); + struct kernel_stat*, b, int, f) LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { return LSS_NAME(setfsgid)(gid); @@ -1423,25 +2183,42 @@ struct statfs64; } LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct sigaction *act, - struct sigaction *oldact) { - return LSS_NAME(rt_sigaction)(signum, act, oldact, (_NSIG+6)/8); + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + /* On x86_64, the kernel requires us to always set our own + * SA_RESTORER in order to be able to return from a signal handler. + * This function must have a "magic" signature that the "gdb" + * (and maybe the kernel?) can recognize. + */ + struct kernel_sigaction a; + if (act != NULL) { + a = *act; + a.sa_flags |= SA_RESTORER; + a.sa_restorer = LSS_NAME(restore_rt)(); + } + return LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, + (KERNEL_NSIG+7)/8); + } + + LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { + return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); } - LSS_INLINE int LSS_NAME(sigpending)(sigset_t *set) { - return LSS_NAME(rt_sigpending)(set, (_NSIG+6)/8); + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); } - LSS_INLINE int LSS_NAME(sigprocmask)(int how, const sigset_t *set, - sigset_t *oldset) { - return LSS_NAME(rt_sigprocmask)(how, set, oldset, (_NSIG+6)/8); + LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { + return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); } #endif #if defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ - (defined(mips) && _MIPS_SIM != _MIPS_SIM_ABI32) + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, int*, s, int, o, - struct rusage*, r) + struct 
kernel_rusage*, r) LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ return LSS_NAME(wait4)(pid, status, options, 0); @@ -1456,7 +2233,8 @@ struct statfs64; #define __NR__setfsuid32 __NR_setfsuid32 #define __NR__setresgid32 __NR_setresgid32 #define __NR__setresuid32 __NR_setresuid32 - LSS_INLINE _syscall2(int, ugetrlimit, int, r,struct rlimit*,l) + LSS_INLINE _syscall2(int, ugetrlimit, int, r, + struct kernel_rlimit*, l) LSS_INLINE _syscall1(int, _setfsgid32, gid_t, f) LSS_INLINE _syscall1(int, _setfsuid32, uid_t, f) LSS_INLINE _syscall3(int, _setresgid32, gid_t, r, @@ -1520,9 +2298,59 @@ struct statfs64; return rc; } #endif + LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { + memset(&set->sig, 0, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { + memset(&set->sig, -1, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] & + (1UL << ((signum - 1) % (8*sizeof(set->sig[0]))))); + } + } #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ - (defined(mips) && _MIPS_SIM == _MIPS_SIM_ABI32) + (defined(__mips__) && _MIPS_SIM == 
_MIPS_SIM_ABI32) || defined(__PPC__) + #define __NR__sigaction __NR_sigaction + #define __NR__sigpending __NR_sigpending + #define __NR__sigprocmask __NR_sigprocmask + #define __NR__sigsuspend __NR_sigsuspend #define __NR__socketcall __NR_socketcall + LSS_INLINE _syscall2(int, fstat64, int, f, + struct kernel_stat64 *, b) LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, loff_t *, res, uint, wh) LSS_INLINE _syscall1(void*, mmap, void*, a) @@ -1530,11 +2358,216 @@ struct statfs64; size_t, l, int, p, int, f, int, d, __off64_t, o) - LSS_INLINE _syscall3(int, sigaction, int, s, - const struct sigaction*, a, struct sigaction*, o) - LSS_INLINE _syscall1(int, sigpending, sigset_t*, s) - LSS_INLINE _syscall3(int, sigprocmask, int, h, - const sigset_t*, s, sigset_t*, o) + LSS_INLINE _syscall3(int, _sigaction, int, s, + const struct kernel_old_sigaction*, a, + struct kernel_old_sigaction*, o) + LSS_INLINE _syscall1(int, _sigpending, unsigned long*, s) + LSS_INLINE _syscall3(int, _sigprocmask, int, h, + const unsigned long*, s, + unsigned long*, o) + #ifdef __PPC__ + LSS_INLINE _syscall1(int, _sigsuspend, unsigned long, s) + #else + LSS_INLINE _syscall3(int, _sigsuspend, const void*, a, + int, b, + unsigned long, s) + #endif + LSS_INLINE _syscall2(int, stat64, const char *, p, + struct kernel_stat64 *, b) + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + int old_errno = LSS_ERRNO; + int rc; + struct kernel_sigaction a; + if (act != NULL) { + a = *act; + #ifdef __i386__ + /* On i386, the kernel requires us to always set our own + * SA_RESTORER when using realtime signals. Otherwise, it does not + * know how to return from a signal handler. This function must have + * a "magic" signature that the "gdb" (and maybe the kernel?) can + * recognize. + * Apparently, a SA_RESTORER is implicitly set by the kernel, when + * using non-realtime signals. 
+ * + * TODO: Test whether ARM needs a restorer + */ + a.sa_flags |= SA_RESTORER; + a.sa_restorer = LSS_NAME(restore_rt)(); + #endif + } + rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, + (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; + if (!act) { + ptr_a = NULL; + } else { + oa.sa_handler_ = act->sa_handler_; + memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); + #ifndef __mips__ + oa.sa_restorer = act->sa_restorer; + #endif + oa.sa_flags = act->sa_flags; + } + if (!oldact) { + ptr_oa = NULL; + } + LSS_ERRNO = old_errno; + rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); + if (rc == 0 && oldact) { + if (act) { + memcpy(oldact, act, sizeof(*act)); + } else { + memset(oldact, 0, sizeof(*oldact)); + } + oldact->sa_handler_ = ptr_oa->sa_handler_; + oldact->sa_flags = ptr_oa->sa_flags; + memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); + #ifndef __mips__ + oldact->sa_restorer = ptr_oa->sa_restorer; + #endif + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { + int old_errno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = old_errno; + LSS_NAME(sigemptyset)(set); + rc = LSS_NAME(_sigpending)(&set->sig[0]); + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = olderrno; + if (oldset) { + LSS_NAME(sigemptyset)(oldset); + } + rc = LSS_NAME(_sigprocmask)(how, + set ? &set->sig[0] : NULL, + oldset ? 
&oldset->sig[0] : NULL); + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = olderrno; + rc = LSS_NAME(_sigsuspend)( + #ifndef __PPC__ + set, 0, + #endif + set->sig[0]); + } + return rc; + } + #endif + #if defined(__PPC__) + #undef LSS_SC_LOADARGS_0 + #define LSS_SC_LOADARGS_0(dummy...) + #undef LSS_SC_LOADARGS_1 + #define LSS_SC_LOADARGS_1(arg1) \ + __sc_4 = (unsigned long) (arg1) + #undef LSS_SC_LOADARGS_2 + #define LSS_SC_LOADARGS_2(arg1, arg2) \ + LSS_SC_LOADARGS_1(arg1); \ + __sc_5 = (unsigned long) (arg2) + #undef LSS_SC_LOADARGS_3 + #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \ + LSS_SC_LOADARGS_2(arg1, arg2); \ + __sc_6 = (unsigned long) (arg3) + #undef LSS_SC_LOADARGS_4 + #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \ + LSS_SC_LOADARGS_3(arg1, arg2, arg3); \ + __sc_7 = (unsigned long) (arg4) + #undef LSS_SC_LOADARGS_5 + #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \ + LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \ + __sc_8 = (unsigned long) (arg5) + #undef LSS_SC_BODY + #define LSS_SC_BODY(nr, type, opt, args...) 
\ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall; \ + register unsigned long __sc_3 __asm__ ("r3") = opt; \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + register unsigned long __sc_8 __asm__ ("r8"); \ + LSS_SC_LOADARGS_##nr(args); \ + __asm__ __volatile__ \ + ("stwu 1, -48(1)\n\t" \ + "stw 4, 20(1)\n\t" \ + "stw 5, 24(1)\n\t" \ + "stw 6, 28(1)\n\t" \ + "stw 7, 32(1)\n\t" \ + "stw 8, 36(1)\n\t" \ + "addi 4, 1, 20\n\t" \ + "sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + + LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, + int flags){ + LSS_SC_BODY(3, ssize_t, 17, s, msg, flags); + } + + LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, + const struct kernel_msghdr *msg, + int flags) { + LSS_SC_BODY(3, ssize_t, 16, s, msg, flags); + } + + // TODO(csilvers): why is this ifdef'ed out? 
+#if 0 + LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, + int flags, + const struct kernel_sockaddr *to, + unsigned int tolen) { + LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen); + } +#endif + + LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { + LSS_SC_BODY(2, int, 13, s, how); + } + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + LSS_SC_BODY(3, int, 1, domain, type, protocol); + } + + LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, + int sv[2]) { + LSS_SC_BODY(4, int, 8, d, type, protocol, sv); + } + #endif + #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define __NR__socketcall __NR_socketcall LSS_INLINE _syscall2(int, _socketcall, int, c, va_list, a) @@ -1547,17 +2580,20 @@ struct statfs64; return rc; } - LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct msghdr*msg,int flags){ + LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, + int flags){ return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags); } - LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, const struct msghdr *msg, + LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, + const struct kernel_msghdr *msg, int flags) { return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags); } LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, - int flags, const struct sockaddr*to, + int flags, + const struct kernel_sockaddr *to, unsigned int tolen) { return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen); } @@ -1575,17 +2611,17 @@ struct statfs64; return LSS_NAME(socketcall)(8, d, type, protocol, sv); } #endif - #if defined(__i386__) + #if defined(__i386__) || defined(__PPC__) LSS_INLINE _syscall4(int, fstatat64, int, d, const char *, p, - struct stat64 *, b, int, f) + struct kernel_stat64 *, b, int, f) #endif - #if defined(__i386__) || \ - (defined(mips) && _MIPS_SIM == _MIPS_SIM_ABI32) + #if defined(__i386__) || defined(__PPC__) || 
\ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, int*, s, int, o) #endif - #if defined(mips) + #if defined(__mips__) /* sys_pipe() on MIPS has non-standard calling conventions, as it returns * both file handles through CPU registers. */ @@ -1610,17 +2646,27 @@ struct statfs64; #else LSS_INLINE _syscall1(int, pipe, int *, p) #endif + /* TODO(csilvers): see if ppc can/should support this as well */ #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ - (defined(mips) && _MIPS_SIM != _MIPS_SIM_ABI64) - LSS_INLINE _syscall2(int, statfs64, const char*, p, - struct statfs64 *, b) - LSS_INLINE _syscall2(int, fstatfs64, int, f, - struct statfs64 *, b) + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) + #define __NR__statfs64 __NR_statfs64 + #define __NR__fstatfs64 __NR_fstatfs64 + LSS_INLINE _syscall3(int, _statfs64, const char*, p, + size_t, s,struct kernel_statfs64*, b) + LSS_INLINE _syscall3(int, _fstatfs64, int, f, + size_t, s,struct kernel_statfs64*, b) + LSS_INLINE int LSS_NAME(statfs64)(const char *p, + struct kernel_statfs64 *b) { + return LSS_NAME(_statfs64)(p, sizeof(*b), b); + } + LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) { + return LSS_NAME(_fstatfs64)(f, sizeof(*b), b); + } #endif - LSS_INLINE int LSS_NAME(execv)(const char *path, const char * const argv[]) { + LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) { extern char **environ; - return LSS_NAME(execve)(path, argv, (const char * const *)environ); + return LSS_NAME(execve)(path, argv, (const char *const *)environ); } LSS_INLINE pid_t LSS_NAME(gettid)() { @@ -1671,9 +2717,9 @@ struct statfs64; extern int __getpagesize(void); switch (name) { case _SC_OPEN_MAX: { - unsigned long limit[2]; - return LSS_NAME(getrlimit)(RLIMIT_NOFILE, (struct rlimit *)limit)<0 - ? 8192 : limit[0]; + struct kernel_rlimit limit; + return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0 + ? 
8192 : limit.rlim_cur; } case _SC_PAGESIZE: return __getpagesize(); @@ -1682,29 +2728,46 @@ struct statfs64; return -1; } } - #if defined(__x86_64__) || \ - (defined(mips) && _MIPS_SIM == _MIPS_SIM_ABI64) - LSS_INLINE _syscall3(int, readahead, int, fd, loff_t, offset, - unsigned, len) - #elif defined(mips) || defined(__ARM_ARCH_3__) - LSS_INLINE _syscall4(int, _readahead32, int, fd, unsigned, offset_lo, - unsigned, offset_hi, unsigned, len); - LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t offset, int len) { - return LSS_NAME(_readahead32)(fd, - #if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \ - (defined(__LITTLE_ENDIAN__)) - (unsigned)offset, (unsigned)(offset >> 32), - #elif (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || \ - (defined(__BIG_ENDIAN__)) - (unsigned)(offset >> 32), (unsigned)offset, - #else - #error "unknown endinness, don't know how to pass syscall arguments" - #endif - (unsigned)len); + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64) + LSS_INLINE _syscall4(ssize_t, pread64, int, f, + void *, b, size_t, c, loff_t, o) + LSS_INLINE _syscall4(ssize_t, pwrite64, int, f, + const void *, b, size_t, c, loff_t, o) + LSS_INLINE _syscall3(int, readahead, int, f, + loff_t, o, unsigned, c) + #else + #define __NR__pread64 __NR_pread64 + #define __NR__pwrite64 __NR_pwrite64 + #define __NR__readahead __NR_readahead + LSS_INLINE _syscall5(ssize_t, _pread64, int, f, + void *, b, size_t, c, unsigned, o1, + unsigned, o2) + LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f, + const void *, b, size_t, c, unsigned, o1, + long, o2) + LSS_INLINE _syscall4(int, _readahead, int, f, + unsigned, o1, unsigned, o2, size_t, c); + /* We force 64bit-wide parameters onto the stack, then access each + * 32-bit component individually. This guarantees that we build the + * correct parameters independent of the native byte-order of the + * underlying architecture. 
+ */ + LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count, + loff_t off) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]); + } + LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf, + size_t count, loff_t off) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]); + } + LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len); } #endif - #endif #if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) diff --git a/src/base/linuxthreads.c b/src/base/linuxthreads.c index 2f9d547..e67ac26 100644 --- a/src/base/linuxthreads.c +++ b/src/base/linuxthreads.c @@ -249,15 +249,15 @@ struct ListerParams { static void ListerThread(struct ListerParams *args) { - int found_parent = 0; - pid_t clone_pid = sys_gettid(), ppid = sys_getppid(); - char proc_self_task[80], marker_name[48], *marker_path; - const char *proc_paths[3]; - const char *const *proc_path = proc_paths; - int proc = -1, marker = -1, num_threads = 0; - int max_threads = 0, sig; - struct stat marker_sb, proc_sb; - stack_t altstack; + int found_parent = 0; + pid_t clone_pid = sys_gettid(), ppid = sys_getppid(); + char proc_self_task[80], marker_name[48], *marker_path; + const char *proc_paths[3]; + const char *const *proc_path = proc_paths; + int proc = -1, marker = -1, num_threads = 0; + int max_threads = 0, sig; + struct kernel_stat marker_sb, proc_sb; + stack_t altstack; /* Create "marker" that we can use to detect threads sharing the same * address space and the same file handles. 
By setting the FD_CLOEXEC flag @@ -312,12 +312,12 @@ static void ListerThread(struct ListerParams *args) { sig_marker = marker; sig_proc = -1; for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { - struct sigaction sa; + struct kernel_sigaction sa; memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = SignalHandler; - sigfillset(&sa.sa_mask); - sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND; - sys_sigaction(sync_signals[sig], &sa, (struct sigaction *)NULL); + sa.sa_sigaction_ = SignalHandler; + sys_sigfillset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND; + sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL); } /* Read process directories in /proc/... */ @@ -356,9 +356,9 @@ static void ListerThread(struct ListerParams *args) { sig_num_threads = num_threads; sig_pids = pids; for (;;) { - struct dirent *entry; + struct kernel_dirent *entry; char buf[4096]; - ssize_t nbytes = sys_getdents(proc, (struct dirent *)buf, + ssize_t nbytes = sys_getdents(proc, (struct kernel_dirent *)buf, sizeof(buf)); if (nbytes < 0) goto failure; @@ -375,9 +375,9 @@ static void ListerThread(struct ListerParams *args) { } break; } - for (entry = (struct dirent *)buf; - entry < (struct dirent *)&buf[nbytes]; - entry = (struct dirent *)((char *)entry + entry->d_reclen)) { + for (entry = (struct kernel_dirent *)buf; + entry < (struct kernel_dirent *)&buf[nbytes]; + entry = (struct kernel_dirent *)((char *)entry+entry->d_reclen)) { if (entry->d_ino != 0) { const char *ptr = entry->d_name; pid_t pid; @@ -395,7 +395,7 @@ static void ListerThread(struct ListerParams *args) { /* Attach (and suspend) all threads */ if (pid && pid != clone_pid) { - struct stat tmp_sb; + struct kernel_stat tmp_sb; char fname[entry->d_reclen + 48]; strcat(strcat(strcpy(fname, "/proc/"), entry->d_name), marker_path); @@ -536,11 +536,11 @@ static void ListerThread(struct ListerParams *args) { */ int ListAllProcessThreads(void *parameter, ListAllProcessThreadsCallBack 
callback, ...) { - char altstack_mem[ALT_STACKSIZE]; - struct ListerParams args; - pid_t clone_pid; - int dumpable = 1, sig; - sigset_t sig_blocked, sig_old; + char altstack_mem[ALT_STACKSIZE]; + struct ListerParams args; + pid_t clone_pid; + int dumpable = 1, sig; + struct kernel_sigset_t sig_blocked, sig_old; va_start(args.ap, callback); @@ -573,9 +573,9 @@ int ListAllProcessThreads(void *parameter, /* Before cloning the thread lister, block all asynchronous signals, as we */ /* are not prepared to handle them. */ - sigfillset(&sig_blocked); + sys_sigfillset(&sig_blocked); for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { - sigdelset(&sig_blocked, sync_signals[sig]); + sys_sigdelset(&sig_blocked, sync_signals[sig]); } if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) { args.err = errno; @@ -606,7 +606,7 @@ int ListAllProcessThreads(void *parameter, clone_pid = local_clone((int (*)(void *))ListerThread, &args); clone_errno = errno; - sys0_sigprocmask(SIG_SETMASK, &sig_old, &sig_old); + sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old); if (clone_pid >= 0) { int status, rc; diff --git a/src/base/linuxthreads.h b/src/base/linuxthreads.h index 829fd79..5c318fe 100644 --- a/src/base/linuxthreads.h +++ b/src/base/linuxthreads.h @@ -41,7 +41,7 @@ * related platforms should not be difficult. */ #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ - defined(mips)) && defined(__linux) + defined(__mips__) || defined(__PPC__)) && defined(__linux) /* Define the THREADS symbol to make sure that there is exactly one core dumper * built into the library. 
diff --git a/src/base/logging.h b/src/base/logging.h index 95035b3..3ecfea2 100644 --- a/src/base/logging.h +++ b/src/base/logging.h @@ -160,7 +160,7 @@ enum { DEBUG_MODE = 1 }; #ifdef ERROR #undef ERROR // may conflict with ERROR macro on windows #endif -enum {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4}; +enum LogSeverity {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4}; // NOTE: we add a newline to the end of the output if it's not there already inline void LogPrintf(int severity, const char* pat, va_list ap) { diff --git a/src/base/mutex.h b/src/base/simple_mutex.h index 6eeb995..2474b57 100644 --- a/src/base/mutex.h +++ b/src/base/simple_mutex.h @@ -46,8 +46,8 @@ * mode. */ -#ifndef GOOGLE_MUTEX_H__ -#define GOOGLE_MUTEX_H__ +#ifndef GOOGLE_SIMPLE_MUTEX_H__ +#define GOOGLE_SIMPLE_MUTEX_H__ #include "config.h" // to figure out pthreads support @@ -114,19 +114,24 @@ class Mutex { // Now the implementation of Mutex for various systems #if defined(NO_THREADS) -// In debug mode, we'll assert some invariants: we don't unlock if we -// didn't lock first, the lock is not held when Lock() is called -// (since we're not re-entrant), etc. In non-debug mode, we do -// nothing, for efficiency. That's why we do everything in an assert. +// When we don't have threads, we can be either reading or writing, +// but not both. We can have lots of readers at once (in no-threads +// mode, that's most likely to happen in recursive function calls), +// but only one writer. We represent this by having mutex_ be -1 when +// writing and a number > 0 when reading (and 0 when no lock is held). +// +// In debug mode, we assert these invariants, while in non-debug mode +// we do nothing, for efficiency. That's why everything is in an +// assert. 
#include <assert.h> -Mutex::Mutex() : mutex_(0) { } // mutex_ counts number of current Lock()s +Mutex::Mutex() : mutex_(0) { } Mutex::~Mutex() { assert(mutex_ == 0); } -void Mutex::Lock() { assert(mutex_++ == 0); } -void Mutex::Unlock() { assert(mutex_-- == 1); } +void Mutex::Lock() { assert(--mutex_ == -1); } +void Mutex::Unlock() { assert(mutex_++ == -1); } bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } -void Mutex::ReaderLock() { Lock(); } -void Mutex::ReaderUnlock() { Unlock(); } +void Mutex::ReaderLock() { assert(++mutex_ > 0); } +void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } #elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) @@ -212,4 +217,4 @@ class WriterMutexLock { #define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name) #define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name) -#endif /* #define GOOGLE_MUTEX_H__ */ +#endif /* #define GOOGLE_SIMPLE_MUTEX_H__ */ diff --git a/src/base/spinlock.cc b/src/base/spinlock.cc index 12761b1..2e601eb 100644 --- a/src/base/spinlock.cc +++ b/src/base/spinlock.cc @@ -37,61 +37,19 @@ #ifdef HAVE_UNISTD_H #include <unistd.h> /* For nanosleep() on Windows, read() */ #endif -#if defined(__MACH__) && defined(__APPLE__) -#include <sys/types.h> -#include <sys/sysctl.h> /* how we figure out numcpu's on OS X */ -#endif #include <fcntl.h> /* for open(), O_RDONLY */ #include <string.h> /* for strncmp */ #include <errno.h> #include "base/spinlock.h" +#include "base/sysinfo.h" /* for NumCPUs() */ -static int adaptive_spin_count = 0; -static int num_cpus = -1; - -// It's important this not call malloc! -- it is called at global-construct -// time, before we've set up all our proper malloc hooks and such. -static int NumCPUs() { - if (num_cpus > 0) - return num_cpus; // value is cached +// We can do contention-profiling of SpinLocks, but the code is in +// mutex.cc, which is not always linked in with spinlock. 
Hence we +// provide this weak definition, which is used if mutex.cc isn't linked in. +ATTRIBUTE_WEAK extern void SubmitSpinLockProfileData(const void *, int64); +void SubmitSpinLockProfileData(const void *, int64) {} -#if defined(__MACH__) && defined(__APPLE__) - int cpus; - size_t size = sizeof(cpus); - int numcpus_name[] = { CTL_HW, HW_NCPU }; - if (::sysctl(numcpus_name, arraysize(numcpus_name), &cpus, &size, 0, 0) == 0 - && (size == sizeof(cpus))) - num_cpus = cpus; - else - num_cpus = 1; // conservative assumption - return num_cpus; -#else /* hope the information is in /proc */ - const char* pname = "/proc/cpuinfo"; - int fd = open(pname, O_RDONLY); - if (fd == -1) { - num_cpus = 1; // conservative assumption; TODO: do better - return num_cpus; - } - char line[1024]; - int chars_read; - num_cpus = 0; - do { // a line at a time - chars_read = 0; - char* linepos = line; - while (linepos - line < sizeof(line)-1 && - (chars_read=read(fd, linepos, 1)) == 1 && - *linepos != '\n') // read one line - linepos++; - *linepos = '\0'; // terminate the line at the \n - if (strncmp(line, "processor ", sizeof("processor ")-1) == 0) - num_cpus++; - } while (chars_read > 0); // stop when we get to EOF - close(fd); - if (num_cpus == 0) - num_cpus = 1; // conservative assumption - return num_cpus; -#endif -} +static int adaptive_spin_count = 0; struct SpinLock_InitHelper { SpinLock_InitHelper() { diff --git a/src/base/spinlock.h b/src/base/spinlock.h index b3c9596..0ad8587 100644 --- a/src/base/spinlock.h +++ b/src/base/spinlock.h @@ -71,8 +71,24 @@ class SpinLock { } } + inline bool TryLock() { + return (Acquire_CompareAndSwap(&lockword_, 0, 1) == 0); + } + inline void Unlock() { + // This is defined in mutex.cc. + extern void SubmitSpinLockProfileData(const void *, int64); + + int64 wait_timestamp = static_cast<uint32>(lockword_); + Release_Store(&lockword_, 0); + // Collect contention profile info if this lock was contended. 
+ // The lockword_ value indicates when the waiter started waiting + if (wait_timestamp != 1) { + // Subtract one from wait_timestamp as antidote to "now |= 1;" + // in SlowLock(). + SubmitSpinLockProfileData(this, wait_timestamp - 1); + } } // Report if we think the lock can be held by this thread. @@ -83,6 +99,17 @@ class SpinLock { return lockword_ != 0; } + // The timestamp for contention lock profiling must fit into 31 bits, + // as lockword_ is 32 bits and we lose an additional low-order bit due + // to the statement "now |= 1" in SlowLock(). + // To select 31 bits from the 64-bit cycle counter, we shift right by + // PROFILE_TIMESTAMP_SHIFT = 7. + // Using these 31 bits, we reduce granularity of time measurement to + // 256 cycles, and will lose track of wait time for waits greater than + // 109 seconds on a 5 GHz machine, longer for slower clocks. + // Waits this long should be very rare. + enum { PROFILE_TIMESTAMP_SHIFT = 7 }; + private: // Lock-state: 0 means unlocked, 1 means locked volatile AtomicWord lockword_; diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc index 2d7a593..df0452a 100644 --- a/src/base/sysinfo.cc +++ b/src/base/sysinfo.cc @@ -42,12 +42,20 @@ #if defined __MACH__ // Mac OS X, almost certainly #include <mach-o/dyld.h> // for iterating over dll's in ProcMapsIter #include <mach-o/loader.h> // for iterating over dll's in ProcMapsIter +#include <sys/types.h> +#include <sys/sysctl.h> // how we figure out numcpu's on OS X +#elif defined __FreeBSD__ +#include <sys/sysctl.h> #elif defined __sun__ // Solaris #include <procfs.h> // for, e.g., prmap_t +#elif defined _MSC_VER // Windows +#include <process.h> // for getpid() (actually, _getpid()) +#include <shlwapi.h> // for SHGetValueA() #endif #include "base/sysinfo.h" #include "base/commandlineflags.h" #include "base/logging.h" +#include "base/cycleclock.h" #if defined(WIN32) && defined(MODULEENTRY32) // In a change from the usual W-A pattern, there is no A variant of @@ -81,6 
+89,12 @@ # define safeclose(fd) close(fd) #endif +// ---------------------------------------------------------------------- +// GetenvBeforeMain() +// GetUniquePathFromEnv() +// Some non-trivial getenv-related functions. +// ---------------------------------------------------------------------- + const char* GetenvBeforeMain(const char* name) { // static is ok because this function should only be called before // main(), when we're single-threaded. @@ -111,6 +125,244 @@ const char* GetenvBeforeMain(const char* name) { return NULL; // env var never found } +// This takes as an argument an environment-variable name (like +// CPUPROFILE) whose value is supposed to be a file-path, and sets +// path to that path, and returns true. If the env var doesn't exist, +// or is the empty string, leaves path unchanged and returns false. +// The reason this is non-trivial is that this function handles munged +// pathnames. Here's why: +// +// If we're a child process of the 'main' process, we can't just use +// getenv("CPUPROFILE") -- the parent process will be using that path. +// Instead we append our pid to the pathname. How do we tell if we're a +// child process? Ideally we'd set an environment variable that all +// our children would inherit. But -- and this is seemingly a bug in +// gcc -- if you do a setenv() in a shared library in a global +// constructor, the environment setting is lost by the time main() is +// called. The only safe thing we can do in such a situation is to +// modify the existing envvar. So we do a hack: in the parent, we set +// the high bit of the 1st char of CPUPROFILE. In the child, we +// notice the high bit is set and append the pid(). This works +// assuming cpuprofile filenames don't normally have the high bit set +// in their first character! If that assumption is violated, we'll +// still get a profile, but one with an unexpected name. +// TODO(csilvers): set an envvar instead when we can do it reliably. 
+bool GetUniquePathFromEnv(const char* env_name, char* path) { + char* envval = getenv(env_name); + if (envval == NULL || *envval == '\0') + return false; + if (envval[0] & 128) { // high bit is set + snprintf(path, PATH_MAX, "%c%s_%u", // add pid and clear high bit + envval[0] & 127, envval+1, (unsigned int)(getpid())); + } else { + snprintf(path, PATH_MAX, "%s", envval); + envval[0] |= 128; // set high bit for kids to see + } + return true; +} + +// ---------------------------------------------------------------------- +// CyclesPerSecond() +// NumCPUs() +// It's important this not call malloc! -- they may be called at +// global-construct time, before we've set up all our proper malloc +// hooks and such. +// ---------------------------------------------------------------------- + +static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous +static int cpuinfo_num_cpus = 1; // Conservative guess + +static void SleepForMilliseconds(int milliseconds) { +#ifdef WIN32 + _sleep(milliseconds); // Windows's _sleep takes milliseconds argument +#else + // Sleep for a few milliseconds + struct timespec sleep_time; + sleep_time.tv_sec = milliseconds / 1000; + sleep_time.tv_nsec = (milliseconds % 1000) * 1000000; + while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) + ; // Ignore signals and wait for the full interval to elapse. +#endif +} + +// Helper function estimates cycles/sec by observing cycles elapsed during +// sleep(). Using small sleep time decreases accuracy significantly. 
+static int64 EstimateCyclesPerSecond(const int estimate_time_ms) { + assert(estimate_time_ms > 0); + if (estimate_time_ms <= 0) + return 1; + double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much + + const int64 start_ticks = CycleClock::Now(); + SleepForMilliseconds(estimate_time_ms); + const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks)); + return guess; +} + +// WARNING: logging calls back to InitializeSystemInfo() so it must +// not invoke any logging code. Also, InitializeSystemInfo() can be +// called before main() -- in fact it *must* be since already_called +// isn't protected -- before malloc hooks are properly set up, so +// we make an effort not to call any routines which might allocate +// memory. + +static void InitializeSystemInfo() { + static bool already_called = false; // safe if we run before threads + if (already_called) return; + already_called = true; + + // I put in a never-called reference to EstimateCyclesPerSecond() here + // to silence the compiler for OS's that don't need it + if (0) EstimateCyclesPerSecond(0); + +#if defined __linux__ + char line[1024]; + char* err; + + // If CPU scaling is in effect, we want to use the *maximum* frequency, + // not whatever CPU speed some random processor happens to be using now. + bool saw_mhz = false; + const char* pname0 = "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"; + int fd0 = open(pname0, O_RDONLY); + if (fd0 != -1) { + memset(line, '\0', sizeof(line)); + read(fd0, line, sizeof(line)); + const int max_freq = strtol(line, &err, 10); + if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { + // The value is in kHz. For example, on a 2GHz machine, the file + // contains the value "2000000". Historically this file contained no + // newline, but at some point the kernel started appending a newline. 
+ cpuinfo_cycles_per_second = max_freq * 1000.0; + saw_mhz = true; + } + close(fd0); + } + + // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. + const char* pname = "/proc/cpuinfo"; + int fd = open(pname, O_RDONLY); + if (fd == -1) { + perror(pname); + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + return; // TODO: use generic tester instead? + } + + double bogo_clock = 1.0; + int num_cpus = 0; + line[0] = line[1] = '\0'; + int chars_read = 0; + do { // we'll exit when the last read didn't read anything + // Move the next line to the beginning of the buffer + const int oldlinelen = strlen(line); + if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line + line[0] = '\0'; + else // still other lines left to save + memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1)); + // Terminate the new line, reading more if we can't find the newline + char* newline = strchr(line, '\n'); + if (newline == NULL) { + const int linelen = strlen(line); + const int bytes_to_read = sizeof(line)-1 - linelen; + assert(bytes_to_read > 0); // because the memmove recovered >=1 bytes + chars_read = read(fd, line + linelen, bytes_to_read); + line[linelen + chars_read] = '\0'; + newline = strchr(line, '\n'); + } + if (newline != NULL) + *newline = '\0'; + + if (!saw_mhz && strncmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0') + saw_mhz = true; + } + } else if (strncmp(line, "bogomips", sizeof("bogomips")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) + bogo_clock = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr == NULL || freqstr[1] == '\0' || *err != '\0') + bogo_clock = 1.0; + } else if (strncmp(line, "processor", sizeof("processor")-1) == 0) { + num_cpus++; // count up every time we see an "processor :" entry + } + } while (chars_read 
> 0); + close(fd); + + if (!saw_mhz) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } + if (cpuinfo_cycles_per_second == 0.0) { + cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe + } + if (num_cpus > 0) { + cpuinfo_num_cpus = num_cpus; + } + +#elif defined __FreeBSD__ + // For this sysctl to work, the machine must be configured without + // SMP, APIC, or APM support. + unsigned int hz = 0; + size_t sz = sizeof(hz); + const char *sysctl_path = "machdep.tsc_freq"; + if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) { + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + fprintf(stderr, "Unable to determine clock rate from sysctl: %s\n", + sysctl_path); + } else { + cpuinfo_cycles_per_second = hz; + } + // TODO(csilvers): also figure out cpuinfo_num_cpus + +#elif defined WIN32 +# pragma comment(lib, "shlwapi.lib") // for SHGetValue() + // In NT, read MHz from the registry. If we fail to do so or we're in win9x + // then make a crude estimate. + OSVERSIONINFO os; + os.dwOSVersionInfoSize = sizeof(os); + DWORD data, data_size = sizeof(data); + if (GetVersionEx(&os) && + os.dwPlatformId == VER_PLATFORM_WIN32_NT && + SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", NULL, &data, &data_size))) + cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz + else + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500? + // TODO(csilvers): also figure out cpuinfo_num_cpus + +#elif defined(__MACH__) && defined(__APPLE__) + // TODO(csilvers): can we do better than this? 
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + + int num_cpus = 0; + size_t size = sizeof(num_cpus); + int numcpus_name[] = { CTL_HW, HW_NCPU }; + if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0) + == 0 + && (size == sizeof(num_cpus))) + cpuinfo_num_cpus = num_cpus; + +#else + // Generic cycles per second counter + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); +#endif +} + +double CyclesPerSecond(void) { + InitializeSystemInfo(); + return cpuinfo_cycles_per_second; +} + +int NumCPUs(void) { + InitializeSystemInfo(); + return cpuinfo_num_cpus; +} + +// ---------------------------------------------------------------------- #if defined __linux__ || defined __FreeBSD__ || defined __sun__ static void ConstructFilename(const char* spec, pid_t pid, diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h index 35f31ad..e22736c 100644 --- a/src/base/sysinfo.h +++ b/src/base/sysinfo.h @@ -55,7 +55,18 @@ // Note that /proc only has the environment at the time the application was // started, so this routine ignores setenv() calls/etc. Also note it only // reads the first 16K of the environment. -const char* GetenvBeforeMain(const char* name); +extern const char* GetenvBeforeMain(const char* name); + +// This takes as an argument an environment-variable name (like +// CPUPROFILE) whose value is supposed to be a file-path, and sets +// path to that path, and returns true. Non-trivial for surprising +// reasons, as documented in sysinfo.cc. path must have space PATH_MAX. 
+extern bool GetUniquePathFromEnv(const char* env_name, char* path); + +extern int NumCPUs(); + +// processor cycles per second of each processor +extern double CyclesPerSecond(void); // A ProcMapsIterator abstracts access to /proc/maps for a given diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h index 4cd077a..3ed6587 100644 --- a/src/google/heap-checker.h +++ b/src/google/heap-checker.h @@ -258,18 +258,18 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { static bool HaveDisabledChecksUp(int stack_frames); static bool HaveDisabledChecksAt(const void* address); - // Ignore an object located at 'ptr' - // (as well as all heap objects (transitively) referenced from it) + // Ignore an object located at 'ptr' (can go at the start or into the object) + // as well as all heap objects (transitively) referenced from it // for the purposes of heap leak checking. // If 'ptr' does not point to an active allocated object // at the time of this call, it is ignored; // but if it does, the object must not get deleted from the heap later on; - // it must also be not already ignored at the time of this call. + // it must also be not already ignored at the time of this call. static void IgnoreObject(const void* ptr); // Undo what an earlier IgnoreObject() call promised and asked to do. - // At the time of this call 'ptr' must point to an active allocated object - // which was previously registered with IgnoreObject(). + // At the time of this call 'ptr' must point at or inside of an active + // allocated object which was previously registered with IgnoreObject(). static void UnIgnoreObject(const void* ptr); public: // Initializations; to be called from main() only. 
@@ -303,8 +303,6 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { bool DoNoLeaksOnce(CheckType check_type, CheckFullness fullness, ReportMode report_mode); - // Helper for IgnoreObject - static void IgnoreObjectLocked(const void* ptr); // Helper for DisableChecksAt static void DisableChecksAtLocked(const void* address); // Helper for DisableChecksIn @@ -345,8 +343,6 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { // in a debug message to describe what kind of live object sources // are being used. static void IgnoreLiveObjectsLocked(const char* name, const char* name2); - // Heap profile object filtering callback to filter out live objects. - static bool HeapProfileFilter(const void* ptr, size_t size); // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially // calls DoMainHeapCheck static void RunHeapCleanups(); @@ -373,12 +369,13 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { uintptr_t start_address, uintptr_t end_address); - // Return true iff "*ptr" points to a heap object; - // we also fill *object_size for this object then. - // If yes, we might move "*ptr" to point to the very start of the object - // (this needs to happen for C++ class array allocations - // and for basic_string-s of C++ library that comes with gcc 3.4). - static bool HaveOnHeapLocked(const void** ptr, size_t* object_size); + // Return true iff "*ptr" points to a heap object + // ("*ptr" can point at the start or inside of a heap object + // so that this works e.g. for pointers to C++ arrays, C++ strings, + // multiple-inherited objects, or pointers to members). + // We also fill *object_size for this object then + // and we move "*ptr" to point to the very start of the heap object. 
+ static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size); private: diff --git a/src/google/profiler.h b/src/google/profiler.h index 3c4106b..24ef2f3 100644 --- a/src/google/profiler.h +++ b/src/google/profiler.h @@ -121,27 +121,4 @@ struct ProfilerState { }; extern "C" PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(ProfilerState* state); -// ------------------------- ProfilerThreadState ----------------------- -// DEPRECATED: this class is no longer needed. -// -// A small helper class that allows a thread to periodically check if -// profiling has been enabled or disabled, and to react appropriately -// to ensure that activity in the current thread is included in the -// profile. Usage: -// -// ProfileThreadState profile_state; -// while (true) { -// ... do some thread work ... -// profile_state.ThreadCheck(); -// } -class ProfilerThreadState { - public: - ProfilerThreadState() { } - - // Called in a thread to enable or disable profiling on the thread - // based on whether profiling is currently on or off. 
- // DEPRECATED: No longer needed - void ThreadCheck() { } -}; - #endif /* BASE_PROFILER_H__ */ diff --git a/src/heap-checker.cc b/src/heap-checker.cc index 7af372d..7b3376b 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -94,6 +94,12 @@ using std::max; using std::less; using std::char_traits; +// This is the default if you don't link in -lprofiler +extern "C" { +ATTRIBUTE_WEAK PERFTOOLS_DLL_DECL bool ProfilingIsEnabledForAllThreads(); +bool ProfilingIsEnabledForAllThreads() { return false; } +} + //---------------------------------------------------------------------- // Flags that control heap-checking //---------------------------------------------------------------------- @@ -147,11 +153,37 @@ DEFINE_bool(heap_check_test_pointer_alignment, "Set to true to check if the found leak can be due to " "use of unaligned pointers"); +// A reasonable default to handle pointers inside of typical class objects: +// Too low and we won't be able to traverse pointers to normally-used +// nested objects and base parts of multiple-inherited objects. +// Too high and it will both slow down leak checking (FindInsideAlloc +// in HaveOnHeapLocked will get slower when there are large on-heap objects) +// and make it probabilistically more likely to miss leaks +// of large-sized objects. +static const int64 kHeapCheckMaxPointerOffset = 1024; +DEFINE_int64(heap_check_max_pointer_offset, + EnvToInt("HEAP_CHECK_MAX_POINTER_OFFSET", + kHeapCheckMaxPointerOffset), + "Largest pointer offset for which we traverse " + "pointers going inside of heap allocated objects. " + "Set to -1 to use the actual largest heap object size."); + DEFINE_bool(heap_check_run_under_gdb, EnvToBool("HEAP_CHECK_RUN_UNDER_GDB", false), "If false, turns off heap-checking library when running under gdb " "(normally, set to 'true' only when debugging the heap-checker)"); +DEFINE_int32(heap_check_delay_seconds, 0, + "Number of seconds to delay on-exit heap checking." 
+ " If you set this flag," + " you may also want to set exit_timeout_seconds in order to" + " avoid exit timeouts.\n" + "NOTE: This flag is to be used only to help diagnose issues" + " where it is suspected that the heap checker is reporting" + " false leaks that will disappear if the heap checker delays" + " its checks. Report any such issues to the heap-checker" + " maintainer(s)."); + //---------------------------------------------------------------------- DEFINE_string(heap_profile_pprof, @@ -197,6 +229,24 @@ static pid_t heap_checker_pid = 0; static bool constructor_heap_profiling = false; //---------------------------------------------------------------------- + +// Alignment at which all pointers in memory are supposed to be located; +// use 1 if any alignment is ok. +// heap_check_test_pointer_alignment flag guides if we try the value of 1. +// The larger it can be, the lesser is the chance of missing real leaks. +static const size_t kPointerSourceAlignment = sizeof(void*); + +// Alignment at which all pointers (in)to heap memory objects +// are supposed to point; use 1 if any alignment is ok. +// sizeof(void*) is good enough for all the cases we want to support +// -- see PointsIntoHeapObject +// The larger it can be, the lesser is the chance of missing real leaks. +static const size_t kPointerDestAlignment = sizeof(uint32); + // need sizeof(uint32) even for 64 bit binaries to support + // e.g. the internal structure of UnicodeString in ICU. +static const size_t kPointerDestAlignmentMask = kPointerDestAlignment - 1; + +//---------------------------------------------------------------------- // HeapLeakChecker's own memory allocator that is // independent of the normal program allocator. 
//---------------------------------------------------------------------- @@ -309,12 +359,6 @@ typedef map<HCL_string, LiveObjectsStack, less<HCL_string>, > LibraryLiveObjectsStacks; static LibraryLiveObjectsStacks* library_live_objects = NULL; -// Objects to be removed from the heap profile when we dump it. -typedef set<const void*, less<const void*>, - STL_Allocator<const void*, HeapLeakChecker::Allocator> - > ProfileAdjustObjectSet; -static ProfileAdjustObjectSet* profile_adjust_objects = NULL; - // The disabled program counter addresses for profile dumping // that are registered with HeapLeakChecker::DisableChecksUp typedef set<uintptr_t, less<uintptr_t>, @@ -361,16 +405,25 @@ static GlobalRegionCallerRangeMap* global_region_caller_ranges = NULL; //---------------------------------------------------------------------- -// Simple hook into execution of global object constructors, -// so that we do not call pthread_self() when it does not yet work. -static bool libpthread_initialized = false; -static bool initializer = (libpthread_initialized = true, true); +// The size of the largest heap object allocated so far. +static size_t max_heap_object_size = 0; +// The possible range of addresses that can point +// into one of the elements of heap_objects. 
+static uintptr_t min_heap_address = uintptr_t(-1LL); +static uintptr_t max_heap_address = 0; + +//---------------------------------------------------------------------- // Our hooks for MallocHook static void NewHook(const void* ptr, size_t size) { if (ptr != NULL) { RAW_VLOG(7, "Recording Alloc: %p of %"PRIuS, ptr, size); heap_checker_lock.Lock(); + if (size > max_heap_object_size) max_heap_object_size = size; + uintptr_t addr = reinterpret_cast<uintptr_t>(ptr); + if (addr < min_heap_address) min_heap_address = addr; + addr += size; + if (addr > max_heap_address) max_heap_address = addr; heap_profile->RecordAlloc(ptr, size, 0); heap_checker_lock.Unlock(); RAW_VLOG(8, "Alloc Recorded: %p of %"PRIuS"", ptr, size); @@ -570,7 +623,6 @@ static void MakeDisabledLiveCallback(const void* ptr, // If the region is not writeable, then it cannot have any heap // pointers in it, otherwise we record it as a candidate live region // to get filtered later. - static void RecordGlobalDataLocked(uintptr_t start_address, uintptr_t end_address, const char* permissions, @@ -716,9 +768,10 @@ HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked( return PROC_MAPS_USED; } -// Total number and size of live objects dropped from the profile. -static int64 live_objects_total = 0; -static int64 live_bytes_total = 0; +// Total number and size of live objects dropped from the profile; +// (re)initialized in IgnoreAllLiveObjectsLocked. 
+static int64 live_objects_total; +static int64 live_bytes_total; // pid of the thread that is doing the current leak check // (protected by our lock; IgnoreAllLiveObjectsLocked sets it) @@ -755,7 +808,7 @@ static enum { int HeapLeakChecker::IgnoreLiveThreads(void* parameter, int num_threads, pid_t* thread_pids, - va_list ap) { + va_list /*ap*/) { thread_listing_status = CALLBACK_STARTED; RAW_VLOG(2, "Found %d threads (from pid %d)", num_threads, getpid()); @@ -841,7 +894,7 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { // we do this liveness check for ignored_objects before doing any // live heap walking to make sure it does not fail needlessly: size_t object_size; - if (!(HaveOnHeapLocked(&ptr, &object_size) && + if (!(heap_profile->FindAlloc(ptr, &object_size) && object->second == object_size)) { RAW_LOG(FATAL, "Object at %p of %"PRIuS" bytes from an" " IgnoreObject() has disappeared", ptr, object->second); @@ -957,11 +1010,44 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { } } +// Callback for ListAllProcessThreads in IgnoreAllLiveObjectsLocked below +// to test/verify that we have just the one main thread, in which case +// we can do everything in that main thread, +// so that CPU profiler can collect all its samples. +// Returns the number of threads in the process. +static int IsOneThread(void* parameter, int num_threads, + pid_t* thread_pids, va_list ap) { + if (num_threads != 1) { + RAW_LOG(WARNING, "Have threads: Won't CPU-profile the bulk of " + "leak checking work happening in IgnoreLiveThreads!"); + } + ResumeAllProcessThreads(num_threads, thread_pids); + return num_threads; +} + +// Dummy for IgnoreAllLiveObjectsLocked below. +// Making it global helps with compiler warnings. 
+static va_list dummy_ap; + void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { RAW_CHECK(live_objects == NULL, ""); live_objects = new (Allocator::Allocate(sizeof(LiveObjectsStack))) LiveObjectsStack; stack_tops = new (Allocator::Allocate(sizeof(StackTopSet))) StackTopSet; + // reset the counts + live_objects_total = 0; + live_bytes_total = 0; + // Reduce max_heap_object_size to FLAGS_heap_check_max_pointer_offset + // for the time of leak check. + // FLAGS_heap_check_max_pointer_offset caps max_heap_object_size + // to manage reasonably low chances of random bytes + // appearing to be pointing into large actually leaked heap objects. + const size_t old_max_heap_object_size = max_heap_object_size; + max_heap_object_size = ( + FLAGS_heap_check_max_pointer_offset != -1 + ? min(size_t(FLAGS_heap_check_max_pointer_offset), max_heap_object_size) + : max_heap_object_size + ); // Record global data as live: if (FLAGS_heap_check_ignore_global_live) { library_live_objects = @@ -974,13 +1060,27 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { self_thread_pid = getpid(); self_thread_stack_top = self_stack_top; if (FLAGS_heap_check_ignore_thread_live) { - // We fully suspend the threads right here before any liveness checking + // In case we are doing CPU profiling we'd like to do all the work + // in the main thread, not in the special thread created by + // ListAllProcessThreads, so that CPU profiler can collect all its samples. + // The machinery of ListAllProcessThreads conflicts with the CPU profiler + // by also relying on signals and ::sigaction. + // We can do this (run everything in the main thread) safely + // only if there's just the main thread itself in our process. 
+ // This variable reflects these two conditions: + bool want_and_can_run_in_main_thread = + ProfilingIsEnabledForAllThreads() && + ListAllProcessThreads(NULL, IsOneThread) == 1; + // When the normal path of ListAllProcessThreads below is taken, + // we fully suspend the threads right here before any liveness checking // and keep them suspended for the whole time of liveness checking // inside of the IgnoreLiveThreads callback. // (The threads can't (de)allocate due to lock on the delete hook but // if not suspended they could still mess with the pointer // graph while we walk it). - int r = ListAllProcessThreads(NULL, IgnoreLiveThreads); + int r = want_and_can_run_in_main_thread + ? IgnoreLiveThreads(NULL, 1, &self_thread_pid, dummy_ap) + : ListAllProcessThreads(NULL, IgnoreLiveThreads); need_to_ignore_non_thread_objects = r < 0; if (r < 0) { RAW_LOG(WARNING, "Thread finding failed with %d errno=%d", r, errno); @@ -1021,69 +1121,116 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { // Free these: we made them here and heap_profile never saw them Allocator::DeleteAndNull(&live_objects); Allocator::DeleteAndNull(&stack_tops); + max_heap_object_size = old_max_heap_object_size; // reset this var } // Alignment at which we should consider pointer positions // in IgnoreLiveObjectsLocked. Use 1 if any alignment is ok. -static size_t pointer_alignment = sizeof(void*); -// Global lock for HeapLeakChecker::DoNoLeaks to protect pointer_alignment. +static size_t pointer_source_alignment = kPointerSourceAlignment; +// Global lock for HeapLeakChecker::DoNoLeaks +// to protect pointer_source_alignment. static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); -// This function does not change heap_profile's state: -// we only record live objects to be skipped into profile_adjust_objects -// instead of modifying the heap_profile itself. 
+static const size_t kUnicodeStringOffset = sizeof(uint32); +static const size_t kUnicodeStringAlignmentMask = kUnicodeStringOffset - 1; + +// This function changes the live bits in the heap_profile-table's state: +// we only record the live objects to be skipped. +// +// When checking if a byte sequence points to a heap object we use +// HeapProfileTable::FindInsideAlloc to handle both pointers to +// the start and inside of heap-allocated objects. +// The "inside" case needs to be checked to support +// at least the following relatively common cases: +// - C++ arrays allocated with new FooClass[size] for classes +// with destructors have their size recorded in a sizeof(int) field +// before the place normal pointers point to. +// - basic_string<>-s for e.g. the C++ library of gcc 3.4 +// have the meta-info in basic_string<...>::_Rep recorded +// before the place normal pointers point to. +// - Multiple-inherited objects have their pointers when cast to +// different base classes pointing inside of the actually +// allocated object. +// - Sometimes reachability pointers point to member objects of heap objects, +// and then those member objects point to the full heap object. +// - Third party UnicodeString: it stores a 32-bit refcount +// (in both 32-bit and 64-bit binaries) as the first uint32 +// in the allocated memory and a normal pointer points at +// the second uint32 behind the refcount. +// By finding these additional objects here +// we slightly increase the chance to mistake random memory bytes +// for a pointer and miss a leak in a particular run of a binary. 
void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name, const char* name2) { int64 live_object_count = 0; int64 live_byte_count = 0; while (!live_objects->empty()) { - const void* object = live_objects->back().ptr; + const char* object = + reinterpret_cast<const char*>(live_objects->back().ptr); size_t size = live_objects->back().size; const ObjectPlacement place = live_objects->back().place; live_objects->pop_back(); - size_t object_size; - if (place == MUST_BE_ON_HEAP && - HaveOnHeapLocked(&object, &object_size) && - profile_adjust_objects->insert(object).second) { + if (place == MUST_BE_ON_HEAP && heap_profile->MarkAsLive(object)) { live_object_count += 1; live_byte_count += size; } RAW_VLOG(4, "Looking for heap pointers in %p of %"PRIuS" bytes", object, size); + const char* const whole_object = object; + size_t const whole_size = size; // Try interpretting any byte sequence in object,size as a heap pointer: const size_t remainder = - reinterpret_cast<uintptr_t>(object) % pointer_alignment; + reinterpret_cast<uintptr_t>(object) % pointer_source_alignment; if (remainder) { - object = (reinterpret_cast<const char*>(object) + - pointer_alignment - remainder); - if (size >= pointer_alignment - remainder) { - size -= pointer_alignment - remainder; + object += pointer_source_alignment - remainder; + if (size >= pointer_source_alignment - remainder) { + size -= pointer_source_alignment - remainder; } else { size = 0; } } - while (size >= sizeof(void*)) { - const void* ptr; - memcpy(&ptr, object, sizeof(ptr)); // size-independent UNALIGNED_LOAD - const void* current_object = object; - object = reinterpret_cast<const char*>(object) + pointer_alignment; - size -= pointer_alignment; - if (ptr == NULL) continue; - RAW_VLOG(8, "Trying pointer to %p at %p", ptr, current_object); - size_t object_size; - if (HaveOnHeapLocked(&ptr, &object_size) && - profile_adjust_objects->insert(ptr).second) { - // We take the (hopefully low) risk here of encountering by accident - // 
a byte sequence in memory that matches an address of - // a heap object which is in fact leaked. - // I.e. in very rare and probably not repeatable/lasting cases - // we might miss some real heap memory leaks. - RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p", - ptr, object_size, current_object); - live_object_count += 1; - live_byte_count += object_size; - live_objects->push_back(AllocObject(ptr, object_size, IGNORED_ON_HEAP)); + if (size < sizeof(void*)) continue; + const char* const max_object = object + size - sizeof(void*); + while (object <= max_object) { + // potentially unaligned load: + const uintptr_t addr = *reinterpret_cast<const uintptr_t*>(object); + // Do fast check before the more expensive HaveOnHeapLocked lookup: + // this code runs for all memory words that are potentially pointers: + const bool can_be_on_heap = + // Order tests by the likelyhood of the test failing in 64/32 bit modes. + // Yes, this matters: we either lose 5..6% speed in 32 bit mode + // (which is already slower) or by a factor of 1.5..1.91 in 64 bit mode. +#if defined(__x86_64__) + addr < max_heap_address && + (addr & kUnicodeStringAlignmentMask) == 0 && // must be aligned + min_heap_address <= addr; +#else + (addr & kUnicodeStringAlignmentMask) == 0 && // must be aligned + min_heap_address <= addr && + addr < max_heap_address; +#endif + if (can_be_on_heap) { + const void* ptr = reinterpret_cast<const void*>(addr); + // Too expensive (inner loop): manually uncomment when debugging: + // RAW_VLOG(8, "Trying pointer to %p at %p", ptr, object); + size_t object_size; + if (HaveOnHeapLocked(&ptr, &object_size) && + heap_profile->MarkAsLive(ptr)) { + // We take the (hopefully low) risk here of encountering by accident + // a byte sequence in memory that matches an address of + // a heap object which is in fact leaked. + // I.e. in very rare and probably not repeatable/lasting cases + // we might miss some real heap memory leaks. 
+ RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p " + "inside %p of size %"PRIuS"", + ptr, object_size, object, whole_object, whole_size); + live_object_count += 1; + live_byte_count += object_size; + live_objects->push_back(AllocObject(ptr, object_size, + IGNORED_ON_HEAP)); + } } + object += pointer_source_alignment; } } live_objects_total += live_object_count; @@ -1094,16 +1241,6 @@ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name, } } -bool HeapLeakChecker::HeapProfileFilter(const void* ptr, size_t size) { - if (profile_adjust_objects->find(ptr) != profile_adjust_objects->end()) { - RAW_VLOG(4, "Ignoring object at %p of %"PRIuS" bytes", ptr, size); - // erase so we can later test that all adjust-objects got utilized - profile_adjust_objects->erase(ptr); - return true; - } - return false; -} - //---------------------------------------------------------------------- // HeapLeakChecker leak check disabling components //---------------------------------------------------------------------- @@ -1180,13 +1317,10 @@ void HeapLeakChecker::DisableChecksToHereFrom(const void* start_address) { void HeapLeakChecker::IgnoreObject(const void* ptr) { if (!heap_checker_on) return; heap_checker_lock.Lock(); - IgnoreObjectLocked(ptr); - heap_checker_lock.Unlock(); -} - -void HeapLeakChecker::IgnoreObjectLocked(const void* ptr) { size_t object_size; - if (HaveOnHeapLocked(&ptr, &object_size)) { + if (!HaveOnHeapLocked(&ptr, &object_size)) { + RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr); + } else { RAW_VLOG(1, "Going to ignore live object at %p of %"PRIuS" bytes", ptr, object_size); if (ignored_objects == NULL) { @@ -1194,32 +1328,34 @@ void HeapLeakChecker::IgnoreObjectLocked(const void* ptr) { IgnoredObjectsMap; } if (!ignored_objects->insert(make_pair(reinterpret_cast<uintptr_t>(ptr), - object_size)).second) { + object_size)).second) { RAW_LOG(FATAL, "Object at %p is already being ignored", ptr); } } + heap_checker_lock.Unlock(); } void 
HeapLeakChecker::UnIgnoreObject(const void* ptr) { if (!heap_checker_on) return; heap_checker_lock.Lock(); size_t object_size; - bool ok = HaveOnHeapLocked(&ptr, &object_size); - if (ok) { - ok = false; + if (!HaveOnHeapLocked(&ptr, &object_size)) { + RAW_LOG(FATAL, "No live heap object at %p to un-ignore", ptr); + } else { + bool found = false; if (ignored_objects) { IgnoredObjectsMap::iterator object = ignored_objects->find(reinterpret_cast<uintptr_t>(ptr)); if (object != ignored_objects->end() && object_size == object->second) { ignored_objects->erase(object); - ok = true; + found = true; RAW_VLOG(1, "Now not going to ignore live object " "at %p of %"PRIuS" bytes", ptr, object_size); } } + if (!found) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr); } heap_checker_lock.Unlock(); - if (!ok) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr); } //---------------------------------------------------------------------- @@ -1234,7 +1370,7 @@ void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type, (profile_type == START_PROFILE ? "Starting" : "At an end point for"), name_, - (pointer_alignment == 1 ? " w/o pointer alignment" : "")); + (pointer_source_alignment == 1 ? " w/o pointer alignment" : "")); // Sanity check that nobody is messing with the hooks we need: // Important to have it here: else we can misteriously SIGSEGV // in IgnoreLiveObjectsLocked inside ListAllProcessThreads's callback @@ -1245,11 +1381,7 @@ void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type, RAW_LOG(FATAL, "new/delete malloc hooks got changed"); } // Make the heap profile, other threads are locked out. 
- RAW_CHECK(profile_adjust_objects == NULL, ""); const int alloc_count = Allocator::alloc_count(); - profile_adjust_objects = - new (Allocator::Allocate(sizeof(ProfileAdjustObjectSet))) - ProfileAdjustObjectSet; IgnoreAllLiveObjectsLocked(self_stack_top); const int len = profile_prefix->size() + strlen(name_) + 10 + 2; char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len)); @@ -1258,15 +1390,12 @@ void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type, profile_type == START_PROFILE ? "-beg" : "-end", HeapProfileTable::kFileExt); HeapProfileTable::Stats stats; - bool ok = heap_profile->DumpFilteredProfile( - file_name, HeapProfileFilter, FLAGS_heap_check_identify_leaks, &stats); + bool ok = heap_profile->DumpNonLiveProfile( + file_name, FLAGS_heap_check_identify_leaks, &stats); RAW_CHECK(ok, "No sense to continue"); *alloc_bytes = stats.alloc_size - stats.free_size; *alloc_objects = stats.allocs - stats.frees; Allocator::Free(file_name); - RAW_CHECK(profile_adjust_objects->empty(), - "Some objects to ignore are not on the heap"); - Allocator::DeleteAndNull(&profile_adjust_objects); // Check that we made no leaks ourselves: if (Allocator::alloc_count() != alloc_count) { RAW_LOG(FATAL, "Internal HeapChecker leak of %d objects", @@ -1416,18 +1545,18 @@ static int GetStatusOutput(const char* command, string* output) { } // RAW_LOG 'str' line by line to prevent its truncation in RAW_LOG: -static void RawLogLines(const string& str) { +static void RawLogLines(LogSeverity severity, const string& str) { int p = 0; while (1) { int l = str.find('\n', p); if (l == string::npos) { if (str[p]) { // print last line if non empty - RAW_LOG(INFO, "%s", str.c_str() + p); + RAW_LOG(severity, "%s", str.c_str() + p); } break; } const_cast<string&>(str)[l] = '\0'; // safe for our use case - RAW_LOG(INFO, "%s", str.c_str() + p); + RAW_LOG(severity, "%s", str.c_str() + p); const_cast<string&>(str)[l] = '\n'; p = l + 1; } @@ -1441,9 +1570,9 @@ bool 
HeapLeakChecker::DoNoLeaks(CheckType check_type, alignment_checker_lock.Lock(); bool result; if (FLAGS_heap_check_test_pointer_alignment) { - pointer_alignment = 1; + pointer_source_alignment = 1; bool result_wo_align = DoNoLeaksOnce(check_type, fullness, NO_REPORT); - pointer_alignment = sizeof(void*); + pointer_source_alignment = kPointerSourceAlignment; result = DoNoLeaksOnce(check_type, fullness, report_mode); if (!result) { if (result_wo_align) { @@ -1467,9 +1596,17 @@ bool HeapLeakChecker::DoNoLeaks(CheckType check_type, } RAW_LOG(INFO, "If you are totally puzzled about why the leaks are there, " "try rerunning it with " - "setenv HEAP_CHECK_TEST_POINTER_ALIGNMENT=1"); + "setenv HEAP_CHECK_TEST_POINTER_ALIGNMENT=1 and/or with " + "setenv HEAP_CHECK_MAX_POINTER_OFFSET=-1"); } } + if (result && FLAGS_heap_check_max_pointer_offset == -1) { + RAW_LOG(WARNING, "Found no leaks without max_pointer_offset restriction: " + "it's possible that the default value of " + "heap_check_max_pointer_offset flag is too low. 
" + "Do you use pointers with larger than that offsets " + "pointing in the middle of heap-allocated objects?"); + } alignment_checker_lock.Unlock(); return result; } @@ -1590,9 +1727,9 @@ bool HeapLeakChecker::DoNoLeaksOnce(CheckType check_type, } if (see_leaks && report_mode == PPROF_REPORT) { if (checked_leaks) { - RAW_LOG(INFO, "Below is (less informative) textual version " - "of this pprof command's output:"); - RawLogLines(output); + RAW_LOG(ERROR, "Below is (less informative) textual version " + "of this pprof command's output:"); + RawLogLines(ERROR, output); } else { RAW_LOG(ERROR, "The pprof command has failed"); } @@ -1818,6 +1955,9 @@ REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker::InternalInitStart()); void HeapLeakChecker::DoMainHeapCheck() { RAW_DCHECK(heap_checker_pid == getpid() && do_main_heap_check, ""); + if (FLAGS_heap_check_delay_seconds > 0) { + sleep(FLAGS_heap_check_delay_seconds); + } if (!NoGlobalLeaks()) { if (FLAGS_heap_check_identify_leaks) { RAW_LOG(FATAL, "Whole-program memory leaks found."); @@ -2142,66 +2282,32 @@ void HeapLeakChecker::DisableChecksAtLocked(const void* address) { } } -bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr, size_t* object_size) { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - // Size of the C++ object array size integer - // (potentially compiler dependent; 4 on i386 and gcc; 8 on x86_64 and gcc) - const int kArraySizeOffset = sizeof(size_t); - // sizeof(basic_string<...>::_Rep) for C++ library of gcc 3.4 - // (basically three integer counters; - // library/compiler dependent; 12 on i386 and gcc) - const int kStringOffset = sizeof(size_t) * 3; - // Size of refcount used by UnicodeString in third_party/icu. 
- const int kUnicodeStringOffset = sizeof(uint32); - // NOTE: One can add more similar offset cases below - // even when they do not happen for the used compiler/library; - // all that's impacted is - // - HeapLeakChecker's performace during live heap walking - // - and a slightly greater chance to mistake random memory bytes - // for a pointer and miss a leak in a particular run of a binary. - bool result = true; - if (heap_profile->FindAlloc(*ptr, object_size)) { - // done - } else if (heap_profile->FindAlloc(reinterpret_cast<const char*>(*ptr) - - kArraySizeOffset, - object_size) && - *object_size > kArraySizeOffset) { - // this case is to account for the array size stored inside of - // the memory allocated by new FooClass[size] for classes with destructors - *ptr = reinterpret_cast<const char*>(*ptr) - kArraySizeOffset; - RAW_VLOG(7, "Got poiter into %p at +%d", ptr, kArraySizeOffset); - } else if (heap_profile->FindAlloc(reinterpret_cast<const char*>(*ptr) - - kStringOffset, - object_size) && - *object_size > kStringOffset) { - // this case is to account for basic_string<> representation in - // newer C++ library versions when the kept pointer points to inside of - // the allocated region - *ptr = reinterpret_cast<const char*>(*ptr) - kStringOffset; - RAW_VLOG(7, "Got poiter into %p at +%d", ptr, kStringOffset); - } else if (kUnicodeStringOffset != kArraySizeOffset && - heap_profile->FindAlloc( - reinterpret_cast<const char*>(*ptr) - kUnicodeStringOffset, - object_size) && - *object_size > kUnicodeStringOffset) { - // this case is to account for third party UnicodeString. - // UnicodeString stores a 32-bit refcount (in both 32-bit and - // 64-bit binaries) as the first uint32 in the allocated memory - // and a pointer points into the second uint32 behind the refcount. 
- *ptr = reinterpret_cast<const char*>(*ptr) - kUnicodeStringOffset; - RAW_VLOG(7, "Got poiter into %p at +%d", ptr, kUnicodeStringOffset); - } else { - result = false; +inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr, + size_t* object_size) { + const uintptr_t addr = reinterpret_cast<uintptr_t>(*ptr); + if (heap_profile->FindInsideAlloc( + *ptr, max_heap_object_size, ptr, object_size)) { + const size_t offset = addr - reinterpret_cast<uintptr_t>(*ptr); + // must be aligned to kPointerDestAlignmentMask, + // kUnicodeStringOffset is a special case. + if ((offset & kPointerDestAlignmentMask) == 0 || + offset == kUnicodeStringOffset) { + RAW_VLOG(7, "Got pointer into %p at +%"PRIuS" offset", *ptr, offset); + RAW_DCHECK((addr & kUnicodeStringAlignmentMask) == 0, ""); + // alignment of at least kUnicodeStringAlignment + // must have been already ensured + return true; + } } - return result; + return false; } const void* HeapLeakChecker::GetAllocCaller(void* ptr) { - // this is used only in unittest, so the heavy checks are fine + // this is used only in the unittest, so the heavy checks are fine HeapProfileTable::AllocInfo info; heap_checker_lock.Lock(); - CHECK(heap_profile->FindAllocDetails(ptr, &info)); + RAW_CHECK(heap_profile->FindAllocDetails(ptr, &info), ""); heap_checker_lock.Unlock(); - CHECK(info.stack_depth >= 1); + RAW_CHECK(info.stack_depth >= 1, ""); return info.call_stack[0]; } diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc index 877951a..ec591b2 100644 --- a/src/heap-profile-table.cc +++ b/src/heap-profile-table.cc @@ -233,7 +233,7 @@ void HeapProfileTable::RecordAlloc(const void* ptr, size_t bytes, total_.alloc_size += bytes; AllocValue v; - v.bucket = b; + v.set_bucket(b); // also did set_live(false) v.bytes = bytes; allocation_->Insert(ptr, v); } @@ -241,7 +241,7 @@ void HeapProfileTable::RecordAlloc(const void* ptr, size_t bytes, void HeapProfileTable::RecordFree(const void* ptr) { AllocValue v; if 
(allocation_->FindAndRemove(ptr, &v)) { - Bucket* b = v.bucket; + Bucket* b = v.bucket(); b->frees++; b->free_size += v.bytes; total_.frees++; @@ -250,36 +250,39 @@ void HeapProfileTable::RecordFree(const void* ptr) { } bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const { - AllocValue alloc_value; - if (allocation_->Find(ptr, &alloc_value)) { - *object_size = alloc_value.bytes; - return true; - } - return false; + const AllocValue* alloc_value = allocation_->Find(ptr); + if (alloc_value != NULL) *object_size = alloc_value->bytes; + return alloc_value != NULL; } -bool HeapProfileTable::FindAllocDetails(const void* ptr, AllocInfo* info) const { - AllocValue alloc_value; - if (allocation_->Find(ptr, &alloc_value)) { - info->object_size = alloc_value.bytes; - info->call_stack = alloc_value.bucket->stack; - info->stack_depth = alloc_value.bucket->depth; - return true; +bool HeapProfileTable::FindAllocDetails(const void* ptr, + AllocInfo* info) const { + const AllocValue* alloc_value = allocation_->Find(ptr); + if (alloc_value != NULL) { + info->object_size = alloc_value->bytes; + info->call_stack = alloc_value->bucket()->stack; + info->stack_depth = alloc_value->bucket()->depth; } - return false; + return alloc_value != NULL; } -void HeapProfileTable::MapArgsAllocIterator( - const void* ptr, AllocValue v, AllocIterator callback) { - AllocInfo info; - info.object_size = v.bytes; - info.call_stack = v.bucket->stack; - info.stack_depth = v.bucket->depth; - callback(ptr, info); +bool HeapProfileTable::FindInsideAlloc(const void* ptr, + size_t max_size, + const void** object_ptr, + size_t* object_size) const { + const AllocValue* alloc_value = + allocation_->FindInside(&AllocValueSize, max_size, ptr, object_ptr); + if (alloc_value != NULL) *object_size = alloc_value->bytes; + return alloc_value != NULL; } -void HeapProfileTable::IterateAllocs(AllocIterator callback) const { - allocation_->Iterate(MapArgsAllocIterator, callback); +bool 
HeapProfileTable::MarkAsLive(const void* ptr) { + AllocValue* alloc = allocation_->FindMutable(ptr); + if (alloc && !alloc->live()) { + alloc->set_live(true); + return true; + } + return false; } // We'd be happier using snprintfer, but we don't to reduce dependencies. @@ -360,29 +363,32 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const { return bucket_length + map_length; } -void HeapProfileTable::FilteredDumpIterator(const void* ptr, AllocValue v, - const DumpArgs& args) { - if (args.filter(ptr, v.bytes)) return; +inline +void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v, + const DumpArgs& args) { + if (v->live()) { + v->set_live(false); + return; + } Bucket b; memset(&b, 0, sizeof(b)); b.allocs = 1; - b.alloc_size = v.bytes; + b.alloc_size = v->bytes; const void* stack[kMaxStackTrace + 1]; - b.depth = v.bucket->depth + int(args.dump_alloc_addresses); + b.depth = v->bucket()->depth + int(args.dump_alloc_addresses); b.stack = stack; if (args.dump_alloc_addresses) stack[0] = ptr; memcpy(stack + int(args.dump_alloc_addresses), - v.bucket->stack, sizeof(stack[0]) * v.bucket->depth); + v->bucket()->stack, sizeof(stack[0]) * v->bucket()->depth); char buf[1024]; int len = UnparseBucket(b, buf, 0, sizeof(buf), args.profile_stats); FDWrite(args.fd, buf, len); } -bool HeapProfileTable::DumpFilteredProfile(const char* file_name, - DumpFilter filter, - bool dump_alloc_addresses, - Stats* profile_stats) const { - RAW_VLOG(1, "Dumping filtered heap profile to %s", file_name); +bool HeapProfileTable::DumpNonLiveProfile(const char* file_name, + bool dump_alloc_addresses, + Stats* profile_stats) const { + RAW_VLOG(1, "Dumping non-live heap profile to %s", file_name); int fd = open(file_name, O_WRONLY|O_CREAT|O_TRUNC, 0644); if (fd >= 0) { FDWrite(fd, kProfileHeader, strlen(kProfileHeader)); @@ -390,8 +396,8 @@ bool HeapProfileTable::DumpFilteredProfile(const char* file_name, int len = UnparseBucket(total_, buf, 0, sizeof(buf), 
profile_stats); FDWrite(fd, buf, len); memset(profile_stats, 0, sizeof(*profile_stats)); - const DumpArgs args(fd, dump_alloc_addresses, filter, profile_stats); - allocation_->Iterate<const DumpArgs&>(FilteredDumpIterator, args); + const DumpArgs args(fd, dump_alloc_addresses, profile_stats); + allocation_->Iterate<const DumpArgs&>(DumpNonLiveIterator, args); FDWrite(fd, kProcSelfMapsHeader, strlen(kProcSelfMapsHeader)); DumpProcSelfMaps(fd); NO_INTR(close(fd)); diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h index 2775edd..3ad4a5d 100644 --- a/src/heap-profile-table.h +++ b/src/heap-profile-table.h @@ -91,6 +91,19 @@ class HeapProfileTable { // Same as FindAlloc, but fills all of *info. bool FindAllocDetails(const void* ptr, AllocInfo* info) const; + // Return true iff "ptr" points into a recorded allocation + // If yes, fill *object_ptr with the actual allocation address + // and *object_size with the allocation byte size. + // max_size specifies largest currently possible allocation size. + bool FindInsideAlloc(const void* ptr, size_t max_size, + const void** object_ptr, size_t* object_size) const; + + // If "ptr" points to a recorded allocation and it's not marked as live + // mark it as live and return true. Else return false. + // All allocations start as non-live, DumpNonLiveProfile below + // also makes all allocations non-live. + bool MarkAsLive(const void* ptr); + // Return current total (de)allocation statistics. const Stats& total() const { return total_; } @@ -100,7 +113,9 @@ class HeapProfileTable { // Iterate over the allocation profile data calling "callback" // for every allocation. - void IterateAllocs(AllocIterator callback) const; + void IterateAllocs(AllocIterator callback) const { + allocation_->Iterate(MapArgsAllocIterator, callback); + } // Fill profile data into buffer 'buf' of size 'size' // and return the actual size occupied by the dump in 'buf'. 
@@ -114,15 +129,15 @@ class HeapProfileTable { // needs to return true iff the object is to be filtered out of the dump. typedef bool (*DumpFilter)(const void* ptr, size_t size); - // Dump current heap profile for leak checking purposes to file_name - // while filtering the objects by "filter". - // Also write the sums of allocated byte and object counts in the dump + // Dump non-live portion of the current heap profile + // for leak checking purposes to file_name. + // Also reset all objects to non-live state + // and write the sums of allocated byte and object counts in the dump // to *alloc_bytes and *alloc_objects. // dump_alloc_addresses controls if object addresses are dumped. - bool DumpFilteredProfile(const char* file_name, - DumpFilter filter, - bool dump_alloc_addresses, - Stats* profile_stats) const; + bool DumpNonLiveProfile(const char* file_name, + bool dump_alloc_addresses, + Stats* profile_stats) const; // Cleanup any old profile files matching prefix + ".*" + kFileExt. static void CleanupOldProfiles(const char* prefix); @@ -142,21 +157,34 @@ class HeapProfileTable { // Info stored in the address map struct AllocValue { - Bucket* bucket; // The stack-trace bucket + // Access to the stack-trace bucket + Bucket* bucket() const { + return reinterpret_cast<Bucket*>(bucket_rep & ~uintptr_t(1)); + } + // This also does set_live(false). + void set_bucket(Bucket* b) { bucket_rep = reinterpret_cast<uintptr_t>(b); } size_t bytes; // Number of bytes in this allocation + // Access to the allocation liveness flag (for leak checking) + bool live() const { return bucket_rep & 1; } + void set_live(bool l) { bucket_rep = (bucket_rep & ~uintptr_t(1)) | l; } + private: + uintptr_t bucket_rep; // Bucket* with the lower bit being the "live" flag: + // pointers point to at least 4-byte aligned storage. 
}; + // helper for FindInsideAlloc + static size_t AllocValueSize(const AllocValue& v) { return v.bytes; } + typedef AddressMap<AllocValue> AllocationMap; - // Arguments that need to be passed FilteredDumpIterator callback below. + // Arguments that need to be passed DumpNonLiveIterator callback below. struct DumpArgs { int fd; // file to write to bool dump_alloc_addresses; // if we are dumping allocation's addresses - DumpFilter filter; // dumping filter Stats* profile_stats; // stats to update - DumpArgs(int a, bool b, DumpFilter c, Stats* d) - : fd(a), dump_alloc_addresses(b), filter(c), profile_stats(d) { } + DumpArgs(int a, bool b, Stats* d) + : fd(a), dump_alloc_addresses(b), profile_stats(d) { } }; // helpers ---------------------------- @@ -176,13 +204,19 @@ class HeapProfileTable { // Helper for IterateAllocs to do callback signature conversion // from AllocationMap::Iterate to AllocIterator. - static void MapArgsAllocIterator(const void* ptr, AllocValue v, - AllocIterator callback); - - // Helper for DumpFilteredProfile to do object-granularity + static void MapArgsAllocIterator(const void* ptr, AllocValue* v, + AllocIterator callback) { + AllocInfo info; + info.object_size = v->bytes; + info.call_stack = v->bucket()->stack; + info.stack_depth = v->bucket()->depth; + callback(ptr, info); + } + + // Helper for DumpNonLiveProfile to do object-granularity // heap profile dumping. It gets passed to AllocationMap::Iterate. 
- static void FilteredDumpIterator(const void* ptr, AllocValue v, - const DumpArgs& args); + inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v, + const DumpArgs& args); // data ---------------------------- diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc index 2ffbef8..3c59262 100644 --- a/src/heap-profiler.cc +++ b/src/heap-profiler.cc @@ -57,6 +57,7 @@ #include <google/malloc_extension.h> #include "base/spinlock.h" #include "base/low_level_alloc.h" +#include "base/sysinfo.h" // for GetUniquePathFromEnv() #include "heap-profile-table.h" @@ -382,9 +383,8 @@ static void HeapProfilerInit() { } // Everything after this point is for setting up the profiler based on envvar - - char* heapprofile = getenv("HEAPPROFILE"); - if (!heapprofile || heapprofile[0] == '\0') { + char fname[PATH_MAX]; + if (!GetUniquePathFromEnv("HEAPPROFILE", fname)) { return; } // We do a uid check so we don't write out files in a setuid executable. @@ -396,30 +396,6 @@ static void HeapProfilerInit() { } #endif - // If we're a child process of the 'main' process, we can't just use - // the name HEAPPROFILE -- the parent process will be using that. - // Instead we append our pid to the name. How do we tell if we're a - // child process? Ideally we'd set an environment variable that all - // our children would inherit. But -- and perhaps this is a bug in - // gcc -- if you do a setenv() in a shared libarary in a global - // constructor, the environment setting is lost by the time main() - // is called. The only safe thing we can do in such a situation is - // to modify the existing envvar. So we do a hack: in the parent, - // we set the high bit of the 1st char of HEAPPROFILE. In the child, - // we notice the high bit is set and append the pid(). This works - // assuming cpuprofile filenames don't normally have the high bit - // set in their first character! If that assumption is violated, - // we'll still get a profile, but one with an unexpected name. 
- // TODO(csilvers): set an envvar instead when we can do it reliably. - char fname[PATH_MAX]; - if (heapprofile[0] & 128) { // high bit is set - snprintf(fname, sizeof(fname), "%c%s_%u", // add pid and clear high bit - heapprofile[0] & 127, heapprofile+1, (unsigned int)(getpid())); - } else { - snprintf(fname, sizeof(fname), "%s", heapprofile); - heapprofile[0] |= 128; // set high bit for kids to see - } - HeapProfileTable::CleanupOldProfiles(fname); HeapProfilerStart(fname); diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index a7eee11..611f1d2 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -212,6 +212,17 @@ typedef HASH_NAMESPACE::hash_set<void**, StackTraceHash> StackTraceTable; typedef HASH_NAMESPACE::hash_set<void**, StackTraceHash, StackTraceEqual> StackTraceTable; #endif +void PrintCountAndSize(string* result, uintptr_t count, uintptr_t size) { + char buf[100]; + snprintf(buf, sizeof(buf), + "%6lld: %8lld [%6lld: %8lld] @", + static_cast<long long>(count), + static_cast<long long>(size), + static_cast<long long>(count), + static_cast<long long>(size)); + *result += buf; +} + void PrintHeader(string* result, const char* label, void** entries) { // Compute the total count and total size uintptr_t total_count = 0; @@ -221,24 +232,16 @@ void PrintHeader(string* result, const char* label, void** entries) { total_size += Size(entry); } - char buf[200]; - snprintf(buf, sizeof(buf), - "heap profile: %6lld: %8lld [%6lld: %8lld] @ %s\n", - static_cast<long long>(total_count), - static_cast<long long>(total_size), - static_cast<long long>(total_count), - static_cast<long long>(total_size), - label); - *result += buf; + *result += string("heap profile: "); + PrintCountAndSize(result, total_count, total_size); + *result += string(" ") + label + "\n"; } void PrintStackEntry(string* result, void** entry) { - char buf[100]; - snprintf(buf, sizeof(buf), "%6d: %8d [%6d: %8d] @", - int(Count(entry)), int(Size(entry)), - int(Count(entry)), 
int(Size(entry))); - *result += buf; + PrintCountAndSize(result, Count(entry), Size(entry)); + for (int i = 0; i < Depth(entry); i++) { + char buf[32]; snprintf(buf, sizeof(buf), " %p", PC(entry, i)); *result += buf; } diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc index a84973f..4fbb956 100644 --- a/src/malloc_hook.cc +++ b/src/malloc_hook.cc @@ -225,7 +225,10 @@ int MallocHook::GetCallerStackTrace(void** result, int max_depth, } } RAW_LOG(WARNING, "Hooked allocator frame not found, returning empty trace"); - // Try increasing kMaxSkip or else something must be wrong with InHookCaller + // If this happens try increasing kMaxSkip + // or else something must be wrong with InHookCaller, + // e.g. for every section used in InHookCaller + // all functions in that section must be inside the same library. return 0; } @@ -238,7 +241,8 @@ int MallocHook::GetCallerStackTrace(void** result, int max_depth, // just call through to them. -#if defined(__linux) && (defined(__i386__) || defined(__x86_64__)) +#if defined(__linux) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__PPC__)) #include <unistd.h> #include <syscall.h> #include <sys/mman.h> @@ -248,10 +252,10 @@ int MallocHook::GetCallerStackTrace(void** result, int max_depth, // The x86-32 case and the x86-64 case differ: // 32b has a mmap2() syscall, 64b does not. // 64b and 32b have different calling conventions for mmap(). -#if defined(__i386__) +#if defined(__i386__) || defined(__PPC__) static inline void* do_mmap64(void *start, size_t length, - int prot, int flags, + int prot, int flags, int fd, __off64_t offset) __THROW { void *result; diff --git a/src/maybe_threads.cc b/src/maybe_threads.cc index a3e0450..f1a8f76 100644 --- a/src/maybe_threads.cc +++ b/src/maybe_threads.cc @@ -38,6 +38,7 @@ #include "config.h" #include <assert.h> +#include <string.h> // for memcmp // We don't actually need strings. 
But including this header seems to // stop the compiler trying to short-circuit our pthreads existence // tests and claiming that the address of a function is always diff --git a/src/packed-cache-inl.h b/src/packed-cache-inl.h index 6114553..ff1ac23 100644 --- a/src/packed-cache-inl.h +++ b/src/packed-cache-inl.h @@ -99,12 +99,12 @@ // // Implementation details: // -// This is a direct-mapped cache with 2^kHashbits entries; -// the hash function simply takes the low bits of the key. -// So, we don't have to store the low bits of the key in the entries. -// Instead, an entry is the high bits of a key and a value, packed -// together. E.g., a 20 bit key and a 7 bit value only require -// a uint16 for each entry if kHashbits >= 11. +// This is a direct-mapped cache with 2^kHashbits entries; the hash +// function simply takes the low bits of the key. We store whole keys +// if a whole key plus a whole value fits in an entry. Otherwise, an +// entry is the high bits of a key and a value, packed together. +// E.g., a 20 bit key and a 7 bit value only require a uint16 for each +// entry if kHashbits >= 11. // // Alternatives to this scheme will be added as needed. @@ -131,7 +131,8 @@ class PackedCache { typedef uintptr_t K; typedef size_t V; static const int kHashbits = 12; - static const int kValuebits = 8; + static const int kValuebits = 7; + static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T); explicit PackedCache(V initial_value) { COMPILE_ASSERT(kKeybits <= sizeof(K) * 8, key_size); @@ -166,50 +167,47 @@ class PackedCache { void Clear(V value) { DCHECK_EQ(value, value & kValueMask); for (int i = 0; i < 1 << kHashbits; i++) { - array_[i] = value; + RAW_DCHECK(kUseWholeKeys || KeyToUpper(i) == 0, "KeyToUpper failure"); + array_[i] = kUseWholeKeys ? (value | KeyToUpper(i)) : value; } } private: - // We are going to pack a value and the upper part of a key into - // an entry of type T. 
The UPPER type is for the upper part of a key, - // after the key has been masked and shifted for inclusion in an entry. + // We are going to pack a value and the upper part of a key (or a + // whole key) into an entry of type T. The UPPER type is for the + // upper part of a key, after the key has been masked and shifted + // for inclusion in an entry. typedef T UPPER; static V EntryToValue(T t) { return t & kValueMask; } - static UPPER EntryToUpper(T t) { return t & kUpperMask; } - - // If v is a V and u is an UPPER then you can create an entry by - // doing u | v. kHashbits determines where in a K to find the upper - // part of the key, and kValuebits determines where in the entry to put - // it. + // If we have space for a whole key, we just shift it left. + // Otherwise kHashbits determines where in a K to find the upper + // part of the key, and kValuebits determines where in the entry to + // put it. static UPPER KeyToUpper(K k) { - const int shift = kHashbits - kValuebits; - // Assume kHashbits >= kValuebits. It would be easy to lift this assumption. - return static_cast<T>(k >> shift) & kUpperMask; - } - - // This is roughly the inverse of KeyToUpper(). Some of the key has been - // thrown away, since KeyToUpper() masks off the low bits of the key. - static K UpperToPartialKey(UPPER u) { - DCHECK_EQ(u, u & kUpperMask); - const int shift = kHashbits - kValuebits; - // Assume kHashbits >= kValuebits. It would be easy to lift this assumption. - return static_cast<K>(u) << shift; + if (kUseWholeKeys) { + return static_cast<T>(k) << kValuebits; + } else { + const int shift = kHashbits - kValuebits; + // Assume kHashbits >= kValuebits. It'd be easy to lift this assumption. + return static_cast<T>(k >> shift) & kUpperMask; + } } static size_t Hash(K key) { return static_cast<size_t>(key) & N_ONES_(size_t, kHashbits); } - // Does the entry's partial key match the relevant part of the given key? + // Does the entry match the relevant part of the given key? 
static bool KeyMatch(T entry, K key) { - return ((KeyToUpper(key) ^ entry) & kUpperMask) == 0; + return kUseWholeKeys ? + (entry >> kValuebits == key) : + ((KeyToUpper(key) ^ entry) & kUpperMask) == 0; } static const int kTbits = 8 * sizeof(T); - static const int kUpperbits = kKeybits - kHashbits; + static const int kUpperbits = kUseWholeKeys ? kKeybits : kKeybits - kHashbits; // For masking a K. static const K kKeyMask = N_ONES_(K, kKeybits); diff --git a/src/profiledata.h b/src/profiledata.h index f39dfe1..aa50241 100644 --- a/src/profiledata.h +++ b/src/profiledata.h @@ -68,12 +68,12 @@ // A profiler which uses asynchronous signals to add samples will // typically use two locks to protect this data structure: // -// - A MutexLock, which is held over all calls except for the 'Add' +// - A SpinLock which is held over all calls except for the 'Add' // call made from the signal handler. // -// - A SpinLock, which is held over calls to 'Start', 'Stop', +// - A SpinLock which is held over calls to 'Start', 'Stop', // 'Flush', and 'Add'. (This SpinLock should be acquired after -// the MutexLock in all cases where both are needed.) +// the first SpinLock in all cases where both are needed.)
class ProfileData { public: struct State { diff --git a/src/profiler.cc b/src/profiler.cc index cca1d03..79cb4f8 100644 --- a/src/profiler.cc +++ b/src/profiler.cc @@ -53,9 +53,8 @@ #include "base/commandlineflags.h" #include "base/logging.h" #include "base/googleinit.h" -#include "base/mutex.h" #include "base/spinlock.h" -#include "base/sysinfo.h" +#include "base/sysinfo.h" /* for GetUniquePathFromEnv, etc */ #include "profiledata.h" #ifdef HAVE_CONFLICT_SIGNAL_H #include "conflict-signal.h" /* used on msvc machines */ @@ -66,47 +65,6 @@ using std::string; DEFINE_string(cpu_profile, "", "Profile file name (used if CPUPROFILE env var not specified)"); -// This takes as an argument an environment-variable name (like -// CPUPROFILE) whose value is supposed to be a file-path, and sets -// path to that path, and returns true. If the env var doesn't exist, -// or is the empty string, leave path unchanged and returns false. -// The reason this is non-trivial is that this function handles munged -// pathnames. Here's why: -// -// If we're a child process of the 'main' process, we can't just use -// getenv("CPUPROFILE") -- the parent process will be using that path. -// Instead we append our pid to the pathname. How do we tell if we're a -// child process? Ideally we'd set an environment variable that all -// our children would inherit. But -- and this is seemingly a bug in -// gcc -- if you do a setenv() in a shared libarary in a global -// constructor, the environment setting is lost by the time main() is -// called. The only safe thing we can do in such a situation is to -// modify the existing envvar. So we do a hack: in the parent, we set -// the high bit of the 1st char of CPUPROFILE. In the child, we -// notice the high bit is set and append the pid(). This works -// assuming cpuprofile filenames don't normally have the high bit set -// in their first character! If that assumption is violated, we'll -// still get a profile, but one with an unexpected name. 
-// TODO(csilvers): set an envvar instead when we can do it reliably. -static bool GetUniquePathFromEnv(const char* env_name, string* path) { - char* envval = getenv(env_name); - if (envval == NULL || *envval == '\0') - return false; - if (envval[0] & 128) { // high bit is set - char pid[64]; // pids are smaller than this! - snprintf(pid, sizeof(pid), "%u", (unsigned int)(getpid())); - *path = envval; - *path += "_"; - *path += pid; - (*path)[0] &= 127; - } else { - *path = string(envval); - envval[0] |= 128; // set high bit for kids to see - } - return true; -} - - // Collects up all profile data. This is a singleton, which is // initialized by a constructor at startup. class CpuProfiler { @@ -156,7 +114,7 @@ class CpuProfiler { // Locking order is control_lock_ first, and then signal_lock_. // signal_lock_ is acquired by the prof_handler without first // acquiring control_lock_. - Mutex control_lock_; + SpinLock control_lock_; SpinLock signal_lock_; ProfileData collector_; @@ -202,8 +160,8 @@ CpuProfiler::CpuProfiler() { RegisterThread(); // Should profiling be enabled automatically at start? 
- string fname; - if (!GetUniquePathFromEnv("CPUPROFILE", &fname)) { + char fname[PATH_MAX]; + if (!GetUniquePathFromEnv("CPUPROFILE", fname)) { return; } // We don't enable profiling if setuid -- it's a security risk @@ -212,15 +170,15 @@ CpuProfiler::CpuProfiler() { return; #endif - if (!Start(fname.c_str(), NULL, NULL)) { + if (!Start(fname, NULL, NULL)) { RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n", - fname.c_str(), strerror(errno)); + fname, strerror(errno)); } } bool CpuProfiler::Start(const char* fname, bool (*filter)(void*), void* filter_arg) { - MutexLock cl(&control_lock_); + SpinLockHolder cl(&control_lock_); if (collector_.enabled()) { return false; @@ -258,7 +216,7 @@ CpuProfiler::~CpuProfiler() { // Stop profiling and write out any collected profile data void CpuProfiler::Stop() { - MutexLock cl(&control_lock_); + SpinLockHolder cl(&control_lock_); if (!collector_.enabled()) { return; @@ -278,7 +236,7 @@ void CpuProfiler::Stop() { } void CpuProfiler::FlushTable() { - MutexLock cl(&control_lock_); + SpinLockHolder cl(&control_lock_); if (!collector_.enabled()) { return; @@ -295,14 +253,14 @@ void CpuProfiler::FlushTable() { } bool CpuProfiler::Enabled() { - MutexLock cl(&control_lock_); + SpinLockHolder cl(&control_lock_); return collector_.enabled(); } void CpuProfiler::GetCurrentState(ProfilerState* state) { ProfileData::State collector_state; { - MutexLock cl(&control_lock_); + SpinLockHolder cl(&control_lock_); collector_.GetCurrentState(&collector_state); } diff --git a/src/stacktrace.cc b/src/stacktrace.cc index 21b172c..9826e98 100644 --- a/src/stacktrace.cc +++ b/src/stacktrace.cc @@ -82,7 +82,7 @@ # endif // The PowerPC case -#elif defined(__ppc__) && __GNUC__ >= 2 +#elif (defined(__ppc__) || defined(__PPC__)) && __GNUC__ >= 2 # if !defined(NO_FRAME_POINTER) # include "stacktrace_powerpc-inl.h" # else diff --git a/src/stacktrace_libunwind-inl.h b/src/stacktrace_libunwind-inl.h index 3434e88..529f309 100644 --- 
a/src/stacktrace_libunwind-inl.h +++ b/src/stacktrace_libunwind-inl.h @@ -49,7 +49,6 @@ extern "C" { // ignore those subsequent traces. In such cases, we return 0 to // indicate the situation. static SpinLock libunwind_lock(SpinLock::LINKER_INITIALIZED); -static bool in_get_stack_trace = false; // If you change this function, also change GetStackFrames below. int GetStackTrace(void** result, int max_depth, int skip_count) { @@ -58,13 +57,8 @@ int GetStackTrace(void** result, int max_depth, int skip_count) { unw_cursor_t cursor; unw_context_t uc; - { - SpinLockHolder sh(&libunwind_lock); - if (in_get_stack_trace) { - return 0; - } else { - in_get_stack_trace = true; - } + if (!libunwind_lock.TryLock()) { + return 0; } unw_getcontext(&uc); @@ -86,9 +80,7 @@ int GetStackTrace(void** result, int max_depth, int skip_count) { break; } - SpinLockHolder sh(&libunwind_lock); - in_get_stack_trace = false; - + libunwind_lock.Unlock(); return n; } @@ -125,13 +117,8 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { unw_cursor_t cursor; unw_context_t uc; - { - SpinLockHolder sh(&libunwind_lock); - if (in_get_stack_trace) { - return 0; - } else { - in_get_stack_trace = true; - } + if (!libunwind_lock.TryLock()) { + return 0; } unw_getcontext(&uc); @@ -155,8 +142,6 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { // No implementation for finding out the stack frame sizes yet. memset(sizes, 0, sizeof(*sizes) * n); - SpinLockHolder sh(&libunwind_lock); - in_get_stack_trace = false; - + libunwind_lock.Unlock(); return n; } diff --git a/src/stacktrace_powerpc-inl.h b/src/stacktrace_powerpc-inl.h index bf64284..5631e49 100644 --- a/src/stacktrace_powerpc-inl.h +++ b/src/stacktrace_powerpc-inl.h @@ -68,6 +68,10 @@ static void **NextStackFrame(void **old_sp) { return new_sp; } +// This ensures that GetStackTrace sets up the Link Register properly.
+void StacktracePowerPCDummyFunction() __attribute__((noinline)); +void StacktracePowerPCDummyFunction() { __asm__ volatile(""); } + // If you change this function, also change GetStackFrames below. int GetStackTrace(void** result, int max_depth, int skip_count) { void **sp; @@ -82,23 +86,40 @@ int GetStackTrace(void** result, int max_depth, int skip_count) { __asm__ volatile ("mr %0,1" : "=r" (sp)); #endif + // On PowerPC, the "Link Register" or "Link Record" (LR), is a stack + // entry that holds the return address of the subroutine call (what + // instruction we run after our function finishes). This is the + // same as the stack-pointer of our parent routine, which is what we + // want here. While the compiler will always(?) set up LR for + // subroutine calls, it may not for leaf functions (such as this one). + // This routine forces the compiler (at least gcc) to push it anyway. + StacktracePowerPCDummyFunction(); + + // The LR save area is used by the callee, so the top entry is bogus. + skip_count++; + int n = 0; while (sp && n < max_depth) { if (skip_count > 0) { skip_count--; } else { - // sp[2] holds the "Link Record", according to RTArch-59.html. - // On PPC, the Link Record is the return address of the - // subroutine call (what instruction we run after our function - // finishes). This is the same as the stack-pointer of our - // parent routine, which is what we want here. We believe that - // the compiler will always set up the LR for subroutine calls. - // - // It may be possible to get the stack-pointer of the parent - // routine directly. In my experiments, this code works: - // result[n++] = NextStackFrame(sp)[-18] - // But I'm not sure what this is doing, exactly, or how reliable it is. - result[n++] = *(sp+2); // sp[2] holds the Link Record (return address) + // PowerPC has 3 main ABIs, which say where in the stack the + // Link Register is. For DARWIN and AIX (used by apple and + // linux ppc64), it's in sp[2]. 
For SYSV (used by linux ppc), + // it's in sp[1]. +#if defined(_CALL_AIX) || defined(_CALL_DARWIN) + result[n++] = *(sp+2); +#elif defined(_CALL_SYSV) + result[n++] = *(sp+1); +#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__)) + // This check is in case the compiler doesn't define _CALL_AIX/etc. + result[n++] = *(sp+2); +#elif defined(__linux) + // This check is in case the compiler doesn't define _CALL_SYSV. + result[n++] = *(sp+1); +#else +#error Need to specify the PPC ABI for your archiecture. +#endif } // Use strict unwinding rules. sp = NextStackFrame<true>(sp); @@ -135,16 +156,18 @@ int GetStackTrace(void** result, int max_depth, int skip_count) { // class, we are in trouble. int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) { void **sp; - // Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther) - // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a - // different asm syntax. I don't know quite the best way to discriminate - // systems using the old as from the new one; I've gone with __APPLE__. #ifdef __APPLE__ __asm__ volatile ("mr %0,r1" : "=r" (sp)); #else __asm__ volatile ("mr %0,1" : "=r" (sp)); #endif + StacktracePowerPCDummyFunction(); + // Note we do *not* increment skip_count here for the SYSV ABI. If + // we did, the list of stack frames wouldn't properly match up with + // the list of return addresses. Note this means the top pc entry + // is probably bogus for linux/ppc (and other SYSV-ABI systems). + int n = 0; while (sp && n < max_depth) { // The GetStackFrames routine is called when we are in some @@ -156,18 +179,19 @@ int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) { if (skip_count > 0) { skip_count--; } else { - // sp[2] holds the "Link Record", according to RTArch-59.html. - // On PPC, the Link Record is the return address of the - // subroutine call (what instruction we run after our function - // finishes). 
This is the same as the stack-pointer of our - // parent routine, which is what we want here. We believe that - // the compiler will always set up the LR for subroutine calls. - // - // It may be possible to get the stack-pointer of the parent - // routine directly. In my experiments, this code works: - // pcs[n] = NextStackFrame(sp)[-18] - // But I'm not sure what this is doing, exactly, or how reliable it is. - pcs[n] = *(sp+2); // sp[2] holds the Link Record (return address) +#if defined(_CALL_AIX) || defined(_CALL_DARWIN) + pcs[n++] = *(sp+2); +#elif defined(_CALL_SYSV) + pcs[n++] = *(sp+1); +#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__)) + // This check is in case the compiler doesn't define _CALL_AIX/etc. + pcs[n++] = *(sp+2); +#elif defined(__linux) + // This check is in case the compiler doesn't define _CALL_SYSV. + pcs[n++] = *(sp+1); +#else +#error Need to specify the PPC ABI for your archiecture. +#endif if (next_sp > sp) { sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; } else { diff --git a/src/system-alloc.cc b/src/system-alloc.cc index 96906ca..3a0d665 100644 --- a/src/system-alloc.cc +++ b/src/system-alloc.cc @@ -78,11 +78,6 @@ static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); static size_t pagesize = 0; // Configuration parameters. -// -// if use_devmem is true, either use_sbrk or use_mmap must also be true. -// For 2.2 kernels, it looks like the sbrk address space (500MBish) and -// the mmap address space (1300MBish) are disjoint, so we need both allocators -// to get as much virtual memory as possible. DEFINE_int32(malloc_devmem_start, 0, "Physical memory starting location in MB for /dev/mem allocation." @@ -90,6 +85,8 @@ DEFINE_int32(malloc_devmem_start, 0, DEFINE_int32(malloc_devmem_limit, 0, "Physical memory limit location in MB for /dev/mem allocation." 
" Setting this to 0 means no limit."); +DEFINE_bool(malloc_skip_mmap, false, + "Whether mmap can be used to obtain memory."); // static allocators class SbrkSysAllocator : public SysAllocator { @@ -188,6 +185,16 @@ void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) { void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { + // Check if we should use mmap allocation. + // FLAGS_malloc_skip_mmap starts out as false (its uninitialized + // state) and eventually gets initialized to the specified value. Note + // that this code runs for a while before the flags are initialized. + // Chances are we never get here before the flags are initialized since + // sbrk is used until the heap is exhausted (before mmap is used). + if (FLAGS_malloc_skip_mmap) { + return NULL; + } + // could theoretically return the "extra" bytes here, but this // is simple and correct. if (actual_size) { diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index 76435a7..a3f8733 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -277,7 +277,9 @@ static const int add_amount[2] = { 7, 127 + (120 << 7) }; static unsigned char class_array[377]; // Compute index of the class_array[] entry for a given size -static inline int ClassIndex(size_t s) { +static inline int ClassIndex(int s) { + ASSERT(0 <= s); + ASSERT(s <= kMaxSize); const int i = (s > kMaxSmallSize); return (s + add_amount[i]) >> shift_amount[i]; } @@ -381,7 +383,7 @@ static inline size_t SLL_Size(void *head) { // Setup helper functions. 
-static inline int SizeClass(size_t size) { +static inline int SizeClass(int size) { return class_array[ClassIndex(size)]; } @@ -780,14 +782,14 @@ static StackTrace* growth_stacks = NULL; template <int BITS> class MapSelector { public: typedef TCMalloc_PageMap3<BITS-kPageShift> Type; - typedef PackedCache<BITS, uint64> CacheType; + typedef PackedCache<BITS-kPageShift, uint64_t> CacheType; }; // A two-level map for 32-bit machines template <> class MapSelector<32> { public: typedef TCMalloc_PageMap2<32-kPageShift> Type; - typedef PackedCache<32-kPageShift, uint16> CacheType; + typedef PackedCache<32-kPageShift, uint16_t> CacheType; }; // ------------------------------------------------------------------------- @@ -893,10 +895,11 @@ class TCMalloc_PageHeap { // Remove span from its free list, and move any leftover part of // span into appropriate free lists. Also update "span" to have // length exactly "n" and mark it as non-free so it can be returned - // to the client. + // to the client. After all that, decrease free_pages_ by n and + // return span. // // "released" is true iff "span" was found on a "returned" list. - void Carve(Span* span, Length n, bool released); + Span* Carve(Span* span, Length n, bool released); void RecordSpan(Span* span) { pagemap_.set(span->start, span); @@ -943,25 +946,19 @@ Span* TCMalloc_PageHeap::New(Length n) { // Find first size >= n that has a non-empty list for (Length s = n; s < kMaxPages; s++) { - Span* ll = NULL; + Span* ll = &free_[s].normal; bool released = false; - if (!DLL_IsEmpty(&free_[s].normal)) { - // Found normal span - ll = &free_[s].normal; - } else if (!DLL_IsEmpty(&free_[s].returned)) { - // Found returned span; reallocate it + // If we're lucky, ll is non-empty, meaning it has a suitable span. + if (DLL_IsEmpty(ll)) { + // Alternatively, maybe there's a usable returned span. 
ll = &free_[s].returned; released = true; - } else { - // Keep looking in larger classes - continue; + if (DLL_IsEmpty(ll)) { + // Still no luck, so keep looking in larger classes. + continue; + } } - - Span* result = ll->next; - Carve(result, n, released); - ASSERT(Check()); - free_pages_ -= n; - return result; + return Carve(ll->next, n, released); } Span* result = AllocLarge(n); @@ -1010,13 +1007,7 @@ Span* TCMalloc_PageHeap::AllocLarge(Length n) { } } - if (best != NULL) { - Carve(best, n, from_released); - ASSERT(Check()); - free_pages_ -= n; - return best; - } - return NULL; + return best == NULL ? NULL : Carve(best, n, from_released); } Span* TCMalloc_PageHeap::Split(Span* span, Length n) { @@ -1036,7 +1027,7 @@ Span* TCMalloc_PageHeap::Split(Span* span, Length n) { return leftover; } -void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { +Span* TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { ASSERT(n > 0); DLL_Remove(span); span->free = 0; @@ -1058,6 +1049,9 @@ void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { span->length = n; pagemap_.set(span->start + n - 1, span); } + ASSERT(Check()); + free_pages_ -= n; + return span; } void TCMalloc_PageHeap::Delete(Span* span) { @@ -1357,8 +1351,17 @@ void TCMalloc_PageHeap::ReleaseFreePages() { class TCMalloc_ThreadCache_FreeList { private: void* list_; // Linked list of nodes - uint16_t length_; // Current length - uint16_t lowater_; // Low water mark for list length + +#ifdef _LP64 + // On 64-bit hardware, manipulating 16-bit values may be slightly slow. + // Since it won't cost any space, let's make these fields 32 bits each. + uint32_t length_; // Current length + uint32_t lowater_; // Low water mark for list length +#else + // If we aren't using 64-bit pointers then pack these into less space. 
+ uint16_t length_; + uint16_t lowater_; +#endif public: void Init() { @@ -1368,7 +1371,7 @@ class TCMalloc_ThreadCache_FreeList { } // Return current length of list - int length() const { + size_t length() const { return length_; } @@ -1414,14 +1417,19 @@ class TCMalloc_ThreadCache { private: typedef TCMalloc_ThreadCache_FreeList FreeList; - size_t size_; // Combined size of data - pthread_t tid_; // Which thread owns it - bool in_setspecific_; // In call to pthread_setspecific? - FreeList list_[kNumClasses]; // Array indexed by size-class + // Warning: the offset of list_ affects performance. On general + // principles, we don't like list_[x] to span multiple L1 cache + // lines. However, merely placing list_ at offset 0 here seems to + // cause cache conflicts. // We sample allocations, biased by the size of the allocation - uint32_t rnd_; // Cheap random number generator size_t bytes_until_sample_; // Bytes until we sample next + uint32_t rnd_; // Cheap random number generator + + size_t size_; // Combined size of data + pthread_t tid_; // Which thread owns it + FreeList list_[kNumClasses]; // Array indexed by size-class + bool in_setspecific_; // In call to pthread_setspecific? // Allocate a new heap. REQUIRES: pageheap_lock is held. static inline TCMalloc_ThreadCache* NewHeap(pthread_t tid); @@ -1445,8 +1453,13 @@ class TCMalloc_ThreadCache { void* Allocate(size_t size); void Deallocate(void* ptr, size_t size_class); - void FetchFromCentralCache(size_t cl); - void ReleaseToCentralCache(size_t cl, int N); + // Gets and returns an object from the central cache, and, if possible, + // also adds some objects of that size class to this thread cache. + void* FetchFromCentralCache(size_t cl, size_t byte_size); + + // Releases N items from this thread cache. Returns size_. 
+ size_t ReleaseToCentralCache(FreeList* src, size_t cl, int N); + void Scavenge(); void Print() const; @@ -1479,11 +1492,11 @@ class TCMalloc_Central_FreeList { // These methods all do internal locking. // Insert the specified range into the central freelist. N is the number of - // elements in the range. + // elements in the range. RemoveRange() is the opposite operation. void InsertRange(void *start, void *end, int N); - // Returns the actual number of fetched elements into N. - void RemoveRange(void **start, void **end, int *N); + // Returns the actual number of fetched elements and sets *start and *end. + int RemoveRange(void **start, void **end, int N); // Returns the number of free objects in cache. int length() { @@ -1542,7 +1555,7 @@ class TCMalloc_Central_FreeList { // spans if it allows it to shrink the cache. Return false if it failed to // shrink the cache. Decrements cache_size_ on success. // May temporarily take lock_. If it takes lock_, the locked_size_class - // lock is released to the thread from holding two size class locks + // lock is released to keep the thread from holding two size class locks // concurrently which could lead to a deadlock.
bool ShrinkCache(int locked_size_class, bool force); @@ -1780,41 +1793,39 @@ void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) { ReleaseListToSpans(start); } -void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) { - int num = *N; - ASSERT(num > 0); - - SpinLockHolder h(&lock_); - if (num == num_objects_to_move[size_class_] && used_slots_ > 0) { +int TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int N) { + ASSERT(N > 0); + lock_.Lock(); + if (N == num_objects_to_move[size_class_] && used_slots_ > 0) { int slot = --used_slots_; ASSERT(slot >= 0); TCEntry *entry = &tc_slots_[slot]; *start = entry->head; *end = entry->tail; - return; + lock_.Unlock(); + return N; } + int result = 0; + void* head = NULL; + void* tail = NULL; // TODO: Prefetch multiple TCEntries? - void *tail = FetchFromSpansSafe(); - if (!tail) { - // We are completely out of memory. - *start = *end = NULL; - *N = 0; - return; - } - - SLL_SetNext(tail, NULL); - void *head = tail; - int count = 1; - while (count < num) { - void *t = FetchFromSpans(); - if (!t) break; - SLL_Push(&head, t); - count++; + tail = FetchFromSpansSafe(); + if (tail != NULL) { + SLL_SetNext(tail, NULL); + head = tail; + result = 1; + while (result < N) { + void *t = FetchFromSpans(); + if (!t) break; + SLL_Push(&head, t); + result++; + } } + lock_.Unlock(); *start = head; *end = tail; - *N = count; + return result; } @@ -1929,7 +1940,7 @@ void TCMalloc_ThreadCache::Cleanup() { // Put unused memory back into central cache for (int cl = 0; cl < kNumClasses; ++cl) { if (list_[cl].length() > 0) { - ReleaseToCentralCache(cl, list_[cl].length()); + ReleaseToCentralCache(&list_[cl], cl, list_[cl].length()); } } } @@ -1937,41 +1948,55 @@ void TCMalloc_ThreadCache::Cleanup() { inline void* TCMalloc_ThreadCache::Allocate(size_t size) { ASSERT(size <= kMaxSize); const size_t cl = SizeClass(size); + const size_t alloc_size = ByteSizeForClass(cl); FreeList* list = 
&list_[cl]; if (list->empty()) { - FetchFromCentralCache(cl); - if (list->empty()) return NULL; + return FetchFromCentralCache(cl, alloc_size); } - size_ -= ByteSizeForClass(cl); + size_ -= alloc_size; return list->Pop(); } inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) { - size_ += ByteSizeForClass(cl); FreeList* list = &list_[cl]; + ssize_t list_headroom = + static_cast<ssize_t>(kMaxFreeListLength - 1) - list->length(); + size_ += ByteSizeForClass(cl); + size_t cache_size = size_; + ssize_t size_headroom = per_thread_cache_size - cache_size - 1; list->Push(ptr); - // If enough data is free, put back into central cache - if (list->length() > kMaxFreeListLength) { - ReleaseToCentralCache(cl, num_objects_to_move[cl]); + + // There are two relatively uncommon things that require further work. + // In the common case we're done, and in that case we need a single branch + // because of the bitwise-or trick that follows. + if ((list_headroom | size_headroom) < 0) { + if (list_headroom < 0) { + cache_size = ReleaseToCentralCache(list, cl, num_objects_to_move[cl]); + } + if (cache_size >= per_thread_cache_size) Scavenge(); } - if (size_ >= per_thread_cache_size) Scavenge(); } -// Remove some objects of class "cl" from central cache and add to thread heap -void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl) { - int fetch_count = num_objects_to_move[cl]; +// Remove some objects of class "cl" from central cache and add to thread heap. +// On success, return the first object for immediate use; otherwise return NULL. 
+void* TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) { void *start, *end; - central_cache[cl].RemoveRange(&start, &end, &fetch_count); - list_[cl].PushRange(fetch_count, start, end); - size_ += ByteSizeForClass(cl) * fetch_count; + int fetch_count = central_cache[cl].RemoveRange(&start, &end, + num_objects_to_move[cl]); + ASSERT((start == NULL) == (fetch_count == 0)); + if (--fetch_count >= 0) { + size_ += byte_size * fetch_count; + list_[cl].PushRange(fetch_count, SLL_Next(start), end); + } + return start; } // Remove some objects of class "cl" from thread heap and add to central cache -void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { - ASSERT(N > 0); - FreeList* src = &list_[cl]; +size_t TCMalloc_ThreadCache::ReleaseToCentralCache(FreeList* src, + size_t cl, int N) { + ASSERT(src == &list_[cl]); if (N > src->length()) N = src->length(); - size_ -= N*ByteSizeForClass(cl); + size_t delta_bytes = N * ByteSizeForClass(cl); // We return prepackaged chains of the correct size to the central cache. // TODO: Use the same format internally in the thread caches? @@ -1985,6 +2010,7 @@ void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { void *tail, *head; src->PopRange(N, &head, &tail); central_cache[cl].InsertRange(head, tail, N); + return size_ -= delta_bytes; } // Release idle memory to the central cache @@ -2002,7 +2028,7 @@ void TCMalloc_ThreadCache::Scavenge() { const int lowmark = list->lowwatermark(); if (lowmark > 0) { const int drop = (lowmark > 1) ? 
lowmark/2 : 1; - ReleaseToCentralCache(cl, drop); + ReleaseToCentralCache(list, cl, drop); } list->clear_lowwatermark(); } @@ -2249,7 +2275,7 @@ void TCMalloc_ThreadCache::RecomputeThreadCacheSize() { void TCMalloc_ThreadCache::Print() const { for (int cl = 0; cl < kNumClasses; ++cl) { - MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n", + MESSAGE(" %5" PRIuS " : %4" PRIuS " len; %4d lo\n", ByteSizeForClass(cl), list_[cl].length(), list_[cl].lowwatermark()); @@ -2642,6 +2668,16 @@ static inline void* SpanToMallocResult(Span *span) { CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); } +// Helper for do_malloc(). +static inline void* do_malloc_pages(Length num_pages) { + Span *span; + { + SpinLockHolder h(&pageheap_lock); + span = pageheap->New(num_pages); + } + return span == NULL ? NULL : SpanToMallocResult(span); +} + static inline void* do_malloc(size_t size) { void* ret = NULL; @@ -2652,17 +2688,12 @@ static inline void* do_malloc(size_t size) { if (span != NULL) { ret = SpanToMallocResult(span); } - } else if (size > kMaxSize) { - // Use page-level allocator - SpinLockHolder h(&pageheap_lock); - Span* span = pageheap->New(pages(size)); - if (span != NULL) { - ret = SpanToMallocResult(span); - } - } else { + } else if (size <= kMaxSize) { // The common case, and also the simplest. This just pops the - // size-appropriate freelist, afer replenishing it if it's empty. + // size-appropriate freelist, after replenishing it if it's empty. 
ret = CheckedMallocResult(heap->Allocate(size)); + } else { + ret = do_malloc_pages(pages(size)); } if (ret == NULL) errno = ENOMEM; return ret; diff --git a/src/tests/addressmap_unittest.cc b/src/tests/addressmap_unittest.cc index 50d9b94..6b65cc3 100644 --- a/src/tests/addressmap_unittest.cc +++ b/src/tests/addressmap_unittest.cc @@ -30,6 +30,7 @@ // --- // Author: Sanjay Ghemawat +#include <stdlib.h> // for rand() #include <vector> #include <set> #include <algorithm> @@ -47,62 +48,102 @@ using std::vector; using std::set; using std::random_shuffle; -static void SetCheckCallback(const void* ptr, int val, +struct UniformRandomNumberGenerator { + size_t Uniform(size_t max_size) { + if (max_size == 0) + return 0; + return rand() % max_size; // not a great random-number fn, but portable + } +}; +static UniformRandomNumberGenerator rnd; + + +// pair of associated value and object size +typedef pair<int, size_t> ValueT; + +struct PtrAndSize { + char* ptr; + size_t size; + PtrAndSize(char* p, size_t s) : ptr(p), size(s) {} +}; + +size_t SizeFunc(const ValueT& v) { return v.second; } + +static void SetCheckCallback(const void* ptr, ValueT* val, set<pair<const void*, int> >* check_set) { - check_set->insert(make_pair(ptr, val)); + check_set->insert(make_pair(ptr, val->first)); } int main(int argc, char** argv) { // Get a bunch of pointers const int N = FLAGS_N; - static const int kObjectLength = 19; - vector<char*> ptrs; + static const int kMaxRealSize = 49; + // 100Mb to stress not finding previous object (AddressMap's cluster is 1Mb): + static const size_t kMaxSize = 100*1000*1000; + vector<PtrAndSize> ptrs_and_sizes; for (int i = 0; i < N; ++i) { - ptrs.push_back(new char[kObjectLength]); + size_t s = rnd.Uniform(kMaxRealSize); + ptrs_and_sizes.push_back(PtrAndSize(new char[s], s)); } for (int x = 0; x < FLAGS_iters; ++x) { // Permute pointers to get rid of allocation order issues - random_shuffle(ptrs.begin(), ptrs.end()); + random_shuffle(ptrs_and_sizes.begin(), 
ptrs_and_sizes.end()); - AddressMap<int> map(malloc, free); - int result; + AddressMap<ValueT> map(malloc, free); + const ValueT* result; + const void* res_p; // Insert a bunch of entries for (int i = 0; i < N; ++i) { - void* p = ptrs[i]; - CHECK(!map.Find(p, &result)); - map.Insert(p, i); - CHECK(map.Find(p, &result)); - CHECK_EQ(result, i); - map.Insert(p, i + N); - CHECK(map.Find(p, &result)); - CHECK_EQ(result, i + N); + char* p = ptrs_and_sizes[i].ptr; + CHECK(!map.Find(p)); + int offs = rnd.Uniform(ptrs_and_sizes[i].size); + CHECK(!map.FindInside(&SizeFunc, kMaxSize, p + offs, &res_p)); + map.Insert(p, make_pair(i, ptrs_and_sizes[i].size)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i); + CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p)); + CHECK_EQ(res_p, p); + CHECK_EQ(result->first, i); + map.Insert(p, make_pair(i + N, ptrs_and_sizes[i].size)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + N); } // Delete the even entries for (int i = 0; i < N; i += 2) { - void* p = ptrs[i]; - CHECK(map.FindAndRemove(p, &result)); - CHECK_EQ(result, i + N); + void* p = ptrs_and_sizes[i].ptr; + ValueT removed; + CHECK(map.FindAndRemove(p, &removed)); + CHECK_EQ(removed.first, i + N); } // Lookup the odd entries and adjust them for (int i = 1; i < N; i += 2) { - void* p = ptrs[i]; - CHECK(map.Find(p, &result)); - CHECK_EQ(result, i + N); - map.Insert(p, i + 2*N); - CHECK(map.Find(p, &result)); - CHECK_EQ(result, i + 2*N); + char* p = ptrs_and_sizes[i].ptr; + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + N); + int offs = rnd.Uniform(ptrs_and_sizes[i].size); + CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p)); + CHECK_EQ(res_p, p); + CHECK_EQ(result->first, i + N); + map.Insert(p, make_pair(i + 2*N, ptrs_and_sizes[i].size)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + 2*N); } // Insert even entries back for (int i = 0; i < N; i += 2) { - void* p = ptrs[i]; - map.Insert(p, 
i + 2*N); - CHECK(map.Find(p, &result)); - CHECK_EQ(result, i + 2*N); + char* p = ptrs_and_sizes[i].ptr; + int offs = rnd.Uniform(ptrs_and_sizes[i].size); + CHECK(!map.FindInside(&SizeFunc, kMaxSize, p + offs, &res_p)); + map.Insert(p, make_pair(i + 2*N, ptrs_and_sizes[i].size)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + 2*N); + CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p)); + CHECK_EQ(res_p, p); + CHECK_EQ(result->first, i + 2*N); } // Check all entries @@ -110,17 +151,16 @@ int main(int argc, char** argv) { map.Iterate(SetCheckCallback, &check_set); CHECK_EQ(check_set.size(), N); for (int i = 0; i < N; ++i) { - void* p = ptrs[i]; + void* p = ptrs_and_sizes[i].ptr; check_set.erase(make_pair(p, i + 2*N)); - CHECK(map.Find(p, &result)); - CHECK_EQ(result, i + 2*N); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + 2*N); } CHECK_EQ(check_set.size(), 0); - } for (int i = 0; i < N; ++i) { - delete[] ptrs[i]; + delete[] ptrs_and_sizes[i].ptr; } printf("PASS\n"); diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc index 5b64b8b..ac38fb0 100644 --- a/src/tests/heap-checker_unittest.cc +++ b/src/tests/heap-checker_unittest.cc @@ -95,6 +95,7 @@ #include <iomanip> // for hex #include <set> #include <map> +#include <list> #include <memory> #include <vector> #include <string> @@ -115,6 +116,7 @@ using namespace std; +// ========================================================================= // // TODO(maxim): write a shell script to test that these indeed crash us // (i.e. we do detect leaks) @@ -156,6 +158,9 @@ DEFINE_bool(no_threads, // This is used so we can make can_create_leaks_reliably true // for any pthread implementation and test with that. +DECLARE_int64(heap_check_max_pointer_offset); // heap-checker.cc +DECLARE_string(heap_check); // in heap-checker.cc + #define WARN_IF(cond, msg) LOG_IF(WARNING, cond, msg) // This is an evil macro! Be very careful using it... 
@@ -164,6 +169,8 @@ DEFINE_bool(no_threads, // This is, likewise, evil #define LOGF VLOG(INFO) +static void RunHeapBusyThreads(); // below + class Closure { public: @@ -233,7 +240,7 @@ static bool can_create_leaks_reliably = false; // We use a simple allocation wrapper // to make sure we wipe out the newly allocated objects // in case they still happened to contain some pointer data -// accidently left by the memory allocator. +// accidentally left by the memory allocator. struct Initialized { }; static Initialized initialized; void* operator new(size_t size, const Initialized&) { @@ -833,6 +840,8 @@ static void TestLibCAllocate() { // Continuous random heap memory activity to try to disrupt heap checking. static void* HeapBusyThreadBody(void* a) { + const int thread_num = reinterpret_cast<intptr_t>(a); + VLOG(0) << "A new HeapBusyThread " << thread_num; TestLibCAllocate(); int user = 0; @@ -878,7 +887,7 @@ static void* HeapBusyThreadBody(void* a) { << reinterpret_cast<void*>( reinterpret_cast<uintptr_t>(ptr) ^ kHideMask) << "^" << reinterpret_cast<void*>(kHideMask); - if (FLAGS_test_register_leak) { + if (FLAGS_test_register_leak && thread_num % 5 == 0) { // Hide the register "ptr" value with an xor mask. 
// If one provides --test_register_leak flag, the test should // (with very high probability) crash on some leak check @@ -913,7 +922,8 @@ static void* HeapBusyThreadBody(void* a) { } static void RunHeapBusyThreads() { - if (FLAGS_no_threads) return; + KeyInit(); + if (!FLAGS_interfering_threads || FLAGS_no_threads) return; const int n = 17; // make many threads @@ -923,7 +933,8 @@ static void RunHeapBusyThreads() { // make them and let them run for (int i = 0; i < n; ++i) { VLOG(0) << "Creating extra thread " << i + 1; - CHECK(pthread_create(&tid, &attr, HeapBusyThreadBody, NULL) == 0); + CHECK(pthread_create(&tid, &attr, HeapBusyThreadBody, + reinterpret_cast<void*>(i)) == 0); } Pause(); @@ -1038,10 +1049,62 @@ static void TestHeapLeakCheckerNamedDisabling() { } } -// The code from here to main() -// is to test that objects that are reachable from global -// variables are not reported as leaks, -// with the few exceptions like multiple-inherited objects. +// ========================================================================= // + +// This code section is to test that objects that are reachable from global +// variables are not reported as leaks +// as well as that (Un)IgnoreObject work for such objects fine. + +// An object making functions: +// returns a "weird" pointer to a new object for which +// it's worth checking that the object is reachable via that pointer. +typedef void* (*ObjMakerFunc)(); +static list<ObjMakerFunc> obj_makers; // list of registered object makers + +// Helper macro to register an object making function +// 'name' is an identifier of this object maker, +// 'body' is its function body that must declare +// pointer 'p' to the next object to return. 
+// Usage example: +// REGISTER_OBJ_MAKER(trivial, int* p = new(initialized) int;) +#define REGISTER_OBJ_MAKER(name, body) \ + void* ObjMaker_##name##_() { \ + VLOG(1) << "Obj making " << #name; \ + body; \ + return p; \ + } \ + static ObjMakerRegistrar maker_reg_##name##__(&ObjMaker_##name##_); +// helper class for REGISTER_OBJ_MAKER +struct ObjMakerRegistrar { + ObjMakerRegistrar(ObjMakerFunc obj_maker) { obj_makers.push_back(obj_maker); } +}; + +// List of the objects/pointers made with all the obj_makers +// to test reachability via global data pointers during leak checks. +static list<void*>* live_objects = new list<void*>; + // pointer so that it does not get destructed on exit + +// Exerciser for one ObjMakerFunc. +static void TestPointerReach(ObjMakerFunc obj_maker) { + HeapLeakChecker::IgnoreObject(obj_maker()); // test IgnoreObject + + void* obj = obj_maker(); + HeapLeakChecker::IgnoreObject(obj); + HeapLeakChecker::UnIgnoreObject(obj); // test UnIgnoreObject + HeapLeakChecker::IgnoreObject(obj); // not to need deletion for obj + + live_objects->push_back(obj_maker()); // test reachability at leak check +} + +// Test all ObjMakerFunc registered via REGISTER_OBJ_MAKER. +static void TestObjMakers() { + for (list<ObjMakerFunc>::const_iterator i = obj_makers.begin(); + i != obj_makers.end(); ++i) { + TestPointerReach(*i); + TestPointerReach(*i); // a couple more times would not hurt + TestPointerReach(*i); + } +} // A dummy class to mimic allocation behavior of string-s. 
template<class T> @@ -1083,22 +1146,45 @@ struct Array { T* ptr; }; -static Array<char>* live_leak = NULL; -static Array<char>* live_leak2 = new(initialized) Array<char>(); -static int* live_leak3 = new(initialized) int[10]; -static const char* live_leak4 = new(initialized) char[5]; -static int data[] = { 1, 2, 3, 4, 5, 6, 7, 21, 22, 23, 24, 25, 26, 27 }; -static set<int> live_leak5(data, data+7); -static const set<int> live_leak6(data, data+14); -static const Array<char>* live_leak_arr1 = new(initialized) Array<char>[5]; +// to test pointers to objects, built-in arrays, string, etc: +REGISTER_OBJ_MAKER(plain, int* p = new(initialized) int;) +REGISTER_OBJ_MAKER(int_array_1, int* p = new(initialized) int[1];) +REGISTER_OBJ_MAKER(int_array, int* p = new(initialized) int[10];) +REGISTER_OBJ_MAKER(string, Array<char>* p = new(initialized) Array<char>();) +REGISTER_OBJ_MAKER(string_array, + Array<char>* p = new(initialized) Array<char>[5];) +REGISTER_OBJ_MAKER(char_array, char* p = new(initialized) char[5];) +REGISTER_OBJ_MAKER(appended_string, + Array<char>* p = new Array<char>(); + p->append(Array<char>()); +) +REGISTER_OBJ_MAKER(plain_ptr, int** p = new(initialized) int*;) +REGISTER_OBJ_MAKER(linking_ptr, + int** p = new(initialized) int*; + *p = new(initialized) int; +) + +// small objects: +REGISTER_OBJ_MAKER(0_sized, void* p = malloc(0);) // 0-sized object (important) +REGISTER_OBJ_MAKER(1_sized, void* p = malloc(1);) +REGISTER_OBJ_MAKER(2_sized, void* p = malloc(2);) +REGISTER_OBJ_MAKER(3_sized, void* p = malloc(3);) +REGISTER_OBJ_MAKER(4_sized, void* p = malloc(4);) + +static int set_data[] = { 1, 2, 3, 4, 5, 6, 7, 21, 22, 23, 24, 25, 26, 27 }; +static set<int> live_leak_set(set_data, set_data+7); +static const set<int> live_leak_const_set(set_data, set_data+14); + +REGISTER_OBJ_MAKER(set, + set<int>* p = new(initialized) set<int>(set_data, set_data + 13); +) class ClassA { public: ClassA(int a) : ptr(NULL) { } mutable char* ptr; }; - -static const ClassA 
live_leak7(1); +static const ClassA live_leak_mutable(1); template<class C> class TClass { @@ -1107,13 +1193,12 @@ class TClass { mutable C val; mutable C* ptr; }; - -static const TClass<Array<char> > live_leak8(1); +static const TClass<Array<char> > live_leak_templ_mutable(1); class ClassB { public: ClassB() { } - int b[10]; + char b[7]; virtual void f() { } virtual ~ClassB() { } }; @@ -1121,102 +1206,140 @@ class ClassB { class ClassB2 { public: ClassB2() { } - int b2[10]; + char b2[11]; virtual void f2() { } virtual ~ClassB2() { } }; class ClassD1 : public ClassB { - int d1[10]; + char d1[15]; virtual void f() { } }; class ClassD2 : public ClassB2 { - int d2[10]; + char d2[19]; virtual void f2() { } }; class ClassD : public ClassD1, public ClassD2 { - int d[10]; + char d[3]; virtual void f() { } virtual void f2() { } }; -static ClassB* live_leak_b; -static ClassD1* live_leak_d1; -static ClassD2* live_leak_d2; -static ClassD* live_leak_d; +// to test pointers to objects of base subclasses: -static ClassB* live_leak_b_d1; -static ClassB2* live_leak_b2_d2; -static ClassB* live_leak_b_d; -static ClassB2* live_leak_b2_d; +REGISTER_OBJ_MAKER(B, ClassB* p = new(initialized) ClassB;) +REGISTER_OBJ_MAKER(D1, ClassD1* p = new(initialized) ClassD1;) +REGISTER_OBJ_MAKER(D2, ClassD2* p = new(initialized) ClassD2;) +REGISTER_OBJ_MAKER(D, ClassD* p = new(initialized) ClassD;) -static ClassD1* live_leak_d1_d; -static ClassD2* live_leak_d2_d; +REGISTER_OBJ_MAKER(D1_as_B, ClassB* p = new(initialized) ClassD1;) +REGISTER_OBJ_MAKER(D2_as_B2, ClassB2* p = new(initialized) ClassD2;) +REGISTER_OBJ_MAKER(D_as_B, ClassB* p = new(initialized) ClassD;) +REGISTER_OBJ_MAKER(D_as_D1, ClassD1* p = new(initialized) ClassD;) +// inside-object pointers: +REGISTER_OBJ_MAKER(D_as_B2, ClassB2* p = new(initialized) ClassD;) +REGISTER_OBJ_MAKER(D_as_D2, ClassD2* p = new(initialized) ClassD;) -// A dummy string class mimics certain third party string -// implementations, which store a refcount in the 
first -// few bytes and keeps a pointer pointing behind the refcount. -template <typename R> -class RefcountStr { +class InterfaceA { public: - RefcountStr() { - ptr = new char[sizeof(R) * 10]; - ptr = ptr + sizeof(R); - } - char* ptr; + virtual void A() = 0; + virtual ~InterfaceA() { } + protected: + InterfaceA() { } }; -// E.g., mimics UnicodeString defined in third_party/icu. -static const RefcountStr<uint32> live_leak_refcountstr_4; - -// have leaks but ignore the leaked objects -static void IgnoredLeaks() { - int* p = new(initialized) int[1]; - HeapLeakChecker::IgnoreObject(p); - int** leak = new(initialized) int*; - HeapLeakChecker::IgnoreObject(leak); - *leak = new(initialized) int; - HeapLeakChecker::UnIgnoreObject(p); - delete [] p; -} + +class InterfaceB { + public: + virtual void B() = 0; + virtual ~InterfaceB() { } + protected: + InterfaceB() { } +}; + +class InterfaceC : public InterfaceA { + public: + virtual void C() = 0; + virtual ~InterfaceC() { } + protected: + InterfaceC() { } +}; + +class ClassMltD1 : public ClassB, public InterfaceB, public InterfaceC { + public: + char d1[11]; + virtual void f() { } + virtual void A() { } + virtual void B() { } + virtual void C() { } +}; + +class ClassMltD2 : public InterfaceA, public InterfaceB, public ClassB { + public: + char d2[15]; + virtual void f() { } + virtual void A() { } + virtual void B() { } +}; + +// to specifically test heap reachability under +// interface-only multiple inheritance (some use inside-object pointers): +REGISTER_OBJ_MAKER(MltD1, ClassMltD1* p = new (initialized) ClassMltD1;) +REGISTER_OBJ_MAKER(MltD1_as_B, ClassB* p = new (initialized) ClassMltD1;) +REGISTER_OBJ_MAKER(MltD1_as_IA, InterfaceA* p = new (initialized) ClassMltD1;) +REGISTER_OBJ_MAKER(MltD1_as_IB, InterfaceB* p = new (initialized) ClassMltD1;) +REGISTER_OBJ_MAKER(MltD1_as_IC, InterfaceC* p = new (initialized) ClassMltD1;) + +REGISTER_OBJ_MAKER(MltD2, ClassMltD2* p = new (initialized) ClassMltD2;) 
+REGISTER_OBJ_MAKER(MltD2_as_B, ClassB* p = new (initialized) ClassMltD2;) +REGISTER_OBJ_MAKER(MltD2_as_IA, InterfaceA* p = new (initialized) ClassMltD2;) +REGISTER_OBJ_MAKER(MltD2_as_IB, InterfaceB* p = new (initialized) ClassMltD2;) + +// to mimic UnicodeString defined in third_party/icu, +// which store a platform-independent-sized refcount in the first +// few bytes and keeps a pointer pointing behind the refcount. +REGISTER_OBJ_MAKER(unicode_string, + char* p = new char[sizeof(uint32) * 10]; + p += sizeof(uint32); +) +// similar, but for platform-dependent-sized refcount +REGISTER_OBJ_MAKER(ref_counted, + char* p = new char[sizeof(int) * 20]; + p += sizeof(int); +) + +struct Nesting { + struct Inner { + Nesting* parent; + Inner(Nesting* p) : parent(p) {} + }; + Inner i0; + char n1[5]; + Inner i1; + char n2[11]; + Inner i2; + char n3[27]; + Inner i3; + Nesting() : i0(this), i1(this), i2(this), i3(this) {} +}; + +// to test inside-object pointers pointing at objects nested into heap objects: +REGISTER_OBJ_MAKER(nesting_i0, Nesting::Inner* p = &((new Nesting())->i0);) +REGISTER_OBJ_MAKER(nesting_i1, Nesting::Inner* p = &((new Nesting())->i1);) +REGISTER_OBJ_MAKER(nesting_i2, Nesting::Inner* p = &((new Nesting())->i2);) +REGISTER_OBJ_MAKER(nesting_i3, Nesting::Inner* p = &((new Nesting())->i3);) // allocate many objects reachable from global data static void TestHeapLeakCheckerLiveness() { - live_leak_b = new(initialized) ClassB; - live_leak_d1 = new(initialized) ClassD1; - live_leak_d2 = new(initialized) ClassD2; - live_leak_d = new(initialized) ClassD; - - live_leak_b_d1 = new(initialized) ClassD1; - live_leak_b2_d2 = new(initialized) ClassD2; - live_leak_b_d = new(initialized) ClassD; - live_leak_b2_d = new(initialized) ClassD; - - live_leak_d1_d = new(initialized) ClassD; - live_leak_d2_d = new(initialized) ClassD; - - HeapLeakChecker::IgnoreObject((ClassD*)live_leak_b2_d); - HeapLeakChecker::IgnoreObject((ClassD*)live_leak_d2_d); - // These two do not get 
deleted with liveness flood - // because the base class pointer points inside of the objects - // in such cases of multiple inheritance. - // Luckily google code does not use multiple inheritance almost at all. - - live_leak = new(initialized) Array<char>(); - delete [] live_leak3; - live_leak3 = new(initialized) int[33]; - live_leak2->append(*live_leak); - live_leak7.ptr = new(initialized) char[77]; - live_leak8.ptr = new(initialized) Array<char>(); - live_leak8.val = Array<char>(); - - IgnoredLeaks(); - IgnoredLeaks(); - IgnoredLeaks(); + live_leak_mutable.ptr = new(initialized) char[77]; + live_leak_templ_mutable.ptr = new(initialized) Array<char>(); + live_leak_templ_mutable.val = Array<char>(); + + TestObjMakers(); } -DECLARE_string(heap_check); +// ========================================================================= // // Get address (PC value) following the mmap call into addr_after_mmap_call static void* Mmapper(uintptr_t* addr_after_mmap_call) { @@ -1294,6 +1417,8 @@ extern void VerifyHeapProfileTableStackGet() { free(addr); } +// ========================================================================= // + // Helper to do 'return 0;' inside main(): insted we do 'return Pass();' static int Pass() { fprintf(stdout, "PASS\n"); @@ -1362,6 +1487,13 @@ int main(int argc, char** argv) { // (when !FLAGS_test_cancel_global_check) } + if (FLAGS_test_register_leak) { + // make us fail only where the .sh test expects: + Pause(); + for (int i = 0; i < 20; ++i) CHECK(HeapLeakChecker::NoGlobalLeaks()); + return Pass(); + } + TestHeapLeakCheckerLiveness(); HeapLeakChecker heap_check("all"); @@ -1441,10 +1573,6 @@ int main(int argc, char** argv) { range_disable_named = true; ThreadNamedDisabledLeaks(); - HeapLeakChecker::IgnoreObject(new(initialized) set<int>(data, data + 13)); - // This checks both that IgnoreObject works, and - // and the fact that we don't drop such leaks as live for some reason. 
- CHECK(HeapLeakChecker::NoGlobalLeaks()); // so far, so good return Pass(); diff --git a/src/tests/profiler_unittest.cc b/src/tests/profiler_unittest.cc index e5729a1..1063751 100644 --- a/src/tests/profiler_unittest.cc +++ b/src/tests/profiler_unittest.cc @@ -42,7 +42,7 @@ #endif #include <sys/wait.h> // for wait() #include "google/profiler.h" -#include "base/mutex.h" +#include "base/simple_mutex.h" #include "tests/testutil.h" static int result = 0; diff --git a/src/tests/stacktrace_unittest.cc b/src/tests/stacktrace_unittest.cc index 710743d..6bd28a2 100644 --- a/src/tests/stacktrace_unittest.cc +++ b/src/tests/stacktrace_unittest.cc @@ -39,7 +39,7 @@ // Obtain a backtrace, verify that the expected callers are present in the -// backtrace, and maybe print the backtrace to stdout. +// backtrace, and maybe print the backtrace to stdout. //-----------------------------------------------------------------------// void CheckStackTraceLeaf(); @@ -53,7 +53,7 @@ void CheckStackTrace(int i); // The sequence of functions whose return addresses we expect to see in the // backtrace. 
const int BACKTRACE_STEPS = 6; -void * expected_stack[BACKTRACE_STEPS] = { +void * expected_stack[BACKTRACE_STEPS] = { (void *) &CheckStackTraceLeaf, (void *) &CheckStackTrace4, (void *) &CheckStackTrace3, @@ -81,11 +81,12 @@ void * expected_stack[BACKTRACE_STEPS] = { //-----------------------------------------------------------------------// +const int kMaxFnLen = 0x40; // assume relevant functions are only this long + void CheckRetAddrIsInFunction( void * ret_addr, void * function_start_addr) { - const int typ_fn_len = 0x40; // assume relevant functions are only 0x40 bytes long CHECK_GE(ret_addr, function_start_addr); - CHECK_LE(ret_addr, (void *) ((char *) function_start_addr + typ_fn_len)); + CHECK_LE(ret_addr, (void *) ((char *) function_start_addr + kMaxFnLen)); } //-----------------------------------------------------------------------// @@ -111,7 +112,11 @@ void CheckStackTraceLeaf(void) { #endif for (int i = 0; i < BACKTRACE_STEPS; i++) { + printf("Backtrace %d: expected: %p..%p actual: %p ... 
", + i, expected_stack[i], + reinterpret_cast<char*>(expected_stack[i]) + kMaxFnLen, stack[i]); CheckRetAddrIsInFunction(stack[i], expected_stack[i]); + printf("OK\n"); } } diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc index e818a21..d3df596 100644 --- a/src/tests/tcmalloc_unittest.cc +++ b/src/tests/tcmalloc_unittest.cc @@ -76,7 +76,7 @@ #include <string> #include <new> #include "base/logging.h" -#include "base/mutex.h" +#include "base/simple_mutex.h" #include "google/malloc_hook.h" #include "google/malloc_extension.h" #include "tests/testutil.h" diff --git a/src/windows/preamble_patcher.h b/src/windows/preamble_patcher.h index cc69d72..dc6d4db 100644 --- a/src/windows/preamble_patcher.h +++ b/src/windows/preamble_patcher.h @@ -65,6 +65,9 @@ enum SideStepError { SIDESTEP_UNEXPECTED, }; +#define SIDESTEP_TO_HRESULT(error) \ + MAKE_HRESULT(SEVERITY_ERROR, FACILITY_NULL, error) + // Implements a patching mechanism that overwrites the first few bytes of // a function preamble with a jump to our hook function, which is then // able to call the original function via a specially-made preamble-stub |