summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2008-02-13 00:55:09 +0000
committercsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2008-02-13 00:55:09 +0000
commit8a0a3101bc6a7d56ac04b278f28bdf3f95b00a3c (patch)
tree46f871a3160a4023201d72b1b04a9a88e3d88b78
parentb43ba444fcd74fa7c3260f6b2494dcbaa3fdb296 (diff)
downloadgperftools-8a0a3101bc6a7d56ac04b278f28bdf3f95b00a3c.tar.gz
Tue Feb 12 12:28:32 2008 Google Inc. <opensource@google.com>
* google-perftools: version 0.95 release * Better -- not perfect -- support for linux-ppc (csilvers) * Fix race condition in libunwind stacktrace (aruns) * Speed up x86 spinlock locking (m3b) * Improve heap-checker performance (maxim) * Heap checker traverses more ptrs inside heap-alloced objects (maxim) * Remove deprecated ProfilerThreadState function (cgd) * Update libunwind documentation for statically linked binaries (aruns) git-svn-id: http://gperftools.googlecode.com/svn/trunk@44 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
-rw-r--r--ChangeLog11
-rw-r--r--INSTALL45
-rw-r--r--Makefile.am36
-rw-r--r--Makefile.in155
-rwxr-xr-xconfigure53
-rw-r--r--configure.ac11
-rw-r--r--doc/heap_checker.html2
-rw-r--r--doc/tcmalloc.html16
-rw-r--r--packages/deb/changelog6
-rw-r--r--packages/deb/docs1
-rw-r--r--packages/rpm/rpm.spec15
-rw-r--r--src/addressmap-inl.h100
-rw-r--r--src/base/atomicops-internals-linuxppc.h193
-rw-r--r--src/base/atomicops-internals-x86-msvc.h2
-rw-r--r--src/base/atomicops-internals-x86.h31
-rw-r--r--src/base/atomicops.h2
-rw-r--r--src/base/commandlineflags.h1
-rw-r--r--src/base/cycleclock.h95
-rw-r--r--src/base/elfcore.h16
-rw-r--r--src/base/linux_syscall_support.h1305
-rw-r--r--src/base/linuxthreads.c56
-rw-r--r--src/base/linuxthreads.h2
-rw-r--r--src/base/logging.h2
-rw-r--r--src/base/simple_mutex.h (renamed from src/base/mutex.h)29
-rw-r--r--src/base/spinlock.cc56
-rw-r--r--src/base/spinlock.h27
-rw-r--r--src/base/sysinfo.cc252
-rw-r--r--src/base/sysinfo.h13
-rw-r--r--src/google/heap-checker.h27
-rw-r--r--src/google/profiler.h23
-rw-r--r--src/heap-checker.cc404
-rw-r--r--src/heap-profile-table.cc82
-rw-r--r--src/heap-profile-table.h72
-rw-r--r--src/heap-profiler.cc30
-rw-r--r--src/malloc_extension.cc31
-rw-r--r--src/malloc_hook.cc12
-rw-r--r--src/maybe_threads.cc1
-rw-r--r--src/packed-cache-inl.h62
-rw-r--r--src/profiledata.h6
-rw-r--r--src/profiler.cc64
-rw-r--r--src/stacktrace.cc2
-rw-r--r--src/stacktrace_libunwind-inl.h27
-rw-r--r--src/stacktrace_powerpc-inl.h80
-rw-r--r--src/system-alloc.cc17
-rw-r--r--src/tcmalloc.cc225
-rw-r--r--src/tests/addressmap_unittest.cc108
-rw-r--r--src/tests/heap-checker_unittest.cc316
-rw-r--r--src/tests/profiler_unittest.cc2
-rw-r--r--src/tests/stacktrace_unittest.cc13
-rw-r--r--src/tests/tcmalloc_unittest.cc2
-rw-r--r--src/windows/preamble_patcher.h3
51 files changed, 3088 insertions, 1054 deletions
diff --git a/ChangeLog b/ChangeLog
index 9b4908b..6d99a97 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Tue Feb 12 12:28:32 2008 Google Inc. <opensource@google.com>
+
+ * google-perftools: version 0.95 release
+ * Better -- not perfect -- support for linux-ppc (csilvers)
+ * Fix race condition in libunwind stacktrace (aruns)
+ * Speed up x86 spinlock locking (m3b)
+ * Improve heap-checker performance (maxim)
+ * Heap checker traverses more ptrs inside heap-alloced objects (maxim)
+ * Remove deprecated ProfilerThreadState function (cgd)
+ * Update libunwind documentation for statically linked binaries (aruns)
+
Mon Dec 3 23:51:54 2007 Google Inc. <opensource@google.com>
* google-perftools: version 0.94.1 release (bugfix release)
diff --git a/INSTALL b/INSTALL
index a42e74b..d717ea6 100644
--- a/INSTALL
+++ b/INSTALL
@@ -32,6 +32,12 @@ perftools: that is, if you link with 'gcc -static -lgcc_eh ...'. This
is because both libunwind and libgcc implement the same C++ exception
handling APIs, but they implement them differently on some platforms.
This is not likely to be a problem on ia64, but may be on x86-64.
+
+Also, if you link binaries statically, make sure that you add
+-Wl,--eh-frame-hdr to your linker options. This is required so that
+libunwind can find the information generated by the compiler required
+for stack unwinding.
+
Using -static is rare, though, so unless you know this will affect you
it probably won't.
@@ -63,6 +69,7 @@ Perftools has been tested on the following systems:
Linux Ubuntu 6.06.1 (x86)
Linux Ubuntu 6.06.1 (x86_64)
Linux RedHat 9 (x86)
+ Linux Debian 4.0 (PPC)
Mac OS X 10.3.9 (Panther) (PowerPC)
Mac OS X 10.4.8 (Tiger) (PowerPC)
Mac OS X 10.4.8 (Tiger) (x86)
@@ -72,9 +79,9 @@ Perftools has been tested on the following systems:
Windows XP, Visual Studio 2005 (VC++ 8) (x86)
Windows XP, MinGW 5.1.3 (x86)
-It works in its full generality on all the Linux systems tested, both
-x86 and x86_64 (though see 64-bit notes above). Portions of perftools
-work on the other systems. The basic memory-allocation library,
+It works in its full generality on the Linux x86 and x86_64 systems
+tested (though see 64-bit notes above). Portions of perftools work on
+the other systems. The basic memory-allocation library,
tcmalloc_minimal, works on all systems. The cpu-profiler also works
fairly widely. However, the heap-profiler and heap-checker are not
yet as widely supported.
@@ -107,23 +114,33 @@ above, by linking in libtcmalloc_minimal.
is working fine. This only affects programs that call fork(); for
most programs, the cpu profiler is entirely safe to use.
- libtcmalloc.so does not successfully build, so neither do these
- unittests that depend on it:
- % make -k heap-profiler_unittest.sh heap-checker_unittest.sh \
- heap-checker-death_unittest.sh maybe_threads_unittest.sh \
- tcmalloc_unittest tcmalloc_both_unittest \
- tcmalloc_large_unittest
+ libtcmalloc.so successfully builds, and the "advanced" tcmalloc
+ functionality all works except for the leak-checker, which has
+ Linux-specific code:
+ % make heap-profiler_unittest.sh maybe_threads_unittest.sh \
+ tcmalloc_unittest tcmalloc_both_unittest \
+ tcmalloc_large_unittest # THESE WORK
+ % make -k heap-checker_unittest.sh \
+ heap-checker-death_unittest.sh # THESE DO NOT
I have not tested other *BSD systems, but they are probably similar.
+** Linux/PPC:
+
+ I've tested on a PowerPC Linux box using qemu against Debian Etch
+ (4.0). Most of the tests pass. The heap-checker unittest does not
+ pass for reasons which are not yet clear but seem to be related to
+ the clone() system call. Heap checking may work properly for
+ single-threaded programs, though I haven't tested that.
+
** Mac OS X:
- I've tested OS X 10.4 [Tiger] and OS X 10.3 [Panther] on both intel
- (x86) and PowerPC systems. For Panther/ppc systems, perftools does
- not work at all: it depends on a header file, OSAtomic.h, which is
- new in 10.4.
+ I've tested OS X 10.5 [Leopard], OS X 10.4 [Tiger] and OS X 10.3
+ [Panther] on both intel (x86) and PowerPC systems. For Panther/ppc
+ systems, perftools does not work at all: it depends on a header
+ file, OSAtomic.h, which is new in 10.4.
- For the other three systems, the binaries and libraries that
+ For the other seven systems, the binaries and libraries that
successfully build are exactly the same as for FreeBSD. See that
section for a list of binaries and instructions on building them.
diff --git a/Makefile.am b/Makefile.am
index 9dd93f1..6ac4748 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -20,7 +20,7 @@ endif
# These are x86-specific, having to do with frame-pointers
if X86_64
-if ENABLE_FRAME_POINTER
+if ENABLE_FRAME_POINTERS
AM_CXXFLAGS += -fno-omit-frame-pointer
else
# TODO(csilvers): check if -fomit-frame-pointer might be in $(CXXFLAGS),
@@ -86,6 +86,15 @@ noinst_LTLIBRARIES += liblogging.la
liblogging_la_SOURCES = src/base/logging.cc \
$(LOGGING_INCLUDES)
+SYSINFO_INCLUDES = src/base/sysinfo.h \
+ src/base/logging.h \
+ src/base/commandlineflags.h \
+ src/base/cycleclock.h \
+ src/base/basictypes.h
+noinst_LTLIBRARIES += libsysinfo.la
+libsysinfo_la_SOURCES = src/base/sysinfo.cc \
+ $(SYSINFO_INCLUDES)
+
# For MinGW, we use libwindows and not libspinlock. For every other
# unix system, we use libspinlock and don't need libwindows. Luckily,
# we need the windows.a library in exactly the same place we need
@@ -108,7 +117,7 @@ libwindows_la_SOURCES = $(WINDOWS_INCLUDES) \
src/windows/patch_functions.cc \
src/windows/preamble_patcher.cc \
src/windows/preamble_patcher_with_stub.cc
-LIBSPINLOCK = libwindows.la
+LIBSPINLOCK = libwindows.la libsysinfo.la liblogging.la
MAYBE_THREADS_CC =
SYSTEM_ALLOC_CC =
@@ -117,6 +126,7 @@ else
SPINLOCK_INCLUDES = src/base/spinlock.h \
src/base/atomicops.h \
src/base/atomicops-internals-macosx.h \
+ src/base/atomicops-internals-linuxppc.h \
src/base/atomicops-internals-x86-msvc.h \
src/base/atomicops-internals-x86.h
@@ -124,7 +134,8 @@ noinst_LTLIBRARIES += libspinlock.la
libspinlock_la_SOURCES = src/base/spinlock.cc \
src/base/atomicops-internals-x86.cc \
$(SPINLOCK_INCLUDES)
-LIBSPINLOCK = libspinlock.la
+# spinlock also needs NumCPUs, from libsysinfo, which in turn needs liblogging
+LIBSPINLOCK = libspinlock.la libsysinfo.la liblogging.la
MAYBE_THREADS_CC = src/maybe_threads.cc
SYSTEM_ALLOC_CC = src/system-alloc.cc
@@ -142,7 +153,7 @@ low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \
src/malloc_hook.cc \
src/tests/low_level_alloc_unittest.cc \
$(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES)
-low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la
+low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libstacktrace.la
if !MINGW
TESTS += atomicops_unittest
@@ -153,7 +164,7 @@ ATOMICOPS_UNITTEST_INCLUDES = src/base/atomicops.h \
$(LOGGING_INCLUDES)
atomicops_unittest_SOURCES = src/tests/atomicops_unittest.cc \
$(ATOMICOPS_UNITTEST_INCLUDES)
-atomicops_unittest_LDADD = $(LIBSPINLOCK) liblogging.la
+atomicops_unittest_LDADD = $(LIBSPINLOCK)
endif !MINGW
@@ -246,7 +257,7 @@ libtcmalloc_minimal_la_SOURCES = src/internal_logging.cc \
libtcmalloc_minimal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS)
libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) \
- libstacktrace.la $(LIBSPINLOCK) liblogging.la
+ libstacktrace.la $(LIBSPINLOCK)
# Whenever we link in tcmalloc_minimal, we also need to link in
# libstacktrace.so (we also need libspinlock and liblogging, but those
@@ -478,14 +489,13 @@ libtcmalloc_la_SOURCES = src/internal_logging.cc \
src/heap-checker.cc \
src/base/linuxthreads.c \
src/base/thread_lister.c \
- src/base/sysinfo.cc \
src/base/low_level_alloc.cc \
$(TCMALLOC_INCLUDES) \
src/heap-checker-bcad.cc
libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS)
libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) \
- libstacktrace.la $(LIBSPINLOCK) liblogging.la
+ libstacktrace.la $(LIBSPINLOCK)
# See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this.
# Basically it's to work around systems where --rpath doesn't work right.
@@ -595,7 +605,7 @@ S_CPU_PROFILER_INCLUDES = src/profiledata.h \
src/base/commandlineflags.h \
src/base/googleinit.h \
src/base/logging.h \
- src/base/mutex.h \
+ src/base/simple_mutex.h \
src/base/sysinfo.h \
$(SPINLOCK_INCLUDES) \
$(LOGGING_INCLUDES)
@@ -608,11 +618,10 @@ googleinclude_HEADERS += $(SG_CPU_PROFILER_INCLUDES)
lib_LTLIBRARIES += libprofiler.la
libprofiler_la_SOURCES = src/profiler.cc \
src/profiledata.cc \
- src/base/sysinfo.cc \
$(CPU_PROFILER_INCLUDES)
-libprofiler_la_LIBADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la
+libprofiler_la_LIBADD = $(LIBSPINLOCK) libstacktrace.la
# We have to include ProfileData for profiledata_unittest
-CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfilerThreadState|ProfileData)'
+CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfileData)'
libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS)
# See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this.
@@ -631,7 +640,7 @@ profiledata_unittest_SOURCES = src/tests/profiledata_unittest.cc \
src/base/commandlineflags.h \
src/base/logging.h \
src/base/basictypes.h
-profiledata_unittest_LDADD = -lprofiler
+profiledata_unittest_LDADD = $(LIBPROFILER)
TESTS += profiler_unittest.sh
profiler_unittest_sh_SOURCES = src/tests/profiler_unittest.sh
@@ -672,6 +681,7 @@ profiler4_unittest_DEPENDENCIES = $(LIBPROFILER)
### Documentation
dist_doc_DATA += doc/cpuprofile.html \
+ doc/cpuprofile-fileformat.html \
doc/pprof-test-big.gif \
doc/pprof-test.gif \
doc/pprof-vsnprintf-big.gif \
diff --git a/Makefile.in b/Makefile.in
index 0c0e850..f47672a 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -50,8 +50,8 @@ target_triplet = @target@
@GCC_TRUE@am__append_1 = -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare
# These are x86-specific, having to do with frame-pointers
-@ENABLE_FRAME_POINTER_TRUE@@X86_64_TRUE@am__append_2 = -fno-omit-frame-pointer
-@ENABLE_FRAME_POINTER_FALSE@@X86_64_TRUE@am__append_3 = -DNO_FRAME_POINTER
+@ENABLE_FRAME_POINTERS_TRUE@@X86_64_TRUE@am__append_2 = -fno-omit-frame-pointer
+@ENABLE_FRAME_POINTERS_FALSE@@X86_64_TRUE@am__append_3 = -DNO_FRAME_POINTER
noinst_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_5)
@MINGW_TRUE@am__append_4 = libwindows.la
@MINGW_FALSE@am__append_5 = libspinlock.la
@@ -105,8 +105,10 @@ EXTRA_PROGRAMS = ptmalloc_unittest1$(EXEEXT) \
### Documentation
@MINGW_FALSE@am__append_15 = doc/heapprofile.html \
@MINGW_FALSE@ doc/heap-example1.png doc/heap_checker.html \
-@MINGW_FALSE@ doc/cpuprofile.html doc/pprof-test-big.gif \
-@MINGW_FALSE@ doc/pprof-test.gif doc/pprof-vsnprintf-big.gif \
+@MINGW_FALSE@ doc/cpuprofile.html \
+@MINGW_FALSE@ doc/cpuprofile-fileformat.html \
+@MINGW_FALSE@ doc/pprof-test-big.gif doc/pprof-test.gif \
+@MINGW_FALSE@ doc/pprof-vsnprintf-big.gif \
@MINGW_FALSE@ doc/pprof-vsnprintf.gif
@MINGW_TRUE@profiler2_unittest_DEPENDENCIES =
DIST_COMMON = README $(am__configure_deps) $(am__dist_doc_DATA_DIST) \
@@ -147,29 +149,33 @@ liblogging_la_LIBADD =
am__objects_1 =
am_liblogging_la_OBJECTS = logging.lo $(am__objects_1)
liblogging_la_OBJECTS = $(am_liblogging_la_OBJECTS)
-@MINGW_FALSE@am__DEPENDENCIES_1 = libspinlock.la
-@MINGW_TRUE@am__DEPENDENCIES_1 = libwindows.la
+@MINGW_FALSE@am__DEPENDENCIES_1 = libspinlock.la libsysinfo.la \
+@MINGW_FALSE@ liblogging.la
+@MINGW_TRUE@am__DEPENDENCIES_1 = libwindows.la libsysinfo.la \
+@MINGW_TRUE@ liblogging.la
@MINGW_FALSE@libprofiler_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \
-@MINGW_FALSE@ liblogging.la libstacktrace.la
+@MINGW_FALSE@ libstacktrace.la
am__libprofiler_la_SOURCES_DIST = src/profiler.cc src/profiledata.cc \
- src/base/sysinfo.cc src/profiledata.h src/getpc.h \
- src/base/basictypes.h src/base/commandlineflags.h \
- src/base/googleinit.h src/base/logging.h src/base/mutex.h \
- src/base/sysinfo.h src/base/spinlock.h src/base/atomicops.h \
+ src/profiledata.h src/getpc.h src/base/basictypes.h \
+ src/base/commandlineflags.h src/base/googleinit.h \
+ src/base/logging.h src/base/simple_mutex.h src/base/sysinfo.h \
+ src/base/spinlock.h src/base/atomicops.h \
src/base/atomicops-internals-macosx.h \
+ src/base/atomicops-internals-linuxppc.h \
src/base/atomicops-internals-x86-msvc.h \
src/base/atomicops-internals-x86.h src/google/profiler.h \
src/google/stacktrace.h
@MINGW_FALSE@am__objects_2 = $(am__objects_1) $(am__objects_1)
@MINGW_FALSE@am__objects_3 = $(am__objects_2) $(am__objects_1)
@MINGW_FALSE@am_libprofiler_la_OBJECTS = profiler.lo profiledata.lo \
-@MINGW_FALSE@ sysinfo.lo $(am__objects_3)
+@MINGW_FALSE@ $(am__objects_3)
libprofiler_la_OBJECTS = $(am_libprofiler_la_OBJECTS)
@MINGW_FALSE@am_libprofiler_la_rpath = -rpath $(libdir)
libspinlock_la_LIBADD =
am__libspinlock_la_SOURCES_DIST = src/base/spinlock.cc \
src/base/atomicops-internals-x86.cc src/base/spinlock.h \
src/base/atomicops.h src/base/atomicops-internals-macosx.h \
+ src/base/atomicops-internals-linuxppc.h \
src/base/atomicops-internals-x86-msvc.h \
src/base/atomicops-internals-x86.h
@MINGW_FALSE@am_libspinlock_la_OBJECTS = spinlock.lo \
@@ -182,26 +188,28 @@ libstacktrace_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \
am__objects_4 = $(am__objects_1) $(am__objects_1)
am_libstacktrace_la_OBJECTS = stacktrace.lo $(am__objects_4)
libstacktrace_la_OBJECTS = $(am_libstacktrace_la_OBJECTS)
+libsysinfo_la_LIBADD =
+am_libsysinfo_la_OBJECTS = sysinfo.lo $(am__objects_1)
+libsysinfo_la_OBJECTS = $(am_libsysinfo_la_OBJECTS)
@MINGW_FALSE@libtcmalloc_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \
-@MINGW_FALSE@ libstacktrace.la $(am__DEPENDENCIES_1) \
-@MINGW_FALSE@ liblogging.la
+@MINGW_FALSE@ libstacktrace.la $(am__DEPENDENCIES_1)
am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \
src/system-alloc.cc src/memfs_malloc.cc src/tcmalloc.cc \
src/malloc_hook.cc src/malloc_extension.cc \
src/maybe_threads.cc src/memory_region_map.cc \
src/heap-profiler.cc src/heap-profile-table.cc \
src/heap-checker.cc src/base/linuxthreads.c \
- src/base/thread_lister.c src/base/sysinfo.cc \
- src/base/low_level_alloc.cc src/internal_logging.h \
- src/system-alloc.h src/pagemap.h src/addressmap-inl.h \
- src/packed-cache-inl.h src/heap-profile-table.h \
- src/base/basictypes.h src/base/commandlineflags.h \
- src/base/googleinit.h src/base/elfcore.h \
- src/base/linux_syscall_support.h src/base/linuxthreads.h \
- src/base/thread_lister.h src/base/sysinfo.h \
- src/base/stl_allocator.h src/maybe_threads.h \
- src/base/spinlock.h src/base/atomicops.h \
+ src/base/thread_lister.c src/base/low_level_alloc.cc \
+ src/internal_logging.h src/system-alloc.h src/pagemap.h \
+ src/addressmap-inl.h src/packed-cache-inl.h \
+ src/heap-profile-table.h src/base/basictypes.h \
+ src/base/commandlineflags.h src/base/googleinit.h \
+ src/base/elfcore.h src/base/linux_syscall_support.h \
+ src/base/linuxthreads.h src/base/thread_lister.h \
+ src/base/sysinfo.h src/base/stl_allocator.h \
+ src/maybe_threads.h src/base/spinlock.h src/base/atomicops.h \
src/base/atomicops-internals-macosx.h \
+ src/base/atomicops-internals-linuxppc.h \
src/base/atomicops-internals-x86-msvc.h \
src/base/atomicops-internals-x86.h src/base/logging.h \
src/google/malloc_hook.h src/google/malloc_extension.h \
@@ -220,20 +228,21 @@ am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \
@MINGW_FALSE@ libtcmalloc_la-heap-profiler.lo \
@MINGW_FALSE@ libtcmalloc_la-heap-profile-table.lo \
@MINGW_FALSE@ libtcmalloc_la-heap-checker.lo linuxthreads.lo \
-@MINGW_FALSE@ thread_lister.lo libtcmalloc_la-sysinfo.lo \
+@MINGW_FALSE@ thread_lister.lo \
@MINGW_FALSE@ libtcmalloc_la-low_level_alloc.lo \
@MINGW_FALSE@ $(am__objects_3) \
@MINGW_FALSE@ libtcmalloc_la-heap-checker-bcad.lo
libtcmalloc_la_OBJECTS = $(am_libtcmalloc_la_OBJECTS)
@MINGW_FALSE@am_libtcmalloc_la_rpath = -rpath $(libdir)
libtcmalloc_minimal_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \
- libstacktrace.la $(am__DEPENDENCIES_1) liblogging.la
+ libstacktrace.la $(am__DEPENDENCIES_1)
am__libtcmalloc_minimal_la_SOURCES_DIST = src/internal_logging.cc \
src/system-alloc.cc src/memfs_malloc.cc src/tcmalloc.cc \
src/malloc_hook.cc src/malloc_extension.cc \
src/maybe_threads.cc src/internal_logging.h src/system-alloc.h \
src/packed-cache-inl.h src/base/spinlock.h \
src/base/atomicops.h src/base/atomicops-internals-macosx.h \
+ src/base/atomicops-internals-linuxppc.h \
src/base/atomicops-internals-x86-msvc.h \
src/base/atomicops-internals-x86.h src/base/commandlineflags.h \
src/base/basictypes.h src/pagemap.h src/maybe_threads.h \
@@ -306,8 +315,7 @@ am__atomicops_unittest_SOURCES_DIST = src/tests/atomicops_unittest.cc \
@MINGW_FALSE@am_atomicops_unittest_OBJECTS = \
@MINGW_FALSE@ atomicops_unittest.$(OBJEXT) $(am__objects_11)
atomicops_unittest_OBJECTS = $(am_atomicops_unittest_OBJECTS)
-@MINGW_FALSE@atomicops_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) \
-@MINGW_FALSE@ liblogging.la
+@MINGW_FALSE@atomicops_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1)
am_frag_unittest_OBJECTS = frag_unittest-frag_unittest.$(OBJEXT)
frag_unittest_OBJECTS = $(am_frag_unittest_OBJECTS)
am__DEPENDENCIES_3 = libstacktrace.la libtcmalloc_minimal.la
@@ -361,6 +369,7 @@ am__low_level_alloc_unittest_SOURCES_DIST = \
src/base/low_level_alloc.h src/base/basictypes.h \
src/google/malloc_hook.h src/base/spinlock.h \
src/base/atomicops.h src/base/atomicops-internals-macosx.h \
+ src/base/atomicops-internals-linuxppc.h \
src/base/atomicops-internals-x86-msvc.h \
src/base/atomicops-internals-x86.h src/base/logging.h \
src/base/commandlineflags.h
@@ -370,7 +379,7 @@ am_low_level_alloc_unittest_OBJECTS = low_level_alloc.$(OBJEXT) \
low_level_alloc_unittest_OBJECTS = \
$(am_low_level_alloc_unittest_OBJECTS)
low_level_alloc_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) \
- liblogging.la libstacktrace.la
+ libstacktrace.la
am_markidle_unittest_OBJECTS = \
markidle_unittest-markidle_unittest.$(OBJEXT) \
markidle_unittest-testutil.$(OBJEXT)
@@ -396,7 +405,9 @@ am__profiledata_unittest_SOURCES_DIST = \
@MINGW_FALSE@am_profiledata_unittest_OBJECTS = \
@MINGW_FALSE@ profiledata_unittest.$(OBJEXT)
profiledata_unittest_OBJECTS = $(am_profiledata_unittest_OBJECTS)
-profiledata_unittest_DEPENDENCIES =
+@MINGW_FALSE@am__DEPENDENCIES_5 = libstacktrace.la libprofiler.la
+@MINGW_FALSE@profiledata_unittest_DEPENDENCIES = \
+@MINGW_FALSE@ $(am__DEPENDENCIES_5)
am__profiler1_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \
src/tests/testutil.h src/tests/testutil.cc \
src/config_for_unittests.h src/google/profiler.h
@@ -406,7 +417,6 @@ am__profiler1_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \
@MINGW_FALSE@ $(am__objects_1)
@MINGW_FALSE@am_profiler1_unittest_OBJECTS = $(am__objects_12)
profiler1_unittest_OBJECTS = $(am_profiler1_unittest_OBJECTS)
-@MINGW_FALSE@am__DEPENDENCIES_5 = libstacktrace.la libprofiler.la
@MINGW_FALSE@profiler1_unittest_DEPENDENCIES = $(am__DEPENDENCIES_5)
am__profiler2_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \
src/tests/testutil.h src/tests/testutil.cc \
@@ -535,10 +545,10 @@ CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXXLD) $(AM_CXXFLAGS) \
$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
SOURCES = $(liblogging_la_SOURCES) $(libprofiler_la_SOURCES) \
$(libspinlock_la_SOURCES) $(libstacktrace_la_SOURCES) \
- $(libtcmalloc_la_SOURCES) $(libtcmalloc_minimal_la_SOURCES) \
- $(libwindows_la_SOURCES) $(addressmap_unittest_SOURCES) \
- $(atomicops_unittest_SOURCES) $(frag_unittest_SOURCES) \
- $(getpc_test_SOURCES) \
+ $(libsysinfo_la_SOURCES) $(libtcmalloc_la_SOURCES) \
+ $(libtcmalloc_minimal_la_SOURCES) $(libwindows_la_SOURCES) \
+ $(addressmap_unittest_SOURCES) $(atomicops_unittest_SOURCES) \
+ $(frag_unittest_SOURCES) $(getpc_test_SOURCES) \
$(heap_checker_death_unittest_sh_SOURCES) \
$(heap_checker_unittest_SOURCES) \
$(heap_checker_unittest_sh_SOURCES) \
@@ -563,7 +573,7 @@ SOURCES = $(liblogging_la_SOURCES) $(libprofiler_la_SOURCES) \
DIST_SOURCES = $(liblogging_la_SOURCES) \
$(am__libprofiler_la_SOURCES_DIST) \
$(am__libspinlock_la_SOURCES_DIST) $(libstacktrace_la_SOURCES) \
- $(am__libtcmalloc_la_SOURCES_DIST) \
+ $(libsysinfo_la_SOURCES) $(am__libtcmalloc_la_SOURCES_DIST) \
$(am__libtcmalloc_minimal_la_SOURCES_DIST) \
$(am__libwindows_la_SOURCES_DIST) \
$(addressmap_unittest_SOURCES) \
@@ -625,8 +635,9 @@ am__dist_doc_DATA_DIST = AUTHORS COPYING ChangeLog INSTALL NEWS README \
doc/pageheap.dot doc/spanmap.dot doc/threadheap.dot \
doc/heapprofile.html doc/heap-example1.png \
doc/heap_checker.html doc/cpuprofile.html \
- doc/pprof-test-big.gif doc/pprof-test.gif \
- doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif
+ doc/cpuprofile-fileformat.html doc/pprof-test-big.gif \
+ doc/pprof-test.gif doc/pprof-vsnprintf-big.gif \
+ doc/pprof-vsnprintf.gif
dist_docDATA_INSTALL = $(INSTALL_DATA)
DATA = $(dist_doc_DATA)
am__googleinclude_HEADERS_DIST = src/google/stacktrace.h \
@@ -674,8 +685,8 @@ ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
-ENABLE_FRAME_POINTER_FALSE = @ENABLE_FRAME_POINTER_FALSE@
-ENABLE_FRAME_POINTER_TRUE = @ENABLE_FRAME_POINTER_TRUE@
+ENABLE_FRAME_POINTERS_FALSE = @ENABLE_FRAME_POINTERS_FALSE@
+ENABLE_FRAME_POINTERS_TRUE = @ENABLE_FRAME_POINTERS_TRUE@
EXEEXT = @EXEEXT@
F77 = @F77@
FFLAGS = @FFLAGS@
@@ -830,7 +841,8 @@ dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README \
lib_LTLIBRARIES = libstacktrace.la libtcmalloc_minimal.la \
$(am__append_11)
# This is for 'convenience libraries' -- basically just a container for sources
-noinst_LTLIBRARIES = liblogging.la $(am__append_4) $(am__append_5)
+noinst_LTLIBRARIES = liblogging.la libsysinfo.la $(am__append_4) \
+ $(am__append_5)
WINDOWS_PROJECTS = google-perftools.sln \
vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj \
vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj \
@@ -889,6 +901,15 @@ LOGGING_INCLUDES = src/base/logging.h \
liblogging_la_SOURCES = src/base/logging.cc \
$(LOGGING_INCLUDES)
+SYSINFO_INCLUDES = src/base/sysinfo.h \
+ src/base/logging.h \
+ src/base/commandlineflags.h \
+ src/base/cycleclock.h \
+ src/base/basictypes.h
+
+libsysinfo_la_SOURCES = src/base/sysinfo.cc \
+ $(SYSINFO_INCLUDES)
+
# For MinGW, we use libwindows and not libspinlock. For every other
# unix system, we use libspinlock and don't need libwindows. Luckily,
@@ -912,8 +933,9 @@ liblogging_la_SOURCES = src/base/logging.cc \
@MINGW_TRUE@ src/windows/preamble_patcher.cc \
@MINGW_TRUE@ src/windows/preamble_patcher_with_stub.cc
-@MINGW_FALSE@LIBSPINLOCK = libspinlock.la
-@MINGW_TRUE@LIBSPINLOCK = libwindows.la
+# spinlock also needs NumCPUs, from libsysinfo, which in turn needs liblogging
+@MINGW_FALSE@LIBSPINLOCK = libspinlock.la libsysinfo.la liblogging.la
+@MINGW_TRUE@LIBSPINLOCK = libwindows.la libsysinfo.la liblogging.la
@MINGW_FALSE@MAYBE_THREADS_CC = src/maybe_threads.cc
@MINGW_TRUE@MAYBE_THREADS_CC =
@MINGW_FALSE@SYSTEM_ALLOC_CC = src/system-alloc.cc
@@ -922,6 +944,7 @@ liblogging_la_SOURCES = src/base/logging.cc \
@MINGW_FALSE@SPINLOCK_INCLUDES = src/base/spinlock.h \
@MINGW_FALSE@ src/base/atomicops.h \
@MINGW_FALSE@ src/base/atomicops-internals-macosx.h \
+@MINGW_FALSE@ src/base/atomicops-internals-linuxppc.h \
@MINGW_FALSE@ src/base/atomicops-internals-x86-msvc.h \
@MINGW_FALSE@ src/base/atomicops-internals-x86.h
@@ -940,7 +963,7 @@ low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \
src/tests/low_level_alloc_unittest.cc \
$(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES)
-low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la
+low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libstacktrace.la
@MINGW_FALSE@ATOMICOPS_UNITTEST_INCLUDES = src/base/atomicops.h \
@MINGW_FALSE@ src/base/atomicops-internals-macosx.h \
@MINGW_FALSE@ src/base/atomicops-internals-x86-msvc.h \
@@ -950,7 +973,7 @@ low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la
@MINGW_FALSE@atomicops_unittest_SOURCES = src/tests/atomicops_unittest.cc \
@MINGW_FALSE@ $(ATOMICOPS_UNITTEST_INCLUDES)
-@MINGW_FALSE@atomicops_unittest_LDADD = $(LIBSPINLOCK) liblogging.la
+@MINGW_FALSE@atomicops_unittest_LDADD = $(LIBSPINLOCK)
### ------- stack trace
@@ -1019,7 +1042,7 @@ libtcmalloc_minimal_la_SOURCES = src/internal_logging.cc \
libtcmalloc_minimal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS)
libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) \
- libstacktrace.la $(LIBSPINLOCK) liblogging.la
+ libstacktrace.la $(LIBSPINLOCK)
# Whenever we link in tcmalloc_minimal, we also need to link in
@@ -1155,7 +1178,6 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS)
@MINGW_FALSE@ src/heap-checker.cc \
@MINGW_FALSE@ src/base/linuxthreads.c \
@MINGW_FALSE@ src/base/thread_lister.c \
-@MINGW_FALSE@ src/base/sysinfo.cc \
@MINGW_FALSE@ src/base/low_level_alloc.cc \
@MINGW_FALSE@ $(TCMALLOC_INCLUDES) \
@MINGW_FALSE@ src/heap-checker-bcad.cc
@@ -1163,7 +1185,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS)
@MINGW_FALSE@libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
@MINGW_FALSE@libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS)
@MINGW_FALSE@libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) \
-@MINGW_FALSE@ libstacktrace.la $(LIBSPINLOCK) liblogging.la
+@MINGW_FALSE@ libstacktrace.la $(LIBSPINLOCK)
# See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this.
@@ -1232,7 +1254,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS)
@MINGW_FALSE@ src/base/commandlineflags.h \
@MINGW_FALSE@ src/base/googleinit.h \
@MINGW_FALSE@ src/base/logging.h \
-@MINGW_FALSE@ src/base/mutex.h \
+@MINGW_FALSE@ src/base/simple_mutex.h \
@MINGW_FALSE@ src/base/sysinfo.h \
@MINGW_FALSE@ $(SPINLOCK_INCLUDES) \
@MINGW_FALSE@ $(LOGGING_INCLUDES)
@@ -1243,12 +1265,11 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS)
@MINGW_FALSE@CPU_PROFILER_INCLUDES = $(S_CPU_PROFILER_INCLUDES) $(SG_CPU_PROFILER_INCLUDES)
@MINGW_FALSE@libprofiler_la_SOURCES = src/profiler.cc \
@MINGW_FALSE@ src/profiledata.cc \
-@MINGW_FALSE@ src/base/sysinfo.cc \
@MINGW_FALSE@ $(CPU_PROFILER_INCLUDES)
-@MINGW_FALSE@libprofiler_la_LIBADD = $(LIBSPINLOCK) liblogging.la libstacktrace.la
+@MINGW_FALSE@libprofiler_la_LIBADD = $(LIBSPINLOCK) libstacktrace.la
# We have to include ProfileData for profiledata_unittest
-@MINGW_FALSE@CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfilerThreadState|ProfileData)'
+@MINGW_FALSE@CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfileData)'
@MINGW_FALSE@libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS)
# See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this.
@@ -1263,7 +1284,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS)
@MINGW_FALSE@ src/base/logging.h \
@MINGW_FALSE@ src/base/basictypes.h
-@MINGW_FALSE@profiledata_unittest_LDADD = -lprofiler
+@MINGW_FALSE@profiledata_unittest_LDADD = $(LIBPROFILER)
@MINGW_FALSE@profiler_unittest_sh_SOURCES = src/tests/profiler_unittest.sh
@MINGW_FALSE@PROFILER_UNITTEST_INCLUDES = src/config_for_unittests.h \
@MINGW_FALSE@ src/google/profiler.h
@@ -1393,6 +1414,8 @@ libspinlock.la: $(libspinlock_la_OBJECTS) $(libspinlock_la_DEPENDENCIES)
$(CXXLINK) $(am_libspinlock_la_rpath) $(libspinlock_la_LDFLAGS) $(libspinlock_la_OBJECTS) $(libspinlock_la_LIBADD) $(LIBS)
libstacktrace.la: $(libstacktrace_la_OBJECTS) $(libstacktrace_la_DEPENDENCIES)
$(CXXLINK) -rpath $(libdir) $(libstacktrace_la_LDFLAGS) $(libstacktrace_la_OBJECTS) $(libstacktrace_la_LIBADD) $(LIBS)
+libsysinfo.la: $(libsysinfo_la_OBJECTS) $(libsysinfo_la_DEPENDENCIES)
+ $(CXXLINK) $(libsysinfo_la_LDFLAGS) $(libsysinfo_la_OBJECTS) $(libsysinfo_la_LIBADD) $(LIBS)
libtcmalloc.la: $(libtcmalloc_la_OBJECTS) $(libtcmalloc_la_DEPENDENCIES)
$(CXXLINK) $(am_libtcmalloc_la_rpath) $(libtcmalloc_la_LDFLAGS) $(libtcmalloc_la_OBJECTS) $(libtcmalloc_la_LIBADD) $(LIBS)
libtcmalloc_minimal.la: $(libtcmalloc_minimal_la_OBJECTS) $(libtcmalloc_minimal_la_DEPENDENCIES)
@@ -1539,7 +1562,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-maybe_threads.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-memfs_malloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-memory_region_map.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-sysinfo.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-system-alloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_la-tcmalloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtcmalloc_minimal_la-internal_logging.Plo@am__quote@
@@ -1698,13 +1720,6 @@ profiledata.lo: src/profiledata.cc
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o profiledata.lo `test -f 'src/profiledata.cc' || echo '$(srcdir)/'`src/profiledata.cc
-sysinfo.lo: src/base/sysinfo.cc
-@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT sysinfo.lo -MD -MP -MF "$(DEPDIR)/sysinfo.Tpo" -c -o sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc; \
-@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/sysinfo.Tpo" "$(DEPDIR)/sysinfo.Plo"; else rm -f "$(DEPDIR)/sysinfo.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/sysinfo.cc' object='sysinfo.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc
-
spinlock.lo: src/base/spinlock.cc
@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT spinlock.lo -MD -MP -MF "$(DEPDIR)/spinlock.Tpo" -c -o spinlock.lo `test -f 'src/base/spinlock.cc' || echo '$(srcdir)/'`src/base/spinlock.cc; \
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/spinlock.Tpo" "$(DEPDIR)/spinlock.Plo"; else rm -f "$(DEPDIR)/spinlock.Tpo"; exit 1; fi
@@ -1726,6 +1741,13 @@ stacktrace.lo: src/stacktrace.cc
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o stacktrace.lo `test -f 'src/stacktrace.cc' || echo '$(srcdir)/'`src/stacktrace.cc
+sysinfo.lo: src/base/sysinfo.cc
+@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT sysinfo.lo -MD -MP -MF "$(DEPDIR)/sysinfo.Tpo" -c -o sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc; \
+@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/sysinfo.Tpo" "$(DEPDIR)/sysinfo.Plo"; else rm -f "$(DEPDIR)/sysinfo.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/sysinfo.cc' object='sysinfo.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc
+
libtcmalloc_la-internal_logging.lo: src/internal_logging.cc
@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -MT libtcmalloc_la-internal_logging.lo -MD -MP -MF "$(DEPDIR)/libtcmalloc_la-internal_logging.Tpo" -c -o libtcmalloc_la-internal_logging.lo `test -f 'src/internal_logging.cc' || echo '$(srcdir)/'`src/internal_logging.cc; \
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libtcmalloc_la-internal_logging.Tpo" "$(DEPDIR)/libtcmalloc_la-internal_logging.Plo"; else rm -f "$(DEPDIR)/libtcmalloc_la-internal_logging.Tpo"; exit 1; fi
@@ -1803,13 +1825,6 @@ libtcmalloc_la-heap-checker.lo: src/heap-checker.cc
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -c -o libtcmalloc_la-heap-checker.lo `test -f 'src/heap-checker.cc' || echo '$(srcdir)/'`src/heap-checker.cc
-libtcmalloc_la-sysinfo.lo: src/base/sysinfo.cc
-@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -MT libtcmalloc_la-sysinfo.lo -MD -MP -MF "$(DEPDIR)/libtcmalloc_la-sysinfo.Tpo" -c -o libtcmalloc_la-sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc; \
-@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libtcmalloc_la-sysinfo.Tpo" "$(DEPDIR)/libtcmalloc_la-sysinfo.Plo"; else rm -f "$(DEPDIR)/libtcmalloc_la-sysinfo.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/sysinfo.cc' object='libtcmalloc_la-sysinfo.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -c -o libtcmalloc_la-sysinfo.lo `test -f 'src/base/sysinfo.cc' || echo '$(srcdir)/'`src/base/sysinfo.cc
-
libtcmalloc_la-low_level_alloc.lo: src/base/low_level_alloc.cc
@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libtcmalloc_la_CXXFLAGS) $(CXXFLAGS) -MT libtcmalloc_la-low_level_alloc.lo -MD -MP -MF "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo" -c -o libtcmalloc_la-low_level_alloc.lo `test -f 'src/base/low_level_alloc.cc' || echo '$(srcdir)/'`src/base/low_level_alloc.cc; \
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo" "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Plo"; else rm -f "$(DEPDIR)/libtcmalloc_la-low_level_alloc.Tpo"; exit 1; fi
@@ -2852,8 +2867,8 @@ uninstall-man: uninstall-man1
uninstall-info-am uninstall-libLTLIBRARIES uninstall-man \
uninstall-man1
-@ENABLE_FRAME_POINTER_FALSE@@X86_64_TRUE@ # TODO(csilvers): check if -fomit-frame-pointer might be in $(CXXFLAGS),
-@ENABLE_FRAME_POINTER_FALSE@@X86_64_TRUE@ # before setting this.
+@ENABLE_FRAME_POINTERS_FALSE@@X86_64_TRUE@ # TODO(csilvers): check if -fomit-frame-pointer might be in $(CXXFLAGS),
+@ENABLE_FRAME_POINTERS_FALSE@@X86_64_TRUE@ # before setting this.
@MINGW_FALSE@pprof_unittest: $(top_srcdir)/src/pprof
@MINGW_FALSE@ $(top_srcdir)/src/pprof -test
# This script preloads libtcmalloc, and calls two other binaries as well
diff --git a/configure b/configure
index b3dc9b8..b027010 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59 for google-perftools 0.94.1.
+# Generated by GNU Autoconf 2.59 for google-perftools 0.95.
#
# Report bugs to <opensource@google.com>.
#
@@ -423,8 +423,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='google-perftools'
PACKAGE_TARNAME='google-perftools'
-PACKAGE_VERSION='0.94.1'
-PACKAGE_STRING='google-perftools 0.94.1'
+PACKAGE_VERSION='0.95'
+PACKAGE_STRING='google-perftools 0.95'
PACKAGE_BUGREPORT='opensource@google.com'
ac_unique_file="README"
@@ -465,7 +465,7 @@ ac_includes_default="\
# include <unistd.h>
#endif"
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CPP CXX CXXFLAGS ac_ct_CXX CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE GCC_TRUE GCC_FALSE EGREP LN_S ECHO AR ac_ct_AR RANLIB ac_ct_RANLIB CXXCPP F77 FFLAGS ac_ct_F77 LIBTOOL LIBTOOL_DEPS UNWIND_LIBS ENABLE_FRAME_POINTER_TRUE ENABLE_FRAME_POINTER_FALSE X86_64_TRUE X86_64_FALSE acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS MINGW_TRUE MINGW_FALSE LIBOBJS LTLIBOBJS'
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CPP CXX CXXFLAGS ac_ct_CXX CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE GCC_TRUE GCC_FALSE EGREP LN_S ECHO AR ac_ct_AR RANLIB ac_ct_RANLIB CXXCPP F77 FFLAGS ac_ct_F77 LIBTOOL LIBTOOL_DEPS UNWIND_LIBS ENABLE_FRAME_POINTERS_TRUE ENABLE_FRAME_POINTERS_FALSE X86_64_TRUE X86_64_FALSE acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS MINGW_TRUE MINGW_FALSE LIBOBJS LTLIBOBJS'
ac_subst_files=''
# Initialize some variables set by options.
@@ -954,7 +954,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures google-perftools 0.94.1 to adapt to many kinds of systems.
+\`configure' configures google-perftools 0.95 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1021,7 +1021,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of google-perftools 0.94.1:";;
+ short | recursive ) echo "Configuration of google-perftools 0.95:";;
esac
cat <<\_ACEOF
@@ -1037,7 +1037,7 @@ Optional Features:
--enable-fast-install[=PKGS]
optimize for fast installation [default=yes]
--disable-libtool-lock avoid locking (might break parallel builds)
- --enable-frame-pointer On x86_64 systems, compile with
+ --enable-frame-pointers On x86_64 systems, compile with
-fno-omit-frame-pointer (see INSTALL)
Optional Packages:
@@ -1162,7 +1162,7 @@ fi
test -n "$ac_init_help" && exit 0
if $ac_init_version; then
cat <<\_ACEOF
-google-perftools configure 0.94.1
+google-perftools configure 0.95
generated by GNU Autoconf 2.59
Copyright (C) 2003 Free Software Foundation, Inc.
@@ -1176,7 +1176,7 @@ cat >&5 <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by google-perftools $as_me 0.94.1, which was
+It was created by google-perftools $as_me 0.95, which was
generated by GNU Autoconf 2.59. Invocation command line was
$ $0 $@
@@ -1904,7 +1904,7 @@ fi
# Define the identity of the package.
PACKAGE='google-perftools'
- VERSION='0.94.1'
+ VERSION='0.95'
cat >>confdefs.h <<_ACEOF
@@ -22042,6 +22042,7 @@ pc_fields=" uc_mcontext.gregs[REG_PC]" # Solaris x86 (32 + 64 bit)
pc_fields="$pc_fields uc_mcontext.gregs[REG_EIP]" # Linux (i386)
pc_fields="$pc_fields uc_mcontext.gregs[REG_RIP]" # Linux (x86_64)
pc_fields="$pc_fields uc_mcontext.sc_ip" # Linux (ia64)
+pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[PT_NIP]" # Linux (ppc)
pc_fields="$pc_fields uc_mcontext.mc_eip" # FreeBSD (i386)
pc_fields="$pc_fields uc_mcontext.mc_rip" # FreeBSD (x86_64 [untested])
pc_fields="$pc_fields uc_mcontext->ss.eip" # OS X (i386, <=10.4)
@@ -22185,12 +22186,12 @@ fi
# On x86_64, instead of libunwind, we can choose to compile with frame-pointers
# (This isn't needed on i386, where -fno-omit-frame-pointer is the default).
-# Check whether --enable-frame_pointer or --disable-frame_pointer was given.
-if test "${enable_frame_pointer+set}" = set; then
- enableval="$enable_frame_pointer"
+# Check whether --enable-frame_pointers or --disable-frame_pointers was given.
+if test "${enable_frame_pointers+set}" = set; then
+ enableval="$enable_frame_pointers"
else
- enable_frame_pointer=no
+ enable_frame_pointers=no
fi;
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
@@ -22238,12 +22239,12 @@ fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-if test "$enable_frame_pointer" = yes; then
- ENABLE_FRAME_POINTER_TRUE=
- ENABLE_FRAME_POINTER_FALSE='#'
+if test "$enable_frame_pointers" = yes; then
+ ENABLE_FRAME_POINTERS_TRUE=
+ ENABLE_FRAME_POINTERS_FALSE='#'
else
- ENABLE_FRAME_POINTER_TRUE='#'
- ENABLE_FRAME_POINTER_FALSE=
+ ENABLE_FRAME_POINTERS_TRUE='#'
+ ENABLE_FRAME_POINTERS_FALSE=
fi
@@ -23799,10 +23800,10 @@ echo "$as_me: error: conditional \"GCC\" was never defined.
Usually this means the macro was only invoked conditionally." >&2;}
{ (exit 1); exit 1; }; }
fi
-if test -z "${ENABLE_FRAME_POINTER_TRUE}" && test -z "${ENABLE_FRAME_POINTER_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"ENABLE_FRAME_POINTER\" was never defined.
+if test -z "${ENABLE_FRAME_POINTERS_TRUE}" && test -z "${ENABLE_FRAME_POINTERS_FALSE}"; then
+ { { echo "$as_me:$LINENO: error: conditional \"ENABLE_FRAME_POINTERS\" was never defined.
Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"ENABLE_FRAME_POINTER\" was never defined.
+echo "$as_me: error: conditional \"ENABLE_FRAME_POINTERS\" was never defined.
Usually this means the macro was only invoked conditionally." >&2;}
{ (exit 1); exit 1; }; }
fi
@@ -24091,7 +24092,7 @@ _ASBOX
} >&5
cat >&5 <<_CSEOF
-This file was extended by google-perftools $as_me 0.94.1, which was
+This file was extended by google-perftools $as_me 0.95, which was
generated by GNU Autoconf 2.59. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -24154,7 +24155,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-google-perftools config.status 0.94.1
+google-perftools config.status 0.95
configured by $0, generated by GNU Autoconf 2.59,
with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
@@ -24425,8 +24426,8 @@ s,@ac_ct_F77@,$ac_ct_F77,;t t
s,@LIBTOOL@,$LIBTOOL,;t t
s,@LIBTOOL_DEPS@,$LIBTOOL_DEPS,;t t
s,@UNWIND_LIBS@,$UNWIND_LIBS,;t t
-s,@ENABLE_FRAME_POINTER_TRUE@,$ENABLE_FRAME_POINTER_TRUE,;t t
-s,@ENABLE_FRAME_POINTER_FALSE@,$ENABLE_FRAME_POINTER_FALSE,;t t
+s,@ENABLE_FRAME_POINTERS_TRUE@,$ENABLE_FRAME_POINTERS_TRUE,;t t
+s,@ENABLE_FRAME_POINTERS_FALSE@,$ENABLE_FRAME_POINTERS_FALSE,;t t
s,@X86_64_TRUE@,$X86_64_TRUE,;t t
s,@X86_64_FALSE@,$X86_64_FALSE,;t t
s,@acx_pthread_config@,$acx_pthread_config,;t t
diff --git a/configure.ac b/configure.ac
index b008309..bcec57a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4,7 +4,7 @@
# make sure we're interpreted by some minimal autoconf
AC_PREREQ(2.57)
-AC_INIT(google-perftools, 0.94.1, opensource@google.com)
+AC_INIT(google-perftools, 0.95, opensource@google.com)
# The argument here is just something that should be in the current directory
# (for sanity checking)
AC_CONFIG_SRCDIR(README)
@@ -75,6 +75,7 @@ pc_fields=" uc_mcontext.gregs[[REG_PC]]" # Solaris x86 (32 + 64 bit)
pc_fields="$pc_fields uc_mcontext.gregs[[REG_EIP]]" # Linux (i386)
pc_fields="$pc_fields uc_mcontext.gregs[[REG_RIP]]" # Linux (x86_64)
pc_fields="$pc_fields uc_mcontext.sc_ip" # Linux (ia64)
+pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[[PT_NIP]]" # Linux (ppc)
pc_fields="$pc_fields uc_mcontext.mc_eip" # FreeBSD (i386)
pc_fields="$pc_fields uc_mcontext.mc_rip" # FreeBSD (x86_64 [untested])
pc_fields="$pc_fields uc_mcontext->ss.eip" # OS X (i386, <=10.4)
@@ -105,13 +106,13 @@ AC_SUBST(UNWIND_LIBS)
# On x86_64, instead of libunwind, we can choose to compile with frame-pointers
# (This isn't needed on i386, where -fno-omit-frame-pointer is the default).
-AC_ARG_ENABLE(frame_pointer,
- AS_HELP_STRING([--enable-frame-pointer],
+AC_ARG_ENABLE(frame_pointers,
+ AS_HELP_STRING([--enable-frame-pointers],
[On x86_64 systems, compile with -fno-omit-frame-pointer (see INSTALL)]),
- , enable_frame_pointer=no)
+ , enable_frame_pointers=no)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, [return __x86_64__ == 1 ? 0 : 1])],
[is_x86_64=yes], [is_x86_64=no])
-AM_CONDITIONAL(ENABLE_FRAME_POINTER, test "$enable_frame_pointer" = yes)
+AM_CONDITIONAL(ENABLE_FRAME_POINTERS, test "$enable_frame_pointers" = yes)
AM_CONDITIONAL(X86_64, test "$is_x86_64" = yes)
# Defines PRIuS
diff --git a/doc/heap_checker.html b/doc/heap_checker.html
index 1ee2668..0b84c67 100644
--- a/doc/heap_checker.html
+++ b/doc/heap_checker.html
@@ -182,7 +182,7 @@ referred to in this file, are declared in
code that exercises some foo functionality;
this code should preserve memory allocation state;
}
- if (!heap_checker.SameHeap()) assert("" == "heap memory leak");
+ if (!heap_checker.SameHeap()) assert(NULL == "heap memory leak");
</pre>
<p>The various flavors of these functions -- <code>SameHeap()</code>,
diff --git a/doc/tcmalloc.html b/doc/tcmalloc.html
index c399188..fd2d3cd 100644
--- a/doc/tcmalloc.html
+++ b/doc/tcmalloc.html
@@ -57,10 +57,10 @@ and ends up using <code>16N</code> bytes.</p>
<h2>Usage</h2>
-<p>To use TCmalloc, just link tcmalloc into your application via the
+<p>To use TCMalloc, just link TCMalloc into your application via the
"-ltcmalloc" linker flag.</p>
-<p>You can use tcmalloc in applications you didn't compile yourself,
+<p>You can use TCMalloc in applications you didn't compile yourself,
by using LD_PRELOAD:</p>
<pre>
$ LD_PRELOAD="/usr/lib/libtcmalloc.so" <binary>
@@ -86,7 +86,7 @@ needed, and periodic garbage collections are used to migrate memory
back from a thread-local cache into the central data structures.</p>
<center><img src="overview.gif"></center>
-<p>TCMalloc treates objects with size &lt;= 32K ("small" objects)
+<p>TCMalloc treats objects with size &lt;= 32K ("small" objects)
differently from larger objects. Large objects are allocated directly
from the central heap using a page-level allocator (a page is a 4K
aligned region of memory). I.e., a large object is always
@@ -104,7 +104,7 @@ size-classes. For example, all allocations in the range 961 to 1024
bytes are rounded up to 1024. The size-classes are spaced so that
small sizes are separated by 8 bytes, larger sizes by 16 bytes, even
larger sizes by 32 bytes, and so forth. The maximal spacing (for
-sizes >= ~2K) is 256 bytes.</p>
+sizes &gt;= ~2K) is 256 bytes.</p>
<p>A thread cache contains a singly linked list of free objects per
size-class.</p>
@@ -290,13 +290,13 @@ threads. The raw data used to generate these graphs (the output of the
<ul>
<li> TCMalloc is much more consistently scalable than PTMalloc2 - for
- all thread counts >1 it achieves ~7-9 million ops/sec for small
+ all thread counts &gt;1 it achieves ~7-9 million ops/sec for small
allocations, falling to ~2 million ops/sec for larger
allocations. The single-thread case is an obvious outlier,
since it is only able to keep a single processor busy and hence
can achieve fewer ops/sec. PTMalloc2 has a much higher variance
on operations/sec - peaking somewhere around 4 million ops/sec
- for small allocations and falling to <1 million ops/sec for
+ for small allocations and falling to &lt;1 million ops/sec for
larger allocations.
<li> TCMalloc is faster than PTMalloc2 in the vast majority of
@@ -309,10 +309,10 @@ threads. The raw data used to generate these graphs (the output of the
2MB). With larger allocation sizes, fewer objects can be stored
in the cache before it is garbage-collected.
- <li> There is a noticeably drop in the TCMalloc performance at ~32K
+ <li> There is a noticeable drop in TCMalloc's performance at ~32K
maximum allocation size; at larger sizes performance drops less
quickly. This is due to the 32K maximum size of objects in the
- per-thread caches; for objects larger than this tcmalloc
+ per-thread caches; for objects larger than this TCMalloc
allocates from the central page heap.
</ul>
diff --git a/packages/deb/changelog b/packages/deb/changelog
index ee237af..167d6c0 100644
--- a/packages/deb/changelog
+++ b/packages/deb/changelog
@@ -1,3 +1,9 @@
+google-perftools (0.95-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Google Inc. <opensource@google.com> Tue, 12 Feb 2008 12:28:32 -0800
+
google-perftools (0.94-1) unstable; urgency=low
* New upstream release.
diff --git a/packages/deb/docs b/packages/deb/docs
index d43c7dc..df891c9 100644
--- a/packages/deb/docs
+++ b/packages/deb/docs
@@ -6,6 +6,7 @@ NEWS
README
TODO
doc/cpuprofile.html
+doc/cpuprofile-fileformat.html
doc/designstyle.css
doc/heap-example1.png
doc/heap_checker.html
diff --git a/packages/rpm/rpm.spec b/packages/rpm/rpm.spec
index fb513a7..15c7e63 100644
--- a/packages/rpm/rpm.spec
+++ b/packages/rpm/rpm.spec
@@ -51,7 +51,20 @@ rm -rf $RPM_BUILD_ROOT
%files
%defattr(-,root,root)
-%doc AUTHORS COPYING ChangeLog INSTALL NEWS README TODO doc/cpuprofile.html doc/designstyle.css doc/heap-example1.png doc/heap_checker.html doc/heapprofile.html doc/index.html doc/overview.dot doc/overview.gif doc/pageheap.dot doc/pageheap.gif doc/pprof-test-big.gif doc/pprof-test.gif doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif doc/pprof.1 doc/pprof_remote_servers.html doc/spanmap.dot doc/spanmap.gif doc/t-test1.times.txt doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png doc/tcmalloc-opspersec.vs.size.1.threads.png doc/tcmalloc-opspersec.vs.size.12.threads.png doc/tcmalloc-opspersec.vs.size.16.threads.png doc/tcmalloc-opspersec.vs.size.2.threads.png doc/tcmalloc-opspersec.vs.size.20.threads.png doc/tcmalloc-opspersec.vs.size.3.threads.png doc/tcmalloc-opspersec.vs.size.4.threads.png doc/tcmalloc-opspersec.vs.size.5.threads.png doc/tcmalloc-opspersec.vs.size.8.threads.png doc/tcmalloc.html doc/threadheap.dot doc/threadheap.gif
+%doc AUTHORS COPYING ChangeLog INSTALL NEWS README TODO
+%doc doc/index.html doc/designstyle.css
+%doc doc/pprof.1 doc/pprof_remote_servers.html
+%doc doc/tcmalloc.html
+%doc doc/overview.dot doc/overview.gif
+%doc doc/pageheap.dot doc/pageheap.gif
+%doc doc/spanmap.dot doc/spanmap.gif
+%doc doc/threadheap.dot doc/threadheap.gif
+%doc doc/t-test1.times.txt
+%doc doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png
+%doc doc/tcmalloc-opspersec.vs.size.1.threads.png doc/tcmalloc-opspersec.vs.size.12.threads.png doc/tcmalloc-opspersec.vs.size.16.threads.png doc/tcmalloc-opspersec.vs.size.2.threads.png doc/tcmalloc-opspersec.vs.size.20.threads.png doc/tcmalloc-opspersec.vs.size.3.threads.png doc/tcmalloc-opspersec.vs.size.4.threads.png doc/tcmalloc-opspersec.vs.size.5.threads.png doc/tcmalloc-opspersec.vs.size.8.threads.png
+%doc doc/heapprofile.html doc/heap-example1.png doc/heap_checker.html
+%doc doc/cpuprofile.html doc/cpuprofile-fileformat.html
+%doc doc/pprof-test-big.gif doc/pprof-test.gif doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif
%{prefix}/lib/libstacktrace.so.0
%{prefix}/lib/libstacktrace.so.0.0.0
diff --git a/src/addressmap-inl.h b/src/addressmap-inl.h
index 7760710..a8cbb77 100644
--- a/src/addressmap-inl.h
+++ b/src/addressmap-inl.h
@@ -97,16 +97,16 @@ class AddressMap {
typedef void* (*Allocator)(size_t);
typedef void (*DeAllocator)(void*);
typedef const void* Key;
-
+
// Create an AddressMap that uses the specified allocator/deallocator.
// The allocator/deallocator should behave like malloc/free.
// For instance, the allocator does not need to return initialized memory.
AddressMap(Allocator alloc, DeAllocator dealloc);
~AddressMap();
- // If the map contains an entry for "key", store the associated
- // value in "*result" and return true. Else return false.
- bool Find(Key key, Value* result);
+ // If the map contains an entry for "key", return it. Else return NULL.
+ inline const Value* Find(Key key) const;
+ inline Value* FindMutable(Key key);
// Insert <key,value> into the map. Any old value associated
// with key is forgotten.
@@ -117,12 +117,24 @@ class AddressMap {
// and returns true. Else returns false.
bool FindAndRemove(Key key, Value* removed_value);
+ // Similar to Find but we assume that keys are addresses of non-overlapping
+ // memory ranges whose sizes are given by size_func.
+ // If the map contains a range into which "key" points
+ // (at its start or inside of it, but not at the end),
+ // return the address of the associated value
+ // and store its key in "*res_key".
+ // Else return NULL.
+ // max_size specifies largest range size possibly in existence now.
+ typedef size_t (*ValueSizeFunc)(const Value& v);
+ const Value* FindInside(ValueSizeFunc size_func, size_t max_size,
+ Key key, Key* res_key);
+
// Iterate over the address map calling 'callback'
// for all stored key-value pairs and passing 'arg' to it.
// We don't use full Closure/Callback machinery not to add
// unnecessary dependencies to this class with low-level uses.
template<class Type>
- void Iterate(void (*callback)(Key, Value, Type), Type arg) const;
+ inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const;
private:
typedef uintptr_t Number;
@@ -154,7 +166,7 @@ class AddressMap {
// We use a simple chaining hash-table to represent the clusters.
struct Cluster {
- Cluster* next; // Next cluster in chain
+ Cluster* next; // Next cluster in hash table chain
Number id; // Cluster ID
Entry* blocks[kClusterBlocks]; // Per-block linked-lists
};
@@ -169,8 +181,8 @@ class AddressMap {
// Number of entry objects allocated at a time
static const int ALLOC_COUNT = 64;
- Cluster** hashtable_; // The hash-table
- Entry* free_; // Free list of unused Entry objects
+ Cluster** hashtable_; // The hash-table
+ Entry* free_; // Free list of unused Entry objects
// Multiplicative hash function:
// The value "kHashMultiplier" is the bottom 32 bits of
@@ -208,7 +220,7 @@ class AddressMap {
}
return NULL;
}
-
+
// Return the block ID for an address within its cluster
static int BlockID(Number address) {
return (address >> kBlockBits) & (kClusterBlocks - 1);
@@ -264,18 +276,22 @@ AddressMap<Value>::~AddressMap() {
}
template <class Value>
-bool AddressMap<Value>::Find(Key key, Value* result) {
+inline const Value* AddressMap<Value>::Find(Key key) const {
+ return const_cast<AddressMap*>(this)->FindMutable(key);
+}
+
+template <class Value>
+inline Value* AddressMap<Value>::FindMutable(Key key) {
const Number num = reinterpret_cast<Number>(key);
const Cluster* const c = FindCluster(num, false/*do not create*/);
if (c != NULL) {
- for (const Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) {
+ for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) {
if (e->key == key) {
- *result = e->value;
- return true;
+ return &e->value;
}
}
}
- return false;
+ return NULL;
}
template <class Value>
@@ -330,14 +346,62 @@ bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) {
}
template <class Value>
+const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func,
+ size_t max_size,
+ Key key,
+ Key* res_key) {
+ const Number key_num = reinterpret_cast<Number>(key);
+ Number num = key_num; // we'll move this to move back through the clusters
+ while (1) {
+ const Cluster* c = FindCluster(num, false/*do not create*/);
+ if (c != NULL) {
+ while (1) {
+ const int block = BlockID(num);
+ bool had_smaller_key = false;
+ for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) {
+ const Number e_num = reinterpret_cast<Number>(e->key);
+ if (e_num <= key_num) {
+ if (e_num == key_num || // to handle 0-sized ranges
+ key_num < e_num + (*size_func)(e->value)) {
+ *res_key = e->key;
+ return &e->value;
+ }
+ had_smaller_key = true;
+ }
+ }
+ if (had_smaller_key) return NULL; // got a range before 'key'
+ // and it did not contain 'key'
+ if (block == 0) break;
+ // try address-wise previous block
+ num |= kBlockSize - 1; // start at the last addr of prev block
+ num -= kBlockSize;
+ if (key_num - num > max_size) return NULL;
+ }
+ }
+ if (num < kClusterSize) return NULL; // first cluster
+ // go to address-wise previous cluster to try
+ num |= kClusterSize - 1; // start at the last block of previous cluster
+ num -= kClusterSize;
+ if (key_num - num > max_size) return NULL;
+ // Having max_size to limit the search is crucial: else
+ // we have to traverse a lot of empty clusters (or blocks).
+ // We can avoid needing max_size if we put clusters into
+ // a search tree, but performance suffers considerably
+ // if we use this approach by using stl::set.
+ }
+}
+
+template <class Value>
template <class Type>
-void AddressMap<Value>::Iterate(void (*callback)(Key, Value, Type),
- Type arg) const {
+inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type),
+ Type arg) const {
+ // We could optimize this by traversing only non-empty clusters and/or blocks
+ // but it does not speed up heap-checker noticeably.
for (int h = 0; h < kHashSize; ++h) {
for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
for (int b = 0; b < kClusterBlocks; ++b) {
- for (const Entry* e = c->blocks[b]; e != NULL; e = e->next) {
- callback(e->key, e->value, arg);
+ for (Entry* e = c->blocks[b]; e != NULL; e = e->next) {
+ callback(e->key, &e->value, arg);
}
}
}
diff --git a/src/base/atomicops-internals-linuxppc.h b/src/base/atomicops-internals-linuxppc.h
new file mode 100644
index 0000000..6ddc5a8
--- /dev/null
+++ b/src/base/atomicops-internals-linuxppc.h
@@ -0,0 +1,193 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ */
+
+// Implementation of atomic operations for ppc-linux. This file should not
+// be included directly. Clients should instead include
+// "base/atomicops.h".
+
+#ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__
+#define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__
+
+#define LWSYNC_ON_SMP
+#define PPC405_ERR77(a, b)
+#define ISYNC_ON_SMP
+
+
+/* Adapted from atomic_add in asm-powerpc/atomic.h */
+inline int32_t OSAtomicAdd32(int32_t amount, int32_t *value) {
+ int t;
+ __asm__ __volatile__(
+"1: lwarx %0,0,%3 # atomic_add\n\
+ add %0,%2,%0\n"
+ PPC405_ERR77(0,%3)
+" stwcx. %0,0,%3 \n\
+ bne- 1b"
+ : "=&r" (t), "+m" (*value)
+ : "r" (amount), "r" (value)
+ : "cc");
+ return *value;
+}
+
+/* Adapted from __cmpxchg_u32 in asm-powerpc/atomic.h */
+inline bool OSAtomicCompareAndSwap32(int32_t old_value, int32_t new_value,
+ int32_t *value) {
+ unsigned int prev;
+ __asm__ __volatile__ (
+ LWSYNC_ON_SMP
+"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %4,0,%2\n\
+ bne- 1b"
+ ISYNC_ON_SMP
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*value)
+ : "r" (value), "r" (old_value), "r" (new_value)
+ : "cc", "memory");
+ return true;
+}
+
+/* Adapted from __cmpxchg_u32 in asm-powerpc/atomic.h */
+inline int32_t OSAtomicCompareAndSwap32Barrier(int32_t old_value,
+ int32_t new_value,
+ int32_t *value) {
+ unsigned int prev;
+ __asm__ __volatile__ (
+ LWSYNC_ON_SMP
+"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %4,0,%2\n\
+ bne- 1b"
+ ISYNC_ON_SMP
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*value)
+ : "r" (value), "r" (old_value), "r" (new_value)
+ : "cc", "memory");
+ return true;
+}
+
+inline void MemoryBarrier() {
+ // TODO
+}
+
+// int32_t and intptr_t seem to be equal on ppc-linux,
+// therefore we have no extra Atomic32 function versions.
+typedef int32_t Atomic32;
+typedef intptr_t AtomicWord;
+
+#define OSAtomicCastIntPtr(p) \
+ reinterpret_cast<int32_t *>(const_cast<AtomicWord *>(p))
+#define OSAtomicCompareAndSwapIntPtr OSAtomicCompareAndSwap32
+#define OSAtomicAddIntPtr OSAtomicAdd32
+#define OSAtomicCompareAndSwapIntPtrBarrier OSAtomicCompareAndSwap32Barrier
+
+
+inline AtomicWord CompareAndSwap(volatile AtomicWord *ptr,
+ AtomicWord old_value,
+ AtomicWord new_value) {
+ AtomicWord prev_value;
+ do {
+ if (OSAtomicCompareAndSwapIntPtr(old_value, new_value,
+ OSAtomicCastIntPtr(ptr))) {
+ return old_value;
+ }
+ prev_value = *ptr;
+ } while (prev_value == old_value);
+ return prev_value;
+}
+
+inline AtomicWord AtomicExchange(volatile AtomicWord *ptr,
+ AtomicWord new_value) {
+ AtomicWord old_value;
+ do {
+ old_value = *ptr;
+ } while (!OSAtomicCompareAndSwapIntPtr(old_value, new_value,
+ OSAtomicCastIntPtr(ptr)));
+ return old_value;
+}
+
+
+inline AtomicWord AtomicIncrement(volatile AtomicWord *ptr, AtomicWord increment) {
+ return OSAtomicAddIntPtr(increment, OSAtomicCastIntPtr(ptr));
+}
+
+inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord *ptr,
+ AtomicWord old_value,
+ AtomicWord new_value) {
+ AtomicWord prev_value;
+ do {
+ if (OSAtomicCompareAndSwapIntPtrBarrier(old_value, new_value,
+ OSAtomicCastIntPtr(ptr))) {
+ return old_value;
+ }
+ prev_value = *ptr;
+ } while (prev_value == old_value);
+ return prev_value;
+}
+
+inline AtomicWord Release_CompareAndSwap(volatile AtomicWord *ptr,
+ AtomicWord old_value,
+ AtomicWord new_value) {
+ // The ppc interface does not distinguish between Acquire and
+ // Release memory barriers; they are equivalent.
+ return Acquire_CompareAndSwap(ptr, old_value, new_value);
+}
+
+
+inline void Acquire_Store(volatile AtomicWord *ptr, AtomicWord value) {
+ *ptr = value;
+ MemoryBarrier();
+}
+
+inline void Release_Store(volatile AtomicWord *ptr, AtomicWord value) {
+ MemoryBarrier();
+ *ptr = value;
+}
+
+inline AtomicWord Acquire_Load(volatile const AtomicWord *ptr) {
+ AtomicWord value = *ptr;
+ MemoryBarrier();
+ return value;
+}
+
+inline AtomicWord Release_Load(volatile const AtomicWord *ptr) {
+ MemoryBarrier();
+ return *ptr;
+}
+
+
+#endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__
diff --git a/src/base/atomicops-internals-x86-msvc.h b/src/base/atomicops-internals-x86-msvc.h
index 8d10de4..cce120c 100644
--- a/src/base/atomicops-internals-x86-msvc.h
+++ b/src/base/atomicops-internals-x86-msvc.h
@@ -99,7 +99,7 @@ inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
}
// In msvc8/vs2005, winnt.h already contains a definition for MemoryBarrier.
-#if !(COMPILER_MSVC && _MSC_VER >= 1400)
+#if !(defined(_MSC_VER) && _MSC_VER >= 1400)
inline void MemoryBarrier() {
AtomicWord value = 0;
AtomicExchange(&value, 0); // acts as a barrier
diff --git a/src/base/atomicops-internals-x86.h b/src/base/atomicops-internals-x86.h
index 210e419..db3d4d2 100644
--- a/src/base/atomicops-internals-x86.h
+++ b/src/base/atomicops-internals-x86.h
@@ -147,7 +147,9 @@ inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
ATOMICOPS_COMPILER_BARRIER();
- *ptr = value; // works w/o barrier for current Intel chips as of June 2005
+ *ptr = value; // An x86 store acts as a release barrier
+ // for current AMD/Intel chips as of Jan 2008.
+ // See also Acquire_Load(), below.
// When new chips come out, check:
// IA-32 Intel Architecture Software Developer's Manual, Volume 3:
@@ -155,11 +157,19 @@ inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
// Section 7.2, Memory Ordering.
// Last seen at:
// http://developer.intel.com/design/pentium4/manuals/index_new.htm
+ //
+ // x86 stores/loads fail to act as barriers for a few instructions (clflush
+ // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
+ // not generated by the compiler, and are rare. Users of these instructions
+ // need to know about cache behaviour in any case since all of these involve
+ // either flushing cache lines or non-temporal cache hints.
}
inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
- AtomicWord value = *ptr;
- MemoryBarrier();
+ AtomicWord value = *ptr; // An x86 load acts as an acquire barrier,
+ // for current AMD/Intel chips as of Jan 2008.
+ // See also Release_Store(), above.
+ ATOMICOPS_COMPILER_BARRIER();
return value;
}
@@ -225,19 +235,14 @@ inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
ATOMICOPS_COMPILER_BARRIER();
- *ptr = value; // works w/o barrier for current Intel chips as of June 2005
-
- // When new chips come out, check:
- // IA-32 Intel Architecture Software Developer's Manual, Volume 3:
- // System Programming Guide, Chatper 7: Multiple-processor management,
- // Section 7.2, Memory Ordering.
- // Last seen at:
- // http://developer.intel.com/design/pentium4/manuals/index_new.htm
+ *ptr = value; // An x86 store acts as a release barrier.
+ // See comments in AtomicWord version of Release_Store(), above.
}
inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
- Atomic32 value = *ptr;
- MemoryBarrier();
+ Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
+ // See comments in AtomicWord version of Release_Store(), above.
+ ATOMICOPS_COMPILER_BARRIER();
return value;
}
diff --git a/src/base/atomicops.h b/src/base/atomicops.h
index 0c8da9a..81c365f 100644
--- a/src/base/atomicops.h
+++ b/src/base/atomicops.h
@@ -56,6 +56,8 @@
#include "base/atomicops-internals-x86.h"
#elif defined(__i386) && defined(MSVC)
#include "base/atomicops-internals-x86-msvc.h"
+#elif defined(__linux__) && defined(__PPC__)
+#include "base/atomicops-internals-linuxppc.h"
#else
// Assume x86 for now. If you need to support a new architecture and
// don't know how to implement atomic ops, you can probably get away
diff --git a/src/base/commandlineflags.h b/src/base/commandlineflags.h
index 782b7c7..9514458 100644
--- a/src/base/commandlineflags.h
+++ b/src/base/commandlineflags.h
@@ -51,6 +51,7 @@
#include "config.h"
#include <string>
#include <string.h> // for memchr
+#include <stdlib.h> // for getenv
#include "base/basictypes.h"
#define DECLARE_VARIABLE(type, name) \
diff --git a/src/base/cycleclock.h b/src/base/cycleclock.h
new file mode 100644
index 0000000..b5e012d
--- /dev/null
+++ b/src/base/cycleclock.h
@@ -0,0 +1,95 @@
+// Copyright (c) 2004, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ----------------------------------------------------------------------
+// CycleClock
+// A CycleClock tells you the current time in Cycles. The "time"
+// is actually time since power-on. This is like time() but doesn't
+// involve a system call and is much more precise.
+// ----------------------------------------------------------------------
+
+#ifndef GOOGLE_BASE_CYCLECLOCK_H__
+#define GOOGLE_BASE_CYCLECLOCK_H__
+
+#include "base/basictypes.h" // make sure we get the def for int64
+#if defined(__MACH__) && defined(__APPLE__)
+#include <mach/mach_time.h>
+#endif
+
+// NOTE: only i386 and x86_64 have been well tested.
+// PPC, sparc, alpha, and ia64 are based on
+// http://peter.kuscsik.com/wordpress/?p=14
+// with modifications by m3b. cf
+// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
+struct CycleClock {
+ // This should return the number of cycles since power-on
+ static inline int64 Now() {
+#if defined(__i386__)
+ int64 ret;
+ __asm__ volatile ("rdtsc"
+ : "=A" (ret) );
+ return ret;
+#elif defined(__x86_64__) || defined(__amd64__)
+ int64 low, high;
+ __asm__ volatile ("rdtsc\n"
+ "shl $0x20, %%rdx"
+ : "=a" (low), "=d" (high));
+ return high | low;
+#elif defined(__powerpc__) || defined(__ppc__)
+ // This returns a time-base, which is not always precisely a cycle-count.
+ int64 tbl, tbu0, tbu1;
+ asm("mftbu %0" : "=r" (tbu0));
+ asm("mftb %0" : "=r" (tbl ));
+ asm("mftbu %0" : "=r" (tbu1));
+ tbl &= -static_cast<int64>(tbu0 == tbu1);
+ // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
+ return (tbu1 << 32) | tbl;
+#elif defined(__sparc__)
+ int64 tick;
+ asm(".byte 0x83, 0x41, 0x00, 0x00");
+ asm("mov %%g1, %0" : "=r" (tick));
+ return tick;
+#elif defined(__ia64__)
+ int64 itc;
+ asm("mov %0 = ar.itc" : "=r" (itc));
+ return itc;
+#elif defined(_MSC_VER) && defined(_M_IX86)
+ _asm rdtsc
+#elif defined(__MACH__) && defined(__APPLE__)
+ return mach_absolute_time();
+#else
+ // We could define __alpha here as well, but it only has a 32-bit
+ // timer (good for like 4 seconds), which isn't very useful.
+#error You need to define CycleTimer for your O/S and CPU
+#endif
+ }
+};
+
+
+#endif // GOOGLE_BASE_CYCLECLOCK_H__
diff --git a/src/base/elfcore.h b/src/base/elfcore.h
index eb93e05..9f7002a 100644
--- a/src/base/elfcore.h
+++ b/src/base/elfcore.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2005-2007, Google Inc.
+/* Copyright (c) 2005-2008, Google Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,7 @@ extern "C" {
* Porting to other related platforms should not be difficult.
*/
#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
- defined(mips)) && defined(__linux)
+ defined(__mips__)) && defined(__linux)
#include <stdarg.h>
#include <stdint.h>
@@ -96,16 +96,16 @@ extern "C" {
#define LR uregs[14] /* Link register */
long uregs[18];
} arm_regs;
-#elif defined(mips)
+#elif defined(__mips__)
typedef struct mips_regs {
unsigned long pad[6]; /* Unused padding to match kernel structures */
unsigned long uregs[32]; /* General purpose registers. */
- unsigned long cp0_status;
- unsigned long lo; /* Used for multiplication and division. */
- unsigned long hi;
+ unsigned long hi; /* Used for multiplication and division. */
+ unsigned long lo;
+ unsigned long cp0_epc; /* Program counter. */
unsigned long cp0_badvaddr;
+ unsigned long cp0_status;
unsigned long cp0_cause;
- unsigned long cp0_epc; /* Program counter. */
unsigned long unused;
} mips_regs;
#endif
@@ -263,7 +263,7 @@ extern "C" {
(r) = (f).arm; \
(r).uregs[16] = fps; \
} while (0)
-#elif defined(mips) && defined(__GNUC__)
+#elif defined(__mips__) && defined(__GNUC__)
typedef struct Frame {
struct mips_regs mips_regs;
int errno_;
diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h
index 0c11170..c41b712 100644
--- a/src/base/linux_syscall_support.h
+++ b/src/base/linux_syscall_support.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2005-2007, Google Inc.
+/* Copyright (c) 2005-2008, Google Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -73,11 +73,11 @@
#ifndef SYS_LINUX_SYSCALL_SUPPORT_H
#define SYS_LINUX_SYSCALL_SUPPORT_H
-/* We currently only support x86-32, x86-64, ARM, and MIPS on Linux.
+/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux.
* Porting to other related platforms should not be difficult.
*/
-#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
- defined(mips)) && defined(__linux)
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
+ defined(__mips__) || defined(__PPC__)) && defined(__linux)
#ifndef SYS_CPLUSPLUS
#ifdef __cplusplus
@@ -91,6 +91,7 @@ extern "C" {
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
+#include <string.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/time.h>
@@ -100,30 +101,420 @@ extern "C" {
#include <linux/unistd.h>
#include <endian.h>
-/* libc defines versions of stat, dirent, dirent64, and statfs64 that are
- * incompatible with the structures that the kernel API expects. If you wish
- * to use sys_fstat(), sys_stat(), sys_getdents(), sys_getdents64(),
- * sys_statfs64(), or sys_statfs() you will need to include the kernel
- * headers in your code.
+#ifdef __mips__
+/* Include definitions of the ABI currently in use. */
+#include <sgidefs.h>
+#endif
+
+#endif
+
+/* As glibc often provides subtly incompatible data structures (and implicit
+ * wrapper functions that convert them), we provide our own kernel data
+ * structures for use by the system calls.
+ * These structures have been developed by using Linux 2.6.23 headers for
+ * reference. Note though, we do not care about exact API compatibility
+ * with the kernel, and in fact the kernel often does not have a single
+ * API that works across architectures. Instead, we try to mimic the glibc
+ * API where reasonable, and only guarantee ABI compatibility with the
+ * kernel headers.
+ * Most notably, here are a few changes that were made to the structures
+ * defined by kernel headers:
*
- * asm/posix_types.h
- * asm/stat.h
- * asm/statfs.h
- * asm/types.h
- * linux/dirent.h
+ * - we only define structures, but not symbolic names for kernel data
+ * types. For the latter, we directly use the native C datatype
+ * (i.e. "unsigned" instead of "mode_t").
+ * - in a few cases, it is possible to define identical structures for
+ * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by
+ * standardizing on the 64bit version of the data types. In particular,
+ * this means that we use "unsigned" where the 32bit headers say
+ * "unsigned long".
+ * - overall, we try to minimize the number of cases where we need to
+ * conditionally define different structures.
+ * - the "struct kernel_sigaction" class of structures have been
+ * modified to more closely mimic glibc's API by introducing an
+ * anonymous union for the function pointer.
+ * - a small number of field names had to have an underscore appended to
+ * them, because glibc defines a global macro by the same name.
*/
-struct dirent64;
-struct dirent;
-struct iovec;
-struct msghdr;
-struct pollfd;
-struct rlimit;
-struct sockaddr;
-struct stat;
-struct stat64;
-struct statfs;
-struct statfs64;
+/* include/linux/dirent.h */
+struct kernel_dirent64 {
+ unsigned long long d_ino;
+ long long d_off;
+ unsigned short d_reclen;
+ unsigned char d_type;
+ char d_name[256];
+};
+
+/* include/linux/dirent.h */
+struct kernel_dirent {
+ long d_ino;
+ long d_off;
+ unsigned short d_reclen;
+ char d_name[256];
+};
+
+/* include/linux/uio.h */
+struct kernel_iovec {
+ void *iov_base;
+ unsigned long iov_len;
+};
+
+/* include/linux/socket.h */
+struct kernel_msghdr {
+ void *msg_name;
+ int msg_namelen;
+ struct kernel_iovec*msg_iov;
+ unsigned long msg_iovlen;
+ void *msg_control;
+ unsigned long msg_controllen;
+ unsigned msg_flags;
+};
+
+/* include/asm-generic/poll.h */
+struct kernel_pollfd {
+ int fd;
+ short events;
+ short revents;
+};
+
+/* include/linux/resource.h */
+struct kernel_rlimit {
+ unsigned long rlim_cur;
+ unsigned long rlim_max;
+};
+
+/* include/linux/time.h */
+struct kernel_timespec {
+ long tv_sec;
+ long tv_nsec;
+};
+
+/* include/linux/time.h */
+struct kernel_timeval {
+ long tv_sec;
+ long tv_usec;
+};
+
+/* include/linux/resource.h */
+struct kernel_rusage {
+ struct kernel_timeval ru_utime;
+ struct kernel_timeval ru_stime;
+ long ru_maxrss;
+ long ru_ixrss;
+ long ru_idrss;
+ long ru_isrss;
+ long ru_minflt;
+ long ru_majflt;
+ long ru_nswap;
+ long ru_inblock;
+ long ru_oublock;
+ long ru_msgsnd;
+ long ru_msgrcv;
+ long ru_nsignals;
+ long ru_nvcsw;
+ long ru_nivcsw;
+};
+
+struct siginfo;
+#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__PPC__)
+
+/* include/asm-{arm,i386,mips,ppc}/signal.h */
+struct kernel_old_sigaction {
+ union {
+ void (*sa_handler_)(int);
+ void (*sa_sigaction_)(int, struct siginfo *, void *);
+ };
+ unsigned long sa_mask;
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+} __attribute__((packed,aligned(4)));
+#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+ #define kernel_old_sigaction kernel_sigaction
+#endif
+
+/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the
+ * sigset_t argument exactly match the size of the signal set, even though the API was
+ * intended to be extensible. We define our own KERNEL_NSIG to deal with
+ * this.
+ * Please note that glibc provides signals [1.._NSIG-1], whereas the
+ * kernel (and this header) provides the range [1..KERNEL_NSIG]. The
+ * actual number of signals is obviously the same, but the constants
+ * differ by one.
+ */
+#ifdef __mips__
+#define KERNEL_NSIG 128
+#else
+#define KERNEL_NSIG 64
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64}/signal.h */
+struct kernel_sigset_t {
+ unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/
+ (8*sizeof(unsigned long))];
+};
+
+/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */
+struct kernel_sigaction {
+#ifdef __mips__
+ unsigned long sa_flags;
+ union {
+ void (*sa_handler_)(int);
+ void (*sa_sigaction_)(int, struct siginfo *, void *);
+ };
+ struct kernel_sigset_t sa_mask;
+#else
+ union {
+ void (*sa_handler_)(int);
+ void (*sa_sigaction_)(int, struct siginfo *, void *);
+ };
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ struct kernel_sigset_t sa_mask;
+#endif
+};
+
+/* include/linux/socket.h */
+struct kernel_sockaddr {
+ unsigned short sa_family;
+ char sa_data[14];
+};
+
+/* include/asm-{arm,i386,mips,ppc}/stat.h */
+#ifdef __mips__
+#if _MIPS_SIM == _MIPS_SIM_ABI64
+struct kernel_stat {
+#else
+struct kernel_stat64 {
+#endif
+ unsigned st_dev;
+ unsigned __pad0[3];
+ unsigned long long st_ino;
+ unsigned st_mode;
+ unsigned st_nlink;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned st_rdev;
+ unsigned __pad1[3];
+ long long st_size;
+ unsigned st_atime_;
+ unsigned st_atime_nsec_;
+ unsigned st_mtime_;
+ unsigned st_mtime_nsec_;
+ unsigned st_ctime_;
+ unsigned st_ctime_nsec_;
+ unsigned st_blksize;
+ unsigned __pad2;
+ unsigned long long st_blocks;
+};
+#elif defined __PPC__
+struct kernel_stat64 {
+ unsigned long long st_dev;
+ unsigned long long st_ino;
+ unsigned st_mode;
+ unsigned st_nlink;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned long long st_rdev;
+ unsigned short int __pad2;
+ long long st_size;
+ long st_blksize;
+ long long st_blocks;
+ long st_atime_;
+ unsigned long st_atime_nsec_;
+ long st_mtime_;
+ unsigned long st_mtime_nsec_;
+ long st_ctime_;
+ unsigned long st_ctime_nsec_;
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+#else
+struct kernel_stat64 {
+ unsigned long long st_dev;
+ unsigned char __pad0[4];
+ unsigned __st_ino;
+ unsigned st_mode;
+ unsigned st_nlink;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned long long st_rdev;
+ unsigned char __pad3[4];
+ long long st_size;
+ unsigned st_blksize;
+ unsigned long long st_blocks;
+ unsigned st_atime_;
+ unsigned st_atime_nsec_;
+ unsigned st_mtime_;
+ unsigned st_mtime_nsec_;
+ unsigned st_ctime_;
+ unsigned st_ctime_nsec_;
+ unsigned long long st_ino;
+};
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */
+#if defined(__i386__) || defined(__ARM_ARCH_3__)
+struct kernel_stat {
+ /* The kernel headers suggest that st_dev and st_rdev should be 32bit
+ * quantities encoding 12bit major and 20bit minor numbers in an interleaved
+ * format. In reality, we do not see useful data in the top bits. So,
+ * we'll leave the padding in here, until we find a better solution.
+ */
+ unsigned short st_dev;
+ short pad1;
+ unsigned st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+ unsigned short st_rdev;
+ short pad2;
+ unsigned st_size;
+ unsigned st_blksize;
+ unsigned st_blocks;
+ unsigned st_atime_;
+ unsigned st_atime_nsec_;
+ unsigned st_mtime_;
+ unsigned st_mtime_nsec_;
+ unsigned st_ctime_;
+ unsigned st_ctime_nsec_;
+ unsigned __unused4;
+ unsigned __unused5;
+};
+#elif defined(__x86_64__)
+struct kernel_stat {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned long st_nlink;
+ unsigned st_mode;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned __pad0;
+ unsigned long st_rdev;
+ long st_size;
+ long st_blksize;
+ long st_blocks;
+ unsigned long st_atime_;
+ unsigned long st_atime_nsec_;
+ unsigned long st_mtime_;
+ unsigned long st_mtime_nsec_;
+ unsigned long st_ctime_;
+ unsigned long st_ctime_nsec_;
+ long __unused[3];
+};
+#elif defined(__PPC__)
+struct kernel_stat {
+ unsigned st_dev;
+ unsigned long st_ino; // ino_t
+ unsigned long st_mode; // mode_t
+ unsigned short st_nlink; // nlink_t
+ unsigned st_uid; // uid_t
+ unsigned st_gid; // gid_t
+ unsigned st_rdev;
+ long st_size; // off_t
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+ unsigned long st_atime_;
+ unsigned long st_atime_nsec_;
+ unsigned long st_mtime_;
+ unsigned long st_mtime_nsec_;
+ unsigned long st_ctime_;
+ unsigned long st_ctime_nsec_;
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
+struct kernel_stat {
+ unsigned st_dev;
+ int st_pad1[3];
+ unsigned st_ino;
+ unsigned st_mode;
+ unsigned st_nlink;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned st_rdev;
+ int st_pad2[2];
+ long st_size;
+ int st_pad3;
+ long st_atime_;
+ long st_atime_nsec_;
+ long st_mtime_;
+ long st_mtime_nsec_;
+ long st_ctime_;
+ long st_ctime_nsec_;
+ int st_blksize;
+ int st_blocks;
+ int st_pad4[14];
+};
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h */
+#ifdef __mips__
+#if _MIPS_SIM != _MIPS_SIM_ABI64
+struct kernel_statfs64 {
+ unsigned long f_type;
+ unsigned long f_bsize;
+ unsigned long f_frsize;
+ unsigned long __pad;
+ unsigned long long f_blocks;
+ unsigned long long f_bfree;
+ unsigned long long f_files;
+ unsigned long long f_ffree;
+ unsigned long long f_bavail;
+ struct { int val[2]; } f_fsid;
+ unsigned long f_namelen;
+ unsigned long f_spare[6];
+};
+#endif
+#elif !defined(__x86_64__)
+struct kernel_statfs64 {
+ unsigned long f_type;
+ unsigned long f_bsize;
+ unsigned long long f_blocks;
+ unsigned long long f_bfree;
+ unsigned long long f_bavail;
+ unsigned long long f_files;
+ unsigned long long f_ffree;
+ struct { int val[2]; } f_fsid;
+ unsigned long f_namelen;
+ unsigned long f_frsize;
+ unsigned long f_spare[5];
+};
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h */
+#ifdef __mips__
+struct kernel_statfs {
+ long f_type;
+ long f_bsize;
+ long f_frsize;
+ long f_blocks;
+ long f_bfree;
+ long f_files;
+ long f_ffree;
+ long f_bavail;
+ struct { int val[2]; } f_fsid;
+ long f_namelen;
+ long f_spare[6];
+};
+#else
+struct kernel_statfs {
+ /* x86_64 actually defines all these fields as signed, whereas all other */
+ /* platforms define them as unsigned. Leaving them at unsigned should not */
+ /* cause any problems. */
+ unsigned long f_type;
+ unsigned long f_bsize;
+ unsigned long f_blocks;
+ unsigned long f_bfree;
+ unsigned long f_bavail;
+ unsigned long f_files;
+ unsigned long f_ffree;
+ struct { int val[2]; } f_fsid;
+ unsigned long f_namelen;
+ unsigned long f_frsize;
+ unsigned long f_spare[5];
+};
#endif
@@ -159,15 +550,36 @@ struct statfs64;
#ifndef MREMAP_FIXED
#define MREMAP_FIXED 2
#endif
+#ifndef SA_RESTORER
+#define SA_RESTORER 0x04000000
+#endif
#if defined(__i386__)
#ifndef __NR_setresuid
#define __NR_setresuid 164
#define __NR_setresgid 170
#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction 174
+#define __NR_rt_sigprocmask 175
+#define __NR_rt_sigpending 176
+#define __NR_rt_sigsuspend 179
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64 180
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64 181
+#endif
#ifndef __NR_ugetrlimit
#define __NR_ugetrlimit 191
#endif
+#ifndef __NR_stat64
+#define __NR_stat64 195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 197
+#endif
#ifndef __NR_setresuid32
#define __NR_setresuid32 208
#define __NR_setresgid32 210
@@ -234,9 +646,27 @@ struct statfs64;
#define __NR_setresuid (__NR_SYSCALL_BASE + 164)
#define __NR_setresgid (__NR_SYSCALL_BASE + 170)
#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174)
+#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175)
+#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176)
+#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179)
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64 (__NR_SYSCALL_BASE + 180)
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181)
+#endif
#ifndef __NR_ugetrlimit
#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191)
#endif
+#ifndef __NR_stat64
+#define __NR_stat64 (__NR_SYSCALL_BASE + 195)
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 (__NR_SYSCALL_BASE + 197)
+#endif
#ifndef __NR_setresuid32
#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208)
#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210)
@@ -338,12 +768,30 @@ struct statfs64;
#define __NR_move_pages 279
#endif
/* End of x86-64 definitions */
-#elif defined(mips)
+#elif defined(__mips__)
#if _MIPS_SIM == _MIPS_SIM_ABI32
#ifndef __NR_setresuid
#define __NR_setresuid (__NR_Linux + 185)
#define __NR_setresgid (__NR_Linux + 190)
#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction (__NR_Linux + 194)
+#define __NR_rt_sigprocmask (__NR_Linux + 195)
+#define __NR_rt_sigpending (__NR_Linux + 196)
+#define __NR_rt_sigsuspend (__NR_Linux + 199)
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64 (__NR_Linux + 200)
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64 (__NR_Linux + 201)
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64 (__NR_Linux + 213)
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 (__NR_Linux + 215)
+#endif
#ifndef __NR_getdents64
#define __NR_getdents64 (__NR_Linux + 219)
#endif
@@ -491,9 +939,92 @@ struct statfs64;
#ifndef __NR_move_pages
#define __NR_move_pages (__NR_Linux + 271)
#endif
-/* End of MIPS (new 32bit API) definitions */
+/* End of MIPS (new 32bit API) definitions */
+#endif
+/* End of MIPS definitions */
+#elif defined(__PPC__)
+#ifndef __NR_setfsuid
+#define __NR_setfsuid 138
+#define __NR_setfsgid 139
+#endif
+#ifndef __NR_setresuid
+#define __NR_setresuid 164
+#define __NR_setresgid 169
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction 173
+#define __NR_rt_sigprocmask 174
+#define __NR_rt_sigpending 175
+#define __NR_rt_sigsuspend 178
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64 179
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64 180
+#endif
+#ifndef __NR_ugetrlimit
+#define __NR_ugetrlimit 190
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead 191
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64 195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 197
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64 202
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid 207
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr 209
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr 210
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr 212
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr 213
+#endif
+#ifndef __NR_futex
+#define __NR_futex 221
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 222
+#define __NR_sched_getaffinity 223
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address 232
+#endif
+#ifndef __NR_statfs64
+#define __NR_statfs64 252
+#endif
+#ifndef __NR_fstatfs64
+#define __NR_fstatfs64 253
+#endif
+#ifndef __NR_fadvise64_64
+#define __NR_fadvise64_64 254
+#endif
+#ifndef __NR_openat
+#define __NR_openat 286
+#endif
+#ifndef __NR_fstatat64
+#define __NR_fstatat64 291
#endif
-/* End of MIPS definitions */
+#ifndef __NR_unlinkat
+#define __NR_unlinkat 292
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages 301
+#endif
+/* End of powerpc defininitions */
#endif
@@ -556,7 +1087,7 @@ struct statfs64;
#endif
#undef LSS_RETURN
- #ifndef mips
+ #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__))
/* Failing system calls return a negative result in the range of
* -1..-4095. These are "errno" values with the sign inverted.
*/
@@ -568,7 +1099,7 @@ struct statfs64;
} \
return (type) (res); \
} while (0)
- #else
+ #elif defined(__mips__)
/* On MIPS, failing system calls return -1, and set errno in a
* separate CPU register.
*/
@@ -580,6 +1111,18 @@ struct statfs64;
} \
return (type) (res); \
} while (0)
+ #elif defined(__PPC__)
+ /* On PPC, failing system calls return -1, and set errno in a
+ * separate CPU register. See linux/unistd.h.
+ */
+ #define LSS_RETURN(type, res, err) \
+ do { \
+ if (err & 0x10000000 ) { \
+ LSS_ERRNO = (res); \
+ res = -1; \
+ } \
+ return (type) (res); \
+ } while (0)
#endif
#if defined(__i386__)
/* In PIC mode (e.g. when building shared libraries), gcc for i386
@@ -776,20 +1319,29 @@ struct statfs64;
LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset,
loff_t len, int advice) {
return LSS_NAME(_fadvise64_64)(fd,
- (unsigned)offset, (unsigned)(offset >> 32),
+ (unsigned)offset, (unsigned)(offset >>32),
(unsigned)len, (unsigned)(len >> 32),
advice);
}
- #define __NR__readahead32 __NR_readahead
- LSS_INLINE _syscall4(int, _readahead32, int, fd, unsigned, offset_lo,
- unsigned, offset_hi, unsigned, len);
- LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t offset, int len) {
- return LSS_NAME(_readahead32)(fd,
- (unsigned)offset, (unsigned)(offset >> 32),
- (unsigned)len);
+ LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+ /* For real-time signals, the kernel does not know how to return from
+ * a signal handler. Instead, it relies on user space to provide a
+ * restorer function that calls the rt_sigreturn() system call.
+ * Unfortunately, we cannot just reference the glibc version of this
+ * function, as glibc goes out of its way to make it inaccessible.
+ */
+ void (*res)(void);
+ __asm__ __volatile__("call 2f\n"
+ "0:.align 16\n"
+ "1:movl %1,%%eax\n"
+ "int $0x80\n"
+ "2:popl %0\n"
+ "addl $(1b-0b),%0\n"
+ : "=a" (res)
+ : "i" (__NR_rt_sigreturn));
+ return res;
}
-
#elif defined(__x86_64__)
/* There are no known problems with any of the _syscallX() macros
* currently shipping for x86_64, but we still need to be able to define
@@ -935,8 +1487,25 @@ struct statfs64;
}
LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len,
int, advice)
- LSS_INLINE _syscall2(int, statfs, const char *, path, struct statfs *, buf)
- LSS_INLINE _syscall2(int, fstatfs, int, fd, struct statfs *, buf)
+
+ LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+ /* On x86-64, the kernel does not know how to return from
+ * a signal handler. Instead, it relies on user space to provide a
+ * restorer function that calls the rt_sigreturn() system call.
+ * Unfortunately, we cannot just reference the glibc version of this
+ * function, as glibc goes out of its way to make it inaccessible.
+ */
+ void (*res)(void);
+ __asm__ __volatile__("call 2f\n"
+ "0:.align 16\n"
+ "1:movq %1,%%rax\n"
+ "syscall\n"
+ "2:popq %0\n"
+ "addq $(1b-0b),%0\n"
+ : "=a" (res)
+ : "i" (__NR_rt_sigreturn));
+ return res;
+ }
#elif defined(__ARM_ARCH_3__)
/* Most definitions of _syscallX() neglect to mark "memory" as being
* clobbered. This causes problems with compilers, that do a better job
@@ -1058,7 +1627,7 @@ struct statfs64;
}
LSS_RETURN(int, __res);
}
- #elif defined(mips)
+ #elif defined(__mips__)
#undef LSS_REG
#define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \
(unsigned long)(a)
@@ -1166,14 +1735,14 @@ struct statfs64;
".set reorder\n" \
: "=&r"(__v0), "+r" (__r7) \
: "i" (__NR_##name), "r"(__r4), "r"(__r5), \
- "r"(__r6), "m" ((unsigned long)arg5), \
- "m" ((unsigned long)arg6) \
+ "r"(__r6), "r" ((unsigned long)arg5), \
+ "r" ((unsigned long)arg6) \
: "$8", "$9", "$10", "$11", "$12", \
"$13", "$14", "$15", "$24", "memory"); \
LSS_RETURN(type, __v0, __r7); \
}
#else
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
type5,arg5,type6,arg6) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
type5 arg5,type6 arg6) { \
@@ -1278,6 +1847,189 @@ struct statfs64;
}
LSS_RETURN(int, __v0, __r7);
}
+ #elif defined (__PPC__)
+ #undef LSS_LOADARGS_0
+ #define LSS_LOADARGS_0(name, dummy...) \
+ __sc_0 = __NR_##name
+ #undef LSS_LOADARGS_1
+ #define LSS_LOADARGS_1(name, arg1) \
+ LSS_LOADARGS_0(name); \
+ __sc_3 = (unsigned long) (arg1)
+ #undef LSS_LOADARGS_2
+ #define LSS_LOADARGS_2(name, arg1, arg2) \
+ LSS_LOADARGS_1(name, arg1); \
+ __sc_4 = (unsigned long) (arg2)
+ #undef LSS_LOADARGS_3
+ #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \
+ LSS_LOADARGS_2(name, arg1, arg2); \
+ __sc_5 = (unsigned long) (arg3)
+ #undef LSS_LOADARGS_4
+ #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \
+ LSS_LOADARGS_3(name, arg1, arg2, arg3); \
+ __sc_6 = (unsigned long) (arg4)
+ #undef LSS_LOADARGS_5
+ #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \
+ LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \
+ __sc_7 = (unsigned long) (arg5)
+ #undef LSS_LOADARGS_6
+ #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \
+ LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \
+ __sc_8 = (unsigned long) (arg6)
+ #undef LSS_ASMINPUT_0
+ #define LSS_ASMINPUT_0 "0" (__sc_0)
+ #undef LSS_ASMINPUT_1
+ #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3)
+ #undef LSS_ASMINPUT_2
+ #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4)
+ #undef LSS_ASMINPUT_3
+ #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5)
+ #undef LSS_ASMINPUT_4
+ #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6)
+ #undef LSS_ASMINPUT_5
+ #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7)
+ #undef LSS_ASMINPUT_6
+ #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8)
+ #undef LSS_BODY
+ #define LSS_BODY(nr, type, name, args...) \
+ long __sc_ret, __sc_err; \
+ { \
+ register unsigned long __sc_0 __asm__ ("r0"); \
+ register unsigned long __sc_3 __asm__ ("r3"); \
+ register unsigned long __sc_4 __asm__ ("r4"); \
+ register unsigned long __sc_5 __asm__ ("r5"); \
+ register unsigned long __sc_6 __asm__ ("r6"); \
+ register unsigned long __sc_7 __asm__ ("r7"); \
+ register unsigned long __sc_8 __asm__ ("r8"); \
+ \
+ LSS_LOADARGS_##nr(name, args); \
+ __asm__ __volatile__ \
+ ("sc\n\t" \
+ "mfcr %0" \
+ : "=&r" (__sc_0), \
+ "=&r" (__sc_3), "=&r" (__sc_4), \
+ "=&r" (__sc_5), "=&r" (__sc_6), \
+ "=&r" (__sc_7), "=&r" (__sc_8) \
+ : LSS_ASMINPUT_##nr \
+ : "cr0", "ctr", "memory", \
+ "r9", "r10", "r11", "r12"); \
+ __sc_ret = __sc_3; \
+ __sc_err = __sc_0; \
+ } \
+ LSS_RETURN(type, __sc_ret, __sc_err)
+ #undef _syscall0
+ #define _syscall0(type, name) \
+ type LSS_NAME(name)(void) { \
+ LSS_BODY(0, type, name); \
+ }
+ #undef _syscall1
+ #define _syscall1(type, name, type1, arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
+ LSS_BODY(1, type, name, arg1); \
+ }
+ #undef _syscall2
+ #define _syscall2(type, name, type1, arg1, type2, arg2) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2) { \
+ LSS_BODY(2, type, name, arg1, arg2); \
+ }
+ #undef _syscall3
+ #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
+ LSS_BODY(3, type, name, arg1, arg2, arg3); \
+ }
+ #undef _syscall4
+ #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \
+ }
+ #undef _syscall5
+ #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \
+ }
+ #undef _syscall6
+ #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5, type6, arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \
+ }
+ /* clone function adapted from glibc 2.3.6 clone.S */
+ /* TODO(csilvers): consider wrapping some args up in a struct, like we
+ * do for i386's _syscall6, so we can compile successfully on gcc 2.95
+ */
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+ long __ret, __err;
+ {
+ register int (*__fn)(void *) __asm__ ("r8") = fn;
+ register void *__cstack __asm__ ("r4") = child_stack;
+ register int __flags __asm__ ("r3") = flags;
+ register void * __arg __asm__ ("r9") = arg;
+ register int * __ptidptr __asm__ ("r5") = parent_tidptr;
+ register void * __newtls __asm__ ("r6") = newtls;
+ register int * __ctidptr __asm__ ("r7") = child_tidptr;
+ __asm__ __volatile__(
+ /* check for fn == NULL
+ * and child_stack == NULL
+ */
+ "cmpwi cr0, %6, 0\n\t"
+ "cmpwi cr1, %7, 0\n\t"
+ "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
+ "beq- cr0, 1f\n\t"
+
+ /* set up stack frame for child */
+ "clrrwi %7, %7, 4\n\t"
+ "li 0, 0\n\t"
+ "stwu 0, -16(%7)\n\t"
+
+ /* fn, arg, child_stack are saved across the syscall: r28-30 */
+ "mr 28, %6\n\t"
+ "mr 29, %7\n\t"
+ "mr 27, %9\n\t"
+
+ /* syscall */
+ "li 0, %4\n\t"
+ /* flags already in r3
+ * child_stack already in r4
+ * ptidptr already in r5
+ * newtls already in r6
+ * ctidptr already in r7
+ */
+ "sc\n\t"
+
+ /* Test if syscall was successful */
+ "cmpwi cr1, 3, 0\n\t"
+ "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
+ "bne- cr1, 1f\n\t"
+
+ /* Do the function call */
+ "mtctr 28\n\t"
+ "mr 3, 27\n\t"
+ "bctrl\n\t"
+
+ /* Call _exit(r3) */
+ "li 0, %5\n\t"
+ "sc\n\t"
+
+ /* Return to parent */
+ "1:\n"
+ "mfcr %1\n\t"
+ "mr %0, 3\n\t"
+ : "=r" (__ret), "=r" (__err)
+ : "0" (-1), "1" (EINVAL),
+ "i" (__NR_clone), "i" (__NR_exit),
+ "r" (__fn), "r" (__cstack), "r" (__flags),
+ "r" (__arg), "r" (__ptidptr), "r" (__newtls),
+ "r" (__ctidptr)
+ : "cr0", "cr1", "memory", "ctr",
+ "r0", "r29", "r27", "r28");
+ }
+ LSS_RETURN(int, __ret, __err);
+ }
#endif
#define __NR__exit __NR_exit
#define __NR__gettid __NR_gettid
@@ -1294,13 +2046,16 @@ struct statfs64;
int, c, long, a)
LSS_INLINE _syscall0(pid_t, fork)
LSS_INLINE _syscall2(int, fstat, int, f,
- struct stat*, b)
+ struct kernel_stat*, b)
+ LSS_INLINE _syscall2(int, fstatfs, int, f,
+ struct kernel_statfs*, b)
LSS_INLINE _syscall4(int, futex, int*, a,
- int, o, int, v, struct timespec *, t)
+ int, o, int, v,
+ struct kernel_timespec*, t)
LSS_INLINE _syscall3(int, getdents, int, f,
- struct dirent*, d, int, c)
+ struct kernel_dirent*, d, int, c)
LSS_INLINE _syscall3(int, getdents64, int, f,
- struct dirent64*, d, int, c)
+ struct kernel_dirent64*, d, int, c)
LSS_INLINE _syscall0(gid_t, getegid)
LSS_INLINE _syscall0(uid_t, geteuid)
LSS_INLINE _syscall0(pid_t, getpgrp)
@@ -1309,7 +2064,7 @@ struct statfs64;
LSS_INLINE _syscall2(int, getpriority, int, a,
int, b)
LSS_INLINE _syscall2(int, getrlimit, int, r,
- struct rlimit*, l)
+ struct kernel_rlimit*, l)
LSS_INLINE _syscall1(pid_t, getsid, pid_t, p)
LSS_INLINE _syscall0(pid_t, _gettid)
LSS_INLINE _syscall5(int, setxattr, const char *,p,
@@ -1336,7 +2091,7 @@ struct statfs64;
unsigned long, f, void *, a)
LSS_INLINE _syscall3(int, open, const char*, p,
int, f, int, m)
- LSS_INLINE _syscall3(int, poll, struct pollfd*, u,
+ LSS_INLINE _syscall3(int, poll, struct kernel_pollfd*, u,
unsigned int, n, int, t)
LSS_INLINE _syscall2(int, prctl, int, o,
long, a)
@@ -1346,6 +2101,16 @@ struct statfs64;
void *, b, size_t, c)
LSS_INLINE _syscall3(int, readlink, const char*, p,
char*, b, size_t, s)
+ LSS_INLINE _syscall4(int, rt_sigaction, int, s,
+ const struct kernel_sigaction*, a,
+ struct kernel_sigaction*, o, size_t, c)
+ LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s,
+ size_t, c)
+ LSS_INLINE _syscall4(int, rt_sigprocmask, int, h,
+ const struct kernel_sigset_t*, s,
+ struct kernel_sigset_t*, o, size_t, c);
+ LSS_INLINE _syscall2(int, rt_sigsuspend,
+ const struct kernel_sigset_t*, s, size_t, c);
LSS_INLINE _syscall3(int, sched_getaffinity,pid_t, p,
unsigned int, l, unsigned long *, m)
LSS_INLINE _syscall3(int, sched_setaffinity,pid_t, p,
@@ -1363,26 +2128,28 @@ struct statfs64;
LSS_INLINE _syscall3(int, setresuid, uid_t, r,
uid_t, e, uid_t, s)
LSS_INLINE _syscall2(int, setrlimit, int, r,
- const struct rlimit*, l)
+ const struct kernel_rlimit*, l)
LSS_INLINE _syscall0(pid_t, setsid)
LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s,
const stack_t*, o)
LSS_INLINE _syscall2(int, stat, const char*, f,
- struct stat*, b)
+ struct kernel_stat*, b)
+ LSS_INLINE _syscall2(int, statfs, const char*, f,
+ struct kernel_statfs*, b)
LSS_INLINE _syscall3(ssize_t, write, int, f,
const void *, b, size_t, c)
LSS_INLINE _syscall3(ssize_t, writev, int, f,
- const struct iovec *, v, size_t, c)
+ const struct kernel_iovec*, v, size_t, c)
#if defined(__x86_64__) || \
- (defined(mips) && _MIPS_SIM != _MIPS_SIM_ABI32)
+ (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
LSS_INLINE _syscall3(int, recvmsg, int, s,
- struct msghdr*, m, int, f)
+ struct kernel_msghdr*, m, int, f)
LSS_INLINE _syscall3(int, sendmsg, int, s,
- const struct msghdr*, m, int, f)
+ const struct kernel_msghdr*, m, int, f)
LSS_INLINE _syscall6(int, sendto, int, s,
const void*, m, size_t, l,
int, f,
- const struct sockaddr*, a, int, t)
+ const struct kernel_sockaddr*, a, int, t)
LSS_INLINE _syscall2(int, shutdown, int, s,
int, h)
LSS_INLINE _syscall3(int, socket, int, d,
@@ -1397,14 +2164,7 @@ struct statfs64;
__off64_t, o)
LSS_INLINE _syscall4(int, newfstatat, int, d,
const char *, p,
- struct stat *, b, int, f)
- LSS_INLINE _syscall4(int, rt_sigaction, int, s,
- const struct sigaction*, a,
- struct sigaction*, o, int, c)
- LSS_INLINE _syscall2(int, rt_sigpending, sigset_t*, s,
- int, c)
- LSS_INLINE _syscall4(int, rt_sigprocmask, int, h,
- const sigset_t*, s, sigset_t*, o, int, c);
+ struct kernel_stat*, b, int, f)
LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
return LSS_NAME(setfsgid)(gid);
@@ -1423,25 +2183,42 @@ struct statfs64;
}
LSS_INLINE int LSS_NAME(sigaction)(int signum,
- const struct sigaction *act,
- struct sigaction *oldact) {
- return LSS_NAME(rt_sigaction)(signum, act, oldact, (_NSIG+6)/8);
+ const struct kernel_sigaction *act,
+ struct kernel_sigaction *oldact) {
+ /* On x86_64, the kernel requires us to always set our own
+ * SA_RESTORER in order to be able to return from a signal handler.
+ * This function must have a "magic" signature that the "gdb"
+ * (and maybe the kernel?) can recognize.
+ */
+ struct kernel_sigaction a;
+ if (act != NULL) {
+ a = *act;
+ a.sa_flags |= SA_RESTORER;
+ a.sa_restorer = LSS_NAME(restore_rt)();
+ }
+ return LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact,
+ (KERNEL_NSIG+7)/8);
+ }
+
+ LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
+ return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
}
- LSS_INLINE int LSS_NAME(sigpending)(sigset_t *set) {
- return LSS_NAME(rt_sigpending)(set, (_NSIG+6)/8);
+ LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+ const struct kernel_sigset_t *set,
+ struct kernel_sigset_t *oldset) {
+ return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
}
- LSS_INLINE int LSS_NAME(sigprocmask)(int how, const sigset_t *set,
- sigset_t *oldset) {
- return LSS_NAME(rt_sigprocmask)(how, set, oldset, (_NSIG+6)/8);
+ LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
+ return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
}
#endif
#if defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
- (defined(mips) && _MIPS_SIM != _MIPS_SIM_ABI32)
+ (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
LSS_INLINE _syscall4(pid_t, wait4, pid_t, p,
int*, s, int, o,
- struct rusage*, r)
+ struct kernel_rusage*, r)
LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){
return LSS_NAME(wait4)(pid, status, options, 0);
@@ -1456,7 +2233,8 @@ struct statfs64;
#define __NR__setfsuid32 __NR_setfsuid32
#define __NR__setresgid32 __NR_setresgid32
#define __NR__setresuid32 __NR_setresuid32
- LSS_INLINE _syscall2(int, ugetrlimit, int, r,struct rlimit*,l)
+ LSS_INLINE _syscall2(int, ugetrlimit, int, r,
+ struct kernel_rlimit*, l)
LSS_INLINE _syscall1(int, _setfsgid32, gid_t, f)
LSS_INLINE _syscall1(int, _setfsuid32, uid_t, f)
LSS_INLINE _syscall3(int, _setresgid32, gid_t, r,
@@ -1520,9 +2298,59 @@ struct statfs64;
return rc;
}
#endif
+ LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
+ memset(&set->sig, 0, sizeof(set->sig));
+ return 0;
+ }
+
+ LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
+ memset(&set->sig, -1, sizeof(set->sig));
+ return 0;
+ }
+
+ LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
+ int signum) {
+ if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+ LSS_ERRNO = EINVAL;
+ return -1;
+ } else {
+ set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
+ |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0])));
+ return 0;
+ }
+ }
+
+ LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
+ int signum) {
+ if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+ LSS_ERRNO = EINVAL;
+ return -1;
+ } else {
+ set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
+ &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0]))));
+ return 0;
+ }
+ }
+
+ LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set,
+ int signum) {
+ if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+ LSS_ERRNO = EINVAL;
+ return -1;
+ } else {
+ return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] &
+ (1UL << ((signum - 1) % (8*sizeof(set->sig[0])))));
+ }
+ }
#if defined(__i386__) || defined(__ARM_ARCH_3__) || \
- (defined(mips) && _MIPS_SIM == _MIPS_SIM_ABI32)
+ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__)
+ #define __NR__sigaction __NR_sigaction
+ #define __NR__sigpending __NR_sigpending
+ #define __NR__sigprocmask __NR_sigprocmask
+ #define __NR__sigsuspend __NR_sigsuspend
#define __NR__socketcall __NR_socketcall
+ LSS_INLINE _syscall2(int, fstat64, int, f,
+ struct kernel_stat64 *, b)
LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo,
loff_t *, res, uint, wh)
LSS_INLINE _syscall1(void*, mmap, void*, a)
@@ -1530,11 +2358,216 @@ struct statfs64;
size_t, l, int, p,
int, f, int, d,
__off64_t, o)
- LSS_INLINE _syscall3(int, sigaction, int, s,
- const struct sigaction*, a, struct sigaction*, o)
- LSS_INLINE _syscall1(int, sigpending, sigset_t*, s)
- LSS_INLINE _syscall3(int, sigprocmask, int, h,
- const sigset_t*, s, sigset_t*, o)
+ LSS_INLINE _syscall3(int, _sigaction, int, s,
+ const struct kernel_old_sigaction*, a,
+ struct kernel_old_sigaction*, o)
+ LSS_INLINE _syscall1(int, _sigpending, unsigned long*, s)
+ LSS_INLINE _syscall3(int, _sigprocmask, int, h,
+ const unsigned long*, s,
+ unsigned long*, o)
+ #ifdef __PPC__
+ LSS_INLINE _syscall1(int, _sigsuspend, unsigned long, s)
+ #else
+ LSS_INLINE _syscall3(int, _sigsuspend, const void*, a,
+ int, b,
+ unsigned long, s)
+ #endif
+ LSS_INLINE _syscall2(int, stat64, const char *, p,
+ struct kernel_stat64 *, b)
+
+ LSS_INLINE int LSS_NAME(sigaction)(int signum,
+ const struct kernel_sigaction *act,
+ struct kernel_sigaction *oldact) {
+ int old_errno = LSS_ERRNO;
+ int rc;
+ struct kernel_sigaction a;
+ if (act != NULL) {
+ a = *act;
+ #ifdef __i386__
+ /* On i386, the kernel requires us to always set our own
+ * SA_RESTORER when using realtime signals. Otherwise, it does not
+ * know how to return from a signal handler. This function must have
+ * a "magic" signature that the "gdb" (and maybe the kernel?) can
+ * recognize.
+ * Apparently, a SA_RESTORER is implicitly set by the kernel, when
+ * using non-realtime signals.
+ *
+ * TODO: Test whether ARM needs a restorer
+ */
+ a.sa_flags |= SA_RESTORER;
+ a.sa_restorer = LSS_NAME(restore_rt)();
+ #endif
+ }
+ rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact,
+ (KERNEL_NSIG+7)/8);
+ if (rc < 0 && LSS_ERRNO == ENOSYS) {
+ struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa;
+ if (!act) {
+ ptr_a = NULL;
+ } else {
+ oa.sa_handler_ = act->sa_handler_;
+ memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask));
+ #ifndef __mips__
+ oa.sa_restorer = act->sa_restorer;
+ #endif
+ oa.sa_flags = act->sa_flags;
+ }
+ if (!oldact) {
+ ptr_oa = NULL;
+ }
+ LSS_ERRNO = old_errno;
+ rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa);
+ if (rc == 0 && oldact) {
+ if (act) {
+ memcpy(oldact, act, sizeof(*act));
+ } else {
+ memset(oldact, 0, sizeof(*oldact));
+ }
+ oldact->sa_handler_ = ptr_oa->sa_handler_;
+ oldact->sa_flags = ptr_oa->sa_flags;
+ memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask));
+ #ifndef __mips__
+ oldact->sa_restorer = ptr_oa->sa_restorer;
+ #endif
+ }
+ }
+ return rc;
+ }
+
+ LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
+ int old_errno = LSS_ERRNO;
+ int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
+ if (rc < 0 && LSS_ERRNO == ENOSYS) {
+ LSS_ERRNO = old_errno;
+ LSS_NAME(sigemptyset)(set);
+ rc = LSS_NAME(_sigpending)(&set->sig[0]);
+ }
+ return rc;
+ }
+
+ LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+ const struct kernel_sigset_t *set,
+ struct kernel_sigset_t *oldset) {
+ int olderrno = LSS_ERRNO;
+ int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
+ if (rc < 0 && LSS_ERRNO == ENOSYS) {
+ LSS_ERRNO = olderrno;
+ if (oldset) {
+ LSS_NAME(sigemptyset)(oldset);
+ }
+ rc = LSS_NAME(_sigprocmask)(how,
+ set ? &set->sig[0] : NULL,
+ oldset ? &oldset->sig[0] : NULL);
+ }
+ return rc;
+ }
+
+ LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
+ int olderrno = LSS_ERRNO;
+ int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
+ if (rc < 0 && LSS_ERRNO == ENOSYS) {
+ LSS_ERRNO = olderrno;
+ rc = LSS_NAME(_sigsuspend)(
+ #ifndef __PPC__
+ set, 0,
+ #endif
+ set->sig[0]);
+ }
+ return rc;
+ }
+ #endif
+ #if defined(__PPC__)
+ #undef LSS_SC_LOADARGS_0
+ #define LSS_SC_LOADARGS_0(dummy...)
+ #undef LSS_SC_LOADARGS_1
+ #define LSS_SC_LOADARGS_1(arg1) \
+ __sc_4 = (unsigned long) (arg1)
+ #undef LSS_SC_LOADARGS_2
+ #define LSS_SC_LOADARGS_2(arg1, arg2) \
+ LSS_SC_LOADARGS_1(arg1); \
+ __sc_5 = (unsigned long) (arg2)
+ #undef LSS_SC_LOADARGS_3
+ #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \
+ LSS_SC_LOADARGS_2(arg1, arg2); \
+ __sc_6 = (unsigned long) (arg3)
+ #undef LSS_SC_LOADARGS_4
+ #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \
+ LSS_SC_LOADARGS_3(arg1, arg2, arg3); \
+ __sc_7 = (unsigned long) (arg4)
+ #undef LSS_SC_LOADARGS_5
+ #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \
+ LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \
+ __sc_8 = (unsigned long) (arg5)
+ #undef LSS_SC_BODY
+ #define LSS_SC_BODY(nr, type, opt, args...) \
+ long __sc_ret, __sc_err; \
+ { \
+ register unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall; \
+ register unsigned long __sc_3 __asm__ ("r3") = opt; \
+ register unsigned long __sc_4 __asm__ ("r4"); \
+ register unsigned long __sc_5 __asm__ ("r5"); \
+ register unsigned long __sc_6 __asm__ ("r6"); \
+ register unsigned long __sc_7 __asm__ ("r7"); \
+ register unsigned long __sc_8 __asm__ ("r8"); \
+ LSS_SC_LOADARGS_##nr(args); \
+ __asm__ __volatile__ \
+ ("stwu 1, -48(1)\n\t" \
+ "stw 4, 20(1)\n\t" \
+ "stw 5, 24(1)\n\t" \
+ "stw 6, 28(1)\n\t" \
+ "stw 7, 32(1)\n\t" \
+ "stw 8, 36(1)\n\t" \
+ "addi 4, 1, 20\n\t" \
+ "sc\n\t" \
+ "mfcr %0" \
+ : "=&r" (__sc_0), \
+ "=&r" (__sc_3), "=&r" (__sc_4), \
+ "=&r" (__sc_5), "=&r" (__sc_6), \
+ "=&r" (__sc_7), "=&r" (__sc_8) \
+ : LSS_ASMINPUT_##nr \
+ : "cr0", "ctr", "memory"); \
+ __sc_ret = __sc_3; \
+ __sc_err = __sc_0; \
+ } \
+ LSS_RETURN(type, __sc_ret, __sc_err)
+
+ LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
+ int flags){
+ LSS_SC_BODY(3, ssize_t, 17, s, msg, flags);
+ }
+
+ LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
+ const struct kernel_msghdr *msg,
+ int flags) {
+ LSS_SC_BODY(3, ssize_t, 16, s, msg, flags);
+ }
+
+ // TODO(csilvers): why is this ifdef'ed out?
+#if 0
+ LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
+ int flags,
+ const struct kernel_sockaddr *to,
+ unsigned int tolen) {
+ LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen);
+ }
+#endif
+
+ LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
+ LSS_SC_BODY(2, int, 13, s, how);
+ }
+
+ LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
+ LSS_SC_BODY(3, int, 1, domain, type, protocol);
+ }
+
+ LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
+ int sv[2]) {
+ LSS_SC_BODY(4, int, 8, d, type, protocol, sv);
+ }
+ #endif
+ #if defined(__i386__) || defined(__ARM_ARCH_3__) || \
+ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+ #define __NR__socketcall __NR_socketcall
LSS_INLINE _syscall2(int, _socketcall, int, c,
va_list, a)
@@ -1547,17 +2580,20 @@ struct statfs64;
return rc;
}
- LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct msghdr*msg,int flags){
+ LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
+ int flags){
return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags);
}
- LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, const struct msghdr *msg,
+ LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
+ const struct kernel_msghdr *msg,
int flags) {
return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags);
}
LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
- int flags, const struct sockaddr*to,
+ int flags,
+ const struct kernel_sockaddr *to,
unsigned int tolen) {
return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen);
}
@@ -1575,17 +2611,17 @@ struct statfs64;
return LSS_NAME(socketcall)(8, d, type, protocol, sv);
}
#endif
- #if defined(__i386__)
+ #if defined(__i386__) || defined(__PPC__)
LSS_INLINE _syscall4(int, fstatat64, int, d,
const char *, p,
- struct stat64 *, b, int, f)
+ struct kernel_stat64 *, b, int, f)
#endif
- #if defined(__i386__) || \
- (defined(mips) && _MIPS_SIM == _MIPS_SIM_ABI32)
+ #if defined(__i386__) || defined(__PPC__) || \
+ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p,
int*, s, int, o)
#endif
- #if defined(mips)
+ #if defined(__mips__)
/* sys_pipe() on MIPS has non-standard calling conventions, as it returns
* both file handles through CPU registers.
*/
@@ -1610,17 +2646,27 @@ struct statfs64;
#else
LSS_INLINE _syscall1(int, pipe, int *, p)
#endif
+ /* TODO(csilvers): see if ppc can/should support this as well */
#if defined(__i386__) || defined(__ARM_ARCH_3__) || \
- (defined(mips) && _MIPS_SIM != _MIPS_SIM_ABI64)
- LSS_INLINE _syscall2(int, statfs64, const char*, p,
- struct statfs64 *, b)
- LSS_INLINE _syscall2(int, fstatfs64, int, f,
- struct statfs64 *, b)
+ (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
+ #define __NR__statfs64 __NR_statfs64
+ #define __NR__fstatfs64 __NR_fstatfs64
+ LSS_INLINE _syscall3(int, _statfs64, const char*, p,
+ size_t, s,struct kernel_statfs64*, b)
+ LSS_INLINE _syscall3(int, _fstatfs64, int, f,
+ size_t, s,struct kernel_statfs64*, b)
+ LSS_INLINE int LSS_NAME(statfs64)(const char *p,
+ struct kernel_statfs64 *b) {
+ return LSS_NAME(_statfs64)(p, sizeof(*b), b);
+ }
+ LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) {
+ return LSS_NAME(_fstatfs64)(f, sizeof(*b), b);
+ }
#endif
- LSS_INLINE int LSS_NAME(execv)(const char *path, const char * const argv[]) {
+ LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) {
extern char **environ;
- return LSS_NAME(execve)(path, argv, (const char * const *)environ);
+ return LSS_NAME(execve)(path, argv, (const char *const *)environ);
}
LSS_INLINE pid_t LSS_NAME(gettid)() {
@@ -1671,9 +2717,9 @@ struct statfs64;
extern int __getpagesize(void);
switch (name) {
case _SC_OPEN_MAX: {
- unsigned long limit[2];
- return LSS_NAME(getrlimit)(RLIMIT_NOFILE, (struct rlimit *)limit)<0
- ? 8192 : limit[0];
+ struct kernel_rlimit limit;
+ return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0
+ ? 8192 : limit.rlim_cur;
}
case _SC_PAGESIZE:
return __getpagesize();
@@ -1682,29 +2728,46 @@ struct statfs64;
return -1;
}
}
-
#if defined(__x86_64__) || \
- (defined(mips) && _MIPS_SIM == _MIPS_SIM_ABI64)
- LSS_INLINE _syscall3(int, readahead, int, fd, loff_t, offset,
- unsigned, len)
- #elif defined(mips) || defined(__ARM_ARCH_3__)
- LSS_INLINE _syscall4(int, _readahead32, int, fd, unsigned, offset_lo,
- unsigned, offset_hi, unsigned, len);
- LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t offset, int len) {
- return LSS_NAME(_readahead32)(fd,
- #if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \
- (defined(__LITTLE_ENDIAN__))
- (unsigned)offset, (unsigned)(offset >> 32),
- #elif (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || \
- (defined(__BIG_ENDIAN__))
- (unsigned)(offset >> 32), (unsigned)offset,
- #else
- #error "unknown endinness, don't know how to pass syscall arguments"
- #endif
- (unsigned)len);
+ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64)
+ LSS_INLINE _syscall4(ssize_t, pread64, int, f,
+ void *, b, size_t, c, loff_t, o)
+ LSS_INLINE _syscall4(ssize_t, pwrite64, int, f,
+ const void *, b, size_t, c, loff_t, o)
+ LSS_INLINE _syscall3(int, readahead, int, f,
+ loff_t, o, unsigned, c)
+ #else
+ #define __NR__pread64 __NR_pread64
+ #define __NR__pwrite64 __NR_pwrite64
+ #define __NR__readahead __NR_readahead
+ LSS_INLINE _syscall5(ssize_t, _pread64, int, f,
+ void *, b, size_t, c, unsigned, o1,
+ unsigned, o2)
+ LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f,
+ const void *, b, size_t, c, unsigned, o1,
+ long, o2)
+ LSS_INLINE _syscall4(int, _readahead, int, f,
+ unsigned, o1, unsigned, o2, size_t, c);
+ /* We force 64bit-wide parameters onto the stack, then access each
+ * 32-bit component individually. This guarantees that we build the
+ * correct parameters independent of the native byte-order of the
+ * underlying architecture.
+ */
+ LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count,
+ loff_t off) {
+ union { loff_t off; unsigned arg[2]; } o = { off };
+ return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]);
+ }
+ LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf,
+ size_t count, loff_t off) {
+ union { loff_t off; unsigned arg[2]; } o = { off };
+ return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]);
+ }
+ LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) {
+ union { loff_t off; unsigned arg[2]; } o = { off };
+ return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len);
}
#endif
-
#endif
#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
diff --git a/src/base/linuxthreads.c b/src/base/linuxthreads.c
index 2f9d547..e67ac26 100644
--- a/src/base/linuxthreads.c
+++ b/src/base/linuxthreads.c
@@ -249,15 +249,15 @@ struct ListerParams {
static void ListerThread(struct ListerParams *args) {
- int found_parent = 0;
- pid_t clone_pid = sys_gettid(), ppid = sys_getppid();
- char proc_self_task[80], marker_name[48], *marker_path;
- const char *proc_paths[3];
- const char *const *proc_path = proc_paths;
- int proc = -1, marker = -1, num_threads = 0;
- int max_threads = 0, sig;
- struct stat marker_sb, proc_sb;
- stack_t altstack;
+ int found_parent = 0;
+ pid_t clone_pid = sys_gettid(), ppid = sys_getppid();
+ char proc_self_task[80], marker_name[48], *marker_path;
+ const char *proc_paths[3];
+ const char *const *proc_path = proc_paths;
+ int proc = -1, marker = -1, num_threads = 0;
+ int max_threads = 0, sig;
+ struct kernel_stat marker_sb, proc_sb;
+ stack_t altstack;
/* Create "marker" that we can use to detect threads sharing the same
* address space and the same file handles. By setting the FD_CLOEXEC flag
@@ -312,12 +312,12 @@ static void ListerThread(struct ListerParams *args) {
sig_marker = marker;
sig_proc = -1;
for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
- struct sigaction sa;
+ struct kernel_sigaction sa;
memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = SignalHandler;
- sigfillset(&sa.sa_mask);
- sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND;
- sys_sigaction(sync_signals[sig], &sa, (struct sigaction *)NULL);
+ sa.sa_sigaction_ = SignalHandler;
+ sys_sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND;
+ sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL);
}
/* Read process directories in /proc/... */
@@ -356,9 +356,9 @@ static void ListerThread(struct ListerParams *args) {
sig_num_threads = num_threads;
sig_pids = pids;
for (;;) {
- struct dirent *entry;
+ struct kernel_dirent *entry;
char buf[4096];
- ssize_t nbytes = sys_getdents(proc, (struct dirent *)buf,
+ ssize_t nbytes = sys_getdents(proc, (struct kernel_dirent *)buf,
sizeof(buf));
if (nbytes < 0)
goto failure;
@@ -375,9 +375,9 @@ static void ListerThread(struct ListerParams *args) {
}
break;
}
- for (entry = (struct dirent *)buf;
- entry < (struct dirent *)&buf[nbytes];
- entry = (struct dirent *)((char *)entry + entry->d_reclen)) {
+ for (entry = (struct kernel_dirent *)buf;
+ entry < (struct kernel_dirent *)&buf[nbytes];
+ entry = (struct kernel_dirent *)((char *)entry+entry->d_reclen)) {
if (entry->d_ino != 0) {
const char *ptr = entry->d_name;
pid_t pid;
@@ -395,7 +395,7 @@ static void ListerThread(struct ListerParams *args) {
/* Attach (and suspend) all threads */
if (pid && pid != clone_pid) {
- struct stat tmp_sb;
+ struct kernel_stat tmp_sb;
char fname[entry->d_reclen + 48];
strcat(strcat(strcpy(fname, "/proc/"),
entry->d_name), marker_path);
@@ -536,11 +536,11 @@ static void ListerThread(struct ListerParams *args) {
*/
int ListAllProcessThreads(void *parameter,
ListAllProcessThreadsCallBack callback, ...) {
- char altstack_mem[ALT_STACKSIZE];
- struct ListerParams args;
- pid_t clone_pid;
- int dumpable = 1, sig;
- sigset_t sig_blocked, sig_old;
+ char altstack_mem[ALT_STACKSIZE];
+ struct ListerParams args;
+ pid_t clone_pid;
+ int dumpable = 1, sig;
+ struct kernel_sigset_t sig_blocked, sig_old;
va_start(args.ap, callback);
@@ -573,9 +573,9 @@ int ListAllProcessThreads(void *parameter,
/* Before cloning the thread lister, block all asynchronous signals, as we */
/* are not prepared to handle them. */
- sigfillset(&sig_blocked);
+ sys_sigfillset(&sig_blocked);
for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
- sigdelset(&sig_blocked, sync_signals[sig]);
+ sys_sigdelset(&sig_blocked, sync_signals[sig]);
}
if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) {
args.err = errno;
@@ -606,7 +606,7 @@ int ListAllProcessThreads(void *parameter,
clone_pid = local_clone((int (*)(void *))ListerThread, &args);
clone_errno = errno;
- sys0_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);
+ sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);
if (clone_pid >= 0) {
int status, rc;
diff --git a/src/base/linuxthreads.h b/src/base/linuxthreads.h
index 829fd79..5c318fe 100644
--- a/src/base/linuxthreads.h
+++ b/src/base/linuxthreads.h
@@ -41,7 +41,7 @@
* related platforms should not be difficult.
*/
#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
- defined(mips)) && defined(__linux)
+ defined(__mips__) || defined(__PPC__)) && defined(__linux)
/* Define the THREADS symbol to make sure that there is exactly one core dumper
* built into the library.
diff --git a/src/base/logging.h b/src/base/logging.h
index 95035b3..3ecfea2 100644
--- a/src/base/logging.h
+++ b/src/base/logging.h
@@ -160,7 +160,7 @@ enum { DEBUG_MODE = 1 };
#ifdef ERROR
#undef ERROR // may conflict with ERROR macro on windows
#endif
-enum {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4};
+enum LogSeverity {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4};
// NOTE: we add a newline to the end of the output if it's not there already
inline void LogPrintf(int severity, const char* pat, va_list ap) {
diff --git a/src/base/mutex.h b/src/base/simple_mutex.h
index 6eeb995..2474b57 100644
--- a/src/base/mutex.h
+++ b/src/base/simple_mutex.h
@@ -46,8 +46,8 @@
* mode.
*/
-#ifndef GOOGLE_MUTEX_H__
-#define GOOGLE_MUTEX_H__
+#ifndef GOOGLE_SIMPLE_MUTEX_H__
+#define GOOGLE_SIMPLE_MUTEX_H__
#include "config.h" // to figure out pthreads support
@@ -114,19 +114,24 @@ class Mutex {
// Now the implementation of Mutex for various systems
#if defined(NO_THREADS)
-// In debug mode, we'll assert some invariants: we don't unlock if we
-// didn't lock first, the lock is not held when Lock() is called
-// (since we're not re-entrant), etc. In non-debug mode, we do
-// nothing, for efficiency. That's why we do everything in an assert.
+// When we don't have threads, we can be either reading or writing,
+// but not both. We can have lots of readers at once (in no-threads
+// mode, that's most likely to happen in recursive function calls),
+// but only one writer. We represent this by having mutex_ be -1 when
+// writing and a number > 0 when reading (and 0 when no lock is held).
+//
+// In debug mode, we assert these invariants, while in non-debug mode
+// we do nothing, for efficiency. That's why everything is in an
+// assert.
#include <assert.h>
-Mutex::Mutex() : mutex_(0) { } // mutex_ counts number of current Lock()s
+Mutex::Mutex() : mutex_(0) { }
Mutex::~Mutex() { assert(mutex_ == 0); }
-void Mutex::Lock() { assert(mutex_++ == 0); }
-void Mutex::Unlock() { assert(mutex_-- == 1); }
+void Mutex::Lock() { assert(--mutex_ == -1); }
+void Mutex::Unlock() { assert(mutex_++ == -1); }
bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; }
-void Mutex::ReaderLock() { Lock(); }
-void Mutex::ReaderUnlock() { Unlock(); }
+void Mutex::ReaderLock() { assert(++mutex_ > 0); }
+void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
@@ -212,4 +217,4 @@ class WriterMutexLock {
#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)
-#endif /* #define GOOGLE_MUTEX_H__ */
+#endif /* #define GOOGLE_SIMPLE_MUTEX_H__ */
diff --git a/src/base/spinlock.cc b/src/base/spinlock.cc
index 12761b1..2e601eb 100644
--- a/src/base/spinlock.cc
+++ b/src/base/spinlock.cc
@@ -37,61 +37,19 @@
#ifdef HAVE_UNISTD_H
#include <unistd.h> /* For nanosleep() on Windows, read() */
#endif
-#if defined(__MACH__) && defined(__APPLE__)
-#include <sys/types.h>
-#include <sys/sysctl.h> /* how we figure out numcpu's on OS X */
-#endif
#include <fcntl.h> /* for open(), O_RDONLY */
#include <string.h> /* for strncmp */
#include <errno.h>
#include "base/spinlock.h"
+#include "base/sysinfo.h" /* for NumCPUs() */
-static int adaptive_spin_count = 0;
-static int num_cpus = -1;
-
-// It's important this not call malloc! -- it is called at global-construct
-// time, before we've set up all our proper malloc hooks and such.
-static int NumCPUs() {
- if (num_cpus > 0)
- return num_cpus; // value is cached
+// We can do contention-profiling of SpinLocks, but the code is in
+// mutex.cc, which is not always linked in with spinlock. Hence we
+// provide this weak definition, which is used if mutex.cc isn't linked in.
+ATTRIBUTE_WEAK extern void SubmitSpinLockProfileData(const void *, int64);
+void SubmitSpinLockProfileData(const void *, int64) {}
-#if defined(__MACH__) && defined(__APPLE__)
- int cpus;
- size_t size = sizeof(cpus);
- int numcpus_name[] = { CTL_HW, HW_NCPU };
- if (::sysctl(numcpus_name, arraysize(numcpus_name), &cpus, &size, 0, 0) == 0
- && (size == sizeof(cpus)))
- num_cpus = cpus;
- else
- num_cpus = 1; // conservative assumption
- return num_cpus;
-#else /* hope the information is in /proc */
- const char* pname = "/proc/cpuinfo";
- int fd = open(pname, O_RDONLY);
- if (fd == -1) {
- num_cpus = 1; // conservative assumption; TODO: do better
- return num_cpus;
- }
- char line[1024];
- int chars_read;
- num_cpus = 0;
- do { // a line at a time
- chars_read = 0;
- char* linepos = line;
- while (linepos - line < sizeof(line)-1 &&
- (chars_read=read(fd, linepos, 1)) == 1 &&
- *linepos != '\n') // read one line
- linepos++;
- *linepos = '\0'; // terminate the line at the \n
- if (strncmp(line, "processor ", sizeof("processor ")-1) == 0)
- num_cpus++;
- } while (chars_read > 0); // stop when we get to EOF
- close(fd);
- if (num_cpus == 0)
- num_cpus = 1; // conservative assumption
- return num_cpus;
-#endif
-}
+static int adaptive_spin_count = 0;
struct SpinLock_InitHelper {
SpinLock_InitHelper() {
diff --git a/src/base/spinlock.h b/src/base/spinlock.h
index b3c9596..0ad8587 100644
--- a/src/base/spinlock.h
+++ b/src/base/spinlock.h
@@ -71,8 +71,24 @@ class SpinLock {
}
}
+ inline bool TryLock() {
+ return (Acquire_CompareAndSwap(&lockword_, 0, 1) == 0);
+ }
+
inline void Unlock() {
+ // This is defined in mutex.cc.
+ extern void SubmitSpinLockProfileData(const void *, int64);
+
+ int64 wait_timestamp = static_cast<uint32>(lockword_);
+
Release_Store(&lockword_, 0);
+ // Collect contention profile info if this lock was contended.
+ // The lockword_ value indicates when the waiter started waiting
+ if (wait_timestamp != 1) {
+ // Subtract one from wait_timestamp as antidote to "now |= 1;"
+ // in SlowLock().
+ SubmitSpinLockProfileData(this, wait_timestamp - 1);
+ }
}
// Report if we think the lock can be held by this thread.
@@ -83,6 +99,17 @@ class SpinLock {
return lockword_ != 0;
}
+ // The timestamp for contention lock profiling must fit into 31 bits.
+ // as lockword_ is 32 bits and we loose an additional low-order bit due
+ // to the statement "now |= 1" in SlowLock().
+ // To select 31 bits from the 64-bit cycle counter, we shift right by
+ // PROFILE_TIMESTAMP_SHIFT = 7.
+ // Using these 31 bits, we reduce granularity of time measurement to
+ // 256 cycles, and will loose track of wait time for waits greater than
+ // 109 seconds on a 5 GHz machine, longer for faster clock cycles.
+ // Waits this long should be very rare.
+ enum { PROFILE_TIMESTAMP_SHIFT = 7 };
+
private:
// Lock-state: 0 means unlocked, 1 means locked
volatile AtomicWord lockword_;
diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc
index 2d7a593..df0452a 100644
--- a/src/base/sysinfo.cc
+++ b/src/base/sysinfo.cc
@@ -42,12 +42,20 @@
#if defined __MACH__ // Mac OS X, almost certainly
#include <mach-o/dyld.h> // for iterating over dll's in ProcMapsIter
#include <mach-o/loader.h> // for iterating over dll's in ProcMapsIter
+#include <sys/types.h>
+#include <sys/sysctl.h> // how we figure out numcpu's on OS X
+#elif defined __FreeBSD__
+#include <sys/sysctl.h>
#elif defined __sun__ // Solaris
#include <procfs.h> // for, e.g., prmap_t
+#elif defined _MSC_VER // Windows
+#include <process.h> // for getpid() (actually, _getpid())
+#include <shlwapi.h> // for SHGetValueA()
#endif
#include "base/sysinfo.h"
#include "base/commandlineflags.h"
#include "base/logging.h"
+#include "base/cycleclock.h"
#if defined(WIN32) && defined(MODULEENTRY32)
// In a change from the usual W-A pattern, there is no A variant of
@@ -81,6 +89,12 @@
# define safeclose(fd) close(fd)
#endif
+// ----------------------------------------------------------------------
+// GetenvBeforeMain()
+// GetUniquePathFromEnv()
+// Some non-trivial getenv-related functions.
+// ----------------------------------------------------------------------
+
const char* GetenvBeforeMain(const char* name) {
// static is ok because this function should only be called before
// main(), when we're single-threaded.
@@ -111,6 +125,244 @@ const char* GetenvBeforeMain(const char* name) {
return NULL; // env var never found
}
+// This takes as an argument an environment-variable name (like
+// CPUPROFILE) whose value is supposed to be a file-path, and sets
+// path to that path, and returns true. If the env var doesn't exist,
+// or is the empty string, leave path unchanged and returns false.
+// The reason this is non-trivial is that this function handles munged
+// pathnames. Here's why:
+//
+// If we're a child process of the 'main' process, we can't just use
+// getenv("CPUPROFILE") -- the parent process will be using that path.
+// Instead we append our pid to the pathname. How do we tell if we're a
+// child process? Ideally we'd set an environment variable that all
+// our children would inherit. But -- and this is seemingly a bug in
+// gcc -- if you do a setenv() in a shared libarary in a global
+// constructor, the environment setting is lost by the time main() is
+// called. The only safe thing we can do in such a situation is to
+// modify the existing envvar. So we do a hack: in the parent, we set
+// the high bit of the 1st char of CPUPROFILE. In the child, we
+// notice the high bit is set and append the pid(). This works
+// assuming cpuprofile filenames don't normally have the high bit set
+// in their first character! If that assumption is violated, we'll
+// still get a profile, but one with an unexpected name.
+// TODO(csilvers): set an envvar instead when we can do it reliably.
+bool GetUniquePathFromEnv(const char* env_name, char* path) {
+ char* envval = getenv(env_name);
+ if (envval == NULL || *envval == '\0')
+ return false;
+ if (envval[0] & 128) { // high bit is set
+ snprintf(path, PATH_MAX, "%c%s_%u", // add pid and clear high bit
+ envval[0] & 127, envval+1, (unsigned int)(getpid()));
+ } else {
+ snprintf(path, PATH_MAX, "%s", envval);
+ envval[0] |= 128; // set high bit for kids to see
+ }
+ return true;
+}
+
+// ----------------------------------------------------------------------
+// CyclesPerSecond()
+// NumCPUs()
+// It's important this not call malloc! -- they may be called at
+// global-construct time, before we've set up all our proper malloc
+// hooks and such.
+// ----------------------------------------------------------------------
+
+static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous
+static int cpuinfo_num_cpus = 1; // Conservative guess
+
+static void SleepForMilliseconds(int milliseconds) {
+#ifdef WIN32
+ _sleep(milliseconds); // Windows's _sleep takes milliseconds argument
+#else
+ // Sleep for a few milliseconds
+ struct timespec sleep_time;
+ sleep_time.tv_sec = milliseconds / 1000;
+ sleep_time.tv_nsec = (milliseconds % 1000) * 1000000;
+ while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
+ ; // Ignore signals and wait for the full interval to elapse.
+#endif
+}
+
+// Helper function estimates cycles/sec by observing cycles elapsed during
+// sleep(). Using small sleep time decreases accuracy significantly.
+static int64 EstimateCyclesPerSecond(const int estimate_time_ms) {
+ assert(estimate_time_ms > 0);
+ if (estimate_time_ms <= 0)
+ return 1;
+ double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much
+
+ const int64 start_ticks = CycleClock::Now();
+ SleepForMilliseconds(estimate_time_ms);
+ const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks));
+ return guess;
+}
+
+// WARNING: logging calls back to InitializeSystemInfo() so it must
+// not invoke any logging code. Also, InitializeSystemInfo() can be
+// called before main() -- in fact it *must* be since already_called
+// isn't protected -- before malloc hooks are properly set up, so
+// we make an effort not to call any routines which might allocate
+// memory.
+
+static void InitializeSystemInfo() {
+ static bool already_called = false; // safe if we run before threads
+ if (already_called) return;
+ already_called = true;
+
+ // I put in a never-called reference to EstimateCyclesPerSecond() here
+ // to silence the compiler for OS's that don't need it
+ if (0) EstimateCyclesPerSecond(0);
+
+#if defined __linux__
+ char line[1024];
+ char* err;
+
+ // If CPU scaling is in effect, we want to use the *maximum* frequency,
+ // not whatever CPU speed some random processor happens to be using now.
+ bool saw_mhz = false;
+ const char* pname0 = "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq";
+ int fd0 = open(pname0, O_RDONLY);
+ if (fd0 != -1) {
+ memset(line, '\0', sizeof(line));
+ read(fd0, line, sizeof(line));
+ const int max_freq = strtol(line, &err, 10);
+ if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
+ // The value is in kHz. For example, on a 2GHz machine, the file
+ // contains the value "2000000". Historically this file contained no
+ // newline, but at some point the kernel started appending a newline.
+ cpuinfo_cycles_per_second = max_freq * 1000.0;
+ saw_mhz = true;
+ }
+ close(fd0);
+ }
+
+ // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
+ const char* pname = "/proc/cpuinfo";
+ int fd = open(pname, O_RDONLY);
+ if (fd == -1) {
+ perror(pname);
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+ return; // TODO: use generic tester instead?
+ }
+
+ double bogo_clock = 1.0;
+ int num_cpus = 0;
+ line[0] = line[1] = '\0';
+ int chars_read = 0;
+ do { // we'll exit when the last read didn't read anything
+ // Move the next line to the beginning of the buffer
+ const int oldlinelen = strlen(line);
+ if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
+ line[0] = '\0';
+ else // still other lines left to save
+ memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
+ // Terminate the new line, reading more if we can't find the newline
+ char* newline = strchr(line, '\n');
+ if (newline == NULL) {
+ const int linelen = strlen(line);
+ const int bytes_to_read = sizeof(line)-1 - linelen;
+ assert(bytes_to_read > 0); // because the memmove recovered >=1 bytes
+ chars_read = read(fd, line + linelen, bytes_to_read);
+ line[linelen + chars_read] = '\0';
+ newline = strchr(line, '\n');
+ }
+ if (newline != NULL)
+ *newline = '\0';
+
+ if (!saw_mhz && strncmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
+ const char* freqstr = strchr(line, ':');
+ if (freqstr) {
+ cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
+ if (freqstr[1] != '\0' && *err == '\0')
+ saw_mhz = true;
+ }
+ } else if (strncmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
+ const char* freqstr = strchr(line, ':');
+ if (freqstr)
+ bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
+ if (freqstr == NULL || freqstr[1] == '\0' || *err != '\0')
+ bogo_clock = 1.0;
+ } else if (strncmp(line, "processor", sizeof("processor")-1) == 0) {
+ num_cpus++; // count up every time we see an "processor :" entry
+ }
+ } while (chars_read > 0);
+ close(fd);
+
+ if (!saw_mhz) {
+ // If we didn't find anything better, we'll use bogomips, but
+ // we're not happy about it.
+ cpuinfo_cycles_per_second = bogo_clock;
+ }
+ if (cpuinfo_cycles_per_second == 0.0) {
+ cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe
+ }
+ if (num_cpus > 0) {
+ cpuinfo_num_cpus = num_cpus;
+ }
+
+#elif defined __FreeBSD__
+ // For this sysctl to work, the machine must be configured without
+ // SMP, APIC, or APM support.
+ unsigned int hz = 0;
+ size_t sz = sizeof(hz);
+ const char *sysctl_path = "machdep.tsc_freq";
+ if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+ fprintf(stderr, "Unable to determine clock rate from sysctl: %s\n",
+ sysctl_path);
+ } else {
+ cpuinfo_cycles_per_second = hz;
+ }
+ // TODO(csilvers): also figure out cpuinfo_num_cpus
+
+#elif defined WIN32
+# pragma comment(lib, "shlwapi.lib") // for SHGetValue()
+ // In NT, read MHz from the registry. If we fail to do so or we're in win9x
+ // then make a crude estimate.
+ OSVERSIONINFO os;
+ os.dwOSVersionInfoSize = sizeof(os);
+ DWORD data, data_size = sizeof(data);
+ if (GetVersionEx(&os) &&
+ os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
+ SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
+ "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
+ "~MHz", NULL, &data, &data_size)))
+ cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
+ else
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
+ // TODO(csilvers): also figure out cpuinfo_num_cpus
+
+#elif defined(__MACH__) && defined(__APPLE__)
+ // TODO(csilvers): can we do better than this?
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+
+ int num_cpus = 0;
+ size_t size = sizeof(num_cpus);
+ int numcpus_name[] = { CTL_HW, HW_NCPU };
+ if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0)
+ == 0
+ && (size == sizeof(num_cpus)))
+ cpuinfo_num_cpus = num_cpus;
+
+#else
+ // Generic cycles per second counter
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+#endif
+}
+
+double CyclesPerSecond(void) {
+ InitializeSystemInfo();
+ return cpuinfo_cycles_per_second;
+}
+
+int NumCPUs(void) {
+ InitializeSystemInfo();
+ return cpuinfo_num_cpus;
+}
+
+// ----------------------------------------------------------------------
#if defined __linux__ || defined __FreeBSD__ || defined __sun__
static void ConstructFilename(const char* spec, pid_t pid,
diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h
index 35f31ad..e22736c 100644
--- a/src/base/sysinfo.h
+++ b/src/base/sysinfo.h
@@ -55,7 +55,18 @@
// Note that /proc only has the environment at the time the application was
// started, so this routine ignores setenv() calls/etc. Also note it only
// reads the first 16K of the environment.
-const char* GetenvBeforeMain(const char* name);
+extern const char* GetenvBeforeMain(const char* name);
+
+// This takes as an argument an environment-variable name (like
+// CPUPROFILE) whose value is supposed to be a file-path, and sets
+// path to that path, and returns true. Non-trivial for surprising
+// reasons, as documented in sysinfo.cc. path must have space PATH_MAX.
+extern bool GetUniquePathFromEnv(const char* env_name, char* path);
+
+extern int NumCPUs();
+
+// processor cycles per second of each processor
+extern double CyclesPerSecond(void);
// A ProcMapsIterator abstracts access to /proc/maps for a given
diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h
index 4cd077a..3ed6587 100644
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@@ -258,18 +258,18 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker {
static bool HaveDisabledChecksUp(int stack_frames);
static bool HaveDisabledChecksAt(const void* address);
- // Ignore an object located at 'ptr'
- // (as well as all heap objects (transitively) referenced from it)
+ // Ignore an object located at 'ptr' (can go at the start or into the object)
+ // as well as all heap objects (transitively) referenced from it
// for the purposes of heap leak checking.
// If 'ptr' does not point to an active allocated object
// at the time of this call, it is ignored;
// but if it does, the object must not get deleted from the heap later on;
- // it must also be not already ignored at the time of this call.
+ // it must also be not already ignored at the time of this call.
static void IgnoreObject(const void* ptr);
// Undo what an earlier IgnoreObject() call promised and asked to do.
- // At the time of this call 'ptr' must point to an active allocated object
- // which was previously registered with IgnoreObject().
+ // At the time of this call 'ptr' must point at or inside of an active
+ // allocated object which was previously registered with IgnoreObject().
static void UnIgnoreObject(const void* ptr);
public: // Initializations; to be called from main() only.
@@ -303,8 +303,6 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker {
bool DoNoLeaksOnce(CheckType check_type,
CheckFullness fullness,
ReportMode report_mode);
- // Helper for IgnoreObject
- static void IgnoreObjectLocked(const void* ptr);
// Helper for DisableChecksAt
static void DisableChecksAtLocked(const void* address);
// Helper for DisableChecksIn
@@ -345,8 +343,6 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker {
// in a debug message to describe what kind of live object sources
// are being used.
static void IgnoreLiveObjectsLocked(const char* name, const char* name2);
- // Heap profile object filtering callback to filter out live objects.
- static bool HeapProfileFilter(const void* ptr, size_t size);
// Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially
// calls DoMainHeapCheck
static void RunHeapCleanups();
@@ -373,12 +369,13 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker {
uintptr_t start_address,
uintptr_t end_address);
- // Return true iff "*ptr" points to a heap object;
- // we also fill *object_size for this object then.
- // If yes, we might move "*ptr" to point to the very start of the object
- // (this needs to happen for C++ class array allocations
- // and for basic_string-s of C++ library that comes with gcc 3.4).
- static bool HaveOnHeapLocked(const void** ptr, size_t* object_size);
+ // Return true iff "*ptr" points to a heap object
+ // ("*ptr" can point at the start or inside of a heap object
+ // so that this works e.g. for pointers to C++ arrays, C++ strings,
+ // multiple-inherited objects, or pointers to members).
+ // We also fill *object_size for this object then
+ // and we move "*ptr" to point to the very start of the heap object.
+ static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size);
private:
diff --git a/src/google/profiler.h b/src/google/profiler.h
index 3c4106b..24ef2f3 100644
--- a/src/google/profiler.h
+++ b/src/google/profiler.h
@@ -121,27 +121,4 @@ struct ProfilerState {
};
extern "C" PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(ProfilerState* state);
-// ------------------------- ProfilerThreadState -----------------------
-// DEPRECATED: this class is no longer needed.
-//
-// A small helper class that allows a thread to periodically check if
-// profiling has been enabled or disabled, and to react appropriately
-// to ensure that activity in the current thread is included in the
-// profile. Usage:
-//
-// ProfileThreadState profile_state;
-// while (true) {
-// ... do some thread work ...
-// profile_state.ThreadCheck();
-// }
-class ProfilerThreadState {
- public:
- ProfilerThreadState() { }
-
- // Called in a thread to enable or disable profiling on the thread
- // based on whether profiling is currently on or off.
- // DEPRECATED: No longer needed
- void ThreadCheck() { }
-};
-
#endif /* BASE_PROFILER_H__ */
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index 7af372d..7b3376b 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -94,6 +94,12 @@ using std::max;
using std::less;
using std::char_traits;
+// This is the default if you don't link in -lprofiler
+extern "C" {
+ATTRIBUTE_WEAK PERFTOOLS_DLL_DECL bool ProfilingIsEnabledForAllThreads();
+bool ProfilingIsEnabledForAllThreads() { return false; }
+}
+
//----------------------------------------------------------------------
// Flags that control heap-checking
//----------------------------------------------------------------------
@@ -147,11 +153,37 @@ DEFINE_bool(heap_check_test_pointer_alignment,
"Set to true to check if the found leak can be due to "
"use of unaligned pointers");
+// A reasonable default to handle pointers inside of typical class objects:
+// Too low and we won't be able to traverse pointers to normally-used
+// nested objects and base parts of multiple-inherited objects.
+// Too high and it will both slow down leak checking (FindInsideAlloc
+// in HaveOnHeapLocked will get slower when there are large on-heap objects)
+// and make it probabilistically more likely to miss leaks
+// of large-sized objects.
+static const int64 kHeapCheckMaxPointerOffset = 1024;
+DEFINE_int64(heap_check_max_pointer_offset,
+ EnvToInt("HEAP_CHECK_MAX_POINTER_OFFSET",
+ kHeapCheckMaxPointerOffset),
+ "Largest pointer offset for which we traverse "
+ "pointers going inside of heap allocated objects. "
+ "Set to -1 to use the actual largest heap object size.");
+
DEFINE_bool(heap_check_run_under_gdb,
EnvToBool("HEAP_CHECK_RUN_UNDER_GDB", false),
"If false, turns off heap-checking library when running under gdb "
"(normally, set to 'true' only when debugging the heap-checker)");
+DEFINE_int32(heap_check_delay_seconds, 0,
+ "Number of seconds to delay on-exit heap checking."
+ " If you set this flag,"
+ " you may also want to set exit_timeout_seconds in order to"
+ " avoid exit timeouts.\n"
+ "NOTE: This flag is to be used only to help diagnose issues"
+ " where it is suspected that the heap checker is reporting"
+ " false leaks that will disappear if the heap checker delays"
+ " its checks. Report any such issues to the heap-checker"
+ " maintainer(s).");
+
//----------------------------------------------------------------------
DEFINE_string(heap_profile_pprof,
@@ -197,6 +229,24 @@ static pid_t heap_checker_pid = 0;
static bool constructor_heap_profiling = false;
//----------------------------------------------------------------------
+
+// Alignment at which all pointers in memory are supposed to be located;
+// use 1 if any alignment is ok.
+// heap_check_test_pointer_alignment flag guides if we try the value of 1.
+// The larger it can be, the lesser is the chance of missing real leaks.
+static const size_t kPointerSourceAlignment = sizeof(void*);
+
+// Alignment at which all pointers (in)to heap memory objects
+// are supposed to point; use 1 if any alignment is ok.
+// sizeof(void*) is good enough for all the cases we want to support
+// -- see PointsIntoHeapObject
+// The larger it can be, the lesser is the chance of missing real leaks.
+static const size_t kPointerDestAlignment = sizeof(uint32);
+ // need sizeof(uint32) even for 64 bit binaries to support
+ // e.g. the internal structure of UnicodeString in ICU.
+static const size_t kPointerDestAlignmentMask = kPointerDestAlignment - 1;
+
+//----------------------------------------------------------------------
// HeapLeakChecker's own memory allocator that is
// independent of the normal program allocator.
//----------------------------------------------------------------------
@@ -309,12 +359,6 @@ typedef map<HCL_string, LiveObjectsStack, less<HCL_string>,
> LibraryLiveObjectsStacks;
static LibraryLiveObjectsStacks* library_live_objects = NULL;
-// Objects to be removed from the heap profile when we dump it.
-typedef set<const void*, less<const void*>,
- STL_Allocator<const void*, HeapLeakChecker::Allocator>
- > ProfileAdjustObjectSet;
-static ProfileAdjustObjectSet* profile_adjust_objects = NULL;
-
// The disabled program counter addresses for profile dumping
// that are registered with HeapLeakChecker::DisableChecksUp
typedef set<uintptr_t, less<uintptr_t>,
@@ -361,16 +405,25 @@ static GlobalRegionCallerRangeMap* global_region_caller_ranges = NULL;
//----------------------------------------------------------------------
-// Simple hook into execution of global object constructors,
-// so that we do not call pthread_self() when it does not yet work.
-static bool libpthread_initialized = false;
-static bool initializer = (libpthread_initialized = true, true);
+// The size of the largest heap object allocated so far.
+static size_t max_heap_object_size = 0;
+// The possible range of addresses that can point
+// into one of the elements of heap_objects.
+static uintptr_t min_heap_address = uintptr_t(-1LL);
+static uintptr_t max_heap_address = 0;
+
+//----------------------------------------------------------------------
// Our hooks for MallocHook
static void NewHook(const void* ptr, size_t size) {
if (ptr != NULL) {
RAW_VLOG(7, "Recording Alloc: %p of %"PRIuS, ptr, size);
heap_checker_lock.Lock();
+ if (size > max_heap_object_size) max_heap_object_size = size;
+ uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
+ if (addr < min_heap_address) min_heap_address = addr;
+ addr += size;
+ if (addr > max_heap_address) max_heap_address = addr;
heap_profile->RecordAlloc(ptr, size, 0);
heap_checker_lock.Unlock();
RAW_VLOG(8, "Alloc Recorded: %p of %"PRIuS"", ptr, size);
@@ -570,7 +623,6 @@ static void MakeDisabledLiveCallback(const void* ptr,
// If the region is not writeable, then it cannot have any heap
// pointers in it, otherwise we record it as a candidate live region
// to get filtered later.
-
static void RecordGlobalDataLocked(uintptr_t start_address,
uintptr_t end_address,
const char* permissions,
@@ -716,9 +768,10 @@ HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked(
return PROC_MAPS_USED;
}
-// Total number and size of live objects dropped from the profile.
-static int64 live_objects_total = 0;
-static int64 live_bytes_total = 0;
+// Total number and size of live objects dropped from the profile;
+// (re)initialized in IgnoreAllLiveObjectsLocked.
+static int64 live_objects_total;
+static int64 live_bytes_total;
// pid of the thread that is doing the current leak check
// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
@@ -755,7 +808,7 @@ static enum {
int HeapLeakChecker::IgnoreLiveThreads(void* parameter,
int num_threads,
pid_t* thread_pids,
- va_list ap) {
+ va_list /*ap*/) {
thread_listing_status = CALLBACK_STARTED;
RAW_VLOG(2, "Found %d threads (from pid %d)", num_threads, getpid());
@@ -841,7 +894,7 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() {
// we do this liveness check for ignored_objects before doing any
// live heap walking to make sure it does not fail needlessly:
size_t object_size;
- if (!(HaveOnHeapLocked(&ptr, &object_size) &&
+ if (!(heap_profile->FindAlloc(ptr, &object_size) &&
object->second == object_size)) {
RAW_LOG(FATAL, "Object at %p of %"PRIuS" bytes from an"
" IgnoreObject() has disappeared", ptr, object->second);
@@ -957,11 +1010,44 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() {
}
}
+// Callback for ListAllProcessThreads in IgnoreAllLiveObjectsLocked below
+// to test/verify that we have just the one main thread, in which case
+// we can do everything in that main thread,
+// so that CPU profiler can collect all its samples.
+// Returns the number of threads in the process.
+static int IsOneThread(void* parameter, int num_threads,
+ pid_t* thread_pids, va_list ap) {
+ if (num_threads != 1) {
+ RAW_LOG(WARNING, "Have threads: Won't CPU-profile the bulk of "
+ "leak checking work happening in IgnoreLiveThreads!");
+ }
+ ResumeAllProcessThreads(num_threads, thread_pids);
+ return num_threads;
+}
+
+// Dummy for IgnoreAllLiveObjectsLocked below.
+// Making it global helps with compiler warnings.
+static va_list dummy_ap;
+
void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) {
RAW_CHECK(live_objects == NULL, "");
live_objects = new (Allocator::Allocate(sizeof(LiveObjectsStack)))
LiveObjectsStack;
stack_tops = new (Allocator::Allocate(sizeof(StackTopSet))) StackTopSet;
+ // reset the counts
+ live_objects_total = 0;
+ live_bytes_total = 0;
+ // Reduce max_heap_object_size to FLAGS_heap_check_max_pointer_offset
+ // for the time of leak check.
+ // FLAGS_heap_check_max_pointer_offset caps max_heap_object_size
+ // to manage reasonably low chances of random bytes
+ // appearing to be pointing into large actually leaked heap objects.
+ const size_t old_max_heap_object_size = max_heap_object_size;
+ max_heap_object_size = (
+ FLAGS_heap_check_max_pointer_offset != -1
+ ? min(size_t(FLAGS_heap_check_max_pointer_offset), max_heap_object_size)
+ : max_heap_object_size
+ );
// Record global data as live:
if (FLAGS_heap_check_ignore_global_live) {
library_live_objects =
@@ -974,13 +1060,27 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) {
self_thread_pid = getpid();
self_thread_stack_top = self_stack_top;
if (FLAGS_heap_check_ignore_thread_live) {
- // We fully suspend the threads right here before any liveness checking
+ // In case we are doing CPU profiling we'd like to do all the work
+ // in the main thread, not in the special thread created by
+ // ListAllProcessThreads, so that CPU profiler can collect all its samples.
+ // The machinery of ListAllProcessThreads conflicts with the CPU profiler
+ // by also relying on signals and ::sigaction.
+ // We can do this (run everything in the main thread) safely
+ // only if there's just the main thread itself in our process.
+ // This variable reflects these two conditions:
+ bool want_and_can_run_in_main_thread =
+ ProfilingIsEnabledForAllThreads() &&
+ ListAllProcessThreads(NULL, IsOneThread) == 1;
+ // When the normal path of ListAllProcessThreads below is taken,
+ // we fully suspend the threads right here before any liveness checking
// and keep them suspended for the whole time of liveness checking
// inside of the IgnoreLiveThreads callback.
// (The threads can't (de)allocate due to lock on the delete hook but
// if not suspended they could still mess with the pointer
// graph while we walk it).
- int r = ListAllProcessThreads(NULL, IgnoreLiveThreads);
+ int r = want_and_can_run_in_main_thread
+ ? IgnoreLiveThreads(NULL, 1, &self_thread_pid, dummy_ap)
+ : ListAllProcessThreads(NULL, IgnoreLiveThreads);
need_to_ignore_non_thread_objects = r < 0;
if (r < 0) {
RAW_LOG(WARNING, "Thread finding failed with %d errno=%d", r, errno);
@@ -1021,69 +1121,116 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) {
// Free these: we made them here and heap_profile never saw them
Allocator::DeleteAndNull(&live_objects);
Allocator::DeleteAndNull(&stack_tops);
+ max_heap_object_size = old_max_heap_object_size; // reset this var
}
// Alignment at which we should consider pointer positions
// in IgnoreLiveObjectsLocked. Use 1 if any alignment is ok.
-static size_t pointer_alignment = sizeof(void*);
-// Global lock for HeapLeakChecker::DoNoLeaks to protect pointer_alignment.
+static size_t pointer_source_alignment = kPointerSourceAlignment;
+// Global lock for HeapLeakChecker::DoNoLeaks
+// to protect pointer_source_alignment.
static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED);
-// This function does not change heap_profile's state:
-// we only record live objects to be skipped into profile_adjust_objects
-// instead of modifying the heap_profile itself.
+static const size_t kUnicodeStringOffset = sizeof(uint32);
+static const size_t kUnicodeStringAlignmentMask = kUnicodeStringOffset - 1;
+
+// This function changes the live bits in the heap_profile-table's state:
+// we only record the live objects to be skipped.
+//
+// When checking if a byte sequence points to a heap object we use
+// HeapProfileTable::FindInsideAlloc to handle both pointers to
+// the start and inside of heap-allocated objects.
+// The "inside" case needs to be checked to support
+// at least the following relatively common cases:
+// - C++ arrays allocated with new FooClass[size] for classes
+// with destructors have their size recorded in a sizeof(int) field
+// before the place normal pointers point to.
+// - basic_string<>-s for e.g. the C++ library of gcc 3.4
+// have the meta-info in basic_string<...>::_Rep recorded
+// before the place normal pointers point to.
+// - Multiple-inherited objects have their pointers when cast to
+// different base classes pointing inside of the actually
+// allocated object.
+// - Sometimes reachability pointers point to member objects of heap objects,
+// and then those member objects point to the full heap object.
+// - Third party UnicodeString: it stores a 32-bit refcount
+// (in both 32-bit and 64-bit binaries) as the first uint32
+// in the allocated memory and a normal pointer points at
+// the second uint32 behind the refcount.
+// By finding these additional objects here
+// we slightly increase the chance to mistake random memory bytes
+// for a pointer and miss a leak in a particular run of a binary.
void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
const char* name2) {
int64 live_object_count = 0;
int64 live_byte_count = 0;
while (!live_objects->empty()) {
- const void* object = live_objects->back().ptr;
+ const char* object =
+ reinterpret_cast<const char*>(live_objects->back().ptr);
size_t size = live_objects->back().size;
const ObjectPlacement place = live_objects->back().place;
live_objects->pop_back();
- size_t object_size;
- if (place == MUST_BE_ON_HEAP &&
- HaveOnHeapLocked(&object, &object_size) &&
- profile_adjust_objects->insert(object).second) {
+ if (place == MUST_BE_ON_HEAP && heap_profile->MarkAsLive(object)) {
live_object_count += 1;
live_byte_count += size;
}
RAW_VLOG(4, "Looking for heap pointers in %p of %"PRIuS" bytes",
object, size);
+ const char* const whole_object = object;
+ size_t const whole_size = size;
// Try interpretting any byte sequence in object,size as a heap pointer:
const size_t remainder =
- reinterpret_cast<uintptr_t>(object) % pointer_alignment;
+ reinterpret_cast<uintptr_t>(object) % pointer_source_alignment;
if (remainder) {
- object = (reinterpret_cast<const char*>(object) +
- pointer_alignment - remainder);
- if (size >= pointer_alignment - remainder) {
- size -= pointer_alignment - remainder;
+ object += pointer_source_alignment - remainder;
+ if (size >= pointer_source_alignment - remainder) {
+ size -= pointer_source_alignment - remainder;
} else {
size = 0;
}
}
- while (size >= sizeof(void*)) {
- const void* ptr;
- memcpy(&ptr, object, sizeof(ptr)); // size-independent UNALIGNED_LOAD
- const void* current_object = object;
- object = reinterpret_cast<const char*>(object) + pointer_alignment;
- size -= pointer_alignment;
- if (ptr == NULL) continue;
- RAW_VLOG(8, "Trying pointer to %p at %p", ptr, current_object);
- size_t object_size;
- if (HaveOnHeapLocked(&ptr, &object_size) &&
- profile_adjust_objects->insert(ptr).second) {
- // We take the (hopefully low) risk here of encountering by accident
- // a byte sequence in memory that matches an address of
- // a heap object which is in fact leaked.
- // I.e. in very rare and probably not repeatable/lasting cases
- // we might miss some real heap memory leaks.
- RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p",
- ptr, object_size, current_object);
- live_object_count += 1;
- live_byte_count += object_size;
- live_objects->push_back(AllocObject(ptr, object_size, IGNORED_ON_HEAP));
+ if (size < sizeof(void*)) continue;
+ const char* const max_object = object + size - sizeof(void*);
+ while (object <= max_object) {
+ // potentially unaligned load:
+ const uintptr_t addr = *reinterpret_cast<const uintptr_t*>(object);
+ // Do fast check before the more expensive HaveOnHeapLocked lookup:
+ // this code runs for all memory words that are potentially pointers:
+ const bool can_be_on_heap =
+ // Order tests by the likelyhood of the test failing in 64/32 bit modes.
+ // Yes, this matters: we either lose 5..6% speed in 32 bit mode
+ // (which is already slower) or by a factor of 1.5..1.91 in 64 bit mode.
+#if defined(__x86_64__)
+ addr < max_heap_address &&
+ (addr & kUnicodeStringAlignmentMask) == 0 && // must be aligned
+ min_heap_address <= addr;
+#else
+ (addr & kUnicodeStringAlignmentMask) == 0 && // must be aligned
+ min_heap_address <= addr &&
+ addr < max_heap_address;
+#endif
+ if (can_be_on_heap) {
+ const void* ptr = reinterpret_cast<const void*>(addr);
+ // Too expensive (inner loop): manually uncomment when debugging:
+ // RAW_VLOG(8, "Trying pointer to %p at %p", ptr, object);
+ size_t object_size;
+ if (HaveOnHeapLocked(&ptr, &object_size) &&
+ heap_profile->MarkAsLive(ptr)) {
+ // We take the (hopefully low) risk here of encountering by accident
+ // a byte sequence in memory that matches an address of
+ // a heap object which is in fact leaked.
+ // I.e. in very rare and probably not repeatable/lasting cases
+ // we might miss some real heap memory leaks.
+ RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p "
+ "inside %p of size %"PRIuS"",
+ ptr, object_size, object, whole_object, whole_size);
+ live_object_count += 1;
+ live_byte_count += object_size;
+ live_objects->push_back(AllocObject(ptr, object_size,
+ IGNORED_ON_HEAP));
+ }
}
+ object += pointer_source_alignment;
}
}
live_objects_total += live_object_count;
@@ -1094,16 +1241,6 @@ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
}
}
-bool HeapLeakChecker::HeapProfileFilter(const void* ptr, size_t size) {
- if (profile_adjust_objects->find(ptr) != profile_adjust_objects->end()) {
- RAW_VLOG(4, "Ignoring object at %p of %"PRIuS" bytes", ptr, size);
- // erase so we can later test that all adjust-objects got utilized
- profile_adjust_objects->erase(ptr);
- return true;
- }
- return false;
-}
-
//----------------------------------------------------------------------
// HeapLeakChecker leak check disabling components
//----------------------------------------------------------------------
@@ -1180,13 +1317,10 @@ void HeapLeakChecker::DisableChecksToHereFrom(const void* start_address) {
void HeapLeakChecker::IgnoreObject(const void* ptr) {
if (!heap_checker_on) return;
heap_checker_lock.Lock();
- IgnoreObjectLocked(ptr);
- heap_checker_lock.Unlock();
-}
-
-void HeapLeakChecker::IgnoreObjectLocked(const void* ptr) {
size_t object_size;
- if (HaveOnHeapLocked(&ptr, &object_size)) {
+ if (!HaveOnHeapLocked(&ptr, &object_size)) {
+ RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr);
+ } else {
RAW_VLOG(1, "Going to ignore live object at %p of %"PRIuS" bytes",
ptr, object_size);
if (ignored_objects == NULL) {
@@ -1194,32 +1328,34 @@ void HeapLeakChecker::IgnoreObjectLocked(const void* ptr) {
IgnoredObjectsMap;
}
if (!ignored_objects->insert(make_pair(reinterpret_cast<uintptr_t>(ptr),
- object_size)).second) {
+ object_size)).second) {
RAW_LOG(FATAL, "Object at %p is already being ignored", ptr);
}
}
+ heap_checker_lock.Unlock();
}
void HeapLeakChecker::UnIgnoreObject(const void* ptr) {
if (!heap_checker_on) return;
heap_checker_lock.Lock();
size_t object_size;
- bool ok = HaveOnHeapLocked(&ptr, &object_size);
- if (ok) {
- ok = false;
+ if (!HaveOnHeapLocked(&ptr, &object_size)) {
+ RAW_LOG(FATAL, "No live heap object at %p to un-ignore", ptr);
+ } else {
+ bool found = false;
if (ignored_objects) {
IgnoredObjectsMap::iterator object =
ignored_objects->find(reinterpret_cast<uintptr_t>(ptr));
if (object != ignored_objects->end() && object_size == object->second) {
ignored_objects->erase(object);
- ok = true;
+ found = true;
RAW_VLOG(1, "Now not going to ignore live object "
"at %p of %"PRIuS" bytes", ptr, object_size);
}
}
+ if (!found) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr);
}
heap_checker_lock.Unlock();
- if (!ok) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr);
}
//----------------------------------------------------------------------
@@ -1234,7 +1370,7 @@ void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type,
(profile_type == START_PROFILE ? "Starting"
: "At an end point for"),
name_,
- (pointer_alignment == 1 ? " w/o pointer alignment" : ""));
+ (pointer_source_alignment == 1 ? " w/o pointer alignment" : ""));
// Sanity check that nobody is messing with the hooks we need:
// Important to have it here: else we can misteriously SIGSEGV
// in IgnoreLiveObjectsLocked inside ListAllProcessThreads's callback
@@ -1245,11 +1381,7 @@ void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type,
RAW_LOG(FATAL, "new/delete malloc hooks got changed");
}
// Make the heap profile, other threads are locked out.
- RAW_CHECK(profile_adjust_objects == NULL, "");
const int alloc_count = Allocator::alloc_count();
- profile_adjust_objects =
- new (Allocator::Allocate(sizeof(ProfileAdjustObjectSet)))
- ProfileAdjustObjectSet;
IgnoreAllLiveObjectsLocked(self_stack_top);
const int len = profile_prefix->size() + strlen(name_) + 10 + 2;
char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len));
@@ -1258,15 +1390,12 @@ void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type,
profile_type == START_PROFILE ? "-beg" : "-end",
HeapProfileTable::kFileExt);
HeapProfileTable::Stats stats;
- bool ok = heap_profile->DumpFilteredProfile(
- file_name, HeapProfileFilter, FLAGS_heap_check_identify_leaks, &stats);
+ bool ok = heap_profile->DumpNonLiveProfile(
+ file_name, FLAGS_heap_check_identify_leaks, &stats);
RAW_CHECK(ok, "No sense to continue");
*alloc_bytes = stats.alloc_size - stats.free_size;
*alloc_objects = stats.allocs - stats.frees;
Allocator::Free(file_name);
- RAW_CHECK(profile_adjust_objects->empty(),
- "Some objects to ignore are not on the heap");
- Allocator::DeleteAndNull(&profile_adjust_objects);
// Check that we made no leaks ourselves:
if (Allocator::alloc_count() != alloc_count) {
RAW_LOG(FATAL, "Internal HeapChecker leak of %d objects",
@@ -1416,18 +1545,18 @@ static int GetStatusOutput(const char* command, string* output) {
}
// RAW_LOG 'str' line by line to prevent its truncation in RAW_LOG:
-static void RawLogLines(const string& str) {
+static void RawLogLines(LogSeverity severity, const string& str) {
int p = 0;
while (1) {
int l = str.find('\n', p);
if (l == string::npos) {
if (str[p]) { // print last line if non empty
- RAW_LOG(INFO, "%s", str.c_str() + p);
+ RAW_LOG(severity, "%s", str.c_str() + p);
}
break;
}
const_cast<string&>(str)[l] = '\0'; // safe for our use case
- RAW_LOG(INFO, "%s", str.c_str() + p);
+ RAW_LOG(severity, "%s", str.c_str() + p);
const_cast<string&>(str)[l] = '\n';
p = l + 1;
}
@@ -1441,9 +1570,9 @@ bool HeapLeakChecker::DoNoLeaks(CheckType check_type,
alignment_checker_lock.Lock();
bool result;
if (FLAGS_heap_check_test_pointer_alignment) {
- pointer_alignment = 1;
+ pointer_source_alignment = 1;
bool result_wo_align = DoNoLeaksOnce(check_type, fullness, NO_REPORT);
- pointer_alignment = sizeof(void*);
+ pointer_source_alignment = kPointerSourceAlignment;
result = DoNoLeaksOnce(check_type, fullness, report_mode);
if (!result) {
if (result_wo_align) {
@@ -1467,9 +1596,17 @@ bool HeapLeakChecker::DoNoLeaks(CheckType check_type,
}
RAW_LOG(INFO, "If you are totally puzzled about why the leaks are there, "
"try rerunning it with "
- "setenv HEAP_CHECK_TEST_POINTER_ALIGNMENT=1");
+ "setenv HEAP_CHECK_TEST_POINTER_ALIGNMENT=1 and/or with "
+ "setenv HEAP_CHECK_MAX_POINTER_OFFSET=-1");
}
}
+ if (result && FLAGS_heap_check_max_pointer_offset == -1) {
+ RAW_LOG(WARNING, "Found no leaks without max_pointer_offset restriction: "
+ "it's possible that the default value of "
+ "heap_check_max_pointer_offset flag is too low. "
+ "Do you use pointers with larger than that offsets "
+ "pointing in the middle of heap-allocated objects?");
+ }
alignment_checker_lock.Unlock();
return result;
}
@@ -1590,9 +1727,9 @@ bool HeapLeakChecker::DoNoLeaksOnce(CheckType check_type,
}
if (see_leaks && report_mode == PPROF_REPORT) {
if (checked_leaks) {
- RAW_LOG(INFO, "Below is (less informative) textual version "
- "of this pprof command's output:");
- RawLogLines(output);
+ RAW_LOG(ERROR, "Below is (less informative) textual version "
+ "of this pprof command's output:");
+ RawLogLines(ERROR, output);
} else {
RAW_LOG(ERROR, "The pprof command has failed");
}
@@ -1818,6 +1955,9 @@ REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker::InternalInitStart());
void HeapLeakChecker::DoMainHeapCheck() {
RAW_DCHECK(heap_checker_pid == getpid() && do_main_heap_check, "");
+ if (FLAGS_heap_check_delay_seconds > 0) {
+ sleep(FLAGS_heap_check_delay_seconds);
+ }
if (!NoGlobalLeaks()) {
if (FLAGS_heap_check_identify_leaks) {
RAW_LOG(FATAL, "Whole-program memory leaks found.");
@@ -2142,66 +2282,32 @@ void HeapLeakChecker::DisableChecksAtLocked(const void* address) {
}
}
-bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr, size_t* object_size) {
- RAW_DCHECK(heap_checker_lock.IsHeld(), "");
- // Size of the C++ object array size integer
- // (potentially compiler dependent; 4 on i386 and gcc; 8 on x86_64 and gcc)
- const int kArraySizeOffset = sizeof(size_t);
- // sizeof(basic_string<...>::_Rep) for C++ library of gcc 3.4
- // (basically three integer counters;
- // library/compiler dependent; 12 on i386 and gcc)
- const int kStringOffset = sizeof(size_t) * 3;
- // Size of refcount used by UnicodeString in third_party/icu.
- const int kUnicodeStringOffset = sizeof(uint32);
- // NOTE: One can add more similar offset cases below
- // even when they do not happen for the used compiler/library;
- // all that's impacted is
- // - HeapLeakChecker's performace during live heap walking
- // - and a slightly greater chance to mistake random memory bytes
- // for a pointer and miss a leak in a particular run of a binary.
- bool result = true;
- if (heap_profile->FindAlloc(*ptr, object_size)) {
- // done
- } else if (heap_profile->FindAlloc(reinterpret_cast<const char*>(*ptr)
- - kArraySizeOffset,
- object_size) &&
- *object_size > kArraySizeOffset) {
- // this case is to account for the array size stored inside of
- // the memory allocated by new FooClass[size] for classes with destructors
- *ptr = reinterpret_cast<const char*>(*ptr) - kArraySizeOffset;
- RAW_VLOG(7, "Got poiter into %p at +%d", ptr, kArraySizeOffset);
- } else if (heap_profile->FindAlloc(reinterpret_cast<const char*>(*ptr)
- - kStringOffset,
- object_size) &&
- *object_size > kStringOffset) {
- // this case is to account for basic_string<> representation in
- // newer C++ library versions when the kept pointer points to inside of
- // the allocated region
- *ptr = reinterpret_cast<const char*>(*ptr) - kStringOffset;
- RAW_VLOG(7, "Got poiter into %p at +%d", ptr, kStringOffset);
- } else if (kUnicodeStringOffset != kArraySizeOffset &&
- heap_profile->FindAlloc(
- reinterpret_cast<const char*>(*ptr) - kUnicodeStringOffset,
- object_size) &&
- *object_size > kUnicodeStringOffset) {
- // this case is to account for third party UnicodeString.
- // UnicodeString stores a 32-bit refcount (in both 32-bit and
- // 64-bit binaries) as the first uint32 in the allocated memory
- // and a pointer points into the second uint32 behind the refcount.
- *ptr = reinterpret_cast<const char*>(*ptr) - kUnicodeStringOffset;
- RAW_VLOG(7, "Got poiter into %p at +%d", ptr, kUnicodeStringOffset);
- } else {
- result = false;
+inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr,
+ size_t* object_size) {
+ const uintptr_t addr = reinterpret_cast<uintptr_t>(*ptr);
+ if (heap_profile->FindInsideAlloc(
+ *ptr, max_heap_object_size, ptr, object_size)) {
+ const size_t offset = addr - reinterpret_cast<uintptr_t>(*ptr);
+ // must be aligned to kPointerDestAlignmentMask,
+ // kUnicodeStringOffset is a special case.
+ if ((offset & kPointerDestAlignmentMask) == 0 ||
+ offset == kUnicodeStringOffset) {
+ RAW_VLOG(7, "Got pointer into %p at +%"PRIuS" offset", *ptr, offset);
+ RAW_DCHECK((addr & kUnicodeStringAlignmentMask) == 0, "");
+ // alignment of at least kUnicodeStringAlignment
+ // must have been already ensured
+ return true;
+ }
}
- return result;
+ return false;
}
const void* HeapLeakChecker::GetAllocCaller(void* ptr) {
- // this is used only in unittest, so the heavy checks are fine
+ // this is used only in the unittest, so the heavy checks are fine
HeapProfileTable::AllocInfo info;
heap_checker_lock.Lock();
- CHECK(heap_profile->FindAllocDetails(ptr, &info));
+ RAW_CHECK(heap_profile->FindAllocDetails(ptr, &info), "");
heap_checker_lock.Unlock();
- CHECK(info.stack_depth >= 1);
+ RAW_CHECK(info.stack_depth >= 1, "");
return info.call_stack[0];
}
diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc
index 877951a..ec591b2 100644
--- a/src/heap-profile-table.cc
+++ b/src/heap-profile-table.cc
@@ -233,7 +233,7 @@ void HeapProfileTable::RecordAlloc(const void* ptr, size_t bytes,
total_.alloc_size += bytes;
AllocValue v;
- v.bucket = b;
+ v.set_bucket(b); // also did set_live(false)
v.bytes = bytes;
allocation_->Insert(ptr, v);
}
@@ -241,7 +241,7 @@ void HeapProfileTable::RecordAlloc(const void* ptr, size_t bytes,
void HeapProfileTable::RecordFree(const void* ptr) {
AllocValue v;
if (allocation_->FindAndRemove(ptr, &v)) {
- Bucket* b = v.bucket;
+ Bucket* b = v.bucket();
b->frees++;
b->free_size += v.bytes;
total_.frees++;
@@ -250,36 +250,39 @@ void HeapProfileTable::RecordFree(const void* ptr) {
}
bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const {
- AllocValue alloc_value;
- if (allocation_->Find(ptr, &alloc_value)) {
- *object_size = alloc_value.bytes;
- return true;
- }
- return false;
+ const AllocValue* alloc_value = allocation_->Find(ptr);
+ if (alloc_value != NULL) *object_size = alloc_value->bytes;
+ return alloc_value != NULL;
}
-bool HeapProfileTable::FindAllocDetails(const void* ptr, AllocInfo* info) const {
- AllocValue alloc_value;
- if (allocation_->Find(ptr, &alloc_value)) {
- info->object_size = alloc_value.bytes;
- info->call_stack = alloc_value.bucket->stack;
- info->stack_depth = alloc_value.bucket->depth;
- return true;
+bool HeapProfileTable::FindAllocDetails(const void* ptr,
+ AllocInfo* info) const {
+ const AllocValue* alloc_value = allocation_->Find(ptr);
+ if (alloc_value != NULL) {
+ info->object_size = alloc_value->bytes;
+ info->call_stack = alloc_value->bucket()->stack;
+ info->stack_depth = alloc_value->bucket()->depth;
}
- return false;
+ return alloc_value != NULL;
}
-void HeapProfileTable::MapArgsAllocIterator(
- const void* ptr, AllocValue v, AllocIterator callback) {
- AllocInfo info;
- info.object_size = v.bytes;
- info.call_stack = v.bucket->stack;
- info.stack_depth = v.bucket->depth;
- callback(ptr, info);
+bool HeapProfileTable::FindInsideAlloc(const void* ptr,
+ size_t max_size,
+ const void** object_ptr,
+ size_t* object_size) const {
+ const AllocValue* alloc_value =
+ allocation_->FindInside(&AllocValueSize, max_size, ptr, object_ptr);
+ if (alloc_value != NULL) *object_size = alloc_value->bytes;
+ return alloc_value != NULL;
}
-void HeapProfileTable::IterateAllocs(AllocIterator callback) const {
- allocation_->Iterate(MapArgsAllocIterator, callback);
+bool HeapProfileTable::MarkAsLive(const void* ptr) {
+ AllocValue* alloc = allocation_->FindMutable(ptr);
+ if (alloc && !alloc->live()) {
+ alloc->set_live(true);
+ return true;
+ }
+ return false;
}
// We'd be happier using snprintfer, but we don't to reduce dependencies.
@@ -360,29 +363,32 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const {
return bucket_length + map_length;
}
-void HeapProfileTable::FilteredDumpIterator(const void* ptr, AllocValue v,
- const DumpArgs& args) {
- if (args.filter(ptr, v.bytes)) return;
+inline
+void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v,
+ const DumpArgs& args) {
+ if (v->live()) {
+ v->set_live(false);
+ return;
+ }
Bucket b;
memset(&b, 0, sizeof(b));
b.allocs = 1;
- b.alloc_size = v.bytes;
+ b.alloc_size = v->bytes;
const void* stack[kMaxStackTrace + 1];
- b.depth = v.bucket->depth + int(args.dump_alloc_addresses);
+ b.depth = v->bucket()->depth + int(args.dump_alloc_addresses);
b.stack = stack;
if (args.dump_alloc_addresses) stack[0] = ptr;
memcpy(stack + int(args.dump_alloc_addresses),
- v.bucket->stack, sizeof(stack[0]) * v.bucket->depth);
+ v->bucket()->stack, sizeof(stack[0]) * v->bucket()->depth);
char buf[1024];
int len = UnparseBucket(b, buf, 0, sizeof(buf), args.profile_stats);
FDWrite(args.fd, buf, len);
}
-bool HeapProfileTable::DumpFilteredProfile(const char* file_name,
- DumpFilter filter,
- bool dump_alloc_addresses,
- Stats* profile_stats) const {
- RAW_VLOG(1, "Dumping filtered heap profile to %s", file_name);
+bool HeapProfileTable::DumpNonLiveProfile(const char* file_name,
+ bool dump_alloc_addresses,
+ Stats* profile_stats) const {
+ RAW_VLOG(1, "Dumping non-live heap profile to %s", file_name);
int fd = open(file_name, O_WRONLY|O_CREAT|O_TRUNC, 0644);
if (fd >= 0) {
FDWrite(fd, kProfileHeader, strlen(kProfileHeader));
@@ -390,8 +396,8 @@ bool HeapProfileTable::DumpFilteredProfile(const char* file_name,
int len = UnparseBucket(total_, buf, 0, sizeof(buf), profile_stats);
FDWrite(fd, buf, len);
memset(profile_stats, 0, sizeof(*profile_stats));
- const DumpArgs args(fd, dump_alloc_addresses, filter, profile_stats);
- allocation_->Iterate<const DumpArgs&>(FilteredDumpIterator, args);
+ const DumpArgs args(fd, dump_alloc_addresses, profile_stats);
+ allocation_->Iterate<const DumpArgs&>(DumpNonLiveIterator, args);
FDWrite(fd, kProcSelfMapsHeader, strlen(kProcSelfMapsHeader));
DumpProcSelfMaps(fd);
NO_INTR(close(fd));
diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h
index 2775edd..3ad4a5d 100644
--- a/src/heap-profile-table.h
+++ b/src/heap-profile-table.h
@@ -91,6 +91,19 @@ class HeapProfileTable {
// Same as FindAlloc, but fills all of *info.
bool FindAllocDetails(const void* ptr, AllocInfo* info) const;
+ // Return true iff "ptr" points into a recorded allocation
+ // If yes, fill *object_ptr with the actual allocation address
+ // and *object_size with the allocation byte size.
+ // max_size specifies largest currently possible allocation size.
+ bool FindInsideAlloc(const void* ptr, size_t max_size,
+ const void** object_ptr, size_t* object_size) const;
+
+ // If "ptr" points to a recorded allocation and it's not marked as live
+ // mark it as live and return true. Else return false.
+ // All allocations start as non-live, DumpNonLiveProfile below
+ // also makes all allocations non-live.
+ bool MarkAsLive(const void* ptr);
+
// Return current total (de)allocation statistics.
const Stats& total() const { return total_; }
@@ -100,7 +113,9 @@ class HeapProfileTable {
// Iterate over the allocation profile data calling "callback"
// for every allocation.
- void IterateAllocs(AllocIterator callback) const;
+ void IterateAllocs(AllocIterator callback) const {
+ allocation_->Iterate(MapArgsAllocIterator, callback);
+ }
// Fill profile data into buffer 'buf' of size 'size'
// and return the actual size occupied by the dump in 'buf'.
@@ -114,15 +129,15 @@ class HeapProfileTable {
// needs to return true iff the object is to be filtered out of the dump.
typedef bool (*DumpFilter)(const void* ptr, size_t size);
- // Dump current heap profile for leak checking purposes to file_name
- // while filtering the objects by "filter".
- // Also write the sums of allocated byte and object counts in the dump
+ // Dump non-live portion of the current heap profile
+ // for leak checking purposes to file_name.
+ // Also reset all objects to non-live state
+ // and write the sums of allocated byte and object counts in the dump
// to *alloc_bytes and *alloc_objects.
// dump_alloc_addresses controls if object addresses are dumped.
- bool DumpFilteredProfile(const char* file_name,
- DumpFilter filter,
- bool dump_alloc_addresses,
- Stats* profile_stats) const;
+ bool DumpNonLiveProfile(const char* file_name,
+ bool dump_alloc_addresses,
+ Stats* profile_stats) const;
// Cleanup any old profile files matching prefix + ".*" + kFileExt.
static void CleanupOldProfiles(const char* prefix);
@@ -142,21 +157,34 @@ class HeapProfileTable {
// Info stored in the address map
struct AllocValue {
- Bucket* bucket; // The stack-trace bucket
+ // Access to the stack-trace bucket
+ Bucket* bucket() const {
+ return reinterpret_cast<Bucket*>(bucket_rep & ~uintptr_t(1));
+ }
+ // This also does set_live(false).
+ void set_bucket(Bucket* b) { bucket_rep = reinterpret_cast<uintptr_t>(b); }
size_t bytes; // Number of bytes in this allocation
+ // Access to the allocation liveness flag (for leak checking)
+ bool live() const { return bucket_rep & 1; }
+ void set_live(bool l) { bucket_rep = (bucket_rep & ~uintptr_t(1)) | l; }
+ private:
+ uintptr_t bucket_rep; // Bucket* with the lower bit being the "live" flag:
+ // pointers point to at least 4-byte aligned storage.
};
+ // helper for FindInsideAlloc
+ static size_t AllocValueSize(const AllocValue& v) { return v.bytes; }
+
typedef AddressMap<AllocValue> AllocationMap;
- // Arguments that need to be passed FilteredDumpIterator callback below.
+ // Arguments that need to be passed DumpNonLiveIterator callback below.
struct DumpArgs {
int fd; // file to write to
bool dump_alloc_addresses; // if we are dumping allocation's addresses
- DumpFilter filter; // dumping filter
Stats* profile_stats; // stats to update
- DumpArgs(int a, bool b, DumpFilter c, Stats* d)
- : fd(a), dump_alloc_addresses(b), filter(c), profile_stats(d) { }
+ DumpArgs(int a, bool b, Stats* d)
+ : fd(a), dump_alloc_addresses(b), profile_stats(d) { }
};
// helpers ----------------------------
@@ -176,13 +204,19 @@ class HeapProfileTable {
// Helper for IterateAllocs to do callback signature conversion
// from AllocationMap::Iterate to AllocIterator.
- static void MapArgsAllocIterator(const void* ptr, AllocValue v,
- AllocIterator callback);
-
- // Helper for DumpFilteredProfile to do object-granularity
+ static void MapArgsAllocIterator(const void* ptr, AllocValue* v,
+ AllocIterator callback) {
+ AllocInfo info;
+ info.object_size = v->bytes;
+ info.call_stack = v->bucket()->stack;
+ info.stack_depth = v->bucket()->depth;
+ callback(ptr, info);
+ }
+
+ // Helper for DumpNonLiveProfile to do object-granularity
// heap profile dumping. It gets passed to AllocationMap::Iterate.
- static void FilteredDumpIterator(const void* ptr, AllocValue v,
- const DumpArgs& args);
+ inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v,
+ const DumpArgs& args);
// data ----------------------------
diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc
index 2ffbef8..3c59262 100644
--- a/src/heap-profiler.cc
+++ b/src/heap-profiler.cc
@@ -57,6 +57,7 @@
#include <google/malloc_extension.h>
#include "base/spinlock.h"
#include "base/low_level_alloc.h"
+#include "base/sysinfo.h" // for GetUniquePathFromEnv()
#include "heap-profile-table.h"
@@ -382,9 +383,8 @@ static void HeapProfilerInit() {
}
// Everything after this point is for setting up the profiler based on envvar
-
- char* heapprofile = getenv("HEAPPROFILE");
- if (!heapprofile || heapprofile[0] == '\0') {
+ char fname[PATH_MAX];
+ if (!GetUniquePathFromEnv("HEAPPROFILE", fname)) {
return;
}
// We do a uid check so we don't write out files in a setuid executable.
@@ -396,30 +396,6 @@ static void HeapProfilerInit() {
}
#endif
- // If we're a child process of the 'main' process, we can't just use
- // the name HEAPPROFILE -- the parent process will be using that.
- // Instead we append our pid to the name. How do we tell if we're a
- // child process? Ideally we'd set an environment variable that all
- // our children would inherit. But -- and perhaps this is a bug in
- // gcc -- if you do a setenv() in a shared libarary in a global
- // constructor, the environment setting is lost by the time main()
- // is called. The only safe thing we can do in such a situation is
- // to modify the existing envvar. So we do a hack: in the parent,
- // we set the high bit of the 1st char of HEAPPROFILE. In the child,
- // we notice the high bit is set and append the pid(). This works
- // assuming cpuprofile filenames don't normally have the high bit
- // set in their first character! If that assumption is violated,
- // we'll still get a profile, but one with an unexpected name.
- // TODO(csilvers): set an envvar instead when we can do it reliably.
- char fname[PATH_MAX];
- if (heapprofile[0] & 128) { // high bit is set
- snprintf(fname, sizeof(fname), "%c%s_%u", // add pid and clear high bit
- heapprofile[0] & 127, heapprofile+1, (unsigned int)(getpid()));
- } else {
- snprintf(fname, sizeof(fname), "%s", heapprofile);
- heapprofile[0] |= 128; // set high bit for kids to see
- }
-
HeapProfileTable::CleanupOldProfiles(fname);
HeapProfilerStart(fname);
diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc
index a7eee11..611f1d2 100644
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@@ -212,6 +212,17 @@ typedef HASH_NAMESPACE::hash_set<void**, StackTraceHash> StackTraceTable;
typedef HASH_NAMESPACE::hash_set<void**, StackTraceHash, StackTraceEqual> StackTraceTable;
#endif
+void PrintCountAndSize(string* result, uintptr_t count, uintptr_t size) {
+ char buf[100];
+ snprintf(buf, sizeof(buf),
+ "%6lld: %8lld [%6lld: %8lld] @",
+ static_cast<long long>(count),
+ static_cast<long long>(size),
+ static_cast<long long>(count),
+ static_cast<long long>(size));
+ *result += buf;
+}
+
void PrintHeader(string* result, const char* label, void** entries) {
// Compute the total count and total size
uintptr_t total_count = 0;
@@ -221,24 +232,16 @@ void PrintHeader(string* result, const char* label, void** entries) {
total_size += Size(entry);
}
- char buf[200];
- snprintf(buf, sizeof(buf),
- "heap profile: %6lld: %8lld [%6lld: %8lld] @ %s\n",
- static_cast<long long>(total_count),
- static_cast<long long>(total_size),
- static_cast<long long>(total_count),
- static_cast<long long>(total_size),
- label);
- *result += buf;
+ *result += string("heap profile: ");
+ PrintCountAndSize(result, total_count, total_size);
+ *result += string(" ") + label + "\n";
}
void PrintStackEntry(string* result, void** entry) {
- char buf[100];
- snprintf(buf, sizeof(buf), "%6d: %8d [%6d: %8d] @",
- int(Count(entry)), int(Size(entry)),
- int(Count(entry)), int(Size(entry)));
- *result += buf;
+ PrintCountAndSize(result, Count(entry), Size(entry));
+
for (int i = 0; i < Depth(entry); i++) {
+ char buf[32];
snprintf(buf, sizeof(buf), " %p", PC(entry, i));
*result += buf;
}
diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc
index a84973f..4fbb956 100644
--- a/src/malloc_hook.cc
+++ b/src/malloc_hook.cc
@@ -225,7 +225,10 @@ int MallocHook::GetCallerStackTrace(void** result, int max_depth,
}
}
RAW_LOG(WARNING, "Hooked allocator frame not found, returning empty trace");
- // Try increasing kMaxSkip or else something must be wrong with InHookCaller
+ // If this happens try increasing kMaxSkip
+ // or else something must be wrong with InHookCaller,
+ // e.g. for every section used in InHookCaller
+ // all functions in that section must be inside the same library.
return 0;
}
@@ -238,7 +241,8 @@ int MallocHook::GetCallerStackTrace(void** result, int max_depth,
// just call through to them.
-#if defined(__linux) && (defined(__i386__) || defined(__x86_64__))
+#if defined(__linux) && \
+ (defined(__i386__) || defined(__x86_64__) || defined(__PPC__))
#include <unistd.h>
#include <syscall.h>
#include <sys/mman.h>
@@ -248,10 +252,10 @@ int MallocHook::GetCallerStackTrace(void** result, int max_depth,
// The x86-32 case and the x86-64 case differ:
// 32b has a mmap2() syscall, 64b does not.
// 64b and 32b have different calling conventions for mmap().
-#if defined(__i386__)
+#if defined(__i386__) || defined(__PPC__)
static inline void* do_mmap64(void *start, size_t length,
- int prot, int flags,
+ int prot, int flags,
int fd, __off64_t offset) __THROW {
void *result;
diff --git a/src/maybe_threads.cc b/src/maybe_threads.cc
index a3e0450..f1a8f76 100644
--- a/src/maybe_threads.cc
+++ b/src/maybe_threads.cc
@@ -38,6 +38,7 @@
#include "config.h"
#include <assert.h>
+#include <string.h> // for memcmp
// We don't actually need strings. But including this header seems to
// stop the compiler trying to short-circuit our pthreads existence
// tests and claiming that the address of a function is always
diff --git a/src/packed-cache-inl.h b/src/packed-cache-inl.h
index 6114553..ff1ac23 100644
--- a/src/packed-cache-inl.h
+++ b/src/packed-cache-inl.h
@@ -99,12 +99,12 @@
//
// Implementation details:
//
-// This is a direct-mapped cache with 2^kHashbits entries;
-// the hash function simply takes the low bits of the key.
-// So, we don't have to store the low bits of the key in the entries.
-// Instead, an entry is the high bits of a key and a value, packed
-// together. E.g., a 20 bit key and a 7 bit value only require
-// a uint16 for each entry if kHashbits >= 11.
+// This is a direct-mapped cache with 2^kHashbits entries; the hash
+// function simply takes the low bits of the key. We store whole keys
+// if a whole key plus a whole value fits in an entry. Otherwise, an
+// entry is the high bits of a key and a value, packed together.
+// E.g., a 20 bit key and a 7 bit value only require a uint16 for each
+// entry if kHashbits >= 11.
//
// Alternatives to this scheme will be added as needed.
@@ -131,7 +131,8 @@ class PackedCache {
typedef uintptr_t K;
typedef size_t V;
static const int kHashbits = 12;
- static const int kValuebits = 8;
+ static const int kValuebits = 7;
+ static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T);
explicit PackedCache(V initial_value) {
COMPILE_ASSERT(kKeybits <= sizeof(K) * 8, key_size);
@@ -166,50 +167,47 @@ class PackedCache {
void Clear(V value) {
DCHECK_EQ(value, value & kValueMask);
for (int i = 0; i < 1 << kHashbits; i++) {
- array_[i] = value;
+ RAW_DCHECK(kUseWholeKeys || KeyToUpper(i) == 0, "KeyToUpper failure");
+ array_[i] = kUseWholeKeys ? (value | KeyToUpper(i)) : value;
}
}
private:
- // We are going to pack a value and the upper part of a key into
- // an entry of type T. The UPPER type is for the upper part of a key,
- // after the key has been masked and shifted for inclusion in an entry.
+ // We are going to pack a value and the upper part of a key (or a
+ // whole key) into an entry of type T. The UPPER type is for the
+ // upper part of a key, after the key has been masked and shifted
+ // for inclusion in an entry.
typedef T UPPER;
static V EntryToValue(T t) { return t & kValueMask; }
- static UPPER EntryToUpper(T t) { return t & kUpperMask; }
-
- // If v is a V and u is an UPPER then you can create an entry by
- // doing u | v. kHashbits determines where in a K to find the upper
- // part of the key, and kValuebits determines where in the entry to put
- // it.
+ // If we have space for a whole key, we just shift it left.
+ // Otherwise kHashbits determines where in a K to find the upper
+ // part of the key, and kValuebits determines where in the entry to
+ // put it.
static UPPER KeyToUpper(K k) {
- const int shift = kHashbits - kValuebits;
- // Assume kHashbits >= kValuebits. It would be easy to lift this assumption.
- return static_cast<T>(k >> shift) & kUpperMask;
- }
-
- // This is roughly the inverse of KeyToUpper(). Some of the key has been
- // thrown away, since KeyToUpper() masks off the low bits of the key.
- static K UpperToPartialKey(UPPER u) {
- DCHECK_EQ(u, u & kUpperMask);
- const int shift = kHashbits - kValuebits;
- // Assume kHashbits >= kValuebits. It would be easy to lift this assumption.
- return static_cast<K>(u) << shift;
+ if (kUseWholeKeys) {
+ return static_cast<T>(k) << kValuebits;
+ } else {
+ const int shift = kHashbits - kValuebits;
+ // Assume kHashbits >= kValuebits. It'd be easy to lift this assumption.
+ return static_cast<T>(k >> shift) & kUpperMask;
+ }
}
static size_t Hash(K key) {
return static_cast<size_t>(key) & N_ONES_(size_t, kHashbits);
}
- // Does the entry's partial key match the relevant part of the given key?
+ // Does the entry match the relevant part of the given key?
static bool KeyMatch(T entry, K key) {
- return ((KeyToUpper(key) ^ entry) & kUpperMask) == 0;
+ return kUseWholeKeys ?
+ (entry >> kValuebits == key) :
+ ((KeyToUpper(key) ^ entry) & kUpperMask) == 0;
}
static const int kTbits = 8 * sizeof(T);
- static const int kUpperbits = kKeybits - kHashbits;
+ static const int kUpperbits = kUseWholeKeys ? kKeybits : kKeybits - kHashbits;
// For masking a K.
static const K kKeyMask = N_ONES_(K, kKeybits);
diff --git a/src/profiledata.h b/src/profiledata.h
index f39dfe1..aa50241 100644
--- a/src/profiledata.h
+++ b/src/profiledata.h
@@ -68,12 +68,12 @@
// A profiler which uses asyncronous signals to add samples will
// typically use two locks to protect this data structure:
//
-// - A MutexLock, which is held over all calls except for the 'Add'
+// - A SpinLock which is held over all calls except for the 'Add'
// call made from the signal handler.
//
-// - A SpinLock, which is held over calls to 'Start', 'Stop',
+// - A SpinLock which is held over calls to 'Start', 'Stop',
// 'Flush', and 'Add'. (This SpinLock should be acquired after
-// the MutexLock in all cases where both are needed.)
+// the first SpinLock in all cases where both are needed.)
class ProfileData {
public:
struct State {
diff --git a/src/profiler.cc b/src/profiler.cc
index cca1d03..79cb4f8 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -53,9 +53,8 @@
#include "base/commandlineflags.h"
#include "base/logging.h"
#include "base/googleinit.h"
-#include "base/mutex.h"
#include "base/spinlock.h"
-#include "base/sysinfo.h"
+#include "base/sysinfo.h" /* for GetUniquePathFromEnv, etc */
#include "profiledata.h"
#ifdef HAVE_CONFLICT_SIGNAL_H
#include "conflict-signal.h" /* used on msvc machines */
@@ -66,47 +65,6 @@ using std::string;
DEFINE_string(cpu_profile, "",
"Profile file name (used if CPUPROFILE env var not specified)");
-// This takes as an argument an environment-variable name (like
-// CPUPROFILE) whose value is supposed to be a file-path, and sets
-// path to that path, and returns true. If the env var doesn't exist,
-// or is the empty string, leave path unchanged and returns false.
-// The reason this is non-trivial is that this function handles munged
-// pathnames. Here's why:
-//
-// If we're a child process of the 'main' process, we can't just use
-// getenv("CPUPROFILE") -- the parent process will be using that path.
-// Instead we append our pid to the pathname. How do we tell if we're a
-// child process? Ideally we'd set an environment variable that all
-// our children would inherit. But -- and this is seemingly a bug in
-// gcc -- if you do a setenv() in a shared libarary in a global
-// constructor, the environment setting is lost by the time main() is
-// called. The only safe thing we can do in such a situation is to
-// modify the existing envvar. So we do a hack: in the parent, we set
-// the high bit of the 1st char of CPUPROFILE. In the child, we
-// notice the high bit is set and append the pid(). This works
-// assuming cpuprofile filenames don't normally have the high bit set
-// in their first character! If that assumption is violated, we'll
-// still get a profile, but one with an unexpected name.
-// TODO(csilvers): set an envvar instead when we can do it reliably.
-static bool GetUniquePathFromEnv(const char* env_name, string* path) {
- char* envval = getenv(env_name);
- if (envval == NULL || *envval == '\0')
- return false;
- if (envval[0] & 128) { // high bit is set
- char pid[64]; // pids are smaller than this!
- snprintf(pid, sizeof(pid), "%u", (unsigned int)(getpid()));
- *path = envval;
- *path += "_";
- *path += pid;
- (*path)[0] &= 127;
- } else {
- *path = string(envval);
- envval[0] |= 128; // set high bit for kids to see
- }
- return true;
-}
-
-
// Collects up all profile data. This is a singleton, which is
// initialized by a constructor at startup.
class CpuProfiler {
@@ -156,7 +114,7 @@ class CpuProfiler {
// Locking order is control_lock_ first, and then signal_lock_.
// signal_lock_ is acquired by the prof_handler without first
// acquiring control_lock_.
- Mutex control_lock_;
+ SpinLock control_lock_;
SpinLock signal_lock_;
ProfileData collector_;
@@ -202,8 +160,8 @@ CpuProfiler::CpuProfiler() {
RegisterThread();
// Should profiling be enabled automatically at start?
- string fname;
- if (!GetUniquePathFromEnv("CPUPROFILE", &fname)) {
+ char fname[PATH_MAX];
+ if (!GetUniquePathFromEnv("CPUPROFILE", fname)) {
return;
}
// We don't enable profiling if setuid -- it's a security risk
@@ -212,15 +170,15 @@ CpuProfiler::CpuProfiler() {
return;
#endif
- if (!Start(fname.c_str(), NULL, NULL)) {
+ if (!Start(fname, NULL, NULL)) {
RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n",
- fname.c_str(), strerror(errno));
+ fname, strerror(errno));
}
}
bool CpuProfiler::Start(const char* fname,
bool (*filter)(void*), void* filter_arg) {
- MutexLock cl(&control_lock_);
+ SpinLockHolder cl(&control_lock_);
if (collector_.enabled()) {
return false;
@@ -258,7 +216,7 @@ CpuProfiler::~CpuProfiler() {
// Stop profiling and write out any collected profile data
void CpuProfiler::Stop() {
- MutexLock cl(&control_lock_);
+ SpinLockHolder cl(&control_lock_);
if (!collector_.enabled()) {
return;
@@ -278,7 +236,7 @@ void CpuProfiler::Stop() {
}
void CpuProfiler::FlushTable() {
- MutexLock cl(&control_lock_);
+ SpinLockHolder cl(&control_lock_);
if (!collector_.enabled()) {
return;
@@ -295,14 +253,14 @@ void CpuProfiler::FlushTable() {
}
bool CpuProfiler::Enabled() {
- MutexLock cl(&control_lock_);
+ SpinLockHolder cl(&control_lock_);
return collector_.enabled();
}
void CpuProfiler::GetCurrentState(ProfilerState* state) {
ProfileData::State collector_state;
{
- MutexLock cl(&control_lock_);
+ SpinLockHolder cl(&control_lock_);
collector_.GetCurrentState(&collector_state);
}
diff --git a/src/stacktrace.cc b/src/stacktrace.cc
index 21b172c..9826e98 100644
--- a/src/stacktrace.cc
+++ b/src/stacktrace.cc
@@ -82,7 +82,7 @@
# endif
// The PowerPC case
-#elif defined(__ppc__) && __GNUC__ >= 2
+#elif (defined(__ppc__) || defined(__PPC__)) && __GNUC__ >= 2
# if !defined(NO_FRAME_POINTER)
# include "stacktrace_powerpc-inl.h"
# else
diff --git a/src/stacktrace_libunwind-inl.h b/src/stacktrace_libunwind-inl.h
index 3434e88..529f309 100644
--- a/src/stacktrace_libunwind-inl.h
+++ b/src/stacktrace_libunwind-inl.h
@@ -49,7 +49,6 @@ extern "C" {
// ignore those subsequent traces. In such cases, we return 0 to
// indicate the situation.
static SpinLock libunwind_lock(SpinLock::LINKER_INITIALIZED);
-static bool in_get_stack_trace = false;
// If you change this function, also change GetStackFrames below.
int GetStackTrace(void** result, int max_depth, int skip_count) {
@@ -58,13 +57,8 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
unw_cursor_t cursor;
unw_context_t uc;
- {
- SpinLockHolder sh(&libunwind_lock);
- if (in_get_stack_trace) {
- return 0;
- } else {
- in_get_stack_trace = true;
- }
+ if (!libunwind_lock.TryLock()) {
+ return 0;
}
unw_getcontext(&uc);
@@ -86,9 +80,7 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
break;
}
- SpinLockHolder sh(&libunwind_lock);
- in_get_stack_trace = false;
-
+ libunwind_lock.Unlock();
return n;
}
@@ -125,13 +117,8 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
unw_cursor_t cursor;
unw_context_t uc;
- {
- SpinLockHolder sh(&libunwind_lock);
- if (in_get_stack_trace) {
- return 0;
- } else {
- in_get_stack_trace = true;
- }
+ if (!libunwind_lock.TryLock()) {
+ return 0;
}
unw_getcontext(&uc);
@@ -155,8 +142,6 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
// No implementation for finding out the stack frame sizes yet.
memset(sizes, 0, sizeof(*sizes) * n);
- SpinLockHolder sh(&libunwind_lock);
- in_get_stack_trace = false;
-
+ libunwind_lock.Unlock();
return n;
}
diff --git a/src/stacktrace_powerpc-inl.h b/src/stacktrace_powerpc-inl.h
index bf64284..5631e49 100644
--- a/src/stacktrace_powerpc-inl.h
+++ b/src/stacktrace_powerpc-inl.h
@@ -68,6 +68,10 @@ static void **NextStackFrame(void **old_sp) {
return new_sp;
}
+// This ensures that GetStackTrace stes up the Link Register properly.
+void StacktracePowerPCDummyFunction() __attribute__((noinline));
+void StacktracePowerPCDummyFunction() { __asm__ volatile(""); }
+
// If you change this function, also change GetStackFrames below.
int GetStackTrace(void** result, int max_depth, int skip_count) {
void **sp;
@@ -82,23 +86,40 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
__asm__ volatile ("mr %0,1" : "=r" (sp));
#endif
+ // On PowerPC, the "Link Register" or "Link Record" (LR), is a stack
+ // entry that holds the return address of the subroutine call (what
+ // instruction we run after our function finishes). This is the
+ // same as the stack-pointer of our parent routine, which is what we
+ // want here. While the compiler will always(?) set up LR for
+ // subroutine calls, it may not for leaf functions (such as this one).
+ // This routine forces the compiler (at least gcc) to push it anyway.
+ StacktracePowerPCDummyFunction();
+
+ // The LR save area is used by the callee, so the top entry is bogus.
+ skip_count++;
+
int n = 0;
while (sp && n < max_depth) {
if (skip_count > 0) {
skip_count--;
} else {
- // sp[2] holds the "Link Record", according to RTArch-59.html.
- // On PPC, the Link Record is the return address of the
- // subroutine call (what instruction we run after our function
- // finishes). This is the same as the stack-pointer of our
- // parent routine, which is what we want here. We believe that
- // the compiler will always set up the LR for subroutine calls.
- //
- // It may be possible to get the stack-pointer of the parent
- // routine directly. In my experiments, this code works:
- // result[n++] = NextStackFrame(sp)[-18]
- // But I'm not sure what this is doing, exactly, or how reliable it is.
- result[n++] = *(sp+2); // sp[2] holds the Link Record (return address)
+ // PowerPC has 3 main ABIs, which say where in the stack the
+ // Link Register is. For DARWIN and AIX (used by apple and
+ // linux ppc64), it's in sp[2]. For SYSV (used by linux ppc),
+ // it's in sp[1].
+#if defined(_CALL_AIX) || defined(_CALL_DARWIN)
+ result[n++] = *(sp+2);
+#elif defined(_CALL_SYSV)
+ result[n++] = *(sp+1);
+#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
+ // This check is in case the compiler doesn't define _CALL_AIX/etc.
+ result[n++] = *(sp+2);
+#elif defined(__linux)
+ // This check is in case the compiler doesn't define _CALL_SYSV.
+ result[n++] = *(sp+1);
+#else
+#error Need to specify the PPC ABI for your archiecture.
+#endif
}
// Use strict unwinding rules.
sp = NextStackFrame<true>(sp);
@@ -135,16 +156,18 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
// class, we are in trouble.
int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) {
void **sp;
- // Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther)
- // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a
- // different asm syntax. I don't know quite the best way to discriminate
- // systems using the old as from the new one; I've gone with __APPLE__.
#ifdef __APPLE__
__asm__ volatile ("mr %0,r1" : "=r" (sp));
#else
__asm__ volatile ("mr %0,1" : "=r" (sp));
#endif
+ StacktracePowerPCDummyFunction();
+ // Note we do *not* increment skip_count here for the SYSV ABI. If
+ // we did, the list of stack frames wouldn't properly match up with
+ // the list of return addresses. Note this means the top pc entry
+ // is probably bogus for linux/ppc (and other SYSV-ABI systems).
+
int n = 0;
while (sp && n < max_depth) {
// The GetStackFrames routine is called when we are in some
@@ -156,18 +179,19 @@ int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) {
if (skip_count > 0) {
skip_count--;
} else {
- // sp[2] holds the "Link Record", according to RTArch-59.html.
- // On PPC, the Link Record is the return address of the
- // subroutine call (what instruction we run after our function
- // finishes). This is the same as the stack-pointer of our
- // parent routine, which is what we want here. We believe that
- // the compiler will always set up the LR for subroutine calls.
- //
- // It may be possible to get the stack-pointer of the parent
- // routine directly. In my experiments, this code works:
- // pcs[n] = NextStackFrame(sp)[-18]
- // But I'm not sure what this is doing, exactly, or how reliable it is.
- pcs[n] = *(sp+2); // sp[2] holds the Link Record (return address)
+#if defined(_CALL_AIX) || defined(_CALL_DARWIN)
+ pcs[n++] = *(sp+2);
+#elif defined(_CALL_SYSV)
+ pcs[n++] = *(sp+1);
+#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
+ // This check is in case the compiler doesn't define _CALL_AIX/etc.
+ pcs[n++] = *(sp+2);
+#elif defined(__linux)
+ // This check is in case the compiler doesn't define _CALL_SYSV.
+ pcs[n++] = *(sp+1);
+#else
+#error Need to specify the PPC ABI for your archiecture.
+#endif
if (next_sp > sp) {
sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
} else {
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
index 96906ca..3a0d665 100644
--- a/src/system-alloc.cc
+++ b/src/system-alloc.cc
@@ -78,11 +78,6 @@ static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
static size_t pagesize = 0;
// Configuration parameters.
-//
-// if use_devmem is true, either use_sbrk or use_mmap must also be true.
-// For 2.2 kernels, it looks like the sbrk address space (500MBish) and
-// the mmap address space (1300MBish) are disjoint, so we need both allocators
-// to get as much virtual memory as possible.
DEFINE_int32(malloc_devmem_start, 0,
"Physical memory starting location in MB for /dev/mem allocation."
@@ -90,6 +85,8 @@ DEFINE_int32(malloc_devmem_start, 0,
DEFINE_int32(malloc_devmem_limit, 0,
"Physical memory limit location in MB for /dev/mem allocation."
" Setting this to 0 means no limit.");
+DEFINE_bool(malloc_skip_mmap, false,
+ "Whether mmap can be used to obtain memory.");
// static allocators
class SbrkSysAllocator : public SysAllocator {
@@ -188,6 +185,16 @@ void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) {
void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
size_t alignment) {
+ // Check if we should use mmap allocation.
+ // FLAGS_malloc_skip_mmap starts out as false (its uninitialized
+ // state) and eventually gets initialized to the specified value. Note
+ // that this code runs for a while before the flags are initialized.
+ // Chances are we never get here before the flags are initialized since
+ // sbrk is used until the heap is exhausted (before mmap is used).
+ if (FLAGS_malloc_skip_mmap) {
+ return NULL;
+ }
+
// could theoretically return the "extra" bytes here, but this
// is simple and correct.
if (actual_size) {
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 76435a7..a3f8733 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -277,7 +277,9 @@ static const int add_amount[2] = { 7, 127 + (120 << 7) };
static unsigned char class_array[377];
// Compute index of the class_array[] entry for a given size
-static inline int ClassIndex(size_t s) {
+static inline int ClassIndex(int s) {
+ ASSERT(0 <= s);
+ ASSERT(s <= kMaxSize);
const int i = (s > kMaxSmallSize);
return (s + add_amount[i]) >> shift_amount[i];
}
@@ -381,7 +383,7 @@ static inline size_t SLL_Size(void *head) {
// Setup helper functions.
-static inline int SizeClass(size_t size) {
+static inline int SizeClass(int size) {
return class_array[ClassIndex(size)];
}
@@ -780,14 +782,14 @@ static StackTrace* growth_stacks = NULL;
template <int BITS> class MapSelector {
public:
typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
- typedef PackedCache<BITS, uint64> CacheType;
+ typedef PackedCache<BITS-kPageShift, uint64_t> CacheType;
};
// A two-level map for 32-bit machines
template <> class MapSelector<32> {
public:
typedef TCMalloc_PageMap2<32-kPageShift> Type;
- typedef PackedCache<32-kPageShift, uint16> CacheType;
+ typedef PackedCache<32-kPageShift, uint16_t> CacheType;
};
// -------------------------------------------------------------------------
@@ -893,10 +895,11 @@ class TCMalloc_PageHeap {
// Remove span from its free list, and move any leftover part of
// span into appropriate free lists. Also update "span" to have
// length exactly "n" and mark it as non-free so it can be returned
- // to the client.
+ // to the client. After all that, decrease free_pages_ by n and
+ // return span.
//
// "released" is true iff "span" was found on a "returned" list.
- void Carve(Span* span, Length n, bool released);
+ Span* Carve(Span* span, Length n, bool released);
void RecordSpan(Span* span) {
pagemap_.set(span->start, span);
@@ -943,25 +946,19 @@ Span* TCMalloc_PageHeap::New(Length n) {
// Find first size >= n that has a non-empty list
for (Length s = n; s < kMaxPages; s++) {
- Span* ll = NULL;
+ Span* ll = &free_[s].normal;
bool released = false;
- if (!DLL_IsEmpty(&free_[s].normal)) {
- // Found normal span
- ll = &free_[s].normal;
- } else if (!DLL_IsEmpty(&free_[s].returned)) {
- // Found returned span; reallocate it
+ // If we're lucky, ll is non-empty, meaning it has a suitable span.
+ if (DLL_IsEmpty(ll)) {
+ // Alternatively, maybe there's a usable returned span.
ll = &free_[s].returned;
released = true;
- } else {
- // Keep looking in larger classes
- continue;
+ if (DLL_IsEmpty(ll)) {
+ // Still no luck, so keep looking in larger classes.
+ continue;
+ }
}
-
- Span* result = ll->next;
- Carve(result, n, released);
- ASSERT(Check());
- free_pages_ -= n;
- return result;
+ return Carve(ll->next, n, released);
}
Span* result = AllocLarge(n);
@@ -1010,13 +1007,7 @@ Span* TCMalloc_PageHeap::AllocLarge(Length n) {
}
}
- if (best != NULL) {
- Carve(best, n, from_released);
- ASSERT(Check());
- free_pages_ -= n;
- return best;
- }
- return NULL;
+ return best == NULL ? NULL : Carve(best, n, from_released);
}
Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
@@ -1036,7 +1027,7 @@ Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
return leftover;
}
-void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
+Span* TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
ASSERT(n > 0);
DLL_Remove(span);
span->free = 0;
@@ -1058,6 +1049,9 @@ void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
span->length = n;
pagemap_.set(span->start + n - 1, span);
}
+ ASSERT(Check());
+ free_pages_ -= n;
+ return span;
}
void TCMalloc_PageHeap::Delete(Span* span) {
@@ -1357,8 +1351,17 @@ void TCMalloc_PageHeap::ReleaseFreePages() {
class TCMalloc_ThreadCache_FreeList {
private:
void* list_; // Linked list of nodes
- uint16_t length_; // Current length
- uint16_t lowater_; // Low water mark for list length
+
+#ifdef _LP64
+ // On 64-bit hardware, manipulating 16-bit values may be slightly slow.
+ // Since it won't cost any space, let's make these fields 32 bits each.
+ uint32_t length_; // Current length
+ uint32_t lowater_; // Low water mark for list length
+#else
+ // If we aren't using 64-bit pointers then pack these into less space.
+ uint16_t length_;
+ uint16_t lowater_;
+#endif
public:
void Init() {
@@ -1368,7 +1371,7 @@ class TCMalloc_ThreadCache_FreeList {
}
// Return current length of list
- int length() const {
+ size_t length() const {
return length_;
}
@@ -1414,14 +1417,19 @@ class TCMalloc_ThreadCache {
private:
typedef TCMalloc_ThreadCache_FreeList FreeList;
- size_t size_; // Combined size of data
- pthread_t tid_; // Which thread owns it
- bool in_setspecific_; // In call to pthread_setspecific?
- FreeList list_[kNumClasses]; // Array indexed by size-class
+ // Warning: the offset of list_ affects performance. On general
+ // principles, we don't like list_[x] to span multiple L1 cache
+ // lines. However, merely placing list_ at offset 0 here seems to
+ // cause cache conflicts.
// We sample allocations, biased by the size of the allocation
- uint32_t rnd_; // Cheap random number generator
size_t bytes_until_sample_; // Bytes until we sample next
+ uint32_t rnd_; // Cheap random number generator
+
+ size_t size_; // Combined size of data
+ pthread_t tid_; // Which thread owns it
+ FreeList list_[kNumClasses]; // Array indexed by size-class
+ bool in_setspecific_; // In call to pthread_setspecific?
// Allocate a new heap. REQUIRES: pageheap_lock is held.
static inline TCMalloc_ThreadCache* NewHeap(pthread_t tid);
@@ -1445,8 +1453,13 @@ class TCMalloc_ThreadCache {
void* Allocate(size_t size);
void Deallocate(void* ptr, size_t size_class);
- void FetchFromCentralCache(size_t cl);
- void ReleaseToCentralCache(size_t cl, int N);
+ // Gets and returns an object from the central cache, and, if possible,
+ // also adds some objects of that size class to this thread cache.
+ void* FetchFromCentralCache(size_t cl, size_t byte_size);
+
+ // Releases N items from this thread cache. Returns size_.
+ size_t ReleaseToCentralCache(FreeList* src, size_t cl, int N);
+
void Scavenge();
void Print() const;
@@ -1479,11 +1492,11 @@ class TCMalloc_Central_FreeList {
// These methods all do internal locking.
// Insert the specified range into the central freelist. N is the number of
- // elements in the range.
+ // elements in the range. RemoveRange() is the opposite operation.
void InsertRange(void *start, void *end, int N);
- // Returns the actual number of fetched elements into N.
- void RemoveRange(void **start, void **end, int *N);
+ // Returns the actual number of fetched elements and sets *start and *end.
+ int RemoveRange(void **start, void **end, int N);
// Returns the number of free objects in cache.
int length() {
@@ -1542,7 +1555,7 @@ class TCMalloc_Central_FreeList {
// spans if it allows it to shrink the cache. Return false if it failed to
// shrink the cache. Decrements cache_size_ on succeess.
// May temporarily take lock_. If it takes lock_, the locked_size_class
- // lock is released to the thread from holding two size class locks
+ // lock is released to keep the thread from holding two size class locks
// concurrently which could lead to a deadlock.
bool ShrinkCache(int locked_size_class, bool force);
@@ -1780,41 +1793,39 @@ void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
ReleaseListToSpans(start);
}
-void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
- int num = *N;
- ASSERT(num > 0);
-
- SpinLockHolder h(&lock_);
- if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
+int TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int N) {
+ ASSERT(N > 0);
+ lock_.Lock();
+ if (N == num_objects_to_move[size_class_] && used_slots_ > 0) {
int slot = --used_slots_;
ASSERT(slot >= 0);
TCEntry *entry = &tc_slots_[slot];
*start = entry->head;
*end = entry->tail;
- return;
+ lock_.Unlock();
+ return N;
}
+ int result = 0;
+ void* head = NULL;
+ void* tail = NULL;
// TODO: Prefetch multiple TCEntries?
- void *tail = FetchFromSpansSafe();
- if (!tail) {
- // We are completely out of memory.
- *start = *end = NULL;
- *N = 0;
- return;
- }
-
- SLL_SetNext(tail, NULL);
- void *head = tail;
- int count = 1;
- while (count < num) {
- void *t = FetchFromSpans();
- if (!t) break;
- SLL_Push(&head, t);
- count++;
+ tail = FetchFromSpansSafe();
+ if (tail != NULL) {
+ SLL_SetNext(tail, NULL);
+ head = tail;
+ result = 1;
+ while (result < N) {
+ void *t = FetchFromSpans();
+ if (!t) break;
+ SLL_Push(&head, t);
+ result++;
+ }
}
+ lock_.Unlock();
*start = head;
*end = tail;
- *N = count;
+ return result;
}
@@ -1929,7 +1940,7 @@ void TCMalloc_ThreadCache::Cleanup() {
// Put unused memory back into central cache
for (int cl = 0; cl < kNumClasses; ++cl) {
if (list_[cl].length() > 0) {
- ReleaseToCentralCache(cl, list_[cl].length());
+ ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
}
}
}
@@ -1937,41 +1948,55 @@ void TCMalloc_ThreadCache::Cleanup() {
inline void* TCMalloc_ThreadCache::Allocate(size_t size) {
ASSERT(size <= kMaxSize);
const size_t cl = SizeClass(size);
+ const size_t alloc_size = ByteSizeForClass(cl);
FreeList* list = &list_[cl];
if (list->empty()) {
- FetchFromCentralCache(cl);
- if (list->empty()) return NULL;
+ return FetchFromCentralCache(cl, alloc_size);
}
- size_ -= ByteSizeForClass(cl);
+ size_ -= alloc_size;
return list->Pop();
}
inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
- size_ += ByteSizeForClass(cl);
FreeList* list = &list_[cl];
+ ssize_t list_headroom =
+ static_cast<ssize_t>(kMaxFreeListLength - 1) - list->length();
+ size_ += ByteSizeForClass(cl);
+ size_t cache_size = size_;
+ ssize_t size_headroom = per_thread_cache_size - cache_size - 1;
list->Push(ptr);
- // If enough data is free, put back into central cache
- if (list->length() > kMaxFreeListLength) {
- ReleaseToCentralCache(cl, num_objects_to_move[cl]);
+
+ // There are two relatively uncommon things that require further work.
+ // In the common case we're done, and in that case we need a single branch
+ // because of the bitwise-or trick that follows.
+ if ((list_headroom | size_headroom) < 0) {
+ if (list_headroom < 0) {
+ cache_size = ReleaseToCentralCache(list, cl, num_objects_to_move[cl]);
+ }
+ if (cache_size >= per_thread_cache_size) Scavenge();
}
- if (size_ >= per_thread_cache_size) Scavenge();
}
-// Remove some objects of class "cl" from central cache and add to thread heap
-void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl) {
- int fetch_count = num_objects_to_move[cl];
+// Remove some objects of class "cl" from central cache and add to thread heap.
+// On success, return the first object for immediate use; otherwise return NULL.
+void* TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) {
void *start, *end;
- central_cache[cl].RemoveRange(&start, &end, &fetch_count);
- list_[cl].PushRange(fetch_count, start, end);
- size_ += ByteSizeForClass(cl) * fetch_count;
+ int fetch_count = central_cache[cl].RemoveRange(&start, &end,
+ num_objects_to_move[cl]);
+ ASSERT((start == NULL) == (fetch_count == 0));
+ if (--fetch_count >= 0) {
+ size_ += byte_size * fetch_count;
+ list_[cl].PushRange(fetch_count, SLL_Next(start), end);
+ }
+ return start;
}
// Remove some objects of class "cl" from thread heap and add to central cache
-void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
- ASSERT(N > 0);
- FreeList* src = &list_[cl];
+size_t TCMalloc_ThreadCache::ReleaseToCentralCache(FreeList* src,
+ size_t cl, int N) {
+ ASSERT(src == &list_[cl]);
if (N > src->length()) N = src->length();
- size_ -= N*ByteSizeForClass(cl);
+ size_t delta_bytes = N * ByteSizeForClass(cl);
// We return prepackaged chains of the correct size to the central cache.
// TODO: Use the same format internally in the thread caches?
@@ -1985,6 +2010,7 @@ void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
void *tail, *head;
src->PopRange(N, &head, &tail);
central_cache[cl].InsertRange(head, tail, N);
+ return size_ -= delta_bytes;
}
// Release idle memory to the central cache
@@ -2002,7 +2028,7 @@ void TCMalloc_ThreadCache::Scavenge() {
const int lowmark = list->lowwatermark();
if (lowmark > 0) {
const int drop = (lowmark > 1) ? lowmark/2 : 1;
- ReleaseToCentralCache(cl, drop);
+ ReleaseToCentralCache(list, cl, drop);
}
list->clear_lowwatermark();
}
@@ -2249,7 +2275,7 @@ void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
void TCMalloc_ThreadCache::Print() const {
for (int cl = 0; cl < kNumClasses; ++cl) {
- MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
+ MESSAGE(" %5" PRIuS " : %4" PRIuS " len; %4d lo\n",
ByteSizeForClass(cl),
list_[cl].length(),
list_[cl].lowwatermark());
@@ -2642,6 +2668,16 @@ static inline void* SpanToMallocResult(Span *span) {
CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
}
+// Helper for do_malloc().
+static inline void* do_malloc_pages(Length num_pages) {
+ Span *span;
+ {
+ SpinLockHolder h(&pageheap_lock);
+ span = pageheap->New(num_pages);
+ }
+ return span == NULL ? NULL : SpanToMallocResult(span);
+}
+
static inline void* do_malloc(size_t size) {
void* ret = NULL;
@@ -2652,17 +2688,12 @@ static inline void* do_malloc(size_t size) {
if (span != NULL) {
ret = SpanToMallocResult(span);
}
- } else if (size > kMaxSize) {
- // Use page-level allocator
- SpinLockHolder h(&pageheap_lock);
- Span* span = pageheap->New(pages(size));
- if (span != NULL) {
- ret = SpanToMallocResult(span);
- }
- } else {
+ } else if (size <= kMaxSize) {
// The common case, and also the simplest. This just pops the
- // size-appropriate freelist, afer replenishing it if it's empty.
+ // size-appropriate freelist, after replenishing it if it's empty.
ret = CheckedMallocResult(heap->Allocate(size));
+ } else {
+ ret = do_malloc_pages(pages(size));
}
if (ret == NULL) errno = ENOMEM;
return ret;
diff --git a/src/tests/addressmap_unittest.cc b/src/tests/addressmap_unittest.cc
index 50d9b94..6b65cc3 100644
--- a/src/tests/addressmap_unittest.cc
+++ b/src/tests/addressmap_unittest.cc
@@ -30,6 +30,7 @@
// ---
// Author: Sanjay Ghemawat
+#include <stdlib.h> // for rand()
#include <vector>
#include <set>
#include <algorithm>
@@ -47,62 +48,102 @@ using std::vector;
using std::set;
using std::random_shuffle;
-static void SetCheckCallback(const void* ptr, int val,
+struct UniformRandomNumberGenerator {
+ size_t Uniform(size_t max_size) {
+ if (max_size == 0)
+ return 0;
+ return rand() % max_size; // not a great random-number fn, but portable
+ }
+};
+static UniformRandomNumberGenerator rnd;
+
+
+// pair of associated value and object size
+typedef pair<int, size_t> ValueT;
+
+struct PtrAndSize {
+ char* ptr;
+ size_t size;
+ PtrAndSize(char* p, size_t s) : ptr(p), size(s) {}
+};
+
+size_t SizeFunc(const ValueT& v) { return v.second; }
+
+static void SetCheckCallback(const void* ptr, ValueT* val,
set<pair<const void*, int> >* check_set) {
- check_set->insert(make_pair(ptr, val));
+ check_set->insert(make_pair(ptr, val->first));
}
int main(int argc, char** argv) {
// Get a bunch of pointers
const int N = FLAGS_N;
- static const int kObjectLength = 19;
- vector<char*> ptrs;
+ static const int kMaxRealSize = 49;
+ // 100Mb to stress not finding previous object (AddressMap's cluster is 1Mb):
+ static const size_t kMaxSize = 100*1000*1000;
+ vector<PtrAndSize> ptrs_and_sizes;
for (int i = 0; i < N; ++i) {
- ptrs.push_back(new char[kObjectLength]);
+ size_t s = rnd.Uniform(kMaxRealSize);
+ ptrs_and_sizes.push_back(PtrAndSize(new char[s], s));
}
for (int x = 0; x < FLAGS_iters; ++x) {
// Permute pointers to get rid of allocation order issues
- random_shuffle(ptrs.begin(), ptrs.end());
+ random_shuffle(ptrs_and_sizes.begin(), ptrs_and_sizes.end());
- AddressMap<int> map(malloc, free);
- int result;
+ AddressMap<ValueT> map(malloc, free);
+ const ValueT* result;
+ const void* res_p;
// Insert a bunch of entries
for (int i = 0; i < N; ++i) {
- void* p = ptrs[i];
- CHECK(!map.Find(p, &result));
- map.Insert(p, i);
- CHECK(map.Find(p, &result));
- CHECK_EQ(result, i);
- map.Insert(p, i + N);
- CHECK(map.Find(p, &result));
- CHECK_EQ(result, i + N);
+ char* p = ptrs_and_sizes[i].ptr;
+ CHECK(!map.Find(p));
+ int offs = rnd.Uniform(ptrs_and_sizes[i].size);
+ CHECK(!map.FindInside(&SizeFunc, kMaxSize, p + offs, &res_p));
+ map.Insert(p, make_pair(i, ptrs_and_sizes[i].size));
+ CHECK(result = map.Find(p));
+ CHECK_EQ(result->first, i);
+ CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p));
+ CHECK_EQ(res_p, p);
+ CHECK_EQ(result->first, i);
+ map.Insert(p, make_pair(i + N, ptrs_and_sizes[i].size));
+ CHECK(result = map.Find(p));
+ CHECK_EQ(result->first, i + N);
}
// Delete the even entries
for (int i = 0; i < N; i += 2) {
- void* p = ptrs[i];
- CHECK(map.FindAndRemove(p, &result));
- CHECK_EQ(result, i + N);
+ void* p = ptrs_and_sizes[i].ptr;
+ ValueT removed;
+ CHECK(map.FindAndRemove(p, &removed));
+ CHECK_EQ(removed.first, i + N);
}
// Lookup the odd entries and adjust them
for (int i = 1; i < N; i += 2) {
- void* p = ptrs[i];
- CHECK(map.Find(p, &result));
- CHECK_EQ(result, i + N);
- map.Insert(p, i + 2*N);
- CHECK(map.Find(p, &result));
- CHECK_EQ(result, i + 2*N);
+ char* p = ptrs_and_sizes[i].ptr;
+ CHECK(result = map.Find(p));
+ CHECK_EQ(result->first, i + N);
+ int offs = rnd.Uniform(ptrs_and_sizes[i].size);
+ CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p));
+ CHECK_EQ(res_p, p);
+ CHECK_EQ(result->first, i + N);
+ map.Insert(p, make_pair(i + 2*N, ptrs_and_sizes[i].size));
+ CHECK(result = map.Find(p));
+ CHECK_EQ(result->first, i + 2*N);
}
// Insert even entries back
for (int i = 0; i < N; i += 2) {
- void* p = ptrs[i];
- map.Insert(p, i + 2*N);
- CHECK(map.Find(p, &result));
- CHECK_EQ(result, i + 2*N);
+ char* p = ptrs_and_sizes[i].ptr;
+ int offs = rnd.Uniform(ptrs_and_sizes[i].size);
+ CHECK(!map.FindInside(&SizeFunc, kMaxSize, p + offs, &res_p));
+ map.Insert(p, make_pair(i + 2*N, ptrs_and_sizes[i].size));
+ CHECK(result = map.Find(p));
+ CHECK_EQ(result->first, i + 2*N);
+ CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p));
+ CHECK_EQ(res_p, p);
+ CHECK_EQ(result->first, i + 2*N);
}
// Check all entries
@@ -110,17 +151,16 @@ int main(int argc, char** argv) {
map.Iterate(SetCheckCallback, &check_set);
CHECK_EQ(check_set.size(), N);
for (int i = 0; i < N; ++i) {
- void* p = ptrs[i];
+ void* p = ptrs_and_sizes[i].ptr;
check_set.erase(make_pair(p, i + 2*N));
- CHECK(map.Find(p, &result));
- CHECK_EQ(result, i + 2*N);
+ CHECK(result = map.Find(p));
+ CHECK_EQ(result->first, i + 2*N);
}
CHECK_EQ(check_set.size(), 0);
-
}
for (int i = 0; i < N; ++i) {
- delete[] ptrs[i];
+ delete[] ptrs_and_sizes[i].ptr;
}
printf("PASS\n");
diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc
index 5b64b8b..ac38fb0 100644
--- a/src/tests/heap-checker_unittest.cc
+++ b/src/tests/heap-checker_unittest.cc
@@ -95,6 +95,7 @@
#include <iomanip> // for hex
#include <set>
#include <map>
+#include <list>
#include <memory>
#include <vector>
#include <string>
@@ -115,6 +116,7 @@
using namespace std;
+// ========================================================================= //
// TODO(maxim): write a shell script to test that these indeed crash us
// (i.e. we do detect leaks)
@@ -156,6 +158,9 @@ DEFINE_bool(no_threads,
// This is used so we can make can_create_leaks_reliably true
// for any pthread implementation and test with that.
+DECLARE_int64(heap_check_max_pointer_offset); // heap-checker.cc
+DECLARE_string(heap_check); // in heap-checker.cc
+
#define WARN_IF(cond, msg) LOG_IF(WARNING, cond, msg)
// This is an evil macro! Be very careful using it...
@@ -164,6 +169,8 @@ DEFINE_bool(no_threads,
// This is, likewise, evil
#define LOGF VLOG(INFO)
+static void RunHeapBusyThreads(); // below
+
class Closure {
public:
@@ -233,7 +240,7 @@ static bool can_create_leaks_reliably = false;
// We use a simple allocation wrapper
// to make sure we wipe out the newly allocated objects
// in case they still happened to contain some pointer data
-// accidently left by the memory allocator.
+// accidentally left by the memory allocator.
struct Initialized { };
static Initialized initialized;
void* operator new(size_t size, const Initialized&) {
@@ -833,6 +840,8 @@ static void TestLibCAllocate() {
// Continuous random heap memory activity to try to disrupt heap checking.
static void* HeapBusyThreadBody(void* a) {
+ const int thread_num = reinterpret_cast<intptr_t>(a);
+ VLOG(0) << "A new HeapBusyThread " << thread_num;
TestLibCAllocate();
int user = 0;
@@ -878,7 +887,7 @@ static void* HeapBusyThreadBody(void* a) {
<< reinterpret_cast<void*>(
reinterpret_cast<uintptr_t>(ptr) ^ kHideMask)
<< "^" << reinterpret_cast<void*>(kHideMask);
- if (FLAGS_test_register_leak) {
+ if (FLAGS_test_register_leak && thread_num % 5 == 0) {
// Hide the register "ptr" value with an xor mask.
// If one provides --test_register_leak flag, the test should
// (with very high probability) crash on some leak check
@@ -913,7 +922,8 @@ static void* HeapBusyThreadBody(void* a) {
}
static void RunHeapBusyThreads() {
- if (FLAGS_no_threads) return;
+ KeyInit();
+ if (!FLAGS_interfering_threads || FLAGS_no_threads) return;
const int n = 17; // make many threads
@@ -923,7 +933,8 @@ static void RunHeapBusyThreads() {
// make them and let them run
for (int i = 0; i < n; ++i) {
VLOG(0) << "Creating extra thread " << i + 1;
- CHECK(pthread_create(&tid, &attr, HeapBusyThreadBody, NULL) == 0);
+ CHECK(pthread_create(&tid, &attr, HeapBusyThreadBody,
+ reinterpret_cast<void*>(i)) == 0);
}
Pause();
@@ -1038,10 +1049,62 @@ static void TestHeapLeakCheckerNamedDisabling() {
}
}
-// The code from here to main()
-// is to test that objects that are reachable from global
-// variables are not reported as leaks,
-// with the few exceptions like multiple-inherited objects.
+// ========================================================================= //
+
+// This code section is to test that objects that are reachable from global
+// variables are not reported as leaks
+// as well as that (Un)IgnoreObject work for such objects fine.
+
+// An object making functions:
+// returns a "weird" pointer to a new object for which
+// it's worth checking that the object is reachable via that pointer.
+typedef void* (*ObjMakerFunc)();
+static list<ObjMakerFunc> obj_makers; // list of registered object makers
+
+// Helper macro to register an object making function
+// 'name' is an identifier of this object maker,
+// 'body' is its function body that must declare
+// pointer 'p' to the nex object to return.
+// Usage example:
+// REGISTER_OBJ_MAKER(trivial, int* p = new(initialized) int;)
+#define REGISTER_OBJ_MAKER(name, body) \
+ void* ObjMaker_##name##_() { \
+ VLOG(1) << "Obj making " << #name; \
+ body; \
+ return p; \
+ } \
+ static ObjMakerRegistrar maker_reg_##name##__(&ObjMaker_##name##_);
+// helper class for REGISTER_OBJ_MAKER
+struct ObjMakerRegistrar {
+ ObjMakerRegistrar(ObjMakerFunc obj_maker) { obj_makers.push_back(obj_maker); }
+};
+
+// List of the objects/pointers made with all the obj_makers
+// to test reachability via global data pointers during leak checks.
+static list<void*>* live_objects = new list<void*>;
+ // pointer so that it does not get destructed on exit
+
+// Exerciser for one ObjMakerFunc.
+static void TestPointerReach(ObjMakerFunc obj_maker) {
+ HeapLeakChecker::IgnoreObject(obj_maker()); // test IgnoreObject
+
+ void* obj = obj_maker();
+ HeapLeakChecker::IgnoreObject(obj);
+ HeapLeakChecker::UnIgnoreObject(obj); // test UnIgnoreObject
+ HeapLeakChecker::IgnoreObject(obj); // not to need deletion for obj
+
+ live_objects->push_back(obj_maker()); // test reachability at leak check
+}
+
+// Test all ObjMakerFunc registred via REGISTER_OBJ_MAKER.
+static void TestObjMakers() {
+ for (list<ObjMakerFunc>::const_iterator i = obj_makers.begin();
+ i != obj_makers.end(); ++i) {
+ TestPointerReach(*i);
+ TestPointerReach(*i); // a couple more times would not hurt
+ TestPointerReach(*i);
+ }
+}
// A dummy class to mimic allocation behavior of string-s.
template<class T>
@@ -1083,22 +1146,45 @@ struct Array {
T* ptr;
};
-static Array<char>* live_leak = NULL;
-static Array<char>* live_leak2 = new(initialized) Array<char>();
-static int* live_leak3 = new(initialized) int[10];
-static const char* live_leak4 = new(initialized) char[5];
-static int data[] = { 1, 2, 3, 4, 5, 6, 7, 21, 22, 23, 24, 25, 26, 27 };
-static set<int> live_leak5(data, data+7);
-static const set<int> live_leak6(data, data+14);
-static const Array<char>* live_leak_arr1 = new(initialized) Array<char>[5];
+// to test pointers to objects, built-in arrays, string, etc:
+REGISTER_OBJ_MAKER(plain, int* p = new(initialized) int;)
+REGISTER_OBJ_MAKER(int_array_1, int* p = new(initialized) int[1];)
+REGISTER_OBJ_MAKER(int_array, int* p = new(initialized) int[10];)
+REGISTER_OBJ_MAKER(string, Array<char>* p = new(initialized) Array<char>();)
+REGISTER_OBJ_MAKER(string_array,
+ Array<char>* p = new(initialized) Array<char>[5];)
+REGISTER_OBJ_MAKER(char_array, char* p = new(initialized) char[5];)
+REGISTER_OBJ_MAKER(appended_string,
+ Array<char>* p = new Array<char>();
+ p->append(Array<char>());
+)
+REGISTER_OBJ_MAKER(plain_ptr, int** p = new(initialized) int*;)
+REGISTER_OBJ_MAKER(linking_ptr,
+ int** p = new(initialized) int*;
+ *p = new(initialized) int;
+)
+
+// small objects:
+REGISTER_OBJ_MAKER(0_sized, void* p = malloc(0);) // 0-sized object (important)
+REGISTER_OBJ_MAKER(1_sized, void* p = malloc(1);)
+REGISTER_OBJ_MAKER(2_sized, void* p = malloc(2);)
+REGISTER_OBJ_MAKER(3_sized, void* p = malloc(3);)
+REGISTER_OBJ_MAKER(4_sized, void* p = malloc(4);)
+
+static int set_data[] = { 1, 2, 3, 4, 5, 6, 7, 21, 22, 23, 24, 25, 26, 27 };
+static set<int> live_leak_set(set_data, set_data+7);
+static const set<int> live_leak_const_set(set_data, set_data+14);
+
+REGISTER_OBJ_MAKER(set,
+ set<int>* p = new(initialized) set<int>(set_data, set_data + 13);
+)
class ClassA {
public:
ClassA(int a) : ptr(NULL) { }
mutable char* ptr;
};
-
-static const ClassA live_leak7(1);
+static const ClassA live_leak_mutable(1);
template<class C>
class TClass {
@@ -1107,13 +1193,12 @@ class TClass {
mutable C val;
mutable C* ptr;
};
-
-static const TClass<Array<char> > live_leak8(1);
+static const TClass<Array<char> > live_leak_templ_mutable(1);
class ClassB {
public:
ClassB() { }
- int b[10];
+ char b[7];
virtual void f() { }
virtual ~ClassB() { }
};
@@ -1121,102 +1206,140 @@ class ClassB {
class ClassB2 {
public:
ClassB2() { }
- int b2[10];
+ char b2[11];
virtual void f2() { }
virtual ~ClassB2() { }
};
class ClassD1 : public ClassB {
- int d1[10];
+ char d1[15];
virtual void f() { }
};
class ClassD2 : public ClassB2 {
- int d2[10];
+ char d2[19];
virtual void f2() { }
};
class ClassD : public ClassD1, public ClassD2 {
- int d[10];
+ char d[3];
virtual void f() { }
virtual void f2() { }
};
-static ClassB* live_leak_b;
-static ClassD1* live_leak_d1;
-static ClassD2* live_leak_d2;
-static ClassD* live_leak_d;
+// to test pointers to objects of base subclasses:
-static ClassB* live_leak_b_d1;
-static ClassB2* live_leak_b2_d2;
-static ClassB* live_leak_b_d;
-static ClassB2* live_leak_b2_d;
+REGISTER_OBJ_MAKER(B, ClassB* p = new(initialized) ClassB;)
+REGISTER_OBJ_MAKER(D1, ClassD1* p = new(initialized) ClassD1;)
+REGISTER_OBJ_MAKER(D2, ClassD2* p = new(initialized) ClassD2;)
+REGISTER_OBJ_MAKER(D, ClassD* p = new(initialized) ClassD;)
-static ClassD1* live_leak_d1_d;
-static ClassD2* live_leak_d2_d;
+REGISTER_OBJ_MAKER(D1_as_B, ClassB* p = new(initialized) ClassD1;)
+REGISTER_OBJ_MAKER(D2_as_B2, ClassB2* p = new(initialized) ClassD2;)
+REGISTER_OBJ_MAKER(D_as_B, ClassB* p = new(initialized) ClassD;)
+REGISTER_OBJ_MAKER(D_as_D1, ClassD1* p = new(initialized) ClassD;)
+// inside-object pointers:
+REGISTER_OBJ_MAKER(D_as_B2, ClassB2* p = new(initialized) ClassD;)
+REGISTER_OBJ_MAKER(D_as_D2, ClassD2* p = new(initialized) ClassD;)
-// A dummy string class mimics certain third party string
-// implementations, which store a refcount in the first
-// few bytes and keeps a pointer pointing behind the refcount.
-template <typename R>
-class RefcountStr {
+class InterfaceA {
public:
- RefcountStr() {
- ptr = new char[sizeof(R) * 10];
- ptr = ptr + sizeof(R);
- }
- char* ptr;
+ virtual void A() = 0;
+ virtual ~InterfaceA() { }
+ protected:
+ InterfaceA() { }
};
-// E.g., mimics UnicodeString defined in third_party/icu.
-static const RefcountStr<uint32> live_leak_refcountstr_4;
-
-// have leaks but ignore the leaked objects
-static void IgnoredLeaks() {
- int* p = new(initialized) int[1];
- HeapLeakChecker::IgnoreObject(p);
- int** leak = new(initialized) int*;
- HeapLeakChecker::IgnoreObject(leak);
- *leak = new(initialized) int;
- HeapLeakChecker::UnIgnoreObject(p);
- delete [] p;
-}
+
+class InterfaceB {
+ public:
+ virtual void B() = 0;
+ virtual ~InterfaceB() { }
+ protected:
+ InterfaceB() { }
+};
+
+class InterfaceC : public InterfaceA {
+ public:
+ virtual void C() = 0;
+ virtual ~InterfaceC() { }
+ protected:
+ InterfaceC() { }
+};
+
+class ClassMltD1 : public ClassB, public InterfaceB, public InterfaceC {
+ public:
+ char d1[11];
+ virtual void f() { }
+ virtual void A() { }
+ virtual void B() { }
+ virtual void C() { }
+};
+
+class ClassMltD2 : public InterfaceA, public InterfaceB, public ClassB {
+ public:
+ char d2[15];
+ virtual void f() { }
+ virtual void A() { }
+ virtual void B() { }
+};
+
+// to specifically test heap reachability under
+// inerface-only multiple inheritance (some use inside-object pointers):
+REGISTER_OBJ_MAKER(MltD1, ClassMltD1* p = new (initialized) ClassMltD1;)
+REGISTER_OBJ_MAKER(MltD1_as_B, ClassB* p = new (initialized) ClassMltD1;)
+REGISTER_OBJ_MAKER(MltD1_as_IA, InterfaceA* p = new (initialized) ClassMltD1;)
+REGISTER_OBJ_MAKER(MltD1_as_IB, InterfaceB* p = new (initialized) ClassMltD1;)
+REGISTER_OBJ_MAKER(MltD1_as_IC, InterfaceC* p = new (initialized) ClassMltD1;)
+
+REGISTER_OBJ_MAKER(MltD2, ClassMltD2* p = new (initialized) ClassMltD2;)
+REGISTER_OBJ_MAKER(MltD2_as_B, ClassB* p = new (initialized) ClassMltD2;)
+REGISTER_OBJ_MAKER(MltD2_as_IA, InterfaceA* p = new (initialized) ClassMltD2;)
+REGISTER_OBJ_MAKER(MltD2_as_IB, InterfaceB* p = new (initialized) ClassMltD2;)
+
+// to mimic UnicodeString defined in third_party/icu,
+// which store a platform-independent-sized refcount in the first
+// few bytes and keeps a pointer pointing behind the refcount.
+REGISTER_OBJ_MAKER(unicode_string,
+ char* p = new char[sizeof(uint32) * 10];
+ p += sizeof(uint32);
+)
+// similar, but for platform-dependent-sized refcount
+REGISTER_OBJ_MAKER(ref_counted,
+ char* p = new char[sizeof(int) * 20];
+ p += sizeof(int);
+)
+
+struct Nesting {
+ struct Inner {
+ Nesting* parent;
+ Inner(Nesting* p) : parent(p) {}
+ };
+ Inner i0;
+ char n1[5];
+ Inner i1;
+ char n2[11];
+ Inner i2;
+ char n3[27];
+ Inner i3;
+ Nesting() : i0(this), i1(this), i2(this), i3(this) {}
+};
+
+// to test inside-object pointers pointing at objects nested into heap objects:
+REGISTER_OBJ_MAKER(nesting_i0, Nesting::Inner* p = &((new Nesting())->i0);)
+REGISTER_OBJ_MAKER(nesting_i1, Nesting::Inner* p = &((new Nesting())->i1);)
+REGISTER_OBJ_MAKER(nesting_i2, Nesting::Inner* p = &((new Nesting())->i2);)
+REGISTER_OBJ_MAKER(nesting_i3, Nesting::Inner* p = &((new Nesting())->i3);)
// allocate many objects reachable from global data
static void TestHeapLeakCheckerLiveness() {
- live_leak_b = new(initialized) ClassB;
- live_leak_d1 = new(initialized) ClassD1;
- live_leak_d2 = new(initialized) ClassD2;
- live_leak_d = new(initialized) ClassD;
-
- live_leak_b_d1 = new(initialized) ClassD1;
- live_leak_b2_d2 = new(initialized) ClassD2;
- live_leak_b_d = new(initialized) ClassD;
- live_leak_b2_d = new(initialized) ClassD;
-
- live_leak_d1_d = new(initialized) ClassD;
- live_leak_d2_d = new(initialized) ClassD;
-
- HeapLeakChecker::IgnoreObject((ClassD*)live_leak_b2_d);
- HeapLeakChecker::IgnoreObject((ClassD*)live_leak_d2_d);
- // These two do not get deleted with liveness flood
- // because the base class pointer points inside of the objects
- // in such cases of multiple inheritance.
- // Luckily google code does not use multiple inheritance almost at all.
-
- live_leak = new(initialized) Array<char>();
- delete [] live_leak3;
- live_leak3 = new(initialized) int[33];
- live_leak2->append(*live_leak);
- live_leak7.ptr = new(initialized) char[77];
- live_leak8.ptr = new(initialized) Array<char>();
- live_leak8.val = Array<char>();
-
- IgnoredLeaks();
- IgnoredLeaks();
- IgnoredLeaks();
+ live_leak_mutable.ptr = new(initialized) char[77];
+ live_leak_templ_mutable.ptr = new(initialized) Array<char>();
+ live_leak_templ_mutable.val = Array<char>();
+
+ TestObjMakers();
}
-DECLARE_string(heap_check);
+// ========================================================================= //
// Get address (PC value) following the mmap call into addr_after_mmap_call
static void* Mmapper(uintptr_t* addr_after_mmap_call) {
@@ -1294,6 +1417,8 @@ extern void VerifyHeapProfileTableStackGet() {
free(addr);
}
+// ========================================================================= //
+
// Helper to do 'return 0;' inside main(): insted we do 'return Pass();'
static int Pass() {
fprintf(stdout, "PASS\n");
@@ -1362,6 +1487,13 @@ int main(int argc, char** argv) {
// (when !FLAGS_test_cancel_global_check)
}
+ if (FLAGS_test_register_leak) {
+ // make us fail only where the .sh test expects:
+ Pause();
+ for (int i = 0; i < 20; ++i) CHECK(HeapLeakChecker::NoGlobalLeaks());
+ return Pass();
+ }
+
TestHeapLeakCheckerLiveness();
HeapLeakChecker heap_check("all");
@@ -1441,10 +1573,6 @@ int main(int argc, char** argv) {
range_disable_named = true;
ThreadNamedDisabledLeaks();
- HeapLeakChecker::IgnoreObject(new(initialized) set<int>(data, data + 13));
- // This checks both that IgnoreObject works, and
- // and the fact that we don't drop such leaks as live for some reason.
-
CHECK(HeapLeakChecker::NoGlobalLeaks()); // so far, so good
return Pass();
diff --git a/src/tests/profiler_unittest.cc b/src/tests/profiler_unittest.cc
index e5729a1..1063751 100644
--- a/src/tests/profiler_unittest.cc
+++ b/src/tests/profiler_unittest.cc
@@ -42,7 +42,7 @@
#endif
#include <sys/wait.h> // for wait()
#include "google/profiler.h"
-#include "base/mutex.h"
+#include "base/simple_mutex.h"
#include "tests/testutil.h"
static int result = 0;
diff --git a/src/tests/stacktrace_unittest.cc b/src/tests/stacktrace_unittest.cc
index 710743d..6bd28a2 100644
--- a/src/tests/stacktrace_unittest.cc
+++ b/src/tests/stacktrace_unittest.cc
@@ -39,7 +39,7 @@
// Obtain a backtrace, verify that the expected callers are present in the
-// backtrace, and maybe print the backtrace to stdout.
+// backtrace, and maybe print the backtrace to stdout.
//-----------------------------------------------------------------------//
void CheckStackTraceLeaf();
@@ -53,7 +53,7 @@ void CheckStackTrace(int i);
// The sequence of functions whose return addresses we expect to see in the
// backtrace.
const int BACKTRACE_STEPS = 6;
-void * expected_stack[BACKTRACE_STEPS] = {
+void * expected_stack[BACKTRACE_STEPS] = {
(void *) &CheckStackTraceLeaf,
(void *) &CheckStackTrace4,
(void *) &CheckStackTrace3,
@@ -81,11 +81,12 @@ void * expected_stack[BACKTRACE_STEPS] = {
//-----------------------------------------------------------------------//
+const int kMaxFnLen = 0x40; // assume relevant functions are only this long
+
void CheckRetAddrIsInFunction( void * ret_addr, void * function_start_addr)
{
- const int typ_fn_len = 0x40; // assume relevant functions are only 0x40 bytes long
CHECK_GE(ret_addr, function_start_addr);
- CHECK_LE(ret_addr, (void *) ((char *) function_start_addr + typ_fn_len));
+ CHECK_LE(ret_addr, (void *) ((char *) function_start_addr + kMaxFnLen));
}
//-----------------------------------------------------------------------//
@@ -111,7 +112,11 @@ void CheckStackTraceLeaf(void) {
#endif
for (int i = 0; i < BACKTRACE_STEPS; i++) {
+ printf("Backtrace %d: expected: %p..%p actual: %p ... ",
+ i, expected_stack[i],
+ reinterpret_cast<char*>(expected_stack[i]) + kMaxFnLen, stack[i]);
CheckRetAddrIsInFunction(stack[i], expected_stack[i]);
+ printf("OK\n");
}
}
diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc
index e818a21..d3df596 100644
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@@ -76,7 +76,7 @@
#include <string>
#include <new>
#include "base/logging.h"
-#include "base/mutex.h"
+#include "base/simple_mutex.h"
#include "google/malloc_hook.h"
#include "google/malloc_extension.h"
#include "tests/testutil.h"
diff --git a/src/windows/preamble_patcher.h b/src/windows/preamble_patcher.h
index cc69d72..dc6d4db 100644
--- a/src/windows/preamble_patcher.h
+++ b/src/windows/preamble_patcher.h
@@ -65,6 +65,9 @@ enum SideStepError {
SIDESTEP_UNEXPECTED,
};
+#define SIDESTEP_TO_HRESULT(error) \
+ MAKE_HRESULT(SEVERITY_ERROR, FACILITY_NULL, error)
+
// Implements a patching mechanism that overwrites the first few bytes of
// a function preamble with a jump to our hook function, which is then
// able to call the original function via a specially-made preamble-stub