summaryrefslogtreecommitdiff
path: root/Makefile.am
diff options
context:
space:
mode:
authorAliaksey Kandratsenka <alk@tut.by>2014-09-07 13:09:14 -0700
committerAliaksey Kandratsenka <alk@tut.by>2014-11-02 18:29:55 -0800
commit1108d83cf4a1692fce3b736e16d3e98c33329177 (patch)
tree9448adf78207da3b9007bbf5de0793d64fa8851a /Makefile.am
parent714bd93e42535e759716324a90fbb395506499d2 (diff)
downloadgperftools-1108d83cf4a1692fce3b736e16d3e98c33329177.tar.gz
implemented cpu-profiling mode that profiles threads separately
The default mode of operation of the cpu profiler uses itimer and SIGPROF. This timer is by definition per-process, and no spec defines which thread is going to receive SIGPROF. It provides correct profiles only if we assume that the probability of picking a thread is proportional to the cpu time spent by that thread. It is easy to see that recent Linux (at least on common SMP hardware) doesn't satisfy that assumption. Quite big skews of SIGPROF ticks between threads are visible. I.e. I could see as big as a 70%/20% division instead of 50%/50% for a pair of cpu-hog threads. (And I do see it become 50/50 with the new mode.) Fortunately, POSIX provides a mechanism to track per-thread cpu time via the posix timers facility. And even more fortunately, Linux also provides a mechanism to deliver timer ticks to specific threads. Interestingly, it looks like FreeBSD also has a very similar facility and seems to suffer from the same skew. But due to differences in how threads are identified, I haven't bothered to try to support this mode on FreeBSD. This commit implements a new profiling mode where every thread creates a posix timer which tracks that thread's cpu time. Each thread also sets up signal delivery to itself on overflows of that timer. This new mode requires every thread to be registered in the cpu profiler. The existing ProfilerRegisterThread function is used for that. Because registering threads requires application support (or a suitable LD_PRELOAD-able wrapper for the thread creation API), the new mode is off by default, and it has to be manually activated by setting the environment variable CPUPROFILE_PER_THREAD_TIMERS. The new mode also requires librt symbols to be available. We do not link to librt because of its dependency on libpthread, which we avoid due to the perf impact of bringing libpthread into otherwise single-threaded programs. So librt has to be either already loaded by the program being profiled or LD_PRELOAD-ed.
Diffstat (limited to 'Makefile.am')
-rwxr-xr-xMakefile.am23
1 files changed, 11 insertions, 12 deletions
diff --git a/Makefile.am b/Makefile.am
index 25618b7..d163435 100755
--- a/Makefile.am
+++ b/Makefile.am
@@ -203,6 +203,10 @@ libsysinfo_la_SOURCES = src/base/sysinfo.cc \
$(SYSINFO_INCLUDES)
libsysinfo_la_LIBADD = $(NANOSLEEP_LIBS)
+noinst_LTLIBRARIES += libmaybe_threads.la
+# .cc is conditionally added below
+libmaybe_threads_la_SOURCES = src/maybe_threads.h
+
# For MinGW, we use also have to use libwindows Luckily, we need the
# windows.a library in exactly the same place we need spinlock.a
# (pretty much everywhere), so we can use the same variable name for
@@ -271,9 +275,7 @@ libsysinfo_la_LIBADD += -lshlwapi
TCMALLOC_CC =
# windows has its own system for threads and system memory allocation.
if HAVE_PTHREAD_DESPITE_ASKING_FOR
-MAYBE_THREADS_CC = src/maybe_threads.cc
-else
-MAYBE_THREADS_CC =
+libmaybe_threads_la_SOURCES += src/maybe_threads.cc
endif
SYSTEM_ALLOC_CC =
else !MINGW
@@ -296,7 +298,7 @@ libspinlock_la_LIBADD = $(NANOSLEEP_LIBS)
LIBSPINLOCK = libspinlock.la libsysinfo.la liblogging.la
TCMALLOC_CC = src/tcmalloc.cc
-MAYBE_THREADS_CC = src/maybe_threads.cc
+libmaybe_threads_la_SOURCES += src/maybe_threads.cc
SYSTEM_ALLOC_CC = src/system-alloc.cc
endif !MINGW
@@ -322,13 +324,12 @@ LOW_LEVEL_ALLOC_UNITTEST_INCLUDES = src/base/low_level_alloc.h \
$(LOGGING_INCLUDES)
low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \
src/malloc_hook.cc \
- $(MAYBE_THREADS_CC) \
src/tests/low_level_alloc_unittest.cc \
$(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES)
# By default, MallocHook takes stack traces for use by the heap-checker.
# We don't need that functionality here, so we turn it off to reduce deps.
low_level_alloc_unittest_CXXFLAGS = -DNO_TCMALLOC_SAMPLES
-low_level_alloc_unittest_LDADD = $(LIBSPINLOCK)
+low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libmaybe_threads.la
TESTS += atomicops_unittest
ATOMICOPS_UNITTEST_INCLUDES = src/base/atomicops.h \
@@ -447,8 +448,7 @@ S_TCMALLOC_MINIMAL_INCLUDES = src/common.h \
src/base/thread_annotations.h \
src/malloc_hook-inl.h \
src/malloc_hook_mmap_linux.h \
- src/malloc_hook_mmap_freebsd.h \
- src/maybe_threads.h
+ src/malloc_hook_mmap_freebsd.h
SG_TCMALLOC_MINIMAL_INCLUDES = src/gperftools/malloc_hook.h \
src/gperftools/malloc_hook_c.h \
src/gperftools/malloc_extension.h \
@@ -477,7 +477,6 @@ libtcmalloc_minimal_internal_la_SOURCES = src/common.cc \
src/thread_cache.cc \
src/malloc_hook.cc \
src/malloc_extension.cc \
- $(MAYBE_THREADS_CC) \
$(TCMALLOC_MINIMAL_INCLUDES)
# We #define NO_TCMALLOC_SAMPLES, since sampling is turned off for _minimal.
libtcmalloc_minimal_internal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
@@ -485,7 +484,7 @@ libtcmalloc_minimal_internal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
$(PTHREAD_CFLAGS) -DNDEBUG \
$(AM_CXXFLAGS) $(NO_EXCEPTIONS)
libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS)
-libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK)
+libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK) libmaybe_threads.la
lib_LTLIBRARIES += libtcmalloc_minimal.la
WINDOWS_PROJECTS += vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj
@@ -898,7 +897,7 @@ lib_LTLIBRARIES += libtcmalloc.la
libtcmalloc_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES)
libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
-libtcmalloc_la_LIBADD = libtcmalloc_internal.la $(PTHREAD_LIBS)
+libtcmalloc_la_LIBADD = libtcmalloc_internal.la libmaybe_threads.la $(PTHREAD_LIBS)
if WITH_HEAP_CHECKER
# heap-checker-bcad is last, in hopes its global ctor will run first.
@@ -1238,7 +1237,7 @@ libprofiler_la_SOURCES = src/profiler.cc \
src/profile-handler.cc \
src/profiledata.cc \
$(CPU_PROFILER_INCLUDES)
-libprofiler_la_LIBADD = libstacktrace.la
+libprofiler_la_LIBADD = libstacktrace.la libmaybe_threads.la
# We have to include ProfileData for profiledata_unittest
CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStartWithOptions|ProfilerStop|ProfilerFlush|ProfilerEnable|ProfilerDisable|ProfilingIsEnabledForAllThreads|ProfilerRegisterThread|ProfilerGetCurrentState|ProfilerState|ProfileData|ProfileHandler)'
libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS) \