summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorcsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2010-11-18 01:07:25 +0000
committercsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2010-11-18 01:07:25 +0000
commit3014cf142e5a2409c88ab4559f3274434ed9a29b (patch)
tree13c2dc85e8800116c4c7d3ef5f27ea0edc37d20f /src
parent682ff7da1205398376ee725b4ce3219c107b3f8a (diff)
downloadgperftools-3014cf142e5a2409c88ab4559f3274434ed9a29b.tar.gz
* Suppress all large allocs when report threshold==0
* Clarified meaning of various malloc stats * Change from ATTRIBUTED_DEPRECATED to comments * Make array-size a var to compile under clang * Reduce page map key size under x86_64 by 4.4MB * Added full qualification to MemoryBarrier * Support systems that capitalize /proc weirdly * Avoid gcc warning: exporting type in unnamed ns * Add some dynamic annotations for gcc attributes * Add support for census profiler in pprof * Speed up pprof's ExtractSymbols * Speed up GoogleOnce * Add pkg-config (.pc) files * Detect when __environ exists but is NULL * Improve spinlock contention performance * Add GetFreeListSizes * Improve sampling_test, eg by adding no-inline * Relax malloc_extension test-check for big pages * Add proper library version number information * Update from autoconf 2.64 to 2.65 * Better document how to write a server that works with pprof * Change FillProcSelfMaps to better handle out-of-space * No longer hook _aligned_malloc/free in windows * Handle function-forwarding in DLLs when patching (in windows) * Update .vcproj files that had wrong .cc files in them (!) * get rid of unnecessary 'size < 0' * fix comments a bit in sysinfo.cc * another go at improving malloc-stats output * fix comment typo in profiler.cc * Add a few more thread annotations * Try to read TSC frequency from 'tsc_freq_khz' * Fix annotalysis/TSAN incompatibility * Add pprof --evince to go along with --gv * Document need for sampling to use GetHeapSample * Fix flakiness in malloc_extension_test * Separate out synchronization profiling routines git-svn-id: http://gperftools.googlecode.com/svn/trunk@99 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src')
-rw-r--r--src/base/atomicops-internals-arm-gcc.h234
-rw-r--r--src/base/basictypes.h2
-rw-r--r--src/base/dynamic_annotations.h117
-rw-r--r--src/base/logging.h5
-rw-r--r--src/base/low_level_alloc.cc6
-rw-r--r--src/base/spinlock.cc166
-rw-r--r--src/base/spinlock.h65
-rw-r--r--src/base/spinlock_internal.cc77
-rw-r--r--src/base/spinlock_internal.h64
-rw-r--r--src/base/spinlock_linux-inl.h52
-rw-r--r--src/base/spinlock_posix-inl.h32
-rw-r--r--src/base/spinlock_win32-inl.h18
-rw-r--r--src/base/synchronization_profiling.h50
-rw-r--r--src/base/sysinfo.cc107
-rw-r--r--src/base/sysinfo.h2
-rw-r--r--src/base/thread_annotations.h4
-rw-r--r--src/base/vdso_support.cc2
-rw-r--r--src/common.h13
-rw-r--r--src/config.h.in7
-rw-r--r--src/debugallocation.cc16
-rw-r--r--src/google/heap-checker.h2
-rw-r--r--src/google/malloc_extension.h47
-rw-r--r--src/heap-profile-table.cc3
-rw-r--r--src/malloc_extension.cc12
-rw-r--r--src/memory_region_map.cc1
-rw-r--r--src/page_heap.cc29
-rw-r--r--src/page_heap.h15
-rwxr-xr-xsrc/pprof85
-rw-r--r--src/profiler.cc2
-rw-r--r--src/system-alloc.cc30
-rw-r--r--src/tcmalloc.cc190
-rw-r--r--src/tests/debugallocation_test.cc10
-rw-r--r--src/tests/malloc_extension_test.cc26
-rw-r--r--src/tests/sampling_test.cc2
-rwxr-xr-xsrc/tests/sampling_test.sh4
-rw-r--r--src/tests/system-alloc_unittest.cc14
-rw-r--r--src/tests/tcmalloc_unittest.cc23
-rw-r--r--src/windows/config.h2
-rw-r--r--src/windows/patch_functions.cc46
-rw-r--r--src/windows/port.cc12
-rw-r--r--src/windows/port.h2
41 files changed, 1299 insertions, 297 deletions
diff --git a/src/base/atomicops-internals-arm-gcc.h b/src/base/atomicops-internals-arm-gcc.h
new file mode 100644
index 0000000..423e993
--- /dev/null
+++ b/src/base/atomicops-internals-arm-gcc.h
@@ -0,0 +1,234 @@
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Lei Zhang, Sasha Levitskiy
+ */
+
+// This file is an internal atomic implementation, use base/atomicops.h instead.
+//
+// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.
+
+#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
+#define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
+
+#include <stdio.h>
+#include "base/basictypes.h" // For COMPILE_ASSERT
+
+typedef int32_t Atomic32;
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+// 0xffff0fc0 is the hard coded address of a function provided by
+// the kernel which implements an atomic compare-exchange. On older
+// ARM architecture revisions (pre-v6) this may be implemented using
+// a syscall. This address is stable, and in active use (hard coded)
+// by at least glibc-2.7 and the Android C library.
+// pLinuxKernelCmpxchg has both acquire and release barrier sematincs.
+typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value,
+ Atomic32 new_value,
+ volatile Atomic32* ptr);
+LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute__((weak)) =
+ (LinuxKernelCmpxchgFunc) 0xffff0fc0;
+
+typedef void (*LinuxKernelMemoryBarrierFunc)(void);
+LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) =
+ (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
+
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+ Atomic32 old_value,
+ Atomic32 new_value) {
+ Atomic32 prev_value = *ptr;
+ do {
+ if (!pLinuxKernelCmpxchg(old_value, new_value,
+ const_cast<Atomic32*>(ptr))) {
+ return old_value;
+ }
+ prev_value = *ptr;
+ } while (prev_value == old_value);
+ return prev_value;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ Atomic32 old_value;
+ do {
+ old_value = *ptr;
+ } while (pLinuxKernelCmpxchg(old_value, new_value,
+ const_cast<Atomic32*>(ptr)));
+ return old_value;
+}
+
+inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ for (;;) {
+ // Atomic exchange the old value with an incremented one.
+ Atomic32 old_value = *ptr;
+ Atomic32 new_value = old_value + increment;
+ if (pLinuxKernelCmpxchg(old_value, new_value,
+ const_cast<Atomic32*>(ptr)) == 0) {
+ // The exchange took place as expected.
+ return new_value;
+ }
+ // Otherwise, *ptr changed mid-loop and we need to retry.
+ }
+}
+
+inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ return Barrier_AtomicIncrement(ptr, increment);
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+ Atomic32 old_value,
+ Atomic32 new_value) {
+ return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+ Atomic32 old_value,
+ Atomic32 new_value) {
+ return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+ *ptr = value;
+}
+
+inline void MemoryBarrier() {
+ pLinuxKernelMemoryBarrier();
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+ *ptr = value;
+ MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+ MemoryBarrier();
+ *ptr = value;
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+ return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+ Atomic32 value = *ptr;
+ MemoryBarrier();
+ return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+ MemoryBarrier();
+ return *ptr;
+}
+
+
+// 64-bit versions are not implemented yet.
+
+inline void NotImplementedFatalError(const char *function_name) {
+ fprintf(stderr, "64-bit %s() not implemented on this platform\n",
+ function_name);
+ abort();
+}
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+ NotImplementedFatalError("NoBarrier_CompareAndSwap");
+ return 0;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ NotImplementedFatalError("NoBarrier_AtomicExchange");
+ return 0;
+}
+
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ NotImplementedFatalError("NoBarrier_AtomicIncrement");
+ return 0;
+}
+
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ NotImplementedFatalError("Barrier_AtomicIncrement");
+ return 0;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+ NotImplementedFatalError("NoBarrier_Store");
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+ NoBarrier_AtomicExchange(ptr, value);
+ // acts as a barrier in this implementation
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+ NotImplementedFatalError("Release_Store");
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+ NotImplementedFatalError("NoBarrier_Load");
+ return 0;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+ Atomic64 value = NoBarrier_Load(ptr);
+ return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+ MemoryBarrier();
+ return NoBarrier_Load(ptr);
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+ return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+ return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+} // namespace base::subtle
+} // namespace base
+
+#endif // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
diff --git a/src/base/basictypes.h b/src/base/basictypes.h
index ab9cdab..0f21fca 100644
--- a/src/base/basictypes.h
+++ b/src/base/basictypes.h
@@ -109,7 +109,7 @@ const int64 kint64min = ( ((( int64) kint32min) << 32) | 0 );
// Also allow for printing of a pthread_t.
#define GPRIuPTHREAD "lu"
#define GPRIxPTHREAD "lx"
-#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__)
+#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) || defined(__FreeBSD__)
#define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast<uintptr_t>(pthreadt)
#else
#define PRINTABLE_PTHREAD(pthreadt) pthreadt
diff --git a/src/base/dynamic_annotations.h b/src/base/dynamic_annotations.h
index 10642fd..6283f7e 100644
--- a/src/base/dynamic_annotations.h
+++ b/src/base/dynamic_annotations.h
@@ -370,6 +370,41 @@
#endif /* DYNAMIC_ANNOTATIONS_ENABLED */
+/* Macro definitions for GCC attributes that allow static thread safety
+ analysis to recognize and use some of the dynamic annotations as
+ escape hatches.
+ TODO(lcwu): remove the check for __SUPPORT_DYN_ANNOTATION__ once the
+ default crosstool/GCC supports these GCC attributes. */
+
+#define ANNOTALYSIS_STATIC_INLINE
+#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ;
+
+#if defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) \
+ && (!defined(SWIG)) && defined(__SUPPORT_DYN_ANNOTATION__)
+
+#if DYNAMIC_ANNOTATIONS_ENABLED == 0
+#define ANNOTALYSIS_ONLY 1
+#undef ANNOTALYSIS_STATIC_INLINE
+#define ANNOTALYSIS_STATIC_INLINE static inline
+#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY {}
+#endif
+#define ANNOTALYSIS_IGNORE_READS_BEGIN __attribute__ ((ignore_reads_begin))
+#define ANNOTALYSIS_IGNORE_READS_END __attribute__ ((ignore_reads_end))
+#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin))
+#define ANNOTALYSIS_IGNORE_WRITES_END __attribute__ ((ignore_writes_end))
+#define ANNOTALYSIS_UNPROTECTED_READ __attribute__ ((unprotected_read))
+
+#else
+
+#define ANNOTALYSIS_IGNORE_READS_BEGIN
+#define ANNOTALYSIS_IGNORE_READS_END
+#define ANNOTALYSIS_IGNORE_WRITES_BEGIN
+#define ANNOTALYSIS_IGNORE_WRITES_END
+#define ANNOTALYSIS_UNPROTECTED_READ
+
+#endif
+
/* Use the macros above rather than using these functions directly. */
#ifdef __cplusplus
extern "C" {
@@ -431,10 +466,18 @@ void AnnotateTraceMemory(const char *file, int line,
const volatile void *arg);
void AnnotateThreadName(const char *file, int line,
const char *name);
-void AnnotateIgnoreReadsBegin(const char *file, int line);
-void AnnotateIgnoreReadsEnd(const char *file, int line);
-void AnnotateIgnoreWritesBegin(const char *file, int line);
-void AnnotateIgnoreWritesEnd(const char *file, int line);
+ANNOTALYSIS_STATIC_INLINE
+void AnnotateIgnoreReadsBegin(const char *file, int line)
+ ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+ANNOTALYSIS_STATIC_INLINE
+void AnnotateIgnoreReadsEnd(const char *file, int line)
+ ANNOTALYSIS_IGNORE_READS_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+ANNOTALYSIS_STATIC_INLINE
+void AnnotateIgnoreWritesBegin(const char *file, int line)
+ ANNOTALYSIS_IGNORE_WRITES_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+ANNOTALYSIS_STATIC_INLINE
+void AnnotateIgnoreWritesEnd(const char *file, int line)
+ ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
void AnnotateEnableRaceDetection(const char *file, int line, int enable);
void AnnotateNoOp(const char *file, int line,
const volatile void *arg);
@@ -485,7 +528,8 @@ double ValgrindSlowdown(void);
one can use
... = ANNOTATE_UNPROTECTED_READ(x); */
template <class T>
- inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) {
+ inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x)
+ ANNOTALYSIS_UNPROTECTED_READ {
ANNOTATE_IGNORE_READS_BEGIN();
T res = x;
ANNOTATE_IGNORE_READS_END();
@@ -511,4 +555,67 @@ double ValgrindSlowdown(void);
#endif /* DYNAMIC_ANNOTATIONS_ENABLED */
+/* Annotalysis, a GCC based static analyzer, is able to understand and use
+ some of the dynamic annotations defined in this file. However, dynamic
+ annotations are usually disabled in the opt mode (to avoid additional
+ runtime overheads) while Annotalysis only works in the opt mode.
+ In order for Annotalysis to use these dynamic annotations when they
+ are disabled, we re-define these annotations here. Note that unlike the
+ original macro definitions above, these macros are expanded to calls to
+ static inline functions so that the compiler will be able to remove the
+ calls after the analysis. */
+
+#ifdef ANNOTALYSIS_ONLY
+
+ #undef ANNOTALYSIS_ONLY
+
+ /* Undefine and re-define the macros that the static analyzer understands. */
+ #undef ANNOTATE_IGNORE_READS_BEGIN
+ #define ANNOTATE_IGNORE_READS_BEGIN() \
+ AnnotateIgnoreReadsBegin(__FILE__, __LINE__)
+
+ #undef ANNOTATE_IGNORE_READS_END
+ #define ANNOTATE_IGNORE_READS_END() \
+ AnnotateIgnoreReadsEnd(__FILE__, __LINE__)
+
+ #undef ANNOTATE_IGNORE_WRITES_BEGIN
+ #define ANNOTATE_IGNORE_WRITES_BEGIN() \
+ AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
+
+ #undef ANNOTATE_IGNORE_WRITES_END
+ #define ANNOTATE_IGNORE_WRITES_END() \
+ AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
+
+ #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN
+ #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \
+ do { \
+ ANNOTATE_IGNORE_READS_BEGIN(); \
+ ANNOTATE_IGNORE_WRITES_BEGIN(); \
+ }while(0) \
+
+ #undef ANNOTATE_IGNORE_READS_AND_WRITES_END
+ #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \
+ do { \
+ ANNOTATE_IGNORE_WRITES_END(); \
+ ANNOTATE_IGNORE_READS_END(); \
+ }while(0) \
+
+ #if defined(__cplusplus)
+ #undef ANNOTATE_UNPROTECTED_READ
+ template <class T>
+ inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x)
+ __attribute__ ((unprotected_read)) {
+ ANNOTATE_IGNORE_READS_BEGIN();
+ T res = x;
+ ANNOTATE_IGNORE_READS_END();
+ return res;
+ }
+ #endif /* __cplusplus */
+
+#endif /* ANNOTALYSIS_ONLY */
+
+/* Undefine the macros intended only in this file. */
+#undef ANNOTALYSIS_STATIC_INLINE
+#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+
#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */
diff --git a/src/base/logging.h b/src/base/logging.h
index 6aa5c3f..fe25acf 100644
--- a/src/base/logging.h
+++ b/src/base/logging.h
@@ -49,10 +49,13 @@
// On some systems (like freebsd), we can't call write() at all in a
// global constructor, perhaps because errno hasn't been set up.
+// (In windows, we can't call it because it might call malloc.)
// Calling the write syscall is safer (it doesn't set errno), so we
// prefer that. Note we don't care about errno for logging: we just
// do logging on a best-effort basis.
-#ifdef HAVE_SYS_SYSCALL_H
+#if defined(_MSC_VER)
+#define WRITE_TO_STDERR(buf, len) WriteToStderr(buf, len); // in port.cc
+#elif defined(HAVE_SYS_SYSCALL_H)
#include <sys/syscall.h>
#define WRITE_TO_STDERR(buf, len) syscall(SYS_write, STDERR_FILENO, buf, len)
#else
diff --git a/src/base/low_level_alloc.cc b/src/base/low_level_alloc.cc
index 7ca3953..8864629 100644
--- a/src/base/low_level_alloc.cc
+++ b/src/base/low_level_alloc.cc
@@ -59,7 +59,9 @@
// ---------------------------------------------------------------------------
static const int kMaxLevel = 30;
-namespace {
+// We put this class-only struct in a namespace to avoid polluting the
+// global namespace with this struct name (thus risking an ODR violation).
+namespace low_level_alloc_internal {
// This struct describes one allocated block, or one free block.
struct AllocList {
struct Header {
@@ -79,6 +81,8 @@ namespace {
// LLA_SkiplistLevels()
};
}
+using low_level_alloc_internal::AllocList;
+
// ---------------------------------------------------------------------------
// A trivial skiplist implementation. This is used to keep the freelist
diff --git a/src/base/spinlock.cc b/src/base/spinlock.cc
index 48cdc89..1413923 100644
--- a/src/base/spinlock.cc
+++ b/src/base/spinlock.cc
@@ -32,47 +32,28 @@
*/
#include <config.h>
-#include <time.h> /* For nanosleep() */
-#ifdef HAVE_SCHED_H
-#include <sched.h> /* For sched_yield() */
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h> /* For read() */
-#endif
-#include <fcntl.h> /* for open(), O_RDONLY */
-#include <string.h> /* for strncmp */
-#include <errno.h>
#include "base/spinlock.h"
+#include "base/synchronization_profiling.h"
+#include "base/spinlock_internal.h"
#include "base/cycleclock.h"
#include "base/sysinfo.h" /* for NumCPUs() */
-// We can do contention-profiling of SpinLocks, but the code is in
-// mutex.cc, which is not always linked in with spinlock. Hence we
-// provide this weak definition, which is used if mutex.cc isn't linked in.
-ATTRIBUTE_WEAK extern void SubmitSpinLockProfileData(const void *, int64);
-void SubmitSpinLockProfileData(const void *, int64) {}
+// NOTE on the Lock-state values:
+//
+// kSpinLockFree represents the unlocked state
+// kSpinLockHeld represents the locked state with no waiters
+//
+// Values greater than kSpinLockHeld represent the locked state with waiters,
+// where the value is the time the current lock holder had to
+// wait before obtaining the lock. The kSpinLockSleeper state is a special
+// "locked with waiters" state that indicates that a sleeper needs to
+// be woken, but the thread that just released the lock didn't wait.
static int adaptive_spin_count = 0;
const base::LinkerInitialized SpinLock::LINKER_INITIALIZED =
base::LINKER_INITIALIZED;
-// The OS-specific header included below must provide two calls:
-// Wait until *w becomes zero, atomically set it to 1 and return.
-// static void SpinLockWait(volatile Atomic32 *w);
-//
-// Hint that a thread waiting in SpinLockWait() could now make progress. May
-// do nothing. This call may not read or write *w; it must use only the
-// address.
-// static void SpinLockWake(volatile Atomic32 *w);
-#if defined(_WIN32)
-#include "base/spinlock_win32-inl.h"
-#elif defined(__linux__)
-#include "base/spinlock_linux-inl.h"
-#else
-#include "base/spinlock_posix-inl.h"
-#endif
-
namespace {
struct SpinLock_InitHelper {
SpinLock_InitHelper() {
@@ -91,36 +72,111 @@ static SpinLock_InitHelper init_helper;
} // unnamed namespace
+// Monitor the lock to see if its value changes within some time period
+// (adaptive_spin_count loop iterations). A timestamp indicating
+// when the thread initially started waiting for the lock is passed in via
+// the initial_wait_timestamp value. The total wait time in cycles for the
+// lock is returned in the wait_cycles parameter. The last value read
+// from the lock is returned from the method.
+Atomic32 SpinLock::SpinLoop(int64 initial_wait_timestamp,
+ Atomic32* wait_cycles) {
+ int c = adaptive_spin_count;
+ while (base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree && --c > 0) {
+ }
+ Atomic32 spin_loop_wait_cycles = CalculateWaitCycles(initial_wait_timestamp);
+ Atomic32 lock_value =
+ base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
+ spin_loop_wait_cycles);
+ *wait_cycles = spin_loop_wait_cycles;
+ return lock_value;
+}
void SpinLock::SlowLock() {
- int c = adaptive_spin_count;
+ // The lock was not obtained initially, so this thread needs to wait for
+ // it. Record the current timestamp in the local variable wait_start_time
+ // so the total wait time can be stored in the lockword once this thread
+ // obtains the lock.
+ int64 wait_start_time = CycleClock::Now();
+ Atomic32 wait_cycles;
+ Atomic32 lock_value = SpinLoop(wait_start_time, &wait_cycles);
- // Spin a few times in the hope that the lock holder releases the lock
- while ((c > 0) && (lockword_ != 0)) {
- c--;
- }
+ int lock_wait_call_count = 0;
+ while (lock_value != kSpinLockFree) {
+ // If the lock is currently held, but not marked as having a sleeper, mark
+ // it as having a sleeper.
+ if (lock_value == kSpinLockHeld) {
+ // Here, just "mark" that the thread is going to sleep. Don't store the
+ // lock wait time in the lock as that will cause the current lock
+ // owner to think it experienced contention.
+ lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_,
+ kSpinLockHeld,
+ kSpinLockSleeper);
+ if (lock_value == kSpinLockHeld) {
+ // Successfully transitioned to kSpinLockSleeper. Pass
+ // kSpinLockSleeper to the SpinLockWait routine to properly indicate
+ // the last lock_value observed.
+ lock_value = kSpinLockSleeper;
+ } else if (lock_value == kSpinLockFree) {
+ // Lock is free again, so try and aquire it before sleeping. The
+ // new lock state will be the number of cycles this thread waited if
+ // this thread obtains the lock.
+ lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_,
+ kSpinLockFree,
+ wait_cycles);
+ continue; // skip the delay at the end of the loop
+ }
+ }
- if (lockword_ == 1) {
- int32 now = (CycleClock::Now() >> PROFILE_TIMESTAMP_SHIFT);
- // Don't loose the lock: make absolutely sure "now" is not zero
- now |= 1;
- // Atomically replace the value of lockword_ with "now" if
- // lockword_ is 1, thereby remembering the first timestamp to
- // be recorded.
- base::subtle::NoBarrier_CompareAndSwap(&lockword_, 1, now);
- // base::subtle::NoBarrier_CompareAndSwap() returns:
- // 0: the lock is/was available; nothing stored
- // 1: our timestamp was stored
- // > 1: an older timestamp is already in lockword_; nothing stored
+ // Wait for an OS specific delay.
+ base::internal::SpinLockDelay(&lockword_, lock_value,
+ ++lock_wait_call_count);
+ // Spin again after returning from the wait routine to give this thread
+ // some chance of obtaining the lock.
+ lock_value = SpinLoop(wait_start_time, &wait_cycles);
}
-
- SpinLockWait(&lockword_); // wait until lock acquired; OS specific
}
-void SpinLock::SlowUnlock(int64 wait_timestamp) {
- SpinLockWake(&lockword_); // wake waiter if necessary; OS specific
+// The wait time for contentionz lock profiling must fit into 32 bits.
+// However, the lower 32-bits of the cycle counter wrap around too quickly
+// with high frequency processors, so a right-shift by 7 is performed to
+// quickly divide the cycles by 128. Using these 32 bits, reduces the
+// granularity of time measurement to 128 cycles, and loses track
+// of wait time for waits greater than 109 seconds on a 5 GHz machine
+// [(2^32 cycles/5 Ghz)*128 = 109.95 seconds]. Waits this long should be
+// very rare and the reduced granularity should not be an issue given
+// processors in the Google fleet operate at a minimum of one billion
+// cycles/sec.
+enum { PROFILE_TIMESTAMP_SHIFT = 7 };
+
+void SpinLock::SlowUnlock(uint64 wait_cycles) {
+ base::internal::SpinLockWake(&lockword_, false); // wake waiter if necessary
+
+ // Collect contentionz profile info, expanding the wait_cycles back out to
+ // the full value. If wait_cycles is <= kSpinLockSleeper, then no wait
+ // was actually performed, so don't record the wait time. Note, that the
+ // CalculateWaitCycles method adds in kSpinLockSleeper cycles
+ // unconditionally to guarantee the wait time is not kSpinLockFree or
+ // kSpinLockHeld. The adding in of these small number of cycles may
+ // overestimate the contention by a slight amount 50% of the time. However,
+ // if this code tried to correct for that addition by subtracting out the
+ // kSpinLockSleeper amount that would underestimate the contention slightly
+ // 50% of the time. Both ways get the wrong answer, so the code
+ // overestimates to be more conservative. Overestimating also makes the code
+ // a little simpler.
+ //
+ if (wait_cycles > kSpinLockSleeper) {
+ base::SubmitSpinLockProfileData(this,
+ wait_cycles << PROFILE_TIMESTAMP_SHIFT);
+ }
+}
- // Collect contentionz profile info. Subtract one from wait_timestamp as
- // antidote to "now |= 1;" in SlowLock().
- SubmitSpinLockProfileData(this, wait_timestamp - 1);
+inline int32 SpinLock::CalculateWaitCycles(int64 wait_start_time) {
+ int32 wait_cycles = ((CycleClock::Now() - wait_start_time) >>
+ PROFILE_TIMESTAMP_SHIFT);
+ // The number of cycles waiting for the lock is used as both the
+ // wait_cycles and lock value, so it can't be kSpinLockFree or
+ // kSpinLockHeld. Make sure the value returned is at least
+ // kSpinLockSleeper.
+ wait_cycles |= kSpinLockSleeper;
+ return wait_cycles;
}
diff --git a/src/base/spinlock.h b/src/base/spinlock.h
index 9e633c4..c2be4fd 100644
--- a/src/base/spinlock.h
+++ b/src/base/spinlock.h
@@ -44,14 +44,14 @@
#define BASE_SPINLOCK_H_
#include <config.h>
-#include "base/basictypes.h"
#include "base/atomicops.h"
+#include "base/basictypes.h"
#include "base/dynamic_annotations.h"
#include "base/thread_annotations.h"
class LOCKABLE SpinLock {
public:
- SpinLock() : lockword_(0) { }
+ SpinLock() : lockword_(kSpinLockFree) { }
// Special constructor for use with static SpinLock objects. E.g.,
//
@@ -70,18 +70,21 @@ class LOCKABLE SpinLock {
// TODO(csilvers): uncomment the annotation when we figure out how to
// support this macro with 0 args (see thread_annotations.h)
inline void Lock() /*EXCLUSIVE_LOCK_FUNCTION()*/ {
- if (Acquire_CompareAndSwap(&lockword_, 0, 1) != 0) {
+ if (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
+ kSpinLockHeld) != kSpinLockFree) {
SlowLock();
}
ANNOTATE_RWLOCK_ACQUIRED(this, 1);
}
- // Acquire this SpinLock and return true if the acquisition can be
- // done without blocking, else return false. If this SpinLock is
- // free at the time of the call, TryLock will return true with high
- // probability.
+ // Try to acquire this SpinLock without blocking and return true if the
+ // acquisition was successful. If the lock was not acquired, false is
+ // returned. If this SpinLock is free at the time of the call, TryLock
+ // will return true with high probability.
inline bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) {
- bool res = (Acquire_CompareAndSwap(&lockword_, 0, 1) == 0);
+ bool res =
+ (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
+ kSpinLockHeld) == kSpinLockFree);
if (res) {
ANNOTATE_RWLOCK_ACQUIRED(this, 1);
}
@@ -92,47 +95,37 @@ class LOCKABLE SpinLock {
// TODO(csilvers): uncomment the annotation when we figure out how to
// support this macro with 0 args (see thread_annotations.h)
inline void Unlock() /*UNLOCK_FUNCTION()*/ {
- // This is defined in mutex.cc.
- extern void SubmitSpinLockProfileData(const void *, int64);
-
- int64 wait_timestamp = static_cast<uint32>(lockword_);
+ uint64 wait_cycles =
+ static_cast<uint64>(base::subtle::NoBarrier_Load(&lockword_));
ANNOTATE_RWLOCK_RELEASED(this, 1);
- Release_Store(&lockword_, 0);
- if (wait_timestamp != 1) {
+ base::subtle::Release_Store(&lockword_, kSpinLockFree);
+ if (wait_cycles != kSpinLockHeld) {
// Collect contentionz profile info, and speed the wakeup of any waiter.
- // The lockword_ value indicates when the waiter started waiting.
- SlowUnlock(wait_timestamp);
+ // The wait_cycles value indicates how long this thread spent waiting
+ // for the lock.
+ SlowUnlock(wait_cycles);
}
}
- // Report if we think the lock can be held by this thread.
- // When the lock is truly held by the invoking thread
- // we will always return true.
- // Indended to be used as CHECK(lock.IsHeld());
+ // Determine if the lock is held. When the lock is held by the invoking
+ // thread, true will always be returned. Intended to be used as
+ // CHECK(lock.IsHeld()).
inline bool IsHeld() const {
- return lockword_ != 0;
+ return base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree;
}
- // The timestamp for contention lock profiling must fit into 31 bits.
- // as lockword_ is 32 bits and we loose an additional low-order bit due
- // to the statement "now |= 1" in SlowLock().
- // To select 31 bits from the 64-bit cycle counter, we shift right by
- // PROFILE_TIMESTAMP_SHIFT = 7.
- // Using these 31 bits, we reduce granularity of time measurement to
- // 256 cycles, and will loose track of wait time for waits greater than
- // 109 seconds on a 5 GHz machine, longer for faster clock cycles.
- // Waits this long should be very rare.
- enum { PROFILE_TIMESTAMP_SHIFT = 7 };
-
static const base::LinkerInitialized LINKER_INITIALIZED; // backwards compat
private:
- // Lock-state: 0 means unlocked; 1 means locked with no waiters; values
- // greater than 1 indicate locked with waiters, where the value is the time
- // the first waiter started waiting and is used for contention profiling.
+ enum { kSpinLockFree = 0 };
+ enum { kSpinLockHeld = 1 };
+ enum { kSpinLockSleeper = 2 };
+
volatile Atomic32 lockword_;
void SlowLock();
- void SlowUnlock(int64 wait_timestamp);
+ void SlowUnlock(uint64 wait_cycles);
+ Atomic32 SpinLoop(int64 initial_wait_timestamp, Atomic32* wait_cycles);
+ inline int32 CalculateWaitCycles(int64 wait_start_time);
DISALLOW_COPY_AND_ASSIGN(SpinLock);
};
diff --git a/src/base/spinlock_internal.cc b/src/base/spinlock_internal.cc
new file mode 100644
index 0000000..b5b6ca4
--- /dev/null
+++ b/src/base/spinlock_internal.cc
@@ -0,0 +1,77 @@
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// The OS-specific header included below must provide two calls:
+// base::internal::SpinLockDelay() and base::internal::SpinLockWake().
+// See spinlock_internal.h for the spec of SpinLockWake().
+
+// void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop)
+// SpinLockDelay() generates an apprproate spin delay on iteration "loop" of a
+// spin loop on location *w, whose previously observed value was "value".
+// SpinLockDelay() may do nothing, may yield the CPU, may sleep a clock tick,
+// or may wait for a delay that can be truncated by a call to SpinlockWake(w).
+// In all cases, it must return in bounded time even if SpinlockWake() is not
+// called.
+
+#include "base/spinlock_internal.h"
+
+#if defined(_WIN32)
+#include "base/spinlock_win32-inl.h"
+#elif defined(__linux__)
+#include "base/spinlock_linux-inl.h"
+#else
+#include "base/spinlock_posix-inl.h"
+#endif
+
+namespace base {
+namespace internal {
+
+// See spinlock_internal.h for spec.
+int32 SpinLockWait(volatile Atomic32 *w, int n,
+ const SpinLockWaitTransition trans[]) {
+ int32 v;
+ bool done = false;
+ for (int loop = 0; !done; loop++) {
+ v = base::subtle::Acquire_Load(w);
+ int i;
+ for (i = 0; i != n && v != trans[i].from; i++) {
+ }
+ if (i == n) {
+ SpinLockDelay(w, v, loop); // no matching transition
+ } else if (trans[i].to == v || // null transition
+ base::subtle::Acquire_CompareAndSwap(w, v, trans[i].to) == v) {
+ done = trans[i].done;
+ }
+ }
+ return v;
+}
+
+} // namespace internal
+} // namespace base
diff --git a/src/base/spinlock_internal.h b/src/base/spinlock_internal.h
new file mode 100644
index 0000000..4494260
--- /dev/null
+++ b/src/base/spinlock_internal.h
@@ -0,0 +1,64 @@
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * This file is an internal part spinlock.cc and once.cc
+ * It may not be used directly by code outside of //base.
+ */
+
+#ifndef BASE_SPINLOCK_INTERNAL_H_
+#define BASE_SPINLOCK_INTERNAL_H_
+
+#include <config.h>
+#include "base/basictypes.h"
+#include "base/atomicops.h"
+
+namespace base {
+namespace internal {
+
+// SpinLockWait() waits until it can perform one of several transitions from
+// "from" to "to". It returns when it performs a transition where done==true.
+struct SpinLockWaitTransition {
+ int32 from;
+ int32 to;
+ bool done;
+};
+
+// Wait until *w can transition from trans[i].from to trans[i].to for some i
+// satisfying 0<=i<n && trans[i].done, atomically make the transition,
+// then return the old value of *w. Make any other atomic tranistions
+// where !trans[i].done, but continue waiting.
+int32 SpinLockWait(volatile Atomic32 *w, int n,
+ const SpinLockWaitTransition trans[]);
+void SpinLockWake(volatile Atomic32 *w, bool all);
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop);
+
+} // namespace internal
+} // namespace base
+#endif
diff --git a/src/base/spinlock_linux-inl.h b/src/base/spinlock_linux-inl.h
index 0df09a3..5e571b1 100644
--- a/src/base/spinlock_linux-inl.h
+++ b/src/base/spinlock_linux-inl.h
@@ -28,11 +28,12 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ---
- * This file is a Linux-specific part of spinlock.cc
+ * This file is a Linux-specific part of spinlock_internal.cc
*/
#include <sched.h>
#include <time.h>
+#include <limits.h>
#include "base/linux_syscall_support.h"
#define FUTEX_WAIT 0
@@ -48,7 +49,7 @@ static struct InitModule {
int x = 0;
// futexes are ints, so we can use them only when
// that's the same size as the lockword_ in SpinLock.
- have_futex = (sizeof (Atomic32) == sizeof (int) &&
+ have_futex = (sizeof (Atomic32) == sizeof (int) &&
sys_futex(&x, FUTEX_WAKE, 1, 0) >= 0);
if (have_futex &&
sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, 0) < 0) {
@@ -56,36 +57,41 @@ static struct InitModule {
}
}
} init_module;
+
} // anonymous namespace
-static void SpinLockWait(volatile Atomic32 *w) {
- int save_errno = errno;
- struct timespec tm;
- tm.tv_sec = 0;
- if (have_futex) {
- int value;
- tm.tv_nsec = 1000000; // 1ms; really we're trying to sleep for one kernel
- // clock tick
- while ((value = base::subtle::Acquire_CompareAndSwap(w, 0, 1)) != 0) {
- sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
- FUTEX_WAIT | futex_private_flag,
- value, reinterpret_cast<struct kernel_timespec *>(&tm));
- }
- } else {
- tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin
- if (base::subtle::NoBarrier_Load(w) != 0) {
- sched_yield();
+
+namespace base {
+namespace internal {
+
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
+ if (loop != 0) {
+ int save_errno = errno;
+ struct timespec tm;
+ tm.tv_sec = 0;
+ if (have_futex) {
+ tm.tv_nsec = 1000000; // 1ms; really we're trying to sleep for one
+ // kernel clock tick
+ } else {
+ tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin
}
- while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) {
+ if (have_futex) {
+ sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
+ FUTEX_WAIT | futex_private_flag,
+ value, reinterpret_cast<struct kernel_timespec *>(&tm));
+ } else {
nanosleep(&tm, NULL);
}
+ errno = save_errno;
}
- errno = save_errno;
}
-static void SpinLockWake(volatile Atomic32 *w) {
+void SpinLockWake(volatile Atomic32 *w, bool all) {
if (have_futex) {
sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
- FUTEX_WAKE | futex_private_flag, 1, 0);
+ FUTEX_WAKE | futex_private_flag, all? INT_MAX : 1, 0);
}
}
+
+} // namespace internal
+} // namespace base
diff --git a/src/base/spinlock_posix-inl.h b/src/base/spinlock_posix-inl.h
index 0d933c0..d188ebd 100644
--- a/src/base/spinlock_posix-inl.h
+++ b/src/base/spinlock_posix-inl.h
@@ -28,25 +28,35 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ---
- * This file is a Posix-specific part of spinlock.cc
+ * This file is a Posix-specific part of spinlock_internal.cc
*/
-#include <sched.h>
-#include <time.h>
+#include <config.h>
+#include <errno.h>
+#ifdef HAVE_SCHED_H
+#include <sched.h> /* For sched_yield() */
+#endif
+#include <time.h> /* For nanosleep() */
-static void SpinLockWait(volatile Atomic32 *w) {
+namespace base {
+namespace internal {
+
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
int save_errno = errno;
- struct timespec tm;
- tm.tv_sec = 0;
- tm.tv_nsec = 1000000;
- if (base::subtle::NoBarrier_Load(w) != 0) {
+ if (loop == 0) {
+ } else if (loop == 1) {
sched_yield();
- }
- while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) {
+ } else {
+ struct timespec tm;
+ tm.tv_sec = 0;
+ tm.tv_nsec = 1000000;
nanosleep(&tm, NULL);
}
errno = save_errno;
}
-static void SpinLockWake(volatile Atomic32 *w) {
+void SpinLockWake(volatile Atomic32 *w, bool all) {
}
+
+} // namespace internal
+} // namespace base
diff --git a/src/base/spinlock_win32-inl.h b/src/base/spinlock_win32-inl.h
index 9058939..ee23541 100644
--- a/src/base/spinlock_win32-inl.h
+++ b/src/base/spinlock_win32-inl.h
@@ -28,20 +28,26 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ---
- * This file is a Win32-specific part of spinlock.cc
+ * This file is a Win32-specific part of spinlock_internal.cc
*/
#include <windows.h>
-static void SpinLockWait(volatile Atomic32 *w) {
- if (base::subtle::NoBarrier_Load(w) != 0) {
+namespace base {
+namespace internal {
+
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
+ if (loop == 0) {
+ } else if (loop == 1) {
Sleep(0);
- }
- while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) {
+ } else {
Sleep(1);
}
}
-static void SpinLockWake(volatile Atomic32 *w) {
+void SpinLockWake(volatile Atomic32 *w, bool all) {
}
+
+} // namespace internal
+} // namespace base
diff --git a/src/base/synchronization_profiling.h b/src/base/synchronization_profiling.h
new file mode 100644
index 0000000..cf02c21
--- /dev/null
+++ b/src/base/synchronization_profiling.h
@@ -0,0 +1,50 @@
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Chris Ruemmler
+ */
+
+#ifndef BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_
+#define BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_
+
+#include "base/basictypes.h"
+
+namespace base {
+
+// We can do contention-profiling of SpinLocks, but the code is in
+// mutex.cc, which is not always linked in with spinlock. Hence we
+// provide a weak definition, which are used if mutex.cc isn't linked in.
+
+// Submit the number of cycles the spinlock spent contending.
+ATTRIBUTE_WEAK extern void SubmitSpinLockProfileData(const void *, int64);
+extern void SubmitSpinLockProfileData(const void *contendedlock,
+ int64 wait_cycles) {}
+}
+#endif // BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_
diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc
index 7cfa051..c1e2aef 100644
--- a/src/base/sysinfo.cc
+++ b/src/base/sysinfo.cc
@@ -111,20 +111,23 @@
// 8K), so it's not an ideal solution.
const char* GetenvBeforeMain(const char* name) {
#if defined(HAVE___ENVIRON) // if we have it, it's declared in unistd.h
- const int namelen = strlen(name);
- for (char** p = __environ; *p; p++) {
- if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=') // it's a match
- return *p + namelen+1; // point after =
+ if (__environ) { // can exist but be NULL, if statically linked
+ const int namelen = strlen(name);
+ for (char** p = __environ; *p; p++) {
+ if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=') // it's a match
+ return *p + namelen+1; // point after =
+ }
+ return NULL;
}
- return NULL;
-#elif defined(PLATFORM_WINDOWS)
+#endif
+#if defined(PLATFORM_WINDOWS)
// TODO(mbelshe) - repeated calls to this function will overwrite the
// contents of the static buffer.
- static char envbuf[1024]; // enough to hold any envvar we care about
- if (!GetEnvironmentVariableA(name, envbuf, sizeof(envbuf)-1))
+ static char envvar_buf[1024]; // enough to hold any envvar we care about
+ if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1))
return NULL;
- return envbuf;
-#else
+ return envvar_buf;
+#endif
// static is ok because this function should only be called before
// main(), when we're single-threaded.
static char envbuf[16<<10];
@@ -152,7 +155,6 @@ const char* GetenvBeforeMain(const char* name) {
p = endp + 1;
}
return NULL; // env var never found
-#endif
}
// This takes as an argument an environment-variable name (like
@@ -229,6 +231,26 @@ static int64 EstimateCyclesPerSecond(const int estimate_time_ms) {
return guess;
}
+// Helper function for reading an int from a file. Returns true if successful
+// and the memory location pointed to by value is set to the value read.
+static bool ReadIntFromFile(const char *file, int *value) {
+ bool ret = false;
+ int fd = open(file, O_RDONLY);
+ if (fd != -1) {
+ char line[1024];
+ char* err;
+ memset(line, '\0', sizeof(line));
+ read(fd, line, sizeof(line) - 1);
+ const int temp_value = strtol(line, &err, 10);
+ if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
+ *value = temp_value;
+ ret = true;
+ }
+ close(fd);
+ }
+ return ret;
+}
+
// WARNING: logging calls back to InitializeSystemInfo() so it must
// not invoke any logging code. Also, InitializeSystemInfo() can be
// called before main() -- in fact it *must* be since already_called
@@ -254,26 +276,31 @@ static void InitializeSystemInfo() {
#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
char line[1024];
char* err;
+ int freq;
+
+ // If the kernel is exporting the tsc frequency use that. There are issues
+ // where cpuinfo_max_freq cannot be relied on because the BIOS may be
+ // exporintg an invalid p-state (on x86) or p-states may be used to put the
+ // processor in a new mode (turbo mode). Essentially, those frequencies
+ // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
+ // well.
+ if (!saw_mhz &&
+ ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
+ // The value is in kHz (as the file name suggests). For example, on a
+ // 2GHz warpstation, the file contains the value "2000000".
+ cpuinfo_cycles_per_second = freq * 1000.0;
+ saw_mhz = true;
+ }
// If CPU scaling is in effect, we want to use the *maximum* frequency,
// not whatever CPU speed some random processor happens to be using now.
- if (!saw_mhz) {
- const char* pname0 =
- "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq";
- int fd0 = open(pname0, O_RDONLY);
- if (fd0 != -1) {
- memset(line, '\0', sizeof(line));
- read(fd0, line, sizeof(line));
- const int max_freq = strtol(line, &err, 10);
- if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
- // The value is in kHz. For example, on a 2GHz machine, the file
- // contains the value "2000000". Historically this file contained no
- // newline, but at some point the kernel started appending a newline.
- cpuinfo_cycles_per_second = max_freq * 1000.0;
- saw_mhz = true;
- }
- close(fd0);
- }
+ if (!saw_mhz &&
+ ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+ &freq)) {
+ // The value is in kHz. For example, on a 2GHz machine, the file
+ // contains the value "2000000".
+ cpuinfo_cycles_per_second = freq * 1000.0;
+ saw_mhz = true;
}
// Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
@@ -311,20 +338,20 @@ static void InitializeSystemInfo() {
if (newline != NULL)
*newline = '\0';
- if (!saw_mhz && strncmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
+ if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr) {
cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
if (freqstr[1] != '\0' && *err == '\0')
saw_mhz = true;
}
- } else if (strncmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
+ } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr)
bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
if (freqstr == NULL || freqstr[1] == '\0' || *err != '\0')
bogo_clock = 1.0;
- } else if (strncmp(line, "processor", sizeof("processor")-1) == 0) {
+ } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
num_cpus++; // count up every time we see an "processor :" entry
}
} while (chars_read > 0);
@@ -888,9 +915,10 @@ namespace tcmalloc {
// Helper to add the list of mapped shared libraries to a profile.
// Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size'
-// and return the actual size occupied in 'buf'.
+// and return the actual size occupied in 'buf'. We fill wrote_all to true
+// if we successfully wrote all proc lines to buf, false else.
// We do not provision for 0-terminating 'buf'.
-int FillProcSelfMaps(char buf[], int size) {
+int FillProcSelfMaps(char buf[], int size, bool* wrote_all) {
ProcMapsIterator::Buffer iterbuf;
ProcMapsIterator it(0, &iterbuf); // 0 means "current pid"
@@ -898,10 +926,17 @@ int FillProcSelfMaps(char buf[], int size) {
int64 inode;
char *flags, *filename;
int bytes_written = 0;
+ *wrote_all = true;
while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
- bytes_written += it.FormatLine(buf + bytes_written, size - bytes_written,
- start, end, flags, offset, inode, filename,
- 0);
+ const int line_length = it.FormatLine(buf + bytes_written,
+ size - bytes_written,
+ start, end, flags, offset,
+ inode, filename, 0);
+ if (line_length == 0)
+ *wrote_all = false; // failed to write this line out
+ else
+ bytes_written += line_length;
+
}
return bytes_written;
}
diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h
index 0bcc1f5..8bae5e3 100644
--- a/src/base/sysinfo.h
+++ b/src/base/sysinfo.h
@@ -226,7 +226,7 @@ class ProcMapsIterator {
// Helper routines
namespace tcmalloc {
-int FillProcSelfMaps(char buf[], int size);
+int FillProcSelfMaps(char buf[], int size, bool* wrote_all);
void DumpProcSelfMaps(RawFD fd);
}
diff --git a/src/base/thread_annotations.h b/src/base/thread_annotations.h
index f1b3593..f57b299 100644
--- a/src/base/thread_annotations.h
+++ b/src/base/thread_annotations.h
@@ -46,7 +46,9 @@
#define BASE_THREAD_ANNOTATIONS_H_
-#if defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG))
+#if defined(__GNUC__) \
+ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) \
+ && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG))
#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
#else
#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
diff --git a/src/base/vdso_support.cc b/src/base/vdso_support.cc
index fce7c2c..73c6545 100644
--- a/src/base/vdso_support.cc
+++ b/src/base/vdso_support.cc
@@ -395,7 +395,7 @@ const void *VDSOSupport::Init() {
}
// Subtle: this code runs outside of any locks; prevent compiler
// from assigning to getcpu_fn_ more than once.
- MemoryBarrier();
+ base::subtle::MemoryBarrier();
getcpu_fn_ = fn;
return vdso_base_;
}
diff --git a/src/common.h b/src/common.h
index e2906d6..53050ca 100644
--- a/src/common.h
+++ b/src/common.h
@@ -77,6 +77,8 @@ static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 8u * kPageSize;
static const size_t kAlignment = 8;
static const size_t kLargeSizeClass = 0;
+// For all span-lengths < kMaxPages we keep an exact-size list.
+static const size_t kMaxPages = 1 << (20 - kPageShift);
// Default bound on the total amount of thread caches.
static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
@@ -102,6 +104,17 @@ static const int kMaxDynamicFreeListLength = 8192;
static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
+#ifdef __x86_64__
+// All current and planned x86_64 processors only look at the lower 48 bits
+// in virtual to physical address translation. The top 16 are thus unused.
+// TODO(rus): Under what operating systems can we increase it safely to 17?
+// This lets us use smaller page maps. On first allocation, a 36-bit page map
+// uses only 96 KB instead of the 4.5 MB used by a 52-bit page map.
+static const int kAddressBits = 48;
+#else
+static const int kAddressBits = 8 * sizeof(void*);
+#endif
+
namespace tcmalloc {
// Convert byte size into pages. This won't overflow, but may return
diff --git a/src/config.h.in b/src/config.h.in
index a1d5c68..6ee2db0 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -119,6 +119,9 @@
/* Define to 1 if the system has the type `struct mallinfo'. */
#undef HAVE_STRUCT_MALLINFO
+/* Define to 1 if you have the <sys/param.h> header file. */
+#undef HAVE_SYS_PARAM_H
+
/* Define to 1 if you have the <sys/prctl.h> header file. */
#undef HAVE_SYS_PRCTL_H
@@ -173,6 +176,10 @@
/* Define to 1 if int32_t is equivalent to intptr_t */
#undef INT32_EQUALS_INTPTR
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+ */
+#undef LT_OBJDIR
+
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
#undef NO_MINUS_C_MINUS_O
diff --git a/src/debugallocation.cc b/src/debugallocation.cc
index 949fbe9..3b34c8c 100644
--- a/src/debugallocation.cc
+++ b/src/debugallocation.cc
@@ -497,7 +497,7 @@ class MallocBlock {
// practical effect is that allocations are limited to 4Gb or so, even if
// the address space could take more.
static size_t max_size_t = ~0;
- if (size < 0 || size > max_size_t - sizeof(MallocBlock)) {
+ if (size > max_size_t - sizeof(MallocBlock)) {
RAW_LOG(ERROR, "Massive size passed to malloc: %"PRIuS"", size);
return NULL;
}
@@ -1356,6 +1356,20 @@ class DebugMallocImplementation : public ParentImplementation {
virtual size_t GetEstimatedAllocatedSize(size_t size) {
return size;
}
+
+ virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) {
+ static const char* kDebugFreeQueue = "debug.free_queue";
+
+ ParentImplementation::GetFreeListSizes(v);
+
+ MallocExtension::FreeListInfo i;
+ i.type = kDebugFreeQueue;
+ i.min_object_size = 0;
+ i.max_object_size = numeric_limits<size_t>::max();
+ i.total_bytes_free = MallocBlock::FreeQueueSize();
+ v->push_back(i);
+ }
+
};
static DebugMallocImplementation debug_malloc_implementation;
diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h
index c0ee8a8..f46f353 100644
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@@ -136,7 +136,7 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker {
bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); }
// These forms are obsolete; use NoLeaks() instead.
- // TODO(csilvers): mark with ATTRIBUTE_DEPRECATED.
+ // TODO(csilvers): mark as DEPRECATED.
bool QuickNoLeaks() { return NoLeaks(); }
bool BriefNoLeaks() { return NoLeaks(); }
bool SameHeap() { return NoLeaks(); }
diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h
index 3fbefc9..a2e956e 100644
--- a/src/google/malloc_extension.h
+++ b/src/google/malloc_extension.h
@@ -50,6 +50,7 @@
#include <stdint.h>
#endif
#include <string>
+#include <vector>
// Annoying stuff for windows -- makes sure clients can import these functions
#ifndef PERFTOOLS_DLL_DECL
@@ -102,12 +103,17 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// that allocated these objects. The format of the returned output
// is equivalent to the output of the heap profiler and can
// therefore be passed to "pprof".
+ // NOTE: by default, tcmalloc does not do any heap sampling, and this
+ // function will always return an empty sample. To get useful
+ // data from GetHeapSample, you must also set the environment
+ // variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288.
virtual void GetHeapSample(MallocExtensionWriter* writer);
// Outputs to "writer" the stack traces that caused growth in the
// address space size. The format of the returned output is
// equivalent to the output of the heap profiler and can therefore
- // be passed to "pprof".
+ // be passed to "pprof". (This does not depend on, or require,
+ // TCMALLOC_SAMPLE_PARAMETER.)
virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer);
// Invokes func(arg, range) for every controlled memory
@@ -244,6 +250,45 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// malloc implementation during initialization.
static void Register(MallocExtension* implementation);
+ // Returns detailed information about malloc's freelists. For each list,
+ // return a FreeListInfo:
+ struct FreeListInfo {
+ size_t min_object_size;
+ size_t max_object_size;
+ size_t total_bytes_free;
+ const char* type;
+ };
+ // Each item in the vector refers to a different freelist. The lists
+ // are identified by the range of allocations that objects in the
+ // list can satisfy ([min_object_size, max_object_size]) and the
+ // type of freelist (see below). The current size of the list is
+ // returned in total_bytes_free (which count against a processes
+ // resident and virtual size).
+ //
+ // Currently supported types are:
+ //
+ // "tcmalloc.page{_unmapped}" - tcmalloc's page heap. An entry for each size
+ // class in the page heap is returned. Bytes in "page_unmapped"
+ // are no longer backed by physical memory and do not count against
+ // the resident size of a process.
+ //
+ // "tcmalloc.large{_unmapped}" - tcmalloc's list of objects larger
+ // than the largest page heap size class. Only one "large"
+ // entry is returned. There is no upper-bound on the size
+ // of objects in the large free list; this call returns
+ // kint64max for max_object_size. Bytes in
+ // "large_unmapped" are no longer backed by physical memory
+ // and do not count against the resident size of a process.
+ //
+ // "tcmalloc.central" - tcmalloc's central free-list. One entry per
+ // size-class is returned. Never unmapped.
+ //
+ // "debug.free_queue" - free objects queued by the debug allocator
+ // and not returned to tcmalloc.
+ //
+ // "tcmalloc.thread" - tcmalloc's per-thread caches. Never unmapped.
+ virtual void GetFreeListSizes(std::vector<FreeListInfo>* v);
+
protected:
// Get a list of stack traces of sampled allocation points. Returns
// a pointer to a "new[]-ed" result array, and stores the sample
diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc
index ecaf75f..6d75c4a 100644
--- a/src/heap-profile-table.cc
+++ b/src/heap-profile-table.cc
@@ -342,7 +342,8 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const {
// any gaps. Whew!
int map_length = snprintf(buf, size, "%s", kProcSelfMapsHeader);
if (map_length < 0 || map_length >= size) return 0;
- map_length += FillProcSelfMaps(buf + map_length, size - map_length);
+ bool dummy; // "wrote_all" -- did /proc/self/maps fit in its entirety?
+ map_length += FillProcSelfMaps(buf + map_length, size - map_length, &dummy);
RAW_DCHECK(map_length <= size, "");
char* const map_start = buf + size - map_length; // move to end
memmove(map_start, buf, map_length);
diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc
index c2f8b54..1272068 100644
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@@ -52,6 +52,7 @@
#include "maybe_threads.h"
using STL_NAMESPACE::string;
+using STL_NAMESPACE::vector;
static void DumpAddressMap(string* result) {
*result += "\nMAPPED_LIBRARIES:\n";
@@ -59,9 +60,11 @@ static void DumpAddressMap(string* result) {
const size_t old_resultlen = result->size();
for (int amap_size = 10240; amap_size < 10000000; amap_size *= 2) {
result->resize(old_resultlen + amap_size);
+ bool wrote_all = false;
const int bytes_written =
- tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size);
- if (bytes_written < amap_size - 1) { // we fit!
+ tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size,
+ &wrote_all);
+ if (wrote_all) { // we fit!
(*result)[old_resultlen + bytes_written] = '\0';
result->resize(old_resultlen + bytes_written);
return;
@@ -167,6 +170,11 @@ size_t MallocExtension::GetAllocatedSize(void* p) {
return 0;
}
+void MallocExtension::GetFreeListSizes(
+ vector<MallocExtension::FreeListInfo>* v) {
+ v->clear();
+}
+
// The current malloc extension object.
static pthread_once_t module_init = PTHREAD_ONCE_INIT;
diff --git a/src/memory_region_map.cc b/src/memory_region_map.cc
index f6bed45..3f8509f 100644
--- a/src/memory_region_map.cc
+++ b/src/memory_region_map.cc
@@ -117,7 +117,6 @@
#include "memory_region_map.h"
-#include "base/linux_syscall_support.h"
#include "base/logging.h"
#include "base/low_level_alloc.h"
#include "malloc_hook-inl.h"
diff --git a/src/page_heap.cc b/src/page_heap.cc
index 1e63cb9..c92e16b 100644
--- a/src/page_heap.cc
+++ b/src/page_heap.cc
@@ -338,6 +338,35 @@ static double PagesToMB(uint64_t pages) {
return (pages << kPageShift) / 1048576.0;
}
+void PageHeap::GetClassSizes(int64 class_sizes_normal[kMaxPages],
+ int64 class_sizes_returned[kMaxPages],
+ int64* normal_pages_in_spans,
+ int64* returned_pages_in_spans) {
+
+ for (int s = 0; s < kMaxPages; s++) {
+ if (class_sizes_normal != NULL) {
+ class_sizes_normal[s] = DLL_Length(&free_[s].normal);
+ }
+ if (class_sizes_returned != NULL) {
+ class_sizes_returned[s] = DLL_Length(&free_[s].returned);
+ }
+ }
+
+ if (normal_pages_in_spans != NULL) {
+ *normal_pages_in_spans = 0;
+ for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
+ *normal_pages_in_spans += s->length;;
+ }
+ }
+
+ if (returned_pages_in_spans != NULL) {
+ *returned_pages_in_spans = 0;
+ for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
+ *returned_pages_in_spans += s->length;
+ }
+ }
+}
+
void PageHeap::Dump(TCMalloc_Printer* out) {
int nonempty_sizes = 0;
for (int s = 0; s < kMaxPages; s++) {
diff --git a/src/page_heap.h b/src/page_heap.h
index 74030d2..545bdda 100644
--- a/src/page_heap.h
+++ b/src/page_heap.h
@@ -140,6 +140,10 @@ class PERFTOOLS_DLL_DECL PageHeap {
uint64_t unmapped_bytes; // Total bytes on returned freelists
};
inline Stats stats() const { return stats_; }
+ void GetClassSizes(int64 class_sizes_normal[kMaxPages],
+ int64 class_sizes_returned[kMaxPages],
+ int64* normal_pages_in_spans,
+ int64* returned_pages_in_spans);
bool Check();
// Like Check() but does some more comprehensive checking.
@@ -176,11 +180,8 @@ class PERFTOOLS_DLL_DECL PageHeap {
// should keep this value big because various incarnations of Linux
// have small limits on the number of mmap() regions per
// address-space.
- static const int kMinSystemAlloc = 1 << (20 - kPageShift);
-
- // For all span-lengths < kMaxPages we keep an exact-size list.
- // REQUIRED: kMaxPages >= kMinSystemAlloc;
- static const size_t kMaxPages = kMinSystemAlloc;
+ // REQUIRED: kMinSystemAlloc <= kMaxPages;
+ static const int kMinSystemAlloc = kMaxPages;
// Never delay scavenging for more than the following number of
// deallocated pages. With 4K pages, this comes to 4GB of
@@ -192,8 +193,8 @@ class PERFTOOLS_DLL_DECL PageHeap {
static const int kDefaultReleaseDelay = 1 << 18;
// Pick the appropriate map and cache types based on pointer size
- typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
- typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
+ typedef MapSelector<kAddressBits>::Type PageMap;
+ typedef MapSelector<kAddressBits>::CacheType PageMapCache;
PageMap pagemap_;
mutable PageMapCache pagemap_cache_;
diff --git a/src/pprof b/src/pprof
index e67e42e..a503964 100755
--- a/src/pprof
+++ b/src/pprof
@@ -89,6 +89,7 @@ my %obj_tool_map = (
);
my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local
my $GV = "gv";
+my $EVINCE = "evince"; # could also be xpdf or perhaps acroread
my $KCACHEGRIND = "kcachegrind";
my $PS2PDF = "ps2pdf";
# These are used for dynamic profiles
@@ -103,6 +104,7 @@ my $GROWTH_PAGE = "/pprof/growth";
my $CONTENTION_PAGE = "/pprof/contention";
my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter
my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?";
+my $CENSUSPROFILE_PAGE = "/pprof/censusprofile"; # must support "?seconds=#"
my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST
my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
@@ -110,7 +112,7 @@ my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
# All the alternatives must begin with /.
my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" .
"$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" .
- "$FILTEREDPROFILE_PAGE)";
+ "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)";
# default binary name
my $UNKNOWN_BINARY = "(unknown)";
@@ -148,7 +150,7 @@ pprof [options] <profile>
The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile,
$GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
- or /pprof/filteredprofile.
+ $CENSUSPROFILE_PAGE, or /pprof/filteredprofile.
For instance: "pprof http://myserver.com:80$HEAP_PAGE".
If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
pprof --symbols <program>
@@ -180,6 +182,7 @@ Output type:
--text Generate text report
--callgrind Generate callgrind format to stdout
--gv Generate Postscript and display
+ --evince Generate PDF and display
--web Generate SVG and display
--list=<regexp> Generate source listing of matching routines
--disasm=<regexp> Generate disassembly of matching routines
@@ -304,6 +307,7 @@ sub Init() {
$main::opt_disasm = "";
$main::opt_symbols = 0;
$main::opt_gv = 0;
+ $main::opt_evince = 0;
$main::opt_web = 0;
$main::opt_dot = 0;
$main::opt_ps = 0;
@@ -372,6 +376,7 @@ sub Init() {
"disasm=s" => \$main::opt_disasm,
"symbols!" => \$main::opt_symbols,
"gv!" => \$main::opt_gv,
+ "evince!" => \$main::opt_evince,
"web!" => \$main::opt_web,
"dot!" => \$main::opt_dot,
"ps!" => \$main::opt_ps,
@@ -452,6 +457,7 @@ sub Init() {
($main::opt_disasm eq '' ? 0 : 1) +
($main::opt_symbols == 0 ? 0 : 1) +
$main::opt_gv +
+ $main::opt_evince +
$main::opt_web +
$main::opt_dot +
$main::opt_ps +
@@ -646,6 +652,8 @@ sub Main() {
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
if ($main::opt_gv) {
RunGV(TempName($main::next_tmpfile, "ps"), "");
+ } elsif ($main::opt_evince) {
+ RunEvince(TempName($main::next_tmpfile, "pdf"), "");
} elsif ($main::opt_web) {
my $tmp = TempName($main::next_tmpfile, "svg");
RunWeb($tmp);
@@ -708,6 +716,12 @@ sub RunGV {
}
}
+sub RunEvince {
+ my $fname = shift;
+ my $bg = shift; # "" or " &" if we should run in background
+ system("$EVINCE " . $fname . $bg);
+}
+
sub RunWeb {
my $fname = shift;
print STDERR "Loading web page file:///$fname\n";
@@ -805,6 +819,7 @@ sub InteractiveCommand {
$main::opt_disasm = 0;
$main::opt_list = 0;
$main::opt_gv = 0;
+ $main::opt_evince = 0;
$main::opt_cum = 0;
if (m/^\s*(text|top)(\d*)\s*(.*)/) {
@@ -878,11 +893,14 @@ sub InteractiveCommand {
PrintDisassembly($libs, $flat, $cumulative, $routine, $total);
return 1;
}
- if (m/^\s*(gv|web)\s*(.*)/) {
+ if (m/^\s*(gv|web|evince)\s*(.*)/) {
$main::opt_gv = 0;
+ $main::opt_evince = 0;
$main::opt_web = 0;
if ($1 eq "gv") {
$main::opt_gv = 1;
+ } elsif ($1 eq "evince") {
+ $main::opt_evince = 1;
} elsif ($1 eq "web") {
$main::opt_web = 1;
}
@@ -902,6 +920,8 @@ sub InteractiveCommand {
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
if ($main::opt_gv) {
RunGV(TempName($main::next_tmpfile, "ps"), " &");
+ } elsif ($main::opt_evince) {
+ RunEvince(TempName($main::next_tmpfile, "pdf"), " &");
} elsif ($main::opt_web) {
RunWeb(TempName($main::next_tmpfile, "svg"));
}
@@ -1685,6 +1705,8 @@ sub PrintDot {
my $output;
if ($main::opt_gv) {
$output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps");
+ } elsif ($main::opt_evince) {
+ $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf");
} elsif ($main::opt_ps) {
$output = "| $DOT -Tps2";
} elsif ($main::opt_pdf) {
@@ -2955,7 +2977,7 @@ sub FetchDynamicProfile {
my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
my $cmd = "$fetcher '$url' > '$tmp_profile'";
- if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/){
+ if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){
print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n";
if ($encourage_patience) {
print STDERR "Be patient...\n";
@@ -3531,16 +3553,18 @@ sub ReadHeapProfile {
# The sampling frequency is the rate of a Poisson process.
# This means that the probability of sampling an allocation of
# size X with sampling rate Y is 1 - exp(-X/Y)
- my $ratio;
- $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
- my $scale_factor;
- $scale_factor = 1/(1 - exp(-$ratio));
- $n1 *= $scale_factor;
- $s1 *= $scale_factor;
- $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
- $scale_factor = 1/(1 - exp(-$ratio));
- $n2 *= $scale_factor;
- $s2 *= $scale_factor;
+ if ($n1 != 0) {
+ my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+ my $scale_factor = 1/(1 - exp(-$ratio));
+ $n1 *= $scale_factor;
+ $s1 *= $scale_factor;
+ }
+ if ($n2 != 0) {
+ my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+ my $scale_factor = 1/(1 - exp(-$ratio));
+ $n2 *= $scale_factor;
+ $s2 *= $scale_factor;
+ }
} else {
# Remote-heap version 1
my $ratio;
@@ -4091,9 +4115,15 @@ sub ExtractSymbols {
my $symbols = {};
- # Map each PC value to the containing library
- my %seen = ();
- foreach my $lib (@{$libs}) {
+ # Map each PC value to the containing library. To make this faster,
+ # we sort libraries by their starting pc value (highest first), and
+ # advance through the libraries as we advance the pc. Sometimes the
+ # addresses of libraries may overlap with the addresses of the main
+ # binary, so to make sure the libraries 'win', we iterate over the
+ # libraries in reverse order (which assumes the binary doesn't start
+ # in the middle of a library, which seems a fair assumption).
+ my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings
+ foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) {
my $libname = $lib->[0];
my $start = $lib->[1];
my $finish = $lib->[2];
@@ -4101,12 +4131,21 @@ sub ExtractSymbols {
# Get list of pcs that belong in this library.
my $contained = [];
- foreach my $pc (keys(%{$pcset})) {
- if (!$seen{$pc} && ($pc ge $start) && ($pc le $finish)) {
- $seen{$pc} = 1;
- push(@{$contained}, $pc);
- }
- }
+ my ($start_pc_index, $finish_pc_index);
+ # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index].
+ for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0;
+ $finish_pc_index--) {
+ last if $pcs[$finish_pc_index - 1] le $finish;
+ }
+ # Find smallest start_pc_index such that $start <= $pc[$start_pc_index].
+ for ($start_pc_index = $finish_pc_index; $start_pc_index > 0;
+ $start_pc_index--) {
+ last if $pcs[$start_pc_index - 1] lt $start;
+ }
+ # This keeps PC values higher than $pc[$finish_pc_index] in @pcs,
+ # in case there are overlaps in libraries and the main binary.
+ @{$contained} = splice(@pcs, $start_pc_index,
+ $finish_pc_index - $start_pc_index);
# Map to symbols
MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols);
}
diff --git a/src/profiler.cc b/src/profiler.cc
index 3ac51d4..38fbb93 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -111,7 +111,7 @@ class CpuProfiler {
int (*filter_)(void*);
void* filter_arg_;
- // Opague token returned by the profile handler. To be used when calling
+ // Opaque token returned by the profile handler. To be used when calling
// ProfileHandlerUnregisterCallback.
ProfileHandlerToken* prof_handler_token_;
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
index 2505959..e589469 100644
--- a/src/system-alloc.cc
+++ b/src/system-alloc.cc
@@ -46,6 +46,7 @@
#include <sys/mman.h>
#endif
#include <errno.h>
+#include "common.h"
#include "system-alloc.h"
#include "internal_logging.h"
#include "base/logging.h"
@@ -73,6 +74,24 @@ static const bool kDebugMode = false;
static const bool kDebugMode = true;
#endif
+// Anonymous namespace to avoid name conflicts on "CheckAddressBits".
+namespace {
+
+// Check that no bit is set at position ADDRESS_BITS or higher.
+template <int ADDRESS_BITS> bool CheckAddressBits(uintptr_t ptr) {
+ return (ptr >> ADDRESS_BITS) == 0;
+}
+
+// Specialize for the bit width of a pointer to avoid undefined shift.
+template <> bool CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) {
+ return true;
+}
+
+} // Anonymous namespace to avoid name conflicts on "CheckAddressBits".
+
+COMPILE_ASSERT(kAddressBits <= 8 * sizeof(void*),
+ address_bits_larger_than_pointer_size);
+
// Structure for discovering alignment
union MemoryAligner {
void* p;
@@ -443,7 +462,16 @@ void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size,
if (a == NULL) continue;
if (a->usable_ && !a->failed_) {
void* result = a->Alloc(size, actual_size, alignment);
- if (result != NULL) return result;
+ if (result != NULL) {
+ if (actual_size) {
+ CheckAddressBits<kAddressBits>(
+ reinterpret_cast<uintptr_t>(result) + *actual_size - 1);
+ } else {
+ CheckAddressBits<kAddressBits>(
+ reinterpret_cast<uintptr_t>(result) + size - 1);
+ }
+ return result;
+ }
}
}
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 93bdd1d..245407d 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -110,6 +110,7 @@
#include <errno.h>
#include <stdarg.h>
#include <algorithm>
+#include <limits>
#include <google/tcmalloc.h>
#include "base/commandlineflags.h"
#include "base/basictypes.h" // gets us PRIu64
@@ -136,7 +137,9 @@
# define WIN32_DO_PATCHING 1
#endif
-using std::max;
+using STL_NAMESPACE::max;
+using STL_NAMESPACE::numeric_limits;
+using STL_NAMESPACE::vector;
using tcmalloc::AlignmentForSize;
using tcmalloc::PageHeap;
using tcmalloc::PageHeapAllocator;
@@ -439,6 +442,52 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
static const double MB = 1048576.0;
+ const uint64_t virtual_memory_used = (stats.pageheap.system_bytes
+ + stats.metadata_bytes);
+ const uint64_t physical_memory_used = (virtual_memory_used
+ - stats.pageheap.unmapped_bytes);
+ const uint64_t bytes_in_use_by_app = (physical_memory_used
+ - stats.metadata_bytes
+ - stats.pageheap.free_bytes
+ - stats.central_bytes
+ - stats.transfer_bytes
+ - stats.thread_bytes);
+
+ out->printf(
+ "------------------------------------------------\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes in use by application\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in page heap freelist\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in central cache freelist\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in transfer cache freelist\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in thread cache freelists\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in malloc metadata\n"
+ "MALLOC: ------------\n"
+ "MALLOC: = %12" PRIu64 " (%7.1f MB) Actual memory used (physical + swap)\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes released to OS (aka unmapped)\n"
+ "MALLOC: ------------\n"
+ "MALLOC: = %12" PRIu64 " (%7.1f MB) Virtual address space used\n"
+ "MALLOC:\n"
+ "MALLOC: %12" PRIu64 " Spans in use\n"
+ "MALLOC: %12" PRIu64 " Thread heaps in use\n"
+ "MALLOC: %12" PRIu64 " Tcmalloc page size\n"
+ "------------------------------------------------\n"
+ "Call ReleaseFreeMemory() to release freelist memory to the OS"
+ " (via madvise()).\n"
+ "Bytes released to the OS take up virtual address space"
+ " but no physical memory.\n",
+ bytes_in_use_by_app, bytes_in_use_by_app / MB,
+ stats.pageheap.free_bytes, stats.pageheap.free_bytes / MB,
+ stats.central_bytes, stats.central_bytes / MB,
+ stats.transfer_bytes, stats.transfer_bytes / MB,
+ stats.thread_bytes, stats.thread_bytes / MB,
+ stats.metadata_bytes, stats.metadata_bytes / MB,
+ physical_memory_used, physical_memory_used / MB,
+ stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MB,
+ virtual_memory_used, virtual_memory_used / MB,
+ uint64_t(Static::span_allocator()->inuse()),
+ uint64_t(ThreadCache::HeapsInUse()),
+ uint64_t(kPageSize));
+
if (level >= 2) {
out->printf("------------------------------------------------\n");
out->printf("Size class breakdown\n");
@@ -464,38 +513,6 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
out->printf("------------------------------------------------\n");
DumpSystemAllocatorStats(out);
}
-
- const uint64_t bytes_in_use = stats.pageheap.system_bytes
- - stats.pageheap.free_bytes
- - stats.pageheap.unmapped_bytes
- - stats.central_bytes
- - stats.transfer_bytes
- - stats.thread_bytes;
-
- out->printf("------------------------------------------------\n"
- "MALLOC: %12" PRIu64 " (%7.1f MB) Heap size\n"
- "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes in use by application\n"
- "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in page heap\n"
- "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes unmapped in page heap\n"
- "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in central cache\n"
- "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in transfer cache\n"
- "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in thread caches\n"
- "MALLOC: %12" PRIu64 " Spans in use\n"
- "MALLOC: %12" PRIu64 " Thread heaps in use\n"
- "MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n"
- "MALLOC: %12" PRIu64 " Tcmalloc page size\n"
- "------------------------------------------------\n",
- stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB,
- bytes_in_use, bytes_in_use / MB,
- stats.pageheap.free_bytes, stats.pageheap.free_bytes / MB,
- stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MB,
- stats.central_bytes, stats.central_bytes / MB,
- stats.transfer_bytes, stats.transfer_bytes / MB,
- stats.thread_bytes, stats.thread_bytes / MB,
- uint64_t(Static::span_allocator()->inuse()),
- uint64_t(ThreadCache::HeapsInUse()),
- stats.metadata_bytes, stats.metadata_bytes / MB,
- uint64_t(kPageSize));
}
static void PrintStats(int level) {
@@ -609,6 +626,20 @@ class TCMallocImplementation : public MallocExtension {
}
}
+ // We may print an extra, tcmalloc-specific warning message here.
+ virtual void GetHeapSample(MallocExtensionWriter* writer) {
+ if (FLAGS_tcmalloc_sample_parameter == 0) {
+ const char* const kWarningMsg =
+ "#\n# WARNING: This heap profile does not have any data in it,\n"
+ "# because the application was run with heap sampling turned off.\n"
+ "# To get useful data from from GetHeapSample(), you must first\n"
+ "# set the environment variable TCMALLOC_SAMPLE_PARAMETER to a\n"
+ "# positive sampling period, such as 524288.\n#\n";
+ writer->append(kWarningMsg, strlen(kWarningMsg));
+ }
+ MallocExtension::GetHeapSample(writer);
+ }
+
virtual void** ReadStackTraces(int* sample_period) {
tcmalloc::StackTraceTable table;
{
@@ -753,6 +784,99 @@ class TCMallocImplementation : public MallocExtension {
// unnamed namespace, we need to move the definition below it in the
// file.
virtual size_t GetAllocatedSize(void* ptr);
+
+ virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) {
+ static const char* kCentralCacheType = "tcmalloc.central";
+ static const char* kTransferCacheType = "tcmalloc.transfer";
+ static const char* kThreadCacheType = "tcmalloc.thread";
+ static const char* kPageHeapType = "tcmalloc.page";
+ static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped";
+ static const char* kLargeSpanType = "tcmalloc.large";
+ static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped";
+
+ v->clear();
+
+ // central class information
+ int64 prev_class_size = 0;
+ for (int cl = 1; cl < kNumClasses; ++cl) {
+ size_t class_size = Static::sizemap()->ByteSizeForClass(cl);
+ MallocExtension::FreeListInfo i;
+ i.min_object_size = prev_class_size + 1;
+ i.max_object_size = class_size;
+ i.total_bytes_free =
+ Static::central_cache()[cl].length() * class_size;
+ i.type = kCentralCacheType;
+ v->push_back(i);
+
+ // transfer cache
+ i.total_bytes_free =
+ Static::central_cache()[cl].tc_length() * class_size;
+ i.type = kTransferCacheType;
+ v->push_back(i);
+
+ prev_class_size = Static::sizemap()->ByteSizeForClass(cl);
+ }
+
+ // Add stats from per-thread heaps
+ uint64_t class_count[kNumClasses];
+ memset(class_count, 0, sizeof(class_count));
+ {
+ SpinLockHolder h(Static::pageheap_lock());
+ uint64_t thread_bytes = 0;
+ ThreadCache::GetThreadStats(&thread_bytes, class_count);
+ }
+
+ prev_class_size = 0;
+ for (int cl = 1; cl < kNumClasses; ++cl) {
+ MallocExtension::FreeListInfo i;
+ i.min_object_size = prev_class_size + 1;
+ i.max_object_size = Static::sizemap()->ByteSizeForClass(cl);
+ i.total_bytes_free =
+ class_count[cl] * Static::sizemap()->ByteSizeForClass(cl);
+ i.type = kThreadCacheType;
+ v->push_back(i);
+ }
+
+ // append page heap info
+ int64 page_count_normal[kMaxPages];
+ int64 page_count_returned[kMaxPages];
+ int64 span_count_normal;
+ int64 span_count_returned;
+ {
+ SpinLockHolder h(Static::pageheap_lock());
+ Static::pageheap()->GetClassSizes(page_count_normal,
+ page_count_returned,
+ &span_count_normal,
+ &span_count_returned);
+ }
+
+ // spans: mapped
+ MallocExtension::FreeListInfo span_info;
+ span_info.type = kLargeSpanType;
+ span_info.max_object_size = (numeric_limits<size_t>::max)();
+ span_info.min_object_size = kMaxPages << kPageShift;
+ span_info.total_bytes_free = span_count_normal << kPageShift;
+ v->push_back(span_info);
+
+ // spans: unmapped
+ span_info.type = kLargeUnmappedSpanType;
+ span_info.total_bytes_free = span_count_returned << kPageShift;
+ v->push_back(span_info);
+
+ for (int s = 1; s < kMaxPages; s++) {
+ MallocExtension::FreeListInfo i;
+ i.max_object_size = (s << kPageShift);
+ i.min_object_size = ((s - 1) << kPageShift);
+
+ i.type = kPageHeapType;
+ i.total_bytes_free = (s << kPageShift) * page_count_normal[s];
+ v->push_back(i);
+
+ i.type = kPageHeapUnmappedType;
+ i.total_bytes_free = (s << kPageShift) * page_count_returned[s];
+ v->push_back(i);
+ }
+ }
};
// The constructor allocates an object to ensure that initialization
diff --git a/src/tests/debugallocation_test.cc b/src/tests/debugallocation_test.cc
index c482187..f10e2dc 100644
--- a/src/tests/debugallocation_test.cc
+++ b/src/tests/debugallocation_test.cc
@@ -259,7 +259,10 @@ TEST(DebugAllocationTest, GetAllocatedSizeTest) {
}
TEST(DebugAllocationTest, HugeAlloc) {
- const size_t kTooBig = ~static_cast<size_t>(0);
+ // This must not be a const variable so it doesn't form an
+ // integral-constant-expression which can be *statically* rejected by the
+ // compiler as too large for the allocation.
+ size_t kTooBig = ~static_cast<size_t>(0);
void* a = NULL;
char* b = NULL;
@@ -273,8 +276,9 @@ TEST(DebugAllocationTest, HugeAlloc) {
EXPECT_EQ(NULL, b);
// kAlsoTooBig is small enough not to get caught by debugallocation's check,
- // but will still fall through to tcmalloc's check.
- const size_t kAlsoTooBig = kTooBig - 1024;
+ // but will still fall through to tcmalloc's check. This must also be
+ // a non-const variable. See kTooBig for more details.
+ size_t kAlsoTooBig = kTooBig - 1024;
a = malloc(kAlsoTooBig);
EXPECT_EQ(NULL, a);
diff --git a/src/tests/malloc_extension_test.cc b/src/tests/malloc_extension_test.cc
index ef76766..60f4919 100644
--- a/src/tests/malloc_extension_test.cc
+++ b/src/tests/malloc_extension_test.cc
@@ -39,6 +39,8 @@
#include <google/malloc_extension.h>
#include <google/malloc_extension_c.h>
+using STL_NAMESPACE::vector;
+
int main(int argc, char** argv) {
void* a = malloc(1000);
@@ -70,6 +72,30 @@ int main(int argc, char** argv) {
ASSERT_LE(MallocExtension_GetAllocatedSize(a), 5000);
ASSERT_GE(MallocExtension_GetEstimatedAllocatedSize(1000), 1000);
+ // test invariant: size of freelist = heap_size - allocated_bytes
+ free(malloc(32000));
+ size_t heap_size = 0;
+ size_t allocated = 0;
+ ASSERT_TRUE(MallocExtension::instance()->GetNumericProperty(
+ "generic.current_allocated_bytes", &allocated));
+ ASSERT_TRUE(MallocExtension::instance()->GetNumericProperty(
+ "generic.heap_size", &heap_size));
+ vector<MallocExtension::FreeListInfo> info;
+ MallocExtension::instance()->GetFreeListSizes(&info);
+
+ ASSERT_GE(info.size(), 0);
+ int64 free_bytes = 0;
+ for (vector<MallocExtension::FreeListInfo>::const_iterator it = info.begin();
+ it != info.end();
+ ++it) {
+ free_bytes += it->total_bytes_free;
+ }
+
+ // don't expect an exact equality since the calls to query the heap
+ // themselves free and allocate memory
+ size_t error = abs((heap_size - allocated) - free_bytes);
+ ASSERT_LT(error, 0.15 * heap_size);
+
free(a);
printf("DONE\n");
diff --git a/src/tests/sampling_test.cc b/src/tests/sampling_test.cc
index b75e70e..c1bd693 100644
--- a/src/tests/sampling_test.cc
+++ b/src/tests/sampling_test.cc
@@ -45,6 +45,8 @@
using std::string;
+extern "C" void* AllocateAllocate() ATTRIBUTE_NOINLINE;
+
extern "C" void* AllocateAllocate() {
// The VLOG's are mostly to discourage inlining
VLOG(1, "Allocating some more");
diff --git a/src/tests/sampling_test.sh b/src/tests/sampling_test.sh
index 8c96bc1..2a58426 100755
--- a/src/tests/sampling_test.sh
+++ b/src/tests/sampling_test.sh
@@ -81,13 +81,13 @@ mkdir "$OUTDIR" || die "Unable to create $OUTDIR"
echo "Testing heap output..."
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \
- | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
+ | grep '[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
|| die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"
echo "OK"
echo "Testing growth output..."
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \
- | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
+ | grep '[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
|| die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"
echo "OK"
diff --git a/src/tests/system-alloc_unittest.cc b/src/tests/system-alloc_unittest.cc
index a160a34..da76285 100644
--- a/src/tests/system-alloc_unittest.cc
+++ b/src/tests/system-alloc_unittest.cc
@@ -38,7 +38,9 @@
#include <inttypes.h> // another place uintptr_t might be defined
#endif
#include <sys/types.h>
+#include <algorithm>
#include "base/logging.h"
+#include "common.h"
#include "system-alloc.h"
class ArraySysAllocator : public SysAllocator {
@@ -98,6 +100,18 @@ static void TestBasicInvoked() {
CHECK(a.invoked_);
}
+#if 0 // could port this to various OSs, but won't bother for now
+TEST(AddressBits, CpuVirtualBits) {
+ // Check that kAddressBits is as least as large as either the number of bits
+ // in a pointer or as the number of virtual bits handled by the processor.
+ // To be effective this test must be run on each processor model.
+ const int kPointerBits = 8 * sizeof(void*);
+ const int kImplementedVirtualBits = NumImplementedVirtualBits();
+
+ CHECK_GE(kAddressBits, min(kImplementedVirtualBits, kPointerBits));
+}
+#endif
+
int main(int argc, char** argv) {
TestBasicInvoked();
diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc
index 522c0d9..c528846 100644
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@@ -100,17 +100,12 @@
# define cfree free // don't bother to try to test these obsolete fns
# define valloc malloc
# define pvalloc malloc
-# ifdef PERFTOOLS_NO_ALIGNED_MALLOC
-# define _aligned_malloc(size, alignment) malloc(size)
-# else
-# include <malloc.h> // for _aligned_malloc
-# endif
-# define memalign(alignment, size) _aligned_malloc(size, alignment)
-// Assume if we fail, it's because of out-of-memory.
-// Note, this isn't a perfect analogue: we don't enforce constraints on "align"
+// I'd like to map posix_memalign to _aligned_malloc, but _aligned_malloc
+// must be paired with _aligned_free (not normal free), which is too
+// invasive a change to how we allocate memory here. So just bail
# include <errno.h>
-# define posix_memalign(pptr, align, size) \
- ((*(pptr)=_aligned_malloc(size, align)) ? 0 : ENOMEM)
+# define memalign(alignment, size) malloc(size)
+# define posix_memalign(pptr, align, size) ((*(pptr)=malloc(size)) ? 0 : ENOMEM)
#endif
// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
@@ -1033,6 +1028,14 @@ static int RunAllTests(int argc, char** argv) {
free(p1);
VerifyDeleteHookWasCalled();
+ // Windows has _aligned_malloc. Let's test that that's captured too.
+#if (defined(_MSC_VER) || defined(__MINGW32__)) && !defined(PERFTOOLS_NO_ALIGNED_MALLOC)
+ p1 = _aligned_malloc(sizeof(p1) * 2, 64);
+ VerifyNewHookWasCalled();
+ _aligned_free(p1);
+ VerifyDeleteHookWasCalled();
+#endif
+
p1 = valloc(60);
VerifyNewHookWasCalled();
free(p1);
diff --git a/src/windows/config.h b/src/windows/config.h
index 6d6f771..0b91031 100644
--- a/src/windows/config.h
+++ b/src/windows/config.h
@@ -92,7 +92,7 @@
#undef HAVE_LINUX_PTRACE_H
/* Define to 1 if you have the <malloc.h> header file. */
-#undef HAVE_MALLOC_H
+#define HAVE_MALLOC_H 1
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc
index deb841b..fc57c82 100644
--- a/src/windows/patch_functions.cc
+++ b/src/windows/patch_functions.cc
@@ -175,7 +175,7 @@ class LibcInfo {
kNew, kNewArray, kDelete, kDeleteArray,
kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow,
// These are windows-only functions from malloc.h
- k_Msize, k_Expand, k_Aligned_malloc, k_Aligned_free,
+ k_Msize, k_Expand,
kNumFunctions
};
@@ -274,12 +274,12 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
const std::nothrow_t&) __THROW;
static size_t Perftools__msize(void *ptr) __THROW;
static void* Perftools__expand(void *ptr, size_t size) __THROW;
- static void* Perftools__aligned_malloc(size_t size, size_t alignment) __THROW;
- static void Perftools__aligned_free(void *ptr) __THROW;
// malloc.h also defines these functions:
+ // _aligned_malloc, _aligned_free,
// _recalloc, _aligned_offset_malloc, _aligned_realloc, _aligned_recalloc
// _aligned_offset_realloc, _aligned_offset_recalloc, _malloca, _freea
// But they seem pretty obscure, and I'm fine not overriding them for now.
+ // It may be they all call into malloc/free anyway.
};
// This is a subset of MODDULEENTRY32, that we need for patching.
@@ -300,10 +300,19 @@ struct ModuleEntryCopy {
ModuleEntryCopy(const MODULEINFO& mi) {
this->modBaseAddr = mi.lpBaseOfDll;
this->modBaseSize = mi.SizeOfImage;
- for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
- rgProcAddresses[i] = (GenericFnPtr)::GetProcAddress(
+ LPVOID modEndAddr = (char*)mi.lpBaseOfDll + mi.SizeOfImage;
+ for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) {
+ FARPROC target = ::GetProcAddress(
reinterpret_cast<const HMODULE>(mi.lpBaseOfDll),
LibcInfo::function_name(i));
+ // Sometimes a DLL forwards a function to a function in another
+ // DLL. We don't want to patch those forwarded functions --
+ // they'll get patched when the other DLL is processed.
+ if (target >= modBaseAddr && target < modEndAddr)
+ rgProcAddresses[i] = (GenericFnPtr)target;
+ else
+ rgProcAddresses[i] = (GenericFnPtr)NULL;
+ }
}
};
@@ -390,7 +399,7 @@ const char* const LibcInfo::function_name_[] = {
NULL, // kMangledNewArrayNothrow,
NULL, // kMangledDeleteNothrow,
NULL, // kMangledDeleteArrayNothrow,
- "_msize", "_expand", "_aligned_malloc", "_aligned_free",
+ "_msize", "_expand",
};
// For mingw, I can't patch the new/delete here, because the
@@ -421,14 +430,6 @@ const GenericFnPtr LibcInfo::static_fn_[] = {
#endif
(GenericFnPtr)&::_msize,
(GenericFnPtr)&::_expand,
-#ifdef PERFTOOLS_NO_ALIGNED_MALLOC // for older versions of mingw
- // _aligned_malloc isn't always available in mingw, so don't try to patch.
- (GenericFnPtr)NULL,
- (GenericFnPtr)NULL,
-#else
- (GenericFnPtr)&::_aligned_malloc,
- (GenericFnPtr)&::_aligned_free,
-#endif
};
template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = {
@@ -451,8 +452,6 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
(GenericFnPtr)&Perftools_deletearray_nothrow,
(GenericFnPtr)&Perftools__msize,
(GenericFnPtr)&Perftools__expand,
- (GenericFnPtr)&Perftools__aligned_malloc,
- (GenericFnPtr)&Perftools__aligned_free,
};
/*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = {
@@ -908,21 +907,6 @@ void* LibcInfoWithPatchFunctions<T>::Perftools__expand(void *ptr,
return NULL;
}
-template<int T>
-void* LibcInfoWithPatchFunctions<T>::Perftools__aligned_malloc(size_t size,
- size_t alignment)
- __THROW {
- void* result = do_memalign_or_cpp_memalign(alignment, size);
- MallocHook::InvokeNewHook(result, size);
- return result;
-}
-
-template<int T>
-void LibcInfoWithPatchFunctions<T>::Perftools__aligned_free(void *ptr) __THROW {
- MallocHook::InvokeDeleteHook(ptr);
- do_free_with_callback(ptr, (void (*)(void*))origstub_fn_[k_Aligned_free]);
-}
-
LPVOID WINAPI WindowsInfo::Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags,
DWORD_PTR dwBytes) {
LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD_PTR))
diff --git a/src/windows/port.cc b/src/windows/port.cc
index d62fa9d..32f3c31 100644
--- a/src/windows/port.cc
+++ b/src/windows/port.cc
@@ -83,6 +83,18 @@ extern "C" PERFTOOLS_DLL_DECL void* __sbrk(ptrdiff_t increment) {
return NULL;
}
+// We need to write to 'stderr' without having windows allocate memory.
+// The safest way is via a low-level call like WriteConsoleA(). But
+// even then we need to be sure to print in small bursts so as to not
+// require memory allocation.
+extern "C" PERFTOOLS_DLL_DECL void WriteToStderr(const char* buf, int len) {
+ // Looks like windows allocates for writes of >80 bytes
+ for (int i = 0; i < len; i += 80) {
+ write(STDERR_FILENO, buf + i, std::min(80, len - i));
+ }
+}
+
+
// -----------------------------------------------------------------------
// Threads code
diff --git a/src/windows/port.h b/src/windows/port.h
index 66745d1..81a68e6 100644
--- a/src/windows/port.h
+++ b/src/windows/port.h
@@ -277,6 +277,8 @@ enum { STDIN_FILENO = 0, STDOUT_FILENO = 1, STDERR_FILENO = 2 };
#define O_RDONLY _O_RDONLY
#endif
+extern "C" PERFTOOLS_DLL_DECL void WriteToStderr(const char* buf, int len);
+
// ----------------------------------- SYSTEM/PROCESS
typedef int pid_t;
#define getpid _getpid